databricks-switch-plugin 0.1.0rc3__tar.gz → 0.1.1.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/PKG-INFO +6 -4
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/README.md +5 -3
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/pyproject.toml +18 -2
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/__init__.py +1 -1
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/api/__init__.py +1 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/api/installer.py +24 -41
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/api/job_parameters.py +25 -29
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/api/job_runner.py +4 -7
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/batch_inference_helper.py +52 -37
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/cell_split_helper.py +11 -13
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/conversion_prompt_helper.py +3 -4
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/conversion_result_clean_helper.py +2 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/llm_token_count_helper.py +15 -6
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/notebook_export_helper.py +19 -19
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/spark_sql_extract_helper.py +12 -4
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/__init__.py +2 -1
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/builtin_prompt.py +18 -4
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/comment_language.py +10 -17
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/log_level.py +4 -2
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/notebook_parameters.py +8 -6
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/source_format.py +2 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/table_config.py +2 -1
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/target_type.py +4 -2
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/utils/common_utils.py +1 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/utils/databricks_credentials.py +4 -7
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/utils/parameter_validator.py +5 -2
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/utils/token_utils.py +5 -2
- databricks_switch_plugin-0.1.1.dev2/switch/py.typed +2 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/.gitignore +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/LICENSE +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/NOTICE +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/lsp/config.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/00_main.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/__init__.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/exporters/__init__.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/exporters/convert_notebook_to_sql.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/exporters/export_to_file.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/exporters/export_to_notebook.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/external_model/external_model_amazon_bedrock.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/external_model/external_model_azure_openai.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/external_model/helper_external_model.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/notebook_utils.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/orchestrators/__init__.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/orchestrators/orchestrate_to_file.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/orchestrators/orchestrate_to_notebook.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/processors/__init__.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/processors/analyze_input_files.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/processors/convert_with_llm.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/processors/fix_syntax_with_llm.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/processors/split_code_into_cells.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/processors/validate_python_notebook.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/__init__.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/__init__.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/utils/__init__.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/validation_utils.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/__init__.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/code_to_databricks_python_notebook/python.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/code_to_databricks_python_notebook/scala.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/common/sql_to_databricks_notebook_common_python.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/databricks_notebook_to_databricks_notebook/python_to_sql.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/mssql.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/mysql.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/netezza.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/oracle.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/postgresql.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/redshift.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/snowflake.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/teradata.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/workflow_to_databricks_jobs/airflow.yml +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/testing/__init__.py +0 -0
- {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/testing/e2e_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: databricks-switch-plugin
|
|
3
|
-
Version: 0.1.0rc3
|
|
3
|
+
Version: 0.1.1.dev2
|
|
4
4
|
Summary: LLM-powered tool to convert SQL, code, and workflow files into Databricks notebooks.
|
|
5
5
|
Project-URL: Documentation, https://databrickslabs.github.io/lakebridge
|
|
6
6
|
Project-URL: Issues, https://github.com/databrickslabs/lakebridge/issues
|
|
@@ -57,6 +57,8 @@ Description-Content-Type: text/markdown
|
|
|
57
57
|
# Switch
|
|
58
58
|
LLM-Powered Code Conversion Plugin for Lakebridge
|
|
59
59
|
|
|
60
|
+
[codecov](https://codecov.io/gh/databrickslabs/switch)
|
|
61
|
+
|
|
60
62
|
## Project Description
|
|
61
63
|
Switch is a Lakebridge transpiler plugin that transforms SQL and other source formats into Databricks-compatible notebooks using Large Language Models (LLMs). As a core component of the Lakebridge migration platform, Switch provides automated code conversion capabilities through a multi-stage processing pipeline designed for large-scale platform migrations.
|
|
62
64
|
|
|
@@ -90,14 +92,14 @@ For advanced testing or direct control, you can deploy Switch directly to a Data
|
|
|
90
92
|
|
|
91
93
|
**Cell 1: Install Switch package**
|
|
92
94
|
|
|
93
|
-
From PyPI (stable):
|
|
95
|
+
From PyPI (stable version):
|
|
94
96
|
```python
|
|
95
97
|
%pip install databricks-switch-plugin
|
|
96
98
|
```
|
|
97
99
|
|
|
98
|
-
|
|
100
|
+
For prerelease versions (dev/rc):
|
|
99
101
|
```python
|
|
100
|
-
%pip install --
|
|
102
|
+
%pip install --pre databricks-switch-plugin
|
|
101
103
|
```
|
|
102
104
|
|
|
103
105
|
**Cell 2: Deploy Switch to workspace**
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# Switch
|
|
2
2
|
LLM-Powered Code Conversion Plugin for Lakebridge
|
|
3
3
|
|
|
4
|
+
[codecov](https://codecov.io/gh/databrickslabs/switch)
|
|
5
|
+
|
|
4
6
|
## Project Description
|
|
5
7
|
Switch is a Lakebridge transpiler plugin that transforms SQL and other source formats into Databricks-compatible notebooks using Large Language Models (LLMs). As a core component of the Lakebridge migration platform, Switch provides automated code conversion capabilities through a multi-stage processing pipeline designed for large-scale platform migrations.
|
|
6
8
|
|
|
@@ -34,14 +36,14 @@ For advanced testing or direct control, you can deploy Switch directly to a Data
|
|
|
34
36
|
|
|
35
37
|
**Cell 1: Install Switch package**
|
|
36
38
|
|
|
37
|
-
From PyPI (stable):
|
|
39
|
+
From PyPI (stable version):
|
|
38
40
|
```python
|
|
39
41
|
%pip install databricks-switch-plugin
|
|
40
42
|
```
|
|
41
43
|
|
|
42
|
-
|
|
44
|
+
For prerelease versions (dev/rc):
|
|
43
45
|
```python
|
|
44
|
-
%pip install --
|
|
46
|
+
%pip install --pre databricks-switch-plugin
|
|
45
47
|
```
|
|
46
48
|
|
|
47
49
|
**Cell 2: Deploy Switch to workspace**
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "databricks-switch-plugin"
|
|
3
|
-
version = "0.1.
|
|
3
|
+
version = "0.1.1dev2"
|
|
4
4
|
description = "LLM-powered tool to convert SQL, code, and workflow files into Databricks notebooks."
|
|
5
5
|
license-files = { paths = ["LICENSE", "NOTICE"] }
|
|
6
6
|
keywords = ["Databricks", "SQL", "Migration", "LLM", "Conversion"]
|
|
@@ -326,4 +326,20 @@ allow-global-unused-variables = true
|
|
|
326
326
|
callbacks = ["cb_", "_cb"]
|
|
327
327
|
dummy-variables-rgx = "_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_"
|
|
328
328
|
ignored-argument-names = "_.*|^ignored_|^unused_"
|
|
329
|
-
redefining-builtins-modules = ["six.moves", "past.builtins", "future.builtins", "builtins", "io"]
|
|
329
|
+
redefining-builtins-modules = ["six.moves", "past.builtins", "future.builtins", "builtins", "io"]
|
|
330
|
+
|
|
331
|
+
[tool.coverage.run]
|
|
332
|
+
source = ["switch.api", "switch.notebooks.pyscripts"]
|
|
333
|
+
omit = [
|
|
334
|
+
"*/tests/*",
|
|
335
|
+
"switch/notebooks/*.py",
|
|
336
|
+
"*/test_*.py"
|
|
337
|
+
]
|
|
338
|
+
|
|
339
|
+
[tool.coverage.report]
|
|
340
|
+
exclude_lines = [
|
|
341
|
+
"pragma: no cover",
|
|
342
|
+
"def __repr__",
|
|
343
|
+
"raise AssertionError",
|
|
344
|
+
"raise NotImplementedError"
|
|
345
|
+
]
|
{databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/api/installer.py
RENAMED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Switch installer for Databricks workspace deployment"""
|
|
2
|
+
|
|
2
3
|
import logging
|
|
3
4
|
import os
|
|
4
5
|
from dataclasses import dataclass
|
|
@@ -16,6 +17,7 @@ logger = logging.getLogger(__name__)
|
|
|
16
17
|
@dataclass
|
|
17
18
|
class InstallResult:
|
|
18
19
|
"""Result of installation"""
|
|
20
|
+
|
|
19
21
|
job_id: int
|
|
20
22
|
job_name: str
|
|
21
23
|
job_url: str
|
|
@@ -26,6 +28,7 @@ class InstallResult:
|
|
|
26
28
|
@dataclass
|
|
27
29
|
class UninstallResult:
|
|
28
30
|
"""Result of uninstallation"""
|
|
31
|
+
|
|
29
32
|
success: bool
|
|
30
33
|
message: str = ""
|
|
31
34
|
|
|
@@ -37,6 +40,7 @@ class SwitchInstaller:
|
|
|
37
40
|
- install(): Install Switch (optionally removes previous installation)
|
|
38
41
|
- uninstall(): Remove Switch installation
|
|
39
42
|
"""
|
|
43
|
+
|
|
40
44
|
# Directory and path constants
|
|
41
45
|
_WORKSPACE_BASE_DIR_NAME = ".lakebridge-switch"
|
|
42
46
|
_MAIN_NOTEBOOK_PATH = "switch/notebooks/00_main"
|
|
@@ -58,10 +62,12 @@ class SwitchInstaller:
|
|
|
58
62
|
"""
|
|
59
63
|
self.workspace_client = workspace_client
|
|
60
64
|
|
|
61
|
-
def install(
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
+
def install(
|
|
66
|
+
self,
|
|
67
|
+
default_parameters: Optional[SwitchJobParameters] = None,
|
|
68
|
+
previous_job_id: Optional[int] = None,
|
|
69
|
+
previous_switch_home: Optional[str] = None,
|
|
70
|
+
) -> InstallResult:
|
|
65
71
|
"""Install Switch for current user. Optionally cleans up previous installation.
|
|
66
72
|
|
|
67
73
|
Args:
|
|
@@ -77,13 +83,12 @@ class SwitchInstaller:
|
|
|
77
83
|
"""
|
|
78
84
|
# Clean up previous installation if specified
|
|
79
85
|
if previous_job_id or previous_switch_home:
|
|
80
|
-
logger.info(
|
|
81
|
-
|
|
82
|
-
uninstall_result = self.uninstall(
|
|
83
|
-
job_id=previous_job_id,
|
|
84
|
-
switch_home=previous_switch_home
|
|
86
|
+
logger.info(
|
|
87
|
+
f"Cleaning up previous installation: job_id={previous_job_id}, switch_home={previous_switch_home}"
|
|
85
88
|
)
|
|
86
89
|
|
|
90
|
+
uninstall_result = self.uninstall(job_id=previous_job_id, switch_home=previous_switch_home)
|
|
91
|
+
|
|
87
92
|
if uninstall_result.success:
|
|
88
93
|
logger.info("Successfully cleaned up previous installation")
|
|
89
94
|
else:
|
|
@@ -104,16 +109,10 @@ class SwitchInstaller:
|
|
|
104
109
|
logger.info(f"Successfully installed Switch: job_id={job_id}, job_name={self.JOB_NAME}")
|
|
105
110
|
|
|
106
111
|
return InstallResult(
|
|
107
|
-
job_id=job_id,
|
|
108
|
-
job_name=self.JOB_NAME,
|
|
109
|
-
job_url=job_url,
|
|
110
|
-
switch_home=switch_home,
|
|
111
|
-
created_by=created_by
|
|
112
|
+
job_id=job_id, job_name=self.JOB_NAME, job_url=job_url, switch_home=switch_home, created_by=created_by
|
|
112
113
|
)
|
|
113
114
|
|
|
114
|
-
def uninstall(self,
|
|
115
|
-
job_id: Optional[int] = None,
|
|
116
|
-
switch_home: Optional[str] = None) -> UninstallResult:
|
|
115
|
+
def uninstall(self, job_id: Optional[int] = None, switch_home: Optional[str] = None) -> UninstallResult:
|
|
117
116
|
"""Uninstall Switch
|
|
118
117
|
|
|
119
118
|
Args:
|
|
@@ -153,10 +152,7 @@ class SwitchInstaller:
|
|
|
153
152
|
else:
|
|
154
153
|
message = f"Failed to uninstall Switch: {'; '.join(errors)}"
|
|
155
154
|
|
|
156
|
-
return UninstallResult(
|
|
157
|
-
success=success,
|
|
158
|
-
message=message
|
|
159
|
-
)
|
|
155
|
+
return UninstallResult(success=success, message=message)
|
|
160
156
|
|
|
161
157
|
def _deploy_switch_package(self) -> str:
|
|
162
158
|
"""Deploy Switch package to workspace for both PyPI and development installations.
|
|
@@ -190,6 +186,7 @@ class SwitchInstaller:
|
|
|
190
186
|
|
|
191
187
|
# Get site-packages directory for PyPI installations
|
|
192
188
|
import switch
|
|
189
|
+
|
|
193
190
|
site_packages_dir = os.path.dirname(os.path.dirname(switch.__file__))
|
|
194
191
|
|
|
195
192
|
# Define search locations: PyPI install location first, then development location
|
|
@@ -206,9 +203,7 @@ class SwitchInstaller:
|
|
|
206
203
|
for root, _, files in os.walk(switch_package_dir):
|
|
207
204
|
for file_name in files:
|
|
208
205
|
# Skip unwanted files
|
|
209
|
-
if
|
|
210
|
-
file_name.endswith(('.pyc', '.pyo')) or
|
|
211
|
-
file_name == '__pycache__'):
|
|
206
|
+
if file_name.startswith('.') or file_name.endswith(('.pyc', '.pyo')) or file_name == '__pycache__':
|
|
212
207
|
continue
|
|
213
208
|
|
|
214
209
|
local_path = os.path.join(root, file_name)
|
|
@@ -225,10 +220,7 @@ class SwitchInstaller:
|
|
|
225
220
|
with open(local_path, 'rb') as f:
|
|
226
221
|
content = f.read()
|
|
227
222
|
self.workspace_client.workspace.upload(
|
|
228
|
-
path=remote_path,
|
|
229
|
-
content=content,
|
|
230
|
-
format=ImportFormat.AUTO,
|
|
231
|
-
overwrite=True
|
|
223
|
+
path=remote_path, content=content, format=ImportFormat.AUTO, overwrite=True
|
|
232
224
|
)
|
|
233
225
|
logger.debug(f"Uploaded {rel_path}")
|
|
234
226
|
|
|
@@ -263,10 +255,8 @@ class SwitchInstaller:
|
|
|
263
255
|
task_key=self._MAIN_TASK_KEY,
|
|
264
256
|
max_retries=self._MAIN_TASK_MAX_RETRIES,
|
|
265
257
|
notebook_task=NotebookTask(
|
|
266
|
-
notebook_path=notebook_path,
|
|
267
|
-
|
|
268
|
-
source=Source.WORKSPACE
|
|
269
|
-
)
|
|
258
|
+
notebook_path=notebook_path, base_parameters=template_params, source=Source.WORKSPACE
|
|
259
|
+
),
|
|
270
260
|
)
|
|
271
261
|
|
|
272
262
|
# Create the job with created_by tag
|
|
@@ -274,10 +264,7 @@ class SwitchInstaller:
|
|
|
274
264
|
tags = {self.JOB_TAG_CREATED_BY: current_user}
|
|
275
265
|
|
|
276
266
|
response = self.workspace_client.jobs.create(
|
|
277
|
-
name=self.JOB_NAME,
|
|
278
|
-
tasks=[task],
|
|
279
|
-
max_concurrent_runs=self._JOB_MAX_CONCURRENT_RUNS,
|
|
280
|
-
tags=tags
|
|
267
|
+
name=self.JOB_NAME, tasks=[task], max_concurrent_runs=self._JOB_MAX_CONCURRENT_RUNS, tags=tags
|
|
281
268
|
)
|
|
282
269
|
if response.job_id is None:
|
|
283
270
|
raise ValueError("Failed to create job: job_id is None")
|
|
@@ -316,11 +303,7 @@ class SwitchInstaller:
|
|
|
316
303
|
logger.debug(f"Found {filename} at {file_path}, uploading to {dest_path}")
|
|
317
304
|
with open(file_path, 'rb') as f:
|
|
318
305
|
content = f.read()
|
|
319
|
-
self.workspace_client.workspace.upload(
|
|
320
|
-
path=dest_path,
|
|
321
|
-
content=content,
|
|
322
|
-
format=ImportFormat.AUTO
|
|
323
|
-
)
|
|
306
|
+
self.workspace_client.workspace.upload(path=dest_path, content=content, format=ImportFormat.AUTO)
|
|
324
307
|
return True
|
|
325
308
|
|
|
326
309
|
logger.debug(f"{filename} not found in any of the provided locations")
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Switch job parameters and related definitions"""
|
|
2
|
+
|
|
2
3
|
import dataclasses
|
|
3
4
|
import json
|
|
4
5
|
from dataclasses import dataclass
|
|
@@ -23,27 +24,28 @@ class SwitchJobParameters:
|
|
|
23
24
|
2. Template parameters for SwitchInstaller
|
|
24
25
|
3. Default value management
|
|
25
26
|
"""
|
|
27
|
+
|
|
26
28
|
# Basic settings
|
|
27
29
|
input_dir: Optional[str] = None
|
|
28
30
|
output_dir: Optional[str] = None
|
|
29
31
|
result_catalog: Optional[str] = None
|
|
30
32
|
result_schema: Optional[str] = None
|
|
31
|
-
builtin_prompt: Optional[BuiltinPrompt] = None
|
|
33
|
+
builtin_prompt: Optional[BuiltinPrompt | str] = None
|
|
32
34
|
|
|
33
35
|
# Conversion settings
|
|
34
|
-
source_format: Optional[SourceFormat] = None
|
|
35
|
-
target_type: Optional[TargetType] = None
|
|
36
|
+
source_format: Optional[SourceFormat | str] = None
|
|
37
|
+
target_type: Optional[TargetType | str] = None
|
|
36
38
|
output_extension: Optional[str] = None
|
|
37
39
|
|
|
38
40
|
# Execution settings
|
|
39
41
|
endpoint_name: Optional[str] = None
|
|
40
42
|
concurrency: Optional[int] = None
|
|
41
43
|
max_fix_attempts: Optional[int] = None
|
|
42
|
-
log_level: Optional[LogLevel] = None
|
|
44
|
+
log_level: Optional[LogLevel | str] = None
|
|
43
45
|
|
|
44
46
|
# Advanced settings
|
|
45
47
|
token_count_threshold: Optional[int] = None
|
|
46
|
-
comment_lang: Optional[CommentLanguage] = None
|
|
48
|
+
comment_lang: Optional[CommentLanguage | str] = None
|
|
47
49
|
conversion_prompt_yaml: Optional[str] = None
|
|
48
50
|
sql_output_dir: Optional[str] = None
|
|
49
51
|
|
|
@@ -99,7 +101,7 @@ class SwitchJobParameters:
|
|
|
99
101
|
"""Validate parameters
|
|
100
102
|
|
|
101
103
|
Args:
|
|
102
|
-
require_all: If True, validate all required parameters.
|
|
104
|
+
require_all: If True, validate all required parameters.
|
|
103
105
|
If False, only validate provided parameters.
|
|
104
106
|
|
|
105
107
|
Raises:
|
|
@@ -118,29 +120,19 @@ class SwitchJobParameters:
|
|
|
118
120
|
|
|
119
121
|
# Either builtin_prompt or conversion_prompt_yaml must be specified
|
|
120
122
|
if not self.builtin_prompt and not self.conversion_prompt_yaml:
|
|
121
|
-
raise ValueError(
|
|
122
|
-
"Either builtin_prompt or conversion_prompt_yaml must be specified"
|
|
123
|
-
)
|
|
123
|
+
raise ValueError("Either builtin_prompt or conversion_prompt_yaml must be specified")
|
|
124
124
|
|
|
125
125
|
# Validate source format if provided
|
|
126
126
|
if self.source_format and self.source_format not in SourceFormat:
|
|
127
|
-
raise ValueError(
|
|
128
|
-
f"source_format must be one of {list(SourceFormat)}, "
|
|
129
|
-
f"got '{self.source_format}'"
|
|
130
|
-
)
|
|
127
|
+
raise ValueError(f"source_format must be one of {list(SourceFormat)}, " f"got '{self.source_format}'")
|
|
131
128
|
|
|
132
129
|
# Validate target type if provided
|
|
133
130
|
if self.target_type and self.target_type not in TargetType:
|
|
134
|
-
raise ValueError(
|
|
135
|
-
f"target_type must be one of {list(TargetType)}, "
|
|
136
|
-
f"got '{self.target_type}'"
|
|
137
|
-
)
|
|
131
|
+
raise ValueError(f"target_type must be one of {list(TargetType)}, " f"got '{self.target_type}'")
|
|
138
132
|
|
|
139
133
|
# Validate output_extension for file target type
|
|
140
134
|
if self.target_type == TargetType.FILE and not self.output_extension:
|
|
141
|
-
raise ValueError(
|
|
142
|
-
"output_extension is required when target_type is 'file'"
|
|
143
|
-
)
|
|
135
|
+
raise ValueError("output_extension is required when target_type is 'file'")
|
|
144
136
|
|
|
145
137
|
# Validate numeric parameters if they're not None
|
|
146
138
|
if self.token_count_threshold is not None and self.token_count_threshold <= 0:
|
|
@@ -163,7 +155,13 @@ class SwitchJobParameters:
|
|
|
163
155
|
required_fields = {"input_dir", "output_dir", "result_catalog", "result_schema"}
|
|
164
156
|
|
|
165
157
|
# Define fields that should always be empty in template
|
|
166
|
-
template_empty_fields = {
|
|
158
|
+
template_empty_fields = {
|
|
159
|
+
"builtin_prompt",
|
|
160
|
+
"conversion_prompt_yaml",
|
|
161
|
+
"output_extension",
|
|
162
|
+
"request_params",
|
|
163
|
+
"sql_output_dir",
|
|
164
|
+
}
|
|
167
165
|
|
|
168
166
|
# Process all dataclass fields dynamically
|
|
169
167
|
template_params = {}
|
|
@@ -199,25 +197,25 @@ class SwitchJobParameters:
|
|
|
199
197
|
|
|
200
198
|
def _serialize_field_value(self, field_name: str, value: Any) -> str:
|
|
201
199
|
"""Serialize field value to string for job parameters
|
|
202
|
-
|
|
200
|
+
|
|
203
201
|
Args:
|
|
204
202
|
field_name: Name of the dataclass field
|
|
205
203
|
value: Value to serialize
|
|
206
|
-
|
|
204
|
+
|
|
207
205
|
Returns:
|
|
208
206
|
String representation suitable for Databricks job parameters
|
|
209
207
|
"""
|
|
210
208
|
if value is None:
|
|
211
209
|
return ""
|
|
212
|
-
|
|
210
|
+
|
|
213
211
|
# Handle enum types
|
|
214
212
|
if hasattr(value, 'value'):
|
|
215
213
|
return value.value
|
|
216
|
-
|
|
214
|
+
|
|
217
215
|
# Handle request_params special case (JSON serialization)
|
|
218
216
|
if field_name == 'request_params' and isinstance(value, dict):
|
|
219
217
|
return json.dumps(value)
|
|
220
|
-
|
|
218
|
+
|
|
221
219
|
# Handle all other types as string
|
|
222
220
|
return str(value)
|
|
223
221
|
|
|
@@ -234,9 +232,7 @@ def _load_switch_config_defaults() -> Dict[str, Any]:
|
|
|
234
232
|
config_path = Path(__file__).parent.parent.parent / "lsp" / "config.yml"
|
|
235
233
|
|
|
236
234
|
if not config_path.exists():
|
|
237
|
-
raise ValueError(
|
|
238
|
-
f"Switch configuration file not found at {config_path}"
|
|
239
|
-
)
|
|
235
|
+
raise ValueError(f"Switch configuration file not found at {config_path}")
|
|
240
236
|
|
|
241
237
|
with open(config_path, 'r') as f:
|
|
242
238
|
config_data = yaml.safe_load(f)
|
{databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/api/job_runner.py
RENAMED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Switch job runner for SQL conversion operations"""
|
|
2
|
+
|
|
2
3
|
import logging
|
|
3
4
|
|
|
4
5
|
from databricks.sdk import WorkspaceClient
|
|
@@ -39,10 +40,7 @@ class SwitchJobRunner:
|
|
|
39
40
|
|
|
40
41
|
# Run the job with parameters
|
|
41
42
|
try:
|
|
42
|
-
response = self.workspace_client.jobs.run_now(
|
|
43
|
-
job_id=self.job_id,
|
|
44
|
-
notebook_params=job_params
|
|
45
|
-
)
|
|
43
|
+
response = self.workspace_client.jobs.run_now(job_id=self.job_id, notebook_params=job_params)
|
|
46
44
|
run_id = response.run_id
|
|
47
45
|
logger.info(f"Started job run {run_id} for job {self.job_id}")
|
|
48
46
|
return run_id
|
|
@@ -69,12 +67,11 @@ class SwitchJobRunner:
|
|
|
69
67
|
# Run the job synchronously and wait for completion
|
|
70
68
|
try:
|
|
71
69
|
from datetime import timedelta
|
|
70
|
+
|
|
72
71
|
timeout_delta = timedelta(seconds=timeout_seconds)
|
|
73
72
|
|
|
74
73
|
run = self.workspace_client.jobs.run_now_and_wait(
|
|
75
|
-
job_id=self.job_id,
|
|
76
|
-
notebook_params=job_params,
|
|
77
|
-
timeout=timeout_delta
|
|
74
|
+
job_id=self.job_id, notebook_params=job_params, timeout=timeout_delta
|
|
78
75
|
)
|
|
79
76
|
if run.state and run.state.life_cycle_state:
|
|
80
77
|
logger.info(f"Job run {run.run_id} completed with state: {run.state.life_cycle_state}")
|
|
@@ -6,6 +6,7 @@ Key components:
|
|
|
6
6
|
- BatchInferenceManager: Manages concurrent processing of multiple requests
|
|
7
7
|
- BatchInferenceRequest/Response: Data structures for request/response handling
|
|
8
8
|
"""
|
|
9
|
+
|
|
9
10
|
import asyncio
|
|
10
11
|
import json
|
|
11
12
|
import logging
|
|
@@ -16,9 +17,14 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
|
16
17
|
|
|
17
18
|
import httpx
|
|
18
19
|
from httpx import codes
|
|
19
|
-
from tenacity import (
|
|
20
|
-
|
|
21
|
-
|
|
20
|
+
from tenacity import (
|
|
21
|
+
RetryCallState,
|
|
22
|
+
RetryError,
|
|
23
|
+
retry,
|
|
24
|
+
retry_if_exception_type,
|
|
25
|
+
stop_after_attempt,
|
|
26
|
+
wait_random_exponential,
|
|
27
|
+
)
|
|
22
28
|
|
|
23
29
|
from ..utils.databricks_credentials import DatabricksCredentials
|
|
24
30
|
from ..utils.common_utils import setup_logger
|
|
@@ -50,6 +56,7 @@ class BatchInferenceRequest:
|
|
|
50
56
|
system_message (str): The system message to guide the model's behavior.
|
|
51
57
|
few_shots (Optional[List[Dict[str, str]]]): Optional few-shot examples for the model.
|
|
52
58
|
"""
|
|
59
|
+
|
|
53
60
|
index: int
|
|
54
61
|
text: str
|
|
55
62
|
system_message: str
|
|
@@ -66,6 +73,7 @@ class TokenUsage:
|
|
|
66
73
|
completion_tokens (int): Number of tokens used for the completion/response.
|
|
67
74
|
total_tokens (int): Total number of tokens used (prompt + completion).
|
|
68
75
|
"""
|
|
76
|
+
|
|
69
77
|
prompt_tokens: int
|
|
70
78
|
completion_tokens: int
|
|
71
79
|
total_tokens: int
|
|
@@ -83,6 +91,7 @@ class BatchInferenceResponse:
|
|
|
83
91
|
error (Optional[str]): Any error message, if an error occurred during processing.
|
|
84
92
|
processing_time_seconds (Optional[float]): Total time in seconds spent processing this request.
|
|
85
93
|
"""
|
|
94
|
+
|
|
86
95
|
index: int
|
|
87
96
|
content: Optional[str]
|
|
88
97
|
token_usage: Optional[TokenUsage]
|
|
@@ -137,17 +146,16 @@ class BatchInferenceManager:
|
|
|
137
146
|
counter = AsyncCounter()
|
|
138
147
|
start_time = time.perf_counter()
|
|
139
148
|
|
|
140
|
-
tasks = [self._process_inference(request, semaphore, counter, start_time)
|
|
141
|
-
for request in requests]
|
|
149
|
+
tasks = [self._process_inference(request, semaphore, counter, start_time) for request in requests]
|
|
142
150
|
responses = await asyncio.gather(*tasks)
|
|
143
151
|
await self.client.close()
|
|
144
152
|
|
|
145
153
|
self.logger.info(f"Completed batch inference for {len(requests)} requests")
|
|
146
154
|
return responses
|
|
147
155
|
|
|
148
|
-
async def _process_inference(
|
|
149
|
-
|
|
150
|
-
|
|
156
|
+
async def _process_inference(
|
|
157
|
+
self, request: BatchInferenceRequest, semaphore: asyncio.Semaphore, counter: 'AsyncCounter', start_time: float
|
|
158
|
+
) -> BatchInferenceResponse:
|
|
151
159
|
"""
|
|
152
160
|
Process inference for a single request.
|
|
153
161
|
|
|
@@ -171,10 +179,11 @@ class BatchInferenceManager:
|
|
|
171
179
|
content=content,
|
|
172
180
|
token_usage=token_usage,
|
|
173
181
|
error=None,
|
|
174
|
-
processing_time_seconds=processing_time_seconds
|
|
182
|
+
processing_time_seconds=processing_time_seconds,
|
|
175
183
|
)
|
|
176
184
|
self.logger.info(
|
|
177
|
-
f"Completed inference for index {request.index} in {processing_time_seconds:.2f} seconds"
|
|
185
|
+
f"Completed inference for index {request.index} in {processing_time_seconds:.2f} seconds"
|
|
186
|
+
)
|
|
178
187
|
except Exception as e:
|
|
179
188
|
response = await self._handle_error(e, request.index)
|
|
180
189
|
|
|
@@ -226,11 +235,7 @@ class BatchInferenceManager:
|
|
|
226
235
|
self.client._log_general_error(e, request_index, logger=self.logger)
|
|
227
236
|
|
|
228
237
|
return BatchInferenceResponse(
|
|
229
|
-
index=request_index,
|
|
230
|
-
content=None,
|
|
231
|
-
token_usage=None,
|
|
232
|
-
error=error_message,
|
|
233
|
-
processing_time_seconds=None
|
|
238
|
+
index=request_index, content=None, token_usage=None, error=error_message, processing_time_seconds=None
|
|
234
239
|
)
|
|
235
240
|
|
|
236
241
|
|
|
@@ -292,6 +297,7 @@ class AsyncChatClient:
|
|
|
292
297
|
httpx.HTTPStatusError: If an HTTP error occurs that can't be resolved by retrying.
|
|
293
298
|
Exception: For any other unexpected errors.
|
|
294
299
|
"""
|
|
300
|
+
|
|
295
301
|
@retry(
|
|
296
302
|
retry=retry_if_exception_type(httpx.HTTPStatusError),
|
|
297
303
|
stop=lambda rs: self._get_stop_condition(rs),
|
|
@@ -320,11 +326,7 @@ class AsyncChatClient:
|
|
|
320
326
|
request_body = {"messages": messages}
|
|
321
327
|
if self.request_params:
|
|
322
328
|
request_body.update(self.request_params)
|
|
323
|
-
request_args = {
|
|
324
|
-
'url': url,
|
|
325
|
-
'headers': headers,
|
|
326
|
-
'json': request_body
|
|
327
|
-
}
|
|
329
|
+
request_args = {'url': url, 'headers': headers, 'json': request_body}
|
|
328
330
|
self._log_request_args(request_args, request.index, level=logging.INFO)
|
|
329
331
|
|
|
330
332
|
# Send the request
|
|
@@ -365,13 +367,16 @@ class AsyncChatClient:
|
|
|
365
367
|
# If the response indicates that the finished reason is "length",
|
|
366
368
|
# we need to continue the conversation by appending the last message
|
|
367
369
|
messages.append({"role": "assistant", "content": content})
|
|
368
|
-
messages.append(
|
|
369
|
-
|
|
370
|
+
messages.append(
|
|
371
|
+
{
|
|
372
|
+
"role": "user",
|
|
373
|
+
"content": f"The previous response ended with: '{content[-50:]}'. "
|
|
374
|
+
f"Please continue exactly from this point without repeating any content.",
|
|
375
|
+
}
|
|
376
|
+
)
|
|
370
377
|
|
|
371
378
|
return total_content, TokenUsage(
|
|
372
|
-
prompt_tokens=prompt_tokens,
|
|
373
|
-
completion_tokens=completion_tokens,
|
|
374
|
-
total_tokens=total_tokens
|
|
379
|
+
prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, total_tokens=total_tokens
|
|
375
380
|
)
|
|
376
381
|
|
|
377
382
|
# Handle HTTP errors (e.g., 4xx, 5xx)
|
|
@@ -427,7 +432,7 @@ class AsyncChatClient:
|
|
|
427
432
|
|
|
428
433
|
Args:
|
|
429
434
|
content: The content field from the API response, which could be either
|
|
430
|
-
a string (standard format) or a list of structured objects
|
|
435
|
+
a string (standard format) or a list of structured objects
|
|
431
436
|
(Extended thinking mode with reasoning and text objects).
|
|
432
437
|
|
|
433
438
|
Returns:
|
|
@@ -441,9 +446,7 @@ class AsyncChatClient:
|
|
|
441
446
|
return content
|
|
442
447
|
elif isinstance(content, list):
|
|
443
448
|
return "".join(
|
|
444
|
-
content_part.get("text", "")
|
|
445
|
-
for content_part in content
|
|
446
|
-
if content_part.get("type") == "text"
|
|
449
|
+
content_part.get("text", "") for content_part in content if content_part.get("type") == "text"
|
|
447
450
|
)
|
|
448
451
|
raise TypeError(f"Unexpected content format: {type(content).__name__}")
|
|
449
452
|
|
|
@@ -519,7 +522,7 @@ class AsyncChatClient:
|
|
|
519
522
|
"required parameter",
|
|
520
523
|
"missing parameter",
|
|
521
524
|
"invalid format",
|
|
522
|
-
"not a valid"
|
|
525
|
+
"not a valid",
|
|
523
526
|
]
|
|
524
527
|
|
|
525
528
|
for pattern in validation_patterns:
|
|
@@ -553,8 +556,7 @@ class AsyncChatClient:
|
|
|
553
556
|
This is a temporary workaround for Databricks Claude endpoints
|
|
554
557
|
that may return 400 instead of 429 for certain size-related limitations.
|
|
555
558
|
"""
|
|
556
|
-
if
|
|
557
|
-
"databricks-claude" in self.endpoint_name):
|
|
559
|
+
if error.response.status_code == codes.BAD_REQUEST and "databricks-claude" in self.endpoint_name:
|
|
558
560
|
try:
|
|
559
561
|
response_data = error.response.json()
|
|
560
562
|
if response_data.get("message") is None:
|
|
@@ -567,7 +569,9 @@ class AsyncChatClient:
|
|
|
567
569
|
pass
|
|
568
570
|
return False
|
|
569
571
|
|
|
570
|
-
def _log_request_args(
|
|
572
|
+
def _log_request_args(
|
|
573
|
+
self, request_args: Dict[str, Any], request_index: int, max_head: int = 200, max_tail=100, level=logging.INFO
|
|
574
|
+
) -> None:
|
|
571
575
|
"""
|
|
572
576
|
Log the request arguments for debugging purposes.
|
|
573
577
|
|
|
@@ -585,10 +589,17 @@ class AsyncChatClient:
|
|
|
585
589
|
level,
|
|
586
590
|
f"Request for index {request_index} - Request details: "
|
|
587
591
|
f"Size: {body_len} chars, "
|
|
588
|
-
f"Content: {body_str[:max_head]}... [truncated] ...{body_str[-max_tail:]}"
|
|
592
|
+
f"Content: {body_str[:max_head]}... [truncated] ...{body_str[-max_tail:]}",
|
|
589
593
|
)
|
|
590
594
|
|
|
591
|
-
def _log_response_details(
|
|
595
|
+
def _log_response_details(
|
|
596
|
+
self,
|
|
597
|
+
response: httpx.Response,
|
|
598
|
+
request_index: int,
|
|
599
|
+
max_head: int = 200,
|
|
600
|
+
max_tail: int = 100,
|
|
601
|
+
level=logging.DEBUG,
|
|
602
|
+
) -> None:
|
|
592
603
|
"""
|
|
593
604
|
Log the detailed response data at DEBUG level, with truncation.
|
|
594
605
|
|
|
@@ -615,7 +626,7 @@ class AsyncChatClient:
|
|
|
615
626
|
f"Request for index {request_index} - Response details ({content_type}): "
|
|
616
627
|
f"Status: {response.status_code}, "
|
|
617
628
|
f"Headers: {dict(response.headers)}, "
|
|
618
|
-
f"Body: {body_content}"
|
|
629
|
+
f"Body: {body_content}",
|
|
619
630
|
)
|
|
620
631
|
|
|
621
632
|
def _truncate_long_strings_in_dict(self, data, max_length: int = 2000):
|
|
@@ -635,7 +646,11 @@ class AsyncChatClient:
|
|
|
635
646
|
return [self._truncate_long_strings_in_dict(item, max_length) for item in data]
|
|
636
647
|
elif isinstance(data, str) and len(data) > max_length:
|
|
637
648
|
# Truncate long string values
|
|
638
|
-
return
|
|
649
|
+
return (
|
|
650
|
+
data[: max_length // 2]
|
|
651
|
+
+ f" ... [truncated {len(data) - max_length} chars] ... "
|
|
652
|
+
+ data[-max_length // 2 :]
|
|
653
|
+
)
|
|
639
654
|
else:
|
|
640
655
|
return data
|
|
641
656
|
|