exasol-transformers-extension 0.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exasol_transformers_extension-0.10.0/LICENSE +21 -0
- exasol_transformers_extension-0.10.0/PKG-INFO +48 -0
- exasol_transformers_extension-0.10.0/README.md +20 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/__init__.py +1 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/deploy.py +29 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/deployment/__init__.py +0 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/deployment/constants.py +29 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/deployment/deployment_utils.py +37 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/deployment/language_container.py +105 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/deployment/language_container_deployer.py +277 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/deployment/language_container_deployer_cli.py +171 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/deployment/scripts_deployer.py +62 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/deployment/scripts_deployer_cli.py +39 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/deployment/te_language_container_deployer.py +25 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/resources/__init__.py +0 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/resources/templates/filling_mask_udf.jinja.sql +24 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/resources/templates/model_downloader_udf.jinja.sql +13 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/resources/templates/question_answering_udf.jinja.sql +26 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/resources/templates/sequence_classification_single_text_udf.jinja.sql +21 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/resources/templates/sequence_classification_text_pair_udf.jinja.sql +23 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/resources/templates/text_generation_udf.jinja.sql +24 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/resources/templates/token_classification_udf.jinja.sql +26 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/resources/templates/translation_udf.jinja.sql +26 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/resources/templates/zero_shot_text_classification_udf.jinja.sql +24 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/__init__.py +0 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/callers/__init__.py +0 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/callers/filling_mask_udf_call.py +8 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/callers/model_downloader_udf_call.py +8 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/callers/question_answering_udf_call.py +8 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/callers/sequence_classification_single_text_udf_call.py +8 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/callers/sequence_classification_text_pair_udf_call.py +8 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/callers/text_generation_udf_call.py +8 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/callers/token_classification_udf_call.py +8 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/callers/translation_udf_call.py +8 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/callers/zero_shot_text_classification_udf.py +8 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/models/__init__.py +0 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/models/base_model_udf.py +265 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/models/filling_mask_udf.py +115 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/models/model_downloader_udf.py +69 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/models/question_answering_udf.py +105 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/models/sequence_classification_single_text_udf.py +86 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/models/sequence_classification_text_pair_udf.py +94 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/models/text_generation_udf.py +99 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/models/token_classification_udf.py +118 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/models/translation_udf.py +100 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/udfs/models/zero_shot_text_classification_udf.py +100 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/upload_model.py +48 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/utils/__init__.py +0 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/utils/bucketfs_model_uploader.py +22 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/utils/bucketfs_operations.py +72 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/utils/dataframe_operations.py +40 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/utils/device_management.py +13 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer.py +61 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py +89 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/utils/load_local_model.py +66 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/utils/load_model.py +56 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/utils/model_factory_protocol.py +25 -0
- exasol_transformers_extension-0.10.0/exasol_transformers_extension/utils/temporary_directory_factory.py +7 -0
- exasol_transformers_extension-0.10.0/pyproject.toml +43 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2022 Exasol
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: exasol-transformers-extension
|
|
3
|
+
Version: 0.10.0
|
|
4
|
+
Summary: An Exasol extension to use state-of-the-art pretrained machine learning models via the transformers api.
|
|
5
|
+
Home-page: https://github.com/exasol/transformers-extension
|
|
6
|
+
Keywords: exasol
|
|
7
|
+
Author: Umit Buyuksahin
|
|
8
|
+
Author-email: umit.buyuksahin@exasol.com
|
|
9
|
+
Requires-Python: >=3.8.0,<4.0.0
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Requires-Dist: Jinja2 (>=3.0.3,<4.0.0)
|
|
16
|
+
Requires-Dist: click (>=8.0.4,<9.0.0)
|
|
17
|
+
Requires-Dist: exasol-bucketfs (>=0.9.0,<1.0.0)
|
|
18
|
+
Requires-Dist: importlib-resources (>=5.4.0,<6.0.0)
|
|
19
|
+
Requires-Dist: pandas (>=1.4.2,<2.0.0)
|
|
20
|
+
Requires-Dist: pyexasol (>=0.25.0,<0.26.0)
|
|
21
|
+
Requires-Dist: sacremoses (>=0.0.53)
|
|
22
|
+
Requires-Dist: tenacity (>=8.2.2,<9.0.0)
|
|
23
|
+
Requires-Dist: torch (>=2.0.1,<3.0.0)
|
|
24
|
+
Requires-Dist: transformers[torch] (>=4.36.2,<5.0.0)
|
|
25
|
+
Project-URL: Repository, https://github.com/exasol/transformers-extension
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# Exasol Transformers Extension
|
|
29
|
+
|
|
30
|
+
An Exasol extension to use state-of-the-art pretrained machine learning models
|
|
31
|
+
via the [transformers api](https://github.com/huggingface/transformers).
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
## Table of Contents
|
|
35
|
+
|
|
36
|
+
### Information for Users
|
|
37
|
+
|
|
38
|
+
* [User Guide](doc/user_guide/user_guide.md)
|
|
39
|
+
* [Developer Guide](doc/developer_guide/developer_guide.md)
|
|
40
|
+
* [Changelog](doc/changes/changelog.md)
|
|
41
|
+
* [License](LICENSE)
|
|
42
|
+
|
|
43
|
+
### Information for Contributors
|
|
44
|
+
|
|
45
|
+
* [Design](doc/design.md)
|
|
46
|
+
* [Dependencies](doc/dependencies.md)
|
|
47
|
+
|
|
48
|
+
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Exasol Transformers Extension
|
|
2
|
+
|
|
3
|
+
An Exasol extension to use state-of-the-art pretrained machine learning models
|
|
4
|
+
via the [transformers api](https://github.com/huggingface/transformers).
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
## Table of Contents
|
|
8
|
+
|
|
9
|
+
### Information for Users
|
|
10
|
+
|
|
11
|
+
* [User Guide](doc/user_guide/user_guide.md)
|
|
12
|
+
* [Developer Guide](doc/developer_guide/developer_guide.md)
|
|
13
|
+
* [Changelog](doc/changes/changelog.md)
|
|
14
|
+
* [License](LICENSE)
|
|
15
|
+
|
|
16
|
+
### Information for Contributors
|
|
17
|
+
|
|
18
|
+
* [Design](doc/design.md)
|
|
19
|
+
* [Dependencies](doc/dependencies.md)
|
|
20
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# NOTE(review): the package metadata of this release declares version 0.10.0,
# while this constant still says 0.1.0 - confirm which value is intended.
__version__ = '0.1.0'
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import click
|
|
3
|
+
from exasol_transformers_extension.deployment.scripts_deployer_cli import \
|
|
4
|
+
scripts_deployer_main
|
|
5
|
+
from exasol_transformers_extension.deployment.language_container_deployer_cli \
|
|
6
|
+
import language_container_deployer_main, slc_parameter_formatters, CustomizableParameters
|
|
7
|
+
from exasol_transformers_extension.deployment.te_language_container_deployer import TeLanguageContainerDeployer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Root click command group; the deployment sub-commands are attached to it below.
# A comment is used here instead of a docstring on purpose: click would show a
# docstring as the help text of the group.
@click.group()
def main():
    pass


# Register the Transformers Extension specific container URL formatter and
# container name with the parameter formatters imported from the language
# container deployer CLI module.
slc_parameter_formatters.set_formatter(CustomizableParameters.container_url,
                                       TeLanguageContainerDeployer.SLC_URL_FORMATTER)
slc_parameter_formatters.set_formatter(CustomizableParameters.container_name,
                                       TeLanguageContainerDeployer.SLC_NAME)

# Attach the two deployment commands to the root group.
main.add_command(scripts_deployer_main)
main.add_command(language_container_deployer_main)


if __name__ == '__main__':
    # Debug-level logging is only configured when the module is run as a script.
    logging.basicConfig(
        format='%(asctime)s - %(module)s - %(message)s',
        level=logging.DEBUG)

    main()
|
|
File without changes
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import pathlib
|
|
2
|
+
from importlib_resources import files
|
|
3
|
+
|
|
4
|
+
# Name of the top-level package; also used to build the dotted path of the
# UDF callers sub-package below.
BASE_DIR = "exasol_transformers_extension"
# Location of the SQL templates, relative to the package.
TEMPLATES_DIR = pathlib.Path("resources", "templates")
# Resource handle for the `udfs.callers` sub-package.
UDF_CALLERS_DIR = files(f"{BASE_DIR}.udfs.callers")

# Maps the file name of each UDF caller script to the file name of its Jinja SQL
# template. Note that the zero-shot caller file has no "_call" suffix, unlike
# the other caller files.
UDF_CALL_TEMPLATES = {
    "model_downloader_udf_call.py":
        "model_downloader_udf.jinja.sql",
    "sequence_classification_single_text_udf_call.py":
        "sequence_classification_single_text_udf.jinja.sql",
    "sequence_classification_text_pair_udf_call.py":
        "sequence_classification_text_pair_udf.jinja.sql",
    "question_answering_udf_call.py":
        "question_answering_udf.jinja.sql",
    "filling_mask_udf_call.py":
        "filling_mask_udf.jinja.sql",
    "text_generation_udf_call.py":
        "text_generation_udf.jinja.sql",
    "token_classification_udf_call.py":
        "token_classification_udf.jinja.sql",
    "translation_udf_call.py":
        "translation_udf.jinja.sql",
    "zero_shot_text_classification_udf.py":
        "zero_shot_text_classification_udf.jinja.sql"
}

# NOTE(review): presumably the leading columns shared by the UDF inputs, in
# their expected order - confirm against the code that consumes this list.
ORDERED_COLUMNS = ['model_name', 'bucketfs_conn', 'token_conn', 'sub_dir']
|
exasol_transformers_extension-0.10.0/exasol_transformers_extension/deployment/deployment_utils.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
|
|
2
|
+
import logging
|
|
3
|
+
import requests
|
|
4
|
+
import tempfile
|
|
5
|
+
import ssl
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from contextlib import contextmanager
|
|
8
|
+
from jinja2 import Environment, PackageLoader, select_autoescape
|
|
9
|
+
from exasol_transformers_extension.deployment import constants
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Names of the environment variables for the database and BucketFS passwords.
# How they are consumed is not visible in this module.
DB_PASSWORD_ENVIRONMENT_VARIABLE = "TE_DB_PASSWORD"
BUCKETFS_PASSWORD_ENVIRONMENT_VARIABLE = "TE_BUCKETFS_PASSWORD"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def load_and_render_statement(template_name, **kwargs) -> str:
    """
    Render one of the packaged Jinja templates into a statement string.

    template_name - Name of the template to load through the package loader.
    kwargs        - Values handed to the template as its rendering context.

    Returns the rendered template text.
    """
    template_loader = PackageLoader(constants.BASE_DIR, constants.TEMPLATES_DIR)
    jinja_environment = Environment(loader=template_loader,
                                    autoescape=select_autoescape())
    return jinja_environment.get_template(template_name).render(**kwargs)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_websocket_ssl_options(use_ssl_cert_validation: bool, ssl_cert_path: str):
    """
    Build a dictionary of SSL options with a `cert_reqs` entry and, optionally,
    a `ca_certs` entry.

    use_ssl_cert_validation - If True `cert_reqs` is set to ssl.CERT_REQUIRED,
                              otherwise to ssl.CERT_NONE.
    ssl_cert_path           - Value stored under `ca_certs`; the entry is left
                              out when this is None.
    """
    verification_mode = ssl.CERT_REQUIRED if use_ssl_cert_validation else ssl.CERT_NONE
    options = {"cert_reqs": verification_mode}
    if ssl_cert_path is None:
        return options
    options["ca_certs"] = ssl_cert_path
    return options
|
exasol_transformers_extension-0.10.0/exasol_transformers_extension/deployment/language_container.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
import subprocess
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Dict, Optional
|
|
5
|
+
|
|
6
|
+
from exasol_integration_test_docker_environment.lib.docker.images.image_info import ImageInfo
|
|
7
|
+
from exasol_script_languages_container_tool.lib import api
|
|
8
|
+
from exasol_script_languages_container_tool.lib.tasks.export.export_containers import ExportContainerResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def find_file_or_folder_backwards(name: str) -> Path:
    """
    Search for a file or folder called `name`, starting in the directory of this
    module and then walking up through all of its parent directories.

    name - File or folder name to look for in each visited directory.

    Returns the path of the first match, i.e. the one closest to this module.
    Raises a RuntimeError if none of the visited directories contains `name`.
    """
    start_path = Path(__file__).parent
    # `Path.parents` is a finite sequence that ends at the filesystem root, so the
    # walk always terminates. A manual loop guarded by `path != path.root` would
    # never stop, because a Path object never compares equal to a string.
    for directory in (start_path, *start_path.parents):
        candidate = Path(directory, name)
        if candidate.exists():
            return candidate
    raise RuntimeError(f"Could not find {name} when searching backwards from {start_path}")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Name of the flavor folder inside the "language_container" directory.
CONTAINER_NAME = "exasol_transformers_extension_container"


def find_flavor_path() -> Path:
    """
    Locate the flavor folder of the language container.

    The "language_container" directory is searched backwards from this module;
    the returned path is that directory joined with CONTAINER_NAME.
    """
    return find_file_or_folder_backwards("language_container") / CONTAINER_NAME
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def build_language_container(flavor_path: Path) -> Dict[str, ImageInfo]:
    """
    Run `api.build` with the goal "release" for a single flavor.

    flavor_path - Path of the flavor; passed on as a one-element tuple of strings.

    Returns whatever `api.build` returns.
    """
    return api.build(flavor_path=(str(flavor_path),), goal=("release",))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def export(flavor_path: Path,
           export_path: Optional[Path] = None) -> ExportContainerResult:
    """
    Run `api.export` for a single flavor.

    flavor_path - Path of the flavor; passed on as a one-element tuple of strings.
    export_path - Optional export location. It is converted to a string when
                  given, and passed on as None otherwise.

    Returns whatever `api.export` returns.
    """
    target = None if export_path is None else str(export_path)
    return api.export(flavor_path=(str(flavor_path),), export_path=target)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def upload(
        flavor_path: Path,
        bucketfs_name: str,
        bucket_name: str,
        database_host: str,
        bucketfs_port: int,
        user: str,
        password: str,
        path_in_bucket: str,
        release_name: str
):
    """
    Forward the given settings to `api.upload` for a single flavor.

    flavor_path - Path of the flavor; passed on as a one-element tuple of strings.
    user        - Passed to `api.upload` as `bucketfs_username`.
    password    - Passed to `api.upload` as `bucketfs_password`.

    All other arguments are passed on under their own names.
    """
    upload_arguments = {
        "flavor_path": (str(flavor_path),),
        "bucketfs_name": bucketfs_name,
        "bucket_name": bucket_name,
        "bucketfs_port": bucketfs_port,
        "database_host": database_host,
        "bucketfs_username": user,
        "bucketfs_password": password,
        "path_in_bucket": path_in_bucket,
        "release_name": release_name,
    }
    api.upload(**upload_arguments)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def prepare_flavor(flavor_path: Path):
    """
    Fill the "flavor_base" folder of the flavor: first the requirements file is
    written, then the project wheel is added.

    flavor_path - Path of the flavor that contains the "flavor_base" folder.
    """
    base_dir = flavor_path / "flavor_base"
    add_requirements_to_flavor(base_dir)
    add_wheel_to_flavor(base_dir)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def find_project_directory():
    """
    Return the directory that contains the closest "pyproject.toml" found when
    searching backwards from this module.
    """
    return find_file_or_folder_backwards("pyproject.toml").parent
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def add_wheel_to_flavor(flavor_base_path):
    """
    Build the project wheel with `poetry build` and copy it into the flavor.

    flavor_base_path - Path of the "flavor_base" folder. The wheel is copied to
                       `<flavor_base_path>/release/dist`, which is created if needed.

    Raises a RuntimeError if the build fails or if the project's "dist" directory
    does not contain exactly one wheel afterwards.
    """
    project_directory = find_project_directory()
    build_exit_code = subprocess.call(["poetry", "build"], cwd=project_directory)
    # Without this check a failed build could go unnoticed and a stale wheel left
    # in "dist" by an earlier build would be packaged instead.
    if build_exit_code != 0:
        raise RuntimeError(f"Building the wheel with 'poetry build' failed "
                           f"with exit code {build_exit_code} in {project_directory}.")
    dist_path = project_directory / "dist"
    wheels = list(dist_path.glob("*.whl"))
    if len(wheels) != 1:
        raise RuntimeError(f"Did not find exactly one wheel file in dist directory {dist_path}. "
                           f"Found the following wheels: {wheels}")
    wheel = wheels[0]
    wheel_target = flavor_base_path / "release" / "dist"
    wheel_target.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(wheel, wheel_target / wheel.name)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def add_requirements_to_flavor(flavor_base_path: Path):
    """
    Export the project requirements with `poetry export` and write them to
    `<flavor_base_path>/dependencies/requirements.txt`.

    Lines of the export that start with "nvidia" are left out.

    flavor_base_path - Path of the "flavor_base" folder.
    """
    export_command = ["poetry", "export", "--without-hashes", "--without-urls"]
    exported = subprocess.check_output(export_command,
                                       cwd=find_project_directory()).decode("UTF-8")
    kept_lines = [requirement for requirement in exported.splitlines()
                  if not requirement.startswith("nvidia")]
    target_file = flavor_base_path / "dependencies" / "requirements.txt"
    target_file.write_text("\n".join(kept_lines))
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
#########################################################
|
|
2
|
+
# To be migrated to the script-languages-container-tool #
|
|
3
|
+
#########################################################
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from textwrap import dedent
|
|
6
|
+
from typing import List, Optional
|
|
7
|
+
from pathlib import Path, PurePosixPath
|
|
8
|
+
import logging
|
|
9
|
+
import tempfile
|
|
10
|
+
import requests
|
|
11
|
+
import ssl
|
|
12
|
+
import pyexasol
|
|
13
|
+
from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation
|
|
14
|
+
from exasol_transformers_extension.utils.bucketfs_operations import create_bucketfs_location
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_websocket_sslopt(use_ssl_cert_validation: bool = True,
                         ssl_trusted_ca: Optional[str] = None,
                         ssl_client_certificate: Optional[str] = None,
                         ssl_private_key: Optional[str] = None) -> dict:
    """
    Returns a dictionary in the websocket-client format
    (see https://websocket-client.readthedocs.io/en/latest/faq.html#what-else-can-i-do-with-sslopts)

    use_ssl_cert_validation - If True `cert_reqs` is ssl.CERT_REQUIRED, otherwise ssl.CERT_NONE.
    ssl_trusted_ca          - Optional trusted CA location. A directory is stored as
                              `ca_cert_path`, a file as `ca_certs`.
    ssl_client_certificate  - Optional client certificate file, stored as `certfile`.
    ssl_private_key         - Optional private key file, stored as `keyfile`.

    Raises a ValueError if a provided location does not exist.
    """
    verification_mode = ssl.CERT_REQUIRED if use_ssl_cert_validation else ssl.CERT_NONE
    options: dict = {"cert_reqs": verification_mode}

    # Trusted CAs: the option name depends on whether a directory or a file is given.
    if ssl_trusted_ca:
        ca_location = Path(ssl_trusted_ca)
        if ca_location.is_dir():
            ca_option = "ca_cert_path"
        elif ca_location.is_file():
            ca_option = "ca_certs"
        else:
            raise ValueError(f"Trusted CA location {ssl_trusted_ca} doesn't exist.")
        options[ca_option] = ssl_trusted_ca

    # Client certificate and private key: both must be existing files when given.
    certificate_given = bool(ssl_client_certificate)
    if certificate_given and not Path(ssl_client_certificate).is_file():
        raise ValueError(f"Certificate file {ssl_client_certificate} doesn't exist.")
    key_given = bool(ssl_private_key)
    if key_given and not Path(ssl_private_key).is_file():
        raise ValueError(f"Private key file {ssl_private_key} doesn't exist.")

    if certificate_given:
        options["certfile"] = ssl_client_certificate
    if key_given:
        options["keyfile"] = ssl_private_key
    return options
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class LanguageActivationLevel(Enum):
    """
    Language activation level, i.e.
    ALTER <LanguageActivationLevel> SET SCRIPT_LANGUAGES=...
    """
    # The docstring must be a plain string literal: an f-string is never stored
    # as a docstring, even when it contains no placeholders.

    # The member values are interpolated verbatim into SQL text elsewhere in this
    # module (the "ALTER <value> SET ..." command and the "<value>_VALUE" column).
    Session = 'SESSION'
    System = 'SYSTEM'
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def get_language_settings(pyexasol_conn: pyexasol.ExaConnection, alter_type: LanguageActivationLevel) -> str:
    """
    Fetch the SCRIPT_LANGUAGES parameter value stored at the given activation level.

    pyexasol_conn - Opened database connection.
    alter_type    - Activation level - SYSTEM or SESSION.

    Returns the first column of the first row of the query result.
    """
    # The line break and the second line are part of the SQL literal and are
    # therefore kept exactly as they are.
    query = f"""SELECT "{alter_type.value}_VALUE" FROM SYS.EXA_PARAMETERS WHERE
PARAMETER_NAME='SCRIPT_LANGUAGES'"""
    rows = pyexasol_conn.execute(query).fetchall()
    first_row = rows[0]
    return first_row[0]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class LanguageContainerDeployer:
    """
    Uploads a script language container (SLC) to the BucketFS and activates it
    in the database under a given language alias.
    """

    def __init__(self,
                 pyexasol_connection: pyexasol.ExaConnection,
                 language_alias: str,
                 bucketfs_location: BucketFSLocation) -> None:
        """
        pyexasol_connection - Opened database connection used to read and alter the
                              language settings.
        language_alias      - Alias under which the container gets activated.
        bucketfs_location   - BucketFS location the container is uploaded to.
        """

        self._bucketfs_location = bucketfs_location
        self._language_alias = language_alias
        self._pyexasol_conn = pyexasol_connection
        logger.debug(f"Init {LanguageContainerDeployer.__name__}")

    def download_and_run(self, url: str,
                         bucket_file_path: str,
                         alter_system: bool = True,
                         allow_override: bool = False) -> None:
        """
        Downloads the language container from the provided url to a temporary file and then deploys it.
        See docstring on the `run` method for details on what is involved in the deployment.

        url              - Address where the container will be downloaded from.
        bucket_file_path - Path within the designated bucket where the container should be uploaded.
        alter_system     - If True will try to activate the container at the System level.
        allow_override   - If True the activation of a language container with the same alias will be
                           overridden, otherwise a RuntimeError will be raised.
        """

        with tempfile.NamedTemporaryFile() as tmp_file:
            # Write the download to disk piece by piece instead of holding the
            # whole container in memory first.
            with requests.get(url, stream=True) as response:
                response.raise_for_status()
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        tmp_file.write(chunk)
            # `upload_container` re-opens the file by its name, so the buffered
            # data has to reach the file before `run` is called.
            tmp_file.flush()

            self.run(Path(tmp_file.name), bucket_file_path, alter_system, allow_override)

    def run(self, container_file: Optional[Path] = None,
            bucket_file_path: Optional[str] = None,
            alter_system: bool = True,
            allow_override: bool = False) -> None:
        """
        Deploys the language container. This includes two steps, both of which are optional:
        - Uploading the container into the database. This step can be skipped if the container
          has already been uploaded.
        - Activating the container. This step may have to be skipped if the user does not have
          System Privileges in the database. In that case two alternative activation SQL commands
          will be printed on the console.

        container_file   - Path of the container tar.gz file in a local file system.
                           If not provided the container is assumed to be uploaded already.
        bucket_file_path - Path within the designated bucket where the container should be uploaded.
                           If not specified the name of the container file will be used instead.
        alter_system     - If True will try to activate the container at the System level.
        allow_override   - If True the activation of a language container with the same alias will be
                           overridden, otherwise a RuntimeError will be raised.

        Raises a ValueError if neither a container file nor a bucket file path is given.
        """

        if not bucket_file_path:
            if not container_file:
                raise ValueError('Either a container file or a bucket file path must be specified.')
            bucket_file_path = container_file.name

        if container_file:
            self.upload_container(container_file, bucket_file_path)

        if alter_system:
            self.activate_container(bucket_file_path, LanguageActivationLevel.System, allow_override)
        else:
            # Only print the commands; they are generated with allow_override=True so
            # that an already used alias does not prevent showing them.
            message = dedent(f"""
            In SQL, you can activate the SLC of the Transformers Extension
            by using the following statements:

            To activate the SLC only for the current session:
            {self.generate_activation_command(bucket_file_path, LanguageActivationLevel.Session, True)}

            To activate the SLC on the system:
            {self.generate_activation_command(bucket_file_path, LanguageActivationLevel.System, True)}
            """)
            print(message)

    def upload_container(self, container_file: Path,
                         bucket_file_path: Optional[str] = None) -> None:
        """
        Upload the language container to the BucketFS.

        container_file   - Path of the container tar.gz file in a local file system.
        bucket_file_path - Path within the designated bucket where the container should be uploaded.

        Raises a RuntimeError if `container_file` is not a file.
        """
        if not container_file.is_file():
            raise RuntimeError(f"Container file {container_file} "
                               f"is not a file.")
        with open(container_file, "br") as f:
            self._bucketfs_location.upload_fileobj_to_bucketfs(
                fileobj=f, bucket_file_path=bucket_file_path)
        logger.debug("Container is uploaded to bucketfs")

    def activate_container(self, bucket_file_path: str,
                           alter_type: LanguageActivationLevel = LanguageActivationLevel.Session,
                           allow_override: bool = False) -> None:
        """
        Activates the language container at the required level.

        bucket_file_path - Path within the designated bucket where the container is uploaded.
        alter_type       - Language activation level, defaults to the SESSION.
        allow_override   - If True the activation of a language container with the same alias will be
                           overridden, otherwise a RuntimeError will be raised.
        """
        alter_command = self.generate_activation_command(bucket_file_path, alter_type, allow_override)
        self._pyexasol_conn.execute(alter_command)
        logger.debug(alter_command)

    def generate_activation_command(self, bucket_file_path: str,
                                    alter_type: LanguageActivationLevel,
                                    allow_override: bool = False) -> str:
        """
        Generates an SQL command to activate the SLC container at the required level. The command will
        preserve existing activations of other containers identified by different language aliases.
        Activation of a container with the same alias, if exists, will be overwritten.
        The current language settings are read from the database to build the command.

        bucket_file_path - Path within the designated bucket where the container is uploaded.
        alter_type       - Activation level - SYSTEM or SESSION.
        allow_override   - If True the activation of a language container with the same alias will be
                           overridden, otherwise a RuntimeError will be raised.
        """
        path_in_udf = self._bucketfs_location.generate_bucket_udf_path(bucket_file_path)
        new_settings = \
            self._update_previous_language_settings(alter_type, allow_override, path_in_udf)
        alter_command = \
            f"ALTER {alter_type.value} SET SCRIPT_LANGUAGES='{new_settings}';"
        return alter_command

    def _update_previous_language_settings(self, alter_type: LanguageActivationLevel,
                                           allow_override: bool,
                                           path_in_udf: PurePosixPath) -> str:
        """
        Reads the current language settings at the given level and returns them with the
        definition of this deployer's alias replaced or appended.

        Raises a RuntimeError if the alias is already defined and `allow_override` is False.
        """
        prev_lang_settings = get_language_settings(self._pyexasol_conn, alter_type)
        prev_lang_aliases = prev_lang_settings.split(" ")
        self._check_if_requested_language_alias_already_exists(
            allow_override, prev_lang_aliases)
        new_definitions_str = self._generate_new_language_settings(
            path_in_udf, prev_lang_aliases)
        return new_definitions_str

    def get_language_definition(self, bucket_file_path: str) -> str:
        """
        Generate a language definition (ALIAS=URL) for the specified bucket file path.

        bucket_file_path - Path within the designated bucket where the container is uploaded.
        """
        path_in_udf = self._bucketfs_location.generate_bucket_udf_path(bucket_file_path)
        result = self._generate_new_language_settings(path_in_udf=path_in_udf, prev_lang_aliases=[])
        return result

    def _generate_new_language_settings(self, path_in_udf: PurePosixPath,
                                        prev_lang_aliases: List[str]) -> str:
        """
        Builds the space separated language settings: all previous definitions of other
        aliases, followed by the new definition for this deployer's alias.

        path_in_udf       - Path of the container as returned by `generate_bucket_udf_path`.
        prev_lang_aliases - Previous ALIAS=URL definitions.
        """
        alias_prefix = self._language_alias + "="
        other_definitions = [
            alias_definition for alias_definition in prev_lang_aliases
            if not alias_definition.startswith(alias_prefix)]
        # The first part of the URL uses the path without its first two components.
        path_in_udf_without_buckets = PurePosixPath(*path_in_udf.parts[2:])
        new_language_alias_definition = \
            f"{self._language_alias}=localzmq+protobuf:///" \
            f"{path_in_udf_without_buckets}?lang=python#" \
            f"{path_in_udf}/exaudf/exaudfclient_py3"
        new_definitions = other_definitions + [new_language_alias_definition]
        new_definitions_str = " ".join(new_definitions)
        return new_definitions_str

    def _check_if_requested_language_alias_already_exists(
            self, allow_override: bool,
            prev_lang_aliases: List[str]) -> None:
        """
        Checks whether this deployer's alias is already defined in the previous settings.
        If it is, a warning is logged when `allow_override` is True, otherwise a
        RuntimeError is raised.
        """
        alias_prefix = self._language_alias + "="
        if any(alias_definition.startswith(alias_prefix)
               for alias_definition in prev_lang_aliases):
            warning_message = f"The requested language alias {self._language_alias} is already in use."
            if allow_override:
                logger.warning(warning_message)
            else:
                raise RuntimeError(warning_message)

    @classmethod
    def create(cls, bucketfs_name: str, bucketfs_host: str, bucketfs_port: int,
               bucketfs_use_https: bool, bucketfs_user: str,
               bucketfs_password: str, bucket: str, path_in_bucket: str,
               dsn: str, db_user: str, db_password: str, language_alias: str,
               use_ssl_cert_validation: bool = True, ssl_trusted_ca: Optional[str] = None,
               ssl_client_certificate: Optional[str] = None,
               ssl_private_key: Optional[str] = None) -> "LanguageContainerDeployer":
        """
        Creates a deployer from plain connection parameters: opens an encrypted database
        connection with the given SSL options and creates the BucketFS location.
        """

        websocket_sslopt = get_websocket_sslopt(use_ssl_cert_validation, ssl_trusted_ca,
                                                ssl_client_certificate, ssl_private_key)

        pyexasol_conn = pyexasol.connect(
            dsn=dsn,
            user=db_user,
            password=db_password,
            encryption=True,
            websocket_sslopt=websocket_sslopt
        )

        bucketfs_location = create_bucketfs_location(
            bucketfs_name, bucketfs_host, bucketfs_port, bucketfs_use_https,
            bucketfs_user, bucketfs_password, bucket, path_in_bucket)

        return cls(pyexasol_conn, language_alias, bucketfs_location)
|