sdk-seshat-python 0.4.2__tar.gz → 0.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/PKG-INFO +2 -1
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/pyproject.toml +2 -1
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/__init__.py +156 -52
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/data_class/base.py +1 -1
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/base.py +6 -2
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/submit_to_network.py +115 -15
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/config.py +1 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/mixins.py +29 -3
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/saver/database.py +14 -28
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/saver/utils/postgres.py +74 -11
- sdk_seshat_python-0.4.4/seshat/transformer/aggregator/base.py +160 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/deriver/__init__.py +2 -1
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/deriver/base.py +405 -147
- sdk_seshat_python-0.4.4/seshat/transformer/imputer/base.py +57 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/merger/nested_key.py +32 -37
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/branch.py +7 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pseudo/action_gate.py +1 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/trimmer/base.py +14 -0
- sdk_seshat_python-0.4.4/seshat/utils/date_utils.py +29 -0
- sdk_seshat_python-0.4.4/seshat/utils/llm_client/chatbot_factory.py +165 -0
- sdk_seshat_python-0.4.2/seshat/transformer/aggregator/base.py +0 -107
- sdk_seshat_python-0.4.2/seshat/transformer/imputer/base.py +0 -6
- sdk_seshat_python-0.4.2/seshat/utils/llm_client/chatbot_factory.py +0 -76
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/LICENSE +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/README.md +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/__main__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/data_class/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/data_class/pandas.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/data_class/pyspark.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/general/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/general/classification.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/general/clustering.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/general/regression.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/recommendation/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/recommendation/diversity.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/evaluation/evaluator/recommendation/ranking.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/feature_view/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/feature_view/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/code_inspect.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/job_status.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/setup_project.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/exceptions.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/lazy_config.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/models.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/README.md-tmpl +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/config.py-tmpl +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/env-templ +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/jobignore-tmpl +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/pyproject._toml-tmpl +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/recommender-jupyter.ipynb-tmpl +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/template/recommender.py-tmpl +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/profiler/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/profiler/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/profiler/decorator.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/profiler/format.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/database/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/database/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/exceptions.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/flip_side/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/flip_side/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/local/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/local/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/multisource/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/multisource/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/saver/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/saver/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/saver/utils/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/aggregator/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/augmenter/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/augmenter/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/deriver/from_database.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/imputer/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/merger/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/merger/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/recommendation/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pipeline/recommendation/address_pipeline.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pseudo/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/pseudo/table_existence.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/reducer/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/reducer/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/scaler/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/scaler/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/schema/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/schema/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/block/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/block/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/random/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/random/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/time_line/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/splitter/time_line/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/trimmer/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/base.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/cosine_similarity.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/pivot.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/transformer/vectorizer/utils.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/batcher.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/binary_utils.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/clean_json.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/col_to_list.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/contracts.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/file.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/file_cryptography.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/filter_json.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/grouper.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/jobignore.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/join_columns_to_list.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/join_str.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/llm_client/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/logging/__init__.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/logging/base_logger.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/logging/console_logger.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/logging/logstash_logger.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/logging/multi_logger.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/memory.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/mixin.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/obfuscate.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/package_utils.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/pandas_func.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/patching.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/pyspark_func.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/rest.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/singleton.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/validation.py +0 -0
- {sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/utils/zip_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sdk-seshat-python
|
|
3
|
-
Version: 0.4.2
|
|
3
|
+
Version: 0.4.4
|
|
4
4
|
Summary: Seshat python SDK is a library to help create ML data pipelines.
|
|
5
5
|
License: Commercial - see LICENSE.txt
|
|
6
6
|
Author: SeshatLabs
|
|
@@ -19,6 +19,7 @@ Requires-Dist: cryptography (>=44.0.0,<45.0.0)
|
|
|
19
19
|
Requires-Dist: dask[array,complete,dataframe,distributed] (>=2024.10.0,<2025.0.0)
|
|
20
20
|
Requires-Dist: flipside (>=2.1.0,<3.0.0) ; extra == "flipside-support"
|
|
21
21
|
Requires-Dist: langchain (>=0.3.23,<0.4.0)
|
|
22
|
+
Requires-Dist: langchain-aws (>=0.2.31,<0.3.0)
|
|
22
23
|
Requires-Dist: langchain-community (>=0.3.21,<0.4.0)
|
|
23
24
|
Requires-Dist: langchain-openai (>=0.3.12,<0.4.0)
|
|
24
25
|
Requires-Dist: loguru (>=0.7.3,<0.8.0)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "sdk-seshat-python"
|
|
3
|
-
version = "0.4.2"
|
|
3
|
+
version = "0.4.4"
|
|
4
4
|
description = "Seshat python SDK is a library to help create ML data pipelines."
|
|
5
5
|
authors = ["SeshatLabs <info@seshatlabs.xyz>"]
|
|
6
6
|
packages = [{ include = "seshat", from = "." }]
|
|
@@ -36,6 +36,7 @@ python-logstash-async = "^4.0.2"
|
|
|
36
36
|
croniter = "^6.0.0"
|
|
37
37
|
psycopg2-binary = { version = "^2.9", optional = true }
|
|
38
38
|
setuptools = "^80.9.0"
|
|
39
|
+
langchain-aws = "^0.2.31"
|
|
39
40
|
|
|
40
41
|
[tool.poetry.extras]
|
|
41
42
|
flipside_support = ["flipside"]
|
|
@@ -84,6 +84,119 @@ def inspect_code(
|
|
|
84
84
|
raise typer.Exit(1)
|
|
85
85
|
|
|
86
86
|
|
|
87
|
+
def _execute_job_submission(
    directory: str,
    name: str,
    version: str,
    config: dict,
    confidential_level: str,
    execution_mode: str,
    executor_image_tag: str = None,
    operation_type: str = "submit",
) -> None:
    """
    Common logic for submitting and publishing jobs.

    Packages the code found in ``directory``, optionally obfuscates it,
    stores the package via ``SubmitCommand.store_code``, then submits or
    publishes the job and prints a summary table.

    Args:
        directory: Directory containing the code
        name: Name of the package
        version: Version of the package
        config: Configuration dictionary
        confidential_level: Confidential level for the pipeline
        execution_mode: Execution mode for the pipeline
        executor_image_tag: Image tag of the executor (optional, only for submit)
        operation_type: Type of operation ("submit" or "publish")

    Raises:
        ValueError: If ``operation_type`` is neither "submit" nor "publish".
    """
    # Fail fast: an unknown operation type would otherwise take a mix of the
    # submit and publish branches below.
    if operation_type not in ("submit", "publish"):
        raise ValueError(
            f"Unsupported operation_type: {operation_type!r} "
            '(expected "submit" or "publish")'
        )
    is_submit = operation_type == "submit"

    manager = SubmitCommand(config)
    code_config = config.get("code", {})
    expiration = config.get("aws", {}).get("expiration", DEFAULT_EXPIRATION)

    job_execution_schedule = None
    if "execution" in config:
        job_execution_schedule = JobExecutionSchedule(
            **config.get("execution", {}).get("plan", {})
        )

    job_metadata = JobMetadata(
        pipeline_hash="",  # filled in below once the package hash is known
        confidential_level=confidential_level,
        execution_mode=ExecutionMode(execution_mode),
        execution_plan=job_execution_schedule,
        main_file_path=code_config.get("main_file", "main.py"),
        env_file_path=code_config.get("env_file", ".env"),
        data_size=code_config.get("data_size", DEFAULT_DATA_SIZE),
        code_size=None if is_submit else 0,  # Only for publish
        complexity_factor=code_config.get("complexity_factor", 0),
        requirement_file=None,
        requirements_type=None,
        secret_key=None,
        iv=None,
    )

    # Handle package creation
    if is_submit:
        package = manager.handle(
            directory,
            name,
            version,
            executor_image_tag=executor_image_tag,
            metadata=job_metadata,
        )
    else:
        package = manager.handle(directory, name, version, metadata=job_metadata)

    # Handle code obfuscation. The flag may be the string "true"/"false" or a
    # real boolean (TOML `obfuscate_code = true`), so normalise through str()
    # before lowering instead of calling .lower() on a possible bool.
    obfuscate_code = str(code_config.get("obfuscate_code", "false")).lower() == "true"
    if obfuscate_code:
        package = manager.obfuscate_code(package)

    job_metadata.pipeline_hash = package.hash
    identifier = manager.store_code(package)

    if is_submit:
        job_response = manager.submit_job(
            identifier,
            name,
            version,
            job_metadata,
            executor_image_tag=executor_image_tag,
            expiration=expiration,
        )
    else:
        job_response = manager.publish_job(
            identifier,
            name,
            version,
            job_metadata,
            expiration=expiration,
        )

    _display_job_summary(
        package, identifier, job_response, include_executor=bool(executor_image_tag)
    )
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _display_job_summary(
    package, identifier: str, job_response: dict, include_executor: bool = False
) -> None:
    """Display job submission summary in a formatted table.

    Args:
        package: Package object; its ``name``, ``version``, ``hash`` and
            (when ``include_executor`` is set) ``executor_image_tag``
            attributes are shown.
        identifier: Value shown in the "S3 Location" row.
        job_response: API response; ``data.id`` and ``data.status.state``
            are shown, falling back to "N/A" when absent.
        include_executor: Whether to add the "ExecutorImageTag" row.
    """
    # `or {}` rather than a .get() default: a key that is present but null
    # would otherwise crash the summary after the job was already sent.
    job_response_data = (job_response or {}).get("data") or {}
    status = job_response_data.get("status") or {}

    table = Table(title="Upload Summary")
    table.add_column("Property", style="cyan")
    table.add_column("Value", style="green")

    table.add_row("Name", package.name)
    table.add_row("Version", package.version)
    if include_executor:
        table.add_row("ExecutorImageTag", package.executor_image_tag)
    table.add_row("Hash", package.hash)
    table.add_row("S3 Location", identifier)
    table.add_row("Job ID", str(job_response_data.get("id", "N/A")))
    table.add_row("Status", status.get("state", "N/A"))

    console.print(table)
|
|
198
|
+
|
|
199
|
+
|
|
87
200
|
@app.command(name="submit")
|
|
88
201
|
def submit_job(
|
|
89
202
|
directory: str = typer.Argument(..., help="Directory containing the code"),
|
|
@@ -93,77 +206,68 @@ def submit_job(
|
|
|
93
206
|
"latest", help="Image tag of the executor which runs the job"
|
|
94
207
|
),
|
|
95
208
|
confidential_level: str = typer.Option(
|
|
96
|
-
"default", help="Confidential level desired for the pipeline
|
|
209
|
+
"default", help="Confidential level desired for the pipeline"
|
|
97
210
|
),
|
|
98
211
|
execution_mode: str = typer.Option(
|
|
99
|
-
"single", help="Execution mode for the pipeline
|
|
212
|
+
"single", help="Execution mode for the pipeline"
|
|
100
213
|
),
|
|
101
214
|
config_file: Path = typer.Option(
|
|
102
215
|
Path.home() / ".codemanager.toml", help="Path to config file"
|
|
103
216
|
),
|
|
104
217
|
):
|
|
218
|
+
"""Submit a job with executor image tag."""
|
|
105
219
|
try:
|
|
106
220
|
config = load_config(config_file)
|
|
107
221
|
if not config:
|
|
108
222
|
raise NoConfigSetError()
|
|
109
|
-
manager = SubmitCommand(config)
|
|
110
223
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
job_metadata = JobMetadata(
|
|
118
|
-
pipeline_hash="",
|
|
224
|
+
_execute_job_submission(
|
|
225
|
+
directory=directory,
|
|
226
|
+
name=name,
|
|
227
|
+
version=version,
|
|
228
|
+
config=config,
|
|
119
229
|
confidential_level=confidential_level,
|
|
120
|
-
execution_mode=
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
env_file_path=config.get("code", {}).get("env_file", ".env"),
|
|
124
|
-
data_size=config.get("code", {}).get("data_size", DEFAULT_DATA_SIZE),
|
|
125
|
-
complexity_factor=config.get("code", {}).get("complexity_factor", 0),
|
|
126
|
-
requirement_file=None,
|
|
127
|
-
requirements_type=None,
|
|
128
|
-
secret_key="",
|
|
129
|
-
iv="",
|
|
230
|
+
execution_mode=execution_mode,
|
|
231
|
+
executor_image_tag=executor_image_tag,
|
|
232
|
+
operation_type="submit",
|
|
130
233
|
)
|
|
131
234
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
)
|
|
235
|
+
except Exception as e:
|
|
236
|
+
typer.echo(f"Error: {str(e)}", err=True)
|
|
237
|
+
raise typer.Exit(1)
|
|
135
238
|
|
|
136
|
-
obfuscate_code = config.get("code", {}).get("obfuscate_code", "false")
|
|
137
|
-
obfuscate_code = True if obfuscate_code.lower() == "true" else False
|
|
138
|
-
if obfuscate_code:
|
|
139
|
-
package = manager.obfuscate_code(package)
|
|
140
239
|
|
|
141
|
-
|
|
142
|
-
|
|
240
|
+
@app.command(name="publish")
|
|
241
|
+
def publish_job_on_cook(
|
|
242
|
+
directory: str = typer.Argument(..., help="Directory containing the code"),
|
|
243
|
+
name: str = typer.Option(..., help="Name of the package"),
|
|
244
|
+
version: str = typer.Option(..., help="Version of the package"),
|
|
245
|
+
confidential_level: str = typer.Option(
|
|
246
|
+
"default", help="Confidential level desired for the pipeline"
|
|
247
|
+
),
|
|
248
|
+
execution_mode: str = typer.Option(
|
|
249
|
+
"single", help="Execution mode for the pipeline"
|
|
250
|
+
),
|
|
251
|
+
config_file: Path = typer.Option(
|
|
252
|
+
Path.home() / ".codemanager.toml", help="Path to config file"
|
|
253
|
+
),
|
|
254
|
+
):
|
|
255
|
+
"""Publish a job on cook without executor image tag."""
|
|
256
|
+
try:
|
|
257
|
+
config = load_config(config_file)
|
|
258
|
+
if not config:
|
|
259
|
+
raise NoConfigSetError()
|
|
143
260
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
name,
|
|
147
|
-
version,
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
261
|
+
_execute_job_submission(
|
|
262
|
+
directory=directory,
|
|
263
|
+
name=name,
|
|
264
|
+
version=version,
|
|
265
|
+
config=config,
|
|
266
|
+
confidential_level=confidential_level,
|
|
267
|
+
execution_mode=execution_mode,
|
|
268
|
+
executor_image_tag=None,
|
|
269
|
+
operation_type="publish",
|
|
151
270
|
)
|
|
152
|
-
job_response_data = job_response.get("data", {})
|
|
153
|
-
|
|
154
|
-
table = Table(title="Upload Summary")
|
|
155
|
-
table.add_column("Property", style="cyan")
|
|
156
|
-
table.add_column("Value", style="green")
|
|
157
|
-
|
|
158
|
-
table.add_row("Name", package.name)
|
|
159
|
-
table.add_row("Version", package.version)
|
|
160
|
-
table.add_row("ExecutorImageTag", package.executor_image_tag)
|
|
161
|
-
table.add_row("Hash", package.hash)
|
|
162
|
-
table.add_row("S3 Location", identifier)
|
|
163
|
-
table.add_row("Job ID", str(job_response_data.get("id", "N/A")))
|
|
164
|
-
table.add_row("Status", job_response_data.get("status", {}).get("state", "N/A"))
|
|
165
|
-
|
|
166
|
-
console.print(table)
|
|
167
271
|
|
|
168
272
|
except Exception as e:
|
|
169
273
|
typer.echo(f"Error: {str(e)}", err=True)
|
|
@@ -61,7 +61,7 @@ class SFrame:
|
|
|
61
61
|
def iterrows(self, column_name: str, key: str = configs.DEFAULT_SF_KEY):
|
|
62
62
|
pass
|
|
63
63
|
|
|
64
|
-
def make_group(self, default_key=configs.DEFAULT_SF_KEY):
|
|
64
|
+
def make_group(self, default_key=configs.DEFAULT_SF_KEY) -> "GroupSFrame":
|
|
65
65
|
pass
|
|
66
66
|
|
|
67
67
|
def convert(
|
|
@@ -36,6 +36,8 @@ class JobExecutionSchedule:
|
|
|
36
36
|
until: datetime | None
|
|
37
37
|
cron_expression: str | None
|
|
38
38
|
run_overlap: bool = True
|
|
39
|
+
initial_run: bool = True
|
|
40
|
+
timezone: str = "UTC"
|
|
39
41
|
|
|
40
42
|
def __post_init__(self):
|
|
41
43
|
import re
|
|
@@ -108,8 +110,10 @@ class JobMetadata:
|
|
|
108
110
|
requirements_type: Optional[str]
|
|
109
111
|
complexity_factor: float
|
|
110
112
|
data_size: float
|
|
111
|
-
|
|
112
|
-
|
|
113
|
+
code_size: float
|
|
114
|
+
secret_key: Optional[str]
|
|
115
|
+
iv: Optional[str]
|
|
116
|
+
env_vars: dict = None
|
|
113
117
|
|
|
114
118
|
|
|
115
119
|
class BaseTyperCommand:
|
{sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/submit_to_network.py
RENAMED
|
@@ -8,10 +8,12 @@ from typing import Optional, BinaryIO
|
|
|
8
8
|
import boto3
|
|
9
9
|
import typer
|
|
10
10
|
from botocore.exceptions import ClientError
|
|
11
|
+
from dotenv import dotenv_values
|
|
11
12
|
|
|
12
13
|
from seshat.general.command.base import BaseTyperCommand, ApiConfig, JobMetadata
|
|
13
14
|
from seshat.general.exceptions import RestClientException, EnvFileNotFound
|
|
14
15
|
from seshat.general.models import CodePackage
|
|
16
|
+
from seshat.utils.date_utils import format_datetime_for_api
|
|
15
17
|
from seshat.utils.file import is_binary_file
|
|
16
18
|
from seshat.utils.file_cryptography import AESCipher
|
|
17
19
|
from seshat.utils.jobignore import JobIgnoreHandler
|
|
@@ -191,15 +193,17 @@ class SubmitCommand(BaseTyperCommand):
|
|
|
191
193
|
directory: str,
|
|
192
194
|
name: str,
|
|
193
195
|
version: str,
|
|
194
|
-
|
|
196
|
+
metadata: JobMetadata,
|
|
197
|
+
executor_image_tag: str = None,
|
|
195
198
|
requirements_file: Optional[str] = None,
|
|
196
|
-
|
|
199
|
+
secret_env: bool = False,
|
|
197
200
|
) -> CodePackage:
|
|
198
201
|
self.echo(f"📦 Packaging code from {directory}")
|
|
199
|
-
|
|
200
|
-
env_file, encrypted_data = None, None
|
|
202
|
+
env_file, env_data = None, None
|
|
201
203
|
try:
|
|
202
|
-
|
|
204
|
+
env_data, key, iv, env_file = self.handle_env_file(directory, secret_env)
|
|
205
|
+
if not secret_env:
|
|
206
|
+
metadata.env_vars = env_data
|
|
203
207
|
metadata.secret_key = key
|
|
204
208
|
metadata.iv = iv
|
|
205
209
|
except EnvFileNotFound:
|
|
@@ -225,9 +229,13 @@ class SubmitCommand(BaseTyperCommand):
|
|
|
225
229
|
if job_ignore_handler.match_gitignore_like_path(relative_path):
|
|
226
230
|
continue
|
|
227
231
|
|
|
228
|
-
if
|
|
232
|
+
if (
|
|
233
|
+
secret_env
|
|
234
|
+
and env_file
|
|
235
|
+
and (pathlib.Path(filepath) == pathlib.Path(env_file))
|
|
236
|
+
):
|
|
229
237
|
with open(filepath, "r", encoding="utf-8") as _:
|
|
230
|
-
all_files[relative_path] = encrypted_data
|
|
238
|
+
all_files[relative_path] = env_data
|
|
231
239
|
# metadata.env_file_path = env_file
|
|
232
240
|
self.echo(f"📄 Added {relative_path}")
|
|
233
241
|
continue
|
|
@@ -236,6 +244,8 @@ class SubmitCommand(BaseTyperCommand):
|
|
|
236
244
|
all_files, filepath, public_files, relative_path, total_size
|
|
237
245
|
)
|
|
238
246
|
|
|
247
|
+
metadata.code_size = total_size
|
|
248
|
+
|
|
239
249
|
content_hash = self._hash_package(public_files)
|
|
240
250
|
|
|
241
251
|
package = CodePackage(
|
|
@@ -243,7 +253,7 @@ class SubmitCommand(BaseTyperCommand):
|
|
|
243
253
|
version=version,
|
|
244
254
|
executor_image_tag=executor_image_tag,
|
|
245
255
|
files=all_files,
|
|
246
|
-
metadata=
|
|
256
|
+
metadata=asdict(metadata),
|
|
247
257
|
hash=content_hash.hexdigest(),
|
|
248
258
|
binary_files=set(),
|
|
249
259
|
)
|
|
@@ -270,7 +280,7 @@ class SubmitCommand(BaseTyperCommand):
|
|
|
270
280
|
|
|
271
281
|
return new_total_size
|
|
272
282
|
|
|
273
|
-
def handle_env_file(self, directory: str):
|
|
283
|
+
def handle_env_file(self, directory: str, secret_env: bool):
|
|
274
284
|
env_file_path = self.config.get("code").get("env_file")
|
|
275
285
|
if not env_file_path:
|
|
276
286
|
self.echo("No env file found in config")
|
|
@@ -289,11 +299,15 @@ class SubmitCommand(BaseTyperCommand):
|
|
|
289
299
|
self.echo("No env file found to encrypt")
|
|
290
300
|
raise EnvFileNotFound
|
|
291
301
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
302
|
+
key, iv = None, None
|
|
303
|
+
if secret_env:
|
|
304
|
+
encryption_result = AESCipher().encrypt_file(env_file)
|
|
305
|
+
env_data = encryption_result["encrypted_data"]
|
|
306
|
+
key = encryption_result["key"]
|
|
307
|
+
iv = encryption_result["iv"]
|
|
308
|
+
else:
|
|
309
|
+
env_data = dotenv_values(env_file)
|
|
310
|
+
return env_data, key, iv, env_file
|
|
297
311
|
|
|
298
312
|
def store_code(self, package: CodePackage) -> str:
|
|
299
313
|
self.echo("☁️ Uploading to S3...")
|
|
@@ -311,8 +325,8 @@ class SubmitCommand(BaseTyperCommand):
|
|
|
311
325
|
s3_key: str,
|
|
312
326
|
name: str,
|
|
313
327
|
version: str,
|
|
314
|
-
executor_image_tag: str,
|
|
315
328
|
metadata: JobMetadata,
|
|
329
|
+
executor_image_tag: str = None,
|
|
316
330
|
expiration=86400,
|
|
317
331
|
) -> dict:
|
|
318
332
|
"""Submit job to API after successful upload"""
|
|
@@ -357,3 +371,89 @@ class SubmitCommand(BaseTyperCommand):
|
|
|
357
371
|
except RestClientException as e:
|
|
358
372
|
self.echo(f"❌ Failed to submit job: {str(e)}")
|
|
359
373
|
raise
|
|
374
|
+
|
|
375
|
+
def publish_job(
    self,
    s3_key: str,
    name: str,
    version: str,
    metadata: JobMetadata,
    expiration=86400,
) -> dict:
    """Publish an uploaded package to cook through the REST API.

    Generates a presigned URL for ``s3_key`` and POSTs a pipeline
    description to ``agent-launchers/data-agents/submit``.

    Args:
        s3_key: Storage key of the uploaded package.
        name: Used as the pipeline name/label and as the ``{name}-job``
            template name/label.
        version: Package version, sent as a string.
        metadata: Job metadata supplying env vars, schedule and sizes.
        expiration: Passed to the backend when generating the presigned URL.

    Returns:
        The response returned by the REST client.

    Raises:
        ValueError: If ``base_url`` or ``auth_token`` is not configured.
        RestClientException: Re-raised after logging when the POST fails.
    """
    api = self.job_config
    if not (api.base_url and api.auth_token):
        raise ValueError(
            "API configuration missing. Please set base_url and auth_token"
        )

    presigned_url = self.backend.generate_presigned_url(s3_key, expiration)
    executor_label = self.config.get("executor", {}).get("label")

    # Without an execution plan the pipeline is scheduled to run once.
    plan = metadata.execution_plan
    if plan is None:
        schedule = {"type": "once"}
    else:
        schedule = {
            "type": plan.schedule_mode,
            "expression": plan.cron_expression,
            "timezone": plan.timezone,
            "start_time": format_datetime_for_api(plan.start_time),
            "end_time": format_datetime_for_api(plan.until),
            "initial_run": plan.initial_run,
            "run_overlap": plan.run_overlap,
        }

    meta_data = {
        "code_size": metadata.code_size,
        "pipeline_hash": metadata.pipeline_hash,
        "confidential_level": metadata.confidential_level,
        "execution_mode": metadata.execution_mode,
        "main_file_path": metadata.main_file_path,
        "env_file_path": metadata.env_file_path,
        "complexity_factor": metadata.complexity_factor,
        "data_size": metadata.data_size,
    }
    # Encryption details are sent only when a secret key is set.
    if metadata.secret_key:
        meta_data.update(
            {
                "encryption_secret_key": metadata.secret_key,
                "encryption_iv": metadata.iv,
            }
        )

    template_name = f"{name}-job"
    job_template = {
        "name": template_name,
        "label": template_name,
        "description": "",
        "version": str(version),
        "execution_priority": "default",
        "validation_priority": "default",
        "directory": {"url": presigned_url, "type": "s3"},
        "retry_policy": {
            "retry_on_error": False,
            "interval": "0",
            "retry_count": 0,
            "action_on_failure": "none",
        },
        "meta_data": meta_data,
        "executor_label": executor_label,
    }

    payload = {
        "name": name,
        "label": name,
        "public": False,
        "pipeline": {
            "configs": metadata.env_vars,
            "schedule": schedule,
            "job_template": job_template,
            "config_handler": "store_accounts",
            "active": True,
        },
    }

    try:
        self.echo("🚀 Publishing job to cook...")
        response_data = self.rest_client.post(
            "agent-launchers/data-agents/submit", json=payload
        )
    except RestClientException as e:
        self.echo(f"❌ Failed to publish job: {str(e)}")
        raise
    self.echo("✅ Job published to cook successfully!")
    return response_data
|
|
@@ -78,13 +78,31 @@ class SQLMixin:
|
|
|
78
78
|
trans.commit()
|
|
79
79
|
conn.close()
|
|
80
80
|
|
|
81
|
+
def _parse_table_name(self, table_name: str) -> tuple[Optional[str], str]:
|
|
82
|
+
"""
|
|
83
|
+
Parse a table name that might include a schema prefix.
|
|
84
|
+
Returns (schema_name, table_name)
|
|
85
|
+
"""
|
|
86
|
+
if "." in table_name:
|
|
87
|
+
parts = table_name.split(".", 1)
|
|
88
|
+
return parts[0], parts[1]
|
|
89
|
+
return None, table_name
|
|
90
|
+
|
|
81
91
|
def ensure_table_exists(self, table: str, schema: Schema):
    """Create ``table`` from ``schema`` unless the inspector already lists it.

    ``table`` may carry a schema prefix; it is split with
    ``_parse_table_name`` and the existence check is made against that
    schema only (``schema=None`` is passed when there is no prefix).

    Args:
        table: Table name, optionally prefixed as ``"schema.table"``.
        schema: Column definitions handed to ``create_table`` when the
            table has to be created.
    """
    engine = self.get_engine()
    db_schema, table_name = self._parse_table_name(table)
    known_tables = inspect(engine).get_table_names(schema=db_schema)
    if table_name not in known_tables:
        # create_table receives the original, possibly prefixed, name.
        self.create_table(schema, table)
|
|
86
102
|
|
|
87
103
|
def create_table(self, schema: Schema, table: str):
|
|
104
|
+
db_schema, table_name = self._parse_table_name(table)
|
|
105
|
+
|
|
88
106
|
table_columns = []
|
|
89
107
|
pk_cols = []
|
|
90
108
|
for col in schema.cols:
|
|
@@ -96,7 +114,9 @@ class SQLMixin:
|
|
|
96
114
|
constraints = []
|
|
97
115
|
if pk_cols:
|
|
98
116
|
constraints.append(
|
|
99
|
-
PrimaryKeyConstraint(
|
|
117
|
+
PrimaryKeyConstraint(
|
|
118
|
+
*pk_cols, name=f"{table_name}_pk_{'_'.join(pk_cols)}"
|
|
119
|
+
)
|
|
100
120
|
)
|
|
101
121
|
_, metadata = self.get_table(
|
|
102
122
|
table, False, *table_columns, *constraints, extend_existing=True
|
|
@@ -104,7 +124,13 @@ class SQLMixin:
|
|
|
104
124
|
metadata.create_all(self.get_engine())
|
|
105
125
|
|
|
106
126
|
def get_table(self, table_name, autoload, *args, **kwargs):
    """Build a ``Table`` bound to a fresh ``MetaData`` and return both.

    Args:
        table_name: Table name, optionally prefixed as ``"schema.table"``.
        autoload: When truthy, ``autoload_with`` defaults to this object's
            engine unless the caller already supplied it.
        *args: Extra positional arguments forwarded to ``Table``.
        **kwargs: Extra keyword arguments forwarded to ``Table``. A schema
            prefix found in ``table_name`` overwrites any ``schema`` key.

    Returns:
        A ``(table, metadata)`` tuple.
    """
    db_schema, actual_table_name = self._parse_table_name(table_name)

    if autoload:
        kwargs.setdefault("autoload_with", self.get_engine())
    if db_schema:
        kwargs["schema"] = db_schema

    metadata = MetaData()
    table_obj = Table(actual_table_name, metadata, *args, **kwargs)
    return table_obj, metadata
|
|
@@ -2,12 +2,8 @@ import hashlib
|
|
|
2
2
|
import statistics
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
|
-
import sqlalchemy as db
|
|
6
5
|
from sqlalchemy import (
|
|
7
|
-
Column,
|
|
8
6
|
Index,
|
|
9
|
-
MetaData,
|
|
10
|
-
Table,
|
|
11
7
|
and_,
|
|
12
8
|
inspect,
|
|
13
9
|
select,
|
|
@@ -52,29 +48,21 @@ class SQLDBSaver(SQLMixin, Saver):
|
|
|
52
48
|
else:
|
|
53
49
|
self.insert(selected_sf, config)
|
|
54
50
|
|
|
55
|
-
def ensure_table_exists(self, table: str, schema: Schema):
|
|
56
|
-
engine = self.get_engine()
|
|
57
|
-
if table in inspect(engine).get_table_names():
|
|
58
|
-
return
|
|
59
|
-
self.create_table(schema, table)
|
|
60
|
-
|
|
61
|
-
def create_table(self, schema: Schema, table: str):
|
|
62
|
-
table_columns = []
|
|
63
|
-
for col in schema.cols:
|
|
64
|
-
col_name = col.to
|
|
65
|
-
col_type = getattr(db, col.dtype or "String")
|
|
66
|
-
table_columns.append(Column(col_name, col_type))
|
|
67
|
-
_, metadata = self.get_table(table, False, *table_columns, extend_existing=True)
|
|
68
|
-
metadata.create_all(self.get_engine())
|
|
69
|
-
|
|
70
51
|
def delete(self, table_name):
    """Run an unfiltered ``DELETE`` against ``table_name``.

    The table is loaded with ``autoload=True`` and the statement produced
    by ``table.delete()`` (no criteria attached) is passed to
    ``write_on_db``.
    """
    reflected = self.get_table(table_name, autoload=True)[0]
    delete_stmt = reflected.delete()
    self.write_on_db(delete_stmt)
|
|
73
54
|
|
|
74
55
|
def drop_table(self, table_name):
    """Drop ``table_name`` if the inspector lists it; otherwise do nothing.

    Args:
        table_name: Table name, optionally prefixed as ``"schema.table"``.
            The existence check looks only in the schema named by the
            prefix (``schema=None`` is passed when there is no prefix).
    """
    db_schema, actual_table_name = self._parse_table_name(table_name)
    engine = self.get_engine()

    existing_tables = inspect(engine).get_table_names(schema=db_schema)
    if actual_table_name not in existing_tables:
        return

    # get_table receives the original, possibly prefixed, name so that it
    # can apply the schema itself.
    reflected, _ = self.get_table(table_name, autoload=True)
    reflected.drop(engine)
|
|
78
66
|
|
|
79
67
|
def insert(self, selected_sf: SFrame, config: SaveConfig):
|
|
80
68
|
values = self.prepare_sf_to_insert(selected_sf, config).to_dict()
|
|
@@ -126,13 +114,17 @@ class SQLDBSaver(SQLMixin, Saver):
|
|
|
126
114
|
hashed_cols = self.hash_columns([col.key for col in index.columns])
|
|
127
115
|
current_indexes.add(hashed_cols)
|
|
128
116
|
|
|
117
|
+
# Parse table name to get the actual table name without schema
|
|
118
|
+
_, actual_table_name = self._parse_table_name(config.table)
|
|
119
|
+
|
|
129
120
|
for index in config.indexes:
|
|
130
121
|
index_cols = [index] if isinstance(index, str) else index
|
|
131
122
|
index_hash = self.hash_columns(index_cols)
|
|
132
123
|
if index_hash in current_indexes:
|
|
133
124
|
continue
|
|
134
125
|
|
|
135
|
-
|
|
126
|
+
# Use the actual table name (without schema) for index naming
|
|
127
|
+
index_name = f"{'_'.join(index_cols)}_index_{actual_table_name}"
|
|
136
128
|
index_obj = Index(
|
|
137
129
|
index_name,
|
|
138
130
|
*[getattr(table.c, index_col) for index_col in index_cols],
|
|
@@ -184,12 +176,6 @@ class SQLDBSaver(SQLMixin, Saver):
|
|
|
184
176
|
)
|
|
185
177
|
return self.get_from_db(query)
|
|
186
178
|
|
|
187
|
-
def get_table(self, table_name, autoload, *args, **kwargs):
|
|
188
|
-
metadata = MetaData()
|
|
189
|
-
if autoload:
|
|
190
|
-
kwargs.setdefault("autoload_with", self.get_engine())
|
|
191
|
-
return Table(table_name, metadata, *args, **kwargs), metadata
|
|
192
|
-
|
|
193
179
|
def get_from_db(self, query):
|
|
194
180
|
with self.get_engine().connect() as conn:
|
|
195
181
|
result = conn.execute(query)
|