PyPI - sdk-seshat-python - Versions diffs - 0.4.2__tar.gz → 0.4.4__tar.gz - Mend

sdk-seshat-python 0.4.2.tar.gz → 0.4.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (140)

{sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sdk-seshat-python
-Version: 0.4.2
+Version: 0.4.4
 Summary: Seshat python SDK is a library to help create ML data pipelines.
 License: Commercial - see LICENSE.txt
 Author: SeshatLabs
@@ -19,6 +19,7 @@ Requires-Dist: cryptography (>=44.0.0,<45.0.0)
 Requires-Dist: dask[array,complete,dataframe,distributed] (>=2024.10.0,<2025.0.0)
 Requires-Dist: flipside (>=2.1.0,<3.0.0) ; extra == "flipside-support"
 Requires-Dist: langchain (>=0.3.23,<0.4.0)
+Requires-Dist: langchain-aws (>=0.2.31,<0.3.0)
 Requires-Dist: langchain-community (>=0.3.21,<0.4.0)
 Requires-Dist: langchain-openai (>=0.3.12,<0.4.0)
 Requires-Dist: loguru (>=0.7.3,<0.8.0)

{sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sdk-seshat-python"
-version = "0.4.2"
+version = "0.4.4"
 description = "Seshat python SDK is a library to help create ML data pipelines."
 authors =  ["SeshatLabs <info@seshatlabs.xyz>"]
 packages = [{ include = "seshat", from = "." }]
@@ -36,6 +36,7 @@ python-logstash-async = "^4.0.2"
 croniter = "^6.0.0"
 psycopg2-binary = { version = "^2.9", optional = true }
 setuptools = "^80.9.0"
+langchain-aws = "^0.2.31"
 [tool.poetry.extras]
 flipside_support = ["flipside"]

{sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/__init__.py RENAMED Viewed

@@ -84,6 +84,119 @@ def inspect_code(
         raise typer.Exit(1)
+def _execute_job_submission(
+    directory: str,
+    name: str,
+    version: str,
+    config: dict,
+    confidential_level: str,
+    execution_mode: str,
+    executor_image_tag: str = None,
+    operation_type: str = "submit",
+) -> None:
+    """
+    Common logic for submitting and publishing jobs.
+    Args:
+        directory: Directory containing the code
+        name: Name of the package
+        version: Version of the package
+        config: Configuration dictionary
+        confidential_level: Confidential level for the pipeline
+        execution_mode: Execution mode for the pipeline
+        executor_image_tag: Image tag of the executor (optional, only for submit)
+        operation_type: Type of operation ("submit" or "publish")
+    """
+    manager = SubmitCommand(config)
+    job_execution_schedule = None
+    if "execution" in config:
+        job_execution_schedule = JobExecutionSchedule(
+            **config.get("execution", {}).get("plan", {})
+        )
+    job_metadata = JobMetadata(
+        pipeline_hash="",
+        confidential_level=confidential_level,
+        execution_mode=ExecutionMode(execution_mode),
+        execution_plan=job_execution_schedule,
+        main_file_path=config.get("code", {}).get("main_file", "main.py"),
+        env_file_path=config.get("code", {}).get("env_file", ".env"),
+        data_size=config.get("code", {}).get("data_size", DEFAULT_DATA_SIZE),
+        code_size=0 if operation_type == "publish" else None,  # Only for publish
+        complexity_factor=config.get("code", {}).get("complexity_factor", 0),
+        requirement_file=None,
+        requirements_type=None,
+        secret_key=None,
+        iv=None,
+    )
+    # Handle package creation
+    if operation_type == "submit":
+        package = manager.handle(
+            directory,
+            name,
+            version,
+            executor_image_tag=executor_image_tag,
+            metadata=job_metadata,
+        )
+    else:
+        package = manager.handle(directory, name, version, metadata=job_metadata)
+    # Handle code obfuscation
+    obfuscate_code = config.get("code", {}).get("obfuscate_code", "false")
+    obfuscate_code = obfuscate_code.lower() == "true"
+    if obfuscate_code:
+        package = manager.obfuscate_code(package)
+    job_metadata.pipeline_hash = package.hash
+    identifier = manager.store_code(package)
+    if operation_type == "submit":
+        job_response = manager.submit_job(
+            identifier,
+            name,
+            version,
+            job_metadata,
+            executor_image_tag=executor_image_tag,
+            expiration=config.get("aws", {}).get("expiration", DEFAULT_EXPIRATION),
+        )
+    else:
+        job_response = manager.publish_job(
+            identifier,
+            name,
+            version,
+            job_metadata,
+            expiration=config.get("aws", {}).get("expiration", DEFAULT_EXPIRATION),
+        )
+    _display_job_summary(
+        package, identifier, job_response, include_executor=bool(executor_image_tag)
+    )
+def _display_job_summary(
+    package, identifier: str, job_response: dict, include_executor: bool = False
+) -> None:
+    """Display job submission summary in a formatted table."""
+    job_response_data = job_response.get("data", {})
+    table = Table(title="Upload Summary")
+    table.add_column("Property", style="cyan")
+    table.add_column("Value", style="green")
+    table.add_row("Name", package.name)
+    table.add_row("Version", package.version)
+    if include_executor:
+        table.add_row("ExecutorImageTag", package.executor_image_tag)
+    table.add_row("Hash", package.hash)
+    table.add_row("S3 Location", identifier)
+    table.add_row("Job ID", str(job_response_data.get("id", "N/A")))
+    table.add_row("Status", job_response_data.get("status", {}).get("state", "N/A"))
+    console.print(table)
 @app.command(name="submit")
 def submit_job(
     directory: str = typer.Argument(..., help="Directory containing the code"),
@@ -93,77 +206,68 @@ def submit_job(
         "latest", help="Image tag of the executor which runs the job"
     ),
     confidential_level: str = typer.Option(
-        "default", help="Confidential level desired for the pipeline "
+        "default", help="Confidential level desired for the pipeline"
     ),
     execution_mode: str = typer.Option(
-        "single", help="Execution mode for the pipeline "
+        "single", help="Execution mode for the pipeline"
     ),
     config_file: Path = typer.Option(
         Path.home() / ".codemanager.toml", help="Path to config file"
     ),
 ):
+    """Submit a job with executor image tag."""
     try:
         config = load_config(config_file)
         if not config:
             raise NoConfigSetError()
-        manager = SubmitCommand(config)
-        job_execution_schedule = None
-        if "execution" in config:
-            job_execution_schedule = JobExecutionSchedule(
-                **config.get("execution", {}).get("plan", {})
-            )
-        job_metadata = JobMetadata(
-            pipeline_hash="",
+        _execute_job_submission(
+            directory=directory,
+            name=name,
+            version=version,
+            config=config,
             confidential_level=confidential_level,
-            execution_mode=ExecutionMode(execution_mode),
-            execution_plan=job_execution_schedule,
-            main_file_path=config.get("code", {}).get("main_file", "main.py"),
-            env_file_path=config.get("code", {}).get("env_file", ".env"),
-            data_size=config.get("code", {}).get("data_size", DEFAULT_DATA_SIZE),
-            complexity_factor=config.get("code", {}).get("complexity_factor", 0),
-            requirement_file=None,
-            requirements_type=None,
-            secret_key="",
-            iv="",
+            execution_mode=execution_mode,
+            executor_image_tag=executor_image_tag,
+            operation_type="submit",
         )
-        package = manager.handle(
-            directory, name, version, executor_image_tag, metadata=job_metadata
-        )
+    except Exception as e:
+        typer.echo(f"Error: {str(e)}", err=True)
+        raise typer.Exit(1)
-        obfuscate_code = config.get("code", {}).get("obfuscate_code", "false")
-        obfuscate_code = True if obfuscate_code.lower() == "true" else False
-        if obfuscate_code:
-            package = manager.obfuscate_code(package)
-        job_metadata.pipeline_hash = package.hash
-        identifier = manager.store_code(package)
+@app.command(name="publish")
+def publish_job_on_cook(
+    directory: str = typer.Argument(..., help="Directory containing the code"),
+    name: str = typer.Option(..., help="Name of the package"),
+    version: str = typer.Option(..., help="Version of the package"),
+    confidential_level: str = typer.Option(
+        "default", help="Confidential level desired for the pipeline"
+    ),
+    execution_mode: str = typer.Option(
+        "single", help="Execution mode for the pipeline"
+    ),
+    config_file: Path = typer.Option(
+        Path.home() / ".codemanager.toml", help="Path to config file"
+    ),
+):
+    """Publish a job on cook without executor image tag."""
+    try:
+        config = load_config(config_file)
+        if not config:
+            raise NoConfigSetError()
-        job_response = manager.submit_job(
-            identifier,
-            name,
-            version,
-            executor_image_tag,
-            job_metadata,
-            expiration=config.get("aws", {}).get("expiration", DEFAULT_EXPIRATION),
+        _execute_job_submission(
+            directory=directory,
+            name=name,
+            version=version,
+            config=config,
+            confidential_level=confidential_level,
+            execution_mode=execution_mode,
+            executor_image_tag=None,
+            operation_type="publish",
         )
-        job_response_data = job_response.get("data", {})
-        table = Table(title="Upload Summary")
-        table.add_column("Property", style="cyan")
-        table.add_column("Value", style="green")
-        table.add_row("Name", package.name)
-        table.add_row("Version", package.version)
-        table.add_row("ExecutorImageTag", package.executor_image_tag)
-        table.add_row("Hash", package.hash)
-        table.add_row("S3 Location", identifier)
-        table.add_row("Job ID", str(job_response_data.get("id", "N/A")))
-        table.add_row("Status", job_response_data.get("status", {}).get("state", "N/A"))
-        console.print(table)
     except Exception as e:
         typer.echo(f"Error: {str(e)}", err=True)

{sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/data_class/base.py RENAMED Viewed

@@ -61,7 +61,7 @@ class SFrame:
     def iterrows(self, column_name: str, key: str = configs.DEFAULT_SF_KEY):
         pass
-    def make_group(self, default_key=configs.DEFAULT_SF_KEY):
+    def make_group(self, default_key=configs.DEFAULT_SF_KEY) -> "GroupSFrame":
         pass
     def convert(

{sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/base.py RENAMED Viewed

@@ -36,6 +36,8 @@ class JobExecutionSchedule:
     until: datetime | None
     cron_expression: str | None
     run_overlap: bool = True
+    initial_run: bool = True
+    timezone: str = "UTC"
     def __post_init__(self):
         import re
@@ -108,8 +110,10 @@ class JobMetadata:
     requirements_type: Optional[str]
     complexity_factor: float
     data_size: float
-    secret_key: str
-    iv: str
+    code_size: float
+    secret_key: Optional[str]
+    iv: Optional[str]
+    env_vars: dict = None
 class BaseTyperCommand:

{sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/command/submit_to_network.py RENAMED Viewed

@@ -8,10 +8,12 @@ from typing import Optional, BinaryIO
 import boto3
 import typer
 from botocore.exceptions import ClientError
+from dotenv import dotenv_values
 from seshat.general.command.base import BaseTyperCommand, ApiConfig, JobMetadata
 from seshat.general.exceptions import RestClientException, EnvFileNotFound
 from seshat.general.models import CodePackage
+from seshat.utils.date_utils import format_datetime_for_api
 from seshat.utils.file import is_binary_file
 from seshat.utils.file_cryptography import AESCipher
 from seshat.utils.jobignore import JobIgnoreHandler
@@ -191,15 +193,17 @@ class SubmitCommand(BaseTyperCommand):
         directory: str,
         name: str,
         version: str,
-        executor_image_tag: str,
+        metadata: JobMetadata,
+        executor_image_tag: str = None,
         requirements_file: Optional[str] = None,
-        metadata: JobMetadata = None,
+        secret_env: bool = False,
     ) -> CodePackage:
         self.echo(f"📦 Packaging code from {directory}")
-        env_file, encrypted_data = None, None
+        env_file, env_data = None, None
         try:
-            encrypted_data, key, iv, env_file = self.handle_env_file(directory)
+            env_data, key, iv, env_file = self.handle_env_file(directory, secret_env)
+            if not secret_env:
+                metadata.env_vars = env_data
             metadata.secret_key = key
             metadata.iv = iv
         except EnvFileNotFound:
@@ -225,9 +229,13 @@ class SubmitCommand(BaseTyperCommand):
                 if job_ignore_handler.match_gitignore_like_path(relative_path):
                     continue
-                if env_file and (pathlib.Path(filepath) == pathlib.Path(env_file)):
+                if (
+                    secret_env
+                    and env_file
+                    and (pathlib.Path(filepath) == pathlib.Path(env_file))
+                ):
                     with open(filepath, "r", encoding="utf-8") as _:
-                        all_files[relative_path] = encrypted_data
+                        all_files[relative_path] = env_data
                         # metadata.env_file_path = env_file
                         self.echo(f"📄 Added {relative_path}")
                     continue
@@ -236,6 +244,8 @@ class SubmitCommand(BaseTyperCommand):
                     all_files, filepath, public_files, relative_path, total_size
                 )
+        metadata.code_size = total_size
         content_hash = self._hash_package(public_files)
         package = CodePackage(
@@ -243,7 +253,7 @@ class SubmitCommand(BaseTyperCommand):
             version=version,
             executor_image_tag=executor_image_tag,
             files=all_files,
-            metadata=None if metadata is None else asdict(metadata),
+            metadata=asdict(metadata),
             hash=content_hash.hexdigest(),
             binary_files=set(),
         )
@@ -270,7 +280,7 @@ class SubmitCommand(BaseTyperCommand):
         return new_total_size
-    def handle_env_file(self, directory: str):
+    def handle_env_file(self, directory: str, secret_env: bool):
         env_file_path = self.config.get("code").get("env_file")
         if not env_file_path:
             self.echo("No env file found in config")
@@ -289,11 +299,15 @@ class SubmitCommand(BaseTyperCommand):
             self.echo("No env file found to encrypt")
             raise EnvFileNotFound
-        encryption_result = AESCipher().encrypt_file(env_file)
-        encrypted_data = encryption_result["encrypted_data"]
-        key = encryption_result["key"]
-        iv = encryption_result["iv"]
-        return encrypted_data, key, iv, env_file
+        key, iv = None, None
+        if secret_env:
+            encryption_result = AESCipher().encrypt_file(env_file)
+            env_data = encryption_result["encrypted_data"]
+            key = encryption_result["key"]
+            iv = encryption_result["iv"]
+        else:
+            env_data = dotenv_values(env_file)
+        return env_data, key, iv, env_file
     def store_code(self, package: CodePackage) -> str:
         self.echo("☁️  Uploading to S3...")
@@ -311,8 +325,8 @@ class SubmitCommand(BaseTyperCommand):
         s3_key: str,
         name: str,
         version: str,
-        executor_image_tag: str,
         metadata: JobMetadata,
+        executor_image_tag: str = None,
         expiration=86400,
     ) -> dict:
         """Submit job to API after successful upload"""
@@ -357,3 +371,89 @@ class SubmitCommand(BaseTyperCommand):
         except RestClientException as e:
             self.echo(f"❌ Failed to submit job: {str(e)}")
             raise
+    def publish_job(
+        self,
+        s3_key: str,
+        name: str,
+        version: str,
+        metadata: JobMetadata,
+        expiration=86400,
+    ) -> dict:
+        """Submit job to API after successful upload"""
+        if not self.job_config.base_url or not self.job_config.auth_token:
+            raise ValueError(
+                "API configuration missing. Please set base_url and auth_token"
+            )
+        presigned_url = self.backend.generate_presigned_url(s3_key, expiration)
+        executor_label = self.config.get("executor", {}).get("label")
+        payload = {
+            "name": name,
+            "label": name,
+            "public": False,
+            "pipeline": {
+                "configs": metadata.env_vars,
+                "schedule": {
+                    "type": metadata.execution_plan.schedule_mode,
+                    "expression": metadata.execution_plan.cron_expression,
+                    "timezone": metadata.execution_plan.timezone,
+                    "start_time": format_datetime_for_api(
+                        metadata.execution_plan.start_time
+                    ),
+                    "end_time": format_datetime_for_api(metadata.execution_plan.until),
+                    "initial_run": metadata.execution_plan.initial_run,
+                    "run_overlap": metadata.execution_plan.run_overlap,
+                }
+                if metadata.execution_plan is not None
+                else {"type": "once"},
+                "job_template": {
+                    "name": f"{name}-job",
+                    "label": f"{name}-job",
+                    "description": "",
+                    "version": str(version),
+                    "execution_priority": "default",
+                    "validation_priority": "default",
+                    "directory": {"url": presigned_url, "type": "s3"},
+                    "retry_policy": {
+                        "retry_on_error": False,
+                        "interval": "0",
+                        "retry_count": 0,
+                        "action_on_failure": "none",
+                    },
+                    "meta_data": {
+                        "code_size": metadata.code_size,
+                        "pipeline_hash": metadata.pipeline_hash,
+                        "confidential_level": metadata.confidential_level,
+                        "execution_mode": metadata.execution_mode,
+                        "main_file_path": metadata.main_file_path,
+                        "env_file_path": metadata.env_file_path,
+                        "complexity_factor": metadata.complexity_factor,
+                        "data_size": metadata.data_size,
+                    },
+                    "executor_label": executor_label,
+                },
+                "config_handler": "store_accounts",
+                "active": True,
+            },
+        }
+        if metadata.secret_key:
+            payload["pipeline"]["job_template"]["meta_data"].update(
+                {
+                    "encryption_secret_key": metadata.secret_key,
+                    "encryption_iv": metadata.iv,
+                }
+            )
+        try:
+            self.echo("🚀 Publishing job to cook...")
+            response_data = self.rest_client.post(
+                "agent-launchers/data-agents/submit", json=payload
+            )
+            self.echo("✅ Job published to cook successfully!")
+            return response_data
+        except RestClientException as e:
+            self.echo(f"❌ Failed to publish job: {str(e)}")
+            raise

{sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/general/config.py RENAMED Viewed

@@ -26,6 +26,7 @@ TOP_ADDRESS_SF_KEY = "top_address"
 EXCLUSION_SF_KEY = "exclusion"
 TOKEN_PRICE_SF_KEY = "token_price"
 PROFIT_LOSS_SF_KEY = "profit_loss"
+DUPLICATED_SF_KEY = "duplicated"
 SPARK_APP_NAME = "seshat"
 PANDAS_MODE = "df"
 PYSPARK_MODE = "spf"

{sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/mixins.py RENAMED Viewed

@@ -78,13 +78,31 @@ class SQLMixin:
             trans.commit()
             conn.close()
+    def _parse_table_name(self, table_name: str) -> tuple[Optional[str], str]:
+        """
+        Parse a table name that might include a schema prefix.
+        Returns (schema_name, table_name)
+        """
+        if "." in table_name:
+            parts = table_name.split(".", 1)
+            return parts[0], parts[1]
+        return None, table_name
     def ensure_table_exists(self, table: str, schema: Schema):
         engine = self.get_engine()
-        if table in inspect(engine).get_table_names():
+        db_schema, table_name = self._parse_table_name(table)
+        # Check if table exists in the specific schema
+        inspector = inspect(engine)
+        existing_tables = inspector.get_table_names(schema=db_schema)
+        if table_name in existing_tables:
             return
         self.create_table(schema, table)
     def create_table(self, schema: Schema, table: str):
+        db_schema, table_name = self._parse_table_name(table)
         table_columns = []
         pk_cols = []
         for col in schema.cols:
@@ -96,7 +114,9 @@ class SQLMixin:
         constraints = []
         if pk_cols:
             constraints.append(
-                PrimaryKeyConstraint(*pk_cols, name=f"{table}_pk_{'_'.join(pk_cols)}")
+                PrimaryKeyConstraint(
+                    *pk_cols, name=f"{table_name}_pk_{'_'.join(pk_cols)}"
+                )
             )
         _, metadata = self.get_table(
             table, False, *table_columns, *constraints, extend_existing=True
@@ -104,7 +124,13 @@ class SQLMixin:
         metadata.create_all(self.get_engine())
     def get_table(self, table_name, autoload, *args, **kwargs):
+        db_schema, actual_table_name = self._parse_table_name(table_name)
         metadata = MetaData()
         if autoload:
             kwargs.setdefault("autoload_with", self.get_engine())
-        return Table(table_name, metadata, *args, **kwargs), metadata
+        if db_schema:
+            kwargs["schema"] = db_schema
+        return Table(actual_table_name, metadata, *args, **kwargs), metadata

{sdk_seshat_python-0.4.2 → sdk_seshat_python-0.4.4}/seshat/source/saver/database.py RENAMED Viewed

@@ -2,12 +2,8 @@ import hashlib
 import statistics
 from typing import List
-import sqlalchemy as db
 from sqlalchemy import (
-    Column,
     Index,
-    MetaData,
-    Table,
     and_,
     inspect,
     select,
@@ -52,29 +48,21 @@ class SQLDBSaver(SQLMixin, Saver):
             else:
                 self.insert(selected_sf, config)
-    def ensure_table_exists(self, table: str, schema: Schema):
-        engine = self.get_engine()
-        if table in inspect(engine).get_table_names():
-            return
-        self.create_table(schema, table)
-    def create_table(self, schema: Schema, table: str):
-        table_columns = []
-        for col in schema.cols:
-            col_name = col.to
-            col_type = getattr(db, col.dtype or "String")
-            table_columns.append(Column(col_name, col_type))
-        _, metadata = self.get_table(table, False, *table_columns, extend_existing=True)
-        metadata.create_all(self.get_engine())
     def delete(self, table_name):
         table, _ = self.get_table(table_name, autoload=True)
         self.write_on_db(table.delete())
     def drop_table(self, table_name):
-        if table_name in inspect(self.get_engine()).get_table_names():
+        db_schema, actual_table_name = self._parse_table_name(table_name)
+        engine = self.get_engine()
+        inspector = inspect(engine)
+        # Check if table exists in the specific schema
+        existing_tables = inspector.get_table_names(schema=db_schema)
+        if actual_table_name in existing_tables:
             table, _ = self.get_table(table_name, autoload=True)
-            table.drop(self.get_engine())
+            table.drop(engine)
     def insert(self, selected_sf: SFrame, config: SaveConfig):
         values = self.prepare_sf_to_insert(selected_sf, config).to_dict()
@@ -126,13 +114,17 @@ class SQLDBSaver(SQLMixin, Saver):
             hashed_cols = self.hash_columns([col.key for col in index.columns])
             current_indexes.add(hashed_cols)
+        # Parse table name to get the actual table name without schema
+        _, actual_table_name = self._parse_table_name(config.table)
         for index in config.indexes:
             index_cols = [index] if isinstance(index, str) else index
             index_hash = self.hash_columns(index_cols)
             if index_hash in current_indexes:
                 continue
-            index_name = f"{'_'.join(index_cols)}_index_{table.name}"
+            # Use the actual table name (without schema) for index naming
+            index_name = f"{'_'.join(index_cols)}_index_{actual_table_name}"
             index_obj = Index(
                 index_name,
                 *[getattr(table.c, index_col) for index_col in index_cols],
@@ -184,12 +176,6 @@ class SQLDBSaver(SQLMixin, Saver):
         )
         return self.get_from_db(query)
-    def get_table(self, table_name, autoload, *args, **kwargs):
-        metadata = MetaData()
-        if autoload:
-            kwargs.setdefault("autoload_with", self.get_engine())
-        return Table(table_name, metadata, *args, **kwargs), metadata
     def get_from_db(self, query):
         with self.get_engine().connect() as conn:
             result = conn.execute(query)

sdk-seshat-python 0.4.2__tar.gz → 0.4.4__tar.gz

sdk-seshat-python 0.4.2.tar.gz → 0.4.4.tar.gz