PyPI - sdk-seshat-python - Versions diffs - 0.3.4__tar.gz → 0.3.13__tar.gz - Mend

sdk-seshat-python 0.3.4 (tar.gz) → 0.3.13 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (143) hide show

sdk_seshat_python-0.3.13/PKG-INFO ADDED Viewed

@@ -0,0 +1,40 @@
+Metadata-Version: 2.1
+Name: sdk-seshat-python
+Version: 0.3.13
+Summary: Seshat python SDK is a library to help create ML data pipelines.
+License: Commercial - see LICENSE.txt
+Author: SeshatLabs
+Author-email: info@seshatlabs.xyz
+Requires-Python: >=3.11,<4.0
+Classifier: License :: Other/Proprietary License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Provides-Extra: flipside-support
+Provides-Extra: postgres-support
+Requires-Dist: backoff (>=2.2.1,<3.0.0)
+Requires-Dist: bokeh (>=3.6.0,<4.0.0)
+Requires-Dist: boto3 (>=1.35.68,<2.0.0)
+Requires-Dist: croniter (>=6.0.0,<7.0.0)
+Requires-Dist: cryptography (>=44.0.0,<45.0.0)
+Requires-Dist: dask[array,complete,dataframe,distributed] (>=2024.10.0,<2025.0.0)
+Requires-Dist: flipside (>=2.1.0,<3.0.0) ; extra == "flipside-support"
+Requires-Dist: langchain (>=0.3.23,<0.4.0)
+Requires-Dist: langchain-community (>=0.3.21,<0.4.0)
+Requires-Dist: langchain-openai (>=0.3.12,<0.4.0)
+Requires-Dist: loguru (>=0.7.3,<0.8.0)
+Requires-Dist: memory-profiler (>=0.61.0,<0.62.0)
+Requires-Dist: openai (>=1.73.0,<2.0.0)
+Requires-Dist: pandas (>=2.2.1,<3.0.0)
+Requires-Dist: pyarmor (>=8.5.1,<9.0.0)
+Requires-Dist: pydantic (>=2.7.4,<3.0.0)
+Requires-Dist: pyspark (>=3.5.1,<4.0.0)
+Requires-Dist: python-logstash-async (>=4.0.2,<5.0.0)
+Requires-Dist: requests (==2.32.0)
+Requires-Dist: rich (>=13.9.4,<14.0.0)
+Requires-Dist: scikit-learn (>=1.4.1.post1,<2.0.0)
+Requires-Dist: sqlalchemy (>=2.0.29,<3.0.0)
+Requires-Dist: toml (>=0.10.2,<0.11.0)
+Requires-Dist: typer (>=0.12.3,<0.13.0)
+Description-Content-Type: text/markdown

sdk_seshat_python-0.3.13/pyproject.toml ADDED Viewed

@@ -0,0 +1,59 @@
+[tool.poetry]
+name = "sdk-seshat-python"
+version = "0.3.13"
+description = "Seshat python SDK is a library to help create ML data pipelines."
+authors =  ["SeshatLabs <info@seshatlabs.xyz>"]
+packages = [{ include = "seshat", from = "." }]
+readme = "README.md"
+license = "Commercial - see LICENSE.txt"
+[tool.poetry.dependencies]
+python = "^3.11"
+pandas = "^2.2.1"
+scikit-learn = "^1.4.1.post1"
+pyspark = "^3.5.1"
+flipside = "^2.1.0"
+sqlalchemy = "^2.0.29"
+memory-profiler = "^0.61.0"
+typer = "^0.12.3"
+dask = {extras = ["array", "complete", "dataframe", "distributed"], version = "^2024.10.0"}
+bokeh = "^3.6.0"
+toml = "^0.10.2"
+rich = "^13.9.4"
+boto3 = "^1.35.68"
+requests = "2.32.0"
+backoff = "^2.2.1"
+cryptography = "^44.0.0"
+loguru = "^0.7.3"
+openai = "^1.73.0"
+pydantic = "^2.7.4"
+langchain = "^0.3.23"
+langchain-community = "^0.3.21"
+langchain-openai = "^0.3.12"
+pyarmor = "^8.5.1"
+croniter = "^6.0.0"
+python-logstash-async = "^4.0.2"
+[tool.poetry.extras]
+# Optional feature sets; each extra lists the packages it pulls in.
+flipside_support = ["flipside"]
+# NOTE(review): psycopg2-binary is not declared under [tool.poetry.dependencies]
+# above, so this extra appears to install nothing — confirm and declare it as an
+# optional dependency if Postgres support is meant to be installable.
+postgres_support = ["psycopg2-binary"]
+[tool.poetry.group.dev.dependencies]
+flake8 = "^7.0.0"
+black = "^24.3.0"
+pre-commit = "^3.7.0"
+pytest = "^8.3.4"
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+[tool.pytest.ini_options]
+pythonpath = [
+    ".",
+    "src",
+]
+testpaths = [
+    "test",
+]

sdk_seshat_python-0.3.13/seshat/__init__.py ADDED Viewed

@@ -0,0 +1,321 @@
+import os.path
+from pathlib import Path
+import toml
+import typer
+from rich.console import Console
+from rich.table import Table
+from seshat.general.command import (
+    SubmitCommand,
+    JobMetadata,
+    JobExecutionSchedule,
+    ExecutionMode,
+)
+from seshat.general.command.code_inspect import CodeInspectCommand
+from seshat.general.command.job_status import JobStatusCommand
+from seshat.general.command.setup_project import RECOMMENDATION, SetUpProjectCommand
+from seshat.general.exceptions import NoConfigSetError, RestClientException
+# Typer application object that the command functions below register on.
+app = typer.Typer()
+# Rich console used to print the upload summary table in the submit command.
+console = Console()
+# Fallback for the "data_size" setting when it is not given on the CLI or in the config file.
+# NOTE(review): JobMetadata documents data_size in KB, which would make this ~1 GB — confirm the unit.
+DEFAULT_DATA_SIZE = 1_000_000  # 1 GB
+# Mutable CLI-wide state: "verbose" is written by the app callback and read by create-project.
+state = {"verbose": False}
+def load_config(config_file: Path = Path.home() / ".codemanager.toml") -> dict:
+    try:
+        if config_file.exists():
+            return toml.load(config_file)
+        else:
+            raise NoConfigSetError("Config file not found.")
+    except toml.TomlDecodeError:
+        raise NoConfigSetError("Config file is malformed.")
+@app.command(name="create-project")
+def create_project(name: str, usecase=typer.Option(default=RECOMMENDATION)):
+    command = SetUpProjectCommand(name, usecase, os.getcwd(), report=state["verbose"])
+    try:
+        command.handle()
+    except Exception as exc:
+        cli_msg = typer.style(
+            f"Setup project in usecase {usecase} failed because of {str(exc)}",
+            fg=typer.colors.RED,
+            bold=True,
+        )
+    else:
+        cli_msg = typer.style(
+            f"""
+            Setup project in usecase {usecase} done!\n
+            You can deploy your project by this command 🚀:
+            'python -m seshat deploy`
+            """,
+            fg=typer.colors.GREEN,
+            bold=True,
+        )
+    typer.echo(cli_msg)
+@app.command(name="inspect")
+def inspect_code(
+    directory: str = typer.Argument(..., help="Directory containing the code"),
+    config_file: Path = typer.Option(
+        Path.home() / ".codemanager.toml", help="Path to config file"
+    ),
+):
+    try:
+        config = load_config(config_file)
+        if not config:
+            raise NoConfigSetError()
+        manager = CodeInspectCommand(config)
+        complexity = manager.handle(directory)
+        config["code"] = {**config.get("code", {}), "complexity_factor": complexity}
+        with open(config_file, "w") as f:
+            toml.dump(config, f)
+        typer.echo(f"Configuration updated in {config_file}")
+    except Exception as e:
+        typer.echo(f"Error: {str(e)}", err=True)
+        raise typer.Exit(1)
+@app.command(name="submit")
+def submit_job(
+    directory: str = typer.Argument(..., help="Directory containing the code"),
+    name: str = typer.Option(..., help="Name of the package"),
+    version: str = typer.Option(..., help="Version of the package"),
+    executor_image_tag: str = typer.Option(
+        "latest", help="Image tag of the executor which runs the job"
+    ),
+    confidential_level: str = typer.Option(
+        "default", help="Confidential level desired for the pipeline "
+    ),
+    execution_mode: str = typer.Option(
+        "single", help="Execution mode for the pipeline "
+    ),
+    config_file: Path = typer.Option(
+        Path.home() / ".codemanager.toml", help="Path to config file"
+    ),
+):
+    try:
+        config = load_config(config_file)
+        if not config:
+            raise NoConfigSetError()
+        manager = SubmitCommand(config)
+        job_execution_schedule = None
+        if "execution" in config:
+            job_execution_schedule = JobExecutionSchedule(
+                **config.get("execution", {}).get("plan", {})
+            )
+        job_metadata = JobMetadata(
+            pipeline_hash="",
+            confidential_level=confidential_level,
+            execution_mode=ExecutionMode(execution_mode),
+            execution_plan=job_execution_schedule,
+            main_file_path=config.get("code", {}).get("main_file", "main.py"),
+            env_file_path=config.get("code", {}).get("env_file", ".env"),
+            data_size=config.get("code", {}).get("data_size", DEFAULT_DATA_SIZE),
+            complexity_factor=config.get("code", {}).get("complexity_factor", 0),
+            requirement_file=None,
+            requirements_type=None,
+            secret_key="",
+            iv="",
+        )
+        package = manager.handle(
+            directory, name, version, executor_image_tag, metadata=job_metadata
+        )
+        obfuscate_code = config.get("code", {}).get("obfuscate_code", "false")
+        obfuscate_code = True if obfuscate_code.lower() == "true" else False
+        if obfuscate_code:
+            package = manager.obfuscate_code(package)
+        job_metadata.pipeline_hash = package.hash
+        identifier = manager.store_code(package)
+        job_response = manager.submit_job(
+            identifier, name, version, executor_image_tag, job_metadata
+        )
+        job_response_data = job_response.get("data", {})
+        table = Table(title="Upload Summary")
+        table.add_column("Property", style="cyan")
+        table.add_column("Value", style="green")
+        table.add_row("Name", package.name)
+        table.add_row("Version", package.version)
+        table.add_row("ExecutorImageTag", package.executor_image_tag)
+        table.add_row("Hash", package.hash)
+        table.add_row("S3 Location", identifier)
+        table.add_row("Job ID", str(job_response_data.get("id", "N/A")))
+        table.add_row("Status", job_response_data.get("status", {}).get("state", "N/A"))
+        console.print(table)
+    except Exception as e:
+        typer.echo(f"Error: {str(e)}", err=True)
+        raise typer.Exit(1)
+@app.command(name="configure")
+def configure_job(
+    bucket: str = typer.Option(..., prompt=True, help="S3 bucket name"),
+    prefix: str = typer.Option("code", prompt=True, help="S3 prefix"),
+    aws_access_key_id: str = typer.Option(..., prompt=True, help="AWS access key ID"),
+    aws_secret_access_key: str = typer.Option(
+        ..., prompt=True, hide_input=True, help="AWS secret access key"
+    ),
+    aws_region: str = typer.Option("us-east-1", prompt=True, help="AWS region"),
+    api_base_url: str = typer.Option(
+        ..., prompt=True, show_default=True, help="API base URL"
+    ),
+    api_auth_token: str = typer.Option(
+        ...,
+        prompt=True,
+        hide_input=True,
+        prompt_required=True,
+        help="API authentication token",
+    ),
+    main_file_path: str = typer.Option(
+        ...,
+        prompt=True,
+        hide_input=False,
+        prompt_required=True,
+        help="Path to main file",
+    ),
+    env_file_path: str = typer.Option(
+        ".env",
+        prompt=True,
+        hide_input=False,
+        prompt_required=True,
+        help="Path to main file",
+    ),
+    ignore_file: str = typer.Option(
+        ".jobignore",
+        prompt=True,
+        help="Path to file containing ignore patterns for job files",
+    ),
+    schedule_mode: str = typer.Option(
+        "once", prompt=True, help="Execution schedule mode (e.g. single, recurring)"
+    ),
+    start_time: str = typer.Option(
+        "", prompt=True, help="Start time (YYYY-MM-DDTHH:MM)"
+    ),
+    until: str = typer.Option("", prompt=True, help="Until time (YYYY-MM-DDTHH:MM)"),
+    cron_expression: str = typer.Option(
+        "", prompt=True, help="Cron expression for schedule (if applicable)"
+    ),
+    run_overlap: str = typer.Option(
+        "true", prompt=True, help="Allow overlapping runs (true/false)"
+    ),
+    data_size: float = typer.Option(
+        DEFAULT_DATA_SIZE,
+        prompt=True,
+        help="Estimate of size of data you want processed",
+    ),
+    complexity_factor: float = typer.Option(
+        None,
+        prompt=True,
+        help="Estimate of process complexity (between 1 to 1000, higher takes more time and is more expensive)",
+    ),
+    config_file: Path = typer.Option(
+        Path.home() / ".codemanager.toml", help="Path to config file"
+    ),
+):
+    config = {
+        "aws": {
+            "bucket": bucket,
+            "prefix": prefix,
+            "access_key_id": aws_access_key_id,
+            "secret_access_key": aws_secret_access_key,
+            "region": aws_region,
+        },
+        "api": {
+            "base_url": api_base_url,
+            "auth_token": api_auth_token,
+        },
+        "code": {
+            "main_file": main_file_path,
+            "ignore_file": ignore_file,
+            "env_file": env_file_path,
+            "data_size": data_size,
+            "complexity_factor": complexity_factor,
+        },
+        "execution": {
+            "plan": {
+                "schedule_mode": schedule_mode,
+                "start_time": start_time,
+                "until": until,
+                "cron_expression": cron_expression,
+                "run_overlap": run_overlap,
+            }
+        },
+    }
+    config_file.parent.mkdir(parents=True, exist_ok=True)
+    with open(config_file, "w") as f:
+        toml.dump(config, f)
+    typer.echo(f"Configuration saved to {config_file}")
+@app.command(name="job-status")
+def job_status(
+    job_id: str = typer.Argument(..., help="The job ID to check"),
+    config_file: Path = typer.Option(
+        Path.home() / ".codemanager.toml", help="Path to config file"
+    ),
+    api_base_url: str = typer.Option(None, help="API base URL"),
+    api_auth_token: str = typer.Option(
+        None,
+        help="API authentication token",
+    ),
+):
+    """Check the status of a submitted job"""
+    try:
+        try:
+            config = load_config(config_file)
+        except NoConfigSetError():
+            config = None
+        if not config and not (api_base_url and api_auth_token):
+            raise NoConfigSetError()
+        else:
+            base_url = api_base_url or config.get("api", {}).get("base_url")
+            auth_token = api_auth_token or config.get("api", {}).get("auth_token")
+        if not base_url:
+            typer.echo("base_url is not set")
+            raise NoConfigSetError(
+                "base_url parameter must be set in the config file or as command option"
+            )
+        if not auth_token:
+            typer.echo("auth_token is not set")
+            raise NoConfigSetError(
+                "auth_token parameter must be set in the config file or as command option"
+            )
+        manager = JobStatusCommand(base_url=base_url, auth_token=auth_token)
+        manager.job_status(job_id)
+    except NoConfigSetError as e:
+        typer.echo(f"Configuration error: {e}")
+        raise typer.Exit(2)
+    except RestClientException as e:
+        typer.echo(f"API error: {e}")
+        raise typer.Exit(1)
+    except Exception as e:
+        typer.echo(f"Unexpected error: {e}")
+        raise typer.Exit(1)
+@app.callback()
+def main(verbose: bool = False):
+    state["verbose"] = verbose

{sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/__main__.py RENAMED Viewed

@@ -8,4 +8,5 @@ try:
     rc = 0
 except Exception as e:
     print("Error: %s" % e, file=sys.stderr)
 sys.exit(rc)

{sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/data_class/pandas.py RENAMED Viewed

@@ -19,7 +19,7 @@ class DFrame(SFrame):
         return GroupSFrame({default_key: self}, sframe_class=self.__class__)
     def get_columns(self, *args) -> Iterable[str]:
-        return self.data.columns
+        return self.data.columns.tolist()
     def to_dict(self, *cols: str, key=configs.DEFAULT_SF_KEY) -> List[Dict]:
         selected = self.data[list(cols)] if cols else self.data

{sdk_seshat_python-0.3.4 → sdk_seshat_python-0.3.13}/seshat/feature_view/base.py RENAMED Viewed

@@ -4,7 +4,7 @@ from typing import Dict, Callable
 from seshat.data_class import SFrame
 from seshat.evaluation.base import Evaluation
 from seshat.profiler import ProfileConfig
-from seshat.profiler.base import Profiler, profiler
+from seshat.profiler.base import profiler
 from seshat.source import Source
 from seshat.source.saver import Saver
 from seshat.transformer.pipeline import Pipeline
@@ -83,20 +83,38 @@ class FeatureView:
     evaluation: Evaluation
     def __call__(self, *args, **kwargs):
-        Profiler.setup(self.profile_config)
         source = self._get_source()
         pipeline = self._get_pipeline()
-        self.data = source(*args, **kwargs)
-        if self.split_at_start:
-            self._split(*args, **kwargs)
-            self.run_pipline_on_split_data(pipeline, *args, **kwargs)
-        else:
-            self.data = pipeline(self.data, *args, **kwargs)
+        profiler.setup(config=self.profile_config)
+        with profiler:
+            self.data = source(*args, **kwargs)
+            if self.split_at_start:
+                self._split(*args, **kwargs)
+                self.run_pipline_on_split_data(pipeline, *args, **kwargs)
+            else:
+                self.data = pipeline(self.data, *args, **kwargs)
-        profiler.tear_down()
         return self
+    def calculate_complexity(self):
+        complexity = 0
+        if self.saver is not None:
+            complexity += self.saver.calculate_complexity()
+        if self.online:
+            return (
+                complexity
+                + self.online_source.calculate_complexity()
+                + self.online_pipeline.calculate_complexity()
+            )
+        return (
+            complexity
+            + self.offline_source.calculate_complexity()
+            + self.offline_pipeline.calculate_complexity()
+        )
     def run_pipline_on_split_data(self, pipeline, *args, **kwargs):
         for k, data in self.split_data.items():
             self.split_data[k] = pipeline(data, *args, **kwargs)
@@ -119,12 +137,12 @@ class FeatureView:
     def save(self):
         if self.saver is None:
             return
-        (
-            self.saver(self.train_data())
-            if hasattr(self, "splitter")
-            else self.saver(self.data)
-        )
+        with profiler:
+            (
+                self.saver(self.train_data())
+                if hasattr(self, "splitter")
+                else self.saver(self.data)
+            )
     def _split(self, *args, **kwargs):
         self.split_data = self.splitter(self.data, *args, **kwargs)

sdk_seshat_python-0.3.13/seshat/general/command/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+from .setup_project import SetUpProjectCommand
+from .submit_to_network import SubmitCommand
+from .base import JobMetadata, JobExecutionSchedule, ExecutionMode, JobScheduleMode
+__all__ = [
+    "SetUpProjectCommand",
+    "SubmitCommand",
+    "JobMetadata",
+    "JobExecutionSchedule",
+    "ExecutionMode",
+    "JobScheduleMode",
+]

sdk_seshat_python-0.3.13/seshat/general/command/base.py ADDED Viewed

@@ -0,0 +1,122 @@
+from dataclasses import dataclass
+from datetime import datetime
+from enum import StrEnum
+from typing import Optional
+import typer
+from croniter import croniter
+class ExecutionMode(StrEnum):
+    """String-valued choices for the execution mode stored on JobMetadata."""
+    SINGLE = "single"
+    CLUSTER = "cluster"
+    STREAM = "stream"
+class JobScheduleMode(StrEnum):
+    """String-valued choices for JobExecutionSchedule.schedule_mode."""
+    ONCE = "once"
+    CRON = "cron"
+@dataclass
+class ApiConfig:
+    base_url: str
+    auth_token: str
+    def __post_init__(self):
+        for field_name, field_value in self.__dict__.items():
+            if field_value is None:
+                raise ValueError(f"The field '{field_name}' cannot be None.")
+@dataclass
+class JobExecutionSchedule:
+    schedule_mode: JobScheduleMode
+    start_time: datetime | None
+    until: datetime | None
+    cron_expression: str | None
+    run_overlap: bool = True
+    def __post_init__(self):
+        import re
+        from datetime import datetime
+        fmt = "%Y-%m-%dT%H:%M"
+        regex = r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}$"
+        for field in ["start_time", "until"]:
+            value = getattr(self, field)
+            if value:
+                if isinstance(value, str):
+                    if not re.match(regex, value):
+                        raise ValueError(
+                            f"{field} must be in format YYYY-MM-DDTHH:MM, e.g., 2025-08-12T18:35"
+                        )
+                    try:
+                        datetime.strptime(value, fmt)
+                    except ValueError:
+                        raise ValueError(f"{field} is not a valid date/time: {value}")
+                elif isinstance(value, datetime):
+                    pass
+                else:
+                    raise TypeError(
+                        f"{field} must be a string in format YYYY-MM-DDTHH:MM or a datetime object"
+                    )
+        cron_value = self.cron_expression
+        if cron_value:
+            if not croniter.is_valid(cron_value):
+                raise ValueError(
+                    "cron_expression must be a valid 5-field cron string, e.g., '0 0 * * *'"
+                )
+@dataclass
+class JobMetadata:
+    """Metadata container for job execution in a pipeline system.
+    This class encapsulates necessary metadata for executing a job within
+    a pipeline, including security credentials, execution configuration, and
+    resource requirements.
+    Attributes:
+        pipeline_hash (str): Unique identifier hash for the pipeline instance.
+        confidential_level (str): Security classification level of the job
+            (e.g., 'public', 'internal', 'confidential', 'secret').
+        execution_mode (ExecutionMode): Enum specifying how the job should be
+            executed; one of the ExecutionMode members (single, cluster, stream).
+        execution_plan (Optional[JobExecutionSchedule]): Schedule configuration
+            for job execution. None if job runs immediately or on-demand.
+        main_file_path (str): Path to the main executable file or script.
+        env_file_path (str): Path to the environment configuration file.
+        secret_key (str): Encryption key for securing sensitive job data.
+        iv (str): Initialization vector for encryption operations.
+        requirement_file (Optional[str]): Path to the file containing job
+            dependencies, or None when not provided.
+        requirements_type (Optional[str]): Type of requirements specification
+            (e.g., 'pip', 'conda', 'poetry'), or None when not provided.
+        complexity_factor (float): Numerical measure of job computational
+            complexity, used for resource allocation.
+        data_size (float): Expected size of data in KB to be processed in the job.
+    """
+    pipeline_hash: str
+    confidential_level: str
+    execution_mode: ExecutionMode
+    execution_plan: Optional[JobExecutionSchedule]
+    main_file_path: str
+    env_file_path: str
+    requirement_file: Optional[str]
+    requirements_type: Optional[str]
+    complexity_factor: float
+    data_size: float
+    secret_key: str
+    iv: str
+class BaseTyperCommand:
+    def __init__(self, report: bool = False):
+        self.report = report
+    def echo(self, msg, *args, **kwargs):
+        if not self.report:
+            return
+        typer.echo(msg, *args, **kwargs)

sdk-seshat-python 0.3.4__tar.gz → 0.3.13__tar.gz

sdk-seshat-python 0.3.4 (tar.gz) → 0.3.13 (tar.gz)