PyPI - altasigma - Versions diffs - 3.10.1__tar.gz - Mend

altasigma 3.10.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

altasigma-3.10.1/LICENSE +2 -0
altasigma-3.10.1/PKG-INFO +80 -0
altasigma-3.10.1/README.md +49 -0
altasigma-3.10.1/altasigma/__init__.py +35 -0
altasigma-3.10.1/altasigma/config/__init__.py +6 -0
altasigma-3.10.1/altasigma/config/config.py +306 -0
altasigma-3.10.1/altasigma/config/http_session.py +67 -0
altasigma-3.10.1/altasigma/credentials/__init__.py +8 -0
altasigma-3.10.1/altasigma/credentials/credential_utils.py +345 -0
altasigma-3.10.1/altasigma/credentials/token_refresher.py +31 -0
altasigma-3.10.1/altasigma/initialize.py +26 -0
altasigma-3.10.1/altasigma/io/__init__.py +24 -0
altasigma-3.10.1/altasigma/io/augurdata.py +135 -0
altasigma-3.10.1/altasigma/io/data_management.py +576 -0
altasigma-3.10.1/altasigma/io/files.py +114 -0
altasigma-3.10.1/altasigma/jobsupervisor/__init__.py +35 -0
altasigma-3.10.1/altasigma/jobsupervisor/job_supervisor_abstract.py +147 -0
altasigma-3.10.1/altasigma/jobsupervisor/job_supervisor_dev_mock.py +142 -0
altasigma-3.10.1/altasigma/jobsupervisor/job_supervisor_helpers.py +61 -0
altasigma-3.10.1/altasigma/jobsupervisor/job_supervisor_http.py +282 -0
altasigma-3.10.1/altasigma/jobsupervisor/reports.py +35 -0
altasigma-3.10.1/altasigma/progress_reporter/__init__.py +10 -0
altasigma-3.10.1/altasigma/progress_reporter/progress_reporter.py +208 -0
altasigma-3.10.1/altasigma/spark_session/__init__.py +8 -0
altasigma-3.10.1/altasigma/spark_session/spark_session_util.py +178 -0
altasigma-3.10.1/altasigma/utils/__init__.py +8 -0
altasigma-3.10.1/altasigma/utils/notebook.py +59 -0
altasigma-3.10.1/altasigma.egg-info/PKG-INFO +80 -0
altasigma-3.10.1/altasigma.egg-info/SOURCES.txt +32 -0
altasigma-3.10.1/altasigma.egg-info/dependency_links.txt +1 -0
altasigma-3.10.1/altasigma.egg-info/requires.txt +4 -0
altasigma-3.10.1/altasigma.egg-info/top_level.txt +1 -0
altasigma-3.10.1/setup.cfg +4 -0
altasigma-3.10.1/setup.py +32 -0

altasigma-3.10.1/LICENSE ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ Copyright (c) 2025 AltaSigma GmbH
2	+ All rights reserved.

altasigma-3.10.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,80 @@
+Metadata-Version: 2.4
+Name: altasigma
+Version: 3.10.1
+Summary: This package contains methods to interact with the AltaSigma platform.
+Home-page: https://www.altasigma.com
+Author: AltaSigma GmbH
+Author-email: pypi@altasigma.com
+License: Proprietary
+Classifier: License :: Other/Proprietary License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: requests<3,>=2
+Requires-Dist: pandas>=1
+Requires-Dist: ipython>=8
+Requires-Dist: boto3<2,>=1.18.49
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+# AltaSigma Helpers
+Source code of the `altasigma` packages
+## Development
+Use `make requirements` to install all dependencies for development and testing.
+**Dependency Management:**
+- **`requirements/production.txt`** - Pinned versions for consistent development environments
+  - Ensures IDE autocomplete/suggestions work correctly
+  - Guarantees reproducible test runs across developers and CI
+  - Used by developers and CI for consistent behavior
+- **`requirements/development.txt`** - Development tools (pytest, black, isort, security scanners)
+- **`setup.py` (install_requires)** - Flexible version ranges for package users
+  - Allows users to get compatible versions that work with their other dependencies
+**When upgrading dependencies:**
+1. Update the pinned version in `requirements/production.txt`
+2. Update the version range in `setup.py` install_requires if needed
+3. Run tests to ensure compatibility
+Use `make format` before committing to format (with black) and sort imports (with isort).
+### Tags
+Assign tags like [`0.0.4.dev2`](https://peps.python.org/pep-0440/#developmental-releases) (incrementing the last part each time)
+while developing a new version based on `0.0.4`. The final release may be `0.0.5`, but that is not known at tagging time.
+### R Development Tags
+Use tags like `0.0.4-1`, `0.0.4-2`, etc. for R-only development. These:
+- Match the R CI regex `/^[0-9]+([.-][0-9]+)+$/` (`.gitlab-ci.yml:173`)
+- Are CRAN-compatible (hyphen separator)
+- Won't conflict with Python semantic versions
+- The Python build will turn such a tag into a version like `0.0.4.post1`, which is treated as a newer stable release than `0.0.4`,
+  so be careful: base these tags on an already outdated version number (the part before the hyphen) and ideally delete the resulting Python package version afterwards
+To install a version in the workbench for example, the easiest way may be to download the package and do
+`install.packages("/workbench/altasigma_0.0.4-1.tar.gz", repos = NULL, type = "source")` and check with
+`packageVersion("altasigma")`
+### R Versions
+https://cran.r-project.org/doc/manuals/r-release/R-exts.html#:~:text=The%20mandatory%20%E2%80%98Version
+> The mandatory 'Version' field gives the version of the package.
+> This is a sequence of at least two (and usually three) non-negative integers separated by single '.' or '-' characters.
+> The canonical form is as shown in the example, and a version such as '0.01' or '0.01.0' will be handled as if it were '0.1-0'.
+> It is not a decimal number, so for example 0.9 < 0.75 since 9 < 75.

altasigma-3.10.1/README.md ADDED Viewed

@@ -0,0 +1,49 @@
+# AltaSigma Helpers
+Source code of the `altasigma` packages
+## Development
+Use `make requirements` to install all dependencies for development and testing.
+**Dependency Management:**
+- **`requirements/production.txt`** - Pinned versions for consistent development environments
+  - Ensures IDE autocomplete/suggestions work correctly
+  - Guarantees reproducible test runs across developers and CI
+  - Used by developers and CI for consistent behavior
+- **`requirements/development.txt`** - Development tools (pytest, black, isort, security scanners)
+- **`setup.py` (install_requires)** - Flexible version ranges for package users
+  - Allows users to get compatible versions that work with their other dependencies
+**When upgrading dependencies:**
+1. Update the pinned version in `requirements/production.txt`
+2. Update the version range in `setup.py` install_requires if needed
+3. Run tests to ensure compatibility
+Use `make format` before committing to format (with black) and sort imports (with isort).
+### Tags
+Assign tags like [`0.0.4.dev2`](https://peps.python.org/pep-0440/#developmental-releases) (incrementing the last part each time)
+while developing a new version based on `0.0.4`. The final release may be `0.0.5`, but that is not known at tagging time.
+### R Development Tags
+Use tags like `0.0.4-1`, `0.0.4-2`, etc. for R-only development. These:
+- Match the R CI regex `/^[0-9]+([.-][0-9]+)+$/` (`.gitlab-ci.yml:173`)
+- Are CRAN-compatible (hyphen separator)
+- Won't conflict with Python semantic versions
+- The Python build will turn such a tag into a version like `0.0.4.post1`, which is treated as a newer stable release than `0.0.4`,
+  so be careful: base these tags on an already outdated version number (the part before the hyphen) and ideally delete the resulting Python package version afterwards
+To install a version in the workbench for example, the easiest way may be to download the package and do
+`install.packages("/workbench/altasigma_0.0.4-1.tar.gz", repos = NULL, type = "source")` and check with
+`packageVersion("altasigma")`
+### R Versions
+https://cran.r-project.org/doc/manuals/r-release/R-exts.html#:~:text=The%20mandatory%20%E2%80%98Version
+> The mandatory 'Version' field gives the version of the package.
+> This is a sequence of at least two (and usually three) non-negative integers separated by single '.' or '-' characters.
+> The canonical form is as shown in the example, and a version such as '0.01' or '0.01.0' will be handled as if it were '0.1-0'.
+> It is not a decimal number, so for example 0.9 < 0.75 since 9 < 75.

altasigma-3.10.1/altasigma/__init__.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""AltaSigma package for data management and processing.
+This package provides tools for credential management, data source operations,
+configuration management, and job supervision.
+The public API components are explicitly imported and exposed at the package level.
+Users should import components directly from the `altasigma` package rather than
+from individual submodules.
+Examples:
+    Recommended import style:
+    >>> from altasigma import CredentialUtils, AltaSigma, S3DataSource
+    Avoid importing from submodules directly:
+    >>> # Not recommended
+    >>> from altasigma.credentials.credential_utils import CredentialUtils
+Note:
+    Only components explicitly imported in this file are considered part of the
+    public API. Other components within submodules should be treated as internal
+    implementation details that may change without notice.
+    The only exception to that is altasigma.spark_session.spark_session_util, which contains
+    imports from pyspark. Not importing it here makes pyspark an optional dependency.
+"""
+from altasigma.credentials.credential_utils import CredentialUtils
+from altasigma.initialize import AltaSigma, initialize
+from altasigma.io.data_management import S3DataSource, Bucket, CassandraDataSource, BiographyInfo, BiographyInfoEntry, TextEntry, S3PathEntry, S3Data, CassandraTableEntry, CassandraData, get_datasource
+from altasigma.config.config import RunEnv, JobType
+from altasigma.jobsupervisor.reports import dataframe_to_table_report_data
+# Spark is an optional dependency. If we add an import here it is no longer optional
+# from altasigma.spark_session.spark_session_util import get_spark_session
+from altasigma.progress_reporter.progress_reporter import ProgressReporter

altasigma-3.10.1/altasigma/config/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""
+Python module for configuration management in AltaSigma Modules.
+This module defines job types, environment settings, and configuration classes
+for various components of the system.
+"""

altasigma-3.10.1/altasigma/config/config.py ADDED Viewed

@@ -0,0 +1,306 @@
+"""
+Python module for configuration management in AltaSigma Modules.
+This module defines job types, environment settings, and configuration classes
+for various components of the system.
+"""
+import logging
+import os
+import sys
+import threading
+from datetime import datetime
+from dataclasses import dataclass
+from enum import Enum
+from ..credentials.credential_utils import _credential_utils
+from ..credentials.token_refresher import TokenRefresher
+logger = logging.getLogger(__name__)
class JobType(Enum):
    """Kinds of jobs a Module/Augur can be started as.

    The value of each member is the string name used for that job type.

    Attributes:
        Learning (str): Job type for learning jobs.
        Evaluation (str): Job type for evaluation jobs.
        Prediction (str): Job type for prediction jobs.
        RealtimeScoring (str): Job type for real-time prediction jobs.
    """

    Learning = "learning"
    Evaluation = "evaluation"
    Prediction = "prediction"
    RealtimeScoring = "realtime-scoring"

    @classmethod
    def from_name(cls, name):
        """Look up the member whose value equals ``name``.

        Args:
            name (str): The string name of the job type, e.g. "learning".

        Returns:
            JobType: The member with that value.

        Raises:
            ValueError: If no member has ``name`` as its value.
        """
        found = next((member for member in cls if member.value == name), None)
        if found is None:
            raise ValueError(f"{name} is not a valid JobType name.")
        return found

    def to_name(self):
        """Return the string name of this job type.

        Returns:
            str: The member's value.
        """
        return self.value

    def is_batch_job(self):
        """Tell whether this job type runs as a batch job.

        Returns:
            bool: True for Learning, Evaluation and Prediction, False otherwise.
        """
        batch_types = (JobType.Learning, JobType.Evaluation, JobType.Prediction)
        return self in batch_types

    def is_passed_model_in_env_in_prod(self):
        """Tell whether a model is passed to this job type in the production environment.

        Returns:
            bool: True for RealtimeScoring, False otherwise.
        """
        return self is JobType.RealtimeScoring

    def is_passed_model_in_env_in_dev(self):
        """Tell whether a model is passed to this job type in the development environment.

        Returns:
            bool: True for Evaluation, Prediction and RealtimeScoring, False otherwise.
        """
        with_model = (JobType.Evaluation, JobType.Prediction, JobType.RealtimeScoring)
        return self in with_model
class RunEnv(Enum):
    """Environments a module can run in.

    Attributes:
        Dev (str): Development environment.
        Prod (str): Production environment.
    """

    Dev = "dev"
    Prod = "prod"

    @classmethod
    def from_name(cls, name):
        """Look up the member whose value equals ``name``.

        Args:
            name (str): The string name of the run environment, e.g. "dev".

        Returns:
            RunEnv: The member with that value.

        Raises:
            ValueError: If no member has ``name`` as its value.
        """
        found = next((member for member in cls if member.value == name), None)
        if found is None:
            raise ValueError(f"{name} is not a valid RunEnv name.")
        return found

    def to_name(self):
        """Return the string name of this run environment.

        Returns:
            str: The member's value.
        """
        return self.value
@dataclass()
class JobConfig:
    """Configuration for a job.

    Attributes:
        augur_code (str | None): Identifier for the augur.
        job_type (JobType): Type of the job.
        job_code (str | None): Identifier for the job, None if not a batch job.
        model_code (str | None): Identifier for the model. None at the start if JobType.Learning.
            Defaults to None.
        settings_code (str | None): Identifier for settings. Only set for JobType.RealtimeScoring.
            Defaults to None.
        realtime_server_host (str | None): Host for the realtime server. Defaults to None.
        realtime_server_port (str | int | None): Port for the realtime server. Defaults to None.
    """

    # NOTE(review): ModuleConfig only reads the augur code from the environment in Prod
    # and passes None otherwise, hence the optional type.
    augur_code: str | None
    job_type: JobType
    # None if not a batch job
    job_code: str | None
    # None at the start if JobType.Learning, because that creates a model_code, which will be received from the JobSupervisor
    model_code: str | None = None
    # Only set for realtime scoring since that needs a stable identifier
    settings_code: str | None = None
    realtime_server_host: str | None = None
    # NOTE(review): ModuleConfig passes the raw environment string when the variable is set
    # and the int 5000 as fallback, so both types occur here.
    realtime_server_port: str | int | None = None
@dataclass(frozen=True)
class DashboardConfig:
    """Immutable connection settings for the dashboard API.

    Attributes:
        api_host (str): Hostname of the dashboard API.
        api_port (int): Port number of the dashboard API.
    """

    api_host: str
    # NOTE(review): ModuleConfig fills this from os.environ without converting it,
    # so the stored value is presumably a str at runtime - confirm before relying on int.
    api_port: int
@dataclass(frozen=True)
class DataManConfig:
    """Immutable connection settings for the data management API.

    Attributes:
        api_host (str): Hostname of the data management API.
        api_port (int): Port number of the data management API.
    """

    api_host: str
    # NOTE(review): _data_man_config fills this from os.environ without converting it,
    # so the stored value is presumably a str at runtime - confirm before relying on int.
    api_port: int
def generate_dev_job_code(job_type: JobType) -> str:
    """Generates a job code for the development environment.

    The code has the form ``<timestamp>_<JOB TYPE NAME IN UPPER CASE>``, where
    the timestamp is formatted as ``%Y%m%dT%H%M%SZ``. The trailing ``Z`` is the
    UTC designator, so the timestamp is taken in UTC rather than in the local
    time zone of the machine.

    Args:
        job_type (JobType): Type of the job.

    Returns:
        str: A generated job code string, e.g. ``20250131T093000Z_LEARNING``.
    """
    # Local import: the module header only imports `datetime` from the datetime module.
    from datetime import timezone

    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    return f"{timestamp}_{job_type.to_name().upper()}"
@dataclass
class ModuleConfig:
    """Main configuration class for the module.

    This class holds all configuration parameters for the module,
    including job, dashboard, and data management configurations.
    It defines its own argument-less ``__init__`` that reads everything from
    environment variables; that ``__init__`` is used instead of a
    dataclass-generated one.

    Attributes:
        run_env (RunEnv): Running environment, either "dev" or "prod".
        job (JobConfig): Job configuration.
        dashboard (DashboardConfig | None): Dashboard API configuration. None unless running in Prod.
        data_man (DataManConfig | None): Data management API configuration.
        package_log_level (int): Logging level for the package.
        root_log_level (int): Root logging level.
    """

    # "dev" | "prod"
    run_env: RunEnv
    job: JobConfig
    dashboard: DashboardConfig | None
    data_man: DataManConfig | None
    # NOTE(review): __init__ does not assign the two log levels; presumably they are
    # set by the caller after construction - confirm.
    package_log_level: int
    root_log_level: int

    def __init__(self):
        """Initializes a ModuleConfig instance.

        Loads configuration from environment variables and sets up the necessary
        configurations for job processing, dashboard, and data management.

        Environment variables read: JOB_ENV and JOB_ARG_JOB_TYPE (always),
        JOB_ARG_REALTIME_SERVER_HOST and JOB_ARG_REALTIME_SERVER_PORT (optional),
        JOB_ARG_AUGUR_CODE, JOB_ARG_JOB_CODE, JOB_ARG_MODEL_CODE,
        JOB_ARG_SETTINGS_CODE, DASHBOARD_API_HOST and DASHBOARD_API_PORT
        (depending on environment and job type), and AS_TOKEN (Dev only).

        Side effects: sets CLIENT_ID in the environment, in Dev writes the
        refresh token via the credential utils and removes CLIENT_SECRET from
        the environment, and finally schedules the token refresh.

        Raises:
            KeyError: If a required environment variable is missing.
            ValueError: If JOB_ENV or JOB_ARG_JOB_TYPE holds an unknown name.
        """
        # General config parameters
        realtime_server_host = os.environ.get("JOB_ARG_REALTIME_SERVER_HOST", "0.0.0.0")
        # NOTE(review): the fallback is the int 5000 while a value from the environment is a str.
        realtime_server_port = os.environ.get("JOB_ARG_REALTIME_SERVER_PORT", 5000)

        # Always given
        self.run_env = RunEnv.from_name(os.environ["JOB_ENV"])
        job_type = JobType.from_name(os.environ["JOB_ARG_JOB_TYPE"])

        # Always either set, can be generated or not required
        augur_code = None
        if self.run_env == RunEnv.Prod:
            augur_code = os.environ["JOB_ARG_AUGUR_CODE"]

        job_code = None
        if job_type.is_batch_job():
            if self.run_env == RunEnv.Prod:
                job_code = os.environ["JOB_ARG_JOB_CODE"]
            elif self.run_env == RunEnv.Dev:
                job_code = generate_dev_job_code(job_type)

        model_code = None
        if (self.run_env == RunEnv.Prod and job_type.is_passed_model_in_env_in_prod()) or (
                self.run_env == RunEnv.Dev and job_type.is_passed_model_in_env_in_dev()):
            model_code = os.environ["JOB_ARG_MODEL_CODE"]

        settings_code = None
        if self.run_env == RunEnv.Prod and job_type == JobType.RealtimeScoring:
            settings_code = os.environ["JOB_ARG_SETTINGS_CODE"]

        self.job = JobConfig(
            augur_code=augur_code,
            job_type=job_type,
            job_code=job_code,
            model_code=model_code,
            settings_code=settings_code,
            realtime_server_host=realtime_server_host,
            realtime_server_port=realtime_server_port
        )

        # The dashboard API is only configured in Prod. Assign None first so the attribute
        # always exists; otherwise reading it (or the generated repr/eq) fails in Dev.
        self.dashboard = None
        if self.run_env == RunEnv.Prod:
            self.dashboard = DashboardConfig(os.environ["DASHBOARD_API_HOST"], os.environ["DASHBOARD_API_PORT"])

        self.data_man = _data_man_config()

        # In Code Capsule run jobs this is actually added by the orchestration, but it's also just hardcoded there.
        # Seems pointless to add it to the augur jobs too, just to satisfy the way the CredentialUtils is written
        os.environ["CLIENT_ID"] = "altasigma-frontend"

        if self.run_env == RunEnv.Dev:
            # In the workbench we need to override some things to make it fit with the CredentialsUtils, which otherwise would assume the Code Capsule + Device Auth Flow
            # Write the refresh token into the tmp file, because we neither do the device flow, nor have the mounted secret with a refresh token
            refresh_token = os.environ.get("AS_TOKEN")
            _credential_utils()._write_tokens_to_file("", refresh_token)
            # Fix client secret for non-device auth flow. The variable may legitimately be
            # absent, so remove it only if present instead of reporting an "unexpected" error.
            os.environ.pop("CLIENT_SECRET", None)

        TokenRefresher().schedule_refresh()
# Process-wide cached instances; filled on first use by _module_config() and _data_man_config().
module_config = None
data_man_config = None


def _module_config():
    """Return the shared module configuration, creating it on first use.

    The instance is stored in the module-level ``module_config`` variable and
    reused by every later call.

    Returns:
        ModuleConfig: The module configuration instance.
    """
    global module_config
    if module_config is None:
        module_config = ModuleConfig()
    return module_config
def _data_man_config():
    """Return the shared data management configuration, creating it on first use.

    On the first call the host and port are read from the DATA_MAN_API_HOST and
    DATA_MAN_API_PORT environment variables; the result is stored in the
    module-level ``data_man_config`` variable and reused afterwards.

    Returns:
        DataManConfig: The data management configuration instance.

    Raises:
        KeyError: If one of the two environment variables is missing on first use.
    """
    global data_man_config
    if data_man_config is None:
        api_host = os.environ["DATA_MAN_API_HOST"]
        api_port = os.environ["DATA_MAN_API_PORT"]
        data_man_config = DataManConfig(api_host, api_port)
    return data_man_config

altasigma-3.10.1/altasigma/config/http_session.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""
+HTTP Session Configuration Module
+This module provides a centralized HTTP session with configurable SSL verification
+for all HTTP requests throughout the AltaSigma package.
+"""
+import os
+import requests
# Global session instance
http_session = None


def _http_session():
    """Return the shared HTTP session, creating and configuring it on first use.

    The session is stored in the module-level ``http_session`` variable. When
    the SSL configuration reports that verification is disabled
    (DISABLE_SSL_VERIFICATION=true), ``verify`` is switched off on the session
    and urllib3's InsecureRequestWarning is silenced.

    Note: The requests library automatically uses the REQUESTS_CA_BUNDLE
    environment variable for custom CA certificates when verification is enabled.

    Returns:
        requests.Session: A configured requests session.
    """
    global http_session
    if http_session is not None:
        return http_session

    http_session = requests.Session()
    if _get_ssl_config()['verify']:
        # Nothing to configure: requests automatically uses REQUESTS_CA_BUNDLE if set
        return http_session

    # Disable SSL verification
    # NOTE(review): requests merges environment settings per request; if
    # REQUESTS_CA_BUNDLE is set at the same time, it presumably takes precedence over
    # this session-level verify=False - confirm against the requests version in use.
    http_session.verify = False
    # Disable SSL warnings when verification is disabled
    import urllib3
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    return http_session
def _get_ssl_config():
    """Collect the SSL settings shared by the different HTTP clients.

    The settings come from two environment variables: DISABLE_SSL_VERIFICATION
    (verification is off only when its value equals "true", compared
    case-insensitively) and REQUESTS_CA_BUNDLE.

    Consumers should check the 'verify' boolean first. If verification is not
    disabled and 'ca_bundle' is provided, use the ca_bundle path.

    Returns:
        dict: SSL configuration with keys:
            - 'verify': Boolean indicating whether to verify SSL certificates
            - 'ca_bundle': Path to CA certificate bundle if set via REQUESTS_CA_BUNDLE, or None
    """
    raw_flag = os.environ.get('DISABLE_SSL_VERIFICATION', 'false')
    verification_disabled = raw_flag.lower() == 'true'
    settings = {'verify': not verification_disabled}
    settings['ca_bundle'] = os.environ.get('REQUESTS_CA_BUNDLE')
    return settings

altasigma-3.10.1/altasigma/credentials/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""
+Credential utilities for managing authentication and secure data source access. For example in the Workbench and in Code Capsules.
+This module provides tools for obtaining, refreshing, and managing authentication
+credentials required to access various data sources within the AltaSigma ecosystem.
+It supports both interactive authentication flows for notebooks and automated
+authentication for production environments.
+"""