matrice-inference 0.1.0__py3-none-manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of matrice-inference might be problematic.
- matrice_inference/deploy/aggregator/aggregator.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/aggregator/aggregator.pyi +55 -0
- matrice_inference/deploy/aggregator/analytics.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/aggregator/analytics.pyi +63 -0
- matrice_inference/deploy/aggregator/ingestor.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/aggregator/ingestor.pyi +79 -0
- matrice_inference/deploy/aggregator/pipeline.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/aggregator/pipeline.pyi +139 -0
- matrice_inference/deploy/aggregator/publisher.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/aggregator/publisher.pyi +59 -0
- matrice_inference/deploy/aggregator/synchronizer.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/aggregator/synchronizer.pyi +58 -0
- matrice_inference/deploy/client/auto_streaming/auto_streaming.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/client/auto_streaming/auto_streaming.pyi +145 -0
- matrice_inference/deploy/client/auto_streaming/auto_streaming_utils.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/client/auto_streaming/auto_streaming_utils.pyi +126 -0
- matrice_inference/deploy/client/client.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/client/client.pyi +337 -0
- matrice_inference/deploy/client/client_stream_utils.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/client/client_stream_utils.pyi +83 -0
- matrice_inference/deploy/client/client_utils.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/client/client_utils.pyi +77 -0
- matrice_inference/deploy/client/streaming_gateway/streaming_gateway.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/client/streaming_gateway/streaming_gateway.pyi +120 -0
- matrice_inference/deploy/client/streaming_gateway/streaming_gateway_utils.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/client/streaming_gateway/streaming_gateway_utils.pyi +442 -0
- matrice_inference/deploy/client/streaming_gateway/streaming_results_handler.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/client/streaming_gateway/streaming_results_handler.pyi +19 -0
- matrice_inference/deploy/optimize/cache_manager.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/optimize/cache_manager.pyi +15 -0
- matrice_inference/deploy/optimize/frame_comparators.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/optimize/frame_comparators.pyi +203 -0
- matrice_inference/deploy/optimize/frame_difference.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/optimize/frame_difference.pyi +165 -0
- matrice_inference/deploy/optimize/transmission.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/optimize/transmission.pyi +97 -0
- matrice_inference/deploy/server/inference/batch_manager.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/server/inference/batch_manager.pyi +50 -0
- matrice_inference/deploy/server/inference/inference_interface.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/server/inference/inference_interface.pyi +114 -0
- matrice_inference/deploy/server/inference/model_manager.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/server/inference/model_manager.pyi +80 -0
- matrice_inference/deploy/server/inference/triton_utils.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/server/inference/triton_utils.pyi +115 -0
- matrice_inference/deploy/server/proxy/proxy_interface.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/server/proxy/proxy_interface.pyi +90 -0
- matrice_inference/deploy/server/proxy/proxy_utils.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/server/proxy/proxy_utils.pyi +113 -0
- matrice_inference/deploy/server/server.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/server/server.pyi +155 -0
- matrice_inference/deploy/server/stream/inference_worker.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/server/stream/inference_worker.pyi +56 -0
- matrice_inference/deploy/server/stream/kafka_consumer_worker.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/server/stream/kafka_consumer_worker.pyi +51 -0
- matrice_inference/deploy/server/stream/kafka_producer_worker.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/server/stream/kafka_producer_worker.pyi +50 -0
- matrice_inference/deploy/server/stream/stream_debug_logger.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/server/stream/stream_debug_logger.pyi +47 -0
- matrice_inference/deploy/server/stream/stream_manager.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/server/stream/stream_manager.pyi +69 -0
- matrice_inference/deploy/server/stream/video_buffer.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/server/stream/video_buffer.pyi +120 -0
- matrice_inference/deploy/stream/kafka_stream.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/stream/kafka_stream.pyi +444 -0
- matrice_inference/deploy/stream/redis_stream.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deploy/stream/redis_stream.pyi +447 -0
- matrice_inference/deployment/camera_manager.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deployment/camera_manager.pyi +669 -0
- matrice_inference/deployment/deployment.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deployment/deployment.pyi +736 -0
- matrice_inference/deployment/inference_pipeline.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deployment/inference_pipeline.pyi +527 -0
- matrice_inference/deployment/streaming_gateway_manager.cpython-312-x86_64-linux-gnu.so +0 -0
- matrice_inference/deployment/streaming_gateway_manager.pyi +275 -0
- matrice_inference/py.typed +0 -0
- matrice_inference-0.1.0.dist-info/METADATA +26 -0
- matrice_inference-0.1.0.dist-info/RECORD +80 -0
- matrice_inference-0.1.0.dist-info/WHEEL +5 -0
- matrice_inference-0.1.0.dist-info/licenses/LICENSE.txt +21 -0
- matrice_inference-0.1.0.dist-info/top_level.txt +1 -0
matrice_inference/deploy/server/inference/model_manager.pyi
@@ -0,0 +1,80 @@
+"""Auto-generated stub for module: model_manager."""
+from typing import Any, Tuple
+
+import gc
+import logging
+import torch
+
+# Classes
+class ModelManager:
+    """
+    Minimal ModelManager that focuses on model lifecycle and prediction calls.
+    """
+
+    def __init__(self: Any, model_id: str, internal_server_type: str, internal_port: int, internal_host: str, load_model: Any = None, predict: Any = None, batch_predict: Any = None, action_tracker: Any = None, num_model_instances: int = 1) -> None: ...
+    """
+    Initialize the ModelManager
+
+    Args:
+        model_id: ID of the model
+        internal_server_type: Type of internal server
+        internal_port: Internal port number
+        internal_host: Internal host address
+        load_model: Function to load the model
+        predict: Function to run predictions
+        batch_predict: Function to run batch predictions
+        action_tracker: Tracker for monitoring actions
+        num_model_instances: Number of model instances to create
+    """
+
+    def batch_inference(self: Any, input1: Any, input2: Any = None, extra_params: Any = None, stream_key: Any = None, stream_info: Any = None, input_hash: Any = None) -> Tuple[dict, bool]: ...
+    """
+    Run batch inference on the provided input data.
+
+    Args:
+        input1: Primary input data
+        input2: Secondary input data (optional)
+        extra_params: Additional parameters for inference (optional)
+        stream_key: Stream key for the inference
+        stream_info: Stream info for the inference
+        input_hash: Input hash for the inference
+    Returns:
+        Tuple of (results, success_flag)
+
+    Raises:
+        ValueError: If input data is invalid
+    """
+
+    def get_model(self: Any) -> Any: ...
+    """
+    Get the model instance in round-robin fashion
+    """
+
+    def inference(self: Any, input1: Any, input2: Any = None, extra_params: Any = None, stream_key: Any = None, stream_info: Any = None, input_hash: Any = None) -> Tuple[dict, bool]: ...
+    """
+    Run inference on the provided input data.
+
+    Args:
+        input1: Primary input data (can be image bytes or numpy array)
+        input2: Secondary input data (optional)
+        extra_params: Additional parameters for inference (optional)
+        stream_key: Stream key for the inference
+        stream_info: Stream info for the inference
+        input_hash: Input hash for the inference
+    Returns:
+        Tuple of (results, success_flag)
+
+    Raises:
+        ValueError: If input data is invalid
+    """
+
+    def scale_down(self: Any) -> Any: ...
+    """
+    Unload the model from memory (scale down)
+    """
+
+    def scale_up(self: Any) -> Any: ...
+    """
+    Load the model into memory (scale up)
+    """
+
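Taken together, the stub describes a thin wrapper that loads one or more model instances and round-robins prediction calls across them. Below is a minimal usage sketch based only on the signatures above; the `load_model`/`predict` callables, their argument shapes, and the `"rest"` server type and ID/port values are assumptions for illustration, not documented behavior of the package:

    # Sketch only: callables and placeholder values are assumed, not from the package.
    from matrice_inference.deploy.server.inference.model_manager import ModelManager

    def load_model(action_tracker=None):          # assumed signature
        import torch
        return torch.nn.Identity()                # placeholder model

    def predict(model, input1, input2=None, extra_params=None):  # assumed signature
        return {"output": "ok"}

    manager = ModelManager(
        model_id="my-model-id",                   # placeholder ID
        internal_server_type="rest",              # assumed value
        internal_port=8000,
        internal_host="localhost",
        load_model=load_model,
        predict=predict,
        num_model_instances=1,
    )
    manager.scale_up()                            # load the model into memory
    results, ok = manager.inference(input1=b"raw image bytes")
    manager.scale_down()                          # unload when idle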
Binary file
matrice_inference/deploy/server/inference/triton_utils.pyi
@@ -0,0 +1,115 @@
+"""Auto-generated stub for module: triton_utils."""
+from typing import Any, Dict, Optional, Union
+
+from PIL import Image
+from datetime import datetime, timezone
+from io import BytesIO
+from matrice.docker_utils import pull_docker_image
+from matrice_common.utils import dependencies_check
+from matrice_common.utils import dependencies_check
+import httpx
+import logging
+import logging
+import numpy as np
+import os
+import shlex
+import subprocess
+import threading
+import torch
+import torch
+import tritonclient.grpc as tritonclientclass
+import tritonclient.http as tritonclientclass
+import zipfile
+
+# Constants
+BASE_PATH: str
+TRITON_DOCKER_IMAGE: str
+
+# Classes
+class MatriceTritonServer:
+    def __init__(self: Any, action_tracker: Any) -> None: ...
+
+    def check_triton_docker_image(self: Any) -> Any: ...
+    """
+    Check if docker image download is complete and wait for it to finish
+    """
+
+    def create_model_repository(self: Any) -> Any: ...
+    """
+    Create the model repository directory structure
+    """
+
+    def download_model(self: Any, model_version_dir: Any) -> Any: ...
+    """
+    Download and extract the model files
+    """
+
+    def get_config_params(self: Any) -> Any: ...
+
+    def setup(self: Any) -> Any: ...
+
+    def start_server(self: Any) -> Any: ...
+    """
+    Start the Triton Inference Server
+    """
+
+    def write_config_file(self: Any, model_dir: Any, max_batch_size: Any = 0, num_model_instances: Any = 1, image_size: Any = [224, 224], num_classes: Any = 10, input_data_type: str = 'TYPE_FP32', output_data_type: str = 'TYPE_FP32', dynamic_batching: bool = False, preferred_batch_size: list = [2, 4, 8], max_queue_delay_microseconds: int = 100, input_pinned_memory: bool = True, output_pinned_memory: bool = True, **kwargs: Any) -> Any: ...
+    """
+    Write the model configuration file for Triton Inference Server
+    """
+
+class TritonInference:
+    """
+    Class for making Triton inference requests.
+    """
+
+    def __init__(self: Any, server_type: str, model_id: str, internal_port: int = 80, internal_host: str = 'localhost') -> None: ...
+    """
+    Initialize Triton inference client.
+
+    Args:
+        server_type: Type of server (grpc/rest)
+        model_id: ID of model to use
+        internal_port: Port number for internal API
+        internal_host: Hostname for internal API
+    """
+
+    async def async_inference(self: Any, input_data: Any) -> Any: ...
+    """
+    Make an asynchronous inference request.
+
+    Args:
+        input_data: Input data as bytes
+
+    Returns:
+        Model prediction as numpy array
+
+    Raises:
+        Exception: If inference fails
+    """
+
+    def format_response(self: Any, response: Any) -> Dict[str, Any]: ...
+    """
+    Format model response for consistent logging.
+
+    Args:
+        response: Raw model output
+
+    Returns:
+        Formatted response dictionary
+    """
+
+    def inference(self: Any, input_data: Any) -> Any: ...
+    """
+    Make a synchronous inference request.
+
+    Args:
+        input_data: Input data as bytes
+
+    Returns:
+        Model prediction as numpy array
+
+    Raises:
+        Exception: If inference fails
+    """
+
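On the client side, `TritonInference` wraps a tritonclient HTTP/gRPC connection and exposes both synchronous and asynchronous calls. A hedged sketch of how it might be driven, assuming the import path mirrors the file layout and that `"rest"`/`"grpc"` are the accepted `server_type` values (the docstring only says "grpc/rest"); the file name and port are placeholders:

    import asyncio
    from matrice_inference.deploy.server.inference.triton_utils import TritonInference

    # server_type per the docstring is "grpc" or "rest"; port/host values are placeholders.
    client = TritonInference(server_type="rest", model_id="my-model-id",
                             internal_port=8000, internal_host="localhost")

    with open("sample.jpg", "rb") as f:       # placeholder input file
        image_bytes = f.read()

    prediction = client.inference(image_bytes)            # synchronous request
    print(client.format_response(prediction))             # normalized dict for logging

    async def main():
        return await client.async_inference(image_bytes)  # asynchronous request

    asyncio.run(main())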
Binary file
matrice_inference/deploy/server/proxy/proxy_interface.pyi
@@ -0,0 +1,90 @@
+"""Auto-generated stub for module: proxy_interface."""
+from typing import Any, Optional
+
+from contextlib import asynccontextmanager
+from datetime import datetime, timezone
+from fastapi import FastAPI, HTTPException, UploadFile
+from fastapi.encoders import jsonable_encoder
+from fastapi.params import File, Form
+from fastapi.responses import JSONResponse
+from matrice_inference.deploy.server.inference.inference_interface import InferenceInterface
+from matrice_inference.deploy.server.proxy.proxy_utils import AuthKeyValidator, RequestsLogger
+import asyncio
+import httpx
+import logging
+import threading
+import time
+import uvicorn
+
+# Classes
+class MatriceProxyInterface:
+    """
+    Interface for proxying requests to model servers.
+    """
+
+    def __init__(self: Any, session: Any, deployment_id: str, deployment_instance_id: str, external_port: int, inference_interface: Any) -> None: ...
+    """
+    Initialize proxy server.
+
+    Args:
+        session: Session object for authentication and RPC
+        deployment_id: ID of the deployment
+        external_port: Port to expose externally
+    """
+
+    async def inference(self: Any, input1: Any, input2: Any = None, extra_params: Any = None, apply_post_processing: Any = False) -> Any: ...
+    """
+    Perform inference using the inference interface.
+
+    Args:
+        input1: Primary input data
+        input2: Secondary input data (optional)
+        extra_params: Additional parameters for inference (optional)
+        apply_post_processing: Flag to apply post-processing
+
+    Returns:
+        Inference result, Post-processing result
+    """
+
+    def log_prediction_info(self: Any, result: Any, start_time: Any, input1: Any, auth_key: Any) -> Any: ...
+    """
+    Log prediction info.
+
+    Args:
+        result: Prediction result
+        start_time: Start time of the request
+        input1: Input data
+        auth_key: Authentication key used
+    """
+
+    def on_start(self: Any) -> Any: ...
+    """
+    Start the proxy server components.
+    """
+
+    async def on_stop(self: Any) -> Any: ...
+    """
+    Clean up proxy server components.
+    """
+
+    def start(self: Any) -> Any: ...
+    """
+    Start the proxy server in a background thread.
+    """
+
+    def stop(self: Any) -> Any: ...
+    """
+    Stop the proxy server gracefully.
+    """
+
+    def validate_auth_key(self: Any, auth_key: Any) -> Any: ...
+    """
+    Validate auth key.
+
+    Args:
+        auth_key: Authentication key to validate
+
+    Returns:
+        bool: True if valid, False otherwise
+    """
+
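The proxy interface fronts the FastAPI app that validates auth keys, forwards requests to the `InferenceInterface`, and logs predictions. A rough lifecycle sketch implied by the stub; the `session` and `inference_interface` objects, IDs, and port below are placeholders, not values from the package:

    import asyncio
    from matrice_inference.deploy.server.proxy.proxy_interface import MatriceProxyInterface

    session = object()              # placeholder for a Matrice SDK session
    inference_interface = object()  # placeholder for an InferenceInterface instance

    proxy = MatriceProxyInterface(
        session=session,
        deployment_id="deployment-id",
        deployment_instance_id="instance-id",
        external_port=8080,
        inference_interface=inference_interface,
    )

    proxy.start()                                  # serve uvicorn in a background thread
    is_valid = proxy.validate_auth_key("client-auth-key")

    result = asyncio.run(proxy.inference(input1=b"raw image bytes",
                                         apply_post_processing=False))
    proxy.stop()                                   # graceful shutdown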
matrice_inference/deploy/server/proxy/proxy_utils.pyi
@@ -0,0 +1,113 @@
+"""Auto-generated stub for module: proxy_utils."""
+from typing import Any, Dict, Set
+
+from datetime import datetime
+from queue import Queue
+import logging
+import numpy as np
+import requests
+import threading
+import time
+
+# Classes
+class AuthKeyValidator:
+    """
+    Validates authentication keys for deployments.
+    """
+
+    def __init__(self: Any, deployment_id: str, session: Any) -> None: ...
+    """
+    Initialize the AuthKeyValidator.
+
+    Args:
+        deployment_id: ID of the deployment
+        session: Session object containing RPC client
+    """
+
+    def start(self: Any) -> None: ...
+    """
+    Start the auth key update loop in a background thread.
+    """
+
+    def stop(self: Any) -> None: ...
+    """
+    Stop the auth key update loop.
+    """
+
+    def update_auth_keys(self: Any) -> None: ...
+    """
+    Fetch and validate auth keys for the deployment.
+    """
+
+    def update_auth_keys_loop(self: Any) -> None: ...
+    """
+    Run continuous loop to update auth keys.
+    """
+
+class RequestsLogger:
+    """
+    Logs prediction requests and handles drift monitoring.
+    """
+
+    def __init__(self: Any, deployment_id: str, session: Any) -> None: ...
+    """
+    Initialize the RequestsLogger.
+
+    Args:
+        deployment_id: ID of the deployment
+        session: Session object containing RPC client
+    """
+
+    def add_log_to_queue(self: Any, prediction: Any, latency: float, request_time: str, input_data: Any, deployment_instance_id: str, auth_key: str) -> None: ...
+    """
+    Add prediction log to queue for async processing.
+
+    Args:
+        prediction: The model prediction
+        latency: Request latency in seconds
+        request_time: Timestamp of the request
+        input_data: Raw input data bytes
+        deployment_instance_id: ID of deployment instance
+        auth_key: Authentication key used
+    """
+
+    def log_prediction_info(self: Any, prediction: Any, latency: float, request_time: str, input_data: Any, deployment_instance_id: str, auth_key: str) -> Dict: ...
+    """
+    Log prediction information to the server.
+
+    Args:
+        prediction: The model prediction
+        latency: Request latency in seconds
+        request_time: Timestamp of the request
+        input_data: Raw input data bytes
+        deployment_instance_id: ID of deployment instance
+        auth_key: Authentication key used
+
+    Returns:
+        Dict: Response from logging endpoint
+    """
+
+    def log_prediction_info_thread(self: Any) -> None: ...
+    """
+    Background thread for processing prediction logs.
+    """
+
+    def start(self: Any) -> None: ...
+    """
+    Start the prediction logging thread.
+    """
+
+    def stop(self: Any) -> None: ...
+    """
+    Stop the prediction logging thread.
+    """
+
+    def upload_input_for_drift_monitoring(self: Any, log_response: Dict, input_data: Any) -> None: ...
+    """
+    Upload input data for drift monitoring.
+
+    Args:
+        log_response: Response from logging endpoint
+        input_data: Raw input data bytes
+    """
+
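Both helpers follow the same start/stop background-thread pattern. A sketch of how the proxy layer presumably drives them; the `session` object, IDs, timestamp format, and prediction payload are assumptions for illustration only:

    import time
    from datetime import datetime, timezone
    from matrice_inference.deploy.server.proxy.proxy_utils import AuthKeyValidator, RequestsLogger

    session = object()  # placeholder for a Matrice SDK session with an RPC client

    validator = AuthKeyValidator(deployment_id="deployment-id", session=session)
    req_logger = RequestsLogger(deployment_id="deployment-id", session=session)

    validator.start()   # refresh auth keys in a background thread
    req_logger.start()  # drain the prediction-log queue in a background thread

    start = time.time()
    prediction = {"label": "cat", "score": 0.97}              # placeholder prediction
    req_logger.add_log_to_queue(
        prediction=prediction,
        latency=time.time() - start,
        request_time=datetime.now(timezone.utc).isoformat(),  # assumed timestamp format
        input_data=b"raw image bytes",
        deployment_instance_id="instance-id",
        auth_key="client-auth-key",
    )

    req_logger.stop()
    validator.stop()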
Binary file
matrice_inference/deploy/server/server.pyi
@@ -0,0 +1,155 @@
+"""Auto-generated stub for module: server."""
+from typing import Any, Optional
+
+from datetime import datetime, timezone
+from matrice.action_tracker import ActionTracker
+from matrice_inference.deploy.server.inference.inference_interface import InferenceInterface
+from matrice_inference.deploy.server.inference.model_manager import ModelManager
+from matrice_inference.deploy.server.proxy.proxy_interface import MatriceProxyInterface
+from matrice_inference.deploy.server.stream.stream_manager import StreamManager
+import asyncio
+import atexit
+import logging
+import os
+import signal
+import threading
+import time
+import urllib.request
+
+# Constants
+CLEANUP_DELAY_SECONDS: int
+DEFAULT_EXTERNAL_PORT: int
+DEFAULT_SHUTDOWN_THRESHOLD_MINUTES: int
+FINAL_CLEANUP_DELAY_SECONDS: int
+HEARTBEAT_INTERVAL_SECONDS: int
+IP_FETCH_TIMEOUT_SECONDS: int
+MAX_DEPLOYMENT_CHECK_FAILURES_BEFORE_SHUTDOWN: int
+MAX_HEARTBEAT_FAILURES_BEFORE_SHUTDOWN: int
+MAX_IP_FETCH_ATTEMPTS: int
+MIN_SHUTDOWN_THRESHOLD_MINUTES: int
+SHUTDOWN_CHECK_INTERVAL_SECONDS: int
+
+# Classes
+class MatriceDeployServer:
+    """
+    Class for managing model deployment and server functionality.
+    """
+
+    def __init__(self: Any, load_model: Optional[Callable] = None, predict: Optional[Callable] = None, action_id: str = '', external_port: int = DEFAULT_EXTERNAL_PORT, batch_predict: Optional[Callable] = None, custom_post_processing_fn: Optional[Callable] = None) -> None: ...
+    """
+    Initialize MatriceDeploy.
+
+    Args:
+        load_model (callable, optional): Function to load model. Defaults to None.
+        predict (callable, optional): Function to make predictions. Defaults to None.
+        batch_predict (callable, optional): Function to make batch predictions. Defaults to None.
+        custom_post_processing_fn (callable, optional): Function to get custom post processing config. Defaults to None.
+        action_id (str, optional): ID for action tracking. Defaults to "".
+        external_port (int, optional): External port number. Defaults to 80.
+
+    Raises:
+        ValueError: If required parameters are invalid
+        Exception: If initialization fails
+    """
+
+    def start(self: Any, block: Any = True) -> Any: ...
+    """
+    Start the proxy interface and all server components.
+    """
+
+    def start_server(self: Any, block: Any = True) -> Any: ...
+    """
+    Start the server and related components.
+
+    Args:
+        block: If True, wait for shutdown signal. If False, return immediately after starting.
+
+    Raises:
+        Exception: If unable to initialize server
+    """
+
+    def stop_server(self: Any) -> Any: ...
+    """
+    Stop the server and related components.
+    """
+
+class MatriceDeployServerUtils:
+    """
+    Utility class for managing deployment server operations.
+    """
+
+    def __init__(self: Any, action_tracker: Any, inference_interface: Any, external_port: int, main_server: Any = None) -> None: ...
+    """
+    Initialize utils with reference to the main server.
+
+    Args:
+        action_tracker: ActionTracker instance
+        inference_interface: InferenceInterface instance
+        external_port: External port number
+        main_server: Reference to the main MatriceDeployServer instance
+    """
+
+    def get_elapsed_time_since_latest_inference(self: Any) -> Any: ...
+    """
+    Get time elapsed since latest inference.
+
+    Returns:
+        float: Elapsed time in seconds
+
+    Raises:
+        Exception: If unable to get elapsed time and no fallback available
+    """
+
+    def heartbeat_checker(self: Any) -> Any: ...
+    """
+    Background thread to periodically send heartbeat.
+    """
+
+    def ip(self: Any) -> Any: ...
+    """
+    Get the external IP address with caching and retry logic.
+    """
+
+    def is_instance_running(self: Any) -> Any: ...
+    """
+    Check if deployment instance is running.
+
+    Returns:
+        bool: True if instance is running, False otherwise
+    """
+
+    def run_background_checkers(self: Any) -> Any: ...
+    """
+    Start the shutdown checker and heartbeat checker threads as daemons.
+    """
+
+    def shutdown(self: Any) -> Any: ...
+    """
+    Gracefully shutdown the deployment instance.
+    """
+
+    def shutdown_checker(self: Any) -> Any: ...
+    """
+    Background thread to periodically check for idle shutdown condition and deployment status.
+    """
+
+    def trigger_shutdown_if_needed(self: Any) -> Any: ...
+    """
+    Check idle time and trigger shutdown if threshold exceeded.
+    """
+
+    def update_deployment_address(self: Any) -> Any: ...
+    """
+    Update the deployment address in the backend.
+
+    Raises:
+        Exception: If unable to update deployment address
+    """
+
+    def wait_for_shutdown(self: Any) -> Any: ...
+    """
+    Wait for shutdown to be initiated by background checkers or external signals.
+
+    This method blocks the main thread until shutdown is triggered.
+    """
+
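`MatriceDeployServer` ties the pieces together: it builds the ModelManager, InferenceInterface, proxy, and stream components, then blocks until a shutdown condition (idle timeout, heartbeat failure, or signal) is met. A hedged entry-point sketch, assuming the import path mirrors the file layout and reusing the assumed `load_model`/`predict` callables from the ModelManager sketch above; the port and action ID are placeholders:

    from matrice_inference.deploy.server.server import MatriceDeployServer

    def load_model(action_tracker=None):          # assumed signature
        import torch
        return torch.nn.Identity()

    def predict(model, input1, input2=None, extra_params=None):  # assumed signature
        return {"output": "ok"}

    server = MatriceDeployServer(
        load_model=load_model,
        predict=predict,
        action_id="",                             # action-tracking ID from the platform
        external_port=8080,                       # placeholder port
    )

    # block=True waits for shutdown (idle timeout, heartbeat failure, or signal);
    # block=False returns immediately after the components are started.
    server.start(block=True)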
Binary file
matrice_inference/deploy/server/stream/inference_worker.pyi
@@ -0,0 +1,56 @@
+"""Auto-generated stub for module: inference_worker."""
+from typing import Any, Dict, List, Optional
+
+from datetime import datetime, timezone
+from matrice_inference.deploy.optimize.cache_manager import CacheManager
+from matrice_inference.deploy.optimize.frame_comparators import SSIMComparator
+from matrice_inference.deploy.optimize.transmission import ServerTransmissionHandler
+from matrice_inference.deploy.server.inference.inference_interface import InferenceInterface
+from matrice_inference.deploy.server.stream.video_buffer import VideoBufferManager
+import asyncio
+import base64
+import cv2
+import logging
+import numpy as np
+
+# Classes
+class InferenceWorker:
+    """
+    Inference worker that processes messages from input queue and adds results to output queue.
+    """
+
+    def __init__(self: Any, worker_id: str, inference_interface: Any, input_queue: Any, output_queue: Any, process_timeout: float = 30.0, enable_video_buffering: bool = True, ssim_threshold: float = 0.95, cache_size: int = 100) -> None: ...
+    """
+    Initialize inference worker.
+
+    Args:
+        worker_id: Unique identifier for this worker
+        inference_interface: Inference interface to use for inference
+        input_queue: Queue to get messages from
+        output_queue: Queue to put results into
+        process_timeout: Timeout for inference processing
+        enable_video_buffering: Whether to enable video buffering
+        ssim_threshold: SSIM threshold for frame similarity (default: 0.95)
+        cache_size: Maximum number of cached results per stream
+    """
+
+    def get_metrics(self: Any) -> Dict[str, Any]: ...
+    """
+    Get worker metrics.
+    """
+
+    def reset_metrics(self: Any) -> None: ...
+    """
+    Reset worker metrics.
+    """
+
+    async def start(self: Any) -> None: ...
+    """
+    Start the inference worker.
+    """
+
+    async def stop(self: Any) -> None: ...
+    """
+    Stop the inference worker.
+    """
+
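The worker is asyncio-based: it pulls messages from an input queue, runs them through the inference interface (the imports suggest SSIM comparison and a per-stream result cache to skip near-duplicate frames), and pushes results to an output queue. A sketch of the queue wiring under the assumption that the queues are asyncio queues and with the inference interface stubbed out as a placeholder:

    import asyncio
    from matrice_inference.deploy.server.stream.inference_worker import InferenceWorker

    async def main():
        input_queue: asyncio.Queue = asyncio.Queue()    # assumed to be asyncio queues
        output_queue: asyncio.Queue = asyncio.Queue()
        inference_interface = object()                  # placeholder InferenceInterface

        worker = InferenceWorker(
            worker_id="worker-0",
            inference_interface=inference_interface,
            input_queue=input_queue,
            output_queue=output_queue,
            process_timeout=30.0,
            enable_video_buffering=True,
            ssim_threshold=0.95,
            cache_size=100,
        )

        task = asyncio.create_task(worker.start())      # runs until stop() is called
        await asyncio.sleep(1)
        print(worker.get_metrics())
        await worker.stop()
        await task

    asyncio.run(main())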
Binary file
matrice_inference/deploy/server/stream/kafka_consumer_worker.pyi
@@ -0,0 +1,51 @@
+"""Auto-generated stub for module: kafka_consumer_worker."""
+from typing import Any, Dict, Optional
+
+from datetime import datetime, timezone
+from matrice_inference.deploy.optimize.transmission import ServerTransmissionHandler
+from matrice_inference.deploy.stream.kafka_stream import MatriceKafkaDeployment
+import asyncio
+import base64
+import logging
+
+# Classes
+class KafkaConsumerWorker:
+    """
+    Kafka consumer worker that polls from topics and adds to input queue.
+    """
+
+    def __init__(self: Any, worker_id: str, session: Any, deployment_id: str, deployment_instance_id: str, input_queue: Any, consumer_group_suffix: str = '', poll_timeout: float = 1.0, max_messages_per_poll: int = 1, inference_pipeline_id: str = '') -> None: ...
+    """
+    Initialize Kafka consumer worker.
+
+    Args:
+        worker_id: Unique identifier for this worker
+        session: Session object for authentication and RPC
+        deployment_id: ID of the deployment
+        deployment_instance_id: ID of the deployment instance
+        input_queue: Queue to put consumed messages into
+        consumer_group_suffix: Optional suffix for consumer group ID
+        poll_timeout: Timeout for Kafka polling
+        max_messages_per_poll: Maximum messages to consume in one poll
+    """
+
+    def get_metrics(self: Any) -> Dict[str, Any]: ...
+    """
+    Get worker metrics.
+    """
+
+    def reset_metrics(self: Any) -> None: ...
+    """
+    Reset worker metrics.
+    """
+
+    async def start(self: Any) -> None: ...
+    """
+    Start the consumer worker.
+    """
+
+    async def stop(self: Any) -> None: ...
+    """
+    Stop the consumer worker.
+    """
+
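The consumer worker is the Kafka-facing counterpart of the InferenceWorker: it polls the deployment's topics via MatriceKafkaDeployment and feeds the shared input queue. A final sketch under the same assumptions as above (placeholder session and IDs, asyncio queue, import path mirroring the file layout):

    import asyncio
    from matrice_inference.deploy.server.stream.kafka_consumer_worker import KafkaConsumerWorker

    async def main():
        input_queue: asyncio.Queue = asyncio.Queue()    # shared with the inference workers
        session = object()                              # placeholder Matrice SDK session

        consumer = KafkaConsumerWorker(
            worker_id="kafka-consumer-0",
            session=session,
            deployment_id="deployment-id",
            deployment_instance_id="instance-id",
            input_queue=input_queue,
            poll_timeout=1.0,
            max_messages_per_poll=1,
        )

        task = asyncio.create_task(consumer.start())    # poll Kafka until stop() is called
        await asyncio.sleep(1)
        print(consumer.get_metrics())
        await consumer.stop()
        await task

    asyncio.run(main())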