matrice-inference 0.1.2 (matrice_inference-0.1.2-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of matrice-inference might be problematic.
- matrice_inference/__init__.py +72 -0
- matrice_inference/py.typed +0 -0
- matrice_inference/server/__init__.py +23 -0
- matrice_inference/server/inference_interface.py +176 -0
- matrice_inference/server/model/__init__.py +1 -0
- matrice_inference/server/model/model_manager.py +274 -0
- matrice_inference/server/model/model_manager_wrapper.py +550 -0
- matrice_inference/server/model/triton_model_manager.py +290 -0
- matrice_inference/server/model/triton_server.py +1248 -0
- matrice_inference/server/proxy_interface.py +371 -0
- matrice_inference/server/server.py +1004 -0
- matrice_inference/server/stream/__init__.py +0 -0
- matrice_inference/server/stream/app_deployment.py +228 -0
- matrice_inference/server/stream/consumer_worker.py +201 -0
- matrice_inference/server/stream/frame_cache.py +127 -0
- matrice_inference/server/stream/inference_worker.py +163 -0
- matrice_inference/server/stream/post_processing_worker.py +230 -0
- matrice_inference/server/stream/producer_worker.py +147 -0
- matrice_inference/server/stream/stream_pipeline.py +451 -0
- matrice_inference/server/stream/utils.py +23 -0
- matrice_inference/tmp/abstract_model_manager.py +58 -0
- matrice_inference/tmp/aggregator/__init__.py +18 -0
- matrice_inference/tmp/aggregator/aggregator.py +330 -0
- matrice_inference/tmp/aggregator/analytics.py +906 -0
- matrice_inference/tmp/aggregator/ingestor.py +438 -0
- matrice_inference/tmp/aggregator/latency.py +597 -0
- matrice_inference/tmp/aggregator/pipeline.py +968 -0
- matrice_inference/tmp/aggregator/publisher.py +431 -0
- matrice_inference/tmp/aggregator/synchronizer.py +594 -0
- matrice_inference/tmp/batch_manager.py +239 -0
- matrice_inference/tmp/overall_inference_testing.py +338 -0
- matrice_inference/tmp/triton_utils.py +638 -0
- matrice_inference-0.1.2.dist-info/METADATA +28 -0
- matrice_inference-0.1.2.dist-info/RECORD +37 -0
- matrice_inference-0.1.2.dist-info/WHEEL +5 -0
- matrice_inference-0.1.2.dist-info/licenses/LICENSE.txt +21 -0
- matrice_inference-0.1.2.dist-info/top_level.txt +1 -0
matrice_inference/__init__.py
@@ -0,0 +1,72 @@
+"""Module providing __init__ functionality."""
+
+import os
+import sys
+from matrice_common.utils import dependencies_check
+
+base = [
+    "httpx",
+    "fastapi",
+    "uvicorn",
+    "pillow",
+    "confluent_kafka[snappy]",
+    "aiokafka",
+    "aiohttp",
+    "filterpy",
+    "scipy",
+    "scikit-learn",
+    "matplotlib",
+    "scikit-image",
+    "python-snappy",
+    "pyyaml",
+    "imagehash",
+    "Pillow",
+    "transformers"
+]
+
+# Install base dependencies first
+dependencies_check(base)
+
+# Helper to attempt installation and verify importability
+def _install_and_verify(pkg: str, import_name: str):
+    """Install a package expression and return True if the import succeeds."""
+    if dependencies_check([pkg]):
+        try:
+            __import__(import_name)
+            return True
+        except ImportError:
+            return False
+    return False
+
+if not dependencies_check(["opencv-python"]):
+    dependencies_check(["opencv-python-headless"])
+
+# Attempt GPU-specific dependencies first
+_gpu_ok = _install_and_verify("onnxruntime-gpu", "onnxruntime") and _install_and_verify(
+    "fast-plate-ocr[onnx-gpu]", "fast_plate_ocr"
+)
+
+if not _gpu_ok:
+    # Fallback to CPU variants
+    _cpu_ok = _install_and_verify("onnxruntime", "onnxruntime") and _install_and_verify(
+        "fast-plate-ocr[onnx]", "fast_plate_ocr"
+    )
+    if not _cpu_ok:
+        # Last-chance fallback without extras tag (PyPI sometimes lacks them)
+        _install_and_verify("fast-plate-ocr", "fast_plate_ocr")
+
+# matrice_deps = ["matrice_common", "matrice_analytics", "matrice"]
+
+# dependencies_check(matrice_deps)
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+from server.server import MatriceDeployServer # noqa: E402
+from server.server import MatriceDeployServer as MatriceDeploy # noqa: E402 # Keep this for backwards compatibility
+from server.inference_interface import InferenceInterface # noqa: E402
+from server.proxy_interface import MatriceProxyInterface # noqa: E402
+
+__all__ = [
+    "MatriceDeploy",
+    "MatriceDeployServer",
+    "InferenceInterface",
+    "MatriceProxyInterface",
+]
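After the dependency bootstrap above, the top-level package simply re-exports the server entry points listed in __all__. A minimal import sketch (assumes the wheel is installed; MatriceDeploy is the backwards-compatible alias declared above, so the final assertion holds by construction):

from matrice_inference import (
    MatriceDeploy,
    MatriceDeployServer,
    InferenceInterface,
    MatriceProxyInterface,
)

# MatriceDeploy is kept only for backwards compatibility; it is the same class.
assert MatriceDeploy is MatriceDeployServer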
matrice_inference/py.typed
File without changes
matrice_inference/server/__init__.py
@@ -0,0 +1,23 @@
+import os
+import logging
+
+# Root logger
+logging.basicConfig(level=logging.DEBUG)
+
+# Console handler (INFO+)
+console_handler = logging.StreamHandler()
+console_handler.setLevel(logging.INFO)
+
+# File handler (DEBUG+)
+log_path = os.path.join(os.getcwd(), "deploy_server.log")
+file_handler = logging.FileHandler(log_path)
+file_handler.setLevel(logging.DEBUG)
+
+# Formatter
+formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+console_handler.setFormatter(formatter)
+file_handler.setFormatter(formatter)
+
+# Add handlers to root logger
+logging.getLogger().addHandler(console_handler)
+logging.getLogger().addHandler(file_handler)
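Because this __init__ configures the root logger at import time, any module that logs after matrice_inference.server has been imported inherits the setup above. A small behaviour sketch (assumes no handlers were configured beforehand, and note that the top-level package __init__ also runs its dependency bootstrap on import; basicConfig(level=DEBUG) already installs a default stderr handler, so the extra INFO console handler duplicates INFO-and-above records on the console):

import logging
import matrice_inference.server  # noqa: F401  (running this module wires up the handlers)

log = logging.getLogger(__name__)
log.debug("deploy_server.log + console once (default basicConfig handler)")
log.info("deploy_server.log + console twice (default handler and the INFO console handler)")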
matrice_inference/server/inference_interface.py
@@ -0,0 +1,176 @@
+from matrice_inference.server.model.model_manager_wrapper import ModelManagerWrapper
+from typing import Dict, Any, Optional, Tuple, Union
+from datetime import datetime, timezone
+import logging
+import time
+from matrice_analytics.post_processing.post_processor import PostProcessor
+
+class InferenceInterface:
+    """Interface for proxying requests to model servers with optional post-processing."""
+
+    def __init__(
+        self,
+        model_manager: ModelManagerWrapper,
+        post_processor: Optional[PostProcessor] = None,
+    ):
+        """
+        Initialize the inference interface.
+
+        Args:
+            model_manager: Model manager for model inference
+            post_processor: Post processor for post-processing
+        """
+        self.logger = logging.getLogger(__name__)
+        self.model_manager = model_manager
+        self.post_processor = post_processor
+        self.latest_inference_time = datetime.now(timezone.utc)
+
+    def get_latest_inference_time(self) -> datetime:
+        """Get the latest inference time."""
+        return self.latest_inference_time
+
+    async def inference(
+        self,
+        input: Any,
+        extra_params: Optional[Dict[str, Any]] = None,
+        apply_post_processing: bool = False,
+        post_processing_config: Optional[Union[Dict[str, Any], str]] = None,
+        stream_key: Optional[str] = None,
+        stream_info: Optional[Dict[str, Any]] = None,
+        camera_info: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[Any, Optional[Dict[str, Any]]]:
+        """Perform inference using the appropriate client with optional post-processing.
+
+        Args:
+            input: Primary input data (e.g., image bytes, numpy array)
+            extra_params: Additional parameters for inference (optional)
+            apply_post_processing: Whether to apply post-processing
+            post_processing_config: Configuration for post-processing
+            stream_key: Unique identifier for the input stream
+            stream_info: Additional metadata about the stream (optional)
+            camera_info: Additional metadata about the camera/source (optional)
+
+        Returns:
+            A tuple containing:
+            - The inference results (raw or post-processed)
+            - Metadata about the inference and post-processing (if applicable)
+        """
+        if input is None:
+            raise ValueError("Input cannot be None")
+
+        # Measure model inference time
+        model_start_time = time.time()
+
+        # Update latest inference time
+        self.latest_inference_time = datetime.now(timezone.utc)
+
+        # Run model inference
+        try:
+            raw_results, success = self.model_manager.inference(
+                input=input,
+                extra_params=extra_params,
+                stream_key=stream_key,
+                stream_info=stream_info
+            )
+            model_inference_time = time.time() - model_start_time
+
+            if not success:
+                raise RuntimeError("Model inference failed")
+
+            self.logger.debug(
+                f"Model inference executed stream_key={stream_key} time={model_inference_time:.4f}s"
+            )
+
+        except Exception as exc:
+            self.logger.error(f"Model inference failed: {str(exc)}", exc_info=True)
+            raise RuntimeError(f"Model inference failed: {str(exc)}") from exc
+
+        # If no post-processing requested, return raw results
+        if not apply_post_processing or not self.post_processor:
+            return raw_results, {
+                "timing_metadata": {
+                    "model_inference_time_sec": model_inference_time,
+                    "post_processing_time_sec": 0.0,
+                    "total_time_sec": model_inference_time,
+                }
+            }
+
+        # Apply post-processing using PostProcessor
+        try:
+            post_processing_start_time = time.time()
+
+            # Use PostProcessor.process() method directly
+            result = await self.post_processor.process(
+                data=raw_results,
+                config=post_processing_config, # Use stream_key as fallback if no config
+                input_bytes=input if isinstance(input, bytes) else None,
+                stream_key=stream_key,
+                stream_info=stream_info
+            )
+
+            post_processing_time = time.time() - post_processing_start_time
+
+            # Format the response based on PostProcessor result
+            if result.is_success():
+                # For face recognition use case, return empty raw results
+                processed_raw_results = [] if (
+                    hasattr(result, 'usecase') and result.usecase == 'face_recognition'
+                ) else raw_results
+
+                # Extract agg_summary from result data if available
+                agg_summary = {}
+                if hasattr(result, 'data') and isinstance(result.data, dict):
+                    agg_summary = result.data.get("agg_summary", {})
+
+                post_processing_result = {
+                    "status": "success",
+                    "processing_time": result.processing_time,
+                    "usecase": getattr(result, 'usecase', ''),
+                    "category": getattr(result, 'category', ''),
+                    "summary": getattr(result, 'summary', ''),
+                    "insights": getattr(result, 'insights', []),
+                    "metrics": getattr(result, 'metrics', {}),
+                    "predictions": getattr(result, 'predictions', []),
+                    "agg_summary": agg_summary,
+                    "stream_key": stream_key or "default_stream",
+                    "timing_metadata": {
+                        "model_inference_time_sec": model_inference_time,
+                        "post_processing_time_sec": post_processing_time,
+                        "total_time_sec": model_inference_time + post_processing_time,
+                    }
+                }
+
+                return processed_raw_results, post_processing_result
+            else:
+                # Post-processing failed
+                self.logger.error(f"Post-processing failed: {result.error_message}")
+                return raw_results, {
+                    "status": "post_processing_failed",
+                    "error": result.error_message,
+                    "error_type": getattr(result, 'error_type', 'ProcessingError'),
+                    "processing_time": result.processing_time,
+                    "processed_data": raw_results,
+                    "stream_key": stream_key or "default_stream",
+                    "timing_metadata": {
+                        "model_inference_time_sec": model_inference_time,
+                        "post_processing_time_sec": post_processing_time,
+                        "total_time_sec": model_inference_time + post_processing_time,
+                    }
+                }
+
+        except Exception as e:
+            post_processing_time = time.time() - post_processing_start_time
+            self.logger.error(f"Post-processing exception: {str(e)}", exc_info=True)
+
+            return raw_results, {
+                "status": "post_processing_failed",
+                "error": str(e),
+                "error_type": type(e).__name__,
+                "processed_data": raw_results,
+                "stream_key": stream_key or "default_stream",
+                "timing_metadata": {
+                    "model_inference_time_sec": model_inference_time,
+                    "post_processing_time_sec": post_processing_time,
+                    "total_time_sec": model_inference_time + post_processing_time,
+                }
+            }
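A minimal call sketch for the interface above. The ModelManagerWrapper and PostProcessor instances are assumed to be constructed elsewhere (neither class is shown in this excerpt), and the post_processing_config dict shape is a hypothetical placeholder; only the call signature and the returned (results, metadata) tuple follow the code above:

import asyncio
from matrice_inference import InferenceInterface

async def run_once(model_manager, post_processor, frame_bytes: bytes):
    interface = InferenceInterface(model_manager, post_processor)
    results, meta = await interface.inference(
        input=frame_bytes,
        apply_post_processing=True,
        post_processing_config={"usecase": "people_counting"},  # assumed config shape
        stream_key="camera-01",
    )
    # meta always carries timing_metadata; on success it also holds the
    # post-processing summary, insights, metrics and agg_summary.
    print(meta["timing_metadata"]["total_time_sec"])
    return results

# asyncio.run(run_once(my_model_manager, my_post_processor, open("frame.jpg", "rb").read()))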
matrice_inference/server/model/__init__.py
@@ -0,0 +1 @@
+
matrice_inference/server/model/model_manager.py
@@ -0,0 +1,274 @@
+import logging
+import gc
+from typing import Tuple, Any, Optional, List, Callable, Dict
+
+class ModelManager:
+    """Minimal ModelManager that focuses on model lifecycle and prediction calls."""
+
+    def __init__(
+        self,
+        action_tracker: Any,
+        load_model: Optional[Callable] = None,
+        predict: Optional[Callable] = None,
+        batch_predict: Optional[Callable] = None,
+        num_model_instances: int = 1,
+        model_path: Optional[str] = None, # For local model loading testing
+    ):
+        """Initialize the ModelManager
+
+        Args:
+            action_tracker: Tracker for monitoring actions.
+            load_model: Function to load the model.
+            predict: Function to run predictions.
+            batch_predict: Function to run batch predictions.
+            num_model_instances: Number of model instances to create.
+            model_path: Path to the model directory.
+        """
+        try:
+            self.load_model = self._create_load_model_wrapper(load_model)
+            self.predict = self._create_prediction_wrapper(predict)
+            self.batch_predict = self._create_prediction_wrapper(batch_predict)
+            self.action_tracker = action_tracker
+
+            # Model instances
+            self.model_instances = []
+            self._round_robin_counter = 0
+            self.model_path = model_path
+
+            for _ in range(num_model_instances):
+                self.scale_up()
+        except Exception as e:
+            logging.error(f"Failed to initialize ModelManager: {str(e)}")
+            raise
+
+    def _create_load_model_wrapper(self, load_model_func: Callable):
+        """Create a wrapper function that handles parameter passing to the load model function.
+
+        Args:
+            load_model_func: The load model function to wrap
+
+        Returns:
+            A wrapper function that handles parameter passing safely
+        """
+        if not load_model_func:
+            return load_model_func
+
+        def wrapper():
+            """Wrapper that safely calls the load model function with proper parameter handling."""
+            try:
+                # Get function parameter names
+                param_names = load_model_func.__code__.co_varnames[
+                    : load_model_func.__code__.co_argcount
+                ]
+
+                arg_count = load_model_func.__code__.co_argcount
+
+                # Handle case where function has exactly 1 argument and it's not named
+                if arg_count == 1 and param_names and param_names[0] in ['_', 'arg', 'args']:
+                    # Pass action_tracker as positional argument
+                    if self.action_tracker is not None:
+                        return load_model_func(self.action_tracker)
+                    else:
+                        # Try calling with no arguments if action_tracker is None
+                        return load_model_func()
+
+                # Handle case where function has exactly 1 argument with a recognizable name
+                if arg_count == 1 and param_names:
+                    param_name = param_names[0]
+                    # Check if it's likely to want action_tracker
+                    if param_name in ["action_tracker", "actionTracker", "tracker"]:
+                        return load_model_func(self.action_tracker)
+                    elif param_name in ["model_path", "path"] and self.model_path is not None:
+                        return load_model_func(self.model_path)
+                    else:
+                        # Pass action_tracker as fallback for single argument functions
+                        return load_model_func(self.action_tracker if self.action_tracker is not None else None)
+
+                # Build filtered parameters based on what the function accepts (original logic for multi-param functions)
+                filtered_params = {}
+
+                # Add action_tracker if the function accepts it
+                if self.action_tracker is not None:
+                    if "action_tracker" in param_names:
+                        filtered_params["action_tracker"] = self.action_tracker
+                    elif "actionTracker" in param_names:
+                        filtered_params["actionTracker"] = self.action_tracker
+
+                # Add model_path if the function accepts it
+                if "model_path" in param_names and self.model_path is not None:
+                    filtered_params["model_path"] = self.model_path
+
+                return load_model_func(**filtered_params)
+
+            except Exception as e:
+                error_msg = f"Load model function execution failed: {str(e)}"
+                logging.error(error_msg, exc_info=True)
+                raise RuntimeError(error_msg) from e
+
+        return wrapper
+
+    def scale_up(self):
+        """Load the model into memory (scale up)"""
+        try:
+            self.model_instances.append(self.load_model())
+            return True
+        except Exception as e:
+            logging.error(f"Failed to scale up model: {str(e)}")
+            return False
+
+    def scale_down(self):
+        """Unload the model from memory (scale down)"""
+        if not self.model_instances:
+            return True
+        try:
+            del self.model_instances[-1]
+            gc.collect()
+            import torch
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+            return True
+        except Exception as e:
+            logging.error(f"Failed to scale down model: {str(e)}")
+            return False
+
+    def get_model(self):
+        """Get the model instance in round-robin fashion"""
+        if not self.model_instances:
+            logging.warning("No model instances available")
+            return None
+
+        order = self._round_robin_counter % len(self.model_instances)
+        # Get the current model instance
+        model = self.model_instances[order]
+        if not model:
+            logging.error("No model instance found, will try to load model")
+            self.model_instances[order] = self.load_model()
+            model = self.model_instances[order]
+
+        # Increment counter for next call
+        self._round_robin_counter = (self._round_robin_counter + 1) % len(
+            self.model_instances
+        )
+
+        return model
+
+    def _create_prediction_wrapper(self, predict_func: Callable):
+        """Create a wrapper function that handles parameter passing to the prediction function.
+
+        Args:
+            predict_func: The prediction function to wrap
+
+        Returns:
+            A wrapper function that handles parameter passing safely
+        """
+
+        def wrapper(model, input: bytes, extra_params: Dict[str, Any]=None, stream_key: Optional[str]=None, stream_info: Optional[Dict[str, Any]]=None) -> dict:
+            """Wrapper that safely calls the prediction function with proper parameter handling."""
+            try:
+                # Ensure extra_params is a dictionary
+                if extra_params is None:
+                    extra_params = {}
+                elif isinstance(extra_params, list):
+                    logging.warning(f"extra_params received as list instead of dict, converting: {extra_params}")
+                    # Convert list to dict if possible, otherwise use empty dict
+                    if len(extra_params) == 0:
+                        extra_params = {}
+                    elif all(isinstance(item, dict) for item in extra_params):
+                        # Merge all dictionaries in the list
+                        merged_params = {}
+                        for item in extra_params:
+                            merged_params.update(item)
+                        extra_params = merged_params
+                    else:
+                        logging.error(f"Cannot convert extra_params list to dict: {extra_params}")
+                        extra_params = {}
+                elif not isinstance(extra_params, dict):
+                    logging.warning(f"extra_params is not a dict, using empty dict instead. Received: {type(extra_params)}")
+                    extra_params = {}
+
+                param_names = predict_func.__code__.co_varnames[
+                    : predict_func.__code__.co_argcount
+                ]
+                filtered_params = {
+                    k: v for k, v in extra_params.items() if k in param_names
+                }
+
+                # Build arguments list
+                args = [model, input]
+
+                # Add stream_key if the function accepts it (regardless of its value)
+                if "stream_key" in param_names:
+                    filtered_params["stream_key"] = stream_key
+
+                if "stream_info" in param_names:
+                    filtered_params["stream_info"] = stream_info
+
+                return predict_func(*args, **filtered_params)
+
+            except Exception as e:
+                error_msg = f"Prediction function execution failed: {str(e)}"
+                logging.error(error_msg, exc_info=True)
+                raise RuntimeError(error_msg) from e
+
+        return wrapper
+
+    def inference(self, input: bytes, extra_params: Dict[str, Any]=None, stream_key: Optional[str]=None, stream_info: Optional[Dict[str, Any]]=None) -> Tuple[dict, bool]:
+        """Run inference on the provided input data.
+
+        Args:
+            input: Primary input data (can be image bytes or numpy array)
+            extra_params: Additional parameters for inference (optional)
+            stream_key: Stream key for the inference
+            stream_info: Stream info for the inference
+        Returns:
+            Tuple of (results, success_flag)
+
+        Raises:
+            ValueError: If input data is invalid
+        """
+        if input is None:
+            raise ValueError("Input data cannot be None")
+
+        try:
+            model = self.get_model()
+            results = self.predict(model, input, extra_params, stream_key, stream_info)
+            if self.action_tracker:
+                results = self.action_tracker.update_prediction_results(results)
+            return results, True
+        except Exception as e:
+            logging.error(f"Inference failed: {str(e)}")
+            return None, False
+
+    def batch_inference(
+        self, input: List[bytes], extra_params: Dict[str, Any]=None, stream_key: Optional[str]=None, stream_info: Optional[Dict[str, Any]]=None
+    ) -> Tuple[dict, bool]:
+        """Run batch inference on the provided input data.
+
+        Args:
+            input: Primary input data
+            extra_params: Additional parameters for inference (optional)
+            stream_key: Stream key for the inference
+            stream_info: Stream info for the inference
+        Returns:
+            Tuple of (results, success_flag)
+
+        Raises:
+            ValueError: If input data is invalid
+        """
+        if input is None:
+            raise ValueError("Input data cannot be None")
+        try:
+            model = self.get_model()
+            if not self.batch_predict:
+                logging.error("Batch prediction function not found")
+                return None, False
+            results = self.batch_predict(model, input, extra_params, stream_key, stream_info)
+            if self.action_tracker:
+                for result in results:
+                    self.action_tracker.update_prediction_results(result)
+            return results, True
+        except Exception as e:
+            logging.error(f"Batch inference failed: {str(e)}")
+            return None, False
+
+    # TODO: Add multi model execution with torch.cuda.stream()
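A minimal sketch of how the wrappers above behave, using stand-in functions. DummyModel, load_model, predict, the model_path value and the extra_params keys are all hypothetical; a real deployment would pass the platform-supplied action_tracker instead of None:

from matrice_inference.server.model.model_manager import ModelManager

class DummyModel:
    def __call__(self, payload: bytes) -> dict:
        return {"num_bytes": len(payload)}

def load_model(model_path):
    # Single parameter named "model_path", so the load-model wrapper passes self.model_path.
    return DummyModel()

def predict(model, input, threshold=0.5, stream_key=None):
    # Only extra_params keys matching parameter names ("threshold") are forwarded;
    # stream_key is injected because the signature declares it.
    return {"output": model(input), "threshold": threshold, "stream_key": stream_key}

manager = ModelManager(
    action_tracker=None,
    load_model=load_model,
    predict=predict,
    num_model_instances=2,      # two instances served round-robin by get_model()
    model_path="/tmp/model",    # hypothetical path
)

results, ok = manager.inference(
    b"raw-frame",
    extra_params={"threshold": 0.9, "ignored": 1},
    stream_key="camera-01",
)
# ok is True; results == {"output": {"num_bytes": 9}, "threshold": 0.9, "stream_key": "camera-01"}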