PyPI - matrice-compute - Versions diffs - 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl - Mend

matrice-compute 0.1.11 (py3-none-any.whl) → 0.1.13 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

matrice_compute/__init__.py CHANGED Viewed

@@ -1,9 +1,20 @@
 """Module providing __init__ functionality."""
+import subprocess
 from matrice_common.utils import dependencies_check
-dependencies_check(["docker", "psutil", "cryptography", "notebook", "aiohttp", "kafka-python"])
+dependencies_check(
+    ["docker", "psutil", "cryptography", "notebook", "aiohttp", "kafka-python"]
+)
+subprocess.run( # Re-upgrade docker to avoid missing DOCKER_HOST connection error
+    ["pip", "install", "--upgrade", "docker"],
+    check=True,
+    stdout=subprocess.DEVNULL,   # suppress normal output
+    stderr=subprocess.DEVNULL    # suppress warnings/progress
+)
 from matrice_compute.instance_manager import InstanceManager  # noqa: E402
 __all__ = ["InstanceManager"]

matrice_compute/action_instance.py CHANGED Viewed

@@ -74,7 +74,8 @@ class ActionInstance:
             "streaming_gateway": streaming_gateway_execute,
             "facial_recognition_setup": facial_recognition_setup_execute,
             "fe_fs_streaming": fe_fs_streaming_execute,
-            "inference_ws_server": inference_ws_server_execute
+            "inference_ws_server": inference_ws_server_execute,
+            "lpr_setup": lpr_setup_execute
         }
         if self.action_type not in self.actions_map:
             raise ValueError(f"Unknown action type: {self.action_type}")
@@ -1100,6 +1101,36 @@ def facial_recognition_setup_execute(self: ActionInstance):
     # Docker Command run
     self.start(worker_cmd, "facial_recognition_setup")
+@log_errors(raise_exception=False)
+def lpr_setup_execute(self: ActionInstance):
+    """
+    Creates and setup the database for license plate server.
+    """
+    action_details = self.get_action_details()
+    if not action_details:
+        return
+    image = self.docker_container
+    external_port = self.scaling.get_open_port()
+    self.setup_action_requirements(action_details)
+    # Add worker container run command
+    worker_cmd = (
+        f"docker run -d --pull=always "
+        f"--name lpr-worker "
+        f"-p {external_port}:8082 "
+        f'-e ENV="{os.environ.get("ENV", "prod")}" '
+        f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
+        f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
+        f'-e ACTION_ID="{self.action_record_id}" '
+        f"{image}"
+    )
+    print("Worker docker run command:", worker_cmd)
+    # Docker Command run
+    self.start(worker_cmd, "lpr_setup")
 @log_errors(raise_exception=False)
 def inference_ws_server_execute(self: ActionInstance):
     """

matrice_compute/instance_manager.py CHANGED Viewed

@@ -153,7 +153,8 @@ class InstanceManager:
             key,
             value,
         ) in manual_instance_info.items():
-            os.environ[key] = value
+            if value is not None:
+                os.environ[key] = str(value)
         if not (os.environ.get("SERVICE_PROVIDER") and os.environ.get("INSTANCE_ID")):
             raise Exception(
                 "SERVICE_PROVIDER and INSTANCE_ID must be set as environment variables or passed as arguments"
@@ -220,13 +221,13 @@ class InstanceManager:
                     "Error in shutdown_manager handle_shutdown: %s",
                     str(exc),
                 )
-            try:
-                self.scale_down_manager.auto_scaledown_actions()
-            except Exception as exc:
-                logging.error(
-                    "Error in scale_down_manager auto_scaledown_actions: %s",
-                    str(exc),
-                )
+            # try:
+            #     self.scale_down_manager.auto_scaledown_actions()
+            # except Exception as exc:
+            #     logging.error(
+            #         "Error in scale_down_manager auto_scaledown_actions: %s",
+            #         str(exc),
+            #     )
             try:
                 self.machine_resources_tracker.update_available_resources()
             except Exception as exc:

matrice_compute/scaling.py CHANGED Viewed

@@ -3,7 +3,7 @@
 import os
 import logging
 from matrice_common.utils import log_errors
-from kafka import KafkaProducer, KafkaConsumer
+# from kafka import KafkaProducer, KafkaConsumer
 import uuid
 import json
 import time
@@ -37,32 +37,34 @@ class Scaling:
             "Initialized Scaling with instance_id: %s",
             instance_id,
         )
-        self.kafka_config = {
-            "bootstrap_servers": self.get_kafka_bootstrap_servers(),
-            "api_request_topic": "action_requests",
-            "api_response_topic": "action_responses",
-            "scaling_request_topic": "compute_requests",
-            "scaling_response_topic": "compute_responses"
-        }
-        self.kafka_producer = KafkaProducer(
-            bootstrap_servers=self.kafka_config["bootstrap_servers"],
-            value_serializer=lambda v: json.dumps(v).encode("utf-8"),)
+        # KAFKA TEMPORARILY DISABLED - Using REST API directly
+        # self.kafka_config = {
+        #     "bootstrap_servers": self.get_kafka_bootstrap_servers(),
+        #     "api_request_topic": "action_requests",
+        #     "api_response_topic": "action_responses",
+        #     "scaling_request_topic": "compute_requests",
+        #     "scaling_response_topic": "compute_responses"
+        # }
+        # self.kafka_producer = KafkaProducer(
+        #     bootstrap_servers=self.kafka_config["bootstrap_servers"],
+        #     value_serializer=lambda v: json.dumps(v).encode("utf-8"),)
-    @log_errors(default_return=(None, "Error creating Kafka producer", "Kafka producer creation failed"), log_error=True)
-    def get_kafka_bootstrap_servers(self):
-        """Get Kafka bootstrap servers from API and decode base64 fields."""
-        path = "/v1/actions/get_kafka_info"
-        response = self.rpc.get(path=path)
-        if not response or not response.get("success"):
-            raise ValueError(f"Failed to fetch Kafka config: {response.get('message', 'No response')}")
-        encoded_ip = response["data"]["ip"]
-        encoded_port = response["data"]["port"]
-        ip = base64.b64decode(encoded_ip).decode("utf-8")
-        port = base64.b64decode(encoded_port).decode("utf-8")
-        bootstrap_servers = f"{ip}:{port}"
-        return bootstrap_servers
+    # KAFKA TEMPORARILY DISABLED - Using REST API directly
+    # @log_errors(default_return=(None, "Error creating Kafka producer", "Kafka producer creation failed"), log_error=True)
+    # def get_kafka_bootstrap_servers(self):
+    #     """Get Kafka bootstrap servers from API and decode base64 fields."""
+    #     path = "/v1/actions/get_kafka_info"
+    #     response = self.rpc.get(path=path)
+    #     if not response or not response.get("success"):
+    #         raise ValueError(f"Failed to fetch Kafka config: {response.get('message', 'No response')}")
+    #     encoded_ip = response["data"]["ip"]
+    #     encoded_port = response["data"]["port"]
+    #     ip = base64.b64decode(encoded_ip).decode("utf-8")
+    #     port = base64.b64decode(encoded_port).decode("utf-8")
+    #     bootstrap_servers = f"{ip}:{port}"
+    #     return bootstrap_servers
     @log_errors(default_return=(None, "Error processing response", "Response processing failed"), log_error=True)
     def handle_response(self, resp, success_message, error_message):
@@ -285,34 +287,44 @@ class Scaling:
     @log_errors(log_error=True)
     def get_action_details(self, action_status_id):
-        """Get details for a specific action using Kafka, fallback to REST, then cache."""
+        """Get details for a specific action using REST API.
+        Args:
+            action_status_id: ID of the action status to fetch
+        Returns:
+            Tuple of (data, error, message) from API response
+        """
         logging.info("Getting action details for action %s", action_status_id)
-        api = "get_action_details"
-        payload = {"actionRecordId": action_status_id}
-        # Try Kafka first
-        data, error, message, kafka_response_received = self._send_kafka_request(
-            api=api,
-            payload=payload,
-            request_topic=self.kafka_config["api_request_topic"],
-            response_topic=self.kafka_config["api_response_topic"],
-            timeout=60
-        )
-        if kafka_response_received:
-            return data, error, message
-        # Only if Kafka transport failed or timed out, try REST
+        # KAFKA TEMPORARILY DISABLED - Using REST API directly
+        # api = "get_action_details"
+        # payload = {"actionRecordId": action_status_id}
+        # data, error, message, kafka_response_received = self._send_kafka_request(
+        #     api=api,
+        #     payload=payload,
+        #     request_topic=self.kafka_config["api_request_topic"],
+        #     response_topic=self.kafka_config["api_response_topic"],
+        #     timeout=60
+        # )
+        # # Check if Kafka response was received and if it's an error, log and fallback to REST API
+        # if kafka_response_received:
+        #     if error:
+        #         logging.warning("Kafka returned error for get_action_details: %s. Falling back to REST API.", error)
+        #     else:
+        #         return data, error, message
+        # Using REST API directly
         try:
             path = f"/v1/actions/action/{action_status_id}/details"
             resp = self.rpc.get(path=path)
             return self.handle_response(
                 resp,
-                "Task details fetched successfully (REST fallback)",
-                "Could not fetch the task details (REST fallback)",
+                "Task details fetched successfully",
+                "Could not fetch the task details",
             )
         except Exception as e:
-            logging.error("REST fallback failed: %s", e)
-            self._cache_failed_request(api, payload)
-            return None, f"Failed via Kafka and REST: {e}", "Cached for retry"
+            logging.error("REST API failed (get_action_details): %s", e)
+            return None, f"Failed via REST: {e}", "REST API failed"
     @log_errors(log_error=True)
@@ -327,11 +339,26 @@ class Scaling:
         service="",
         job_params=None,
     ):
-        """Update an action using Kafka, fallback to REST, then cache."""
+        """Update an action using REST API.
+        Args:
+            id: Action ID
+            step_code: Step code
+            action_type: Type of action
+            status: Status of the action
+            sub_action: Sub-action details
+            status_description: Description of the status
+            service: Service name
+            job_params: Job parameters dictionary
+        Returns:
+            Tuple of (data, error, message) from API response
+        """
         if job_params is None:
             job_params = {}
         logging.info("Updating action %s", id)
-        api = "update_action"
+        # KAFKA TEMPORARILY DISABLED - Using REST API directly
+        # api = "update_action"
         payload = {
             "_id": id,
             "stepCode": step_code,
@@ -342,63 +369,85 @@ class Scaling:
             "serviceName": service,
             "jobParams": job_params,
         }
-        data, error, message, kafka_response_received = self._send_kafka_request(
-            api=api,
-            payload=payload,
-            request_topic=self.kafka_config["api_request_topic"],
-            response_topic=self.kafka_config["api_response_topic"],
-            timeout=60
-        )
-        if kafka_response_received:
-            return data, error, message
+        # data, error, message, kafka_response_received = self._send_kafka_request(
+        #     api=api,
+        #     payload=payload,
+        #     request_topic=self.kafka_config["api_request_topic"],
+        #     response_topic=self.kafka_config["api_response_topic"],
+        #     timeout=60
+        # )
+        # # Check if Kafka response was received and if it's an error, log and fallback to REST API
+        # if kafka_response_received:
+        #     if error:
+        #         logging.warning("Kafka returned error for update_action: %s. Falling back to REST API.", error)
+        #     else:
+        #         return data, error, message
+        # Using REST API directly
         try:
             path = "/v1/actions"
             resp = self.rpc.put(path=path, payload=payload)
             return self.handle_response(
                 resp,
-                "Error logged successfully (REST fallback)",
-                "Could not log the errors (REST fallback)",
+                "Error logged successfully",
+                "Could not log the errors",
             )
         except Exception as e:
-            logging.error("REST fallback failed (update_action): %s", e)
-            self._cache_failed_request(api, payload)
-            return None, f"Failed via Kafka and REST: {e}", "Cached for retry"
+            logging.error("REST API failed (update_action): %s", e)
+            return None, f"Failed via REST: {e}", "REST API failed"
     @log_errors(log_error=True)
     def assign_jobs(self, is_gpu):
-        """Assign jobs to the instance using Kafka, fallback to REST, then cache."""
-        logging.info("Assigning jobs for instance %s (GPU: %s)", self.instance_id, is_gpu)
-        api = "assign_jobs"
-        payload = {
-            "instanceID": self.instance_id,
-            "isGPUInstance": is_gpu,
-        }
-        data, error, message, kafka_response_received = self._send_kafka_request(
-            api=api,
-            payload=payload,
-            request_topic=self.kafka_config["api_request_topic"],
-            response_topic=self.kafka_config["api_response_topic"],
-            timeout=60
-        )
-        if kafka_response_received:
-            return data, error, message
-        # Fallback to REST
+        """Assign jobs to the instance using REST API.
+        Args:
+            is_gpu: Boolean or any value indicating if this is a GPU instance.
+                    Will be converted to proper boolean.
+        Returns:
+            Tuple of (data, error, message) from API response
+        """
+        # Convert is_gpu to proper boolean
+        is_gpu_bool = bool(is_gpu)
+        logging.info("Assigning jobs for instance %s (GPU: %s)", self.instance_id, is_gpu_bool)
+        # KAFKA TEMPORARILY DISABLED - Using REST API directly
+        # api = "assign_jobs"
+        # payload = {
+        #     "instanceID": self.instance_id,
+        #     "isGPUInstance": is_gpu_bool,
+        # }
+        # data, error, message, kafka_response_received = self._send_kafka_request(
+        #     api=api,
+        #     payload=payload,
+        #     request_topic=self.kafka_config["api_request_topic"],
+        #     response_topic=self.kafka_config["api_response_topic"],
+        #     timeout=60
+        # )
+        # # Check if Kafka response was received and if it's an error, log and fallback to REST API
+        # if kafka_response_received:
+        #     if error:
+        #         logging.warning("Kafka returned error for assign_jobs: %s. Falling back to REST API.", error)
+        #     else:
+        #         return data, error, message
+        # Using REST API directly
         try:
-            path = f"/v1/actions/assign_jobs/{str(is_gpu)}/{self.instance_id}"
+            # Convert boolean to lowercase string for API endpoint
+            is_gpu_str = str(is_gpu_bool).lower()
+            path = f"/v1/actions/assign_jobs/{is_gpu_str}/{self.instance_id}"
             resp = self.rpc.get(path=path)
             return self.handle_response(
                 resp,
-                "Pinged successfully (REST fallback)",
-                "Could not ping the scaling jobs (REST fallback)",
+                "Pinged successfully",
+                "Could not ping the scaling jobs",
             )
         except Exception as e:
-            logging.error("REST fallback failed (assign_jobs): %s", e)
-            self._cache_failed_request(api, payload)
-            return None, f"Failed via Kafka and REST: {e}", "Cached for retry"
+            logging.error("REST API failed (assign_jobs): %s", e)
+            return None, f"Failed via REST: {e}", "REST API failed"
     @log_errors(log_error=True)
@@ -409,7 +458,17 @@ class Scaling:
         availableMemory=0,
         availableGPUMemory=0,
     ):
-        """Update available resources for the instance using Kafka, fallback to REST, then cache."""
+        """Update available resources for the instance using REST API.
+        Args:
+            availableCPU: Available CPU resources
+            availableGPU: Available GPU resources
+            availableMemory: Available memory
+            availableGPUMemory: Available GPU memory
+        Returns:
+            Tuple of (data, error, message) from API response
+        """
         logging.info("Updating available resources for instance %s", self.instance_id)
         payload = {
             "instance_id": self.instance_id,
@@ -418,63 +477,84 @@ class Scaling:
             "availableGPUMemory": availableGPUMemory,
             "availableGPU": availableGPU,
         }
-        api = "update_available_resources"
-        correlation_id = str(uuid.uuid4())
-        data, error, message, kafka_response_received = self._send_kafka_request(
-            api=api,
-            payload=payload,
-            request_topic=self.kafka_config["scaling_request_topic"],
-            response_topic=self.kafka_config["scaling_response_topic"],
-            timeout=60
-        )
-        if kafka_response_received:
-            return data, error, message
+        # KAFKA TEMPORARILY DISABLED - Using REST API directly
+        # api = "update_available_resources"
+        # correlation_id = str(uuid.uuid4())
+        # data, error, message, kafka_response_received = self._send_kafka_request(
+        #     api=api,
+        #     payload=payload,
+        #     request_topic=self.kafka_config["scaling_request_topic"],
+        #     response_topic=self.kafka_config["scaling_response_topic"],
+        #     timeout=60
+        # )
+        # # Check if Kafka response was received
+        # # Response format: {'correlationId': 'id', 'status': 'success'/'error', 'data': ..., 'error': 'error message'}
+        # if kafka_response_received:
+        #     if error:
+        #         logging.warning("Kafka returned error for update_available_resources: %s. Falling back to REST API.", error)
+        #     else:
+        #         return data, error, message
+        # Using REST API directly
         try:
             path = f"/v1/scaling/update_available_resources/{self.instance_id}"
             resp = self.rpc.put(path=path, payload=payload)
             return self.handle_response(
                 resp,
-                "Resources updated successfully (REST fallback)",
-                "Could not update the resources (REST fallback)",
+                "Resources updated successfully",
+                "Could not update the resources",
             )
         except Exception as e:
-            logging.error("REST fallback failed (update_available_resources): %s", e)
-            self._cache_failed_request(api, payload)
-            return None, f"Failed to update available resources via Kafka and REST: {e}", "Cached for retry"
+            logging.error("REST API failed (update_available_resources): %s", e)
+            return None, f"Failed to update available resources via REST: {e}", "REST API failed"
     @log_errors(log_error=True)
     def update_action_docker_logs(self, action_record_id, log_content):
-        """Update docker logs for an action using Kafka, fallback to REST, then cache."""
+        """Update docker logs for an action using REST API.
+        Args:
+            action_record_id: ID of the action record
+            log_content: Content of the logs to update
+        Returns:
+            Tuple of (data, error, message) from API response
+        """
         logging.info("Updating docker logs for action %s", action_record_id)
-        api = "update_action_docker_logs"
+        # KAFKA TEMPORARILY DISABLED - Using REST API directly
+        # api = "update_action_docker_logs"
         payload = {
             "actionRecordId": action_record_id,
             "logContent": log_content,
         }
-        data, error, message, kafka_response_received = self._send_kafka_request(
-                api=api,
-                payload=payload,
-                request_topic=self.kafka_config["api_request_topic"],
-                response_topic=self.kafka_config["api_response_topic"],
-                timeout=60
-            )
-        if kafka_response_received:
-            return data, error, message
+        # data, error, message, kafka_response_received = self._send_kafka_request(
+        #     api=api,
+        #     payload=payload,
+        #     request_topic=self.kafka_config["api_request_topic"],
+        #     response_topic=self.kafka_config["api_response_topic"],
+        #     timeout=60
+        # )
+        # # Check if Kafka response was received and if it's an error, log and fallback to REST API
+        # if kafka_response_received:
+        #     if error:
+        #         logging.warning("Kafka returned error for update_action_docker_logs: %s. Falling back to REST API.", error)
+        #     else:
+        #         return data, error, message
+        # Using REST API directly
         try:
             path = "/v1/actions/update_action_docker_logs"
             resp = self.rpc.put(path=path, payload=payload)
             return self.handle_response(
                 resp,
-                "Docker logs updated successfully (REST fallback)",
-                "Could not update the docker logs (REST fallback)",
+                "Docker logs updated successfully",
+                "Could not update the docker logs",
             )
         except Exception as e:
-            logging.error("REST fallback failed (update_action_docker_logs): %s", e)
+            logging.error("REST API failed (update_action_docker_logs): %s", e)
+            return None, f"Failed via REST: {e}", "REST API failed"
     @log_errors(log_error=True)
@@ -533,7 +613,8 @@ class Scaling:
             if port in self.used_ports:
                 continue
             self.used_ports.add(port)
-            os.environ["USED_PORTS"] = ",".join(str(p) for p in self.used_ports)
+            ports_value = ",".join(str(p) for p in self.used_ports)
+            os.environ["USED_PORTS"] = str(ports_value)
             logging.info("Found available port: %s", port)
             return port
         logging.error(
@@ -790,98 +871,99 @@ class Scaling:
             "Could not fetch internal keys",
         )
-    @log_errors(log_error=True)
-    def handle_kafka_response(self, msg, success_message, error_message):
-        """
-        Helper to process Kafka response messages in a consistent way.
-        """
-        if msg.get("status") == "success":
-            data = msg.get("data")
-            error = None
-            message = success_message
-            logging.info(message)
-        else:
-            data = msg.get("data")
-            error = msg.get("error", "Unknown error")
-            message = error_message
-            logging.error("%s: %s", message, error)
-        return data, error, message
-    def _send_kafka_request(self, api, payload, request_topic, response_topic, timeout=60):
-        """
-        Helper to send a request to Kafka and wait for a response.
-        Returns (data, error, message, kafka_response_received) where kafka_response_received is True if a response was received (even if error), False if transport error/timeout.
-        """
-        correlation_id = str(uuid.uuid4())
-        request_message = {
-            "correlationId": correlation_id,
-            "api": api,
-            "payload": payload,
-        }
-        consumer = KafkaConsumer(
-            response_topic,
-            bootstrap_servers=self.kafka_config["bootstrap_servers"],
-            group_id=None,
-            value_deserializer=lambda m: json.loads(m.decode("utf-8")),
-            auto_offset_reset='latest',
-            enable_auto_commit=True,
-        )
-        try:
-            if hasattr(self.session.rpc, 'AUTH_TOKEN'):
-                self.session.rpc.AUTH_TOKEN.set_bearer_token()
-                auth_token = self.session.rpc.AUTH_TOKEN.bearer_token
-                auth_token = auth_token.replace("Bearer ", "")
-                headers = [("Authorization", bytes(f"{auth_token}", "utf-8"))]
-            else:
-                headers = None
-            self.kafka_producer.send(request_topic, request_message, headers=headers)
-            # self.kafka_producer.flush()
-            logging.info("Sent %s request to Kafka topic %s", api, request_topic)
-        except Exception as e:
-            logging.error("Kafka producer error: %s", e)
-            return None, f"Kafka producer error: {e}", "Kafka send failed", False
-        try:
-            start = time.time()
-            while time.time() - start < timeout:
-                # Poll for messages with a short timeout to avoid blocking forever
-                message_batch = consumer.poll(timeout_ms=1000)
-                if message_batch:
-                    for topic_partition, messages in message_batch.items():
-                        for message in messages:
-                            print("trying to fetch message")
-                            msg = message.value
-                            if msg.get("correlationId") == correlation_id:
-                                consumer.close()
-                                # Always treat a received response as final, even if error
-                                return self.handle_kafka_response(
-                                    msg,
-                                    f"Fetched via Kafka for {api}",
-                                    f"Kafka error response for {api}"
-                                ) + (True,)
-                else:
-                    print(f"No messages received, waiting... ({time.time() - start:.1f}s/{timeout}s)")
-            consumer.close()
-            logging.warning("Kafka response timeout for %s after %d seconds", api, timeout)
-            return None, "Kafka response timeout", "Kafka response timeout", False
-        except Exception as e:
-            logging.error("Kafka consumer error: %s", e)
-            return None, f"Kafka consumer error: {e}", "Kafka consumer error", False
-    def _cache_failed_request(self, api, payload):
-        """Cache the failed request for retry. Here, we use a simple file cache as a placeholder."""
-        try:
-            cache_file = os.path.join(os.path.dirname(__file__), 'request_cache.json')
-            if os.path.exists(cache_file):
-                with open(cache_file, 'r') as f:
-                    cache = json.load(f)
-            else:
-                cache = []
-            cache.append({"api": api, "payload": payload, "ts": time.time()})
-            with open(cache_file, 'w') as f:
-                json.dump(cache, f)
-            logging.info("Cached failed request for api %s", api)
-        except Exception as e:
-            logging.error("Failed to cache request: %s", e)
+    # KAFKA TEMPORARILY DISABLED - Using REST API directly
+    # @log_errors(log_error=True)
+    # def handle_kafka_response(self, msg, success_message, error_message):
+    #     """
+    #     Helper to process Kafka response messages in a consistent way.
+    #     """
+    #     if msg.get("status") == "success":
+    #         data = msg.get("data")
+    #         error = None
+    #         message = success_message
+    #         logging.info(message)
+    #     else:
+    #         data = msg.get("data")
+    #         error = msg.get("error", "Unknown error")
+    #         message = error_message
+    #         logging.error("%s: %s", message, error)
+    #     return data, error, message
+    # def _send_kafka_request(self, api, payload, request_topic, response_topic, timeout=60):
+    #     """
+    #     Helper to send a request to Kafka and wait for a response.
+    #     Returns (data, error, message, kafka_response_received) where kafka_response_received is True if a response was received (even if error), False if transport error/timeout.
+    #     """
+    #     correlation_id = str(uuid.uuid4())
+    #     request_message = {
+    #         "correlationId": correlation_id,
+    #         "api": api,
+    #         "payload": payload,
+    #     }
+    #     consumer = KafkaConsumer(
+    #         response_topic,
+    #         bootstrap_servers=self.kafka_config["bootstrap_servers"],
+    #         group_id=None,
+    #         value_deserializer=lambda m: json.loads(m.decode("utf-8")),
+    #         auto_offset_reset='latest',
+    #         enable_auto_commit=True,
+    #     )
+    #     try:
+    #         if hasattr(self.session.rpc, 'AUTH_TOKEN'):
+    #             self.session.rpc.AUTH_TOKEN.set_bearer_token()
+    #             auth_token = self.session.rpc.AUTH_TOKEN.bearer_token
+    #             auth_token = auth_token.replace("Bearer ", "")
+    #             headers = [("Authorization", bytes(f"{auth_token}", "utf-8"))]
+    #         else:
+    #             headers = None
+    #         self.kafka_producer.send(request_topic, request_message, headers=headers)
+    #         # self.kafka_producer.flush()
+    #         logging.info("Sent %s request to Kafka topic %s", api, request_topic)
+    #     except Exception as e:
+    #         logging.error("Kafka producer error: %s", e)
+    #         return None, f"Kafka producer error: {e}", "Kafka send failed", False
+    #     try:
+    #         start = time.time()
+    #         while time.time() - start < timeout:
+    #             # Poll for messages with a short timeout to avoid blocking forever
+    #             message_batch = consumer.poll(timeout_ms=1000)
+    #             if message_batch:
+    #                 for topic_partition, messages in message_batch.items():
+    #                     for message in messages:
+    #                         print("trying to fetch message")
+    #                         msg = message.value
+    #                         if msg.get("correlationId") == correlation_id:
+    #                             consumer.close()
+    #                             # Always treat a received response as final, even if error
+    #                             return self.handle_kafka_response(
+    #                                 msg,
+    #                                 f"Fetched via Kafka for {api}",
+    #                                 f"Kafka error response for {api}"
+    #                             ) + (True,)
+    #             else:
+    #                 print(f"No messages received, waiting... ({time.time() - start:.1f}s/{timeout}s)")
+    #
+    #         consumer.close()
+    #         logging.warning("Kafka response timeout for %s after %d seconds", api, timeout)
+    #         return None, "Kafka response timeout", "Kafka response timeout", False
+    #     except Exception as e:
+    #         logging.error("Kafka consumer error: %s", e)
+    #         return None, f"Kafka consumer error: {e}", "Kafka consumer error", False
+    # def _cache_failed_request(self, api, payload):
+    #     """Cache the failed request for retry. Here, we use a simple file cache as a placeholder."""
+    #     try:
+    #         cache_file = os.path.join(os.path.dirname(__file__), 'request_cache.json')
+    #         if os.path.exists(cache_file):
+    #             with open(cache_file, 'r') as f:
+    #                 cache = json.load(f)
+    #         else:
+    #             cache = []
+    #         cache.append({"api": api, "payload": payload, "ts": time.time()})
+    #         with open(cache_file, 'w') as f:
+    #             json.dump(cache, f)
+    #         logging.info("Cached failed request for api %s", api)
+    #     except Exception as e:
+    #         logging.error("Failed to cache request: %s", e)

{matrice_compute-0.1.11.dist-info → matrice_compute-0.1.13.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: matrice_compute
-Version: 0.1.11
+Version: 0.1.13
 Summary: Common server utilities for Matrice.ai services
 Author-email: "Matrice.ai" <dipendra@matrice.ai>
 License-Expression: MIT

{matrice_compute-0.1.11.dist-info → matrice_compute-0.1.13.dist-info}/RECORD RENAMED Viewed

@@ -1,17 +1,17 @@
-matrice_compute/__init__.py,sha256=HG5yzsY2dcQ0sGKwxMj-Sv2zDhbSC00slAdkcfS9nng,304
-matrice_compute/action_instance.py,sha256=6tyZehK7SfIu6NjXp4wFeYMY0BINShmXtoCXyimDKN0,58002
+matrice_compute/__init__.py,sha256=ZzQcFsT005VCgq9VZUh565f4upOooEb_FwZ6RgweNZs,597
+matrice_compute/action_instance.py,sha256=6IVMNODznEagFlwifjP1neO6OK0H46vuvMYDw02gYF0,58985
 matrice_compute/actions_manager.py,sha256=5U-xM6tl_Z6x96bi-c7AJM9ru80LqTN8f5Oce8dAu_A,7780
 matrice_compute/actions_scaledown_manager.py,sha256=pJ0nduNwHWZ10GnqJNx0Ok7cVWabQ_M8E2Vb9pH3A_k,2002
-matrice_compute/instance_manager.py,sha256=OrKZFOdrQLz_e0tVoIeasY_mvGX8qBnkbkra8WRqkSg,10089
+matrice_compute/instance_manager.py,sha256=8USyX09ZxLvnVNIrjRogbyUeMCfgWnasuRqYkkVF4tQ,10146
 matrice_compute/instance_utils.py,sha256=tIFVUi8HJPy4GY-jtfVx2zIgmXNta7s3jCIRzBga1hI,21977
 matrice_compute/prechecks.py,sha256=W9YmNF3RcLhOf4U8WBlExvFqDw1aGWSNTlJtA73lbDQ,17196
 matrice_compute/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 matrice_compute/resources_tracker.py,sha256=My26LPglDHcQcTkxxiXwpfdqkpEAt3clrqJ-k1fAl1M,17878
-matrice_compute/scaling.py,sha256=8HfbKMsR7EI0rrLfKl_gz6FMO2Q4sLXELxGc3DcLwz8,31743
+matrice_compute/scaling.py,sha256=hlPpEW8uggMKHW9kwu71obOnbNXhoqRlkmux4Fc3OP0,35202
 matrice_compute/shutdown_manager.py,sha256=0MYV_AqygqR9NEntYf7atUC-PbWXyNkm1f-8c2aizgA,13234
 matrice_compute/task_utils.py,sha256=ML9uTrYQiWgEMJitYxoGlVOa9KUXNKV_WqnousOTK6k,2762
-matrice_compute-0.1.11.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
-matrice_compute-0.1.11.dist-info/METADATA,sha256=gBRGeGZ5jUZnz5Bw5TevMHwTr0k63ZL8DK9l3lQWHMg,1038
-matrice_compute-0.1.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-matrice_compute-0.1.11.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
-matrice_compute-0.1.11.dist-info/RECORD,,
+matrice_compute-0.1.13.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
+matrice_compute-0.1.13.dist-info/METADATA,sha256=aX4hxZ2ll6w9miiYJ9Ed-FZtEVUEvwNb6vUplVYNm0w,1038
+matrice_compute-0.1.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+matrice_compute-0.1.13.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
+matrice_compute-0.1.13.dist-info/RECORD,,

{matrice_compute-0.1.11.dist-info → matrice_compute-0.1.13.dist-info}/WHEEL RENAMED Viewed

File without changes

{matrice_compute-0.1.11.dist-info → matrice_compute-0.1.13.dist-info}/licenses/LICENSE.txt RENAMED Viewed

File without changes

{matrice_compute-0.1.11.dist-info → matrice_compute-0.1.13.dist-info}/top_level.txt RENAMED Viewed

File without changes

matrice-compute 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

matrice-compute 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl