matrice-compute 0.1.20-py3-none-any.whl → 0.1.22-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrice_compute/action_instance.py +69 -14
- matrice_compute/instance_utils.py +226 -28
- matrice_compute/scaling.py +668 -405
- {matrice_compute-0.1.20.dist-info → matrice_compute-0.1.22.dist-info}/METADATA +1 -1
- {matrice_compute-0.1.20.dist-info → matrice_compute-0.1.22.dist-info}/RECORD +8 -8
- {matrice_compute-0.1.20.dist-info → matrice_compute-0.1.22.dist-info}/WHEEL +0 -0
- {matrice_compute-0.1.20.dist-info → matrice_compute-0.1.22.dist-info}/licenses/LICENSE.txt +0 -0
- {matrice_compute-0.1.20.dist-info → matrice_compute-0.1.22.dist-info}/top_level.txt +0 -0
matrice_compute/scaling.py
CHANGED
```diff
@@ -3,11 +3,12 @@
 import os
 import logging
 from matrice_common.utils import log_errors
-
+from kafka import KafkaProducer, KafkaConsumer
 import uuid
 import json
 import time
 import base64
+import threading

 # TODO: update /scaling to /compute

```
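0.1.22 re-enables the `kafka` (kafka-python) import and adds `threading` for the response-listener thread introduced further down. Both sides of the bridge speak JSON over Kafka; a minimal sketch of the round trip through the serializer/deserializer lambdas this diff wires into `KafkaProducer`/`KafkaConsumer` (the message shape is the one `_send_kafka_request` builds later; the instance id is made up):

```python
import json
import uuid

# producer side: value_serializer=lambda v: json.dumps(v).encode("utf-8")
request_message = {
    "correlationId": str(uuid.uuid4()),       # used to pair response with request
    "api": "get_compute_details",
    "payload": {"instance_id": "inst-123"},   # hypothetical id
}
wire_bytes = json.dumps(request_message).encode("utf-8")

# consumer side: value_deserializer=lambda m: json.loads(m.decode("utf-8"))
decoded = json.loads(wire_bytes.decode("utf-8"))
assert decoded == request_message
```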
```diff
@@ -15,12 +16,13 @@ class Scaling:

     """Class providing scaling functionality for compute instances."""

-    def __init__(self, session, instance_id=None):
+    def __init__(self, session, instance_id=None, enable_kafka=True):
         """Initialize Scaling instance.

         Args:
             session: Session object for making RPC calls
             instance_id: ID of the compute instance
+            enable_kafka: Enable Kafka communication (default True)

         Raises:
             Exception: If instance_id is not provided
@@ -34,38 +36,93 @@ class Scaling:
         self.rpc = session.rpc
         used_ports_str = os.environ.get("USED_PORTS", "")
         self.used_ports = set(int(p) for p in used_ports_str.split(",") if p.strip())
+
+        # Kafka configuration and initialization
+        self.enable_kafka = enable_kafka
+        self.kafka_producer = None
+        self.kafka_consumer = None
+        self.kafka_thread = None
+        self.kafka_running = False
+
+        # Maps correlation_id to threading.Event for request/response matching
+        self.pending_requests = {}
+        # Maps correlation_id to response data
+        self.response_map = {}
+        self.response_lock = threading.Lock()
+
+        if self.enable_kafka:
+            try:
+                self.kafka_config = {
+                    "bootstrap_servers": self.get_kafka_bootstrap_servers(),
+                    "action_request_topic": "action_requests",
+                    "action_response_topic": "action_responses",
+                    "compute_request_topic": "compute_requests",
+                    "compute_response_topic": "compute_responses"
+                }
+
+                # Initialize single producer
+                self.kafka_producer = KafkaProducer(
+                    bootstrap_servers=self.kafka_config["bootstrap_servers"],
+                    value_serializer=lambda v: json.dumps(v).encode("utf-8"),
+                    max_block_ms=5000  # Timeout if Kafka is down
+                )
+
+                # Initialize single consumer for both response topics
+                self.kafka_consumer = KafkaConsumer(
+                    self.kafka_config["action_response_topic"],
+                    self.kafka_config["compute_response_topic"],
+                    bootstrap_servers=self.kafka_config["bootstrap_servers"],
+                    group_id=f"py_compute_{instance_id}",
+                    value_deserializer=lambda m: json.loads(m.decode("utf-8")),
+                    auto_offset_reset='latest',
+                    enable_auto_commit=True,
+                    consumer_timeout_ms=1000,  # Poll timeout
+                    session_timeout_ms=60000,  # Increase session timeout to 60s (default 30s)
+                    heartbeat_interval_ms=3000,  # Send heartbeat every 3s
+                    max_poll_interval_ms=300000  # Max time between polls: 5 minutes
+                )
+
+                # Start background thread to handle responses
+                self.kafka_running = True
+                self.kafka_thread = threading.Thread(target=self._kafka_response_listener, daemon=True)
+                self.kafka_thread.start()
+
+                logging.info(f"Kafka enabled with bootstrap servers: {self.kafka_config['bootstrap_servers']}")
+            except Exception as e:
+                logging.warning(f"Failed to initialize Kafka, will use REST API only: {e}")
+                self.enable_kafka = False
+                self.kafka_producer = None
+                self.kafka_consumer = None
+
         logging.info(
-            "Initialized Scaling with instance_id: %s",
+            "Initialized Scaling with instance_id: %s, Kafka enabled: %s",
             instance_id,
+            self.enable_kafka
         )
-        # KAFKA TEMPORARILY DISABLED - Using REST API directly
-        # self.kafka_config = {
-        #     "bootstrap_servers": self.get_kafka_bootstrap_servers(),
-        #     "api_request_topic": "action_requests",
-        #     "api_response_topic": "action_responses",
-        #     "scaling_request_topic": "compute_requests",
-        #     "scaling_response_topic": "compute_responses"
-        # }
-        # self.kafka_producer = KafkaProducer(
-        #     bootstrap_servers=self.kafka_config["bootstrap_servers"],
-        #     value_serializer=lambda v: json.dumps(v).encode("utf-8"),)



-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    @log_errors(default_return=None, log_error=True)
+    def get_kafka_bootstrap_servers(self):
+        """Get Kafka bootstrap servers from API and decode base64 fields.
+
+        Returns:
+            str: Kafka bootstrap servers in format "ip:port"
+
+        Raises:
+            ValueError: If unable to fetch Kafka configuration
+        """
+        path = "/v1/actions/get_kafka_info"
+        response = self.rpc.get(path=path)
+        if not response or not response.get("success"):
+            raise ValueError(f"Failed to fetch Kafka config: {response.get('message', 'No response')}")
+        encoded_ip = response["data"]["ip"]
+        encoded_port = response["data"]["port"]
+        ip = base64.b64decode(encoded_ip).decode("utf-8")
+        port = base64.b64decode(encoded_port).decode("utf-8")
+        bootstrap_servers = f"{ip}:{port}"
+        # logging.info(f"Retrieved Kafka bootstrap servers: {bootstrap_servers}")
+        return bootstrap_servers

     @log_errors(default_return=(None, "Error processing response", "Response processing failed"), log_error=True)
     def handle_response(self, resp, success_message, error_message):
```
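`get_kafka_bootstrap_servers` expects `/v1/actions/get_kafka_info` to return base64-encoded `ip` and `port` fields. A quick sketch of just the decode step, with made-up values:

```python
import base64

# Hypothetical response in the shape get_kafka_bootstrap_servers expects
response = {"success": True, "data": {"ip": "MTAuMC4wLjU=", "port": "OTA5Mg=="}}

ip = base64.b64decode(response["data"]["ip"]).decode("utf-8")
port = base64.b64decode(response["data"]["port"]).decode("utf-8")
print(f"{ip}:{port}")  # -> 10.0.0.5:9092
```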
```diff
@@ -90,65 +147,267 @@ class Scaling:
             message = error_message
             logging.error("%s: %s", message, error)
         return data, error, message
-
+
+    def _kafka_response_listener(self):
+        """
+        Background thread that continuously polls for Kafka responses.
+
+        This thread runs in the background and listens for responses from both
+        action_responses and compute_responses topics. When a response is received,
+        it matches the correlation ID to pending requests and wakes up the waiting thread.
+        """
+        logging.info("Kafka response listener thread started")
+
+        while self.kafka_running:
+            try:
+                # Poll for messages with 1 second timeout
+                message_batch = self.kafka_consumer.poll(timeout_ms=1000)
+
+                if message_batch:
+                    for topic_partition, messages in message_batch.items():
+                        for message in messages:
+                            try:
+                                msg = message.value
+                                correlation_id = msg.get("correlationId")
+
+                                if correlation_id:
+                                    with self.response_lock:
+                                        if correlation_id in self.pending_requests:
+                                            # Store response and signal waiting thread
+                                            self.response_map[correlation_id] = msg
+                                            self.pending_requests[correlation_id].set()
+                                            logging.debug(f"Received Kafka response for correlation_id: {correlation_id}")
+                                else:
+                                    logging.warning(f"Received Kafka message without correlationId: {msg}")
+                            except Exception as e:
+                                logging.error(f"Error processing Kafka message: {e}")
+
+            except Exception as e:
+                if self.kafka_running:  # Only log if not shutting down
+                    logging.error(f"Error in Kafka response listener: {e}")
+                    time.sleep(1)  # Avoid tight loop on persistent errors
+
+        logging.info("Kafka response listener thread stopped")
+
+    def _send_kafka_request(self, api, payload, request_topic, response_topic, timeout=5):
+        """
+        Send a request via Kafka and wait for response using the persistent consumer.
+
+        Args:
+            api: API name to call
+            payload: Request payload dictionary
+            request_topic: Kafka topic to send request to
+            response_topic: Kafka topic to receive response from (not used, kept for signature)
+            timeout: Timeout in seconds to wait for response
+
+        Returns:
+            Tuple of (data, error, message, kafka_success)
+            kafka_success is True if response received, False if timeout/error
+        """
+        if not self.enable_kafka or not self.kafka_producer:
+            return None, "Kafka not enabled", "Kafka not available", False
+
+        correlation_id = str(uuid.uuid4())
+        request_message = {
+            "correlationId": correlation_id,
+            "api": api,
+            "payload": payload,
+        }
+
+        # Create event for this request
+        event = threading.Event()
+
+        with self.response_lock:
+            self.pending_requests[correlation_id] = event
+
+        try:
+            # Add auth token if available
+            headers = None
+            if hasattr(self.session.rpc, 'AUTH_TOKEN'):
+                self.session.rpc.AUTH_TOKEN.set_bearer_token()
+                auth_token = self.session.rpc.AUTH_TOKEN.bearer_token
+                auth_token = auth_token.replace("Bearer ", "")
+                headers = [("Authorization", bytes(f"{auth_token}", "utf-8"))]
+
+            # Send request
+            self.kafka_producer.send(request_topic, request_message, headers=headers)
+            logging.info(f"Sent Kafka request for {api} with correlation_id: {correlation_id}")
+
+            # Wait for response with timeout
+            if event.wait(timeout=timeout):
+                # Response received
+                with self.response_lock:
+                    response = self.response_map.pop(correlation_id, None)
+                    self.pending_requests.pop(correlation_id, None)
+
+                if response:
+                    if response.get("status") == "success":
+                        data = response.get("data")
+                        logging.info(f"Kafka success for {api}")
+                        return data, None, f"Fetched via Kafka for {api}", True
+                    else:
+                        error = response.get("error", "Unknown error")
+                        logging.error(f"Kafka error response for {api}: {error}")
+                        return None, error, f"Kafka error response for {api}", True
+                else:
+                    logging.warning(f"Kafka response received but missing data for {api}")
+                    return None, "Response missing data", "Kafka response error", False
+            else:
+                # Timeout
+                with self.response_lock:
+                    self.pending_requests.pop(correlation_id, None)
+                logging.warning(f"Kafka response timeout for {api} after {timeout} seconds")
+                return None, "Kafka response timeout", "Kafka response timeout", False
+
+        except Exception as e:
+            # Cleanup on error
+            with self.response_lock:
+                self.pending_requests.pop(correlation_id, None)
+            logging.error(f"Kafka send error for {api}: {e}")
+            return None, f"Kafka error: {e}", "Kafka send failed", False
+
+    def _hybrid_request(self, api, payload, request_topic, response_topic, rest_fallback_func):
+        """
+        Hybrid request method: try Kafka first, fallback to REST, cache if both fail.
+
+        Args:
+            api: API name
+            payload: Request payload
+            request_topic: Kafka request topic
+            response_topic: Kafka response topic
+            rest_fallback_func: Function to call for REST fallback (should return same format as handle_response)
+
+        Returns:
+            Tuple of (data, error, message) matching the API response pattern
+        """
+        # Try Kafka first
+        if self.enable_kafka:
+            data, error, message, kafka_success = self._send_kafka_request(
+                api, payload, request_topic, response_topic, timeout=5
+            )
+
+            if kafka_success and error is None:
+                # Kafka succeeded
+                return data, error, message
+
+            # Kafka returned an error response (not transport error)
+            if kafka_success and error is not None:
+                logging.warning(f"Kafka returned error for {api}, falling back to REST")
+
+        # Kafka failed or disabled, try REST
+        logging.info(f"Using REST API for {api}")
+        try:
+            rest_response = rest_fallback_func()
+
+            # Return REST response (success or failure)
+            if rest_response and len(rest_response) == 3:
+                return rest_response
+            else:
+                # Unexpected REST response format
+                logging.error(f"REST API returned unexpected format for {api}")
+                return None, "Unexpected REST response format", "REST API error"
+
+        except Exception as e:
+            # REST failed
+            logging.error(f"REST API failed for {api}: {e}")
+            return None, str(e), "REST API failed"
+
+    def shutdown(self):
+        """Gracefully shutdown Kafka connections."""
+        if self.kafka_running:
+            logging.info("Shutting down Kafka connections...")
+            self.kafka_running = False
+
+            if self.kafka_thread:
+                self.kafka_thread.join(timeout=5)
+
+            if self.kafka_consumer:
+                self.kafka_consumer.close()
+
+            if self.kafka_producer:
+                self.kafka_producer.close()
+
+            logging.info("Kafka connections closed")
+
     @log_errors(log_error=True)
     def get_downscaled_ids(self):
-        """Get IDs of downscaled instances.
+        """Get IDs of downscaled instances using Kafka (with REST fallback).

         Returns:
             Tuple of (data, error, message) from API response
         """
-        logging.info(
-
-
-
-
-
-
-
-
-
+        logging.info("Getting downscaled ids for instance %s", self.instance_id)
+
+        payload = {"instance_id": self.instance_id}
+
+        def rest_fallback():
+            path = f"/v1/compute/down_scaled_ids/{self.instance_id}"
+            resp = self.rpc.get(path=path)
+            return self.handle_response(
+                resp,
+                "Downscaled ids info fetched successfully",
+                "Could not fetch the Downscaled ids info",
+            )
+
+        return self._hybrid_request(
+            api="get_downscaled_ids",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

     @log_errors(default_return=(None, "API call failed", "Failed to stop instance"), log_error=True)
     def stop_instance(self):
-        """Stop the compute instance.
+        """Stop the compute instance using Kafka (with REST fallback).

         Returns:
             Tuple of (data, error, message) from API response
         """
-        logging.info(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        logging.info("Stopping instance %s", self.instance_id)
+
+        payload = {
+            "_idInstance": self.instance_id,
+            "isForcedStop": False,
+        }
+
+        def rest_fallback():
+            path = "/v1/compute/compute_instance/stop"
+            resp = self.rpc.put(path=path, payload=payload)
+            return self.handle_response(
+                resp,
+                "Instance stopped successfully",
+                "Could not stop the instance",
+            )
+
+        return self._hybrid_request(
+            api="stop_instance",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

     @log_errors(log_error=True)
-    def update_jupyter_token(
-
-
-
-
-
-            "token": token
-
-
-
-
-
-
+    def update_jupyter_token(self, token=""):
+        """Update Jupyter notebook token using Kafka (with REST fallback)."""
+        payload = {"token": token, "instance_id": self.instance_id}
+
+        def rest_fallback():
+            path = f"/v1/scaling/update_jupyter_notebook_token/{self.instance_id}"
+            resp = self.rpc.put(path=path, payload={"token": token})
+            return self.handle_response(
+                resp,
+                "Resources updated successfully",
+                "Could not update the resources",
+            )
+
+        return self._hybrid_request(
+            api="update_jupyter_token",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

     @log_errors(log_error=True)
```
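The listener/waiter handshake above is a standard correlation-ID pattern: each caller parks on a `threading.Event` keyed by a fresh UUID, and the single consumer thread sets that event when a message with the matching `correlationId` arrives. A self-contained sketch of just that mechanism (no Kafka; a `Timer` stands in for the broker):

```python
import threading
import uuid

pending_requests = {}   # correlation_id -> threading.Event
response_map = {}       # correlation_id -> response payload
lock = threading.Lock()

def deliver(correlation_id, response):
    """What the listener thread does when a matching message arrives."""
    with lock:
        if correlation_id in pending_requests:
            response_map[correlation_id] = response
            pending_requests[correlation_id].set()

def request(timeout=5):
    correlation_id = str(uuid.uuid4())
    event = threading.Event()
    with lock:
        pending_requests[correlation_id] = event
    # Simulate the broker answering 0.1s later on another thread
    threading.Timer(0.1, deliver, args=(correlation_id, {"status": "success"})).start()
    if event.wait(timeout=timeout):
        with lock:
            pending_requests.pop(correlation_id, None)
            return response_map.pop(correlation_id, None)
    with lock:
        pending_requests.pop(correlation_id, None)  # timed out: clean up
    return None

print(request())  # -> {'status': 'success'}
```

The per-request `Event` is what lets one long-lived consumer serve many concurrent callers, replacing the old approach (still visible as deleted comments at the bottom of this diff) that spun up a throwaway `KafkaConsumer` per request.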
```diff
@@ -167,7 +426,7 @@ class Scaling:
         createdAt=None,
         updatedAt=None,
     ):
-        """Update status of an action.
+        """Update status of an action using Kafka (with REST fallback).

         Args:
             service_provider: Provider of the service
@@ -188,12 +447,10 @@ class Scaling:
         """
         if not action_record_id:
             return None, "Action record id is required", "Action record id is required"
-
-
-
-
-        path = "/v1/compute/update_action_status"
-        payload_scaling = {
+
+        logging.info("Updating action status for action %s", action_record_id)
+
+        payload = {
             "instanceID": self.instance_id,
             "serviceProvider": service_provider,
             "actionRecordId": action_record_id,
@@ -208,11 +465,22 @@ class Scaling:
             "createdAt": createdAt,
             "updatedAt": updatedAt,
         }
-
-
-
-
-
+
+        def rest_fallback():
+            path = "/v1/compute/update_action_status"
+            resp = self.rpc.put(path=path, payload=payload)
+            return self.handle_response(
+                resp,
+                "Action status details updated successfully",
+                "Could not update the action status details ",
+            )
+
+        return self._hybrid_request(
+            api="update_action_status",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

     @log_errors(log_error=True)
```
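Every converted method keeps the `(data, error, message)` return contract, so callers don't care which transport answered. A hedged usage sketch with a stub session (the real `Session` type comes from the matrice SDK; `enable_kafka=False` forces the REST path, so the stub below is enough to exercise it):

```python
from types import SimpleNamespace

from matrice_compute.scaling import Scaling

# Stub standing in for the SDK session: on the REST path, Scaling only needs
# session.rpc.get/put (AUTH_TOKEN is only touched on the Kafka path).
fake_rpc = SimpleNamespace(
    get=lambda path: {"success": True, "data": {"shutdown": False}, "message": "ok"},
    put=lambda path, payload=None: {"success": True, "data": None, "message": "ok"},
)
session = SimpleNamespace(rpc=fake_rpc)

scaling = Scaling(session, instance_id="inst-123", enable_kafka=False)
data, error, message = scaling.get_shutdown_details()
print(data, error, message)
scaling.shutdown()  # no-op here; with Kafka on, it stops the listener and closes the clients
```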
```diff
@@ -225,7 +493,7 @@ class Scaling:
         status,
         status_description,
     ):
-        """Update status of an action.
+        """Update status of an action using Kafka (with REST fallback).

         Args:
             action_record_id: ID of the action record
@@ -235,11 +503,8 @@ class Scaling:
             status: Status to update
             status_description: Description of the status
         """
-        logging.info(
-
-            action_record_id,
-        )
-        url = "/v1/actions"
+        logging.info("Updating status for action %s", action_record_id)
+
         payload = {
             "_id": action_record_id,
             "action": action_type,
@@ -248,76 +513,91 @@ class Scaling:
             "status": status,
             "statusDescription": status_description,
         }
-
+
+        def rest_fallback():
+            url = "/v1/actions"
+            self.rpc.put(path=url, payload=payload)
+            return None, None, "Status updated"
+
+        return self._hybrid_request(
+            api="update_action",
+            payload=payload,
+            request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
+        )

     @log_errors(log_error=True)
     def get_shutdown_details(self):
-        """Get shutdown details for the instance.
+        """Get shutdown details for the instance using Kafka (with REST fallback).

         Returns:
             Tuple of (data, error, message) from API response
         """
-        logging.info(
-
-
-
-
-
-
-
-
-
+        logging.info("Getting shutdown details for instance %s", self.instance_id)
+
+        payload = {"instance_id": self.instance_id}
+
+        def rest_fallback():
+            path = f"/v1/compute/get_shutdown_details/{self.instance_id}"
+            resp = self.rpc.get(path=path)
+            return self.handle_response(
+                resp,
+                "Shutdown info fetched successfully",
+                "Could not fetch the shutdown details",
+            )
+
+        return self._hybrid_request(
+            api="get_shutdown_details",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

     @log_errors(log_error=True)
     def get_tasks_details(self):
-        """Get task details for the instance.
+        """Get task details for the instance using Kafka (with REST fallback).

         Returns:
             Tuple of (data, error, message) from API response
         """
-        logging.info(
-
-
-
-
-
-
-
-
-
+        logging.info("Getting tasks details for instance %s", self.instance_id)
+
+        payload = {"instance_id": self.instance_id}
+
+        def rest_fallback():
+            path = f"/v1/actions/fetch_instance_action_details/{self.instance_id}/action_details"
+            resp = self.rpc.get(path=path)
+            return self.handle_response(
+                resp,
+                "Task details fetched successfully",
+                "Could not fetch the task details",
+            )
+
+        return self._hybrid_request(
+            api="get_tasks_details",
+            payload=payload,
+            request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

     @log_errors(log_error=True)
     def get_action_details(self, action_status_id):
-        """Get details for a specific action using REST
-
+        """Get details for a specific action using Kafka (with REST fallback).
+
         Args:
             action_status_id: ID of the action status to fetch
-
+
         Returns:
             Tuple of (data, error, message) from API response
         """
         logging.info("Getting action details for action %s", action_status_id)
-
-
-
-
-        #     api=api,
-        #     payload=payload,
-        #     request_topic=self.kafka_config["api_request_topic"],
-        #     response_topic=self.kafka_config["api_response_topic"],
-        #     timeout=60
-        # )
-        # # Check if Kafka response was received and if it's an error, log and fallback to REST API
-        # if kafka_response_received:
-        #     if error:
-        #         logging.warning("Kafka returned error for get_action_details: %s. Falling back to REST API.", error)
-        #     else:
-        #         return data, error, message
-
-        # Using REST API directly
-        try:
+
+        payload = {"actionRecordId": action_status_id}
+
+        def rest_fallback():
             path = f"/v1/actions/action/{action_status_id}/details"
             resp = self.rpc.get(path=path)
             return self.handle_response(
@@ -325,10 +605,14 @@ class Scaling:
                 "Task details fetched successfully",
                 "Could not fetch the task details",
             )
-
-
-
-
+
+        return self._hybrid_request(
+            api="get_action_details",
+            payload=payload,
+            request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
+        )

     @log_errors(log_error=True)
     def update_action(
@@ -342,8 +626,8 @@ class Scaling:
         service="",
         job_params=None,
     ):
-        """Update an action using REST
-
+        """Update an action using Kafka (with REST fallback).
+
         Args:
             id: Action ID
             step_code: Step code
@@ -353,15 +637,15 @@ class Scaling:
             status_description: Description of the status
             service: Service name
             job_params: Job parameters dictionary
-
+
         Returns:
             Tuple of (data, error, message) from API response
         """
         if job_params is None:
             job_params = {}
+
         logging.info("Updating action %s", id)
-
-        # api = "update_action"
+
         payload = {
             "_id": id,
             "stepCode": step_code,
@@ -372,22 +656,8 @@ class Scaling:
             "serviceName": service,
             "jobParams": job_params,
         }
-
-
-        #     payload=payload,
-        #     request_topic=self.kafka_config["api_request_topic"],
-        #     response_topic=self.kafka_config["api_response_topic"],
-        #     timeout=60
-        # )
-        # # Check if Kafka response was received and if it's an error, log and fallback to REST API
-        # if kafka_response_received:
-        #     if error:
-        #         logging.warning("Kafka returned error for update_action: %s. Falling back to REST API.", error)
-        #     else:
-        #         return data, error, message
-
-        # Using REST API directly
-        try:
+
+        def rest_fallback():
             path = "/v1/actions"
             resp = self.rpc.put(path=path, payload=payload)
             return self.handle_response(
@@ -395,62 +665,66 @@ class Scaling:
                 "Error logged successfully",
                 "Could not log the errors",
             )
-
-
-
+
+        return self._hybrid_request(
+            api="update_action",
+            payload=payload,
+            request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
+        )


     @log_errors(log_error=True)
     def assign_jobs(self, is_gpu):
         """Assign jobs to the instance using REST API.
-
+
         Args:
             is_gpu: Boolean or any value indicating if this is a GPU instance.
                 Will be converted to proper boolean.
-
+
         Returns:
             Tuple of (data, error, message) from API response
         """
         # Convert is_gpu to proper boolean
         is_gpu_bool = bool(is_gpu)
         logging.info("Assigning jobs for instance %s (GPU: %s)", self.instance_id, is_gpu_bool)
-
-        #
-
+
+        # Use REST API directly
+        is_gpu_str = str(is_gpu_bool).lower()
+        path = f"/v1/actions/assign_jobs/{is_gpu_str}/{self.instance_id}"
+        resp = self.rpc.get(path=path)
+        return self.handle_response(
+            resp,
+            "Pinged successfully",
+            "Could not ping the scaling jobs",
+        )
+
+        # # Kafka approach (commented out - using REST only)
         # payload = {
         #     "instanceID": self.instance_id,
         #     "isGPUInstance": is_gpu_bool,
         # }
-
-        #
-        #
+        #
+        # # Define REST fallback function
+        # def rest_fallback():
+        #     is_gpu_str = str(is_gpu_bool).lower()
+        #     path = f"/v1/actions/assign_jobs/{is_gpu_str}/{self.instance_id}"
+        #     resp = self.rpc.get(path=path)
+        #     return self.handle_response(
+        #         resp,
+        #         "Pinged successfully",
+        #         "Could not ping the scaling jobs",
+        #     )
+        #
+        # # Use hybrid approach: Kafka first, REST fallback, cache if both fail
+        # return self._hybrid_request(
+        #     api="assign_jobs",
         #     payload=payload,
-        #     request_topic=self.kafka_config["
-        #     response_topic=self.kafka_config["
-        #
+        #     request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
+        #     response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
+        #     rest_fallback_func=rest_fallback
         # )
-
-        # # Check if Kafka response was received and if it's an error, log and fallback to REST API
-        # if kafka_response_received:
-        #     if error:
-        #         logging.warning("Kafka returned error for assign_jobs: %s. Falling back to REST API.", error)
-        #     else:
-        #         return data, error, message
-
-        # Using REST API directly
-        try:
-            # Convert boolean to lowercase string for API endpoint
-            is_gpu_str = str(is_gpu_bool).lower()
-            path = f"/v1/actions/assign_jobs/{is_gpu_str}/{self.instance_id}"
-            resp = self.rpc.get(path=path)
-            return self.handle_response(
-                resp,
-                "Pinged successfully",
-                "Could not ping the scaling jobs",
-            )
-        except Exception as e:
-            logging.error("REST API failed (assign_jobs): %s", e)
-            return None, f"Failed via REST: {e}", "REST API failed"

     @log_errors(log_error=True)
```
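`assign_jobs` is the one call deliberately kept on REST, with the hybrid variant left commented out — presumably because it is polled frequently, where a 5-second Kafka wait per call would cost more than it saves. A hypothetical agent-side loop over it (the interval and dispatcher below are assumptions, not part of the package):

```python
import logging
import time

def poll_for_jobs(scaling, is_gpu, interval_seconds=10):
    """Hypothetical poll loop; interval and dispatch_jobs are assumptions."""
    while True:
        data, error, message = scaling.assign_jobs(is_gpu)
        if error:
            logging.warning("assign_jobs failed: %s (%s)", message, error)
        elif data:
            dispatch_jobs(data)  # hypothetical handler for the assigned actions
        time.sleep(interval_seconds)
```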
```diff
@@ -461,14 +735,14 @@ class Scaling:
         availableMemory=0,
         availableGPUMemory=0,
     ):
-        """Update available resources for the instance using REST
-
+        """Update available resources for the instance using Kafka (with REST fallback).
+
         Args:
             availableCPU: Available CPU resources
             availableGPU: Available GPU resources
             availableMemory: Available memory
             availableGPUMemory: Available GPU memory
-
+
         Returns:
             Tuple of (data, error, message) from API response
         """
@@ -480,28 +754,9 @@ class Scaling:
             "availableGPUMemory": availableGPUMemory,
             "availableGPU": availableGPU,
         }
-        # KAFKA TEMPORARILY DISABLED - Using REST API directly
-        # api = "update_available_resources"
-        # correlation_id = str(uuid.uuid4())

-        #
-
-        #     payload=payload,
-        #     request_topic=self.kafka_config["scaling_request_topic"],
-        #     response_topic=self.kafka_config["scaling_response_topic"],
-        #     timeout=60
-        # )
-
-        # # Check if Kafka response was received
-        # # Response format: {'correlationId': 'id', 'status': 'success'/'error', 'data': ..., 'error': 'error message'}
-        # if kafka_response_received:
-        #     if error:
-        #         logging.warning("Kafka returned error for update_available_resources: %s. Falling back to REST API.", error)
-        #     else:
-        #         return data, error, message
-
-        # Using REST API directly
-        try:
+        # Define REST fallback function
+        def rest_fallback():
             path = f"/v1/compute/update_available_resources/{self.instance_id}"
             resp = self.rpc.put(path=path, payload=payload)
             return self.handle_response(
@@ -509,45 +764,35 @@ class Scaling:
                 "Resources updated successfully",
                 "Could not update the resources",
             )
-
-
-
+
+        # Use hybrid approach: Kafka first, REST fallback, cache if both fail
+        return self._hybrid_request(
+            api="update_available_resources",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
+        )

     @log_errors(log_error=True)
     def update_action_docker_logs(self, action_record_id, log_content):
-        """Update docker logs for an action using REST
-
+        """Update docker logs for an action using Kafka (with REST fallback).
+
         Args:
             action_record_id: ID of the action record
             log_content: Content of the logs to update
-
+
         Returns:
             Tuple of (data, error, message) from API response
         """
         logging.info("Updating docker logs for action %s", action_record_id)
-
-        # api = "update_action_docker_logs"
+
         payload = {
             "actionRecordId": action_record_id,
             "logContent": log_content,
         }
-        # data, error, message, kafka_response_received = self._send_kafka_request(
-        #     api=api,
-        #     payload=payload,
-        #     request_topic=self.kafka_config["api_request_topic"],
-        #     response_topic=self.kafka_config["api_response_topic"],
-        #     timeout=60
-        # )

-
-        # if kafka_response_received:
-        #     if error:
-        #         logging.warning("Kafka returned error for update_action_docker_logs: %s. Falling back to REST API.", error)
-        #     else:
-        #         return data, error, message
-
-        # Using REST API directly
-        try:
+        def rest_fallback():
             path = "/v1/actions/update_action_docker_logs"
             resp = self.rpc.put(path=path, payload=payload)
             return self.handle_response(
@@ -555,40 +800,67 @@ class Scaling:
                 "Docker logs updated successfully",
                 "Could not update the docker logs",
             )
-
-
-
-
+
+        return self._hybrid_request(
+            api="update_action_docker_logs",
+            payload=payload,
+            request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
+        )

     @log_errors(log_error=True)
     def get_docker_hub_credentials(self):
-        """Get Docker Hub credentials.
+        """Get Docker Hub credentials using Kafka (with REST fallback).

         Returns:
             Tuple of (data, error, message) from API response
         """
         logging.info("Getting docker credentials")
-
-
-
-
-
-
+
+        payload = {}
+
+        def rest_fallback():
+            path = "/v1/compute/get_docker_hub_credentials"
+            resp = self.rpc.get(path=path)
+            return self.handle_response(
+                resp,
+                "Docker credentials fetched successfully",
+                "Could not fetch the docker credentials",
+            )
+
+        return self._hybrid_request(
+            api="get_docker_hub_credentials",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

     @log_errors(log_error=True)
     def get_open_ports_config(self):
-        """Get open ports configuration.
+        """Get open ports configuration using Kafka (with REST fallback).

         Returns:
             Tuple of (data, error, message) from API response
         """
-
-
-
-
-
-
+        payload = {"instance_id": self.instance_id}
+
+        def rest_fallback():
+            path = f"/v1/compute/get_open_ports/{self.instance_id}"
+            resp = self.rpc.get(path=path)
+            return self.handle_response(
+                resp,
+                "Open ports config fetched successfully",
+                "Could not fetch the open ports config",
+            )
+
+        return self._hybrid_request(
+            api="get_open_ports_config",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

     @log_errors(default_return=None, log_error=True)
@@ -639,7 +911,7 @@ class Scaling:

     @log_errors(log_error=True)
     def get_model_secret_keys(self, secret_name):
-        """Get model secret keys.
+        """Get model secret keys using Kafka (with REST fallback).

         Args:
             secret_name: Name of the secret
@@ -647,12 +919,23 @@ class Scaling:
         Returns:
             Tuple of (data, error, message) from API response
         """
-
-
-
-
-
-
+        payload = {"secret_name": secret_name}
+
+        def rest_fallback():
+            path = f"/v1/compute/get_models_secret_keys?secret_name={secret_name}"
+            resp = self.rpc.get(path=path)
+            return self.handle_response(
+                resp,
+                "Secret keys fetched successfully",
+                "Could not fetch the secret keys",
+            )
+
+        return self._hybrid_request(
+            api="get_model_secret_keys",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

     @log_errors(log_error=True)
@@ -753,7 +1036,7 @@ class Scaling:

     @log_errors(log_error=True)
     def stop_account_compute(self, account_number, alias):
-        """Stop a compute instance for an account.
+        """Stop a compute instance for an account using Kafka (with REST fallback).

         Args:
             account_number: Account number
@@ -762,17 +1045,33 @@ class Scaling:
         Returns:
             Tuple of (data, error, message) from API response
         """
-
-
-
-
-
-
+        logging.info("Stopping account compute for %s/%s", account_number, alias)
+
+        payload = {
+            "account_number": account_number,
+            "alias": alias,
+        }
+
+        def rest_fallback():
+            path = f"/v1/compute/stop_account_compute/{account_number}/{alias}"
+            resp = self.rpc.put(path=path)
+            return self.handle_response(
+                resp,
+                "Compute instance stopped successfully",
+                "Could not stop the compute instance",
+            )
+
+        return self._hybrid_request(
+            api="stop_account_compute",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

     @log_errors(log_error=True)
     def restart_account_compute(self, account_number, alias):
-        """Restart a compute instance for an account.
+        """Restart a compute instance for an account using Kafka (with REST fallback).

         Args:
             account_number: Account number
@@ -781,12 +1080,28 @@ class Scaling:
         Returns:
             Tuple of (data, error, message) from API response
         """
-
-
-
-
-
-
+        logging.info("Restarting account compute for %s/%s", account_number, alias)
+
+        payload = {
+            "account_number": account_number,
+            "alias": alias,
+        }
+
+        def rest_fallback():
+            path = f"/v1/compute/restart_account_compute/{account_number}/{alias}"
+            resp = self.rpc.put(path=path)
+            return self.handle_response(
+                resp,
+                "Compute instance restarted successfully",
+                "Could not restart the compute instance",
+            )
+
+        return self._hybrid_request(
+            api="restart_account_compute",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

     @log_errors(log_error=True)
@@ -810,37 +1125,59 @@ class Scaling:

     @log_errors(log_error=True)
     def get_all_instances_type(self):
-        """Get all instance types.
+        """Get all instance types using Kafka (with REST fallback).

         Returns:
             Tuple of (data, error, message) from API response
         """
-
-
-
-
-
-
+        payload = {}
+
+        def rest_fallback():
+            path = "/v1/compute/get_all_instances_type"
+            resp = self.rpc.get(path=path)
+            return self.handle_response(
+                resp,
+                "All instance types fetched successfully",
+                "Could not fetch the instance types",
+            )
+
+        return self._hybrid_request(
+            api="get_all_instances_type",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

     @log_errors(log_error=True)
     def get_compute_details(self):
-        """Get compute instance details.
+        """Get compute instance details using Kafka (with REST fallback).

         Returns:
             Tuple of (data, error, message) from API response
         """
-
-
-
-
-
-
+        payload = {"instance_id": self.instance_id}
+
+        def rest_fallback():
+            path = f"/v1/scaling/get_compute_details/{self.instance_id}"
+            resp = self.rpc.get(path=path)
+            return self.handle_response(
+                resp,
+                "Compute details fetched successfully",
+                "Could not fetch the compute details",
+            )
+
+        return self._hybrid_request(
+            api="get_compute_details",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
        )

     @log_errors(log_error=True)
     def get_user_access_key_pair(self, user_id):
-        """Get user access key pair.
+        """Get user access key pair using Kafka (with REST fallback).

         Args:
             user_id: ID of the user
@@ -848,17 +1185,28 @@ class Scaling:
         Returns:
             Tuple of (data, error, message) from API response
         """
-
-
-
-
-
-
+        payload = {"user_id": user_id, "instance_id": self.instance_id}
+
+        def rest_fallback():
+            path = f"/v1/compute/get_user_access_key_pair/{user_id}/{self.instance_id}"
+            resp = self.rpc.get(path=path)
+            return self.handle_response(
+                resp,
+                "User access key pair fetched successfully",
+                "Could not fetch the user access key pair",
+            )
+
+        return self._hybrid_request(
+            api="get_user_access_key_pair",
+            payload=payload,
+            request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

     @log_errors(log_error=True)
     def get_internal_api_key(self, action_id):
-        """Get internal API key.
+        """Get internal API key using Kafka (with REST fallback).

         Args:
             action_id: ID of the action
@@ -866,107 +1214,22 @@ class Scaling:
         Returns:
             Tuple of (data, error, message) from API response
         """
-
-
-
-
-
-
+        payload = {"action_id": action_id, "instance_id": self.instance_id}
+
+        def rest_fallback():
+            path = f"/v1/actions/get_internal_api_key/{action_id}/{self.instance_id}"
+            resp = self.rpc.get(path=path)
+            return self.handle_response(
+                resp,
+                "internal keys fetched successfully",
+                "Could not fetch internal keys",
+            )
+
+        return self._hybrid_request(
+            api="get_internal_api_key",
+            payload=payload,
+            request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
+            response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
+            rest_fallback_func=rest_fallback
         )

-    # KAFKA TEMPORARILY DISABLED - Using REST API directly
-    # @log_errors(log_error=True)
-    # def handle_kafka_response(self, msg, success_message, error_message):
-    #     """
-    #     Helper to process Kafka response messages in a consistent way.
-    #     """
-    #     if msg.get("status") == "success":
-    #         data = msg.get("data")
-    #         error = None
-    #         message = success_message
-    #         logging.info(message)
-    #     else:
-    #         data = msg.get("data")
-    #         error = msg.get("error", "Unknown error")
-    #         message = error_message
-    #         logging.error("%s: %s", message, error)
-    #     return data, error, message
-
-    # def _send_kafka_request(self, api, payload, request_topic, response_topic, timeout=60):
-    #     """
-    #     Helper to send a request to Kafka and wait for a response.
-    #     Returns (data, error, message, kafka_response_received) where kafka_response_received is True if a response was received (even if error), False if transport error/timeout.
-    #     """
-    #     correlation_id = str(uuid.uuid4())
-    #     request_message = {
-    #         "correlationId": correlation_id,
-    #         "api": api,
-    #         "payload": payload,
-    #     }
-
-    #     consumer = KafkaConsumer(
-    #         response_topic,
-    #         bootstrap_servers=self.kafka_config["bootstrap_servers"],
-    #         group_id=None,
-    #         value_deserializer=lambda m: json.loads(m.decode("utf-8")),
-    #         auto_offset_reset='latest',
-    #         enable_auto_commit=True,
-    #     )
-
-    #     try:
-    #         if hasattr(self.session.rpc, 'AUTH_TOKEN'):
-    #             self.session.rpc.AUTH_TOKEN.set_bearer_token()
-    #             auth_token = self.session.rpc.AUTH_TOKEN.bearer_token
-    #             auth_token = auth_token.replace("Bearer ", "")
-    #             headers = [("Authorization", bytes(f"{auth_token}", "utf-8"))]
-    #         else:
-    #             headers = None
-    #         self.kafka_producer.send(request_topic, request_message, headers=headers)
-    #         # self.kafka_producer.flush()
-    #         logging.info("Sent %s request to Kafka topic %s", api, request_topic)
-    #     except Exception as e:
-    #         logging.error("Kafka producer error: %s", e)
-    #         return None, f"Kafka producer error: {e}", "Kafka send failed", False
-    #     try:
-    #         start = time.time()
-    #         while time.time() - start < timeout:
-    #             # Poll for messages with a short timeout to avoid blocking forever
-    #             message_batch = consumer.poll(timeout_ms=1000)
-    #             if message_batch:
-    #                 for topic_partition, messages in message_batch.items():
-    #                     for message in messages:
-    #                         print("trying to fetch message")
-    #                         msg = message.value
-    #                         if msg.get("correlationId") == correlation_id:
-    #                             consumer.close()
-    #                             # Always treat a received response as final, even if error
-    #                             return self.handle_kafka_response(
-    #                                 msg,
-    #                                 f"Fetched via Kafka for {api}",
-    #                                 f"Kafka error response for {api}"
-    #                             ) + (True,)
-    #             else:
-    #                 print(f"No messages received, waiting... ({time.time() - start:.1f}s/{timeout}s)")
-    #
-    #         consumer.close()
-    #         logging.warning("Kafka response timeout for %s after %d seconds", api, timeout)
-    #         return None, "Kafka response timeout", "Kafka response timeout", False
-    #     except Exception as e:
-    #         logging.error("Kafka consumer error: %s", e)
-    #         return None, f"Kafka consumer error: {e}", "Kafka consumer error", False
-
-    # def _cache_failed_request(self, api, payload):
-    #     """Cache the failed request for retry. Here, we use a simple file cache as a placeholder."""
-    #     try:
-    #         cache_file = os.path.join(os.path.dirname(__file__), 'request_cache.json')
-    #         if os.path.exists(cache_file):
-    #             with open(cache_file, 'r') as f:
-    #                 cache = json.load(f)
-    #         else:
-    #             cache = []
-    #         cache.append({"api": api, "payload": payload, "ts": time.time()})
-    #         with open(cache_file, 'w') as f:
-    #             json.dump(cache, f)
-    #         logging.info("Cached failed request for api %s", api)
-    #     except Exception as e:
-    #         logging.error("Failed to cache request: %s", e)
```