matrice-compute 0.1.13__tar.gz → 0.1.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/PKG-INFO +1 -1
  2. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/matrice_compute.egg-info/PKG-INFO +1 -1
  3. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/src/matrice_compute/action_instance.py +118 -103
  4. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/src/matrice_compute/instance_utils.py +46 -15
  5. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/src/matrice_compute/resources_tracker.py +59 -32
  6. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/src/matrice_compute/scaling.py +5 -2
  7. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/src/matrice_compute/task_utils.py +2 -2
  8. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/LICENSE.txt +0 -0
  9. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/README.md +0 -0
  10. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/matrice_compute.egg-info/SOURCES.txt +0 -0
  11. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/matrice_compute.egg-info/dependency_links.txt +0 -0
  12. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/matrice_compute.egg-info/not-zip-safe +0 -0
  13. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/matrice_compute.egg-info/top_level.txt +0 -0
  14. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/pyproject.toml +0 -0
  15. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/setup.cfg +0 -0
  16. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/setup.py +0 -0
  17. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/src/matrice_compute/__init__.py +0 -0
  18. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/src/matrice_compute/actions_manager.py +0 -0
  19. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/src/matrice_compute/actions_scaledown_manager.py +0 -0
  20. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/src/matrice_compute/instance_manager.py +0 -0
  21. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/src/matrice_compute/prechecks.py +0 -0
  22. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/src/matrice_compute/py.typed +0 -0
  23. {matrice_compute-0.1.13 → matrice_compute-0.1.15}/src/matrice_compute/shutdown_manager.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrice_compute
3
- Version: 0.1.13
3
+ Version: 0.1.15
4
4
  Summary: Common server utilities for Matrice.ai services
5
5
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrice_compute
3
- Version: 0.1.13
3
+ Version: 0.1.15
4
4
  Summary: Common server utilities for Matrice.ai services
5
5
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
6
6
  License-Expression: MIT
@@ -575,75 +575,75 @@ class ActionInstance:
575
575
  )
576
576
  raise
577
577
 
578
- @log_errors(raise_exception=False)
579
- def create_redis_container(self, redis_image=None, redis_password=None):
580
- """Create and start a Redis container using Docker.
581
-
582
- Args:
583
- redis_image (str, optional): Redis Docker image to use. Defaults to 'redis:latest'
584
-
585
- Returns:
586
- tuple: (container_info, error, message)
587
- """
588
- if redis_image is None:
589
- redis_image = "redis:latest"
590
-
591
- network_name = f"redis_network_{int(time.time())}"
592
- subprocess.run(f"docker network create {network_name}", shell=True, check=True)
593
-
594
- try:
595
- # Get an available port for Redis
596
- external_port = "6379"
597
-
598
- # Generate a unique container name and password
599
- container_name = f"redis_container_{int(time.time())}"
600
-
601
- # Build the docker command to create Redis container with password
602
- cmd = (
603
- f"docker run -d "
604
- f"--network {network_name} "
605
- f"--name {container_name} "
606
- f"-p {external_port}:6379 "
607
- f"--restart unless-stopped "
608
- f"{redis_image} "
609
- f"redis-server --bind 0.0.0.0 --appendonly yes --requirepass {redis_password}"
610
- )
611
-
612
- logging.info("Creating Redis container with command: %s", cmd)
613
-
614
- # Execute the command
615
- result = subprocess.run(
616
- cmd, shell=True, capture_output=True, text=True, timeout=60
617
- )
618
-
619
- if result.returncode == 0:
620
- container_id = result.stdout.strip()
621
- container_info = {
622
- "container_id": container_id,
623
- "container_name": container_name,
624
- "network_name": network_name,
625
- "external_port": external_port,
626
- "internal_port": 6379,
627
- "password": redis_password,
628
- "image": redis_image,
629
- "status": "running",
630
- }
631
-
632
- logging.info("Redis container created successfully: %s", container_info)
633
- return container_info, None, "Redis container created successfully"
634
- else:
635
- error_message = f"Failed to create Redis container: {result.stderr}"
636
- logging.error(error_message)
637
- return None, "ContainerCreationError", error_message
638
-
639
- except subprocess.TimeoutExpired:
640
- error_message = "Timeout while creating Redis container"
641
- logging.error(error_message)
642
- return None, "TimeoutError", error_message
643
- except Exception as e:
644
- error_message = f"Unexpected error creating Redis container: {str(e)}"
645
- logging.error(error_message)
646
- return None, "UnexpectedError", error_message
578
+ # @log_errors(raise_exception=False)
579
+ # def create_redis_container(self, redis_image=None, redis_password=None):
580
+ # """Create and start a Redis container using Docker.
581
+
582
+ # Args:
583
+ # redis_image (str, optional): Redis Docker image to use. Defaults to 'redis:latest'
584
+
585
+ # Returns:
586
+ # tuple: (container_info, error, message)
587
+ # """
588
+ # if redis_image is None:
589
+ # redis_image = "redis:latest"
590
+
591
+ # network_name = f"redis_network_{int(time.time())}"
592
+ # subprocess.run(f"docker network create {network_name}", shell=True, check=True)
593
+
594
+ # try:
595
+ # # Get an available port for Redis
596
+ # external_port = "6379"
597
+
598
+ # # Generate a unique container name and password
599
+ # container_name = f"redis_container_{int(time.time())}"
600
+
601
+ # # Build the docker command to create Redis container with password
602
+ # cmd = (
603
+ # f"docker run -d "
604
+ # f"--network {network_name} "
605
+ # f"--name {container_name} "
606
+ # f"-p {external_port}:6379 "
607
+ # f"--restart unless-stopped "
608
+ # f"{redis_image} "
609
+ # f"redis-server --bind 0.0.0.0 --appendonly yes --requirepass {redis_password}"
610
+ # )
611
+
612
+ # logging.info("Creating Redis container with command: %s", cmd)
613
+
614
+ # # Execute the command
615
+ # result = subprocess.run(
616
+ # cmd, shell=True, capture_output=True, text=True, timeout=60
617
+ # )
618
+
619
+ # if result.returncode == 0:
620
+ # container_id = result.stdout.strip()
621
+ # container_info = {
622
+ # "container_id": container_id,
623
+ # "container_name": container_name,
624
+ # "network_name": network_name,
625
+ # "external_port": external_port,
626
+ # "internal_port": 6379,
627
+ # "password": redis_password,
628
+ # "image": redis_image,
629
+ # "status": "running",
630
+ # }
631
+
632
+ # logging.info("Redis container created successfully: %s", container_info)
633
+ # return container_info, None, "Redis container created successfully"
634
+ # else:
635
+ # error_message = f"Failed to create Redis container: {result.stderr}"
636
+ # logging.error(error_message)
637
+ # return None, "ContainerCreationError", error_message
638
+
639
+ # except subprocess.TimeoutExpired:
640
+ # error_message = "Timeout while creating Redis container"
641
+ # logging.error(error_message)
642
+ # return None, "TimeoutError", error_message
643
+ # except Exception as e:
644
+ # error_message = f"Unexpected error creating Redis container: {str(e)}"
645
+ # logging.error(error_message)
646
+ # return None, "UnexpectedError", error_message
647
647
 
648
648
  @log_errors(raise_exception=False, log_error=False)
649
649
  def send_logs_continuously(self):
@@ -1053,7 +1053,7 @@ def database_setup_execute(self: ActionInstance):
1053
1053
  f"-e MATRICE_ACCESS_KEY_ID={self.matrice_access_key_id} "
1054
1054
  f"-e MATRICE_SECRET_ACCESS_KEY={self.matrice_secret_access_key} "
1055
1055
  f"-e PROJECT_ID={project_id} "
1056
- f"-e ENV=dev "
1056
+ f'-e ENV="{os.environ.get("ENV", "prod")}" '
1057
1057
  f"{image} "
1058
1058
  )
1059
1059
  print("Docker command", cmd)
@@ -1117,13 +1117,14 @@ def lpr_setup_execute(self: ActionInstance):
1117
1117
 
1118
1118
  # Add worker container run command
1119
1119
  worker_cmd = (
1120
- f"docker run -d --pull=always "
1120
+ f"docker run -d --net=host --pull=always "
1121
1121
  f"--name lpr-worker "
1122
1122
  f"-p {external_port}:8082 "
1123
1123
  f'-e ENV="{os.environ.get("ENV", "prod")}" '
1124
1124
  f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
1125
1125
  f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
1126
1126
  f'-e ACTION_ID="{self.action_record_id}" '
1127
+ f'-e PORT={external_port} '
1127
1128
  f"{image}"
1128
1129
  )
1129
1130
  print("Worker docker run command:", worker_cmd)
@@ -1134,27 +1135,31 @@ def lpr_setup_execute(self: ActionInstance):
1134
1135
  @log_errors(raise_exception=False)
1135
1136
  def inference_ws_server_execute(self: ActionInstance):
1136
1137
  """
1137
- Creates and start inference pipline.
1138
+ Creates and start inference pipeline.
1139
+ Inference WebSocket server runs on port 8102 (localhost only with --net=host).
1138
1140
  """
1139
1141
  action_details = self.get_action_details()
1140
1142
 
1141
1143
  if not action_details:
1142
1144
  return
1143
1145
  image = action_details["actionDetails"].get("docker")
1146
+
1144
1147
 
1145
1148
  self.setup_action_requirements(action_details)
1146
1149
 
1147
- # Add worker container run command
1150
+ # Inference WebSocket server with --net=host (Port: 8102)
1148
1151
  worker_cmd = (
1149
- f"docker run -d --pull=always "
1152
+ f"docker run -d --pull=always --net=host "
1150
1153
  f"--name inference "
1151
- f"-p 8102:8102 "
1152
1154
  f'-e ENV="{os.environ.get("ENV", "prod")}" '
1153
1155
  f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
1154
1156
  f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
1155
- f"{image}"
1157
+ f"{image} "
1158
+ f"./app "
1159
+ f"{self.action_record_id} "
1160
+
1156
1161
  )
1157
- print("inference docker run command:", worker_cmd)
1162
+ logging.info("Starting inference WebSocket server (Port: 8102): %s", worker_cmd)
1158
1163
 
1159
1164
  # Docker Command run
1160
1165
  self.start(worker_cmd, "inference_ws_server")
@@ -1163,7 +1168,8 @@ def inference_ws_server_execute(self: ActionInstance):
1163
1168
  @log_errors(raise_exception=False)
1164
1169
  def fe_fs_streaming_execute(self: ActionInstance):
1165
1170
  """
1166
- Creates and setup the frontend for fs streaming
1171
+ Creates and setup the frontend for fs streaming.
1172
+ Frontend streaming runs on port 3000 (localhost only with --net=host).
1167
1173
  """
1168
1174
  action_details = self.get_action_details()
1169
1175
 
@@ -1173,17 +1179,16 @@ def fe_fs_streaming_execute(self: ActionInstance):
1173
1179
 
1174
1180
  self.setup_action_requirements(action_details)
1175
1181
 
1176
- # Add worker container run command
1182
+ # Frontend streaming with --net=host (Port: 3000)
1177
1183
  worker_cmd = (
1178
- f"docker run -d --pull=always "
1184
+ f"docker run -d --pull=always --net=host "
1179
1185
  f"--name fe_streaming "
1180
- f"-p 3000:3000 "
1181
1186
  f'-e ENV="{os.environ.get("ENV", "prod")}" '
1182
1187
  f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
1183
1188
  f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
1184
1189
  f"{image}"
1185
1190
  )
1186
- print("fe_fs_stremaing docker run command:", worker_cmd)
1191
+ logging.info("Starting frontend streaming (Port: 3000): %s", worker_cmd)
1187
1192
 
1188
1193
  # Docker Command run
1189
1194
  self.start(worker_cmd, "fe_fs_streaming")
@@ -1235,8 +1240,8 @@ def synthetic_data_setup_execute(self: ActionInstance):
1235
1240
  def redis_setup_execute(self: ActionInstance):
1236
1241
  """
1237
1242
  Creates and starts a Redis container using Docker.
1243
+ Redis runs on port 6379 (localhost only with --net=host).
1238
1244
  """
1239
- external_port = self.scaling.get_open_port()
1240
1245
  work_fs = get_max_file_system()
1241
1246
 
1242
1247
  action_details = self.get_action_details()
@@ -1248,18 +1253,6 @@ def redis_setup_execute(self: ActionInstance):
1248
1253
  "password", f"redis_pass_{int(time.time())}"
1249
1254
  )
1250
1255
 
1251
- container_info, error, message = self.create_redis_container(
1252
- action_details["actionDetails"].get("redis_image", "redis:latest"),
1253
- redis_password=redis_password,
1254
- )
1255
- if error:
1256
- logging.error(
1257
- "Error creating Redis container: %s",
1258
- message,
1259
- )
1260
- return
1261
- logging.info("Redis container created successfully: %s", container_info)
1262
-
1263
1256
  # Initialize redis container
1264
1257
  self.setup_action_requirements(
1265
1258
  action_details,
@@ -1268,17 +1261,39 @@ def redis_setup_execute(self: ActionInstance):
1268
1261
  action_id=action_id,
1269
1262
  )
1270
1263
 
1264
+ redis_image = action_details["actionDetails"].get("redis_image", "redis:latest")
1265
+
1266
+ # Redis container with --net=host (Port: 6379)
1267
+ redis_cmd = (
1268
+ f"docker run -d --net=host "
1269
+ f"--name redis_container_{int(time.time())} "
1270
+ f"--restart unless-stopped "
1271
+ f"{redis_image} "
1272
+ f"redis-server --bind 0.0.0.0 --appendonly yes --requirepass {redis_password}"
1273
+ )
1274
+
1275
+ logging.info("Starting Redis container (Port: 6379): %s", redis_cmd)
1276
+
1277
+ # Start Redis container first
1278
+ redis_process = subprocess.Popen(
1279
+ redis_cmd,
1280
+ shell=True,
1281
+ stdout=subprocess.PIPE,
1282
+ stderr=subprocess.PIPE,
1283
+ )
1284
+ logging.info("Redis container started successfully on localhost:6379")
1285
+
1286
+ # Wait for Redis to be ready
1287
+ time.sleep(5)
1288
+
1271
1289
  env_vars = {
1272
- "REDIS_URL": f"{container_info['container_name']}:{container_info['external_port']}",
1273
- "REDIS_PASSWORD": container_info["password"],
1290
+ "REDIS_URL": f"localhost:6379",
1291
+ "REDIS_PASSWORD": redis_password,
1274
1292
  }
1275
1293
 
1276
- network_config = f" --network {container_info['network_name']} -p 8082:8082"
1277
-
1278
- # Make the docker file here
1294
+ # bg-redis management container with --net=host (Port: 8082)
1279
1295
  cmd = (
1280
- f"docker run "
1281
- f"{network_config} "
1296
+ f"docker run --net=host "
1282
1297
  f"-e REDIS_URL={shlex.quote(env_vars['REDIS_URL'])} "
1283
1298
  f"-e REDIS_PASSWORD={shlex.quote(env_vars['REDIS_PASSWORD'])} "
1284
1299
  f"-e MATRICE_ACCESS_KEY_ID={shlex.quote(self.matrice_access_key_id)} "
@@ -1290,7 +1305,7 @@ def redis_setup_execute(self: ActionInstance):
1290
1305
  f"{self.action_record_id} "
1291
1306
  )
1292
1307
 
1293
- logging.info("cmd is: %s", cmd)
1308
+ logging.info("Starting bg-redis management (Port: 8082): %s", cmd)
1294
1309
 
1295
1310
  self.start(cmd, "redis_setup")
1296
1311
 
@@ -128,8 +128,12 @@ def has_gpu() -> bool:
128
128
  Returns:
129
129
  bool: True if GPU is present, False otherwise
130
130
  """
131
- subprocess.run("nvidia-smi", check=True)
132
- return True
131
+ try:
132
+ subprocess.run("nvidia-smi", timeout=5)
133
+ return True
134
+ except subprocess.TimeoutExpired:
135
+ logging.warning("nvidia-smi command timed out after 5 seconds")
136
+ return False
133
137
 
134
138
 
135
139
  @log_errors(default_return=0, raise_exception=False)
@@ -141,13 +145,17 @@ def get_gpu_memory_usage() -> float:
141
145
  float: Memory usage between 0 and 1
142
146
  """
143
147
  command = "nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader"
144
- output = subprocess.check_output(command.split()).decode("ascii").strip().split("\n")
145
- memory_percentages = []
146
- for line in output:
147
- used, total = map(int, line.split(","))
148
- usage_percentage = used / total
149
- memory_percentages.append(usage_percentage)
150
- return min(memory_percentages)
148
+ try:
149
+ output = subprocess.check_output(command.split(), timeout=5).decode("ascii").strip().split("\n")
150
+ memory_percentages = []
151
+ for line in output:
152
+ used, total = map(int, line.split(","))
153
+ usage_percentage = used / total
154
+ memory_percentages.append(usage_percentage)
155
+ return min(memory_percentages)
156
+ except subprocess.TimeoutExpired:
157
+ logging.warning("nvidia-smi command timed out after 5 seconds in get_gpu_memory_usage")
158
+ return 0
151
159
 
152
160
 
153
161
  @log_errors(default_return=0, raise_exception=False)
@@ -194,17 +202,24 @@ def get_gpu_info() -> list:
194
202
  Returns:
195
203
  list: GPU information strings
196
204
  """
197
- with subprocess.Popen(
205
+ proc = subprocess.Popen(
198
206
  [
199
207
  "nvidia-smi",
200
208
  "--query-gpu=index,uuid,utilization.gpu,memory.total,memory.used,memory.free,driver_version,name,gpu_serial,display_active,display_mode,temperature.gpu",
201
209
  "--format=csv,noheader,nounits",
202
210
  ],
203
211
  stdout=subprocess.PIPE,
204
- ) as proc:
205
- stdout, _ = proc.communicate()
212
+ stderr=subprocess.PIPE,
213
+ )
214
+ try:
215
+ stdout, stderr = proc.communicate(timeout=5)
206
216
  output = stdout.decode("UTF-8")
207
217
  return output.split("\n")[:-1]
218
+ except subprocess.TimeoutExpired:
219
+ logging.warning("nvidia-smi command timed out after 5 seconds in get_gpu_info")
220
+ proc.kill()
221
+ proc.communicate() # flush output after kill
222
+ return []
208
223
 
209
224
 
210
225
  @log_errors(default_return="", raise_exception=False)
@@ -366,7 +381,13 @@ def get_max_file_system() -> str:
366
381
  max_available_filesystem,
367
382
  max_disk["available"],
368
383
  )
369
- if max_available_filesystem in ["/", ""]:
384
+ # Check if filesystem is writable, or if it's root/empty
385
+ if max_available_filesystem in ["/", ""] or not os.access(max_available_filesystem, os.W_OK):
386
+ if max_available_filesystem not in ["/", ""]:
387
+ logging.warning(
388
+ "Filesystem %s is not writable, falling back to home directory",
389
+ max_available_filesystem,
390
+ )
370
391
  home_dir = os.path.expanduser("~")
371
392
  if not os.environ.get("WORKSPACE_DIR"):
372
393
  logging.error("WORKSPACE_DIR environment variable not set")
@@ -499,7 +520,12 @@ def get_gpu_with_sufficient_memory_for_action(
499
520
  """
500
521
  required_gpu_memory = get_required_gpu_memory(action_details)
501
522
  command = "nvidia-smi --query-gpu=memory.free --format=csv"
502
- memory_free_info = subprocess.check_output(command.split()).decode("ascii").split("\n")
523
+ try:
524
+ memory_free_info = subprocess.check_output(command.split(), timeout=5).decode("ascii").split("\n")
525
+ except subprocess.TimeoutExpired:
526
+ logging.error("nvidia-smi command timed out after 5 seconds in get_gpu_with_sufficient_memory_for_action")
527
+ raise ValueError("Failed to get GPU information - nvidia-smi timed out")
528
+
503
529
  if len(memory_free_info) < 2:
504
530
  raise ValueError("No GPU information available from nvidia-smi")
505
531
  memory_free_values = [int(x.split()[0]) for x in memory_free_info[1:-1]]
@@ -542,7 +568,12 @@ def get_single_gpu_with_sufficient_memory_for_action(
542
568
  """
543
569
  required_gpu_memory = get_required_gpu_memory(action_details)
544
570
  command = "nvidia-smi --query-gpu=memory.free --format=csv"
545
- memory_free_info = subprocess.check_output(command.split()).decode("ascii").split("\n")
571
+ try:
572
+ memory_free_info = subprocess.check_output(command.split(), timeout=5).decode("ascii").split("\n")
573
+ except subprocess.TimeoutExpired:
574
+ logging.error("nvidia-smi command timed out after 5 seconds in get_single_gpu_with_sufficient_memory_for_action")
575
+ raise ValueError("Failed to get GPU information - nvidia-smi timed out")
576
+
546
577
  if len(memory_free_info) < 2:
547
578
  raise ValueError("No GPU information available from nvidia-smi")
548
579
  memory_free_values = [int(x.split()[0]) for x in memory_free_info[1:-1]]
@@ -150,20 +150,25 @@ class ResourcesTracker:
150
150
  if not has_gpu():
151
151
  return 0
152
152
  gpu_util = 0
153
- result = subprocess.run(
154
- ["nvidia-smi", "pmon", "-c", "1"],
155
- capture_output=True,
156
- text=True,
157
- check=True,
158
- )
159
- pmon_output = result.stdout.strip().split("\n")
160
- for line in pmon_output[2:]:
161
- parts = line.split()
162
- if len(parts) >= 8:
163
- pid = parts[1]
164
- gpu_usage = parts[3]
165
- if pid == str(container_pid):
166
- gpu_util += float(gpu_usage) if gpu_usage != "-" else 0
153
+ try:
154
+ result = subprocess.run(
155
+ ["nvidia-smi", "pmon", "-c", "1"],
156
+ capture_output=True,
157
+ text=True,
158
+ check=True,
159
+ timeout=5,
160
+ )
161
+ pmon_output = result.stdout.strip().split("\n")
162
+ for line in pmon_output[2:]:
163
+ parts = line.split()
164
+ if len(parts) >= 8:
165
+ pid = parts[1]
166
+ gpu_usage = parts[3]
167
+ if pid == str(container_pid):
168
+ gpu_util += float(gpu_usage) if gpu_usage != "-" else 0
169
+ except subprocess.TimeoutExpired:
170
+ logging.warning("nvidia-smi pmon command timed out after 5 seconds in get_container_gpu_usage")
171
+ return 0
167
172
  return gpu_util
168
173
 
169
174
  @log_errors(default_return=0, raise_exception=False, log_error=False)
@@ -185,19 +190,24 @@ class ResourcesTracker:
185
190
  "--format=csv,noheader,nounits",
186
191
  ]
187
192
  total_memory = 0
188
- result = subprocess.run(
189
- cmd,
190
- stdout=subprocess.PIPE,
191
- stderr=subprocess.PIPE,
192
- text=True,
193
- check=True,
194
- )
195
- for line in result.stdout.splitlines():
196
- parts = line.strip().split(", ")
197
- if len(parts) == 2:
198
- process_pid, used_memory = parts
199
- if process_pid == str(container_pid):
200
- total_memory += int(used_memory)
193
+ try:
194
+ result = subprocess.run(
195
+ cmd,
196
+ stdout=subprocess.PIPE,
197
+ stderr=subprocess.PIPE,
198
+ text=True,
199
+ check=True,
200
+ timeout=5,
201
+ )
202
+ for line in result.stdout.splitlines():
203
+ parts = line.strip().split(", ")
204
+ if len(parts) == 2:
205
+ process_pid, used_memory = parts
206
+ if process_pid == str(container_pid):
207
+ total_memory += int(used_memory)
208
+ except subprocess.TimeoutExpired:
209
+ logging.warning("nvidia-smi command timed out after 5 seconds in get_container_gpu_memory_usage")
210
+ return 0
201
211
  return total_memory
202
212
 
203
213
  @log_errors(default_return=(0, 0, 0, 0), raise_exception=False, log_error=True)
@@ -227,7 +237,12 @@ class ResourcesTracker:
227
237
  if not has_gpu():
228
238
  return gpu_memory_free, gpu_utilization
229
239
 
230
- subprocess.check_output("nvidia-smi")
240
+ try:
241
+ subprocess.check_output("nvidia-smi", timeout=5)
242
+ except subprocess.TimeoutExpired:
243
+ logging.warning("nvidia-smi command timed out after 5 seconds in _get_gpu_resources")
244
+ return 0, 0.0
245
+
231
246
  info_list = get_gpu_info()
232
247
  for info in info_list:
233
248
  info_split = info.split(", ")
@@ -247,10 +262,14 @@ class ActionsResourcesTracker:
247
262
  self.max_actions_usage = {}
248
263
  self.resources_tracker = ResourcesTracker()
249
264
  self.client = docker.from_env()
265
+ self.logged_stopped_containers = []
250
266
 
251
267
  @log_errors(raise_exception=False, log_error=True)
252
268
  def update_actions_resources(self) -> None:
253
- """Process both running and exited containers"""
269
+ """Process both running and exited containers.
270
+
271
+ Note: Does not remove containers to keep logs. Only tracks resource usage.
272
+ """
254
273
  exited_containers = self.client.containers.list(
255
274
  filters={"status": "exited"},
256
275
  all=True,
@@ -259,8 +278,12 @@ class ActionsResourcesTracker:
259
278
  if exited_containers:
260
279
  for container in exited_containers:
261
280
  try:
281
+ if container.id in self.logged_stopped_containers:
282
+ continue
262
283
  self._update_container_action_status(container, "completed")
263
- container.remove()
284
+ self.logged_stopped_containers.append(container.id)
285
+ # COMMENTED OUT: Do not remove containers to keep logs
286
+ # container.remove()
264
287
  except Exception as err:
265
288
  logging.error(
266
289
  "Error processing exited container %s: %s",
@@ -310,7 +333,7 @@ class ActionsResourcesTracker:
310
333
  args_24 = [arg for arg in remove_quotation_marks(inspect_data["Args"]) if len(arg) == 24 and "pypi" not in arg]
311
334
  action_record_id = args_24[-1] if args_24 else None
312
335
  if not action_record_id:
313
- logging.warning("No valid action_id found for the container. Container ID: %s, Args: %s", container.id, inspect_data["Args"])
336
+ logging.debug("No valid action_id found for the container. Container ID: %s, Args: %s", container.id, inspect_data["Args"])
314
337
  duration = calculate_time_difference(start_time, finish_time)
315
338
  (
316
339
  current_gpu_utilization,
@@ -320,6 +343,8 @@ class ActionsResourcesTracker:
320
343
  ) = self.get_current_action_usage(container, status)
321
344
  sub_containers = self.get_sub_containers_by_label("action_id", action_record_id)
322
345
  for sub_container in sub_containers:
346
+ if sub_container.id in self.logged_stopped_containers:
347
+ continue
323
348
  (
324
349
  sub_container_gpu_utilization,
325
350
  sub_container_gpu_memory,
@@ -330,10 +355,12 @@ class ActionsResourcesTracker:
330
355
  current_gpu_memory += sub_container_gpu_memory
331
356
  current_cpu_utilization += sub_container_cpu_utilization
332
357
  current_memory_utilization += sub_container_memory_utilization
358
+ # COMMENTED OUT: Do not stop/remove sub-containers to keep logs
333
359
  if status == "completed":
334
360
  try:
335
361
  sub_container.stop()
336
- sub_container.remove(force=True)
362
+ self.logged_stopped_containers.append(sub_container.id)
363
+ # sub_container.remove(force=True)
337
364
  except Exception as err:
338
365
  logging.error(
339
366
  "Error removing sub-container %s: %s",
@@ -9,6 +9,7 @@ import json
9
9
  import time
10
10
  import base64
11
11
 
12
+ # TODO: update /scaling to /compute
12
13
 
13
14
  class Scaling:
14
15
 
@@ -185,6 +186,8 @@ class Scaling:
185
186
  Returns:
186
187
  Tuple of (data, error, message) from API response
187
188
  """
189
+ if not action_record_id:
190
+ return None, "Action record id is required", "Action record id is required"
188
191
  logging.info(
189
192
  "Updating action status for action %s",
190
193
  action_record_id,
@@ -499,7 +502,7 @@ class Scaling:
499
502
 
500
503
  # Using REST API directly
501
504
  try:
502
- path = f"/v1/scaling/update_available_resources/{self.instance_id}"
505
+ path = f"/v1/compute/update_available_resources/{self.instance_id}"
503
506
  resp = self.rpc.put(path=path, payload=payload)
504
507
  return self.handle_response(
505
508
  resp,
@@ -644,7 +647,7 @@ class Scaling:
644
647
  Returns:
645
648
  Tuple of (data, error, message) from API response
646
649
  """
647
- path = f"/v1/scaling/get_models_secret_keys?secret_name={secret_name}"
650
+ path = f"/v1/compute/get_models_secret_keys?secret_name={secret_name}"
648
651
  resp = self.rpc.get(path=path)
649
652
  return self.handle_response(
650
653
  resp,
@@ -29,8 +29,8 @@ def setup_workspace_and_run_task(
29
29
  workspace_dir = f"{work_fs}/{action_id}"
30
30
  codebase_zip_path = f"{workspace_dir}/file.zip"
31
31
  requirements_txt_path = f"{workspace_dir}/requirements.txt"
32
- if os.path.exists(workspace_dir):
33
- return
32
+ # if os.path.exists(workspace_dir): # don't skip if workspace already exists, override it
33
+ # return
34
34
  os.makedirs(workspace_dir, exist_ok=True)
35
35
 
36
36
  # Download codebase ZIP file