PyPI - matrice-compute - Versions diffs - 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl - Mend

matrice-compute 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

matrice_compute/action_instance.py CHANGED Viewed

@@ -1034,6 +1034,8 @@ def augmentation_server_creation_execute(
 def database_setup_execute(self: ActionInstance):
     """
     Creates and setup the database for facial recognition server.
+    MongoDB runs on port 27020:27017 (localhost only with --net=host).
+    Qdrant runs on port 6334 (localhost only with --net=host).
     """
     action_details = self.get_action_details()
     if not action_details:
@@ -1044,11 +1046,11 @@ def database_setup_execute(self: ActionInstance):
     project_id = action_details["_idProject"]
-    # Run docker compose up
+    # MongoDB container with --net=host (Port: 27020:27017)
     cmd = (
-        f"docker run --pull=always -p 27020:27017 "
+        f"docker run --pull=always --net=host "
         f"--name mongodbdatabase "
+        f"-v matrice_myvol:/matrice_data "
         f"-e ACTION_RECORD_ID={self.action_record_id} "
         f"-e MATRICE_ACCESS_KEY_ID={self.matrice_access_key_id} "
         f"-e MATRICE_SECRET_ACCESS_KEY={self.matrice_secret_access_key} "
@@ -1056,15 +1058,16 @@ def database_setup_execute(self: ActionInstance):
         f'-e ENV="{os.environ.get("ENV", "prod")}" '
         f"{image} "
     )
-    print("Docker command", cmd)
+    logging.info("Starting MongoDB container (Port: 27020:27017): %s", cmd)
+    # Qdrant container with --net=host (Port: 6334)
     qdrant_cmd = (
-        f"docker run --pull=always "
+        f"docker run --pull=always --net=host "
         f"--name qdrant "
-        f"-p 6333:6333 "
-        f"-p 6334:6334 "
+        f"-v matrice_myvol:/matrice_data "
         f"{'qdrant/qdrant:latest'} "
     )
+    logging.info("Starting Qdrant container (Port: 6334): %s", qdrant_cmd)
     # Docker Command run
     self.start(cmd, "database_setup")
@@ -1075,7 +1078,8 @@ def database_setup_execute(self: ActionInstance):
 @log_errors(raise_exception=False)
 def facial_recognition_setup_execute(self: ActionInstance):
     """
-    Creates and setup the database for facial recognition server.
+    Creates and setup the facial recognition worker server.
+    Facial recognition worker runs on port 8081 (localhost only with --net=host).
     """
     action_details = self.get_action_details()
@@ -1085,18 +1089,18 @@ def facial_recognition_setup_execute(self: ActionInstance):
     self.setup_action_requirements(action_details)
-    # Add worker container run command
+    # Facial recognition worker container with --net=host (Port: 8081)
     worker_cmd = (
-        f"docker run -d --pull=always "
+        f"docker run -d --pull=always --net=host "
         f"--name worker "
-        f"-p 8081:8081 "
+        f"-v matrice_myvol:/matrice_data "
         f'-e ENV="{os.environ.get("ENV", "prod")}" '
         f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
         f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
         f'-e ACTION_ID="{self.action_record_id}" '
         f"{image}"
     )
-    print("Worker docker run command:", worker_cmd)
+    logging.info("Starting facial recognition worker (Port: 8081): %s", worker_cmd)
     # Docker Command run
     self.start(worker_cmd, "facial_recognition_setup")
@@ -1104,30 +1108,30 @@ def facial_recognition_setup_execute(self: ActionInstance):
 @log_errors(raise_exception=False)
 def lpr_setup_execute(self: ActionInstance):
     """
-    Creates and setup the database for license plate server.
+    Creates and setup the license plate recognition server.
+    LPR worker runs on port 8082 (localhost only with --net=host).
     """
     action_details = self.get_action_details()
     if not action_details:
         return
     image = self.docker_container
-    external_port = self.scaling.get_open_port()
     self.setup_action_requirements(action_details)
-    # Add worker container run command
+    # LPR worker container with --net=host (Port: 8082)
     worker_cmd = (
-        f"docker run -d --pull=always "
+        f"docker run -d --net=host --pull=always "
         f"--name lpr-worker "
-        f"-p {external_port}:8082 "
+        f"-v matrice_myvol:/matrice_data "
         f'-e ENV="{os.environ.get("ENV", "prod")}" '
         f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
         f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
         f'-e ACTION_ID="{self.action_record_id}" '
-        f'-e PORT={external_port} '
+        f'-e PORT=8082 '
         f"{image}"
     )
-    print("Worker docker run command:", worker_cmd)
+    logging.info("Starting LPR worker (Port: 8082): %s", worker_cmd)
     # Docker Command run
     self.start(worker_cmd, "lpr_setup")
@@ -1169,7 +1173,7 @@ def inference_ws_server_execute(self: ActionInstance):
 def fe_fs_streaming_execute(self: ActionInstance):
     """
     Creates and setup the frontend for fs streaming.
-    Frontend streaming runs on port 3000 (localhost only with --net=host).
+    Frontend streaming runs on port 3001 (localhost only with --net=host).
     """
     action_details = self.get_action_details()
@@ -1179,16 +1183,17 @@ def fe_fs_streaming_execute(self: ActionInstance):
     self.setup_action_requirements(action_details)
-    # Frontend streaming with --net=host (Port: 3000)
+    # Frontend streaming with --net=host (Port: 3001)
     worker_cmd = (
         f"docker run -d --pull=always --net=host "
         f"--name fe_streaming "
+        f"-v matrice_myvol:/matrice_data "
         f'-e ENV="{os.environ.get("ENV", "prod")}" '
         f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
         f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
         f"{image}"
     )
-    logging.info("Starting frontend streaming (Port: 3000): %s", worker_cmd)
+    logging.info("Starting frontend streaming (Port: 3001): %s", worker_cmd)
     # Docker Command run
     self.start(worker_cmd, "fe_fs_streaming")
@@ -1462,7 +1467,10 @@ def streaming_gateway_execute(self: ActionInstance):
 @log_errors(raise_exception=False)
 def kafka_setup_execute(self: ActionInstance):
-    """Execute kafka server task."""
+    """
+    Execute kafka server task.
+    Kafka runs on port 9092 (SASL_PLAINTEXT) and 9093 (CONTROLLER) - localhost only with --net=host.
+    """
     action_details = self.get_action_details()
     if not action_details:
         return
@@ -1470,7 +1478,6 @@ def kafka_setup_execute(self: ActionInstance):
     host_ip = (
         urllib.request.urlopen("https://ident.me", timeout=10).read().decode("utf8")
     )
-    container_port = 9092
     # Setup credentials
     self.setup_action_requirements(action_details)
@@ -1538,7 +1545,7 @@ def kafka_setup_execute(self: ActionInstance):
         [f"-e {key}={shlex.quote(str(value))}" for key, value in env_vars.items()]
     )
-    # Build the docker command directly to match user's pattern
+    # Build the docker command with --net=host
     pypi_index = f"https://{'test.' if env != 'prod' else ''}pypi.org/simple/"
     if env == 'dev':
@@ -1547,8 +1554,9 @@ def kafka_setup_execute(self: ActionInstance):
     else:
         pkgs = f"matrice_common matrice"
+    # Kafka container with --net=host (Ports: 9092, 9093)
     cmd = (
-        f"docker run -p {host_port}:{container_port} "
+        f"docker run --net=host "
         f"{env_args} "
         f"--shm-size=30G --pull=always "
         f'aiforeveryone/matrice-kafka:latest /bin/bash -c "'
@@ -1561,5 +1569,5 @@ def kafka_setup_execute(self: ActionInstance):
         f'venv/bin/python3 main.py {self.action_record_id} {host_port}"'
     )
-    logging.info("cmd is: %s", cmd)
+    logging.info("Starting Kafka container (Ports: 9092, 9093): %s", cmd)
     self.start(cmd, "kafka_setup")

matrice_compute/instance_utils.py CHANGED Viewed

@@ -128,8 +128,12 @@ def has_gpu() -> bool:
     Returns:
         bool: True if GPU is present, False otherwise
     """
-    subprocess.run("nvidia-smi", check=True)
-    return True
+    try:
+        subprocess.run("nvidia-smi", timeout=5)
+        return True
+    except subprocess.TimeoutExpired:
+        logging.warning("nvidia-smi command timed out after 5 seconds")
+        return False
 @log_errors(default_return=0, raise_exception=False)
@@ -141,13 +145,17 @@ def get_gpu_memory_usage() -> float:
         float: Memory usage between 0 and 1
     """
     command = "nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader"
-    output = subprocess.check_output(command.split()).decode("ascii").strip().split("\n")
-    memory_percentages = []
-    for line in output:
-        used, total = map(int, line.split(","))
-        usage_percentage = used / total
-        memory_percentages.append(usage_percentage)
-    return min(memory_percentages)
+    try:
+        output = subprocess.check_output(command.split(), timeout=5).decode("ascii").strip().split("\n")
+        memory_percentages = []
+        for line in output:
+            used, total = map(int, line.split(","))
+            usage_percentage = used / total
+            memory_percentages.append(usage_percentage)
+        return min(memory_percentages)
+    except subprocess.TimeoutExpired:
+        logging.warning("nvidia-smi command timed out after 5 seconds in get_gpu_memory_usage")
+        return 0
 @log_errors(default_return=0, raise_exception=False)
@@ -194,17 +202,24 @@ def get_gpu_info() -> list:
     Returns:
         list: GPU information strings
     """
-    with subprocess.Popen(
+    proc = subprocess.Popen(
         [
             "nvidia-smi",
             "--query-gpu=index,uuid,utilization.gpu,memory.total,memory.used,memory.free,driver_version,name,gpu_serial,display_active,display_mode,temperature.gpu",
             "--format=csv,noheader,nounits",
         ],
         stdout=subprocess.PIPE,
-    ) as proc:
-        stdout, _ = proc.communicate()
+        stderr=subprocess.PIPE,
+    )
+    try:
+        stdout, stderr = proc.communicate(timeout=5)
         output = stdout.decode("UTF-8")
         return output.split("\n")[:-1]
+    except subprocess.TimeoutExpired:
+        logging.warning("nvidia-smi command timed out after 5 seconds in get_gpu_info")
+        proc.kill()
+        proc.communicate()  # flush output after kill
+        return []
 @log_errors(default_return="", raise_exception=False)
@@ -505,7 +520,12 @@ def get_gpu_with_sufficient_memory_for_action(
     """
     required_gpu_memory = get_required_gpu_memory(action_details)
     command = "nvidia-smi --query-gpu=memory.free --format=csv"
-    memory_free_info = subprocess.check_output(command.split()).decode("ascii").split("\n")
+    try:
+        memory_free_info = subprocess.check_output(command.split(), timeout=5).decode("ascii").split("\n")
+    except subprocess.TimeoutExpired:
+        logging.error("nvidia-smi command timed out after 5 seconds in get_gpu_with_sufficient_memory_for_action")
+        raise ValueError("Failed to get GPU information - nvidia-smi timed out")
     if len(memory_free_info) < 2:
         raise ValueError("No GPU information available from nvidia-smi")
     memory_free_values = [int(x.split()[0]) for x in memory_free_info[1:-1]]
@@ -548,7 +568,12 @@ def get_single_gpu_with_sufficient_memory_for_action(
     """
     required_gpu_memory = get_required_gpu_memory(action_details)
     command = "nvidia-smi --query-gpu=memory.free --format=csv"
-    memory_free_info = subprocess.check_output(command.split()).decode("ascii").split("\n")
+    try:
+        memory_free_info = subprocess.check_output(command.split(), timeout=5).decode("ascii").split("\n")
+    except subprocess.TimeoutExpired:
+        logging.error("nvidia-smi command timed out after 5 seconds in get_single_gpu_with_sufficient_memory_for_action")
+        raise ValueError("Failed to get GPU information - nvidia-smi timed out")
     if len(memory_free_info) < 2:
         raise ValueError("No GPU information available from nvidia-smi")
     memory_free_values = [int(x.split()[0]) for x in memory_free_info[1:-1]]

matrice_compute/resources_tracker.py CHANGED Viewed

@@ -150,20 +150,25 @@ class ResourcesTracker:
         if not has_gpu():
             return 0
         gpu_util = 0
-        result = subprocess.run(
-            ["nvidia-smi", "pmon", "-c", "1"],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-        pmon_output = result.stdout.strip().split("\n")
-        for line in pmon_output[2:]:
-            parts = line.split()
-            if len(parts) >= 8:
-                pid = parts[1]
-                gpu_usage = parts[3]
-                if pid == str(container_pid):
-                    gpu_util += float(gpu_usage) if gpu_usage != "-" else 0
+        try:
+            result = subprocess.run(
+                ["nvidia-smi", "pmon", "-c", "1"],
+                capture_output=True,
+                text=True,
+                check=True,
+                timeout=5,
+            )
+            pmon_output = result.stdout.strip().split("\n")
+            for line in pmon_output[2:]:
+                parts = line.split()
+                if len(parts) >= 8:
+                    pid = parts[1]
+                    gpu_usage = parts[3]
+                    if pid == str(container_pid):
+                        gpu_util += float(gpu_usage) if gpu_usage != "-" else 0
+        except subprocess.TimeoutExpired:
+            logging.warning("nvidia-smi pmon command timed out after 5 seconds in get_container_gpu_usage")
+            return 0
         return gpu_util
     @log_errors(default_return=0, raise_exception=False, log_error=False)
@@ -185,19 +190,24 @@ class ResourcesTracker:
             "--format=csv,noheader,nounits",
         ]
         total_memory = 0
-        result = subprocess.run(
-            cmd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            check=True,
-        )
-        for line in result.stdout.splitlines():
-            parts = line.strip().split(", ")
-            if len(parts) == 2:
-                process_pid, used_memory = parts
-                if process_pid == str(container_pid):
-                    total_memory += int(used_memory)
+        try:
+            result = subprocess.run(
+                cmd,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                check=True,
+                timeout=5,
+            )
+            for line in result.stdout.splitlines():
+                parts = line.strip().split(", ")
+                if len(parts) == 2:
+                    process_pid, used_memory = parts
+                    if process_pid == str(container_pid):
+                        total_memory += int(used_memory)
+        except subprocess.TimeoutExpired:
+            logging.warning("nvidia-smi command timed out after 5 seconds in get_container_gpu_memory_usage")
+            return 0
         return total_memory
     @log_errors(default_return=(0, 0, 0, 0), raise_exception=False, log_error=True)
@@ -227,7 +237,12 @@ class ResourcesTracker:
         if not has_gpu():
             return gpu_memory_free, gpu_utilization
-        subprocess.check_output("nvidia-smi")
+        try:
+            subprocess.check_output("nvidia-smi", timeout=5)
+        except subprocess.TimeoutExpired:
+            logging.warning("nvidia-smi command timed out after 5 seconds in _get_gpu_resources")
+            return 0, 0.0
         info_list = get_gpu_info()
         for info in info_list:
             info_split = info.split(", ")

{matrice_compute-0.1.14.dist-info → matrice_compute-0.1.16.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: matrice_compute
-Version: 0.1.14
+Version: 0.1.16
 Summary: Common server utilities for Matrice.ai services
 Author-email: "Matrice.ai" <dipendra@matrice.ai>
 License-Expression: MIT

{matrice_compute-0.1.14.dist-info → matrice_compute-0.1.16.dist-info}/RECORD RENAMED Viewed

@@ -1,17 +1,17 @@
 matrice_compute/__init__.py,sha256=ZzQcFsT005VCgq9VZUh565f4upOooEb_FwZ6RgweNZs,597
-matrice_compute/action_instance.py,sha256=aYNpRySPatxFltn_ekVmCd5h69I992_YerUTZwGWyHA,59763
+matrice_compute/action_instance.py,sha256=cilzBD3o6K5CpDZEJCGMrNg0bCoUyOW3aCLNrMGyS10,60554
 matrice_compute/actions_manager.py,sha256=5U-xM6tl_Z6x96bi-c7AJM9ru80LqTN8f5Oce8dAu_A,7780
 matrice_compute/actions_scaledown_manager.py,sha256=pJ0nduNwHWZ10GnqJNx0Ok7cVWabQ_M8E2Vb9pH3A_k,2002
 matrice_compute/instance_manager.py,sha256=8USyX09ZxLvnVNIrjRogbyUeMCfgWnasuRqYkkVF4tQ,10146
-matrice_compute/instance_utils.py,sha256=7jnWurSpq8PQxPGlSTc0qmpNdD5jIL8pjYKdjhVhS60,22310
+matrice_compute/instance_utils.py,sha256=cANKRUlUzfecnzVEMC6Gkg9K7GZajH9ojNPiChdJL9s,23455
 matrice_compute/prechecks.py,sha256=W9YmNF3RcLhOf4U8WBlExvFqDw1aGWSNTlJtA73lbDQ,17196
 matrice_compute/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-matrice_compute/resources_tracker.py,sha256=2hLKVxYihROtQ6fO4V_BplTgvkN8qH2H9_qxpOIpZkc,18521
+matrice_compute/resources_tracker.py,sha256=n57IJmT5GjNEX8yQL7nbKv57bjvESYM-vRQcQ0DgQXQ,19256
 matrice_compute/scaling.py,sha256=3F8SWvy9wWczpJ6dbY5RrXWw5ByZlIzAPJklir3KIFI,35359
 matrice_compute/shutdown_manager.py,sha256=0MYV_AqygqR9NEntYf7atUC-PbWXyNkm1f-8c2aizgA,13234
 matrice_compute/task_utils.py,sha256=3qIutiQdYPyGRxH9ZwLbqdg8sZcnp6jp08pszWCRFl0,2820
-matrice_compute-0.1.14.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
-matrice_compute-0.1.14.dist-info/METADATA,sha256=u8ZIOoIX3uMEA4Lgaiuh73xsoPSdcHTZXAJuIBpn6KE,1038
-matrice_compute-0.1.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-matrice_compute-0.1.14.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
-matrice_compute-0.1.14.dist-info/RECORD,,
+matrice_compute-0.1.16.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
+matrice_compute-0.1.16.dist-info/METADATA,sha256=gTIsLb7gHIZCl4rvaQ5tKQW8b2OW2jfvLqyYxn_BMFo,1038
+matrice_compute-0.1.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+matrice_compute-0.1.16.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
+matrice_compute-0.1.16.dist-info/RECORD,,

{matrice_compute-0.1.14.dist-info → matrice_compute-0.1.16.dist-info}/WHEEL RENAMED Viewed

File without changes

{matrice_compute-0.1.14.dist-info → matrice_compute-0.1.16.dist-info}/licenses/LICENSE.txt RENAMED Viewed

File without changes

{matrice_compute-0.1.14.dist-info → matrice_compute-0.1.16.dist-info}/top_level.txt RENAMED Viewed

File without changes

matrice-compute 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl

matrice-compute 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl