matrice-compute 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1034,6 +1034,8 @@ def augmentation_server_creation_execute(
1034
1034
  def database_setup_execute(self: ActionInstance):
1035
1035
  """
1036
1036
  Creates and setup the database for facial recognition server.
1037
+ MongoDB runs on port 27020:27017 (localhost only with --net=host).
1038
+ Qdrant runs on port 6334 (localhost only with --net=host).
1037
1039
  """
1038
1040
  action_details = self.get_action_details()
1039
1041
  if not action_details:
@@ -1044,11 +1046,11 @@ def database_setup_execute(self: ActionInstance):
1044
1046
 
1045
1047
  project_id = action_details["_idProject"]
1046
1048
 
1047
- # Run docker compose up
1048
-
1049
+ # MongoDB container with --net=host (Port: 27020:27017)
1049
1050
  cmd = (
1050
- f"docker run --pull=always -p 27020:27017 "
1051
+ f"docker run --pull=always --net=host "
1051
1052
  f"--name mongodbdatabase "
1053
+ f"-v matrice_myvol:/matrice_data "
1052
1054
  f"-e ACTION_RECORD_ID={self.action_record_id} "
1053
1055
  f"-e MATRICE_ACCESS_KEY_ID={self.matrice_access_key_id} "
1054
1056
  f"-e MATRICE_SECRET_ACCESS_KEY={self.matrice_secret_access_key} "
@@ -1056,15 +1058,16 @@ def database_setup_execute(self: ActionInstance):
1056
1058
  f'-e ENV="{os.environ.get("ENV", "prod")}" '
1057
1059
  f"{image} "
1058
1060
  )
1059
- print("Docker command", cmd)
1061
+ logging.info("Starting MongoDB container (Port: 27020:27017): %s", cmd)
1060
1062
 
1063
+ # Qdrant container with --net=host (Port: 6334)
1061
1064
  qdrant_cmd = (
1062
- f"docker run --pull=always "
1065
+ f"docker run --pull=always --net=host "
1063
1066
  f"--name qdrant "
1064
- f"-p 6333:6333 "
1065
- f"-p 6334:6334 "
1067
+ f"-v matrice_myvol:/matrice_data "
1066
1068
  f"{'qdrant/qdrant:latest'} "
1067
1069
  )
1070
+ logging.info("Starting Qdrant container (Port: 6334): %s", qdrant_cmd)
1068
1071
 
1069
1072
  # Docker Command run
1070
1073
  self.start(cmd, "database_setup")
@@ -1075,7 +1078,8 @@ def database_setup_execute(self: ActionInstance):
1075
1078
  @log_errors(raise_exception=False)
1076
1079
  def facial_recognition_setup_execute(self: ActionInstance):
1077
1080
  """
1078
- Creates and setup the database for facial recognition server.
1081
+ Creates and setup the facial recognition worker server.
1082
+ Facial recognition worker runs on port 8081 (localhost only with --net=host).
1079
1083
  """
1080
1084
  action_details = self.get_action_details()
1081
1085
 
@@ -1085,18 +1089,18 @@ def facial_recognition_setup_execute(self: ActionInstance):
1085
1089
 
1086
1090
  self.setup_action_requirements(action_details)
1087
1091
 
1088
- # Add worker container run command
1092
+ # Facial recognition worker container with --net=host (Port: 8081)
1089
1093
  worker_cmd = (
1090
- f"docker run -d --pull=always "
1094
+ f"docker run -d --pull=always --net=host "
1091
1095
  f"--name worker "
1092
- f"-p 8081:8081 "
1096
+ f"-v matrice_myvol:/matrice_data "
1093
1097
  f'-e ENV="{os.environ.get("ENV", "prod")}" '
1094
1098
  f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
1095
1099
  f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
1096
1100
  f'-e ACTION_ID="{self.action_record_id}" '
1097
1101
  f"{image}"
1098
1102
  )
1099
- print("Worker docker run command:", worker_cmd)
1103
+ logging.info("Starting facial recognition worker (Port: 8081): %s", worker_cmd)
1100
1104
 
1101
1105
  # Docker Command run
1102
1106
  self.start(worker_cmd, "facial_recognition_setup")
@@ -1104,30 +1108,30 @@ def facial_recognition_setup_execute(self: ActionInstance):
1104
1108
  @log_errors(raise_exception=False)
1105
1109
  def lpr_setup_execute(self: ActionInstance):
1106
1110
  """
1107
- Creates and setup the database for license plate server.
1111
+ Creates and setup the license plate recognition server.
1112
+ LPR worker runs on port 8082 (localhost only with --net=host).
1108
1113
  """
1109
1114
  action_details = self.get_action_details()
1110
1115
 
1111
1116
  if not action_details:
1112
1117
  return
1113
1118
  image = self.docker_container
1114
- external_port = self.scaling.get_open_port()
1115
1119
 
1116
1120
  self.setup_action_requirements(action_details)
1117
1121
 
1118
- # Add worker container run command
1122
+ # LPR worker container with --net=host (Port: 8082)
1119
1123
  worker_cmd = (
1120
- f"docker run -d --pull=always "
1124
+ f"docker run -d --net=host --pull=always "
1121
1125
  f"--name lpr-worker "
1122
- f"-p {external_port}:8082 "
1126
+ f"-v matrice_myvol:/matrice_data "
1123
1127
  f'-e ENV="{os.environ.get("ENV", "prod")}" '
1124
1128
  f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
1125
1129
  f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
1126
1130
  f'-e ACTION_ID="{self.action_record_id}" '
1127
- f'-e PORT={external_port} '
1131
+ f'-e PORT=8082 '
1128
1132
  f"{image}"
1129
1133
  )
1130
- print("Worker docker run command:", worker_cmd)
1134
+ logging.info("Starting LPR worker (Port: 8082): %s", worker_cmd)
1131
1135
 
1132
1136
  # Docker Command run
1133
1137
  self.start(worker_cmd, "lpr_setup")
@@ -1169,7 +1173,7 @@ def inference_ws_server_execute(self: ActionInstance):
1169
1173
  def fe_fs_streaming_execute(self: ActionInstance):
1170
1174
  """
1171
1175
  Creates and setup the frontend for fs streaming.
1172
- Frontend streaming runs on port 3000 (localhost only with --net=host).
1176
+ Frontend streaming runs on port 3001 (localhost only with --net=host).
1173
1177
  """
1174
1178
  action_details = self.get_action_details()
1175
1179
 
@@ -1179,16 +1183,17 @@ def fe_fs_streaming_execute(self: ActionInstance):
1179
1183
 
1180
1184
  self.setup_action_requirements(action_details)
1181
1185
 
1182
- # Frontend streaming with --net=host (Port: 3000)
1186
+ # Frontend streaming with --net=host (Port: 3001)
1183
1187
  worker_cmd = (
1184
1188
  f"docker run -d --pull=always --net=host "
1185
1189
  f"--name fe_streaming "
1190
+ f"-v matrice_myvol:/matrice_data "
1186
1191
  f'-e ENV="{os.environ.get("ENV", "prod")}" '
1187
1192
  f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
1188
1193
  f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
1189
1194
  f"{image}"
1190
1195
  )
1191
- logging.info("Starting frontend streaming (Port: 3000): %s", worker_cmd)
1196
+ logging.info("Starting frontend streaming (Port: 3001): %s", worker_cmd)
1192
1197
 
1193
1198
  # Docker Command run
1194
1199
  self.start(worker_cmd, "fe_fs_streaming")
@@ -1462,7 +1467,10 @@ def streaming_gateway_execute(self: ActionInstance):
1462
1467
 
1463
1468
  @log_errors(raise_exception=False)
1464
1469
  def kafka_setup_execute(self: ActionInstance):
1465
- """Execute kafka server task."""
1470
+ """
1471
+ Execute kafka server task.
1472
+ Kafka runs on port 9092 (SASL_PLAINTEXT) and 9093 (CONTROLLER) - localhost only with --net=host.
1473
+ """
1466
1474
  action_details = self.get_action_details()
1467
1475
  if not action_details:
1468
1476
  return
@@ -1470,7 +1478,6 @@ def kafka_setup_execute(self: ActionInstance):
1470
1478
  host_ip = (
1471
1479
  urllib.request.urlopen("https://ident.me", timeout=10).read().decode("utf8")
1472
1480
  )
1473
- container_port = 9092
1474
1481
  # Setup credentials
1475
1482
  self.setup_action_requirements(action_details)
1476
1483
 
@@ -1538,7 +1545,7 @@ def kafka_setup_execute(self: ActionInstance):
1538
1545
  [f"-e {key}={shlex.quote(str(value))}" for key, value in env_vars.items()]
1539
1546
  )
1540
1547
 
1541
- # Build the docker command directly to match user's pattern
1548
+ # Build the docker command with --net=host
1542
1549
  pypi_index = f"https://{'test.' if env != 'prod' else ''}pypi.org/simple/"
1543
1550
 
1544
1551
  if env == 'dev':
@@ -1547,8 +1554,9 @@ def kafka_setup_execute(self: ActionInstance):
1547
1554
  else:
1548
1555
  pkgs = f"matrice_common matrice"
1549
1556
 
1557
+ # Kafka container with --net=host (Ports: 9092, 9093)
1550
1558
  cmd = (
1551
- f"docker run -p {host_port}:{container_port} "
1559
+ f"docker run --net=host "
1552
1560
  f"{env_args} "
1553
1561
  f"--shm-size=30G --pull=always "
1554
1562
  f'aiforeveryone/matrice-kafka:latest /bin/bash -c "'
@@ -1561,5 +1569,5 @@ def kafka_setup_execute(self: ActionInstance):
1561
1569
  f'venv/bin/python3 main.py {self.action_record_id} {host_port}"'
1562
1570
  )
1563
1571
 
1564
- logging.info("cmd is: %s", cmd)
1572
+ logging.info("Starting Kafka container (Ports: 9092, 9093): %s", cmd)
1565
1573
  self.start(cmd, "kafka_setup")
@@ -128,8 +128,12 @@ def has_gpu() -> bool:
128
128
  Returns:
129
129
  bool: True if GPU is present, False otherwise
130
130
  """
131
- subprocess.run("nvidia-smi", check=True)
132
- return True
131
+ try:
132
+ subprocess.run("nvidia-smi", timeout=5)
133
+ return True
134
+ except subprocess.TimeoutExpired:
135
+ logging.warning("nvidia-smi command timed out after 5 seconds")
136
+ return False
133
137
 
134
138
 
135
139
  @log_errors(default_return=0, raise_exception=False)
@@ -141,13 +145,17 @@ def get_gpu_memory_usage() -> float:
141
145
  float: Memory usage between 0 and 1
142
146
  """
143
147
  command = "nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader"
144
- output = subprocess.check_output(command.split()).decode("ascii").strip().split("\n")
145
- memory_percentages = []
146
- for line in output:
147
- used, total = map(int, line.split(","))
148
- usage_percentage = used / total
149
- memory_percentages.append(usage_percentage)
150
- return min(memory_percentages)
148
+ try:
149
+ output = subprocess.check_output(command.split(), timeout=5).decode("ascii").strip().split("\n")
150
+ memory_percentages = []
151
+ for line in output:
152
+ used, total = map(int, line.split(","))
153
+ usage_percentage = used / total
154
+ memory_percentages.append(usage_percentage)
155
+ return min(memory_percentages)
156
+ except subprocess.TimeoutExpired:
157
+ logging.warning("nvidia-smi command timed out after 5 seconds in get_gpu_memory_usage")
158
+ return 0
151
159
 
152
160
 
153
161
  @log_errors(default_return=0, raise_exception=False)
@@ -194,17 +202,24 @@ def get_gpu_info() -> list:
194
202
  Returns:
195
203
  list: GPU information strings
196
204
  """
197
- with subprocess.Popen(
205
+ proc = subprocess.Popen(
198
206
  [
199
207
  "nvidia-smi",
200
208
  "--query-gpu=index,uuid,utilization.gpu,memory.total,memory.used,memory.free,driver_version,name,gpu_serial,display_active,display_mode,temperature.gpu",
201
209
  "--format=csv,noheader,nounits",
202
210
  ],
203
211
  stdout=subprocess.PIPE,
204
- ) as proc:
205
- stdout, _ = proc.communicate()
212
+ stderr=subprocess.PIPE,
213
+ )
214
+ try:
215
+ stdout, stderr = proc.communicate(timeout=5)
206
216
  output = stdout.decode("UTF-8")
207
217
  return output.split("\n")[:-1]
218
+ except subprocess.TimeoutExpired:
219
+ logging.warning("nvidia-smi command timed out after 5 seconds in get_gpu_info")
220
+ proc.kill()
221
+ proc.communicate() # flush output after kill
222
+ return []
208
223
 
209
224
 
210
225
  @log_errors(default_return="", raise_exception=False)
@@ -505,7 +520,12 @@ def get_gpu_with_sufficient_memory_for_action(
505
520
  """
506
521
  required_gpu_memory = get_required_gpu_memory(action_details)
507
522
  command = "nvidia-smi --query-gpu=memory.free --format=csv"
508
- memory_free_info = subprocess.check_output(command.split()).decode("ascii").split("\n")
523
+ try:
524
+ memory_free_info = subprocess.check_output(command.split(), timeout=5).decode("ascii").split("\n")
525
+ except subprocess.TimeoutExpired:
526
+ logging.error("nvidia-smi command timed out after 5 seconds in get_gpu_with_sufficient_memory_for_action")
527
+ raise ValueError("Failed to get GPU information - nvidia-smi timed out")
528
+
509
529
  if len(memory_free_info) < 2:
510
530
  raise ValueError("No GPU information available from nvidia-smi")
511
531
  memory_free_values = [int(x.split()[0]) for x in memory_free_info[1:-1]]
@@ -548,7 +568,12 @@ def get_single_gpu_with_sufficient_memory_for_action(
548
568
  """
549
569
  required_gpu_memory = get_required_gpu_memory(action_details)
550
570
  command = "nvidia-smi --query-gpu=memory.free --format=csv"
551
- memory_free_info = subprocess.check_output(command.split()).decode("ascii").split("\n")
571
+ try:
572
+ memory_free_info = subprocess.check_output(command.split(), timeout=5).decode("ascii").split("\n")
573
+ except subprocess.TimeoutExpired:
574
+ logging.error("nvidia-smi command timed out after 5 seconds in get_single_gpu_with_sufficient_memory_for_action")
575
+ raise ValueError("Failed to get GPU information - nvidia-smi timed out")
576
+
552
577
  if len(memory_free_info) < 2:
553
578
  raise ValueError("No GPU information available from nvidia-smi")
554
579
  memory_free_values = [int(x.split()[0]) for x in memory_free_info[1:-1]]
@@ -150,20 +150,25 @@ class ResourcesTracker:
150
150
  if not has_gpu():
151
151
  return 0
152
152
  gpu_util = 0
153
- result = subprocess.run(
154
- ["nvidia-smi", "pmon", "-c", "1"],
155
- capture_output=True,
156
- text=True,
157
- check=True,
158
- )
159
- pmon_output = result.stdout.strip().split("\n")
160
- for line in pmon_output[2:]:
161
- parts = line.split()
162
- if len(parts) >= 8:
163
- pid = parts[1]
164
- gpu_usage = parts[3]
165
- if pid == str(container_pid):
166
- gpu_util += float(gpu_usage) if gpu_usage != "-" else 0
153
+ try:
154
+ result = subprocess.run(
155
+ ["nvidia-smi", "pmon", "-c", "1"],
156
+ capture_output=True,
157
+ text=True,
158
+ check=True,
159
+ timeout=5,
160
+ )
161
+ pmon_output = result.stdout.strip().split("\n")
162
+ for line in pmon_output[2:]:
163
+ parts = line.split()
164
+ if len(parts) >= 8:
165
+ pid = parts[1]
166
+ gpu_usage = parts[3]
167
+ if pid == str(container_pid):
168
+ gpu_util += float(gpu_usage) if gpu_usage != "-" else 0
169
+ except subprocess.TimeoutExpired:
170
+ logging.warning("nvidia-smi pmon command timed out after 5 seconds in get_container_gpu_usage")
171
+ return 0
167
172
  return gpu_util
168
173
 
169
174
  @log_errors(default_return=0, raise_exception=False, log_error=False)
@@ -185,19 +190,24 @@ class ResourcesTracker:
185
190
  "--format=csv,noheader,nounits",
186
191
  ]
187
192
  total_memory = 0
188
- result = subprocess.run(
189
- cmd,
190
- stdout=subprocess.PIPE,
191
- stderr=subprocess.PIPE,
192
- text=True,
193
- check=True,
194
- )
195
- for line in result.stdout.splitlines():
196
- parts = line.strip().split(", ")
197
- if len(parts) == 2:
198
- process_pid, used_memory = parts
199
- if process_pid == str(container_pid):
200
- total_memory += int(used_memory)
193
+ try:
194
+ result = subprocess.run(
195
+ cmd,
196
+ stdout=subprocess.PIPE,
197
+ stderr=subprocess.PIPE,
198
+ text=True,
199
+ check=True,
200
+ timeout=5,
201
+ )
202
+ for line in result.stdout.splitlines():
203
+ parts = line.strip().split(", ")
204
+ if len(parts) == 2:
205
+ process_pid, used_memory = parts
206
+ if process_pid == str(container_pid):
207
+ total_memory += int(used_memory)
208
+ except subprocess.TimeoutExpired:
209
+ logging.warning("nvidia-smi command timed out after 5 seconds in get_container_gpu_memory_usage")
210
+ return 0
201
211
  return total_memory
202
212
 
203
213
  @log_errors(default_return=(0, 0, 0, 0), raise_exception=False, log_error=True)
@@ -227,7 +237,12 @@ class ResourcesTracker:
227
237
  if not has_gpu():
228
238
  return gpu_memory_free, gpu_utilization
229
239
 
230
- subprocess.check_output("nvidia-smi")
240
+ try:
241
+ subprocess.check_output("nvidia-smi", timeout=5)
242
+ except subprocess.TimeoutExpired:
243
+ logging.warning("nvidia-smi command timed out after 5 seconds in _get_gpu_resources")
244
+ return 0, 0.0
245
+
231
246
  info_list = get_gpu_info()
232
247
  for info in info_list:
233
248
  info_split = info.split(", ")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrice_compute
3
- Version: 0.1.14
3
+ Version: 0.1.16
4
4
  Summary: Common server utilities for Matrice.ai services
5
5
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
6
6
  License-Expression: MIT
@@ -1,17 +1,17 @@
1
1
  matrice_compute/__init__.py,sha256=ZzQcFsT005VCgq9VZUh565f4upOooEb_FwZ6RgweNZs,597
2
- matrice_compute/action_instance.py,sha256=aYNpRySPatxFltn_ekVmCd5h69I992_YerUTZwGWyHA,59763
2
+ matrice_compute/action_instance.py,sha256=cilzBD3o6K5CpDZEJCGMrNg0bCoUyOW3aCLNrMGyS10,60554
3
3
  matrice_compute/actions_manager.py,sha256=5U-xM6tl_Z6x96bi-c7AJM9ru80LqTN8f5Oce8dAu_A,7780
4
4
  matrice_compute/actions_scaledown_manager.py,sha256=pJ0nduNwHWZ10GnqJNx0Ok7cVWabQ_M8E2Vb9pH3A_k,2002
5
5
  matrice_compute/instance_manager.py,sha256=8USyX09ZxLvnVNIrjRogbyUeMCfgWnasuRqYkkVF4tQ,10146
6
- matrice_compute/instance_utils.py,sha256=7jnWurSpq8PQxPGlSTc0qmpNdD5jIL8pjYKdjhVhS60,22310
6
+ matrice_compute/instance_utils.py,sha256=cANKRUlUzfecnzVEMC6Gkg9K7GZajH9ojNPiChdJL9s,23455
7
7
  matrice_compute/prechecks.py,sha256=W9YmNF3RcLhOf4U8WBlExvFqDw1aGWSNTlJtA73lbDQ,17196
8
8
  matrice_compute/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- matrice_compute/resources_tracker.py,sha256=2hLKVxYihROtQ6fO4V_BplTgvkN8qH2H9_qxpOIpZkc,18521
9
+ matrice_compute/resources_tracker.py,sha256=n57IJmT5GjNEX8yQL7nbKv57bjvESYM-vRQcQ0DgQXQ,19256
10
10
  matrice_compute/scaling.py,sha256=3F8SWvy9wWczpJ6dbY5RrXWw5ByZlIzAPJklir3KIFI,35359
11
11
  matrice_compute/shutdown_manager.py,sha256=0MYV_AqygqR9NEntYf7atUC-PbWXyNkm1f-8c2aizgA,13234
12
12
  matrice_compute/task_utils.py,sha256=3qIutiQdYPyGRxH9ZwLbqdg8sZcnp6jp08pszWCRFl0,2820
13
- matrice_compute-0.1.14.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
14
- matrice_compute-0.1.14.dist-info/METADATA,sha256=u8ZIOoIX3uMEA4Lgaiuh73xsoPSdcHTZXAJuIBpn6KE,1038
15
- matrice_compute-0.1.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
- matrice_compute-0.1.14.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
17
- matrice_compute-0.1.14.dist-info/RECORD,,
13
+ matrice_compute-0.1.16.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
14
+ matrice_compute-0.1.16.dist-info/METADATA,sha256=gTIsLb7gHIZCl4rvaQ5tKQW8b2OW2jfvLqyYxn_BMFo,1038
15
+ matrice_compute-0.1.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
+ matrice_compute-0.1.16.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
17
+ matrice_compute-0.1.16.dist-info/RECORD,,