matrice-compute 0.1.31__py3-none-any.whl → 0.1.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrice_compute/__init__.py +4 -0
- matrice_compute/action_instance.py +354 -162
- matrice_compute/actions_manager.py +6 -2
- matrice_compute/instance_manager.py +1 -1
- matrice_compute/scaling.py +1 -1
- matrice_compute/shutdown_manager.py +2 -2
- {matrice_compute-0.1.31.dist-info → matrice_compute-0.1.33.dist-info}/METADATA +1 -1
- matrice_compute-0.1.33.dist-info/RECORD +18 -0
- matrice_compute-0.1.31.dist-info/RECORD +0 -18
- {matrice_compute-0.1.31.dist-info → matrice_compute-0.1.33.dist-info}/WHEEL +0 -0
- {matrice_compute-0.1.31.dist-info → matrice_compute-0.1.33.dist-info}/licenses/LICENSE.txt +0 -0
- {matrice_compute-0.1.31.dist-info → matrice_compute-0.1.33.dist-info}/top_level.txt +0 -0
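Most of the 0.1.33 changes to matrice_compute/action_instance.py follow a single pattern: every action's Docker container is now started with an explicit --name of the form {action_type}_{action_record_id}, and setup-style actions first check whether a previously recorded container still exists (via docker inspect) and restart it instead of creating a duplicate. The following is a minimal standalone sketch of that flow, not code from the package; the helper name launch_or_restart and the image tag are hypothetical.

    import subprocess

    def container_exists(container_id: str) -> bool:
        """Return True if `docker inspect` exits 0 for the given ID or name."""
        if not container_id:
            return False
        try:
            result = subprocess.run(
                ["docker", "inspect", container_id],
                capture_output=True, text=True, timeout=10,
            )
            return result.returncode == 0
        except Exception:
            return False

    def launch_or_restart(action_type: str, action_record_id: str, recorded_id=None) -> str:
        # Reuse the recorded container when it is still present on the host ...
        if recorded_id and container_exists(recorded_id):
            return f"docker restart {recorded_id}"
        # ... otherwise run a new one under a deterministic, discoverable name.
        container_name = f"{action_type}_{action_record_id}"
        return f"docker run -d --name {container_name} example/image:latest"

    if __name__ == "__main__":
        print(launch_or_restart("model_train", "65f0c0ffee01", None))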
matrice_compute/__init__.py
CHANGED
@@ -1,6 +1,7 @@
 """Module providing __init__ functionality."""

 import subprocess
+import logging

 from matrice_common.utils import dependencies_check

@@ -17,4 +18,7 @@ subprocess.run( # Re-upgrade docker to avoid missing DOCKER_HOST connection erro

 from matrice_compute.instance_manager import InstanceManager  # noqa: E402

+logging.getLogger("kafka").setLevel(logging.INFO)
+logging.getLogger("confluent_kafka").setLevel(logging.INFO)
+
 __all__ = ["InstanceManager"]
matrice_compute/action_instance.py
CHANGED

@@ -296,7 +296,7 @@ class ActionInstance:
 getattr(self, "action_record_id", "unknown"),
 )
 else:
-logging.
+logging.info(
 "No additional logs to send for action %s",
 getattr(self, "action_record_id", "unknown"),
 )

@@ -411,6 +411,7 @@ class ActionInstance:
 destination_workspace_path: str = "/usr/src/workspace",
 docker_workdir: str = "",
 extra_pkgs: list = [],
+container_name: str = "",
 ):
 """Build base Docker command with common options.

@@ -425,6 +426,7 @@ class ActionInstance:
 destination_workspace_path (str): Container workspace path
 docker_workdir (str): Docker working directory
 extra_pkgs (list): List of extra packages to install
+container_name (str): Docker container name (format: {action_type}_{action_id})
 Returns:
 str: Base Docker command
 """

@@ -489,8 +491,12 @@ class ActionInstance:
 ]
 )

+# Build container name option if provided
+name_option = f"--name {container_name}" if container_name else ""
+
 cmd_parts = [
 f"docker run -d {use_gpu} ",
+name_option,
 network_config,
 *[f"-e {key}={shlex.quote(str(value))}" for key, value in env_vars.items()],
 *volumes,
@@ -882,6 +888,34 @@ class ActionInstance:
 job_params=action_details["jobParams"],
 )

+@staticmethod
+def container_exists(container_id: str) -> bool:
+"""Check if a Docker container exists.
+
+Args:
+container_id (str): Container ID or name to check
+
+Returns:
+bool: True if container exists, False otherwise
+"""
+if not container_id:
+return False
+try:
+result = subprocess.run(
+["docker", "inspect", container_id],
+capture_output=True,
+text=True,
+timeout=10
+)
+return result.returncode == 0
+except Exception as e:
+logging.warning(
+"Error checking if container %s exists: %s",
+container_id,
+str(e)
+)
+return False
+
 @log_errors(raise_exception=True)
 def start_process(self, cmd, log_name):
 """Start the process and initialize logging.

@@ -905,7 +939,16 @@ class ActionInstance:
 env={**os.environ},
 )

-
+# Use a longer timeout for docker run since --pull=always may need to
+# download large images on first run. Default: 30 minutes (1800 seconds)
+# Can be configured via DOCKER_START_TIMEOUT_SECONDS environment variable
+docker_start_timeout = int(os.environ.get("DOCKER_START_TIMEOUT_SECONDS", 1800))
+logging.info(
+"Waiting for docker container to start for action %s (timeout: %d seconds)",
+self.action_record_id,
+docker_start_timeout,
+)
+stdout, stderr = process.communicate(timeout=docker_start_timeout)

 if process.returncode != 0:
 logging.error(
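The two hunks above give ActionInstance a container_exists probe and make the docker run wait configurable through the DOCKER_START_TIMEOUT_SECONDS environment variable (defaulting to 1800 seconds). A short usage sketch, assuming the API lands as shown in the diff; the container name and timeout value below are illustrative only:

    import os
    from matrice_compute.action_instance import ActionInstance

    # Allow up to an hour for a first-time --pull=always image download;
    # must be set before the compute process reads the variable.
    os.environ["DOCKER_START_TIMEOUT_SECONDS"] = "3600"

    # True only if `docker inspect` succeeds within 10 seconds.
    # "model_train_65f0c0ffee01" is a made-up container name.
    if ActionInstance.container_exists("model_train_65f0c0ffee01"):
        print("container already exists; a restart would be attempted")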
@@ -1095,7 +1138,8 @@ def data_preparation_execute(
 "Started pulling Docker image with PID: %s",
 process.pid,
 )
-
+container_name = f"data_prep_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(work_fs, destination_workspace_path="/usr/src/app/workspace", docker_workdir="/usr/src/app/workspace", extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/data_preparation.py {self.action_record_id} "'
 logging.info("cmd is: %s", cmd)
 self.start(cmd, "data_preparation_log")

@@ -1124,7 +1168,8 @@ def data_processing_execute(self: ActionInstance):
 service="bg-job-scheduler",
 job_params=action["jobParams"],
 )
-
+container_name = f"data_processing_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/main.py {self.action_record_id} "'
 logging.info("cmd: %s", cmd)
 self.start(cmd, "data_processing_log")

@@ -1137,7 +1182,8 @@ def data_split_execute(self: ActionInstance):
 if not action_details:
 return
 self.setup_action_requirements(action_details, work_fs, model_family="")
-
+container_name = f"data_split_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/data_split.py {self.action_record_id} "'
 logging.info("cmd: %s", cmd)
 self.start(cmd, "data_split")

@@ -1152,7 +1198,8 @@ def dataset_annotation_execute(
 if not action_details:
 return
 self.setup_action_requirements(action_details, work_fs)
-
+container_name = f"dataset_annotation_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/dataset_annotation.py {self.action_record_id} "'
 logging.info("cmd: %s", cmd)
 self.start(cmd, "dataset_annotation")

@@ -1167,7 +1214,8 @@ def dataset_augmentation_execute(
 if not action_details:
 return
 self.setup_action_requirements(action_details, work_fs)
-
+container_name = f"dataset_augmentation_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/data_augmentation.py {self.action_record_id} "'
 logging.info("cmd: %s", cmd)
 self.start(cmd, "dataset_augmentation")

@@ -1183,7 +1231,8 @@ def augmentation_server_creation_execute(
 if not action_details:
 return
 self.setup_action_requirements(action_details, work_fs)
-
+container_name = f"augmentation_setup_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/aug_server.py {self.action_record_id} {external_port} "'
 logging.info("cmd: %s", cmd)
 self.start(cmd, "augmentation_setup")
@@ -1204,25 +1253,41 @@ def database_setup_execute(self: ActionInstance):

 project_id = action_details["_idProject"]

-
-
-
-action_details["actionDetails"]["containerId"],
-)
-self.docker_container = action_details["actionDetails"]["containerId"]
-cmd = "docker restart " + self.docker_container
-self.start(cmd, "qdrant_setup")
+# Define container names with action_record_id for uniqueness
+mongodb_container_name = f"database_setup_{self.action_record_id}"
+qdrant_container_name = f"qdrant_{self.action_record_id}"

-
-
-
+existing_container_id = action_details["actionDetails"].get("containerId")
+if existing_container_id:
+# Check if both containers actually exist before trying to restart
+mongodb_container_exists = ActionInstance.container_exists(existing_container_id)
+qdrant_container_exists = ActionInstance.container_exists(qdrant_container_name)

-
+if mongodb_container_exists and qdrant_container_exists:
+logging.info(
+"Using existing container ID for database setup: %s",
+existing_container_id,
+)
+self.docker_container = existing_container_id
+cmd = "docker restart " + self.docker_container
+self.start(cmd, "qdrant_setup")
+
+# qdrant restart
+qdrant_cmd = f"docker restart {qdrant_container_name}"
+self.start(qdrant_cmd, "qdrant_setup")
+return
+else:
+logging.warning(
+"Container(s) not found (mongodb=%s, qdrant=%s). Creating new containers.",
+mongodb_container_exists,
+qdrant_container_exists
+)
+# Fall through to create new containers

 # MongoDB container with --net=host (Port: 27020:27017)
 cmd = (
 f"docker run --pull=always --net=host "
-f"--name
+f"--name {mongodb_container_name} "
 f"-v matrice_myvol:/matrice_data "
 f"--cidfile ./{self.action_record_id}.cid "
 f"-e ACTION_RECORD_ID={self.action_record_id} "

@@ -1237,7 +1302,7 @@ def database_setup_execute(self: ActionInstance):
 # Qdrant container with --net=host (Port: 6334)
 qdrant_cmd = (
 f"docker run --pull=always --net=host "
-f"--name
+f"--name {qdrant_container_name} "
 f"-v matrice_myvol:/matrice_data "
 f"{'qdrant/qdrant:latest'} "
 )
@@ -1263,23 +1328,32 @@ def facial_recognition_setup_execute(self: ActionInstance):

 self.setup_action_requirements(action_details)

-
-
-
-
-
-
-
-
-
+existing_container_id = action_details["actionDetails"].get("containerId")
+if existing_container_id:
+# Check if container actually exists before trying to restart
+if ActionInstance.container_exists(existing_container_id):
+logging.info(
+"Using existing container ID for facial recognition worker: %s",
+existing_container_id,
+)
+self.docker_container = existing_container_id
+cmd = "docker restart " + self.docker_container
+self.start(cmd, "facial_recognition_setup")
+return
+else:
+logging.warning(
+"Container %s not found. Creating new container.",
+existing_container_id
+)
+# Fall through to create new container

 # Facial recognition worker container with --net=host (Port: 8081)
+container_name = f"facial_recognition_{self.action_record_id}"
 worker_cmd = (
 f"docker run -d --pull=always --net=host "
-f"--name
-f"--cidfile ./{self.action_record_id}.cid "
-f"-v matrice_myvol:/matrice_data "
+f"--name {container_name} "
 f"--cidfile ./{self.action_record_id}.cid "
+f"-v matrice_myvol:/matrice_data "
 f'-e ENV="{os.environ.get("ENV", "prod")}" '
 f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
 f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
@@ -1305,20 +1379,30 @@ def lpr_setup_execute(self: ActionInstance):

 self.setup_action_requirements(action_details)

-
-
-
-
-
-
-
-
-
+existing_container_id = action_details["actionDetails"].get("containerId")
+if existing_container_id:
+# Check if container actually exists before trying to restart
+if ActionInstance.container_exists(existing_container_id):
+logging.info(
+"Using existing container ID for LPR worker: %s",
+existing_container_id,
+)
+self.docker_container = existing_container_id
+cmd = "docker restart " + self.docker_container
+self.start(cmd, "lpr_setup")
+return
+else:
+logging.warning(
+"Container %s not found. Creating new container.",
+existing_container_id
+)
+# Fall through to create new container

 # LPR worker container with --net=host (Port: 8082)
+container_name = f"lpr_{self.action_record_id}"
 worker_cmd = (
 f"docker run -d --net=host --pull=always "
-f"--name
+f"--name {container_name} "
 f"--cidfile ./{self.action_record_id}.cid "
 f"-v matrice_myvol:/matrice_data "
 f'-e ENV="{os.environ.get("ENV", "prod")}" '
@@ -1356,20 +1440,30 @@ def inference_ws_server_execute(self: ActionInstance):

 logging.info(f"Inference WebSocket server will use IP: {ws_host} on port 8102 (use_host_network={use_host_network})")

-
-
-
-
-
-
-
-
-
+existing_container_id = action_details["actionDetails"].get("containerId")
+if existing_container_id:
+# Check if container actually exists before trying to restart
+if ActionInstance.container_exists(existing_container_id):
+logging.info(
+"Using existing container ID for inference WebSocket server: %s",
+existing_container_id,
+)
+self.docker_container = existing_container_id
+cmd = "docker restart " + self.docker_container
+self.start(cmd, "inference_ws_server")
+return
+else:
+logging.warning(
+"Container %s not found. Creating new container.",
+existing_container_id
+)
+# Fall through to create new container

 # Inference WebSocket server with --net=host (Port: 8102)
+container_name = f"inference_ws_{self.action_record_id}"
 worker_cmd = (
 f"docker run -d --pull=always --net=host "
-f"--name
+f"--name {container_name} "
 f"--cidfile ./{self.action_record_id}.cid "
 f'-e ENV="{os.environ.get("ENV", "prod")}" '
 f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
@@ -1404,20 +1498,30 @@ def fe_fs_streaming_execute(self: ActionInstance):

 logging.info(f"Frontend streaming will connect to WebSocket at: {ws_url}")

-
-
-
-
-
-
-
-
-
-
+existing_container_id = action_details["actionDetails"].get("containerId")
+if existing_container_id:
+# Check if container actually exists before trying to restart
+if ActionInstance.container_exists(existing_container_id):
+logging.info(
+"Using existing container ID for frontend streaming: %s",
+existing_container_id,
+)
+self.docker_container = existing_container_id
+cmd = "docker restart " + self.docker_container
+self.start(cmd, "fe_fs_streaming")
+return
+else:
+logging.warning(
+"Container %s not found. Creating new container.",
+existing_container_id
+)
+# Fall through to create new container
+
 # Frontend streaming with --net=host (Port: 3000)
+container_name = f"fe_streaming_{self.action_record_id}"
 worker_cmd = (
 f"docker run -d --pull=always --net=host "
-f"--name
+f"--name {container_name} "
 f"--cidfile ./{self.action_record_id}.cid "
 f"-v matrice_myvol:/matrice_data "
 f'-e ENV="{os.environ.get("ENV", "prod")}" '
@@ -1449,20 +1553,30 @@ def fe_analytics_service_execute(self: ActionInstance):

 project_id = action_details["_idProject"]

-
-
-
-
-
-
-
-
-
-
+existing_container_id = action_details["actionDetails"].get("containerId")
+if existing_container_id:
+# Check if container actually exists before trying to restart
+if ActionInstance.container_exists(existing_container_id):
+logging.info(
+"Using existing container ID for frontend analytics service: %s",
+existing_container_id,
+)
+self.docker_container = existing_container_id
+cmd = "docker restart " + self.docker_container
+self.start(cmd, "fe_analytics_service")
+return
+else:
+logging.warning(
+"Container %s not found. Creating new container.",
+existing_container_id
+)
+# Fall through to create new container
+
 # Frontend analytics service with --net=host (Port: 3001)
+container_name = f"fe_analytics_{self.action_record_id}"
 worker_cmd = (
 f"docker run -d --pull=always --net=host "
-f"--name
+f"--name {container_name} "
 f"--cidfile ./{self.action_record_id}.cid "
 f'-e NEXT_PUBLIC_DEPLOYMENT_ENV="{os.environ.get("ENV", "prod")}" '
 f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
@@ -1494,7 +1608,8 @@ def synthetic_dataset_generation_execute(self: ActionInstance):
 else:
 return
 use_gpu = self.get_gpu_config(action_details)
-
+container_name = f"dataset_generation_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(work_fs=work_fs, use_gpu=use_gpu, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/synthetic_dataset_generation.py {self.action_record_id} "'
 logging.info("cmd is: %s", cmd)
 self.start(cmd, "dataset_generation")

@@ -1515,7 +1630,8 @@ def synthetic_data_setup_execute(self: ActionInstance):
 else:
 return
 use_gpu = self.get_gpu_config(action_details)
-
+container_name = f"synthetic_data_setup_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(work_fs=work_fs, use_gpu=use_gpu, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/data_generation.py {self.action_record_id} {external_port} "'
 logging.info("cmd is: %s", cmd)
 self.start(cmd, "synthetic_data_setup")
@@ -1552,26 +1668,40 @@ def redis_setup_execute(self: ActionInstance):

 redis_image = action_details["actionDetails"].get("redis_image", "redis:latest")

+# Define container names with action_record_id for uniqueness
+redis_container_name = f"redis_{self.action_record_id}"

-
-
-
-
-)
-self.docker_container = action_details["actionDetails"]["containerId"]
-cmd = "docker restart " + self.docker_container
-self.start(cmd, "redis_setup")
+existing_container_id = action_details["actionDetails"].get("containerId")
+if existing_container_id:
+# Check if both containers actually exist before trying to restart
+management_container_exists = ActionInstance.container_exists(existing_container_id)
+redis_container_exists = ActionInstance.container_exists(redis_container_name)

-
-
-
+if management_container_exists and redis_container_exists:
+logging.info(
+"Using existing container ID for redis management: %s",
+existing_container_id,
+)
+self.docker_container = existing_container_id
+cmd = "docker restart " + self.docker_container
+self.start(cmd, "redis_setup")
+
+# Redis container restart
+redis_restart_cmd = f"docker restart {redis_container_name}"
+self.start(redis_restart_cmd, "redis")
+return
+else:
+logging.warning(
+"Container(s) not found (management=%s, redis=%s). Creating new containers.",
+management_container_exists,
+redis_container_exists
+)
+# Fall through to create new containers

-return
-
 # Redis container with --net=host (Port: 6379)
 redis_cmd = (
 f"docker run -d --net=host "
-f"--name
+f"--name {redis_container_name} "
 f"--restart unless-stopped "
 f"{redis_image} "
 f"redis-server --bind 0.0.0.0 "
@@ -1641,7 +1771,8 @@ def deploy_aggregator_execute(
 if not action_details:
 return
 self.setup_action_requirements(action_details, work_fs)
-
+container_name = f"deploy_aggregator_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(work_fs, container_name=container_name)} python3 /usr/src/app/deploy_aggregator.py {self.action_record_id} "'
 logging.info("cmd: %s", cmd)
 self.start(cmd, "deploy_aggregator")

@@ -1689,7 +1820,8 @@ def model_deploy_execute(self: ActionInstance):
 "TRITON_PORTS": triton_ports
 }

-
+container_name = f"model_deploy_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, mount_docker_sock=True, action_id=action_id, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_inference", "matrice_analytics"], container_name=container_name)} python3 deploy.py {self.action_record_id} {external_port}"'
 logging.info("cmd is: %s", cmd)
 self.start(cmd, "deploy_log")
@@ -1712,17 +1844,27 @@ def model_train_execute(self: ActionInstance):
 action_id=action_id,
 )

-
-
-
-
-
-
-
-
-
-
-
+existing_container_id = action_details["actionDetails"].get("containerId")
+if existing_container_id:
+# Check if container actually exists before trying to restart
+if ActionInstance.container_exists(existing_container_id):
+logging.info(
+"Using existing container ID for training: %s",
+existing_container_id,
+)
+self.docker_container = existing_container_id
+cmd = "docker restart " + self.docker_container
+self.start(cmd, "train_log")
+return
+else:
+logging.warning(
+"Container %s not found. Creating new container.",
+existing_container_id
+)
+# Fall through to create new container
+
+container_name = f"model_train_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, action_id=action_id, model_key=model_key, container_name=container_name)} python3 train.py {self.action_record_id} "'
 logging.info("cmd is: %s", cmd)
 self.start(cmd, "train_log")
@@ -1743,17 +1885,27 @@ def model_eval_execute(self: ActionInstance):
 model_family=model_family,
 action_id=action_id,
 )
-
-
-
-
-
-
-
-
-
-
-
+existing_container_id = action_details["actionDetails"].get("containerId")
+if existing_container_id:
+# Check if container actually exists before trying to restart
+if ActionInstance.container_exists(existing_container_id):
+logging.info(
+"Using existing container ID for evaluation: %s",
+existing_container_id,
+)
+self.docker_container = existing_container_id
+cmd = "docker restart " + self.docker_container
+self.start(cmd, "eval_log")
+return
+else:
+logging.warning(
+"Container %s not found. Creating new container.",
+existing_container_id
+)
+# Fall through to create new container
+
+container_name = f"model_eval_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, action_id=action_id, container_name=container_name)} python3 eval.py {self.action_record_id} "'
 logging.info("cmd is: %s", cmd)
 self.start(cmd, "eval_log")
@@ -1777,17 +1929,27 @@ def model_export_execute(self: ActionInstance):
 model_family=model_family,
 action_id=action_id,
 )
-
-
-
-
-
-
-
-
-
-
-
+existing_container_id = action_details["actionDetails"].get("containerId")
+if existing_container_id:
+# Check if container actually exists before trying to restart
+if ActionInstance.container_exists(existing_container_id):
+logging.info(
+"Using existing container ID for export: %s",
+existing_container_id,
+)
+self.docker_container = existing_container_id
+cmd = "docker restart " + self.docker_container
+self.start(cmd, "export_log")
+return
+else:
+logging.warning(
+"Container %s not found. Creating new container.",
+existing_container_id
+)
+# Fall through to create new container
+
+container_name = f"model_export_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, action_id=action_id, container_name=container_name)} python3 export.py {self.action_record_id} "'
 logging.info("cmd is: %s", cmd)
 self.start(cmd, "export_log")
@@ -1803,7 +1965,8 @@ def image_build_execute(self: ActionInstance):
 action_id = action_details["_id"]
 internal_api_key = self.get_internal_api_key(action_id)
 extra_env_vars = {"MATRICE_INTERNAL_API_KEY": internal_api_key}
-
+container_name = f"image_build_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(mount_docker_sock=True, extra_env_vars=extra_env_vars, container_name=container_name)} python3 main.py {model_family_id} {action_id}"'
 logging.info("cmd is: %s", cmd)
 self.start(cmd, "image_build_log")

@@ -1815,7 +1978,8 @@ def resource_clone_execute(self: ActionInstance):
 if not action_details:
 return
 self.setup_action_requirements(action_details)
-
+container_name = f"resource_clone_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(container_name=container_name)} python3 main.py {self.action_record_id} "'
 logging.info("cmd is: %s", cmd)
 self.start(cmd, "resource_clone")
@@ -1831,17 +1995,27 @@ def streaming_gateway_execute(self: ActionInstance):
 self.docker_container = (
 f"aiforeveryone/streaming-gateway:{os.environ.get('ENV', 'prod')}"
 )
-
-
-
-
-
-
-
-
-
-
-
+existing_container_id = action_details["actionDetails"].get("containerId")
+if existing_container_id:
+# Check if container actually exists before trying to restart
+if ActionInstance.container_exists(existing_container_id):
+logging.info(
+"Using existing container ID for streaming gateway: %s",
+existing_container_id,
+)
+self.docker_container = existing_container_id
+cmd = "docker restart " + self.docker_container
+self.start(cmd, "streaming_gateway")
+return
+else:
+logging.warning(
+"Container %s not found. Creating new container.",
+existing_container_id
+)
+# Fall through to create new container
+
+container_name = f"streaming_gateway_{self.action_record_id}"
+cmd = f'{self.get_base_docker_cmd(extra_pkgs=["matrice_streaming"], container_name=container_name)} python3 /usr/src/app/streaming_gateway.py {self.action_record_id} "'
 logging.info("cmd is: %s", cmd)
 self.start(cmd, "streaming_gateway")
@@ -1935,16 +2109,24 @@ def kafka_setup_execute(self: ActionInstance):
 else:
 pkgs = f"matrice_common matrice"

-
-
-
-
-
-
-
-
-
-
+existing_container_id = action_details["actionDetails"].get("containerId")
+if existing_container_id:
+# Check if container actually exists before trying to restart
+if ActionInstance.container_exists(existing_container_id):
+logging.info(
+"Using existing container ID for kafka: %s",
+existing_container_id,
+)
+self.docker_container = existing_container_id
+cmd = "docker restart " + self.docker_container
+self.start(cmd, "kafka_setup")
+return
+else:
+logging.warning(
+"Container %s not found. Creating new container.",
+existing_container_id
+)
+# Fall through to create new container

 # Kafka container with --net=host (Ports: 9092, 9093)
 cmd = (
@@ -1981,21 +2163,31 @@ def inference_tracker_setup_execute(self: ActionInstance):

 self.setup_action_requirements(action_details)

-
-
-
-
-
-
-
-
-
-
+existing_container_id = action_details["actionDetails"].get("containerId")
+if existing_container_id:
+# Check if container actually exists before trying to restart
+if ActionInstance.container_exists(existing_container_id):
+logging.info(
+"Using existing container ID for inference tracker: %s",
+existing_container_id,
+)
+self.docker_container = existing_container_id
+cmd = "docker restart " + self.docker_container
+self.start(cmd, "inference_tracker_setup")
+return
+else:
+logging.warning(
+"Container %s not found. Creating new container.",
+existing_container_id
+)
+# Fall through to create new container
+
 # This is the existing Docker run command
+container_name = f"inference_tracker_{self.action_record_id}"
 worker_cmd = (
 f"docker run -d --pull=always --net=host "
-
-f"--name
+f"--cidfile ./{self.action_record_id}.cid "
+f"--name {container_name} "
 f"-v matrice_myvol:/matrice_data "
 f'-e ENV="{os.environ.get("ENV", "prod")}" '
 f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
matrice_compute/actions_manager.py
CHANGED

@@ -43,7 +43,11 @@ class ActionsManager:
 """
 actions = []
 logging.info("Polling backend for new jobs")
-
+result = self.scaling.assign_jobs(has_gpu())
+if result is None:
+logging.error("assign_jobs returned None")
+return actions
+fetched_actions, error, _ = result
 if error:
 logging.error("Error assigning jobs: %s", error)
 return actions

@@ -224,7 +228,7 @@ class ActionsManager:
 action_ids
 )
 else:
-logging.
+logging.info("No actions currently running")

 return self.current_actions
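The first ActionsManager hunk above stops assuming that scaling.assign_jobs always returns a three-element tuple: the result is checked for None before unpacking. A generic sketch of that defensive pattern, with illustrative names rather than the package's actual signatures:

    from typing import List, Optional, Tuple

    def fetch_actions(result: Optional[Tuple[List[dict], Optional[str], object]]) -> List[dict]:
        actions: List[dict] = []
        if result is None:      # the call itself failed or was swallowed upstream
            return actions
        fetched, error, _ = result
        if error:               # backend reported an error message
            return actions
        return fetched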
matrice_compute/instance_manager.py
CHANGED

@@ -404,7 +404,7 @@ class InstanceManager:
 if self.container_kafka_producer:
 try:
 self.container_kafka_producer.send(topic_name, status_message)
-logging.
+logging.info("Container status monitor: Sent status for %d containers", len(containers))
 except Exception as e:
 logging.error("Container status monitor: Failed to send to Kafka: %s", str(e))
matrice_compute/scaling.py
CHANGED
@@ -295,7 +295,7 @@ class Scaling:
 logging.warning(f"Kafka returned error for {api}, falling back to REST")

 # Kafka failed or disabled, try REST
-logging.
+logging.debug(f"Using REST API for {api}")
 try:
 rest_response = rest_fallback_func()
matrice_compute/shutdown_manager.py
CHANGED

@@ -185,7 +185,7 @@ class ShutdownManager:
 time.sleep(2)
 return True
 except Exception as e:
-logging.
+logging.info("Aggressive command failed: %s", str(e))
 except Exception as e:
 logging.error("Error in aggressive shutdown methods: %s", str(e))
 return False

@@ -271,7 +271,7 @@ class ShutdownManager:
 """
 # CRITICAL: Check if this is a reserved instance that should not be shut down
 # if self.reserved_instance:
-# logging.
+# logging.info("Reserved instance detected, skipping shutdown check")
 # return

 # Update idle time tracking
matrice_compute-0.1.33.dist-info/RECORD
ADDED

@@ -0,0 +1,18 @@
+matrice_compute/__init__.py,sha256=YZhx7rQlD1TAlhBMbsU3_Xp-tpLyTAxWZDcQvqmwR2g,723
+matrice_compute/action_instance.py,sha256=jWv-YlB1_YiaEU-oazGBMMwSHFh2TJ5NU88R7NxCtUM,85957
+matrice_compute/actions_manager.py,sha256=a_TulMnu462xc0t_A-Mpug5zhQTmtpjiv7mhiC_IAVw,18280
+matrice_compute/actions_scaledown_manager.py,sha256=pJ0nduNwHWZ10GnqJNx0Ok7cVWabQ_M8E2Vb9pH3A_k,2002
+matrice_compute/compute_operations_handler.py,sha256=amcMhmXtv2irE6qK8Vbgec_8uFqjWmVVp0VWq-73_MU,17781
+matrice_compute/instance_manager.py,sha256=9u3QRTP-MkAWmrSQMMbCKc0TfK584teAg1wWIaqMZdE,19291
+matrice_compute/instance_utils.py,sha256=N4yPDvNukFEEBngR0lEt4x_XT5hur1q0P-spM2xQIlU,42025
+matrice_compute/prechecks.py,sha256=W9YmNF3RcLhOf4U8WBlExvFqDw1aGWSNTlJtA73lbDQ,17196
+matrice_compute/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+matrice_compute/resources_tracker.py,sha256=1jSLrIFlOh-vgyNzFrUrE2Ak2JAGCIfV7wcyEPJ0f2c,32246
+matrice_compute/scaling.py,sha256=UQDI8wN9JEKafvUVPF0Pk9XmhKlbMkeu16AZyyOuSE8,55147
+matrice_compute/shutdown_manager.py,sha256=rnP9Qes6JJKDnebmBC9rqkH__X9a8TMjhWQPWoOQKFs,13232
+matrice_compute/task_utils.py,sha256=3qIutiQdYPyGRxH9ZwLbqdg8sZcnp6jp08pszWCRFl0,2820
+matrice_compute-0.1.33.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
+matrice_compute-0.1.33.dist-info/METADATA,sha256=iJWPWtfLyDOfNVvSxmH8BMLcXkFlX1Z7Lm6Sih-JSfY,1038
+matrice_compute-0.1.33.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+matrice_compute-0.1.33.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
+matrice_compute-0.1.33.dist-info/RECORD,,
matrice_compute-0.1.31.dist-info/RECORD
DELETED

@@ -1,18 +0,0 @@
-matrice_compute/__init__.py,sha256=ZzQcFsT005VCgq9VZUh565f4upOooEb_FwZ6RgweNZs,597
-matrice_compute/action_instance.py,sha256=NpI7uCaLJ5GKdW-2JBGCjTwijb8XBrRc7GKRC4uhQF4,76650
-matrice_compute/actions_manager.py,sha256=Iex5uw0PLRR4pvIAZDxc2CypucbanKDbJ3SK8mMGXK8,18148
-matrice_compute/actions_scaledown_manager.py,sha256=pJ0nduNwHWZ10GnqJNx0Ok7cVWabQ_M8E2Vb9pH3A_k,2002
-matrice_compute/compute_operations_handler.py,sha256=amcMhmXtv2irE6qK8Vbgec_8uFqjWmVVp0VWq-73_MU,17781
-matrice_compute/instance_manager.py,sha256=kPZYfiq3Oevs5r1xzwvDzE27zeWF9oBBxh9KhpHJuG4,19292
-matrice_compute/instance_utils.py,sha256=N4yPDvNukFEEBngR0lEt4x_XT5hur1q0P-spM2xQIlU,42025
-matrice_compute/prechecks.py,sha256=W9YmNF3RcLhOf4U8WBlExvFqDw1aGWSNTlJtA73lbDQ,17196
-matrice_compute/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-matrice_compute/resources_tracker.py,sha256=1jSLrIFlOh-vgyNzFrUrE2Ak2JAGCIfV7wcyEPJ0f2c,32246
-matrice_compute/scaling.py,sha256=cdEJqdVsPGDeOjkVAG85lubOn-qwDRV5qqmrNl_XpCM,55146
-matrice_compute/shutdown_manager.py,sha256=0MYV_AqygqR9NEntYf7atUC-PbWXyNkm1f-8c2aizgA,13234
-matrice_compute/task_utils.py,sha256=3qIutiQdYPyGRxH9ZwLbqdg8sZcnp6jp08pszWCRFl0,2820
-matrice_compute-0.1.31.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
-matrice_compute-0.1.31.dist-info/METADATA,sha256=nhJU2AA0SxaSWMZXKjYtAthzjbjdEmmD3agMYqukQx8,1038
-matrice_compute-0.1.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-matrice_compute-0.1.31.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
-matrice_compute-0.1.31.dist-info/RECORD,,
{matrice_compute-0.1.31.dist-info → matrice_compute-0.1.33.dist-info}/WHEEL
File without changes

{matrice_compute-0.1.31.dist-info → matrice_compute-0.1.33.dist-info}/licenses/LICENSE.txt
File without changes

{matrice_compute-0.1.31.dist-info → matrice_compute-0.1.33.dist-info}/top_level.txt
File without changes