matrice-compute 0.1.39__py3-none-any.whl → 0.1.40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrice_compute/action_instance.py +113 -239
- {matrice_compute-0.1.39.dist-info → matrice_compute-0.1.40.dist-info}/METADATA +1 -1
- {matrice_compute-0.1.39.dist-info → matrice_compute-0.1.40.dist-info}/RECORD +6 -6
- {matrice_compute-0.1.39.dist-info → matrice_compute-0.1.40.dist-info}/WHEEL +0 -0
- {matrice_compute-0.1.39.dist-info → matrice_compute-0.1.40.dist-info}/licenses/LICENSE.txt +0 -0
- {matrice_compute-0.1.39.dist-info → matrice_compute-0.1.40.dist-info}/top_level.txt +0 -0
|
@@ -26,10 +26,6 @@ from matrice_common.utils import log_errors
|
|
|
26
26
|
class ActionInstance:
|
|
27
27
|
"""Base class for tasks that run in Action containers."""
|
|
28
28
|
|
|
29
|
-
# Class-level dictionary to track deployed services and their ports
|
|
30
|
-
# Key: _idService, Value: {"triton_ports": "port1,port2,port3"}
|
|
31
|
-
_deployed_services = {}
|
|
32
|
-
|
|
33
29
|
def __init__(self, scaling: Scaling, action_info: dict):
|
|
34
30
|
"""Initialize an action instance.
|
|
35
31
|
|
|
@@ -89,52 +85,6 @@ class ActionInstance:
|
|
|
89
85
|
raise ValueError(f"Unknown action type: {self.action_type}")
|
|
90
86
|
self.task = self.actions_map[self.action_type]
|
|
91
87
|
|
|
92
|
-
@classmethod
|
|
93
|
-
def get_or_create_triton_ports(cls, service_id, scaling_instance):
|
|
94
|
-
"""Get existing TRITON_PORTS for a service or create new ones.
|
|
95
|
-
|
|
96
|
-
Args:
|
|
97
|
-
service_id (str): Service ID (_idService)
|
|
98
|
-
scaling_instance: Scaling instance to get open ports
|
|
99
|
-
|
|
100
|
-
Returns:
|
|
101
|
-
str: Comma-separated string of 3 port numbers (e.g., "8001,8002,8003")
|
|
102
|
-
"""
|
|
103
|
-
if not service_id:
|
|
104
|
-
# No service_id, generate new ports
|
|
105
|
-
port1 = scaling_instance.get_open_port()
|
|
106
|
-
port2 = scaling_instance.get_open_port()
|
|
107
|
-
port3 = scaling_instance.get_open_port()
|
|
108
|
-
return f"{port1},{port2},{port3}"
|
|
109
|
-
|
|
110
|
-
# Check if ports already exist for this service
|
|
111
|
-
if service_id in cls._deployed_services:
|
|
112
|
-
triton_ports = cls._deployed_services[service_id]["triton_ports"]
|
|
113
|
-
logging.info(
|
|
114
|
-
"Reusing TRITON_PORTS for service %s: %s",
|
|
115
|
-
service_id,
|
|
116
|
-
triton_ports
|
|
117
|
-
)
|
|
118
|
-
return triton_ports
|
|
119
|
-
|
|
120
|
-
# First deployment: generate new ports and store them
|
|
121
|
-
port1 = scaling_instance.get_open_port()
|
|
122
|
-
port2 = scaling_instance.get_open_port()
|
|
123
|
-
port3 = scaling_instance.get_open_port()
|
|
124
|
-
triton_ports = f"{port1},{port2},{port3}"
|
|
125
|
-
|
|
126
|
-
# Store for future use
|
|
127
|
-
cls._deployed_services[service_id] = {
|
|
128
|
-
"triton_ports": triton_ports,
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
logging.info(
|
|
132
|
-
"First deployment for service %s - generated TRITON_PORTS: %s",
|
|
133
|
-
service_id,
|
|
134
|
-
triton_ports
|
|
135
|
-
)
|
|
136
|
-
return triton_ports
|
|
137
|
-
|
|
138
88
|
@log_errors(default_return={}, raise_exception=True, log_error=False)
|
|
139
89
|
def _init_credentials(self):
|
|
140
90
|
"""Initialize Matrice credentials.
|
|
@@ -396,7 +346,6 @@ class ActionInstance:
|
|
|
396
346
|
destination_workspace_path: str = "/usr/src/workspace",
|
|
397
347
|
docker_workdir: str = "",
|
|
398
348
|
extra_pkgs: list = [],
|
|
399
|
-
container_name: str = "",
|
|
400
349
|
):
|
|
401
350
|
"""Build base Docker command with common options.
|
|
402
351
|
|
|
@@ -411,7 +360,6 @@ class ActionInstance:
|
|
|
411
360
|
destination_workspace_path (str): Container workspace path
|
|
412
361
|
docker_workdir (str): Docker working directory
|
|
413
362
|
extra_pkgs (list): List of extra packages to install
|
|
414
|
-
container_name (str): Docker container name (format: {action_type}_{action_id})
|
|
415
363
|
Returns:
|
|
416
364
|
str: Base Docker command
|
|
417
365
|
"""
|
|
@@ -482,15 +430,13 @@ class ActionInstance:
|
|
|
482
430
|
else:
|
|
483
431
|
use_restart_policy = ""
|
|
484
432
|
|
|
485
|
-
# Build container name option if provided
|
|
486
|
-
name_option = f"--name {container_name}" if container_name else ""
|
|
487
|
-
|
|
488
433
|
cmd_parts = [
|
|
489
|
-
f"docker run
|
|
434
|
+
f"docker run {use_gpu} {use_restart_policy} ",
|
|
490
435
|
network_config,
|
|
491
436
|
*[f"-e {key}={shlex.quote(str(value))}" for key, value in env_vars.items()],
|
|
492
437
|
*volumes,
|
|
493
438
|
# Container configuration and startup commands
|
|
439
|
+
f"--cidfile ./{self.action_record_id}.cid ",
|
|
494
440
|
f"--shm-size=30G --pull=always {shlex.quote(self.docker_container)}",
|
|
495
441
|
f'/bin/bash -c "cd {docker_workdir} && '
|
|
496
442
|
f"{env_exports} && "
|
|
@@ -892,50 +838,55 @@ class ActionInstance:
|
|
|
892
838
|
self.cmd = cmd
|
|
893
839
|
self.log_path = f"{self.get_log_path()}/{log_name}_{self.action_record_id}.txt"
|
|
894
840
|
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
# Use a longer timeout for docker run since --pull=always may need to
|
|
905
|
-
# download large images on first run. Default: 30 minutes (1800 seconds)
|
|
906
|
-
# Can be configured via DOCKER_START_TIMEOUT_SECONDS environment variable
|
|
907
|
-
docker_start_timeout = int(os.environ.get("DOCKER_START_TIMEOUT_SECONDS", 1800))
|
|
908
|
-
logging.info(
|
|
909
|
-
"Waiting for docker container to start for action %s (timeout: %d seconds)",
|
|
910
|
-
self.action_record_id,
|
|
911
|
-
docker_start_timeout,
|
|
912
|
-
)
|
|
913
|
-
stdout, stderr = process.communicate(timeout=docker_start_timeout)
|
|
841
|
+
with open(self.log_path, "wb") as out:
|
|
842
|
+
self.process = subprocess.Popen(
|
|
843
|
+
shlex.split(self.cmd),
|
|
844
|
+
stdout=out,
|
|
845
|
+
stderr=out,
|
|
846
|
+
env={**os.environ},
|
|
847
|
+
start_new_session=True,
|
|
848
|
+
)
|
|
914
849
|
|
|
915
|
-
|
|
850
|
+
self.container_id = None
|
|
851
|
+
|
|
852
|
+
cid_file_path = f"./{self.action_record_id}.cid"
|
|
853
|
+
max_retries = 5
|
|
854
|
+
retry_delay = 1 # seconds
|
|
855
|
+
for attempt in range(max_retries):
|
|
856
|
+
try:
|
|
857
|
+
with open(cid_file_path, "r") as cid_file:
|
|
858
|
+
container_id = cid_file.read().strip()
|
|
859
|
+
self.container_id = container_id
|
|
860
|
+
logging.info(
|
|
861
|
+
"Started process for action %s with container ID: %s",
|
|
862
|
+
self.action_record_id,
|
|
863
|
+
self.container_id,
|
|
864
|
+
)
|
|
865
|
+
break
|
|
866
|
+
except FileNotFoundError:
|
|
867
|
+
logging.warning(
|
|
868
|
+
"CID file not found for action %s, attempt %d/%d",
|
|
869
|
+
self.action_record_id,
|
|
870
|
+
attempt + 1,
|
|
871
|
+
max_retries,
|
|
872
|
+
)
|
|
873
|
+
time.sleep(retry_delay)
|
|
874
|
+
except Exception as e:
|
|
875
|
+
logging.error(
|
|
876
|
+
"Error reading CID file for action %s: %s",
|
|
877
|
+
self.action_record_id,
|
|
878
|
+
str(e),
|
|
879
|
+
)
|
|
880
|
+
time.sleep(retry_delay)
|
|
881
|
+
else:
|
|
916
882
|
logging.error(
|
|
917
|
-
"
|
|
883
|
+
"Failed to read CID file for action %s after %d attempts",
|
|
918
884
|
self.action_record_id,
|
|
919
|
-
|
|
885
|
+
max_retries,
|
|
920
886
|
)
|
|
921
|
-
raise
|
|
887
|
+
raise Exception("Failed to start process: CID file not found")
|
|
922
888
|
|
|
923
|
-
|
|
924
|
-
logging.info(
|
|
925
|
-
"Started container for action %s with ID: %s",
|
|
926
|
-
self.action_record_id,
|
|
927
|
-
self.container_id,
|
|
928
|
-
)
|
|
929
|
-
|
|
930
|
-
# Start following container logs in background
|
|
931
|
-
self.process = subprocess.Popen(
|
|
932
|
-
["docker", "logs", "-f", self.container_id],
|
|
933
|
-
stdout=open(self.log_path, "wb"),
|
|
934
|
-
stderr=subprocess.STDOUT,
|
|
935
|
-
start_new_session=True,
|
|
936
|
-
)
|
|
937
|
-
|
|
938
|
-
# Report container id to scaling service
|
|
889
|
+
# report container id to scaling service
|
|
939
890
|
self.scaling.update_action_container_id(
|
|
940
891
|
action_record_id=self.action_record_id,
|
|
941
892
|
container_id=self.container_id,
|
|
@@ -1101,8 +1052,7 @@ def data_preparation_execute(
|
|
|
1101
1052
|
"Started pulling Docker image with PID: %s",
|
|
1102
1053
|
process.pid,
|
|
1103
1054
|
)
|
|
1104
|
-
|
|
1105
|
-
cmd = f'{self.get_base_docker_cmd(work_fs, destination_workspace_path="/usr/src/app/workspace", docker_workdir="/usr/src/app/workspace", extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/data_preparation.py {self.action_record_id} "'
|
|
1055
|
+
cmd = f'{self.get_base_docker_cmd(work_fs, destination_workspace_path="/usr/src/app/workspace", docker_workdir="/usr/src/app/workspace", extra_pkgs=["matrice_dataset"])} python3 /usr/src/app/data_preparation.py {self.action_record_id} "'
|
|
1106
1056
|
logging.info("cmd is: %s", cmd)
|
|
1107
1057
|
self.start(cmd, "data_preparation_log")
|
|
1108
1058
|
|
|
@@ -1131,8 +1081,7 @@ def data_processing_execute(self: ActionInstance):
|
|
|
1131
1081
|
service="bg-job-scheduler",
|
|
1132
1082
|
job_params=action["jobParams"],
|
|
1133
1083
|
)
|
|
1134
|
-
|
|
1135
|
-
cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/main.py {self.action_record_id} "'
|
|
1084
|
+
cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"])} python3 /usr/src/app/main.py {self.action_record_id} "'
|
|
1136
1085
|
logging.info("cmd: %s", cmd)
|
|
1137
1086
|
self.start(cmd, "data_processing_log")
|
|
1138
1087
|
|
|
@@ -1145,8 +1094,7 @@ def data_split_execute(self: ActionInstance):
|
|
|
1145
1094
|
if not action_details:
|
|
1146
1095
|
return
|
|
1147
1096
|
self.setup_action_requirements(action_details, work_fs, model_family="")
|
|
1148
|
-
|
|
1149
|
-
cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/data_split.py {self.action_record_id} "'
|
|
1097
|
+
cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"])} python3 /usr/src/app/data_split.py {self.action_record_id} "'
|
|
1150
1098
|
logging.info("cmd: %s", cmd)
|
|
1151
1099
|
self.start(cmd, "data_split")
|
|
1152
1100
|
|
|
@@ -1161,8 +1109,7 @@ def dataset_annotation_execute(
|
|
|
1161
1109
|
if not action_details:
|
|
1162
1110
|
return
|
|
1163
1111
|
self.setup_action_requirements(action_details, work_fs)
|
|
1164
|
-
|
|
1165
|
-
cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/dataset_annotation.py {self.action_record_id} "'
|
|
1112
|
+
cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"])} python3 /usr/src/app/dataset_annotation.py {self.action_record_id} "'
|
|
1166
1113
|
logging.info("cmd: %s", cmd)
|
|
1167
1114
|
self.start(cmd, "dataset_annotation")
|
|
1168
1115
|
|
|
@@ -1177,8 +1124,7 @@ def dataset_augmentation_execute(
|
|
|
1177
1124
|
if not action_details:
|
|
1178
1125
|
return
|
|
1179
1126
|
self.setup_action_requirements(action_details, work_fs)
|
|
1180
|
-
|
|
1181
|
-
cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/data_augmentation.py {self.action_record_id} "'
|
|
1127
|
+
cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"])} python3 /usr/src/app/data_augmentation.py {self.action_record_id} "'
|
|
1182
1128
|
logging.info("cmd: %s", cmd)
|
|
1183
1129
|
self.start(cmd, "dataset_augmentation")
|
|
1184
1130
|
|
|
@@ -1194,8 +1140,7 @@ def augmentation_server_creation_execute(
|
|
|
1194
1140
|
if not action_details:
|
|
1195
1141
|
return
|
|
1196
1142
|
self.setup_action_requirements(action_details, work_fs)
|
|
1197
|
-
|
|
1198
|
-
cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/aug_server.py {self.action_record_id} {external_port} "'
|
|
1143
|
+
cmd = f'{self.get_base_docker_cmd(work_fs, extra_pkgs=["matrice_dataset"])} python3 /usr/src/app/aug_server.py {self.action_record_id} {external_port} "'
|
|
1199
1144
|
logging.info("cmd: %s", cmd)
|
|
1200
1145
|
self.start(cmd, "augmentation_setup")
|
|
1201
1146
|
|
|
@@ -1216,34 +1161,32 @@ def database_setup_execute(self: ActionInstance):
|
|
|
1216
1161
|
|
|
1217
1162
|
project_id = action_details["_idProject"]
|
|
1218
1163
|
|
|
1219
|
-
# Define container names with action_record_id for uniqueness
|
|
1220
|
-
mongodb_container_name = f"database_setup_{self.action_record_id}"
|
|
1221
|
-
qdrant_container_name = f"qdrant_{self.action_record_id}"
|
|
1222
|
-
|
|
1223
1164
|
if action_details["actionDetails"].get("containerId"):
|
|
1224
1165
|
logging.info(
|
|
1225
|
-
"Using existing container ID for
|
|
1166
|
+
"Using existing container ID for inference tracker: %s",
|
|
1226
1167
|
action_details["actionDetails"]["containerId"],
|
|
1227
1168
|
)
|
|
1228
1169
|
self.docker_container = action_details["actionDetails"]["containerId"]
|
|
1229
1170
|
cmd = "docker restart " + self.docker_container
|
|
1230
|
-
self.start(cmd, "
|
|
1171
|
+
self.start(cmd, "qdrant_setup")
|
|
1231
1172
|
|
|
1232
|
-
#
|
|
1233
|
-
qdrant_cmd =
|
|
1234
|
-
self.start(qdrant_cmd,
|
|
1173
|
+
#qdrant restart
|
|
1174
|
+
qdrant_cmd = "docker restart qdrant"
|
|
1175
|
+
self.start(qdrant_cmd, 'qdrant_setup')
|
|
1235
1176
|
|
|
1236
1177
|
return
|
|
1178
|
+
|
|
1179
|
+
|
|
1180
|
+
dbPath =action_details["jobParams"].get("dbPath","/host/data/path/mongodb_data")
|
|
1237
1181
|
|
|
1238
|
-
dbPath = action_details["jobParams"].get("dbPath", "/host/data/path/mongodb_data")
|
|
1239
1182
|
|
|
1240
1183
|
# MongoDB container with --net=host (Port: 27020:27017)
|
|
1241
1184
|
cmd = (
|
|
1242
|
-
f"docker run
|
|
1243
|
-
f"--name {mongodb_container_name} "
|
|
1244
|
-
f"-v matrice_myvol:/matrice_data "
|
|
1185
|
+
f"docker run --pull=always --net=host "
|
|
1245
1186
|
f"-v {dbPath}:{dbPath} "
|
|
1187
|
+
f"--name database_setup_{self.action_record_id} "
|
|
1246
1188
|
f"-v /var/run/docker.sock:/var/run/docker.sock "
|
|
1189
|
+
f"--cidfile ./{self.action_record_id}.cid "
|
|
1247
1190
|
f"-e ACTION_RECORD_ID={self.action_record_id} "
|
|
1248
1191
|
f"-e MATRICE_ACCESS_KEY_ID={self.matrice_access_key_id} "
|
|
1249
1192
|
f"-e MATRICE_SECRET_ACCESS_KEY={self.matrice_secret_access_key} "
|
|
@@ -1253,23 +1196,6 @@ def database_setup_execute(self: ActionInstance):
|
|
|
1253
1196
|
)
|
|
1254
1197
|
logging.info("Starting DB container (Port: 27020:27017): %s", cmd)
|
|
1255
1198
|
|
|
1256
|
-
# Qdrant container with --net=host (Port: 6334)
|
|
1257
|
-
qdrant_cmd = (
|
|
1258
|
-
f"docker run -d --pull=always --net=host "
|
|
1259
|
-
f"--name {qdrant_container_name} "
|
|
1260
|
-
f"-v matrice_myvol:/matrice_data "
|
|
1261
|
-
f"qdrant/qdrant:latest "
|
|
1262
|
-
)
|
|
1263
|
-
logging.info("Starting Qdrant container (Port: 6334): %s", qdrant_cmd)
|
|
1264
|
-
|
|
1265
|
-
# Start Qdrant container
|
|
1266
|
-
qdrant_process = subprocess.Popen(
|
|
1267
|
-
qdrant_cmd,
|
|
1268
|
-
shell=True,
|
|
1269
|
-
stdout=subprocess.PIPE,
|
|
1270
|
-
stderr=subprocess.PIPE,
|
|
1271
|
-
)
|
|
1272
|
-
logging.info("Qdrant container started successfully")
|
|
1273
1199
|
|
|
1274
1200
|
# Docker Command run
|
|
1275
1201
|
self.start(cmd, "database_setup")
|
|
@@ -1289,8 +1215,6 @@ def facial_recognition_setup_execute(self: ActionInstance):
|
|
|
1289
1215
|
|
|
1290
1216
|
self.setup_action_requirements(action_details)
|
|
1291
1217
|
|
|
1292
|
-
container_name = f"facial_recognition_{self.action_record_id}"
|
|
1293
|
-
|
|
1294
1218
|
if action_details["actionDetails"].get("containerId"):
|
|
1295
1219
|
logging.info(
|
|
1296
1220
|
"Using existing container ID for facial recognition worker: %s",
|
|
@@ -1304,13 +1228,15 @@ def facial_recognition_setup_execute(self: ActionInstance):
|
|
|
1304
1228
|
# Facial recognition worker container with --net=host (Port: 8081)
|
|
1305
1229
|
worker_cmd = (
|
|
1306
1230
|
f"docker run -d --pull=always --net=host "
|
|
1307
|
-
f"--name
|
|
1231
|
+
f"--name worker "
|
|
1232
|
+
f"--cidfile ./{self.action_record_id}.cid "
|
|
1308
1233
|
f"-v matrice_myvol:/matrice_data "
|
|
1234
|
+
f"--cidfile ./{self.action_record_id}.cid "
|
|
1309
1235
|
f'-e ENV="{os.environ.get("ENV", "prod")}" '
|
|
1310
1236
|
f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
|
|
1311
1237
|
f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
|
|
1312
1238
|
f'-e ACTION_ID="{self.action_record_id}" '
|
|
1313
|
-
f'--restart=unless-stopped '
|
|
1239
|
+
f' --restart=unless-stopped '
|
|
1314
1240
|
f"{image}"
|
|
1315
1241
|
)
|
|
1316
1242
|
logging.info("Starting facial recognition worker (Port: 8081): %s", worker_cmd)
|
|
@@ -1332,8 +1258,6 @@ def lpr_setup_execute(self: ActionInstance):
|
|
|
1332
1258
|
|
|
1333
1259
|
self.setup_action_requirements(action_details)
|
|
1334
1260
|
|
|
1335
|
-
container_name = f"lpr_{self.action_record_id}"
|
|
1336
|
-
|
|
1337
1261
|
if action_details["actionDetails"].get("containerId"):
|
|
1338
1262
|
logging.info(
|
|
1339
1263
|
"Using existing container ID for LPR worker: %s",
|
|
@@ -1347,14 +1271,15 @@ def lpr_setup_execute(self: ActionInstance):
|
|
|
1347
1271
|
# LPR worker container with --net=host (Port: 8082)
|
|
1348
1272
|
worker_cmd = (
|
|
1349
1273
|
f"docker run -d --net=host --pull=always "
|
|
1350
|
-
f"--name
|
|
1274
|
+
f"--name lpr-worker "
|
|
1275
|
+
f"--cidfile ./{self.action_record_id}.cid "
|
|
1351
1276
|
f"-v matrice_myvol:/matrice_data "
|
|
1352
1277
|
f'-e ENV="{os.environ.get("ENV", "prod")}" '
|
|
1353
1278
|
f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
|
|
1354
1279
|
f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
|
|
1355
1280
|
f'-e ACTION_ID="{self.action_record_id}" '
|
|
1356
1281
|
f'-e PORT=8082 '
|
|
1357
|
-
f'--restart=unless-stopped '
|
|
1282
|
+
f' --restart=unless-stopped '
|
|
1358
1283
|
f"{image}"
|
|
1359
1284
|
)
|
|
1360
1285
|
logging.info("Starting LPR worker (Port: 8082): %s", worker_cmd)
|
|
@@ -1385,8 +1310,6 @@ def inference_ws_server_execute(self: ActionInstance):
|
|
|
1385
1310
|
|
|
1386
1311
|
logging.info(f"Inference WebSocket server will use IP: {ws_host} on port 8102 (use_host_network={use_host_network})")
|
|
1387
1312
|
|
|
1388
|
-
container_name = f"inference_ws_{self.action_record_id}"
|
|
1389
|
-
|
|
1390
1313
|
if action_details["actionDetails"].get("containerId"):
|
|
1391
1314
|
logging.info(
|
|
1392
1315
|
"Using existing container ID for inference WebSocket server: %s",
|
|
@@ -1400,11 +1323,12 @@ def inference_ws_server_execute(self: ActionInstance):
|
|
|
1400
1323
|
# Inference WebSocket server with --net=host (Port: 8102)
|
|
1401
1324
|
worker_cmd = (
|
|
1402
1325
|
f"docker run -d --pull=always --net=host "
|
|
1403
|
-
f"--name
|
|
1326
|
+
f"--name inference "
|
|
1327
|
+
f"--cidfile ./{self.action_record_id}.cid "
|
|
1404
1328
|
f'-e ENV="{os.environ.get("ENV", "prod")}" '
|
|
1405
1329
|
f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
|
|
1406
1330
|
f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
|
|
1407
|
-
f'--restart=unless-stopped '
|
|
1331
|
+
f' --restart=unless-stopped '
|
|
1408
1332
|
f"{image} "
|
|
1409
1333
|
f"./app "
|
|
1410
1334
|
f"{self.action_record_id} "
|
|
@@ -1435,8 +1359,6 @@ def fe_fs_streaming_execute(self: ActionInstance):
|
|
|
1435
1359
|
|
|
1436
1360
|
logging.info(f"Frontend streaming will connect to WebSocket at: {ws_url}")
|
|
1437
1361
|
|
|
1438
|
-
container_name = f"fe_streaming_{self.action_record_id}"
|
|
1439
|
-
|
|
1440
1362
|
if action_details["actionDetails"].get("containerId"):
|
|
1441
1363
|
logging.info(
|
|
1442
1364
|
"Using existing container ID for frontend streaming: %s",
|
|
@@ -1450,14 +1372,15 @@ def fe_fs_streaming_execute(self: ActionInstance):
|
|
|
1450
1372
|
# Frontend streaming with --net=host (Port: 3000)
|
|
1451
1373
|
worker_cmd = (
|
|
1452
1374
|
f"docker run -d --pull=always --net=host "
|
|
1453
|
-
f"--name
|
|
1375
|
+
f"--name fe_streaming "
|
|
1376
|
+
f"--cidfile ./{self.action_record_id}.cid "
|
|
1454
1377
|
f"-v matrice_myvol:/matrice_data "
|
|
1455
1378
|
f'-e ENV="{os.environ.get("ENV", "prod")}" '
|
|
1456
1379
|
f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
|
|
1457
1380
|
f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
|
|
1458
1381
|
f"-e PORT=3000 "
|
|
1459
1382
|
f'-e WS_HOST="{ws_url}" '
|
|
1460
|
-
f'--restart=unless-stopped '
|
|
1383
|
+
f' --restart=unless-stopped '
|
|
1461
1384
|
f"{image}"
|
|
1462
1385
|
)
|
|
1463
1386
|
logging.info("Starting frontend streaming (Port: 3000) with WS_HOST=%s: %s", ws_url, worker_cmd)
|
|
@@ -1482,8 +1405,6 @@ def fe_analytics_service_execute(self: ActionInstance):
|
|
|
1482
1405
|
|
|
1483
1406
|
project_id = action_details["_idProject"]
|
|
1484
1407
|
|
|
1485
|
-
container_name = f"fe_analytics_{self.action_record_id}"
|
|
1486
|
-
|
|
1487
1408
|
if action_details["actionDetails"].get("containerId"):
|
|
1488
1409
|
logging.info(
|
|
1489
1410
|
"Using existing container ID for frontend analytics service: %s",
|
|
@@ -1497,14 +1418,15 @@ def fe_analytics_service_execute(self: ActionInstance):
|
|
|
1497
1418
|
# Frontend analytics service with --net=host (Port: 3001)
|
|
1498
1419
|
worker_cmd = (
|
|
1499
1420
|
f"docker run -d --pull=always --net=host "
|
|
1500
|
-
f"--name
|
|
1421
|
+
f"--name fe-analytics "
|
|
1422
|
+
f"--cidfile ./{self.action_record_id}.cid "
|
|
1501
1423
|
f'-e NEXT_PUBLIC_DEPLOYMENT_ENV="{os.environ.get("ENV", "prod")}" '
|
|
1502
1424
|
f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
|
|
1503
1425
|
f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
|
|
1504
1426
|
f'-e ACTION_ID="{self.action_record_id}" '
|
|
1505
1427
|
f"-e PORT=3001 "
|
|
1506
1428
|
f'-e PROJECT_ID="{project_id}" '
|
|
1507
|
-
f'--restart=unless-stopped '
|
|
1429
|
+
f' --restart=unless-stopped '
|
|
1508
1430
|
f"{image}"
|
|
1509
1431
|
)
|
|
1510
1432
|
logging.info("Starting frontend analytics service (Port: 3001): %s", worker_cmd)
|
|
@@ -1529,8 +1451,7 @@ def synthetic_dataset_generation_execute(self: ActionInstance):
|
|
|
1529
1451
|
else:
|
|
1530
1452
|
return
|
|
1531
1453
|
use_gpu = self.get_gpu_config(action_details)
|
|
1532
|
-
|
|
1533
|
-
cmd = f'{self.get_base_docker_cmd(work_fs=work_fs, use_gpu=use_gpu, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/synthetic_dataset_generation.py {self.action_record_id} "'
|
|
1454
|
+
cmd = f'{self.get_base_docker_cmd(work_fs=work_fs, use_gpu=use_gpu, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_dataset"])} python3 /usr/src/app/synthetic_dataset_generation.py {self.action_record_id} "'
|
|
1534
1455
|
logging.info("cmd is: %s", cmd)
|
|
1535
1456
|
self.start(cmd, "dataset_generation")
|
|
1536
1457
|
|
|
@@ -1551,8 +1472,7 @@ def synthetic_data_setup_execute(self: ActionInstance):
|
|
|
1551
1472
|
else:
|
|
1552
1473
|
return
|
|
1553
1474
|
use_gpu = self.get_gpu_config(action_details)
|
|
1554
|
-
|
|
1555
|
-
cmd = f'{self.get_base_docker_cmd(work_fs=work_fs, use_gpu=use_gpu, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_dataset"], container_name=container_name)} python3 /usr/src/app/data_generation.py {self.action_record_id} {external_port} "'
|
|
1475
|
+
cmd = f'{self.get_base_docker_cmd(work_fs=work_fs, use_gpu=use_gpu, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_dataset"])} python3 /usr/src/app/data_generation.py {self.action_record_id} {external_port} "'
|
|
1556
1476
|
logging.info("cmd is: %s", cmd)
|
|
1557
1477
|
self.start(cmd, "synthetic_data_setup")
|
|
1558
1478
|
|
|
@@ -1589,8 +1509,6 @@ def redis_setup_execute(self: ActionInstance):
|
|
|
1589
1509
|
|
|
1590
1510
|
redis_image = action_details["actionDetails"].get("redis_image", "redis:latest")
|
|
1591
1511
|
|
|
1592
|
-
# Define container names with action_record_id for uniqueness
|
|
1593
|
-
redis_container_name = f"redis_{self.action_record_id}"
|
|
1594
1512
|
|
|
1595
1513
|
if action_details["actionDetails"].get("containerId"):
|
|
1596
1514
|
logging.info(
|
|
@@ -1602,34 +1520,18 @@ def redis_setup_execute(self: ActionInstance):
|
|
|
1602
1520
|
self.start(cmd, "redis_setup")
|
|
1603
1521
|
|
|
1604
1522
|
# Redis container restart
|
|
1605
|
-
redis_restart_cmd =
|
|
1523
|
+
redis_restart_cmd = "docker restart redis_container"
|
|
1606
1524
|
self.start(redis_restart_cmd, "redis")
|
|
1607
1525
|
|
|
1608
1526
|
return
|
|
1609
1527
|
|
|
1610
|
-
# Redis container with --net=host (Port: 6379)
|
|
1528
|
+
# Redis container with --net=host (Port: 6379)
|
|
1611
1529
|
redis_cmd = (
|
|
1612
1530
|
f"docker run -d --net=host "
|
|
1613
|
-
f"--name
|
|
1531
|
+
f"--name redis_container "
|
|
1614
1532
|
f"--restart unless-stopped "
|
|
1615
1533
|
f"{redis_image} "
|
|
1616
|
-
f"redis-server --bind 0.0.0.0 "
|
|
1617
|
-
f"--appendonly no "
|
|
1618
|
-
f'--save "" '
|
|
1619
|
-
f"--maxmemory 30gb "
|
|
1620
|
-
f"--maxmemory-policy allkeys-lru "
|
|
1621
|
-
f"--io-threads 4 "
|
|
1622
|
-
f"--io-threads-do-reads yes "
|
|
1623
|
-
f"--stream-node-max-bytes 8192 "
|
|
1624
|
-
f"--stream-node-max-entries 1000 "
|
|
1625
|
-
f"--hz 100 "
|
|
1626
|
-
f"--tcp-backlog 2048 "
|
|
1627
|
-
f"--timeout 0 "
|
|
1628
|
-
f"--lazyfree-lazy-eviction yes "
|
|
1629
|
-
f"--lazyfree-lazy-expire yes "
|
|
1630
|
-
f"--lazyfree-lazy-server-del yes "
|
|
1631
|
-
f"--activedefrag yes "
|
|
1632
|
-
f"--requirepass {redis_password}"
|
|
1534
|
+
f"redis-server --bind 0.0.0.0 --appendonly yes --requirepass {redis_password}"
|
|
1633
1535
|
)
|
|
1634
1536
|
|
|
1635
1537
|
logging.info("Starting Redis container on %s:6379: %s", redis_host, redis_cmd)
|
|
@@ -1653,9 +1555,8 @@ def redis_setup_execute(self: ActionInstance):
|
|
|
1653
1555
|
|
|
1654
1556
|
# bg-redis management container with --net=host (Port: 8082)
|
|
1655
1557
|
cmd = (
|
|
1656
|
-
f"docker run
|
|
1657
|
-
|
|
1658
|
-
f"--name bg-redis_{self.action_record_id} "
|
|
1558
|
+
f"docker run --net=host "
|
|
1559
|
+
f"--cidfile ./{self.action_record_id}.cid "
|
|
1659
1560
|
f"-e REDIS_URL={shlex.quote(env_vars['REDIS_URL'])} "
|
|
1660
1561
|
f"-e REDIS_PASSWORD={shlex.quote(env_vars['REDIS_PASSWORD'])} "
|
|
1661
1562
|
f"-e MATRICE_ACCESS_KEY_ID={shlex.quote(self.matrice_access_key_id)} "
|
|
@@ -1682,8 +1583,7 @@ def deploy_aggregator_execute(
|
|
|
1682
1583
|
if not action_details:
|
|
1683
1584
|
return
|
|
1684
1585
|
self.setup_action_requirements(action_details, work_fs)
|
|
1685
|
-
|
|
1686
|
-
cmd = f'{self.get_base_docker_cmd(work_fs, container_name=container_name)} python3 /usr/src/app/deploy_aggregator.py {self.action_record_id} "'
|
|
1586
|
+
cmd = f'{self.get_base_docker_cmd(work_fs)} python3 /usr/src/app/deploy_aggregator.py {self.action_record_id} "'
|
|
1687
1587
|
logging.info("cmd: %s", cmd)
|
|
1688
1588
|
self.start(cmd, "deploy_aggregator")
|
|
1689
1589
|
|
|
@@ -1699,10 +1599,6 @@ def model_deploy_execute(self: ActionInstance):
|
|
|
1699
1599
|
return
|
|
1700
1600
|
action_id = action_details["_id"]
|
|
1701
1601
|
model_family = action_details["actionDetails"]["modelFamily"]
|
|
1702
|
-
|
|
1703
|
-
# Get the service ID to track deployments
|
|
1704
|
-
service_id = action_details.get("_idService")
|
|
1705
|
-
|
|
1706
1602
|
self.setup_action_requirements(
|
|
1707
1603
|
action_details,
|
|
1708
1604
|
work_fs,
|
|
@@ -1710,29 +1606,17 @@ def model_deploy_execute(self: ActionInstance):
|
|
|
1710
1606
|
action_id=action_id,
|
|
1711
1607
|
)
|
|
1712
1608
|
|
|
1713
|
-
#
|
|
1609
|
+
# Get GPU configuration based on requirements and availability
|
|
1610
|
+
# This uses the best-fit algorithm to select the most appropriate GPU(s)
|
|
1611
|
+
use_gpu = self.get_gpu_config(action_details)
|
|
1612
|
+
|
|
1613
|
+
# Override: If GPU is required, use all available GPUs
|
|
1714
1614
|
gpuRequired = action_details["actionDetails"].get("gpuRequired", False)
|
|
1715
1615
|
if gpuRequired:
|
|
1716
1616
|
use_gpu = "--runtime=nvidia --gpus all"
|
|
1717
|
-
else:
|
|
1718
|
-
use_gpu = ""
|
|
1719
|
-
|
|
1720
|
-
logging.info(
|
|
1721
|
-
"Action %s: Model deployment GPU config: %s",
|
|
1722
|
-
action_id,
|
|
1723
|
-
use_gpu if use_gpu else "CPU-only"
|
|
1724
|
-
)
|
|
1725
|
-
|
|
1726
|
-
# Get or create TRITON_PORTS (uses utility method)
|
|
1727
|
-
triton_ports = ActionInstance.get_or_create_triton_ports(service_id, self.scaling)
|
|
1728
1617
|
|
|
1729
|
-
extra_env_vars = {
|
|
1730
|
-
|
|
1731
|
-
"TRITON_PORTS": triton_ports
|
|
1732
|
-
}
|
|
1733
|
-
|
|
1734
|
-
container_name = f"model_deploy_{self.action_record_id}"
|
|
1735
|
-
cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, mount_docker_sock=True, action_id=action_id, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_inference", "matrice_analytics"], container_name=container_name)} python3 deploy.py {self.action_record_id} {external_port}"'
|
|
1618
|
+
extra_env_vars = {"INTERNAL_PORT": internal_port}
|
|
1619
|
+
cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, mount_docker_sock=True, action_id=action_id, extra_env_vars=extra_env_vars, extra_pkgs=["matrice_inference", "matrice_analytics"])} python3 deploy.py {self.action_record_id} {external_port}"'
|
|
1736
1620
|
logging.info("cmd is: %s", cmd)
|
|
1737
1621
|
self.start(cmd, "deploy_log")
|
|
1738
1622
|
|
|
@@ -1765,8 +1649,7 @@ def model_train_execute(self: ActionInstance):
|
|
|
1765
1649
|
self.start(cmd, "train_log")
|
|
1766
1650
|
return
|
|
1767
1651
|
|
|
1768
|
-
|
|
1769
|
-
cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, action_id=action_id, model_key=model_key, container_name=container_name)} python3 train.py {self.action_record_id} "'
|
|
1652
|
+
cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, action_id=action_id, model_key=model_key)} python3 train.py {self.action_record_id} "'
|
|
1770
1653
|
logging.info("cmd is: %s", cmd)
|
|
1771
1654
|
self.start(cmd, "train_log")
|
|
1772
1655
|
|
|
@@ -1789,7 +1672,7 @@ def model_eval_execute(self: ActionInstance):
|
|
|
1789
1672
|
)
|
|
1790
1673
|
if action_details["actionDetails"].get("containerId"):
|
|
1791
1674
|
logging.info(
|
|
1792
|
-
"Using existing container ID for
|
|
1675
|
+
"Using existing container ID for training: %s",
|
|
1793
1676
|
action_details["actionDetails"]["containerId"],
|
|
1794
1677
|
)
|
|
1795
1678
|
self.docker_container = action_details["actionDetails"]["containerId"]
|
|
@@ -1797,8 +1680,7 @@ def model_eval_execute(self: ActionInstance):
|
|
|
1797
1680
|
self.start(cmd, "eval_log")
|
|
1798
1681
|
return
|
|
1799
1682
|
|
|
1800
|
-
|
|
1801
|
-
cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, action_id=action_id, container_name=container_name)} python3 eval.py {self.action_record_id} "'
|
|
1683
|
+
cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, action_id=action_id)} python3 eval.py {self.action_record_id} "'
|
|
1802
1684
|
logging.info("cmd is: %s", cmd)
|
|
1803
1685
|
self.start(cmd, "eval_log")
|
|
1804
1686
|
|
|
@@ -1824,7 +1706,7 @@ def model_export_execute(self: ActionInstance):
|
|
|
1824
1706
|
)
|
|
1825
1707
|
if action_details["actionDetails"].get("containerId"):
|
|
1826
1708
|
logging.info(
|
|
1827
|
-
"Using existing container ID for
|
|
1709
|
+
"Using existing container ID for training: %s",
|
|
1828
1710
|
action_details["actionDetails"]["containerId"],
|
|
1829
1711
|
)
|
|
1830
1712
|
self.docker_container = action_details["actionDetails"]["containerId"]
|
|
@@ -1832,8 +1714,7 @@ def model_export_execute(self: ActionInstance):
|
|
|
1832
1714
|
self.start(cmd, "export_log")
|
|
1833
1715
|
return
|
|
1834
1716
|
|
|
1835
|
-
|
|
1836
|
-
cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, action_id=action_id, container_name=container_name)} python3 export.py {self.action_record_id} "'
|
|
1717
|
+
cmd = f'{self.get_base_docker_cmd(work_fs, use_gpu, action_id=action_id)} python3 export.py {self.action_record_id} "'
|
|
1837
1718
|
logging.info("cmd is: %s", cmd)
|
|
1838
1719
|
self.start(cmd, "export_log")
|
|
1839
1720
|
|
|
@@ -1849,8 +1730,7 @@ def image_build_execute(self: ActionInstance):
|
|
|
1849
1730
|
action_id = action_details["_id"]
|
|
1850
1731
|
internal_api_key = self.get_internal_api_key(action_id)
|
|
1851
1732
|
extra_env_vars = {"MATRICE_INTERNAL_API_KEY": internal_api_key}
|
|
1852
|
-
|
|
1853
|
-
cmd = f'{self.get_base_docker_cmd(mount_docker_sock=True, extra_env_vars=extra_env_vars, container_name=container_name)} python3 main.py {model_family_id} {action_id}"'
|
|
1733
|
+
cmd = f'{self.get_base_docker_cmd(mount_docker_sock=True, extra_env_vars=extra_env_vars)} python3 main.py {model_family_id} {action_id}"'
|
|
1854
1734
|
logging.info("cmd is: %s", cmd)
|
|
1855
1735
|
self.start(cmd, "image_build_log")
|
|
1856
1736
|
|
|
@@ -1862,8 +1742,7 @@ def resource_clone_execute(self: ActionInstance):
|
|
|
1862
1742
|
if not action_details:
|
|
1863
1743
|
return
|
|
1864
1744
|
self.setup_action_requirements(action_details)
|
|
1865
|
-
|
|
1866
|
-
cmd = f'{self.get_base_docker_cmd(container_name=container_name)} python3 main.py {self.action_record_id} "'
|
|
1745
|
+
cmd = f'{self.get_base_docker_cmd()} python3 main.py {self.action_record_id} "'
|
|
1867
1746
|
logging.info("cmd is: %s", cmd)
|
|
1868
1747
|
self.start(cmd, "resource_clone")
|
|
1869
1748
|
|
|
@@ -1881,7 +1760,7 @@ def streaming_gateway_execute(self: ActionInstance):
|
|
|
1881
1760
|
)
|
|
1882
1761
|
if action_details["actionDetails"].get("containerId"):
|
|
1883
1762
|
logging.info(
|
|
1884
|
-
"Using existing container ID for
|
|
1763
|
+
"Using existing container ID for training: %s",
|
|
1885
1764
|
action_details["actionDetails"]["containerId"],
|
|
1886
1765
|
)
|
|
1887
1766
|
self.docker_container = action_details["actionDetails"]["containerId"]
|
|
@@ -1889,8 +1768,7 @@ def streaming_gateway_execute(self: ActionInstance):
|
|
|
1889
1768
|
self.start(cmd, "streaming_gateway")
|
|
1890
1769
|
return
|
|
1891
1770
|
|
|
1892
|
-
|
|
1893
|
-
cmd = f'{self.get_base_docker_cmd(extra_pkgs=["matrice_streaming"], container_name=container_name)} python3 /usr/src/app/streaming_gateway.py {self.action_record_id} "'
|
|
1771
|
+
cmd = f'{self.get_base_docker_cmd(extra_pkgs=["matrice_streaming"])} python3 /usr/src/app/streaming_gateway.py {self.action_record_id} "'
|
|
1894
1772
|
logging.info("cmd is: %s", cmd)
|
|
1895
1773
|
self.start(cmd, "streaming_gateway")
|
|
1896
1774
|
|
|
@@ -1986,7 +1864,7 @@ def kafka_setup_execute(self: ActionInstance):
|
|
|
1986
1864
|
|
|
1987
1865
|
if action_details["actionDetails"].get("containerId"):
|
|
1988
1866
|
logging.info(
|
|
1989
|
-
"Using existing container ID for
|
|
1867
|
+
"Using existing container ID for training: %s",
|
|
1990
1868
|
action_details["actionDetails"]["containerId"],
|
|
1991
1869
|
)
|
|
1992
1870
|
self.docker_container = action_details["actionDetails"]["containerId"]
|
|
@@ -1994,12 +1872,10 @@ def kafka_setup_execute(self: ActionInstance):
|
|
|
1994
1872
|
self.start(cmd, "kafka_setup")
|
|
1995
1873
|
return
|
|
1996
1874
|
|
|
1997
|
-
container_name = f"kafka_{self.action_record_id}"
|
|
1998
1875
|
|
|
1999
1876
|
# Kafka container with --net=host (Ports: 9092, 9093)
|
|
2000
1877
|
cmd = (
|
|
2001
|
-
f"docker run
|
|
2002
|
-
f"--name {container_name} "
|
|
1878
|
+
f"docker run --net=host "
|
|
2003
1879
|
f"{env_args} "
|
|
2004
1880
|
f"--shm-size=30G --pull=always "
|
|
2005
1881
|
f'aiforeveryone/matrice-kafka:latest /bin/bash -c "'
|
|
@@ -2032,8 +1908,6 @@ def inference_tracker_setup_execute(self: ActionInstance):
|
|
|
2032
1908
|
|
|
2033
1909
|
self.setup_action_requirements(action_details)
|
|
2034
1910
|
|
|
2035
|
-
container_name = f"inference_tracker_{self.action_record_id}"
|
|
2036
|
-
|
|
2037
1911
|
if action_details["actionDetails"].get("containerId"):
|
|
2038
1912
|
logging.info(
|
|
2039
1913
|
"Using existing container ID for inference tracker: %s",
|
|
@@ -2047,13 +1921,14 @@ def inference_tracker_setup_execute(self: ActionInstance):
|
|
|
2047
1921
|
# This is the existing Docker run command
|
|
2048
1922
|
worker_cmd = (
|
|
2049
1923
|
f"docker run -d --pull=always --net=host "
|
|
2050
|
-
|
|
1924
|
+
f"--cidfile ./{self.action_record_id}.cid "
|
|
1925
|
+
f"--name inference-tracker-worker "
|
|
2051
1926
|
f"-v matrice_myvol:/matrice_data "
|
|
2052
1927
|
f'-e ENV="{os.environ.get("ENV", "prod")}" '
|
|
2053
1928
|
f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
|
|
2054
1929
|
f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
|
|
2055
1930
|
f'-e ACTION_ID="{self.action_record_id}" '
|
|
2056
|
-
f'--restart=unless-stopped '
|
|
1931
|
+
f' --restart=unless-stopped '
|
|
2057
1932
|
f"{image}"
|
|
2058
1933
|
)
|
|
2059
1934
|
|
|
@@ -2075,11 +1950,9 @@ def video_storage_setup_execute(self: ActionInstance):
|
|
|
2075
1950
|
|
|
2076
1951
|
self.setup_action_requirements(action_details)
|
|
2077
1952
|
|
|
2078
|
-
container_name = f"video_storage_{self.action_record_id}"
|
|
2079
|
-
|
|
2080
1953
|
if action_details["actionDetails"].get("containerId"):
|
|
2081
1954
|
logging.info(
|
|
2082
|
-
"Using existing container ID for
|
|
1955
|
+
"Using existing container ID for inference tracker: %s",
|
|
2083
1956
|
action_details["actionDetails"]["containerId"],
|
|
2084
1957
|
)
|
|
2085
1958
|
self.docker_container = action_details["actionDetails"]["containerId"]
|
|
@@ -2090,13 +1963,14 @@ def video_storage_setup_execute(self: ActionInstance):
|
|
|
2090
1963
|
# This is the existing Docker run command
|
|
2091
1964
|
worker_cmd = (
|
|
2092
1965
|
f"docker run -d --pull=always --net=host "
|
|
2093
|
-
|
|
1966
|
+
f"--cidfile ./{self.action_record_id}.cid "
|
|
1967
|
+
f"--name media_server "
|
|
2094
1968
|
f"-v matrice_myvol:/matrice_data "
|
|
2095
1969
|
f'-e ENV="{os.environ.get("ENV", "prod")}" '
|
|
2096
1970
|
f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
|
|
2097
1971
|
f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
|
|
2098
1972
|
f'-e ACTION_ID="{self.action_record_id}" '
|
|
2099
|
-
f'--restart=unless-stopped '
|
|
1973
|
+
f' --restart=unless-stopped '
|
|
2100
1974
|
f"{image}"
|
|
2101
1975
|
)
|
|
2102
1976
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
matrice_compute/__init__.py,sha256=YZhx7rQlD1TAlhBMbsU3_Xp-tpLyTAxWZDcQvqmwR2g,723
|
|
2
|
-
matrice_compute/action_instance.py,sha256=
|
|
2
|
+
matrice_compute/action_instance.py,sha256=LaspKYqh_pgIBpPEvdl6AAjXQv0u8NVvlaxhxSv5ky0,75943
|
|
3
3
|
matrice_compute/actions_manager.py,sha256=a_TulMnu462xc0t_A-Mpug5zhQTmtpjiv7mhiC_IAVw,18280
|
|
4
4
|
matrice_compute/actions_scaledown_manager.py,sha256=pJ0nduNwHWZ10GnqJNx0Ok7cVWabQ_M8E2Vb9pH3A_k,2002
|
|
5
5
|
matrice_compute/compute_operations_handler.py,sha256=amcMhmXtv2irE6qK8Vbgec_8uFqjWmVVp0VWq-73_MU,17781
|
|
@@ -11,8 +11,8 @@ matrice_compute/resources_tracker.py,sha256=AG_lnxoSi1TIDD0atBybntGyvyenwmP7sGCf
|
|
|
11
11
|
matrice_compute/scaling.py,sha256=UQDI8wN9JEKafvUVPF0Pk9XmhKlbMkeu16AZyyOuSE8,55147
|
|
12
12
|
matrice_compute/shutdown_manager.py,sha256=rnP9Qes6JJKDnebmBC9rqkH__X9a8TMjhWQPWoOQKFs,13232
|
|
13
13
|
matrice_compute/task_utils.py,sha256=3qIutiQdYPyGRxH9ZwLbqdg8sZcnp6jp08pszWCRFl0,2820
|
|
14
|
-
matrice_compute-0.1.
|
|
15
|
-
matrice_compute-0.1.
|
|
16
|
-
matrice_compute-0.1.
|
|
17
|
-
matrice_compute-0.1.
|
|
18
|
-
matrice_compute-0.1.
|
|
14
|
+
matrice_compute-0.1.40.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
|
|
15
|
+
matrice_compute-0.1.40.dist-info/METADATA,sha256=xtlVwHnFMs8_fGoXD9F2K98IV2Q8as8-H2b0Q6Dr584,1038
|
|
16
|
+
matrice_compute-0.1.40.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
+
matrice_compute-0.1.40.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
|
|
18
|
+
matrice_compute-0.1.40.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|