matrice-compute 0.1.32__tar.gz → 0.1.34__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/PKG-INFO +1 -1
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/matrice_compute.egg-info/PKG-INFO +1 -1
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/src/matrice_compute/action_instance.py +55 -4
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/src/matrice_compute/actions_manager.py +5 -1
- matrice_compute-0.1.34/src/matrice_compute/resources_tracker.py +1485 -0
- matrice_compute-0.1.32/src/matrice_compute/resources_tracker.py +0 -842
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/LICENSE.txt +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/README.md +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/matrice_compute.egg-info/SOURCES.txt +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/matrice_compute.egg-info/dependency_links.txt +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/matrice_compute.egg-info/not-zip-safe +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/matrice_compute.egg-info/top_level.txt +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/pyproject.toml +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/setup.cfg +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/setup.py +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/src/matrice_compute/__init__.py +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/src/matrice_compute/actions_scaledown_manager.py +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/src/matrice_compute/compute_operations_handler.py +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/src/matrice_compute/instance_manager.py +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/src/matrice_compute/instance_utils.py +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/src/matrice_compute/prechecks.py +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/src/matrice_compute/py.typed +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/src/matrice_compute/scaling.py +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/src/matrice_compute/shutdown_manager.py +0 -0
- {matrice_compute-0.1.32 → matrice_compute-0.1.34}/src/matrice_compute/task_utils.py +0 -0
|
@@ -83,7 +83,8 @@ class ActionInstance:
|
|
|
83
83
|
"inference_ws_server": inference_ws_server_execute,
|
|
84
84
|
"fe_analytics_service": fe_analytics_service_execute,
|
|
85
85
|
"lpr_setup": lpr_setup_execute,
|
|
86
|
-
"inference_tracker_server": inference_tracker_setup_execute
|
|
86
|
+
"inference_tracker_server": inference_tracker_setup_execute,
|
|
87
|
+
"video_storage_setup" : video_storage_setup_execute
|
|
87
88
|
}
|
|
88
89
|
if self.action_type not in self.actions_map:
|
|
89
90
|
raise ValueError(f"Unknown action type: {self.action_type}")
|
|
@@ -309,7 +310,7 @@ class ActionInstance:
|
|
|
309
310
|
)
|
|
310
311
|
|
|
311
312
|
@log_errors(default_return=None, raise_exception=False, log_error=False)
|
|
312
|
-
def
|
|
313
|
+
def get_action_details(self):
|
|
313
314
|
"""Get action details from scaling service.
|
|
314
315
|
|
|
315
316
|
Returns:
|
|
@@ -939,7 +940,16 @@ class ActionInstance:
|
|
|
939
940
|
env={**os.environ},
|
|
940
941
|
)
|
|
941
942
|
|
|
942
|
-
|
|
943
|
+
# Use a longer timeout for docker run since --pull=always may need to
|
|
944
|
+
# download large images on first run. Default: 30 minutes (1800 seconds)
|
|
945
|
+
# Can be configured via DOCKER_START_TIMEOUT_SECONDS environment variable
|
|
946
|
+
docker_start_timeout = int(os.environ.get("DOCKER_START_TIMEOUT_SECONDS", 1800))
|
|
947
|
+
logging.info(
|
|
948
|
+
"Waiting for docker container to start for action %s (timeout: %d seconds)",
|
|
949
|
+
self.action_record_id,
|
|
950
|
+
docker_start_timeout,
|
|
951
|
+
)
|
|
952
|
+
stdout, stderr = process.communicate(timeout=docker_start_timeout)
|
|
943
953
|
|
|
944
954
|
if process.returncode != 0:
|
|
945
955
|
logging.error(
|
|
@@ -2187,4 +2197,45 @@ def inference_tracker_setup_execute(self: ActionInstance):
|
|
|
2187
2197
|
f"{image}"
|
|
2188
2198
|
)
|
|
2189
2199
|
|
|
2190
|
-
self.start(worker_cmd, "inference_tracker_setup")
|
|
2200
|
+
self.start(worker_cmd, "inference_tracker_setup")
|
|
2201
|
+
|
|
2202
|
+
@log_errors(raise_exception=False)
def video_storage_setup_execute(self: ActionInstance):
    """Create and start the Video Storage container for this action.

    Video Storage runs on port 8106 (localhost only with --net=host).

    When the action details already carry a ``containerId``, that container
    is restarted instead of launching a new one. Otherwise a detached
    ``docker run`` command is built for the image held in
    ``self.docker_container`` and handed to ``self.start``.

    Returns:
        None. Returns early, doing nothing, when ``get_action_details()``
        yields no details.
    """
    action_details = self.get_action_details()
    if not action_details:
        return

    # Read the image reference first: the restart branch below overwrites
    # self.docker_container with a container ID.
    image = self.docker_container

    self.setup_action_requirements(action_details)

    existing_container_id = action_details["actionDetails"].get("containerId")
    if existing_container_id:
        logging.info(
            "Using existing container ID for video storage: %s",
            existing_container_id,
        )
        self.docker_container = existing_container_id
        cmd = "docker restart " + self.docker_container
        self.start(cmd, "video_storage_setup_execute")
        return

    # NOTE(review): the container name is fixed ("media_server"), and the
    # access keys are interpolated directly into the command string.
    # Confirm how self.start executes the command before relying on values
    # that may contain quotes or shell metacharacters.
    worker_cmd = (
        f"docker run -d --pull=always --net=host "
        f"--cidfile ./{self.action_record_id}.cid "
        f"--name media_server "
        f"-v matrice_myvol:/matrice_data "
        f'-e ENV="{os.environ.get("ENV", "prod")}" '
        f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
        f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
        f'-e ACTION_ID="{self.action_record_id}" '
        f"{image}"
    )

    self.start(worker_cmd, "video_storage_setup_execute")
|
|
@@ -43,7 +43,11 @@ class ActionsManager:
|
|
|
43
43
|
"""
|
|
44
44
|
actions = []
|
|
45
45
|
logging.info("Polling backend for new jobs")
|
|
46
|
-
|
|
46
|
+
result = self.scaling.assign_jobs(has_gpu())
|
|
47
|
+
if result is None:
|
|
48
|
+
logging.error("assign_jobs returned None")
|
|
49
|
+
return actions
|
|
50
|
+
fetched_actions, error, _ = result
|
|
47
51
|
if error:
|
|
48
52
|
logging.error("Error assigning jobs: %s", error)
|
|
49
53
|
return actions
|