matrice-compute 0.1.44__py3-none-any.whl → 0.1.45__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrice_compute/__init__.py +21 -10
- matrice_compute/__init__.pyi +2056 -0
- matrice_compute/action_instance.py +19 -6
- matrice_compute/actions_manager.py +2 -1
- matrice_compute/actions_scaledown_manager.py +5 -0
- matrice_compute/instance_manager.py +26 -6
- matrice_compute/instance_utils.py +8 -8
- matrice_compute/k8s_scheduler.py +749 -0
- matrice_compute/prechecks.py +5 -6
- matrice_compute/resources_tracker.py +68 -53
- matrice_compute/scaling.py +31 -2
- matrice_compute/task_utils.py +51 -0
- {matrice_compute-0.1.44.dist-info → matrice_compute-0.1.45.dist-info}/METADATA +4 -4
- matrice_compute-0.1.45.dist-info/RECORD +20 -0
- {matrice_compute-0.1.44.dist-info → matrice_compute-0.1.45.dist-info}/WHEEL +1 -1
- matrice_compute-0.1.44.dist-info/RECORD +0 -18
- {matrice_compute-0.1.44.dist-info → matrice_compute-0.1.45.dist-info}/licenses/LICENSE.txt +0 -0
- {matrice_compute-0.1.44.dist-info → matrice_compute-0.1.45.dist-info}/top_level.txt +0 -0
matrice_compute/prechecks.py
CHANGED
|
@@ -25,7 +25,7 @@ from matrice_compute.instance_utils import (
|
|
|
25
25
|
get_max_file_system,
|
|
26
26
|
has_gpu,
|
|
27
27
|
)
|
|
28
|
-
from matrice.docker_utils import check_docker
|
|
28
|
+
from matrice.docker_utils import check_docker as ensure_docker
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
class Prechecks:
|
|
@@ -223,10 +223,8 @@ class Prechecks:
|
|
|
223
223
|
Returns:
|
|
224
224
|
bool: True if docker is working
|
|
225
225
|
"""
|
|
226
|
-
if not check_docker():
|
|
227
|
-
logging.error("Docker not installed or not running")
|
|
228
|
-
sys.exit(1)
|
|
229
226
|
try:
|
|
227
|
+
ensure_docker()
|
|
230
228
|
import docker
|
|
231
229
|
|
|
232
230
|
client = docker.from_env()
|
|
@@ -384,9 +382,10 @@ class Prechecks:
|
|
|
384
382
|
try:
|
|
385
383
|
import torch
|
|
386
384
|
|
|
387
|
-
test_tensor = torch.
|
|
385
|
+
test_tensor = torch.ones((2, 2), device="cuda", dtype=torch.float32)
|
|
388
386
|
result = torch.matmul(test_tensor, test_tensor)
|
|
389
|
-
|
|
387
|
+
expected = torch.full((2, 2), 2.0, device="cuda", dtype=torch.float32)
|
|
388
|
+
if not torch.equal(result, expected):
|
|
390
389
|
logging.error("GPU computation test failed")
|
|
391
390
|
sys.exit(1)
|
|
392
391
|
except Exception as err:
|
matrice_compute/resources_tracker.py
CHANGED
|
@@ -10,7 +10,7 @@ import json
|
|
|
10
10
|
from datetime import datetime, timezone
|
|
11
11
|
import psutil
|
|
12
12
|
import docker
|
|
13
|
-
from typing import List, Tuple, Dict, Optional
|
|
13
|
+
from typing import Any, List, Tuple, Dict, Optional, TYPE_CHECKING, Iterator, cast, ClassVar, Type
|
|
14
14
|
from matrice_compute.instance_utils import (
|
|
15
15
|
has_gpu,
|
|
16
16
|
get_gpu_info,
|
|
@@ -18,6 +18,10 @@ from matrice_compute.instance_utils import (
|
|
|
18
18
|
)
|
|
19
19
|
from matrice_compute.scaling import Scaling
|
|
20
20
|
from matrice_common.utils import log_errors
|
|
21
|
+
from docker.client import DockerClient
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from docker.models.containers import Container as DockerContainer
|
|
21
25
|
|
|
22
26
|
|
|
23
27
|
class ResourcesTracker:
|
|
@@ -30,10 +34,10 @@ class ResourcesTracker:
|
|
|
30
34
|
"""
|
|
31
35
|
|
|
32
36
|
# Cache for nvidia-smi output to reduce subprocess overhead
|
|
33
|
-
_gpu_cache: Dict = {}
|
|
34
|
-
_gpu_cache_timestamp: float = 0
|
|
35
|
-
_gpu_cache_ttl: float = 1.0 # Cache TTL in seconds
|
|
36
|
-
_gpu_cache_lock = threading.Lock()
|
|
37
|
+
_gpu_cache: ClassVar[Dict[str, Any]] = {}
|
|
38
|
+
_gpu_cache_timestamp: ClassVar[float] = 0.0
|
|
39
|
+
_gpu_cache_ttl: ClassVar[float] = 1.0 # Cache TTL in seconds
|
|
40
|
+
_gpu_cache_lock: ClassVar[threading.Lock] = threading.Lock()
|
|
37
41
|
|
|
38
42
|
def __init__(self) -> None:
|
|
39
43
|
"""
|
|
@@ -42,7 +46,7 @@ class ResourcesTracker:
|
|
|
42
46
|
pass
|
|
43
47
|
|
|
44
48
|
@log_errors(default_return=(0, 0), raise_exception=False)
|
|
45
|
-
def get_container_cpu_and_memory(self, container:
|
|
49
|
+
def get_container_cpu_and_memory(self, container: 'DockerContainer') -> Tuple[float, float]:
|
|
46
50
|
"""
|
|
47
51
|
Get CPU and memory usage for a container.
|
|
48
52
|
|
|
@@ -52,35 +56,45 @@ class ResourcesTracker:
|
|
|
52
56
|
Returns:
|
|
53
57
|
Tuple[float, float]: CPU utilization percentage (0-100 per core used) and memory usage in MB.
|
|
54
58
|
"""
|
|
55
|
-
|
|
56
|
-
if
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
)
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
]
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
#
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
59
|
+
stats_raw = container.stats(stream=False)
|
|
60
|
+
if not stats_raw:
|
|
61
|
+
return 0.0, 0.0
|
|
62
|
+
|
|
63
|
+
# Normalize to a dictionary for type-checking safety
|
|
64
|
+
if isinstance(stats_raw, dict):
|
|
65
|
+
stats = cast(Dict[str, Any], stats_raw)
|
|
66
|
+
else:
|
|
67
|
+
# Some client types may return an iterator; take the first element
|
|
68
|
+
stats = cast(Dict[str, Any], next(cast(Iterator[Dict[str, Any]], stats_raw), {}))
|
|
69
|
+
if not stats:
|
|
70
|
+
return 0.0, 0.0
|
|
71
|
+
|
|
72
|
+
cpu_utilization = 0.0
|
|
73
|
+
cpu_delta = (
|
|
74
|
+
stats["cpu_stats"]["cpu_usage"]["total_usage"]
|
|
75
|
+
- stats["precpu_stats"]["cpu_usage"]["total_usage"]
|
|
76
|
+
)
|
|
77
|
+
system_delta = stats["cpu_stats"].get("system_cpu_usage", 0) - stats[
|
|
78
|
+
"precpu_stats"
|
|
79
|
+
].get("system_cpu_usage", 0)
|
|
80
|
+
|
|
81
|
+
if system_delta > 0:
|
|
82
|
+
# FIX: Multiply by online_cpus to get correct percentage
|
|
83
|
+
# Docker formula: (cpu_delta / system_delta) * online_cpus * 100
|
|
84
|
+
online_cpus = stats["cpu_stats"].get("online_cpus")
|
|
85
|
+
if not online_cpus:
|
|
86
|
+
# Fallback: count from percpu_usage or use system CPU count
|
|
87
|
+
percpu = stats["cpu_stats"]["cpu_usage"].get("percpu_usage", [])
|
|
88
|
+
online_cpus = len(percpu) if percpu else psutil.cpu_count()
|
|
89
|
+
cpu_utilization = (cpu_delta / system_delta) * online_cpus * 100.0
|
|
90
|
+
|
|
91
|
+
# Return memory in MB (consistent units) instead of percentage
|
|
92
|
+
memory_usage_bytes = stats["memory_stats"].get("usage", 0)
|
|
93
|
+
# Subtract cache if available for more accurate "real" memory
|
|
94
|
+
cache_bytes = stats["memory_stats"].get("stats", {}).get("cache", 0)
|
|
95
|
+
memory_usage_mb = (memory_usage_bytes - cache_bytes) / (1024 * 1024)
|
|
96
|
+
|
|
97
|
+
return cpu_utilization, max(0, memory_usage_mb)
|
|
84
98
|
|
|
85
99
|
@staticmethod
|
|
86
100
|
def _parse_memory_string(memory_str: str) -> float:
|
|
@@ -218,23 +232,24 @@ class ResourcesTracker:
|
|
|
218
232
|
"""
|
|
219
233
|
import time as time_module
|
|
220
234
|
current_time = time_module.time()
|
|
235
|
+
cls: Type[ResourcesTracker] = type(self)
|
|
221
236
|
|
|
222
|
-
with
|
|
237
|
+
with cls._gpu_cache_lock:
|
|
223
238
|
# Return cache if still valid
|
|
224
|
-
if (
|
|
225
|
-
current_time -
|
|
226
|
-
return
|
|
239
|
+
if (cls._gpu_cache and
|
|
240
|
+
current_time - cls._gpu_cache_timestamp < cls._gpu_cache_ttl):
|
|
241
|
+
return cls._gpu_cache
|
|
227
242
|
|
|
228
243
|
# Refresh cache
|
|
229
|
-
cache = {
|
|
244
|
+
cache: Dict[str, Any] = {
|
|
230
245
|
'processes': [],
|
|
231
246
|
'gpus': [],
|
|
232
247
|
'timestamp': current_time,
|
|
233
248
|
}
|
|
234
249
|
|
|
235
250
|
if not has_gpu():
|
|
236
|
-
|
|
237
|
-
|
|
251
|
+
cls._gpu_cache = cache
|
|
252
|
+
cls._gpu_cache_timestamp = current_time
|
|
238
253
|
return cache
|
|
239
254
|
|
|
240
255
|
try:
|
|
@@ -290,8 +305,8 @@ class ResourcesTracker:
|
|
|
290
305
|
except Exception as e:
|
|
291
306
|
logging.debug("Error refreshing GPU cache: %s", e)
|
|
292
307
|
|
|
293
|
-
|
|
294
|
-
|
|
308
|
+
cls._gpu_cache = cache
|
|
309
|
+
cls._gpu_cache_timestamp = current_time
|
|
295
310
|
return cache
|
|
296
311
|
|
|
297
312
|
@log_errors(default_return=(0, 0), raise_exception=False, log_error=False)
|
|
@@ -473,7 +488,7 @@ class ResourcesTracker:
|
|
|
473
488
|
return pids
|
|
474
489
|
|
|
475
490
|
@log_errors(default_return=set(), raise_exception=False, log_error=False)
|
|
476
|
-
def _get_child_pids(self, parent_pid: str, visited: set = None) -> set:
|
|
491
|
+
def _get_child_pids(self, parent_pid: str, visited: Optional[set[Any]] = None) -> set:
|
|
477
492
|
"""
|
|
478
493
|
Recursively get all child PIDs of a process.
|
|
479
494
|
|
|
@@ -989,10 +1004,10 @@ class ActionsResourcesTracker:
|
|
|
989
1004
|
def __init__(self, scaling: Scaling):
|
|
990
1005
|
"""Initialize ActionsResourcesTracker"""
|
|
991
1006
|
self.scaling = scaling
|
|
992
|
-
self.max_actions_usage = {}
|
|
1007
|
+
self.max_actions_usage: dict[Any, Any] = {}
|
|
993
1008
|
self.resources_tracker = ResourcesTracker()
|
|
994
1009
|
self.client = docker.from_env()
|
|
995
|
-
self.logged_stopped_containers = []
|
|
1010
|
+
self.logged_stopped_containers: list[Any] = []
|
|
996
1011
|
|
|
997
1012
|
@log_errors(raise_exception=False, log_error=True)
|
|
998
1013
|
def update_actions_resources(self) -> None:
|
|
@@ -1057,7 +1072,7 @@ class ActionsResourcesTracker:
|
|
|
1057
1072
|
"""Remove quotes from container args"""
|
|
1058
1073
|
new_args = []
|
|
1059
1074
|
for arg in args:
|
|
1060
|
-
new_args.extend(x.replace('"', "").replace("'", "") for x in arg.split(" "))
|
|
1075
|
+
new_args.extend([x.replace('"', "").replace("'", "") for x in arg.split(" ")])
|
|
1061
1076
|
return new_args
|
|
1062
1077
|
|
|
1063
1078
|
def is_valid_objectid(s: str) -> bool:
|
|
@@ -1275,10 +1290,10 @@ class ContainerResourceMonitor:
|
|
|
1275
1290
|
self._monitor_thread: Optional[threading.Thread] = None
|
|
1276
1291
|
self._producer = None
|
|
1277
1292
|
self._is_running = False
|
|
1278
|
-
self._docker_client = None
|
|
1293
|
+
self._docker_client: Optional[DockerClient] = None
|
|
1279
1294
|
self._resources_tracker = ResourcesTracker()
|
|
1280
1295
|
|
|
1281
|
-
def _get_all_running_containers(self) -> List[
|
|
1296
|
+
def _get_all_running_containers(self) -> List['DockerContainer']:
|
|
1282
1297
|
"""
|
|
1283
1298
|
Get all running Docker containers.
|
|
1284
1299
|
|
|
@@ -1288,14 +1303,14 @@ class ContainerResourceMonitor:
|
|
|
1288
1303
|
try:
|
|
1289
1304
|
if not self._docker_client:
|
|
1290
1305
|
self._docker_client = docker.from_env()
|
|
1291
|
-
|
|
1306
|
+
assert self._docker_client is not None
|
|
1292
1307
|
containers = self._docker_client.containers.list(filters={"status": "running"})
|
|
1293
1308
|
return containers
|
|
1294
1309
|
except Exception as e:
|
|
1295
1310
|
logging.debug("Error getting running containers: %s", e)
|
|
1296
1311
|
return []
|
|
1297
1312
|
|
|
1298
|
-
def _collect_container_resources(self, container:
|
|
1313
|
+
def _collect_container_resources(self, container: 'DockerContainer') -> Optional[dict[str, Any]]:
|
|
1299
1314
|
"""
|
|
1300
1315
|
Collect resource usage for a single container.
|
|
1301
1316
|
|
|
@@ -1590,7 +1605,7 @@ class KafkaResourceMonitor:
|
|
|
1590
1605
|
CPU usage %, CPU cores, RAM total GB, RAM used GB, GPU memory dict (used, total), Storage dict (free, total)
|
|
1591
1606
|
"""
|
|
1592
1607
|
cpu_usage = psutil.cpu_percent(interval=1)
|
|
1593
|
-
cpu_cores = psutil.cpu_count(logical=True) # Total logical CPU cores
|
|
1608
|
+
cpu_cores = psutil.cpu_count(logical=True) or 0 # Total logical CPU cores
|
|
1594
1609
|
|
|
1595
1610
|
mem = psutil.virtual_memory()
|
|
1596
1611
|
ram_total = mem.total / (1024 ** 3)
|
matrice_compute/scaling.py
CHANGED
|
@@ -12,6 +12,7 @@ import base64
|
|
|
12
12
|
import threading
|
|
13
13
|
import platform
|
|
14
14
|
import subprocess
|
|
15
|
+
from typing import Any, Callable, Dict, Optional, Tuple
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
class Scaling:
|
|
@@ -189,7 +190,7 @@ class Scaling:
|
|
|
189
190
|
|
|
190
191
|
logging.info("Kafka response listener thread stopped")
|
|
191
192
|
|
|
192
|
-
def _send_kafka_request(self, api, payload, request_topic, response_topic, timeout=5):
|
|
193
|
+
def _send_kafka_request(self, api: str, payload: Dict[str, Any], request_topic: Optional[str], response_topic: Optional[str], timeout: int = 5) -> Tuple[Any, Optional[str], str, bool]:
|
|
193
194
|
"""
|
|
194
195
|
Send a request via Kafka and wait for response using the persistent consumer.
|
|
195
196
|
|
|
@@ -266,7 +267,7 @@ class Scaling:
|
|
|
266
267
|
logging.error(f"Kafka send error for {api}: {e}")
|
|
267
268
|
return None, f"Kafka error: {e}", "Kafka send failed", False
|
|
268
269
|
|
|
269
|
-
def _hybrid_request(self, api, payload, request_topic, response_topic, rest_fallback_func):
|
|
270
|
+
def _hybrid_request(self, api: str, payload: Dict[str, Any], request_topic: Optional[str], response_topic: Optional[str], rest_fallback_func: Callable[[], Tuple[Any, Optional[str], str]]) -> Tuple[Any, Optional[str], str]:
|
|
270
271
|
"""
|
|
271
272
|
Hybrid request method: try Kafka first, fallback to REST, cache if both fail.
|
|
272
273
|
|
|
@@ -282,6 +283,11 @@ class Scaling:
|
|
|
282
283
|
"""
|
|
283
284
|
# Try Kafka first
|
|
284
285
|
if self.enable_kafka:
|
|
286
|
+
# Explicitly annotate tuple-unpacked variables to satisfy mypy
|
|
287
|
+
data: Any
|
|
288
|
+
error: Optional[str]
|
|
289
|
+
message: str
|
|
290
|
+
kafka_success: bool
|
|
285
291
|
data, error, message, kafka_success = self._send_kafka_request(
|
|
286
292
|
api, payload, request_topic, response_topic, timeout=5
|
|
287
293
|
)
|
|
@@ -937,6 +943,29 @@ class Scaling:
|
|
|
937
943
|
rest_fallback_func=rest_fallback
|
|
938
944
|
)
|
|
939
945
|
|
|
946
|
+
@log_errors(log_error=True)
|
|
947
|
+
def refresh_presigned_url(self, url: str):
|
|
948
|
+
"""Refresh a presigned URL that may have expired.
|
|
949
|
+
|
|
950
|
+
Args:
|
|
951
|
+
url: The presigned URL to refresh
|
|
952
|
+
|
|
953
|
+
Returns:
|
|
954
|
+
Tuple of (refreshed_url, error, message) from API response
|
|
955
|
+
"""
|
|
956
|
+
if not url:
|
|
957
|
+
return None, "URL is required", "No URL provided to refresh"
|
|
958
|
+
|
|
959
|
+
import urllib.parse
|
|
960
|
+
encoded_url = urllib.parse.quote(url, safe='')
|
|
961
|
+
path = f"/v1/model/refresh_presigned_url?url={encoded_url}"
|
|
962
|
+
resp = self.rpc.get(path=path)
|
|
963
|
+
return self.handle_response(
|
|
964
|
+
resp,
|
|
965
|
+
"Presigned URL refreshed successfully",
|
|
966
|
+
"Could not refresh the presigned URL",
|
|
967
|
+
)
|
|
968
|
+
|
|
940
969
|
@log_errors(log_error=True)
|
|
941
970
|
def get_model_codebase(self, model_family_id):
|
|
942
971
|
"""Get model codebase.
|
matrice_compute/task_utils.py
CHANGED
|
@@ -1,11 +1,51 @@
|
|
|
1
1
|
"""Module providing task_utils functionality."""
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
import os
|
|
4
5
|
import shutil
|
|
5
6
|
import urllib.request
|
|
6
7
|
import zipfile
|
|
7
8
|
from typing import Optional
|
|
8
9
|
from matrice_common.utils import log_errors
|
|
10
|
+
from matrice_compute.scaling import Scaling
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@log_errors(raise_exception=False, log_error=True, default_return=None)
|
|
14
|
+
def refresh_url_if_needed(url: Optional[str], scaling: Optional[Scaling] = None) -> Optional[str]:
|
|
15
|
+
"""Refresh a presigned URL if it appears to be expired or about to expire.
|
|
16
|
+
|
|
17
|
+
This function attempts to refresh presigned URLs for model codebase and requirements
|
|
18
|
+
to ensure they are valid before downloading.
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
url: The URL to potentially refresh. If None or empty, returns None.
|
|
22
|
+
scaling: The Scaling instance to use for API calls. If None, returns original URL.
|
|
23
|
+
|
|
24
|
+
Returns:
|
|
25
|
+
The refreshed URL if successful, or the original URL if refresh fails or is not needed.
|
|
26
|
+
"""
|
|
27
|
+
if not url:
|
|
28
|
+
return None
|
|
29
|
+
|
|
30
|
+
if not scaling:
|
|
31
|
+
logging.warning("No scaling instance provided, returning original URL")
|
|
32
|
+
return url
|
|
33
|
+
|
|
34
|
+
logging.info("Attempting to refresh presigned URL")
|
|
35
|
+
try:
|
|
36
|
+
refreshed_url, error, message = scaling.refresh_presigned_url(url)
|
|
37
|
+
if error:
|
|
38
|
+
logging.warning(f"Failed to refresh presigned URL: {message}. Using original URL.")
|
|
39
|
+
return url
|
|
40
|
+
if refreshed_url:
|
|
41
|
+
logging.info("Successfully refreshed presigned URL")
|
|
42
|
+
return refreshed_url
|
|
43
|
+
else:
|
|
44
|
+
logging.warning("Refresh returned empty URL, using original URL")
|
|
45
|
+
return url
|
|
46
|
+
except Exception as e:
|
|
47
|
+
logging.warning(f"Exception while refreshing presigned URL: {e}. Using original URL.")
|
|
48
|
+
return url
|
|
9
49
|
|
|
10
50
|
|
|
11
51
|
@log_errors(raise_exception=True, log_error=True)
|
|
@@ -14,6 +54,7 @@ def setup_workspace_and_run_task(
|
|
|
14
54
|
action_id: str,
|
|
15
55
|
model_codebase_url: str,
|
|
16
56
|
model_codebase_requirements_url: Optional[str] = None,
|
|
57
|
+
scaling: Optional[Scaling] = None,
|
|
17
58
|
) -> None:
|
|
18
59
|
"""Set up workspace and run task with provided parameters.
|
|
19
60
|
|
|
@@ -22,6 +63,7 @@ def setup_workspace_and_run_task(
|
|
|
22
63
|
action_id (str): Unique identifier for the action.
|
|
23
64
|
model_codebase_url (str): URL to download model codebase from.
|
|
24
65
|
model_codebase_requirements_url (Optional[str]): URL to download requirements from. Defaults to None.
|
|
66
|
+
scaling (Optional[Scaling]): Scaling instance for refreshing presigned URLs. Defaults to None.
|
|
25
67
|
|
|
26
68
|
Returns:
|
|
27
69
|
None
|
|
@@ -33,6 +75,11 @@ def setup_workspace_and_run_task(
|
|
|
33
75
|
# return
|
|
34
76
|
os.makedirs(workspace_dir, exist_ok=True)
|
|
35
77
|
|
|
78
|
+
# Refresh presigned URLs before downloading to ensure they are valid
|
|
79
|
+
refreshed_codebase_url = refresh_url_if_needed(model_codebase_url, scaling)
|
|
80
|
+
if refreshed_codebase_url:
|
|
81
|
+
model_codebase_url = refreshed_codebase_url
|
|
82
|
+
|
|
36
83
|
# Download codebase ZIP file
|
|
37
84
|
urllib.request.urlretrieve(model_codebase_url, codebase_zip_path)
|
|
38
85
|
|
|
@@ -74,4 +121,8 @@ def setup_workspace_and_run_task(
|
|
|
74
121
|
|
|
75
122
|
# Download requirements.txt if URL is provided
|
|
76
123
|
if model_codebase_requirements_url:
|
|
124
|
+
# Refresh presigned URL for requirements before downloading
|
|
125
|
+
refreshed_requirements_url = refresh_url_if_needed(model_codebase_requirements_url, scaling)
|
|
126
|
+
if refreshed_requirements_url:
|
|
127
|
+
model_codebase_requirements_url = refreshed_requirements_url
|
|
77
128
|
urllib.request.urlretrieve(model_codebase_requirements_url, requirements_txt_path)
|
{matrice_compute-0.1.44.dist-info → matrice_compute-0.1.45.dist-info}/METADATA
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: matrice_compute
|
|
3
|
-
Version: 0.1.44
|
|
3
|
+
Version: 0.1.45
|
|
4
4
|
Summary: Common server utilities for Matrice.ai services
|
|
5
5
|
Author-email: "Matrice.ai" <dipendra@matrice.ai>
|
|
6
|
-
License
|
|
7
|
-
Keywords: matrice,common,utilities,
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: matrice,common,utilities,mypyc,compiled
|
|
8
8
|
Classifier: Development Status :: 4 - Beta
|
|
9
9
|
Classifier: Intended Audience :: Developers
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
|
@@ -18,7 +18,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.11
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Typing :: Typed
|
|
21
|
-
Requires-Python: >=3.
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
22
|
Description-Content-Type: text/markdown
|
|
23
23
|
License-File: LICENSE.txt
|
|
24
24
|
Dynamic: license-file
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
matrice_compute/__init__.py,sha256=dflUMEGODPUMWpxHpIGdsDnRnoRf8qjb_rNh79mqTQw,1142
|
|
2
|
+
matrice_compute/__init__.pyi,sha256=GYiGrhaEM8YSnlmj3mPCjkqJxaNe9dZZU3dmXSPL81c,61685
|
|
3
|
+
matrice_compute/action_instance.py,sha256=Dde4qV11Z-Q3XnMBr_-2mvqfnrAGizpfKDKATigW_SU,75017
|
|
4
|
+
matrice_compute/actions_manager.py,sha256=-_cXMpT1nY3lhFt_z56txguF9jyyh_zqYxbkoNlJ12c,18314
|
|
5
|
+
matrice_compute/actions_scaledown_manager.py,sha256=sV6IIMgwRxMv3nCniLz4wYJViNpf4_0Z_xbiGu0uRfQ,2210
|
|
6
|
+
matrice_compute/compute_operations_handler.py,sha256=amcMhmXtv2irE6qK8Vbgec_8uFqjWmVVp0VWq-73_MU,17781
|
|
7
|
+
matrice_compute/instance_manager.py,sha256=efyzcMJAUiapY1UDs_KaCtx3UX_SNTY2yz1TtsT0Wmg,23051
|
|
8
|
+
matrice_compute/instance_utils.py,sha256=GAmxs2yR7h7d8tFe4qe_c8GvgNa6bwurlh88tx9QKXk,42167
|
|
9
|
+
matrice_compute/k8s_scheduler.py,sha256=j7dax-JIGJaMbiHyDM909-_ss9oFUALeLys9HuhqRb0,30924
|
|
10
|
+
matrice_compute/prechecks.py,sha256=-l_9F_Xe6rPgezfkmcjVB9IFCBPxrTQhJNaRjtHvkQM,17228
|
|
11
|
+
matrice_compute/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
matrice_compute/resources_tracker.py,sha256=7lOkQoaOI85Hs5Q5r1TJAD2y5qsVpaHmX_Y8DuG1ml0,68741
|
|
13
|
+
matrice_compute/scaling.py,sha256=NTFfZ_rEFpUv1552p6D_kbCwY9pQar-m6405I19IOyg,56379
|
|
14
|
+
matrice_compute/shutdown_manager.py,sha256=rnP9Qes6JJKDnebmBC9rqkH__X9a8TMjhWQPWoOQKFs,13232
|
|
15
|
+
matrice_compute/task_utils.py,sha256=hJA_omHHGcSMcshreoQ4GEDf6SZauz8BvgNRzcAH-UU,5069
|
|
16
|
+
matrice_compute-0.1.45.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
|
|
17
|
+
matrice_compute-0.1.45.dist-info/METADATA,sha256=_BorL-LTPoh4zzTNFvXY4LFFgEowLjfFqE655Dx1y4o,1024
|
|
18
|
+
matrice_compute-0.1.45.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
19
|
+
matrice_compute-0.1.45.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
|
|
20
|
+
matrice_compute-0.1.45.dist-info/RECORD,,
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
matrice_compute/__init__.py,sha256=YZhx7rQlD1TAlhBMbsU3_Xp-tpLyTAxWZDcQvqmwR2g,723
|
|
2
|
-
matrice_compute/action_instance.py,sha256=Wowl1Sw6gwH2iwXD6yIWnODhgFDNBzM1olpUthV7m44,74145
|
|
3
|
-
matrice_compute/actions_manager.py,sha256=a_TulMnu462xc0t_A-Mpug5zhQTmtpjiv7mhiC_IAVw,18280
|
|
4
|
-
matrice_compute/actions_scaledown_manager.py,sha256=pJ0nduNwHWZ10GnqJNx0Ok7cVWabQ_M8E2Vb9pH3A_k,2002
|
|
5
|
-
matrice_compute/compute_operations_handler.py,sha256=amcMhmXtv2irE6qK8Vbgec_8uFqjWmVVp0VWq-73_MU,17781
|
|
6
|
-
matrice_compute/instance_manager.py,sha256=W0BN1mkfcqCP1jxb6JjhNPUHM-iTmrDu7WoyfTTKGdY,22098
|
|
7
|
-
matrice_compute/instance_utils.py,sha256=N4yPDvNukFEEBngR0lEt4x_XT5hur1q0P-spM2xQIlU,42025
|
|
8
|
-
matrice_compute/prechecks.py,sha256=W9YmNF3RcLhOf4U8WBlExvFqDw1aGWSNTlJtA73lbDQ,17196
|
|
9
|
-
matrice_compute/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
-
matrice_compute/resources_tracker.py,sha256=Hn_auCSQ2vQIc8X3PZ-KvoVOamjfEkQmbL4ekmWgbt8,68149
|
|
11
|
-
matrice_compute/scaling.py,sha256=UQDI8wN9JEKafvUVPF0Pk9XmhKlbMkeu16AZyyOuSE8,55147
|
|
12
|
-
matrice_compute/shutdown_manager.py,sha256=rnP9Qes6JJKDnebmBC9rqkH__X9a8TMjhWQPWoOQKFs,13232
|
|
13
|
-
matrice_compute/task_utils.py,sha256=3qIutiQdYPyGRxH9ZwLbqdg8sZcnp6jp08pszWCRFl0,2820
|
|
14
|
-
matrice_compute-0.1.44.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
|
|
15
|
-
matrice_compute-0.1.44.dist-info/METADATA,sha256=yrkFxOzfY0cq4wKGpoetb3vt8PoPoRDszI64DKTWZWE,1038
|
|
16
|
-
matrice_compute-0.1.44.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
-
matrice_compute-0.1.44.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
|
|
18
|
-
matrice_compute-0.1.44.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|