matrice-compute 0.1.44__py3-none-any.whl → 0.1.45__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -21,6 +21,7 @@ from matrice_compute.scaling import (
21
21
  Scaling,
22
22
  )
23
23
  from matrice_common.utils import log_errors
24
+ from typing import cast
24
25
 
25
26
 
26
27
  class ActionInstance:
@@ -369,6 +370,8 @@ class ActionInstance:
369
370
  "MATRICE_SECRET_ACCESS_KEY": self.matrice_secret_access_key,
370
371
  "MATRICE_ACCESS_KEY_ID": self.matrice_access_key_id,
371
372
  }
373
+ if os.environ.get("MATRICE_BASE_URL"):
374
+ env_vars["MATRICE_BASE_URL"] = os.environ["MATRICE_BASE_URL"]
372
375
  if self.get_hugging_face_token(model_key):
373
376
  env_vars["HUGGING_FACE_ACCESS_TOKEN"] = self.get_hugging_face_token(
374
377
  model_key
@@ -563,6 +566,7 @@ class ActionInstance:
563
566
  action_id,
564
567
  model_codebase_url,
565
568
  model_codebase_requirements_url,
569
+ scaling=self.scaling,
566
570
  )
567
571
 
568
572
  # Setup Docker credentials
@@ -872,7 +876,8 @@ class ActionInstance:
872
876
  "bg-job-scheduler",
873
877
  "DKR_CMD",
874
878
  "OK",
875
- f"Start docker container with command: {cmd.replace(self.matrice_access_key_id, 'MATRICE_ACCESS_KEY_ID').replace(self.matrice_secret_access_key, 'MATRICE_SECRET_ACCESS_KEY')}",
879
+ f"Start docker container with command: "
880
+ f"{cmd.replace(cast(str, self.matrice_access_key_id), 'MATRICE_ACCESS_KEY_ID').replace(cast(str, self.matrice_secret_access_key), 'MATRICE_SECRET_ACCESS_KEY')}",
876
881
  )
877
882
 
878
883
  @log_errors(raise_exception=False, log_error=False)
@@ -1140,6 +1145,8 @@ def database_setup_execute(self: ActionInstance):
1140
1145
  f"-v {dbPath}:{dbPath} "
1141
1146
  f"--name {self.action_record_id}_{self.action_type} "
1142
1147
  f"-v /var/run/docker.sock:/var/run/docker.sock "
1148
+ f"-v /etc/matrice/service-config.yaml:/etc/matrice/service-config.yaml "
1149
+ f'-e SERVICE_CONFIG_PATH="/etc/matrice/service-config.yaml" '
1143
1150
  f"-e ACTION_RECORD_ID={self.action_record_id} "
1144
1151
  f"-e MATRICE_ACCESS_KEY_ID={self.matrice_access_key_id} "
1145
1152
  f"-e MATRICE_SECRET_ACCESS_KEY={self.matrice_secret_access_key} "
@@ -1274,9 +1281,11 @@ def inference_ws_server_execute(self: ActionInstance):
1274
1281
  worker_cmd = (
1275
1282
  f"docker run -d --pull=always --net=host "
1276
1283
  f"--name {self.action_record_id}_{self.action_type} "
1284
+ f"-v /etc/matrice/service-config.yaml:/etc/matrice/service-config.yaml "
1277
1285
  f'-e ENV="{os.environ.get("ENV", "prod")}" '
1278
1286
  f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
1279
1287
  f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
1288
+ f'-e SERVICE_CONFIG_PATH="/etc/matrice/service-config.yaml" '
1280
1289
  f' --restart=unless-stopped '
1281
1290
  f"{image} "
1282
1291
  f"./app "
@@ -1503,10 +1512,10 @@ def redis_setup_execute(self: ActionInstance):
1503
1512
  # bg-redis management container with --net=host (Port: 8082)
1504
1513
  cmd = (
1505
1514
  f"docker run --net=host "
1506
- f"-e REDIS_URL={shlex.quote(env_vars['REDIS_URL'])} "
1507
- f"-e REDIS_PASSWORD={shlex.quote(env_vars['REDIS_PASSWORD'])} "
1508
- f"-e MATRICE_ACCESS_KEY_ID={shlex.quote(self.matrice_access_key_id)} "
1509
- f"-e MATRICE_SECRET_ACCESS_KEY={shlex.quote(self.matrice_secret_access_key)} "
1515
+ f"-e REDIS_URL={shlex.quote(cast(str, env_vars['REDIS_URL']))} "
1516
+ f"-e REDIS_PASSWORD={shlex.quote(cast(str, env_vars['REDIS_PASSWORD']))} "
1517
+ f"-e MATRICE_ACCESS_KEY_ID={shlex.quote(cast(str, self.matrice_access_key_id))} "
1518
+ f"-e MATRICE_SECRET_ACCESS_KEY={shlex.quote(cast(str, self.matrice_secret_access_key))} "
1510
1519
  f"-e ENV={shlex.quote(os.environ.get('ENV', 'prod'))} "
1511
1520
  f"-v /var/run/docker.sock:/var/run/docker.sock "
1512
1521
  f"--shm-size=30G --pull=always "
@@ -1869,10 +1878,12 @@ def inference_tracker_setup_execute(self: ActionInstance):
1869
1878
  f"docker run -d --pull=always --net=host "
1870
1879
  f"--name {self.action_record_id}_{self.action_type} "
1871
1880
  f"-v matrice_myvol:/matrice_data "
1881
+ f"-v /etc/matrice/service-config.yaml:/etc/matrice/service-config.yaml "
1872
1882
  f'-e ENV="{os.environ.get("ENV", "prod")}" '
1873
1883
  f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
1874
1884
  f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
1875
1885
  f'-e ACTION_ID="{self.action_record_id}" '
1886
+ f'-e SERVICE_CONFIG_PATH="/etc/matrice/service-config.yaml" '
1876
1887
  f' --restart=unless-stopped '
1877
1888
  f"{image}"
1878
1889
  )
@@ -1912,11 +1923,13 @@ def video_storage_setup_execute(self: ActionInstance):
1912
1923
  worker_cmd = (
1913
1924
  f"docker run -d --pull=always --net=host "
1914
1925
  f"--name {self.action_record_id}_{self.action_type} "
1915
- f"-v {media_storage_path}:/storage "
1926
+ f"-v {media_storage_path}:/storage "
1927
+ f"-v /etc/matrice/service-config.yaml:/etc/matrice/service-config.yaml "
1916
1928
  f'-e ENV="{os.environ.get("ENV", "prod")}" '
1917
1929
  f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
1918
1930
  f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
1919
1931
  f'-e ACTION_ID="{self.action_record_id}" '
1932
+ f'-e SERVICE_CONFIG_PATH="/etc/matrice/service-config.yaml" '
1920
1933
  f' --restart=unless-stopped '
1921
1934
  f"{image}"
1922
1935
  )
@@ -3,6 +3,7 @@
3
3
  import logging
4
4
  import os
5
5
  import time
6
+ from typing import Any
6
7
  from matrice_compute.action_instance import (
7
8
  ActionInstance,
8
9
  )
@@ -41,7 +42,7 @@ class ActionsManager:
41
42
  Returns:
42
43
  list: List of fetched actions
43
44
  """
44
- actions = []
45
+ actions: list[Any] = []
45
46
  logging.info("Polling backend for new jobs")
46
47
  result = self.scaling.assign_jobs(has_gpu())
47
48
  if result is None:
@@ -37,6 +37,11 @@ class ActionsScaleDownManager:
37
37
  if down_scaled_jobs:
38
38
  for container in containers:
39
39
  container_id = container.id
40
+ if container_id is None:
41
+ logging.warning(
42
+ "Skipping container with missing id while inspecting."
43
+ )
44
+ continue
40
45
  inspect_data = self.docker_client.api.inspect_container(container_id)
41
46
  action_record_id = next(
42
47
  (arg for arg in inspect_data["Args"] if len(arg) == 24),
@@ -6,6 +6,7 @@ import os
6
6
  import subprocess
7
7
  import threading
8
8
  import time
9
+ from typing import Any, Optional
9
10
  from kafka import KafkaProducer
10
11
  from matrice_compute.actions_manager import ActionsManager
11
12
  from matrice_compute.actions_scaledown_manager import ActionsScaleDownManager
@@ -31,6 +32,22 @@ class InstanceManager:
31
32
 
32
33
  Now includes auto streaming capabilities for specified deployment IDs.
33
34
  """
35
+ # Instance attributes for type checking
36
+ scaling: Scaling
37
+ current_actions: dict[Any, Any]
38
+ actions_manager: ActionsManager
39
+ scale_down_manager: ActionsScaleDownManager
40
+ shutdown_manager: ShutdownManager
41
+ machine_resources_tracker: MachineResourcesTracker
42
+ actions_resources_tracker: ActionsResourcesTracker
43
+ kafka_resource_monitor: Optional[KafkaResourceMonitor]
44
+ container_resource_monitor: Optional[ContainerResourceMonitor]
45
+ compute_operations_handler: Optional[ComputeOperationsHandler]
46
+ poll_interval: int
47
+ container_monitor_thread: Optional[threading.Thread]
48
+ container_monitor_running: bool
49
+ container_kafka_producer: Optional[KafkaProducer]
50
+ encryption_key: str
34
51
 
35
52
  def __init__(
36
53
  self,
@@ -87,7 +104,7 @@ class InstanceManager:
87
104
  logging.info("InstanceManager updated Jupyter token")
88
105
  else:
89
106
  logging.warning("No Jupyter token found in environment variables")
90
- self.current_actions = {}
107
+ self.current_actions: dict[Any, Any] = {}
91
108
  self.actions_manager = ActionsManager(self.scaling)
92
109
  logging.info("InstanceManager initialized with actions manager")
93
110
  self.scale_down_manager = ActionsScaleDownManager(self.scaling)
@@ -100,6 +117,7 @@ class InstanceManager:
100
117
  logging.info("InstanceManager initialized with actions resources tracker")
101
118
 
102
119
  # Initialize Kafka resource monitor using the same internal Kafka as scaling
120
+ self.kafka_resource_monitor = None
103
121
  try:
104
122
  kafka_bootstrap = self.scaling.get_kafka_bootstrap_servers()
105
123
  self.kafka_resource_monitor = KafkaResourceMonitor(
@@ -113,6 +131,7 @@ class InstanceManager:
113
131
  self.kafka_resource_monitor = None
114
132
 
115
133
  # Initialize Container resource monitor using the same internal Kafka as scaling
134
+ self.container_resource_monitor = None
116
135
  try:
117
136
  kafka_bootstrap = self.scaling.get_kafka_bootstrap_servers()
118
137
  self.container_resource_monitor = ContainerResourceMonitor(
@@ -127,13 +146,14 @@ class InstanceManager:
127
146
 
128
147
  # Initialize Compute Operations Handler for event-driven operations
129
148
  # Uses EventListener from matrice_common for simplified Kafka consumption
149
+ self.compute_operations_handler = None
130
150
  try:
131
- instance_id = os.environ.get("INSTANCE_ID")
151
+ instance_id_env = os.environ.get("INSTANCE_ID") or ""
132
152
  self.compute_operations_handler = ComputeOperationsHandler(
133
153
  actions_manager=self.actions_manager,
134
154
  session=self.session,
135
155
  scaling=self.scaling,
136
- instance_id=instance_id
156
+ instance_id=instance_id_env
137
157
  )
138
158
  logging.info("InstanceManager initialized with Compute Operations Handler for instance ID: %s", instance_id)
139
159
  except (ValueError, Exception) as e:
@@ -225,10 +245,10 @@ class InstanceManager:
225
245
  raise Exception(
226
246
  "SERVICE_PROVIDER and INSTANCE_ID must be set as environment variables or passed as arguments"
227
247
  )
228
- self.encryption_key = manual_instance_info["MATRICE_ENCRYPTION_KEY"]
248
+ self.encryption_key = str(manual_instance_info["MATRICE_ENCRYPTION_KEY"] or "")
229
249
 
230
- access_key = manual_instance_info["MATRICE_ACCESS_KEY_ID"]
231
- secret_key = manual_instance_info["MATRICE_SECRET_ACCESS_KEY"]
250
+ access_key = str(manual_instance_info["MATRICE_ACCESS_KEY_ID"] or "")
251
+ secret_key = str(manual_instance_info["MATRICE_SECRET_ACCESS_KEY"] or "")
232
252
 
233
253
  if ( # Keys are not encrypted
234
254
  self.encryption_key
@@ -15,9 +15,9 @@ from cryptography.hazmat.primitives.ciphers import (
15
15
  )
16
16
  from cryptography.hazmat.backends import default_backend
17
17
  from matrice_common.utils import log_errors
18
+ from typing import Optional, Tuple
18
19
 
19
-
20
- def get_instance_info(service_provider: str = None, instance_id: str = None) -> tuple:
20
+ def get_instance_info(service_provider: Optional[str] = None, instance_id: Optional[str] = None) -> tuple:
21
21
  """
22
22
  Get instance provider and ID information.
23
23
 
@@ -450,7 +450,7 @@ def get_disk_space_usage() -> list:
450
450
 
451
451
 
452
452
  @log_errors(default_return=None, raise_exception=False)
453
- def get_max_file_system() -> str:
453
+ def get_max_file_system() -> Optional[str]:
454
454
  """
455
455
  Get filesystem with maximum available space.
456
456
 
@@ -1060,7 +1060,7 @@ def get_decrypted_access_key_pair(
1060
1060
  enc_access_key: str,
1061
1061
  enc_secret_key: str,
1062
1062
  encryption_key: str = "",
1063
- ) -> tuple:
1063
+ ) -> Tuple[Optional[str], Optional[str]]:
1064
1064
  """
1065
1065
  Get decrypted access key pair.
1066
1066
 
@@ -1072,7 +1072,7 @@ def get_decrypted_access_key_pair(
1072
1072
  Returns:
1073
1073
  tuple: (access_key, secret_key) strings
1074
1074
  """
1075
- encryption_key = encryption_key or os.environ.get("MATRICE_ENCRYPTION_KEY")
1075
+ encryption_key = encryption_key or os.environ.get("MATRICE_ENCRYPTION_KEY", "")
1076
1076
  if not encryption_key:
1077
1077
  logging.warning("Encryption key is not set, Will assume that the keys are not encrypted")
1078
1078
  return enc_access_key, enc_secret_key
@@ -1107,7 +1107,7 @@ def get_encrypted_access_key_pair(
1107
1107
  access_key: str,
1108
1108
  secret_key: str,
1109
1109
  encryption_key: str = "",
1110
- ) -> tuple:
1110
+ ) -> Tuple[Optional[str], Optional[str]]:
1111
1111
  """
1112
1112
  Get encrypted access key pair.
1113
1113
 
@@ -1119,7 +1119,7 @@ def get_encrypted_access_key_pair(
1119
1119
  Returns:
1120
1120
  tuple: (encrypted_access_key, encrypted_secret_key) strings
1121
1121
  """
1122
- encryption_key = encryption_key or os.environ.get("MATRICE_ENCRYPTION_KEY")
1122
+ encryption_key = encryption_key or os.environ.get("MATRICE_ENCRYPTION_KEY", "")
1123
1123
  if not encryption_key:
1124
1124
  logging.warning("Encryption key is not set, returning unencrypted keys")
1125
1125
  return access_key, secret_key
@@ -1155,7 +1155,7 @@ def get_encrypted_access_key_pair(
1155
1155
 
1156
1156
  return encoded_access_key, encoded_secret_key
1157
1157
 
1158
- def _get_private_ip() -> str:
1158
+ def _get_private_ip() -> Optional[str]:
1159
1159
  """
1160
1160
  Get the actual private/LAN IP address using UDP socket trick.
1161
1161
  This works reliably even in Docker, NAT, VPN, etc.