matrice-compute 0.1.25__py3-none-any.whl → 0.1.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrice_compute/actions_manager.py +288 -48
- matrice_compute/compute_operations_handler.py +490 -0
- matrice_compute/instance_manager.py +25 -0
- matrice_compute/resources_tracker.py +7 -2
- matrice_compute/scaling.py +23 -0
- {matrice_compute-0.1.25.dist-info → matrice_compute-0.1.26.dist-info}/METADATA +1 -1
- {matrice_compute-0.1.25.dist-info → matrice_compute-0.1.26.dist-info}/RECORD +10 -9
- {matrice_compute-0.1.25.dist-info → matrice_compute-0.1.26.dist-info}/WHEEL +0 -0
- {matrice_compute-0.1.25.dist-info → matrice_compute-0.1.26.dist-info}/licenses/LICENSE.txt +0 -0
- {matrice_compute-0.1.25.dist-info → matrice_compute-0.1.26.dist-info}/top_level.txt +0 -0
|
@@ -27,6 +27,7 @@ class ActionsManager:
|
|
|
27
27
|
scaling (Scaling): Scaling service instance
|
|
28
28
|
"""
|
|
29
29
|
self.current_actions: dict[str, ActionInstance] = {}
|
|
30
|
+
self.stopped_actions: dict[str, ActionInstance] = {} # Track stopped actions separately
|
|
30
31
|
self.scaling = scaling
|
|
31
32
|
self.memory_threshold = 0.9
|
|
32
33
|
self.poll_interval = 10
|
|
@@ -111,75 +112,110 @@ class ActionsManager:
|
|
|
111
112
|
def process_actions(self) -> None:
    """Fetch pending actions and start each one that is not already tracked.

    An action whose ``_id`` is already a key of ``current_actions`` is
    skipped. Any entry stored under the same id in ``stopped_actions`` is
    discarded so an id is never tracked in both dicts at once.
    """
    for fetched in self.fetch_actions():
        action_id = fetched["_id"]

        # Already tracked as running: nothing to do for this action.
        if action_id in self.current_actions:
            logging.info("Action %s already in current_actions, skipping", action_id)
            continue

        # A previous, stopped run of the same action is replaced by the new one.
        if action_id in self.stopped_actions:
            logging.info("Action %s found in stopped_actions, removing before restart", action_id)
            del self.stopped_actions[action_id]

        started = self.process_action(fetched)
        if not started:
            continue

        # Defensive: drop a stopped entry that may have appeared meanwhile.
        self.stopped_actions.pop(action_id, None)
        self.current_actions[action_id] = started
|
|
117
134
|
|
|
118
135
|
@log_errors(raise_exception=False)
def update_actions_status(self) -> None:
    """Move actions that are no longer running into ``stopped_actions``.

    Every entry of ``current_actions`` is probed with ``is_running()``. An
    instance that reports not running, has no such method, or raises while
    being probed is removed from ``current_actions`` and kept in
    ``stopped_actions`` instead of being discarded. When either dict is
    non-empty, a summary of both is logged afterwards.
    """
    # Iterate over a snapshot because entries are removed inside the loop.
    for action_id, instance in list(self.current_actions.items()):
        is_running = False
        status_reason = ""

        if hasattr(instance, "is_running"):
            try:
                is_running = instance.is_running()
            except Exception as e:
                # A failing probe is treated the same as "not running".
                logging.error("Error checking is_running for action %s: %s", action_id, str(e))
                is_running = False
                status_reason = f"error checking status: {str(e)}"

        if is_running:
            continue

        # No probe error was recorded, so explain the stop from the process object.
        if not status_reason:
            if getattr(instance, "process", None) is None:
                status_reason = "no process object"
            else:
                status_reason = "process not running"

        logging.info("Action %s moved to stopped_actions: %s", action_id, status_reason)
        self.current_actions.pop(action_id, None)
        # An instance already recorded under this id is left in place.
        self.stopped_actions.setdefault(action_id, instance)

    if self.current_actions or self.stopped_actions:
        logging.info(
            "Actions status: %d running %s, %d stopped %s",
            len(self.current_actions),
            list(self.current_actions),
            len(self.stopped_actions),
            list(self.stopped_actions),
        )
|
|
168
193
|
|
|
194
|
+
@log_errors(raise_exception=False)
def purge_unwanted(self) -> None:
    """Refresh the running/stopped bookkeeping.

    This method only delegates to ``update_actions_status()``, which moves
    stopped actions into ``stopped_actions``; it deletes nothing itself.
    """
    # Kept as a separate entry point for callers that still use this name.
    self.update_actions_status()
|
|
203
|
+
|
|
169
204
|
@log_errors(default_return={}, raise_exception=False)
|
|
170
205
|
def get_current_actions(self) -> dict:
|
|
171
|
-
"""Get the current actions.
|
|
206
|
+
"""Get the current running actions.
|
|
172
207
|
|
|
173
208
|
This method:
|
|
174
|
-
1.
|
|
175
|
-
2.
|
|
209
|
+
1. Updates action status tracking via update_actions_status()
|
|
210
|
+
2. Returns only the running actions (current_actions dict)
|
|
176
211
|
3. Provides detailed logging about current actions state
|
|
177
212
|
|
|
178
213
|
Returns:
|
|
179
|
-
dict: Current
|
|
214
|
+
dict: Current running actions only
|
|
180
215
|
"""
|
|
181
|
-
#
|
|
182
|
-
self.
|
|
216
|
+
# Update status tracking (moves stopped to stopped_actions)
|
|
217
|
+
self.update_actions_status()
|
|
218
|
+
|
|
183
219
|
if self.current_actions:
|
|
184
220
|
action_ids = list(self.current_actions.keys())
|
|
185
221
|
logging.info(
|
|
@@ -189,9 +225,213 @@ class ActionsManager:
|
|
|
189
225
|
)
|
|
190
226
|
else:
|
|
191
227
|
logging.debug("No actions currently running")
|
|
192
|
-
|
|
228
|
+
|
|
193
229
|
return self.current_actions
|
|
194
230
|
|
|
231
|
+
@log_errors(default_return={}, raise_exception=False)
def get_all_actions(self) -> dict:
    """Return every tracked action, tagged with its bookkeeping status.

    Returns:
        dict: Maps each action id to ``{"instance": ..., "status": ...}``
            where status is ``"running"`` for entries of ``current_actions``
            and ``"stopped"`` for entries of ``stopped_actions``. If an id
            is present in both dicts, the stopped entry wins.
    """
    tagged_running = {
        action_id: {"instance": instance, "status": "running"}
        for action_id, instance in self.current_actions.items()
    }
    tagged_stopped = {
        action_id: {"instance": instance, "status": "stopped"}
        for action_id, instance in self.stopped_actions.items()
    }
    # Later keys override earlier ones, matching the original fill order.
    return {**tagged_running, **tagged_stopped}
|
|
244
|
+
|
|
245
|
+
@log_errors(default_return={}, raise_exception=False)
def get_stopped_actions(self) -> dict:
    """Return the mapping that tracks stopped actions.

    Returns:
        dict: The ``stopped_actions`` dict itself (not a copy).
    """
    stopped = self.stopped_actions
    return stopped
|
|
253
|
+
|
|
254
|
+
@log_errors(default_return={}, raise_exception=False)
def stop_action(self, action_record_id: str) -> dict:
    """Stop a specific action by its record ID.

    Args:
        action_record_id (str): The action record ID to stop

    Returns:
        dict: Always carries ``success`` and ``action_id``. ``reason`` is
            ``already_stopped``, ``action_not_found`` or ``stop_failed``
            when this call did not stop the action (``stop_failed`` also
            carries ``error``); ``stopped_at`` (epoch seconds) is present
            when it did.
    """
    logging.info("Attempting to stop action: %s", action_record_id)

    if action_record_id not in self.current_actions:
        if action_record_id in self.stopped_actions:
            logging.info("Action %s already in stopped_actions", action_record_id)
            return {
                "success": True,
                "reason": "already_stopped",
                "action_id": action_record_id
            }
        logging.warning("Action %s not found in current or stopped actions", action_record_id)
        return {
            "success": False,
            "reason": "action_not_found",
            "action_id": action_record_id
        }

    action_instance = self.current_actions[action_record_id]

    def move_to_stopped() -> None:
        # Keep the instance for status reporting instead of deleting it,
        # and never overwrite an entry already recorded under this id.
        self.current_actions.pop(action_record_id, None)
        self.stopped_actions.setdefault(action_record_id, action_instance)

    if not action_instance.is_running():
        logging.info("Action %s is not running, moving to stopped_actions", action_record_id)
        move_to_stopped()
        return {
            "success": True,
            "reason": "already_stopped",
            "action_id": action_record_id
        }

    try:
        logging.info("Stopping action %s", action_record_id)
        action_instance.stop()

        # Record the stop locally before reporting it, so a failure in the
        # status update below (or a missing SERVICE_PROVIDER variable) does
        # not leave a stopped action listed in current_actions.
        move_to_stopped()

        self.scaling.update_action_status(
            service_provider=os.environ["SERVICE_PROVIDER"],
            action_record_id=action_record_id,
            status="stopped",
            isRunning=False,
            action_duration=0,
        )

        logging.info("Successfully stopped action: %s", action_record_id)
        return {
            "success": True,
            "action_id": action_record_id,
            "stopped_at": time.time()
        }

    except Exception as e:
        logging.error("Error stopping action %s: %s", action_record_id, str(e))
        return {
            "success": False,
            "reason": "stop_failed",
            "error": str(e),
            "action_id": action_record_id
        }
|
|
342
|
+
|
|
343
|
+
@log_errors(default_return={}, raise_exception=False)
def restart_action(self, action_record_id: str) -> dict:
    """Restart a specific action by its record ID.

    A running action is stopped first; the action details are then fetched
    again through ``scaling.get_action_details`` and handed to
    ``process_action`` to start a fresh instance.

    Args:
        action_record_id (str): The action record ID to restart

    Returns:
        dict: Carries ``success`` and ``action_id``. On success it also has
            ``restarted_at`` and ``stop_result``; on failure ``reason`` is
            one of ``stop_failed_before_restart``, ``fetch_failed``,
            ``start_failed_after_restart`` or ``restart_failed``.
    """
    logging.info("Attempting to restart action: %s", action_record_id)

    # Step 1: make sure no earlier run of the action is still active.
    stop_result = {"success": True, "reason": "not_running"}
    if action_record_id in self.current_actions:
        logging.info("Stopping existing action %s before restart", action_record_id)
        stop_result = self.stop_action(action_record_id)

        if not stop_result.get("success"):
            logging.error("Failed to stop action %s for restart", action_record_id)
            return {
                "success": False,
                "reason": "stop_failed_before_restart",
                "stop_result": stop_result,
                "action_id": action_record_id
            }

        # Wait a moment for cleanup
        time.sleep(2)
    elif action_record_id in self.stopped_actions:
        logging.info("Action %s found in stopped_actions, will restart", action_record_id)
        stop_result = {"success": True, "reason": "was_stopped"}

    try:
        # Step 2: fetch fresh action details from the backend.
        logging.info("Fetching action details for restart: %s", action_record_id)
        action_details, error, _ = self.scaling.get_action_details(action_record_id)

        if error or not action_details:
            logging.error("Failed to fetch action details for %s: %s",
                          action_record_id, error)
            return {
                "success": False,
                "reason": "fetch_failed",
                "error": error,
                "action_id": action_record_id
            }

        # Step 3: start the action again.
        logging.info("Starting action %s after restart", action_record_id)
        new_instance = self.process_action(action_details)

        if not new_instance:
            logging.error("Failed to start action %s after restart", action_record_id)
            return {
                "success": False,
                "reason": "start_failed_after_restart",
                "action_id": action_record_id
            }

        # The id must end up tracked only in current_actions.
        self.stopped_actions.pop(action_record_id, None)
        if action_record_id in self.current_actions:
            logging.warning("Action %s already in current_actions during restart, replacing", action_record_id)
            del self.current_actions[action_record_id]
        self.current_actions[action_record_id] = new_instance

        logging.info("Successfully restarted action: %s", action_record_id)
        return {
            "success": True,
            "action_id": action_record_id,
            "restarted_at": time.time(),
            "stop_result": stop_result
        }

    except Exception as e:
        logging.error("Error restarting action %s: %s", action_record_id, str(e))
        return {
            "success": False,
            "reason": "restart_failed",
            "error": str(e),
            "action_id": action_record_id
        }
|
|
434
|
+
|
|
195
435
|
@log_errors(raise_exception=True)
|
|
196
436
|
def start_actions_manager(self) -> None:
|
|
197
437
|
"""Start the actions manager main loop."""
|
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Compute Operations Handler - Kafka Event-Driven Operations Manager
|
|
3
|
+
|
|
4
|
+
This module handles compute instance operations (start/stop/restart) triggered from
|
|
5
|
+
the frontend dashboard via Kafka events. It consumes events from the 'compute_operations'
|
|
6
|
+
topic and performs the actual operations on compute instances and their actions.
|
|
7
|
+
|
|
8
|
+
Uses EventListener from matrice_common for simplified Kafka consumption.
|
|
9
|
+
|
|
10
|
+
Event Structure:
|
|
11
|
+
{
|
|
12
|
+
"instance_id": "string",
|
|
13
|
+
"action_record_id": "string", # Can be ObjectID("000000000000000000000000") or all zeros for instance-level operations
|
|
14
|
+
"operation": "start|stop|restart",
|
|
15
|
+
"account_number": 12345,
|
|
16
|
+
"requested_by": "user@example.com",
|
|
17
|
+
"request_id": "uuid-string",
|
|
18
|
+
"timestamp": "2025-11-21T10:30:00.123Z"
|
|
19
|
+
}
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import logging
|
|
23
|
+
import re
|
|
24
|
+
import time
|
|
25
|
+
from typing import Dict, Any, Optional
|
|
26
|
+
import sys
|
|
27
|
+
import traceback
|
|
28
|
+
import os
|
|
29
|
+
import subprocess
|
|
30
|
+
|
|
31
|
+
from matrice_common.stream.event_listener import EventListener
|
|
32
|
+
|
|
33
|
+
# Configure logging
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ComputeOperationsHandler:
|
|
38
|
+
"""
|
|
39
|
+
Handles Kafka-based compute operations for instance and action management.
|
|
40
|
+
|
|
41
|
+
This class uses EventListener from matrice_common to listen for operation
|
|
42
|
+
events from the 'compute_operations' Kafka topic. It delegates operations
|
|
43
|
+
to the ActionsManager for execution and updates status via API calls.
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
KAFKA_TOPIC = "compute_operations"
|
|
47
|
+
|
|
48
|
+
def __init__(self, actions_manager, session, scaling, instance_id: str):
    """Store the collaborators; the listener is only created by ``start()``.

    Args:
        actions_manager: Reference to the ActionsManager instance
        session: Session object passed on to the EventListener
        scaling: Scaling service instance
        instance_id: This compute instance's ID, used as the event filter
            value and in the consumer group name
    """
    # Collaborators handed in by the caller.
    self.actions_manager = actions_manager
    self.session = session
    self.scaling = scaling
    self.instance_id = instance_id

    # Listener state; both stay at their idle values until start() runs.
    self.event_listener: Optional[EventListener] = None
    self.running = False

    logger.info(f"Initializing ComputeOperationsHandler for instance ID: {instance_id}")
|
|
66
|
+
|
|
67
|
+
def start(self) -> bool:
    """Create the EventListener for the operations topic and start it.

    Returns:
        bool: The value returned by the listener's ``start()``; False when
            the handler is already running or when setup raises.
    """
    if self.running:
        logger.warning("ComputeOperationsHandler is already running")
        return False

    try:
        # Only events whose instance_id field matches this instance are handled.
        listener = EventListener(
            session=self.session,
            topics=[self.KAFKA_TOPIC],
            event_handler=self._handle_operation_event,
            filter_field='instance_id',
            filter_value=self.instance_id,
            consumer_group_id=f"compute_ops_{self.instance_id}"
        )
        self.event_listener = listener
        self.running = listener.start()

        if not self.running:
            logger.error("ComputeOperationsHandler failed to start")
        else:
            logger.info("ComputeOperationsHandler started successfully")

        return self.running

    except Exception as e:
        logger.error(f"Failed to start ComputeOperationsHandler: {e}")
        logger.error(traceback.format_exc())
        return False
|
|
100
|
+
|
|
101
|
+
def stop(self):
    """Mark the handler as not running and stop the listener, if any."""
    logger.info("Stopping ComputeOperationsHandler...")
    self.running = False

    # The listener only exists once start() has been called.
    listener = self.event_listener
    if listener:
        listener.stop()

    logger.info("ComputeOperationsHandler stopped")
|
|
112
|
+
|
|
113
|
+
def _handle_operation_event(self, event: Dict[str, Any]):
    """Validate an incoming operation event and process it if valid.

    This is the callback handed to the EventListener in ``start()``.

    Args:
        event: The operation event dictionary
    """
    logger.info(f"Received operation event: {event}")

    if self._validate_event(event):
        self._process_operation(event)
        return

    # Invalid events are logged and dropped.
    logger.error(f"Invalid event structure: {event}")
|
|
131
|
+
|
|
132
|
+
def _is_instance_level_operation(self, action_record_id: str) -> bool:
    """Tell whether an action record ID denotes an instance-level operation.

    The ID counts as instance-level when it consists only of ``0``
    characters, either bare or wrapped as ``ObjectID("...")`` /
    ``ObjectID('...')``.

    Args:
        action_record_id: The action record ID to check

    Returns:
        True if this is an instance-level operation, False otherwise
        (including for an empty or missing ID)
    """
    if not action_record_id:
        return False

    candidate = action_record_id
    if 'ObjectID' in action_record_id:
        # Unwrap the quoted value; an unquoted wrapper is left untouched.
        wrapped = re.search(r'ObjectID\(["\']([^"\']+)["\']\)', action_record_id)
        if wrapped:
            candidate = wrapped.group(1)

    # candidate is never empty here, so this is "every character is a zero".
    return all(char == '0' for char in candidate)
|
|
159
|
+
|
|
160
|
+
def _extract_action_record_id(self, action_record_id: str) -> str:
    """Return the bare action record ID, unwrapping ``ObjectID(...)`` if present.

    Args:
        action_record_id: The raw action record ID (may be wrapped in ObjectID)

    Returns:
        The quoted value inside ``ObjectID("...")`` / ``ObjectID('...')``
        when the input has that form; otherwise the input unchanged
    """
    if action_record_id and 'ObjectID' in action_record_id:
        wrapped = re.search(r'ObjectID\(["\']([^"\']+)["\']\)', action_record_id)
        if wrapped:
            return wrapped.group(1)

    # Empty values and anything that is not a quoted wrapper pass through.
    return action_record_id
|
|
180
|
+
|
|
181
|
+
def _validate_event(self, event: Dict[str, Any]) -> bool:
    """Check that an event has every required field and a known operation.

    Args:
        event: The event dictionary to validate

    Returns:
        True if event is valid, False otherwise (the first problem found
        is logged)
    """
    required_fields = (
        "instance_id",
        "action_record_id",
        "operation",
        "account_number",
        "requested_by",
        "request_id",
        "timestamp",
    )

    # Only the first missing field, in the order above, is reported.
    field = next((name for name in required_fields if name not in event), None)
    if field is not None:
        logger.error(f"Missing required field: {field}")
        return False

    valid_operations = ["start", "stop", "restart"]
    if event["operation"] in valid_operations:
        return True

    logger.error(f"Invalid operation: {event['operation']}. Must be one of {valid_operations}")
    return False
|
|
213
|
+
|
|
214
|
+
def _process_operation(self, event: Dict[str, Any]):
    """Run the requested operation and report its outcome.

    An all-zero action record ID routes the event to the instance-level
    handler; any other ID routes it to the action-level handler. The
    outcome is passed to ``_update_operation_status`` as ``"completed"`` or
    ``"failed"``.

    Args:
        event: The operation event dictionary (already validated)
    """
    operation = event["operation"]
    raw_action_record_id = event["action_record_id"]
    action_record_id = self._extract_action_record_id(raw_action_record_id)
    request_id = event["request_id"]
    requested_by = event["requested_by"]

    logger.info(f"Processing {operation} operation for action {action_record_id} "
                f"(request: {request_id}, user: {requested_by})")

    try:
        # The raw ID is checked so that ObjectID-wrapped zeros are recognised too.
        if self._is_instance_level_operation(raw_action_record_id):
            result = self._handle_instance_operation(operation, event)
        else:
            result = self._handle_action_operation(operation, action_record_id, event)

        # Stop/restart handlers wrap the ActionsManager result in "details".
        # That result has success=False on failure and is an empty dict when
        # the manager swallowed an exception, so neither may be reported as
        # "completed". Results without "details" keep the previous behaviour.
        details = result.get("details") if isinstance(result, dict) else None
        failed = isinstance(details, dict) and not details.get("success")
        status = "failed" if failed else "completed"

        self._update_operation_status(event, action_record_id, status, result)

    except Exception as e:
        error_msg = f"Operation failed: {str(e)}"
        logger.error(error_msg)
        logger.error(traceback.format_exc())

        # Update failure status
        self._update_operation_status(event, action_record_id, "failed", {"error": error_msg})
|
|
249
|
+
|
|
250
|
+
def _handle_action_operation(self, operation: str, action_record_id: str,
                             event: Dict[str, Any]) -> Dict[str, Any]:
    """Dispatch an action-level operation to its handler.

    Args:
        operation: The operation type (start/stop/restart)
        action_record_id: The action record ID to operate on
        event: The full event dictionary

    Returns:
        Result dictionary produced by the selected handler

    Raises:
        ValueError: If ``operation`` is not start, stop or restart
    """
    # Checked in the same order as the operations are listed.
    dispatch = (
        ("start", self._start_action),
        ("stop", self._stop_action),
        ("restart", self._restart_action),
    )
    for name, handler in dispatch:
        if operation == name:
            return handler(action_record_id, event)

    raise ValueError(f"Unknown operation: {operation}")
|
|
271
|
+
|
|
272
|
+
def _handle_instance_operation(self, operation: str, event: Dict[str, Any]) -> Dict[str, Any]:
    """Apply an operation to the whole instance, i.e. this Python process.

    ``stop`` terminates the process with ``os._exit(0)``; ``restart`` hands
    over to ``_restart_application()``; ``start`` is rejected as not
    supported.

    Args:
        operation: The operation type (start/stop/restart)
        event: The full event dictionary

    Returns:
        Result dictionary with operation details (may not return if app is
        killed/restarted)
    """
    logger.info(f"Executing instance-level {operation} operation on Python application")

    if operation == "stop":
        logger.critical("Instance-level STOP: Killing Python application process")
        try:
            # Announce the outcome first: nothing can be reported afterwards.
            logger.warning(
                f"Operation {operation} on instance {self.instance_id}: "
                f"completed - killing_application (PID: {os.getpid()})"
            )
            # Give a moment for logs to be written
            time.sleep(0.5)
        except Exception as e:
            logger.error(f"Failed to log status before kill: {e}")

        logger.critical(f"Terminating Python application (PID: {os.getpid()})")
        os._exit(0)  # Forceful exit, doesn't call cleanup handlers

    if operation == "restart":
        logger.critical("Instance-level RESTART: Restarting Python application process")
        try:
            logger.warning(
                f"Operation {operation} on instance {self.instance_id}: "
                f"completed - restarting_application (PID: {os.getpid()})"
            )
            # Give a moment for logs to be written
            time.sleep(0.5)
        except Exception as e:
            logger.error(f"Failed to log status before restart: {e}")

        logger.critical(f"Restarting Python application (PID: {os.getpid()})")
        self._restart_application()

    if operation == "start":
        # The process handling this event is necessarily already running.
        logger.warning("Start operation not supported at instance level")
        return {
            "operation": operation,
            "instance_level": True,
            "status": "not_supported",
            "message": "Start operation is not supported at instance level"
        }

    # This should not be reached for stop/restart operations
    return {
        "operation": operation,
        "instance_level": True,
        "status": "completed"
    }
|
|
337
|
+
|
|
338
|
+
def _restart_application(self):
    """Restart the Python application by replacing the current process.

    The interpreter is re-executed with ``os.execv()`` using
    ``sys.executable`` and the current ``sys.argv``. If that raises, a new
    process is spawned with ``subprocess.Popen`` and this one exits with
    status 0; if that also fails, this process exits with status 1. The
    method does not return.
    """
    python_executable = sys.executable
    script_args = sys.argv

    try:
        logger.info(f"Restarting with: {python_executable} {' '.join(script_args)}")

        # exec* replaces the process image immediately and does not flush
        # open file objects, so buffered output is pushed out first.
        # Flushing is best effort and must never prevent the restart.
        try:
            for stream in (sys.stdout, sys.stderr):
                if stream is not None:
                    stream.flush()
            for handler in logging.getLogger().handlers:
                handler.flush()
        except Exception as flush_error:
            logger.debug(f"Could not flush output before restart: {flush_error}")

        # Use os.execv() to replace the current process
        # This will restart the application with the same arguments
        os.execv(python_executable, [python_executable] + script_args)

    except Exception as e:
        logger.error(f"Failed to restart application: {e}")
        logger.error(traceback.format_exc())
        # Fallback: try using subprocess to start a new process and exit
        try:
            logger.info("Attempting fallback restart method")

            # Start new process
            subprocess.Popen([python_executable] + script_args)
            # Exit current process
            logger.critical("New process started, exiting current process")
            os._exit(0)
        except Exception as fallback_error:
            logger.error(f"Fallback restart also failed: {fallback_error}")
            logger.error(traceback.format_exc())
            # Last resort: just exit
            os._exit(1)
|
|
372
|
+
|
|
373
|
+
def _start_action(self, action_record_id: str, event: Dict[str, Any]) -> Dict[str, Any]:
    """Handle a start request for one action.

    Args:
        action_record_id: The action record ID to start
        event: The full event dictionary

    Returns:
        Result dictionary with ``status`` (``already_running`` or
        ``started``) and ``action_id``
    """
    logger.info(f"Starting action: {action_record_id}")

    running = self.actions_manager.get_current_actions()
    if action_record_id in running and running[action_record_id].is_running():
        logger.warning(f"Action {action_record_id} is already running")
        return {
            "status": "already_running",
            "action_id": action_record_id
        }

    # Force a fetch to pick up this specific action.
    # NOTE(review): the fetched actions are discarded here and "started" is
    # returned unconditionally; presumably the ActionsManager main loop does
    # the actual start - confirm.
    self.actions_manager.fetch_actions()

    return {
        "status": "started",
        "action_id": action_record_id
    }
|
|
406
|
+
|
|
407
|
+
def _stop_action(self, action_record_id: str, event: Dict[str, Any]) -> Dict[str, Any]:
    """
    Stop the action identified by ``action_record_id``.

    Delegates to ``self.actions_manager.stop_action`` and wraps whatever it
    returns.

    Args:
        action_record_id: The action record ID to stop
        event: The full event dictionary (not read by this method)

    Returns:
        Result dictionary with "status" set to "stopped", the "action_id",
        and the ActionsManager result under "details"
    """
    logger.info(f"Stopping action: {action_record_id}")

    response: Dict[str, Any] = {"status": "stopped", "action_id": action_record_id}
    # The manager's own result is passed through untouched.
    response["details"] = self.actions_manager.stop_action(action_record_id)
    return response
|
|
427
|
+
|
|
428
|
+
def _restart_action(self, action_record_id: str, event: Dict[str, Any]) -> Dict[str, Any]:
    """
    Restart the action identified by ``action_record_id``.

    Delegates to ``self.actions_manager.restart_action`` and wraps whatever
    it returns.

    Args:
        action_record_id: The action record ID to restart
        event: The full event dictionary (not read by this method)

    Returns:
        Result dictionary with "status" set to "restarted", the "action_id",
        and the ActionsManager result under "details"
    """
    logger.info(f"Restarting action: {action_record_id}")

    outcome = self.actions_manager.restart_action(action_record_id)
    return dict(status="restarted", action_id=action_record_id, details=outcome)
|
|
448
|
+
|
|
449
|
+
def _update_operation_status(self, event: Dict[str, Any], action_record_id: str,
                             status: str, result: Dict[str, Any]):
    """
    Report the outcome of an operation through the log and the status API.

    The outcome is always logged. The API call is made only when
    ``_is_instance_level_operation`` returns a falsy value for the event's
    own "action_record_id"; a failure of that call is logged and not
    re-raised.

    Args:
        event: The original event; "operation", "request_id" and
            "action_record_id" are read from it
        action_record_id: The extracted action record ID
        status: Operation status (completed/failed)
        result: Result details (only included in the log line)
    """
    operation = event["operation"]
    request_id = event["request_id"]

    # Logged at warning level so the outcome stands out in the logs.
    logger.warning(
        f"Operation {operation} on {action_record_id}: {status} - "
        f"request_id={request_id}, result={result}"
    )

    # Instance-level operations are only logged; there is no action to update.
    if self._is_instance_level_operation(event["action_record_id"]):
        return

    try:
        # Only a completed start/restart leaves the action running; a stop,
        # any other operation, or any non-completed status reports False.
        is_running = status == "completed" and operation in ("start", "restart")

        self.scaling.update_action_status(
            service_provider=os.environ.get("SERVICE_PROVIDER", ""),
            action_record_id=action_record_id,
            status=status,
            isRunning=is_running,
        )
        logger.info(f"API status updated for action {action_record_id}: {status}")
    except Exception as e:
        logger.error(f"Failed to update API status for action {action_record_id}: {e}")
|
|
@@ -7,6 +7,7 @@ import threading
|
|
|
7
7
|
import time
|
|
8
8
|
from matrice_compute.actions_manager import ActionsManager
|
|
9
9
|
from matrice_compute.actions_scaledown_manager import ActionsScaleDownManager
|
|
10
|
+
from matrice_compute.compute_operations_handler import ComputeOperationsHandler
|
|
10
11
|
from matrice_compute.instance_utils import (
|
|
11
12
|
get_instance_info,
|
|
12
13
|
get_decrypted_access_key_pair,
|
|
@@ -90,6 +91,22 @@ class InstanceManager:
|
|
|
90
91
|
logging.info("InstanceManager initialized with machine resources tracker")
|
|
91
92
|
self.actions_resources_tracker = ActionsResourcesTracker(self.scaling)
|
|
92
93
|
logging.info("InstanceManager initialized with actions resources tracker")
|
|
94
|
+
|
|
95
|
+
# Initialize Compute Operations Handler for event-driven operations
|
|
96
|
+
# Uses EventListener from matrice_common for simplified Kafka consumption
|
|
97
|
+
try:
|
|
98
|
+
instance_id = os.environ.get("INSTANCE_ID")
|
|
99
|
+
self.compute_operations_handler = ComputeOperationsHandler(
|
|
100
|
+
actions_manager=self.actions_manager,
|
|
101
|
+
session=self.session,
|
|
102
|
+
scaling=self.scaling,
|
|
103
|
+
instance_id=instance_id
|
|
104
|
+
)
|
|
105
|
+
logging.info("InstanceManager initialized with Compute Operations Handler for instance ID: %s", instance_id)
|
|
106
|
+
except Exception as e:
|
|
107
|
+
logging.warning("Failed to initialize Compute Operations Handler: %s", e)
|
|
108
|
+
self.compute_operations_handler = None
|
|
109
|
+
|
|
93
110
|
self.poll_interval = 10
|
|
94
111
|
# Note: encryption_key is set in _setup_env_credentials
|
|
95
112
|
logging.info("InstanceManager initialized.")
|
|
@@ -252,6 +269,14 @@ class InstanceManager:
|
|
|
252
269
|
Returns:
|
|
253
270
|
tuple: (instance_manager_thread, actions_manager_thread)
|
|
254
271
|
"""
|
|
272
|
+
# Start Compute Operations Handler in background thread
|
|
273
|
+
if self.compute_operations_handler:
|
|
274
|
+
try:
|
|
275
|
+
self.compute_operations_handler.start()
|
|
276
|
+
logging.info("Started Compute Operations Handler")
|
|
277
|
+
except Exception as exc:
|
|
278
|
+
logging.error("Failed to start Compute Operations Handler: %s", str(exc))
|
|
279
|
+
|
|
255
280
|
# Create and start threads
|
|
256
281
|
instance_manager_thread = threading.Thread(
|
|
257
282
|
target=self.start_instance_manager,
|
|
@@ -402,8 +402,13 @@ class ActionsResourcesTracker:
|
|
|
402
402
|
new_args.extend(x.replace('"', "").replace("'", "") for x in arg.split(" "))
|
|
403
403
|
return new_args
|
|
404
404
|
|
|
405
|
-
|
|
406
|
-
|
|
405
|
+
def is_valid_objectid(s: str) -> bool:
    """Check if string is a valid MongoDB ObjectId (24 hex characters).

    Leading and trailing whitespace is ignored. Anything that is not a
    string (for example ``None``) is rejected instead of raising.
    """
    import string  # function-scope import keeps this helper self-contained

    if not isinstance(s, str):
        return False
    s = s.strip()
    # string.hexdigits is exactly "0123456789abcdefABCDEF".
    return len(s) == 24 and all(c in string.hexdigits for c in s)
|
|
409
|
+
|
|
410
|
+
valid_objectids = [arg for arg in remove_quotation_marks(inspect_data["Args"]) if is_valid_objectid(arg)]
|
|
411
|
+
action_record_id = valid_objectids[-1] if valid_objectids else None
|
|
407
412
|
if not action_record_id:
|
|
408
413
|
logging.debug("No valid action_id found for the container. Container ID: %s, Args: %s", container.id, inspect_data["Args"])
|
|
409
414
|
duration = calculate_time_difference(start_time, finish_time)
|
matrice_compute/scaling.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
4
|
import logging
|
|
5
|
+
import base64
|
|
5
6
|
from matrice_common.utils import log_errors
|
|
6
7
|
|
|
7
8
|
class Scaling:
|
|
@@ -33,6 +34,28 @@ class Scaling:
|
|
|
33
34
|
"Initialized Scaling with instance_id: %s (REST API only)",
|
|
34
35
|
instance_id
|
|
35
36
|
)
|
|
37
|
+
|
|
38
|
+
@log_errors(default_return=None, log_error=True)
def get_kafka_bootstrap_servers(self):
    """Get Kafka bootstrap servers from API and decode base64 fields.

    Calls ``/v1/actions/get_kafka_info`` through ``self.rpc`` and
    base64-decodes the "ip" and "port" entries of the response's "data".

    Returns:
        str: Kafka bootstrap servers in format "ip:port"

    Raises:
        ValueError: If unable to fetch Kafka configuration.
            NOTE(review): the ``log_errors(default_return=None)`` decorator
            presumably logs the error and returns None to the caller instead
            of propagating it - confirm against matrice_common.
    """
    path = "/v1/actions/get_kafka_info"
    response = self.rpc.get(path=path)
    if not response or not response.get("success"):
        # response can be None/empty on this path, so it must not be
        # dereferenced unconditionally when building the error message.
        message = response.get("message", "No response") if response else "No response"
        raise ValueError(f"Failed to fetch Kafka config: {message}")

    data = response["data"]
    ip = base64.b64decode(data["ip"]).decode("utf-8")
    port = base64.b64decode(data["port"]).decode("utf-8")
    return f"{ip}:{port}"
|
|
36
59
|
|
|
37
60
|
@log_errors(default_return=(None, "Error processing response", "Response processing failed"), log_error=True)
|
|
38
61
|
def handle_response(self, resp, success_message, error_message):
|
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
matrice_compute/__init__.py,sha256=ZzQcFsT005VCgq9VZUh565f4upOooEb_FwZ6RgweNZs,597
|
|
2
2
|
matrice_compute/action_instance.py,sha256=SYUZrfj6dtcgEjeEgCyKlrc2p2o08jlW84Y__V4Aqew,69552
|
|
3
|
-
matrice_compute/actions_manager.py,sha256=
|
|
3
|
+
matrice_compute/actions_manager.py,sha256=Iex5uw0PLRR4pvIAZDxc2CypucbanKDbJ3SK8mMGXK8,18148
|
|
4
4
|
matrice_compute/actions_scaledown_manager.py,sha256=pJ0nduNwHWZ10GnqJNx0Ok7cVWabQ_M8E2Vb9pH3A_k,2002
|
|
5
|
-
matrice_compute/
|
|
5
|
+
matrice_compute/compute_operations_handler.py,sha256=amcMhmXtv2irE6qK8Vbgec_8uFqjWmVVp0VWq-73_MU,17781
|
|
6
|
+
matrice_compute/instance_manager.py,sha256=sUkDsy_XrPp7CKQxlujQRz3E_8rVbVZOy7byJOgMlEs,11376
|
|
6
7
|
matrice_compute/instance_utils.py,sha256=N4yPDvNukFEEBngR0lEt4x_XT5hur1q0P-spM2xQIlU,42025
|
|
7
8
|
matrice_compute/prechecks.py,sha256=W9YmNF3RcLhOf4U8WBlExvFqDw1aGWSNTlJtA73lbDQ,17196
|
|
8
9
|
matrice_compute/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
matrice_compute/resources_tracker.py,sha256=
|
|
10
|
-
matrice_compute/scaling.py,sha256=
|
|
10
|
+
matrice_compute/resources_tracker.py,sha256=wy1huqB3Tw_kYC2wfnLa9iSyhDmgI7WQ5I9Kyr-1RSs,22829
|
|
11
|
+
matrice_compute/scaling.py,sha256=JNOgSpAPqbTlZ4qJokkdS9PehqyFwfPh4q98qrfNVCQ,24708
|
|
11
12
|
matrice_compute/shutdown_manager.py,sha256=0MYV_AqygqR9NEntYf7atUC-PbWXyNkm1f-8c2aizgA,13234
|
|
12
13
|
matrice_compute/task_utils.py,sha256=3qIutiQdYPyGRxH9ZwLbqdg8sZcnp6jp08pszWCRFl0,2820
|
|
13
|
-
matrice_compute-0.1.
|
|
14
|
-
matrice_compute-0.1.
|
|
15
|
-
matrice_compute-0.1.
|
|
16
|
-
matrice_compute-0.1.
|
|
17
|
-
matrice_compute-0.1.
|
|
14
|
+
matrice_compute-0.1.26.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
|
|
15
|
+
matrice_compute-0.1.26.dist-info/METADATA,sha256=t7TsI5DcNElRmlKsa8CArXCcA4iBO-9QwZ6j9UQOdg0,1038
|
|
16
|
+
matrice_compute-0.1.26.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
+
matrice_compute-0.1.26.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
|
|
18
|
+
matrice_compute-0.1.26.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|