matrice-compute 0.1.25__py3-none-any.whl → 0.1.27__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -27,6 +27,7 @@ class ActionsManager:
27
27
  scaling (Scaling): Scaling service instance
28
28
  """
29
29
  self.current_actions: dict[str, ActionInstance] = {}
30
+ self.stopped_actions: dict[str, ActionInstance] = {} # Track stopped actions separately
30
31
  self.scaling = scaling
31
32
  self.memory_threshold = 0.9
32
33
  self.poll_interval = 10
@@ -111,75 +112,110 @@ class ActionsManager:
111
112
  def process_actions(self) -> None:
112
113
  """Process fetched actions."""
113
114
  for action in self.fetch_actions():
115
+ action_id = action["_id"]
116
+
117
+ # Skip if action is already running in current_actions
118
+ if action_id in self.current_actions:
119
+ logging.info("Action %s already in current_actions, skipping", action_id)
120
+ continue
121
+
122
+ # If action exists in stopped_actions, remove it before starting fresh
123
+ if action_id in self.stopped_actions:
124
+ logging.info("Action %s found in stopped_actions, removing before restart", action_id)
125
+ del self.stopped_actions[action_id]
126
+
127
+ # Process and add to current_actions
114
128
  action_instance = self.process_action(action)
115
129
  if action_instance:
116
- self.current_actions[action["_id"]] = action_instance
130
+ # Ensure action is not in stopped_actions (defensive check)
131
+ if action_id in self.stopped_actions:
132
+ del self.stopped_actions[action_id]
133
+ self.current_actions[action_id] = action_instance
117
134
 
118
135
  @log_errors(raise_exception=False)
119
- def purge_unwanted(self) -> None:
120
- """Purge completed or failed actions.
121
-
122
- This method checks all actions in the current_actions dictionary and removes any that:
123
- 1. Are explicitly reported as not running by the is_running() method
124
- 2. Have invalid or corrupted process objects
136
+ def update_actions_status(self) -> None:
137
+ """Update tracking of running vs stopped actions.
138
+
139
+ This method checks all actions and moves stopped ones to stopped_actions dict
140
+ without deleting them. This prevents interference with compute operations
141
+ handler while maintaining accurate status reporting.
125
142
  """
126
- purged_count = 0
127
-
128
- # Check each action and purge if needed
143
+ moved_to_stopped = 0
144
+
145
+ # Check each action and update its status
129
146
  for action_id, instance in list(self.current_actions.items()):
130
- should_purge = False
131
- purge_reason = ""
132
-
133
- # Check if process is reported as not running
134
- if not instance.is_running():
135
- should_purge = True
136
- purge_reason = "process reported as not running"
137
-
147
+ is_running = False
148
+ status_reason = ""
149
+
150
+ # Check if process is running
151
+ if hasattr(instance, 'is_running'):
152
+ try:
153
+ is_running = instance.is_running()
154
+ except Exception as e:
155
+ logging.error("Error checking is_running for action %s: %s", action_id, str(e))
156
+ is_running = False
157
+ status_reason = f"error checking status: {str(e)}"
158
+
138
159
  # Check for process object validity
139
- elif not hasattr(instance, 'process') or instance.process is None:
140
- should_purge = True
141
- purge_reason = "invalid process object"
142
-
143
- # Purge if any condition was met
144
- if should_purge:
160
+ if not is_running and not status_reason:
161
+ if not hasattr(instance, 'process') or instance.process is None:
162
+ status_reason = "no process object"
163
+ else:
164
+ status_reason = "process not running"
165
+
166
+ # Move to stopped_actions if not running (but don't delete)
167
+ if not is_running:
145
168
  logging.info(
146
- "Action %s is being purged: %s",
169
+ "Action %s moved to stopped_actions: %s",
147
170
  action_id,
148
- purge_reason
171
+ status_reason
149
172
  )
150
-
151
- # Remove from tracking dictionaries
152
- del self.current_actions[action_id]
153
- purged_count += 1
173
+ # Ensure action is removed from current_actions before adding to stopped_actions
174
+ if action_id in self.current_actions:
175
+ del self.current_actions[action_id]
176
+ # Ensure action is not duplicated in stopped_actions
177
+ if action_id not in self.stopped_actions:
178
+ self.stopped_actions[action_id] = instance
179
+ moved_to_stopped += 1
154
180
 
155
- # Try to explicitly stop the action if possible
156
- try:
157
- if hasattr(instance, 'stop'):
158
- instance.stop()
159
- except Exception as e:
160
- logging.error(f"Error stopping action {action_id}: {str(e)}")
161
-
162
- if purged_count > 0:
181
+ # Log current state
182
+ running_ids = list(self.current_actions.keys())
183
+ stopped_ids = list(self.stopped_actions.keys())
184
+
185
+ if self.current_actions or self.stopped_actions:
163
186
  logging.info(
164
- "Purged %d completed actions, %d actions remain in queue",
165
- purged_count,
166
- len(self.current_actions)
187
+ "Actions status: %d running %s, %d stopped %s",
188
+ len(self.current_actions),
189
+ running_ids if running_ids else "[]",
190
+ len(self.stopped_actions),
191
+ stopped_ids if stopped_ids else "[]"
167
192
  )
168
193
 
194
+ @log_errors(raise_exception=False)
195
+ def purge_unwanted(self) -> None:
196
+ """Purge completed or failed actions.
197
+
198
+ NOTE: This now calls update_actions_status() which moves stopped actions
199
+ to a separate dict instead of deleting them. This prevents interference
200
+ with compute operations handler while maintaining accurate status.
201
+ """
202
+ self.update_actions_status()
203
+
169
204
  @log_errors(default_return={}, raise_exception=False)
170
205
  def get_current_actions(self) -> dict:
171
- """Get the current actions.
206
+ """Get the current running actions.
172
207
 
173
208
  This method:
174
- 1. Purges any completed actions using purge_unwanted()
175
- 2. Double-checks remaining actions to ensure they are truly running
209
+ 1. Updates action status tracking via update_actions_status()
210
+ 2. Returns only the running actions (current_actions dict)
176
211
  3. Provides detailed logging about current actions state
177
212
 
178
213
  Returns:
179
- dict: Current active actions
214
+ dict: Current running actions only
180
215
  """
181
- # Always purge unwanted actions first
182
- self.purge_unwanted()
216
+ # Update status tracking (moves stopped to stopped_actions)
217
+ self.update_actions_status()
218
+
183
219
  if self.current_actions:
184
220
  action_ids = list(self.current_actions.keys())
185
221
  logging.info(
@@ -189,9 +225,213 @@ class ActionsManager:
189
225
  )
190
226
  else:
191
227
  logging.debug("No actions currently running")
192
- return {}
228
+
193
229
  return self.current_actions
194
230
 
231
+ @log_errors(default_return={}, raise_exception=False)
232
+ def get_all_actions(self) -> dict:
233
+ """Get all tracked actions (both running and stopped).
234
+
235
+ Returns:
236
+ dict: All tracked actions with their status
237
+ """
238
+ all_actions = {}
239
+ for action_id, instance in self.current_actions.items():
240
+ all_actions[action_id] = {"instance": instance, "status": "running"}
241
+ for action_id, instance in self.stopped_actions.items():
242
+ all_actions[action_id] = {"instance": instance, "status": "stopped"}
243
+ return all_actions
244
+
245
+ @log_errors(default_return={}, raise_exception=False)
246
+ def get_stopped_actions(self) -> dict:
247
+ """Get stopped actions.
248
+
249
+ Returns:
250
+ dict: Stopped actions
251
+ """
252
+ return self.stopped_actions
253
+
254
+ @log_errors(default_return={}, raise_exception=False)
255
+ def stop_action(self, action_record_id: str) -> dict:
256
+ """Stop a specific action by its record ID.
257
+
258
+ Args:
259
+ action_record_id (str): The action record ID to stop
260
+
261
+ Returns:
262
+ dict: Result dictionary with status information
263
+ """
264
+ logging.info("Attempting to stop action: %s", action_record_id)
265
+
266
+ # Check if action exists in current (running) actions
267
+ action_instance = None
268
+ action_source = None
269
+
270
+ if action_record_id in self.current_actions:
271
+ action_instance = self.current_actions[action_record_id]
272
+ action_source = "current_actions"
273
+ elif action_record_id in self.stopped_actions:
274
+ # Action already in stopped_actions
275
+ logging.info("Action %s already in stopped_actions", action_record_id)
276
+ return {
277
+ "success": True,
278
+ "reason": "already_stopped",
279
+ "action_id": action_record_id
280
+ }
281
+ else:
282
+ logging.warning("Action %s not found in current or stopped actions", action_record_id)
283
+ return {
284
+ "success": False,
285
+ "reason": "action_not_found",
286
+ "action_id": action_record_id
287
+ }
288
+
289
+ # Check if action is actually running
290
+ if not action_instance.is_running():
291
+ logging.info("Action %s is not running, moving to stopped_actions", action_record_id)
292
+ # Move to stopped_actions instead of deleting
293
+ # Ensure action is removed from current_actions first
294
+ if action_record_id in self.current_actions:
295
+ del self.current_actions[action_record_id]
296
+ # Ensure action is not duplicated in stopped_actions
297
+ if action_record_id not in self.stopped_actions:
298
+ self.stopped_actions[action_record_id] = action_instance
299
+ return {
300
+ "success": True,
301
+ "reason": "already_stopped",
302
+ "action_id": action_record_id
303
+ }
304
+
305
+ # Stop the action
306
+ try:
307
+ logging.info("Stopping action %s", action_record_id)
308
+ action_instance.stop()
309
+
310
+ # Update action status to stopped
311
+ self.scaling.update_action_status(
312
+ service_provider=os.environ["SERVICE_PROVIDER"],
313
+ action_record_id=action_record_id,
314
+ status="stopped",
315
+ isRunning=False,
316
+ action_duration=0,
317
+ )
318
+
319
+ # Move to stopped_actions instead of deleting
320
+ # Ensure action is removed from current_actions first
321
+ if action_record_id in self.current_actions:
322
+ del self.current_actions[action_record_id]
323
+ # Ensure action is not duplicated in stopped_actions
324
+ if action_record_id not in self.stopped_actions:
325
+ self.stopped_actions[action_record_id] = action_instance
326
+
327
+ logging.info("Successfully stopped action: %s", action_record_id)
328
+ return {
329
+ "success": True,
330
+ "action_id": action_record_id,
331
+ "stopped_at": time.time()
332
+ }
333
+
334
+ except Exception as e:
335
+ logging.error("Error stopping action %s: %s", action_record_id, str(e))
336
+ return {
337
+ "success": False,
338
+ "reason": "stop_failed",
339
+ "error": str(e),
340
+ "action_id": action_record_id
341
+ }
342
+
343
+ @log_errors(default_return={}, raise_exception=False)
344
+ def restart_action(self, action_record_id: str) -> dict:
345
+ """Restart a specific action by its record ID.
346
+
347
+ This method stops the action if it's running, then fetches fresh action
348
+ details from the backend and starts it again.
349
+
350
+ Args:
351
+ action_record_id (str): The action record ID to restart
352
+
353
+ Returns:
354
+ dict: Result dictionary with status information
355
+ """
356
+ logging.info("Attempting to restart action: %s", action_record_id)
357
+
358
+ # Step 1: Stop the action if it exists in current_actions or stopped_actions
359
+ stop_result = {"success": True, "reason": "not_running"}
360
+ if action_record_id in self.current_actions:
361
+ logging.info("Stopping existing action %s before restart", action_record_id)
362
+ stop_result = self.stop_action(action_record_id)
363
+
364
+ if not stop_result.get("success"):
365
+ logging.error("Failed to stop action %s for restart", action_record_id)
366
+ return {
367
+ "success": False,
368
+ "reason": "stop_failed_before_restart",
369
+ "stop_result": stop_result,
370
+ "action_id": action_record_id
371
+ }
372
+
373
+ # Wait a moment for cleanup
374
+ time.sleep(2)
375
+ elif action_record_id in self.stopped_actions:
376
+ logging.info("Action %s found in stopped_actions, will restart", action_record_id)
377
+ stop_result = {"success": True, "reason": "was_stopped"}
378
+
379
+ # Step 2: Fetch fresh action details from backend
380
+ try:
381
+ logging.info("Fetching action details for restart: %s", action_record_id)
382
+
383
+ # Get action details via API
384
+ action_details, error, _ = self.scaling.get_action_details(action_record_id)
385
+
386
+ if error or not action_details:
387
+ logging.error("Failed to fetch action details for %s: %s",
388
+ action_record_id, error)
389
+ return {
390
+ "success": False,
391
+ "reason": "fetch_failed",
392
+ "error": error,
393
+ "action_id": action_record_id
394
+ }
395
+
396
+ # Step 3: Process (start) the action
397
+ logging.info("Starting action %s after restart", action_record_id)
398
+ action_instance = self.process_action(action_details)
399
+
400
+ if action_instance:
401
+ # Ensure action is removed from stopped_actions if present
402
+ if action_record_id in self.stopped_actions:
403
+ del self.stopped_actions[action_record_id]
404
+ # Ensure action is removed from current_actions if present (defensive check)
405
+ if action_record_id in self.current_actions:
406
+ logging.warning("Action %s already in current_actions during restart, replacing", action_record_id)
407
+ del self.current_actions[action_record_id]
408
+ # Add to current_actions
409
+ self.current_actions[action_record_id] = action_instance
410
+
411
+ logging.info("Successfully restarted action: %s", action_record_id)
412
+ return {
413
+ "success": True,
414
+ "action_id": action_record_id,
415
+ "restarted_at": time.time(),
416
+ "stop_result": stop_result
417
+ }
418
+ else:
419
+ logging.error("Failed to start action %s after restart", action_record_id)
420
+ return {
421
+ "success": False,
422
+ "reason": "start_failed_after_restart",
423
+ "action_id": action_record_id
424
+ }
425
+
426
+ except Exception as e:
427
+ logging.error("Error restarting action %s: %s", action_record_id, str(e))
428
+ return {
429
+ "success": False,
430
+ "reason": "restart_failed",
431
+ "error": str(e),
432
+ "action_id": action_record_id
433
+ }
434
+
195
435
  @log_errors(raise_exception=True)
196
436
  def start_actions_manager(self) -> None:
197
437
  """Start the actions manager main loop."""