matrice-inference 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of matrice-inference might be problematic. Click here for more details.

Files changed (37) hide show
  1. matrice_inference/__init__.py +72 -0
  2. matrice_inference/py.typed +0 -0
  3. matrice_inference/server/__init__.py +23 -0
  4. matrice_inference/server/inference_interface.py +176 -0
  5. matrice_inference/server/model/__init__.py +1 -0
  6. matrice_inference/server/model/model_manager.py +274 -0
  7. matrice_inference/server/model/model_manager_wrapper.py +550 -0
  8. matrice_inference/server/model/triton_model_manager.py +290 -0
  9. matrice_inference/server/model/triton_server.py +1248 -0
  10. matrice_inference/server/proxy_interface.py +371 -0
  11. matrice_inference/server/server.py +1004 -0
  12. matrice_inference/server/stream/__init__.py +0 -0
  13. matrice_inference/server/stream/app_deployment.py +228 -0
  14. matrice_inference/server/stream/consumer_worker.py +201 -0
  15. matrice_inference/server/stream/frame_cache.py +127 -0
  16. matrice_inference/server/stream/inference_worker.py +163 -0
  17. matrice_inference/server/stream/post_processing_worker.py +230 -0
  18. matrice_inference/server/stream/producer_worker.py +147 -0
  19. matrice_inference/server/stream/stream_pipeline.py +451 -0
  20. matrice_inference/server/stream/utils.py +23 -0
  21. matrice_inference/tmp/abstract_model_manager.py +58 -0
  22. matrice_inference/tmp/aggregator/__init__.py +18 -0
  23. matrice_inference/tmp/aggregator/aggregator.py +330 -0
  24. matrice_inference/tmp/aggregator/analytics.py +906 -0
  25. matrice_inference/tmp/aggregator/ingestor.py +438 -0
  26. matrice_inference/tmp/aggregator/latency.py +597 -0
  27. matrice_inference/tmp/aggregator/pipeline.py +968 -0
  28. matrice_inference/tmp/aggregator/publisher.py +431 -0
  29. matrice_inference/tmp/aggregator/synchronizer.py +594 -0
  30. matrice_inference/tmp/batch_manager.py +239 -0
  31. matrice_inference/tmp/overall_inference_testing.py +338 -0
  32. matrice_inference/tmp/triton_utils.py +638 -0
  33. matrice_inference-0.1.2.dist-info/METADATA +28 -0
  34. matrice_inference-0.1.2.dist-info/RECORD +37 -0
  35. matrice_inference-0.1.2.dist-info/WHEEL +5 -0
  36. matrice_inference-0.1.2.dist-info/licenses/LICENSE.txt +21 -0
  37. matrice_inference-0.1.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1004 @@
1
+ """Module providing server functionality."""
2
+
3
+ import os
4
+ import threading
5
+ import time
6
+ import urllib.request
7
+ import logging
8
+ import asyncio
9
+ import signal
10
+ import atexit
11
+ import time
12
+ from datetime import datetime, timezone
13
+ from typing import Optional, Callable
14
+
15
+ from matrice.action_tracker import ActionTracker
16
+ from matrice_analytics.post_processing.post_processor import PostProcessor
17
+ from matrice_inference.server.proxy_interface import MatriceProxyInterface
18
+ from matrice_inference.server.model.model_manager_wrapper import ModelManagerWrapper
19
+ from matrice_inference.server.inference_interface import InferenceInterface
20
+ from matrice_inference.server.stream.stream_pipeline import StreamingPipeline
21
+ from matrice_inference.server.stream.app_deployment import AppDeployment
22
+
23
+
24
# ---------------------------------------------------------------------------
# Module-level tunables
# ---------------------------------------------------------------------------

# External port used when the caller does not pass one.
DEFAULT_EXTERNAL_PORT = 80

# Idle auto-shutdown threshold, in minutes: fallback value and smallest
# value accepted before the fallback is used instead.
DEFAULT_SHUTDOWN_THRESHOLD_MINUTES = 15
MIN_SHUTDOWN_THRESHOLD_MINUTES = 1

# Polling periods, in seconds, of the background checker loops.
HEARTBEAT_INTERVAL_SECONDS = 30
SHUTDOWN_CHECK_INTERVAL_SECONDS = 30

# Cleanup delays, in seconds.
# NOTE(review): not referenced in the visible part of this module.
CLEANUP_DELAY_SECONDS = 5
FINAL_CLEANUP_DELAY_SECONDS = 10

# External-IP lookup limits.
MAX_IP_FETCH_ATTEMPTS = 5  # previously 3
IP_FETCH_TIMEOUT_SECONDS = 30  # previously 10

# Consecutive failures tolerated before shutdown is requested:
# 20 checks at 30-second intervals is 10 minutes (previously 5 minutes).
MAX_HEARTBEAT_FAILURES_BEFORE_SHUTDOWN = 20
MAX_DEPLOYMENT_CHECK_FAILURES_BEFORE_SHUTDOWN = 20
37
+
38
+
39
+ class MatriceDeployServer:
40
+ """Class for managing model deployment and server functionality."""
41
+
42
def __init__(
    self,
    load_model: Optional[Callable] = None,
    predict: Optional[Callable] = None,
    action_id: str = "",
    external_port: int = DEFAULT_EXTERNAL_PORT,
    batch_predict: Optional[Callable] = None,
    custom_post_processing_fn: Optional[Callable] = None,
):
    """Prepare deployment state and acknowledge the deployment action.

    Nothing is started here; components are created later by ``start()``.

    Args:
        load_model (callable, optional): Function to load the model.
        predict (callable, optional): Function to make predictions.
        action_id (str, optional): ID handed to the ActionTracker.
        external_port (int, optional): External port number.
        batch_predict (callable, optional): Function to make batch predictions.
        custom_post_processing_fn (callable, optional): Stored as-is on the
            instance; not used inside this method.

    Raises:
        ValueError: If a parameter is invalid or a required deployment
            identifier is missing from the action details.
        Exception: Any other initialization failure is logged and re-raised.
    """
    try:
        self._validate_init_parameters(load_model, predict, action_id, external_port)
        self.external_port = int(external_port)

        # Everything below is derived from the action tracker.
        tracker = ActionTracker(action_id)
        self.action_tracker = tracker
        self.session = tracker.session
        self.rpc = self.session.rpc
        self.action_details = tracker.action_details
        self.job_params = tracker.get_job_params()
        self.server_type = tracker.server_type
        self.app_name = self.job_params.get("application_name", "")
        self.app_version = self.job_params.get("application_version", "")

        details = self.action_details
        logging.info("Action details: %s", details)

        # Deployment identifiers; the first three are mandatory.
        self.deployment_instance_id = details.get("_idModelDeployInstance")
        self.deployment_id = details.get("_idDeployment")
        self.model_id = details.get("_idModelDeploy")
        self.inference_pipeline_id = details.get("inference_pipeline_id")

        if not (self.deployment_instance_id and self.deployment_id and self.model_id):
            raise ValueError(
                "Missing required deployment identifiers in action details"
            )

        # Idle-shutdown threshold: configured in minutes, stored in seconds.
        threshold_minutes = int(
            details.get("shutdownThreshold", DEFAULT_SHUTDOWN_THRESHOLD_MINUTES)
        )
        if threshold_minutes < MIN_SHUTDOWN_THRESHOLD_MINUTES:
            logging.warning(
                "Invalid shutdown threshold %d, using default: %d",
                threshold_minutes,
                DEFAULT_SHUTDOWN_THRESHOLD_MINUTES,
            )
            threshold_minutes = DEFAULT_SHUTDOWN_THRESHOLD_MINUTES
        self.shutdown_threshold = threshold_minutes * 60
        self.auto_shutdown = bool(details.get("autoShutdown", True))

        # User-supplied callables.
        self.load_model = load_model
        self.predict = predict
        self.batch_predict = batch_predict
        self.custom_post_processing_fn = custom_post_processing_fn

        # Components are populated by start().
        self.proxy_interface = None
        self.model_manager = None
        self.inference_interface = None
        self.post_processor = None
        self.streaming_pipeline = None
        self.app_deployment = None
        self.stream_manager = None
        self.utils = None

        # Shutdown coordination.
        self._shutdown_event = threading.Event()
        self._stream_manager_thread = None
        self._register_shutdown_handlers()

        self.action_tracker.update_status(
            "MDL_DPY_ACK",
            "OK",
            "Model deployment acknowledged",
        )
        logging.info("MatriceDeployServer initialized successfully")

    except Exception as exc:
        logging.error("Failed to initialize MatriceDeployServer: %s", str(exc))
        raise
157
+
158
+ def _register_shutdown_handlers(self):
159
+ """Register signal handlers and atexit callback for graceful shutdown."""
160
+ def signal_handler(signum, frame):
161
+ logging.info("Received signal %d, triggering shutdown through utils...", signum)
162
+ try:
163
+ # Use utils shutdown to trigger coordinated shutdown
164
+ if hasattr(self, 'utils') and self.utils:
165
+ self.utils._shutdown_initiated.set()
166
+ else:
167
+ # Fallback to direct shutdown if utils not available
168
+ self.stop_server()
169
+ os._exit(0)
170
+ except Exception as exc:
171
+ logging.error("Error during signal-triggered shutdown: %s", str(exc))
172
+ os._exit(1)
173
+
174
+ def atexit_handler():
175
+ logging.info("Process exiting, ensuring graceful shutdown...")
176
+ try:
177
+ if not self._shutdown_event.is_set():
178
+ if hasattr(self, 'utils') and self.utils and not self.utils._shutdown_initiated.is_set():
179
+ self.utils._shutdown_initiated.set()
180
+ else:
181
+ self.stop_server()
182
+ except Exception as exc:
183
+ logging.error("Error during atexit shutdown: %s", str(exc))
184
+
185
+ # Register signal handlers for graceful shutdown
186
+ signal.signal(signal.SIGTERM, signal_handler)
187
+ signal.signal(signal.SIGINT, signal_handler)
188
+
189
+ # Register atexit handler as a final safety net
190
+ atexit.register(atexit_handler)
191
+
192
+ logging.info("Shutdown handlers registered successfully")
193
+
194
+ def _validate_init_parameters(self, load_model, predict, action_id, external_port):
195
+ """Validate initialization parameters.
196
+
197
+ Args:
198
+ load_model: Model loading function
199
+ predict: Prediction function
200
+ action_id: Action ID string
201
+ external_port: External port number
202
+
203
+ Raises:
204
+ ValueError: If parameters are invalid
205
+ """
206
+ # Validate callable functions
207
+ if load_model is not None and not callable(load_model):
208
+ raise ValueError("load_model must be callable or None")
209
+
210
+ if predict is not None and not callable(predict):
211
+ raise ValueError("predict must be callable or None")
212
+
213
+ # Validate action_id
214
+ if not isinstance(action_id, str):
215
+ raise ValueError("action_id must be a string")
216
+
217
+ external_port = int(external_port)
218
+ # Validate external_port
219
+ if not isinstance(external_port, int):
220
+ raise ValueError("external_port must be an integer")
221
+ if not (1 <= external_port <= 65535):
222
+ raise ValueError(
223
+ f"Invalid external port: {external_port}. Must be between 1 and 65535"
224
+ )
225
+
226
def start(self, block=True):
    """Bring up every server component and publish the deployment status.

    Args:
        block: When true, wait in ``utils.wait_for_shutdown()`` after startup.

    Raises:
        Exception: Any startup failure is logged, reported through the
            action tracker with an "ERROR" status, and re-raised.
    """
    try:
        # Order matters: each step relies on what the previous one created.
        startup_steps = (
            self._validate_configuration,
            self._initialize_model_manager,
            self._initialize_inference_interface,
            self._initialize_streaming_pipeline,
            self._start_proxy_interface,
        )
        for step in startup_steps:
            step()

        logging.info("All server components started successfully")

        self.action_tracker.update_status(
            "MDL_DPY_MDL",
            "OK",
            "Model deployment model loaded",
        )

        # Utils publish the deployment address and run the background checkers.
        self.utils = MatriceDeployServerUtils(
            self.action_tracker, self.inference_interface, self.external_port, self
        )
        self.utils.update_deployment_address()
        self.utils.run_background_checkers()

        self.action_tracker.update_status(
            "MDL_DPY_STR",
            "SUCCESS",
            "Model deployment started",
        )

        if block:
            self.utils.wait_for_shutdown()
    except Exception as exc:
        logging.error("Failed to start server components: %s", str(exc))
        self.action_tracker.update_status(
            "ERROR",
            "ERROR",
            f"Model deployment error: {str(exc)}",
        )
        raise
263
+
264
+ def _validate_configuration(self):
265
+ """Validate server configuration before starting components."""
266
+ required_env_vars = ["INTERNAL_PORT"]
267
+ missing_vars = [var for var in required_env_vars if not os.environ.get(var)]
268
+ if missing_vars:
269
+ raise ValueError(f"Missing required environment variables: {missing_vars}")
270
+
271
+ # Validate action details
272
+ required_details = ["_idModelDeployInstance", "_idDeployment", "_idModelDeploy"]
273
+ missing_details = [
274
+ key for key in required_details if not self.action_details.get(key)
275
+ ]
276
+ if missing_details:
277
+ raise ValueError(f"Missing required action details: {missing_details}")
278
+
279
+ # Validate port
280
+ internal_port = int(os.environ["INTERNAL_PORT"])
281
+ if not (1 <= internal_port <= 65535):
282
+ raise ValueError(f"Invalid internal port: {internal_port}")
283
+
284
+ logging.info("Configuration validation passed")
285
+
286
+ # NOTE: Integrate model manager wrapper here if needed
287
def _initialize_model_manager(self):
    """Create the ModelManagerWrapper and store it on ``self.model_manager``."""
    logging.info("Initializing model manager wrapper for model ID: %s", self.model_id)

    # Any server type other than "triton" (case-insensitive) maps to "default".
    if self.server_type.lower() == "triton":
        model_type = "triton"
    else:
        model_type = "default"

    wrapper_options = {
        "action_tracker": self.action_tracker,
        "test_env": False,  # production mode, backed by the action tracker
        "model_type": model_type,
        "load_model": self.load_model,
        "predict": self.predict,
        "batch_predict": self.batch_predict,
        "num_model_instances": self.action_details.get("numModelInstances", 1),
        "internal_server_type": self.server_type,
        "internal_port": int(os.environ["INTERNAL_PORT"]),
        "internal_host": "localhost",
    }
    self.model_manager = ModelManagerWrapper(**wrapper_options)

    logging.info("Model manager wrapper initialized successfully")
308
+
309
def _initialize_inference_interface(self):
    """Build the PostProcessor and the InferenceInterface that wraps it."""
    logging.info("Initializing inference interface and post-processor")

    params = self.job_params

    # "post_processing_config" wins; "postProcessingConfig" is the fallback key.
    fallback_config = params.get("postProcessingConfig", None)
    post_processing_config = params.get("post_processing_config", fallback_config)
    if post_processing_config is None:
        post_processing_config = {}

    # Only dict configs are augmented. This writes into the dict object
    # obtained from job_params when one was supplied.
    if isinstance(post_processing_config, dict):
        post_processing_config["facial_recognition_server_id"] = params.get("facial_recognition_server_id", None)
        post_processing_config["session"] = self.session

    # Category mapping is best-effort: a failure only produces a warning.
    index_to_category = None
    tracker = self.action_tracker
    try:
        if hasattr(tracker, 'get_index_to_category'):
            exported = getattr(tracker, 'is_exported', True)
            index_to_category = tracker.get_index_to_category(exported)
    except Exception as e:
        logging.warning(f"Failed to get index_to_category from action_tracker: {str(e)}")

    self.post_processor = PostProcessor(
        post_processing_config=post_processing_config,
        app_name=self.app_name,
        index_to_category=index_to_category,
        target_categories=None,  # never populated in this method
    )

    self.inference_interface = InferenceInterface(
        model_manager=self.model_manager,
        post_processor=self.post_processor,
    )

    logging.info("Inference interface and post-processor initialized successfully")
351
+
352
def _initialize_streaming_pipeline(self):
    """Create the StreamingPipeline and run its async ``start()`` to completion.

    Camera configurations come from an AppDeployment when an
    ``app_deployment_id`` is present in the action details or job params;
    otherwise the pipeline starts with no cameras.

    Raises:
        Exception: Any failure is logged and re-raised.
    """
    try:
        logging.info("Initializing streaming pipeline...")

        params = self.job_params
        app_deployment_id = self.action_details.get(
            "app_deployment_id", params.get("app_deployment_id", None)
        )
        if not app_deployment_id:
            logging.warning("No app_deployment_id found in job_params, starting pipeline without cameras")
            camera_configs = {}
        else:
            self.app_deployment = AppDeployment(
                session=self.session,
                app_deployment_id=app_deployment_id,
                connection_timeout=params.get("stream_connection_timeout", 1200),
            )
            camera_configs = self.app_deployment.get_camera_configs()
            logging.info(f"Retrieved {len(camera_configs)} camera configurations from app deployment")

        self.streaming_pipeline = StreamingPipeline(
            inference_interface=self.inference_interface,
            post_processor=self.post_processor,
            consumer_threads=params.get("consumer_threads", 4),
            producer_threads=params.get("producer_threads", 2),
            inference_threads=params.get("inference_threads", 4),
            postprocessing_threads=params.get("postprocessing_threads", 2),
            inference_queue_maxsize=params.get("inference_queue_maxsize", 5000),
            postproc_queue_maxsize=params.get("postproc_queue_maxsize", 5000),
            output_queue_maxsize=params.get("output_queue_maxsize", 5000),
            message_timeout=params.get("message_timeout", 2.0),
            inference_timeout=params.get("inference_timeout", 60.0),
            shutdown_timeout=params.get("shutdown_timeout", 60.0),
            camera_configs=camera_configs,
        )

        # Only the loop probe is guarded by `except RuntimeError`. Guarding
        # the startup call as well would swallow a RuntimeError raised by
        # the pipeline and replace it with the unrelated "asyncio.run()
        # cannot be called from a running event loop" error.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            inside_running_loop = False
        else:
            inside_running_loop = True

        if inside_running_loop:
            # A loop is already running in this thread, so drive the
            # coroutine on a fresh loop in a worker thread.
            import concurrent.futures
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future = executor.submit(self._start_pipeline_in_new_loop)
                # NOTE(review): leaving the `with` block still waits for the
                # worker, so this timeout does not bound total wait time.
                future.result(timeout=120)
        else:
            asyncio.run(self.streaming_pipeline.start())

        logging.info("Streaming pipeline initialized successfully")

    except Exception as e:
        logging.error(f"Failed to initialize streaming pipeline: {str(e)}")
        raise
408
+
409
+ def _start_pipeline_in_new_loop(self):
410
+ """Start the pipeline in a new event loop (for use when already in an event loop)."""
411
+ import asyncio
412
+ loop = asyncio.new_event_loop()
413
+ asyncio.set_event_loop(loop)
414
+ try:
415
+ loop.run_until_complete(self.streaming_pipeline.start())
416
+ finally:
417
+ loop.close()
418
+
419
+ def _stop_pipeline_in_new_loop(self):
420
+ """Stop the pipeline in a new event loop (for use when already in an event loop)."""
421
+ import asyncio
422
+ loop = asyncio.new_event_loop()
423
+ asyncio.set_event_loop(loop)
424
+ try:
425
+ loop.run_until_complete(self.streaming_pipeline.stop())
426
+ finally:
427
+ loop.close()
428
+
429
def _start_proxy_interface(self):
    """Create the MatriceProxyInterface on the external port and start it."""
    port = self.external_port
    logging.info("Starting proxy interface on external port: %d", port)

    proxy = MatriceProxyInterface(
        session=self.session,
        deployment_id=self.deployment_id,
        deployment_instance_id=self.deployment_instance_id,
        external_port=port,
        inference_interface=self.inference_interface,
    )
    # Stored before start() so stop_server() can see it even if start() fails.
    self.proxy_interface = proxy
    proxy.start()

    logging.info("Proxy interface started successfully")
446
+
447
def start_server(self, block=True):
    """Alias for ``start()``.

    Args:
        block: Forwarded unchanged to ``start()``; when true the call waits
            for the shutdown signal, otherwise it returns once startup is done.

    Raises:
        Exception: Whatever ``start()`` raises.
    """
    should_block = block
    self.start(block=should_block)
457
+
458
def stop_server(self):
    """Stop the streaming pipeline, stream-manager thread and proxy interface.

    Each component is stopped best-effort: a failure in one is logged and
    the next one is still attempted.

    Raises:
        Exception: Only for failures outside the per-component handlers
            (for example, setting the shutdown event); logged and re-raised.
    """
    try:
        logging.info("Initiating server shutdown...")

        # Tell every component that shutdown has begun.
        self._shutdown_event.set()

        if self.streaming_pipeline:
            try:
                # Only the loop probe is guarded by `except RuntimeError`,
                # so a RuntimeError raised while stopping the pipeline is
                # reported as-is instead of being replaced by asyncio.run()'s
                # "running event loop" error.
                try:
                    asyncio.get_running_loop()
                except RuntimeError:
                    inside_running_loop = False
                else:
                    inside_running_loop = True

                if inside_running_loop:
                    # A loop is already running here; stop on a worker thread.
                    import concurrent.futures
                    with concurrent.futures.ThreadPoolExecutor() as executor:
                        future = executor.submit(self._stop_pipeline_in_new_loop)
                        future.result(timeout=60)
                else:
                    asyncio.run(self.streaming_pipeline.stop())
                logging.info("Streaming pipeline stopped")
            except Exception as exc:
                logging.error("Error stopping streaming pipeline: %s", str(exc))

        manager_thread = self._stream_manager_thread
        if manager_thread and manager_thread.is_alive():
            logging.info("Waiting for stream manager thread to stop...")
            try:
                manager_thread.join(timeout=10.0)
                if manager_thread.is_alive():
                    logging.warning("Stream manager thread did not stop within timeout")
                else:
                    logging.info("Stream manager thread stopped successfully")
            except Exception as exc:
                logging.error("Error waiting for stream manager thread: %s", str(exc))

        if self.proxy_interface:
            try:
                self.proxy_interface.stop()
                logging.info("Proxy interface stopped")
            except Exception as exc:
                logging.error("Error stopping proxy interface: %s", str(exc))

        logging.info("Server shutdown completed")

    except Exception as exc:
        logging.error("Error during server shutdown: %s", str(exc))
        raise
510
+
511
+
512
+ class MatriceDeployServerUtils:
513
+ """Utility class for managing deployment server operations."""
514
+
515
def __init__(
    self,
    action_tracker: ActionTracker,
    inference_interface: InferenceInterface,
    external_port: int,
    main_server: 'MatriceDeployServer' = None,
):
    """Initialize utils with reference to the main server.

    Args:
        action_tracker: ActionTracker instance; session, RPC client and
            action details are read from it.
        inference_interface: InferenceInterface instance.
        external_port: External port number.
        main_server: Reference to the main MatriceDeployServer instance.

    Raises:
        KeyError: If the action details lack ``_idModelDeployInstance``,
            ``_idDeployment`` or ``_idModelDeploy``.
    """
    self.action_tracker = action_tracker
    self.session = self.action_tracker.session
    self.rpc = self.session.rpc
    self.action_details = self.action_tracker.action_details
    self.deployment_instance_id = self.action_details["_idModelDeployInstance"]
    self.deployment_id = self.action_details["_idDeployment"]
    self.model_id = self.action_details["_idModelDeploy"]

    # Apply the same default and minimum as MatriceDeployServer. Without
    # this, a configured threshold of zero or less yields a threshold that
    # every idle reading exceeds.
    threshold_minutes = int(
        self.action_details.get(
            "shutdownThreshold", DEFAULT_SHUTDOWN_THRESHOLD_MINUTES
        )
    )
    if threshold_minutes < MIN_SHUTDOWN_THRESHOLD_MINUTES:
        logging.warning(
            "Invalid shutdown threshold %d, using default: %d",
            threshold_minutes,
            DEFAULT_SHUTDOWN_THRESHOLD_MINUTES,
        )
        threshold_minutes = DEFAULT_SHUTDOWN_THRESHOLD_MINUTES
    self.shutdown_threshold = threshold_minutes * 60  # seconds

    self.auto_shutdown = self.action_details.get("autoShutdown", True)
    self.inference_interface = inference_interface
    self.external_port = external_port
    self.main_server = main_server

    # External-IP cache and attempt counter used by the `ip` property.
    self._ip = None
    self._ip_fetch_attempts = 0
    self._max_ip_fetch_attempts = MAX_IP_FETCH_ATTEMPTS

    # Shutdown coordination
    self._shutdown_initiated = threading.Event()
    self._shutdown_complete = threading.Event()
551
+
552
@property
def ip(self):
    """Return the external IP address, fetching and caching it on demand.

    Each access makes at most one lookup against https://ident.me. Once the
    attempt budget is used up, "localhost" is cached as the fallback.
    Until an address is cached, "localhost" is returned.
    """
    budget_left = self._ip_fetch_attempts < self._max_ip_fetch_attempts
    if self._ip is not None or not budget_left:
        return self._ip or "localhost"

    self._ip_fetch_attempts += 1
    try:
        with urllib.request.urlopen(
            "https://ident.me", timeout=IP_FETCH_TIMEOUT_SECONDS
        ) as response:
            self._ip = response.read().decode("utf8").strip()
        logging.info("Successfully fetched external IP: %s", self._ip)
    except Exception as exc:
        logging.warning(
            "Failed to fetch external IP (attempt %d/%d): %s",
            self._ip_fetch_attempts,
            self._max_ip_fetch_attempts,
            str(exc),
        )
        if self._ip_fetch_attempts >= self._max_ip_fetch_attempts:
            # Out of attempts: settle on localhost (local development).
            self._ip = "localhost"
            logging.warning("Using localhost as fallback IP address")

    return self._ip or "localhost"
576
+
577
def is_instance_running(self):
    """Ask the backend whether this deployment instance is deployed.

    Returns:
        The ``deployed`` value of this instance's entry in the deployment's
        ``runningInstances`` list. False when the request fails, the entry
        is missing, or any exception occurs.
    """
    try:
        resp = self.rpc.get(
            f"/v1/inference/get_deployment_without_auth_key/{self.deployment_id}",
            raise_exception=False,
        )
        if not resp:
            logging.warning("No response received when checking instance status")
            return False

        if not resp.get("success"):
            error_msg = resp.get("message", "Unknown error")
            logging.warning(
                "Failed to get deployment instance status: %s", error_msg
            )
            return False

        running_instances = resp.get("data", {}).get("runningInstances", [])
        if not running_instances:
            logging.warning("No running instances found")
            return False

        # First entry whose id matches this instance, if any.
        own_id = self.deployment_instance_id
        own_entry = next(
            (
                entry
                for entry in running_instances
                if entry.get("modelDeployInstanceId") == own_id
            ),
            None,
        )
        if own_entry is None:
            logging.warning(
                "Instance %s not found in running instances list",
                self.deployment_instance_id,
            )
            return False

        is_deployed = own_entry.get("deployed", False)
        logging.debug(
            "Instance %s deployment status: %s",
            self.deployment_instance_id,
            "deployed" if is_deployed else "not deployed",
        )
        if not is_deployed:
            logging.warning("Instance %s is not deployed", self.deployment_instance_id)
        return is_deployed

    except Exception as exc:
        logging.warning(
            "Exception checking deployment instance status: %s",
            str(exc),
        )
        return False
628
+
629
def get_elapsed_time_since_latest_inference(self):
    """Get time elapsed since latest inference.

    The latest-inference timestamp is read exactly once, so the value that
    passes the truthiness check is the value used in the subtraction.

    Returns:
        float: Elapsed time in seconds, or 0.0 when no inference time is
        available yet.
    """
    latest_inference_time = self.inference_interface.get_latest_inference_time()
    if latest_inference_time:
        # Take "now" after the read so the difference reflects that read.
        now = datetime.now(timezone.utc)
        elapsed_time = (now - latest_inference_time).total_seconds()
        logging.debug(
            "Using latest inference time for elapsed calculation: %.1fs",
            elapsed_time,
        )
        return elapsed_time

    # No inference recorded: report zero idle time.
    logging.warning(
        "No latest inference time available, using safe default of 0 seconds"
    )
    return 0.0
654
+
655
def trigger_shutdown_if_needed(self):
    """Check idle time and trigger shutdown if threshold exceeded.

    Does nothing when auto shutdown is disabled. Any exception is logged
    and swallowed so the calling checker loop keeps running.
    """
    try:
        if not self.auto_shutdown:
            logging.debug("Auto shutdown is disabled")
            return

        elapsed_time = self.get_elapsed_time_since_latest_inference()

        if elapsed_time > self.shutdown_threshold:
            logging.info(
                "Idle time (%.1fs) exceeded threshold (%.1fs), initiating shutdown",
                elapsed_time,
                self.shutdown_threshold,
            )
            self.shutdown()
            return

        time_until_shutdown = max(0, self.shutdown_threshold - elapsed_time)

        # Throttle the progress log to once per 10-minute bucket of idle
        # time, plus every call during the first minute. The bucket is
        # remembered on the instance; comparing it replaces the old
        # `int(elapsed) % 600 == 0` test, which only matched when a poll
        # landed inside an exact one-second window.
        bucket = int(elapsed_time // 600)
        entered_new_bucket = bucket != getattr(self, "_idle_log_bucket", None)
        self._idle_log_bucket = bucket
        if entered_new_bucket or elapsed_time < 60:
            logging.info(
                "Time since last inference: %.1fs, time until shutdown: %.1fs",
                elapsed_time,
                time_until_shutdown,
            )

    except Exception as exc:
        logging.error(
            "Error checking shutdown condition: %s",
            str(exc),
        )
688
+
689
def shutdown(self):
    """Gracefully shutdown the deployment instance.

    NOTE: as written this method only logs two warnings and returns. The
    early ``return`` in the backend-notification step (kept until the TODO
    below is resolved) makes the status update, the shutdown signalling and
    the process exit further down unreachable.
    """
    try:
        logging.warning("Initiating shutdown sequence...")

        # Step 1: notify the backend (currently disabled).
        try:
            # TODO: Enable after fixing shutdown and remove the early return.
            # resp = self.rpc.delete(
            #     f"/v1/inference/delete_deploy_instance/{self.deployment_instance_id}",
            #     raise_exception=False,
            # )
            # if resp and resp.get("success"):
            #     logging.warning(
            #         "Successfully notified backend of deployment instance shutdown"
            #     )
            # else:
            #     error_msg = (
            #         resp.get("message", "Unknown error") if resp else "No response"
            #     )
            #     logging.warning(
            #         "Failed to notify backend of shutdown: %s", error_msg
            #     )
            logging.warning("Shutdown is triggered, but notifying backend is disabled")
            return
        except Exception as err:
            logging.error(
                "Exception while notifying backend of shutdown: %s", str(err)
            )

        # Step 2 (unreachable while the early return exists): report status.
        try:
            self.action_tracker.update_status(
                "MDL_DPL_STP",
                "SUCCESS",
                "Model deployment stopped",
            )
            logging.warning("Updated deployment status to stopped")
        except Exception as err:
            logging.error("Failed to update deployment status: %s", str(err))

        # Step 3: signal the main thread instead of exiting directly.
        logging.warning("Signaling shutdown to main thread...")
        self._shutdown_initiated.set()

        # Step 4: wait up to 30 seconds for the coordinated shutdown.
        finished = self._shutdown_complete.wait(timeout=30.0)
        if finished:
            logging.warning("Coordinated shutdown completed, exiting process")
        else:
            logging.warning("Coordinated shutdown timed out, forcing exit")

        # Step 5: terminate the process.
        os._exit(0)

    except Exception as err:
        logging.error("Error during shutdown: %s", str(err))
        # Still signal shutdown when something above failed.
        self._shutdown_initiated.set()
        os._exit(1)
747
+
748
def shutdown_checker(self):
    """Poll deployment status and idle time until shutdown is requested.

    Runs forever, sleeping ``SHUTDOWN_CHECK_INTERVAL_SECONDS`` after every
    iteration (including the last one, because the sleep is in ``finally``).
    Once ``MAX_DEPLOYMENT_CHECK_FAILURES_BEFORE_SHUTDOWN`` consecutive checks
    have failed, ``shutdown()`` is called and the loop ends.
    """
    failures = 0
    logging.warning("Shutdown checker started")

    def minutes_failing(count):
        # Failure streak expressed in minutes of wall-clock polling.
        return (count * SHUTDOWN_CHECK_INTERVAL_SECONDS) / 60

    while True:
        try:
            if self.is_instance_running():
                if failures > 0:
                    logging.info(
                        "Deployment status check recovered after %d failures",
                        failures,
                    )
                    failures = 0

                # Instance is healthy: evaluate the idle-shutdown rule.
                self.trigger_shutdown_if_needed()
            else:
                failures += 1
                failing_for = minutes_failing(failures)

                logging.warning(
                    "Deployment status check failed (%d/%d) - %.1f minutes of failures",
                    failures,
                    MAX_DEPLOYMENT_CHECK_FAILURES_BEFORE_SHUTDOWN,
                    failing_for,
                )

                if failures >= MAX_DEPLOYMENT_CHECK_FAILURES_BEFORE_SHUTDOWN:
                    logging.error(
                        "Deployment status check failed %d consecutive times (%.1f minutes), initiating shutdown",
                        failures,
                        failing_for,
                    )
                    self.shutdown()
                    return

        except Exception as exc:
            # An unexpected error counts toward the same failure streak.
            failures += 1
            failing_for = minutes_failing(failures)

            logging.error(
                "Error in shutdown checker (%d/%d) - %.1f minutes of failures: %s",
                failures,
                MAX_DEPLOYMENT_CHECK_FAILURES_BEFORE_SHUTDOWN,
                failing_for,
                str(exc),
            )

            if failures >= MAX_DEPLOYMENT_CHECK_FAILURES_BEFORE_SHUTDOWN:
                logging.error(
                    "Shutdown checker failed %d consecutive times (%.1f minutes), initiating shutdown",
                    failures,
                    failing_for,
                )
                self.shutdown()
                return
        finally:
            time.sleep(SHUTDOWN_CHECK_INTERVAL_SECONDS)
822
+
823
def heartbeat_checker(self):
    """Send a heartbeat to the backend in an endless loop.

    Intended to run on a background thread. Each pass POSTs to
    ``/v1/inference/add_instance_heartbeat/<deployment_instance_id>`` through
    ``self.rpc`` with ``raise_exception=False``.

    A truthy response whose ``"success"`` entry is truthy clears the failure
    streak. Any other response, or any exception raised during the pass,
    extends the streak by one. When the streak reaches
    ``MAX_HEARTBEAT_FAILURES_BEFORE_SHUTDOWN`` the method calls
    ``self.shutdown()`` and returns; otherwise it sleeps for
    ``HEARTBEAT_INTERVAL_SECONDS`` before the next pass.
    """
    missed_beats = 0

    logging.info("Heartbeat checker started")
    while True:
        try:
            reply = self.rpc.post(
                f"/v1/inference/add_instance_heartbeat/{self.deployment_instance_id}",
                raise_exception=False,
            )

            if not (reply and reply.get("success")):
                missed_beats += 1
                if reply:
                    reason = reply.get("message", "Unknown error")
                else:
                    reason = "No response"
                minutes_failing = (missed_beats * HEARTBEAT_INTERVAL_SECONDS) / 60

                logging.warning(
                    "Heartbeat failed (%d/%d) - %.1f minutes of failures: %s",
                    missed_beats,
                    MAX_HEARTBEAT_FAILURES_BEFORE_SHUTDOWN,
                    minutes_failing,
                    reason,
                )

                if missed_beats >= MAX_HEARTBEAT_FAILURES_BEFORE_SHUTDOWN:
                    logging.error(
                        "Heartbeat failed %d consecutive times (%.1f minutes), initiating shutdown",
                        missed_beats,
                        minutes_failing,
                    )
                    self.shutdown()
                    return
            else:
                if missed_beats > 0:
                    # First success after a run of failures is worth an info line.
                    logging.info(
                        "Heartbeat recovered after %d failures: %s",
                        missed_beats,
                        reply.get("message", "Success"),
                    )
                else:
                    logging.debug(
                        "Heartbeat successful: %s", reply.get("message", "Success")
                    )
                missed_beats = 0

        except Exception as err:
            # Exceptions are counted exactly like an unsuccessful response.
            missed_beats += 1
            minutes_failing = (missed_beats * HEARTBEAT_INTERVAL_SECONDS) / 60

            logging.warning(
                "Heartbeat exception (%d/%d) - %.1f minutes of failures: %s",
                missed_beats,
                MAX_HEARTBEAT_FAILURES_BEFORE_SHUTDOWN,
                minutes_failing,
                str(err),
            )

            if missed_beats >= MAX_HEARTBEAT_FAILURES_BEFORE_SHUTDOWN:
                logging.error(
                    "Heartbeat failed %d consecutive times (%.1f minutes), initiating shutdown",
                    missed_beats,
                    minutes_failing,
                )
                self.shutdown()
                return

        time.sleep(HEARTBEAT_INTERVAL_SECONDS)
897
+
898
def run_background_checkers(self):
    """Start the shutdown checker and the heartbeat checker on separate threads.

    Two threads are created, named ``"ShutdownChecker"`` and
    ``"HeartbeatChecker"``, targeting ``self.shutdown_checker`` and
    ``self.heartbeat_checker`` respectively, and both are started before this
    method returns. They are created with ``daemon=False``, i.e. as regular
    (non-daemon) threads.
    """
    checker_specs = (
        (self.shutdown_checker, "ShutdownChecker"),
        (self.heartbeat_checker, "HeartbeatChecker"),
    )
    # Build every thread first, then start them, in declaration order.
    workers = [
        threading.Thread(target=entry_point, name=label, daemon=False)
        for entry_point, label in checker_specs
    ]
    for worker in workers:
        worker.start()

    logging.info("Background checker threads started successfully")
915
+
916
def wait_for_shutdown(self):
    """Block the calling thread until shutdown has been initiated.

    The method waits on ``self._shutdown_initiated`` and returns as soon as
    that event is set (for example by one of the background checkers).

    On the normal path nothing else happens yet: stopping ``self.main_server``
    and setting ``self._shutdown_complete`` from here is still disabled (see
    the TODO below), so ``_shutdown_complete`` is NOT set on that path.

    If a ``KeyboardInterrupt`` or any other exception interrupts the wait,
    the method marks shutdown as initiated, makes a best-effort call to
    ``self.main_server.stop_server()`` (when a server is present) and then
    sets ``self._shutdown_complete``. Errors from ``stop_server()`` are logged
    and swallowed.
    """

    def _emergency_stop(stop_error_template):
        # Shared recovery path for both exception handlers below.
        self._shutdown_initiated.set()
        if self.main_server:
            try:
                self.main_server.stop_server()
            except Exception as stop_exc:
                logging.error(stop_error_template, str(stop_exc))
        self._shutdown_complete.set()

    try:
        logging.warning("Main thread waiting for shutdown signal...")

        # Event.wait() returns True the moment the flag is set, so shutdown is
        # noticed immediately instead of after the remainder of a 10 s sleep.
        # The timeout keeps each wait bounded so the main thread still wakes
        # up periodically, as the previous polling loop did.
        while not self._shutdown_initiated.wait(timeout=10):
            pass

        logging.warning("Shutdown signal received, initiating server shutdown...")

        # TODO: Enable after fixing shutdown - stop self.main_server here
        # (logging "Server shutdown completed" or the error) and then set
        # self._shutdown_complete to signal that coordinated shutdown is done.

    except KeyboardInterrupt:
        logging.warning("Received KeyboardInterrupt, initiating shutdown...")
        _emergency_stop("Error during keyboard interrupt shutdown: %s")
    except Exception as exc:
        logging.error("Error in wait_for_shutdown: %s", str(exc))
        _emergency_stop("Error during exception shutdown: %s")
959
+
960
def update_deployment_address(self):
    """Report this instance's IP address and external port to the backend.

    Builds a payload from ``self.ip``, ``self.external_port``,
    ``self.deployment_instance_id``, ``self.deployment_id`` and the
    ``"instanceID"`` entry of ``self.action_details``, then sends it with
    ``self.rpc.put`` to ``/v1/inference/update_deploy_instance_address``.

    Raises:
        ValueError: If the external port cannot be interpreted as an integer
            in the range 1-65535, or if ``instanceID`` is missing from the
            action details.
        Exception: Any other error (for example one raised by the RPC call)
            is logged and re-raised unchanged.
    """
    try:
        ip_address = self.ip
        logging.info("Using IP address: %s", ip_address)

        # Coerce BEFORE range-checking. The payload has always carried
        # int(self.external_port), so the port may arrive as a string;
        # comparing a string against ints would raise TypeError and reject
        # an otherwise valid port such as "8080".
        try:
            port = int(self.external_port)
        except (TypeError, ValueError, OverflowError) as exc:
            raise ValueError(
                f"Invalid external port: {self.external_port}"
            ) from exc
        if not 1 <= port <= 65535:
            raise ValueError(f"Invalid external port: {self.external_port}")

        instance_id = self.action_details.get("instanceID")
        if not instance_id:
            raise ValueError("Missing instanceID in action details")

        payload = {
            "port": port,
            "ipAddress": ip_address,
            "_idDeploymentInstance": self.deployment_instance_id,
            "_idModelDeploy": self.deployment_id,
            "_idInstance": instance_id,
        }

        logging.info("Updating deployment address with payload: %s", payload)

        resp = self.rpc.put(
            path="/v1/inference/update_deploy_instance_address",
            payload=payload,
        )
        logging.info(
            "Successfully updated deployment address to %s:%s, response: %s",
            ip_address,
            port,
            resp,
        )
    except Exception as exc:
        logging.error(
            "Failed to update deployment address: %s",
            str(exc),
        )
        raise