matrice-inference 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of matrice-inference might be problematic.

Files changed (37)
  1. matrice_inference/__init__.py +72 -0
  2. matrice_inference/py.typed +0 -0
  3. matrice_inference/server/__init__.py +23 -0
  4. matrice_inference/server/inference_interface.py +176 -0
  5. matrice_inference/server/model/__init__.py +1 -0
  6. matrice_inference/server/model/model_manager.py +274 -0
  7. matrice_inference/server/model/model_manager_wrapper.py +550 -0
  8. matrice_inference/server/model/triton_model_manager.py +290 -0
  9. matrice_inference/server/model/triton_server.py +1248 -0
  10. matrice_inference/server/proxy_interface.py +371 -0
  11. matrice_inference/server/server.py +1004 -0
  12. matrice_inference/server/stream/__init__.py +0 -0
  13. matrice_inference/server/stream/app_deployment.py +228 -0
  14. matrice_inference/server/stream/consumer_worker.py +201 -0
  15. matrice_inference/server/stream/frame_cache.py +127 -0
  16. matrice_inference/server/stream/inference_worker.py +163 -0
  17. matrice_inference/server/stream/post_processing_worker.py +230 -0
  18. matrice_inference/server/stream/producer_worker.py +147 -0
  19. matrice_inference/server/stream/stream_pipeline.py +451 -0
  20. matrice_inference/server/stream/utils.py +23 -0
  21. matrice_inference/tmp/abstract_model_manager.py +58 -0
  22. matrice_inference/tmp/aggregator/__init__.py +18 -0
  23. matrice_inference/tmp/aggregator/aggregator.py +330 -0
  24. matrice_inference/tmp/aggregator/analytics.py +906 -0
  25. matrice_inference/tmp/aggregator/ingestor.py +438 -0
  26. matrice_inference/tmp/aggregator/latency.py +597 -0
  27. matrice_inference/tmp/aggregator/pipeline.py +968 -0
  28. matrice_inference/tmp/aggregator/publisher.py +431 -0
  29. matrice_inference/tmp/aggregator/synchronizer.py +594 -0
  30. matrice_inference/tmp/batch_manager.py +239 -0
  31. matrice_inference/tmp/overall_inference_testing.py +338 -0
  32. matrice_inference/tmp/triton_utils.py +638 -0
  33. matrice_inference-0.1.2.dist-info/METADATA +28 -0
  34. matrice_inference-0.1.2.dist-info/RECORD +37 -0
  35. matrice_inference-0.1.2.dist-info/WHEEL +5 -0
  36. matrice_inference-0.1.2.dist-info/licenses/LICENSE.txt +21 -0
  37. matrice_inference-0.1.2.dist-info/top_level.txt +1 -0
matrice_inference/tmp/aggregator/analytics.py
@@ -0,0 +1,906 @@
+ import logging
+ import threading
+ import time
+ import base64
+ import json
+ from typing import Dict, Any, Optional, List, Tuple
+
+ from matrice_common.session import Session
+ from confluent_kafka import Producer
+
+ class AnalyticsSummarizer:
+     """
+     Buffers aggregated camera_results and emits 5-minute rollups per camera,
+     focusing on tracking_stats per application.
+
+     Output structure example per camera:
+     {
+         "camera_name": "camera_1",
+         "inferencePipelineId": "pipeline-xyz",
+         "camera_group": "group_a",
+         "location": "Lobby",
+         "agg_apps": [
+             {
+                 "application_name": "People Counting",
+                 "application_key_name": "People_Counting",
+                 "application_version": "1.3",
+                 "tracking_stats": {
+                     "input_timestamp": "00:00:09.9",   # last seen
+                     "reset_timestamp": "00:00:00",     # last seen in window
+                     "current_counts": [{"category": "person", "count": 4}],   # per-window delta
+                     "total_counts": [{"category": "person", "count": 37}]     # max seen in window
+                 }
+             }
+         ],
+         "summary_metadata": {
+             "window_seconds": 300,
+             "messages_aggregated": 123,
+             "start_time": 1710000000.0,
+             "end_time": 1710000300.0
+         }
+     }
+     """
+
+     def __init__(
+         self,
+         session: Session,
+         inference_pipeline_id: str,
+         flush_interval_seconds: int = 300,
+     ) -> None:
+         self.session = session
+         self.inference_pipeline_id = inference_pipeline_id
+         self.flush_interval_seconds = flush_interval_seconds
+
+         self.kafka_producer = self._setup_kafka_producer()
+
+         # Threading
+         self._stop = threading.Event()
+         self._thread: Optional[threading.Thread] = None
+         self._is_running = False
+         self._lock = threading.RLock()
+
+         # Ingestion queue
+         self._ingest_queue: List[Dict[str, Any]] = []
+
+         # Aggregation buffers keyed by (camera_group, camera_name).
+         # Each value holds:
+         # {
+         #     "window_start": float,
+         #     "last_seen": float,
+         #     "camera_info": dict,
+         #     "messages": int,
+         #     "apps": {
+         #         application_key_name: {
+         #             "meta": {name, key_name, version},
+         #             "last_input_timestamp": str,
+         #             "reset_timestamp": str | None,   # last seen
+         #             "current_counts": {category: last_value},
+         #             "total_counts": {category: max_value}
+         #         }
+         #     }
+         # }
+         self._buffers: Dict[Tuple[str, str], Dict[str, Any]] = {}
+
+         # Track previous total_counts per (camera_group, camera_name, application_key_name).
+         # Used to compute per-window deltas for current_counts at flush time.
+         self._prev_total_counts: Dict[Tuple[str, str, str], Dict[str, int]] = {}
+
+         # Location aggregation buffers keyed by (camera_group, location).
+         # Each value holds a structure similar to the camera buffers, aggregated by location.
+         self._location_buffers: Dict[Tuple[str, str], Dict[str, Any]] = {}
+
+         # Track previous location total_counts per (camera_group, location, application_key_name)
+         self._prev_location_total_counts: Dict[Tuple[str, str, str], Dict[str, int]] = {}
+
+         # Incident tracking buffers keyed by (camera_group, camera_name, application_key_name).
+         # Each value holds incidents for immediate publishing.
+         self._incident_buffers: Dict[Tuple[str, str, str], List[Dict[str, Any]]] = {}
+
+         # Stats
+         self.stats = {
+             "start_time": None,
+             "summaries_published": 0,
+             "location_summaries_published": 0,
+             "incidents_published": 0,
+             "messages_ingested": 0,
+             "errors": 0,
+             "last_error": None,
+             "last_error_time": None,
+             "last_flush_time": None,
+         }
+
+     def _setup_kafka_producer(self):
+         path = "/v1/actions/get_kafka_info"
+
+         response = self.session.rpc.get(path=path, raise_exception=True)
+
+         if not response or not response.get("success"):
+             raise ValueError(
+                 f"Failed to fetch Kafka config: {(response or {}).get('message', 'No response')}"
+             )
+
+         # Decode base64-encoded connection fields
+         encoded_ip = response["data"]["ip"]
+         encoded_port = response["data"]["port"]
+         ip = base64.b64decode(encoded_ip).decode("utf-8")
+         port = base64.b64decode(encoded_port).decode("utf-8")
+         bootstrap_servers = f"{ip}:{port}"
+
+         # Kafka producer for summaries (non-blocking configuration)
+         kafka_producer = Producer({
+             "bootstrap.servers": bootstrap_servers,
+             "acks": "all",  # full acknowledgment; required for idempotence
+             "retries": 2,  # fail fast rather than retrying indefinitely
+             "retry.backoff.ms": 500,
+             "request.timeout.ms": 15000,
+             "max.in.flight.requests.per.connection": 5,
+             "linger.ms": 5,  # alias of queue.buffering.max.ms, so only one of the two is set
+             "batch.size": 8192,
+             "log_level": 0,
+             "delivery.timeout.ms": 30000,
+             "enable.idempotence": True,  # avoids duplicates on producer retries
+         })
+         return kafka_producer
+
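For context, the get_kafka_info action is assumed to return its connection fields base64-encoded, which is what `_setup_kafka_producer` decodes above. A minimal sketch of that step against a hypothetical response (the field names mirror what the code reads):

    import base64

    # Hypothetical response shape; "success", "data", "ip" and "port" are the
    # fields the producer setup reads.
    response = {"success": True, "data": {"ip": "MTAuMC4wLjU=", "port": "OTA5Mg=="}}

    ip = base64.b64decode(response["data"]["ip"]).decode("utf-8")      # "10.0.0.5"
    port = base64.b64decode(response["data"]["port"]).decode("utf-8")  # "9092"
    print(f"{ip}:{port}")  # -> "10.0.0.5:9092", the bootstrap.servers string
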
+     def start(self) -> bool:
+         if self._is_running:
+             logging.warning("Analytics summarizer already running")
+             return True
+         try:
+             self._stop.clear()
+             self._is_running = True
+             self.stats["start_time"] = time.time()
+             self.stats["last_flush_time"] = time.time()
+             self._thread = threading.Thread(
+                 target=self._run, name=f"AnalyticsSummarizer-{self.inference_pipeline_id}", daemon=True
+             )
+             self._thread.start()
+             logging.info("Analytics summarizer started")
+             return True
+         except Exception as exc:
+             self._record_error(f"Failed to start analytics summarizer: {exc}")
+             self.stop()
+             return False
+
+     def stop(self) -> None:
+         if not self._is_running:
+             logging.info("Analytics summarizer not running")
+             return
+         logging.info("Stopping analytics summarizer...")
+         self._is_running = False
+         self._stop.set()
+         try:
+             if self._thread and self._thread.is_alive():
+                 self._thread.join(timeout=5.0)
+         except Exception as exc:
+             logging.error(f"Error joining analytics summarizer thread: {exc}")
+         self._thread = None
+         logging.info("Analytics summarizer stopped")
+
+     def ingest_result(self, aggregated_result: Dict[str, Any]) -> None:
+         """
+         Receive a single aggregated camera_results payload for buffering.
+         Intended to be called by the publisher after a successful publish.
+         """
+         try:
+             with self._lock:
+                 self._ingest_queue.append(aggregated_result)
+                 self.stats["messages_ingested"] += 1
+         except Exception as exc:
+             self._record_error(f"Failed to ingest result: {exc}")
+
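The buffering code that follows reads only a handful of fields from each ingested payload. A minimal example of the expected shape, with illustrative values taken from the class docstring (`summarizer` is assumed to be a running AnalyticsSummarizer):

    aggregated_result = {
        "camera_info": {
            "camera_name": "camera_1",
            "camera_group": "group_a",
            "location": "Lobby",
        },
        "agg_apps": [
            {
                "application_name": "People Counting",
                "application_key_name": "People_Counting",
                "application_version": "1.3",
                "tracking_stats": {
                    "input_timestamp": "00:00:09.9",
                    "reset_timestamp": "00:00:00",
                    "current_counts": [{"category": "person", "count": 4}],
                    "total_counts": [{"category": "person", "count": 37}],
                },
            }
        ],
    }
    summarizer.ingest_result(aggregated_result)
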
+     def _run(self) -> None:
+         logging.info("Analytics summarizer worker started")
+         while not self._stop.is_set():
+             try:
+                 # Drain ingestion queue
+                 self._drain_ingest_queue()
+
+                 # Time-based flush
+                 current_time = time.time()
+                 last_flush = self.stats.get("last_flush_time") or current_time
+                 if current_time - last_flush >= self.flush_interval_seconds:
+                     self._flush_all(current_time)
+                     self.stats["last_flush_time"] = current_time
+
+                 # Prevent busy loop
+                 time.sleep(0.5)
+
+             except Exception as exc:
+                 if not self._stop.is_set():
+                     self._record_error(f"Error in summarizer loop: {exc}")
+                 time.sleep(0.2)
+         # Final flush on stop
+         try:
+             self._flush_all(time.time())
+         except Exception as exc:
+             logging.error(f"Error during final analytics flush: {exc}")
+         logging.info("Analytics summarizer worker stopped")
+
+     def _drain_ingest_queue(self) -> None:
+         local_batch: List[Dict[str, Any]] = []
+         with self._lock:
+             if self._ingest_queue:
+                 local_batch = self._ingest_queue
+                 self._ingest_queue = []
+
+         for result in local_batch:
+             try:
+                 self._add_to_buffers(result)
+             except Exception as exc:
+                 self._record_error(f"Failed buffering result: {exc}")
+
+     def _add_to_buffers(self, result: Dict[str, Any]) -> None:
+         camera_info = result.get("camera_info", {}) or {}
+         camera_name = camera_info.get("camera_name") or "unknown"
+         camera_group = camera_info.get("camera_group") or "default_group"
+         location = camera_info.get("location")
+
+         key = (camera_group, camera_name)
+         now = time.time()
+         buffer = self._buffers.get(key)
+         if not buffer:
+             buffer = {
+                 "window_start": now,
+                 "last_seen": now,
+                 "camera_info": {
+                     "camera_name": camera_name,
+                     "camera_group": camera_group,
+                     "location": location,
+                 },
+                 "messages": 0,
+                 "apps": {},
+             }
+             self._buffers[key] = buffer
+         else:
+             buffer["last_seen"] = now
+             # Update location if provided
+             if location:
+                 buffer["camera_info"]["location"] = location
+
+         buffer["messages"] += 1
+
+         # Also update location buffer
+         if location:
+             self._add_to_location_buffer(camera_group, location, result, now)
+
+         # Process each app
+         agg_apps = result.get("agg_apps", []) or []
+         for app in agg_apps:
+             app_name = app.get("application_name") or app.get("app_name") or "unknown"
+             app_key = app.get("application_key_name") or app.get("application_key") or app_name
+             app_ver = app.get("application_version") or app.get("version") or ""
+
+             app_buf = buffer["apps"].get(app_key)
+             if not app_buf:
+                 app_buf = {
+                     "meta": {
+                         "application_name": app_name,
+                         "application_key_name": app_key,
+                         "application_version": app_ver,
+                     },
+                     "last_input_timestamp": None,
+                     "reset_timestamp": None,
+                     "current_counts": {},
+                     "total_counts": {},
+                 }
+                 buffer["apps"][app_key] = app_buf
+
+             # Extract and publish incidents exactly once per app, even when the
+             # app carries no tracking_stats; the location pass above deliberately
+             # skips incidents to avoid publishing them twice.
+             self._extract_and_process_incidents(camera_group, camera_name, app, camera_info)
+
+             # Extract tracking_stats from app
+             tracking_stats = self._extract_tracking_stats_from_app(app)
+             if not tracking_stats:
+                 continue
+
+             # Shared update logic: last input/reset timestamps, last current
+             # counts, max total counts (max avoids double-counting cumulative totals)
+             self._update_app_buffer_with_tracking_stats(app_buf, tracking_stats)
+
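Because total_counts is a cumulative counter, the camera-level buffer keeps the per-category max rather than summing samples. A quick worked illustration of that merge rule (standalone, with made-up samples):

    # Three successive samples of a cumulative counter within one window:
    samples = [
        [{"category": "person", "count": 30}],
        [{"category": "person", "count": 35}],
        [{"category": "person", "count": 37}],
    ]

    total_counts = {}
    for total in samples:
        for item in total:
            cat, cnt = item.get("category"), item.get("count")
            if cat is None or cnt is None:
                continue
            existing = total_counts.get(cat)
            if existing is None or cnt > existing:
                total_counts[cat] = cnt

    print(total_counts)  # {'person': 37} -- the max, not 30 + 35 + 37 = 102
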
+     def _add_to_location_buffer(self, camera_group: str, location: str, result: Dict[str, Any], now: float) -> None:
+         """Add result to location aggregation buffer."""
+         key = (camera_group, location)
+         buffer = self._location_buffers.get(key)
+
+         if not buffer:
+             buffer = {
+                 "window_start": now,
+                 "last_seen": now,
+                 "camera_group": camera_group,
+                 "location": location,
+                 "messages": 0,
+                 "cameras": {},  # Track individual cameras in this location
+                 "apps": {},  # Aggregated app data for this location
+             }
+             self._location_buffers[key] = buffer
+         else:
+             buffer["last_seen"] = now
+
+         buffer["messages"] += 1
+
+         # Track camera data within location
+         camera_info = result.get("camera_info", {}) or {}
+         camera_name = camera_info.get("camera_name") or "unknown"
+
+         if camera_name not in buffer["cameras"]:
+             buffer["cameras"][camera_name] = {
+                 "camera_name": camera_name,
+                 "camera_group": camera_group,
+                 "location": location,
+                 "messages": 0,
+                 "apps": {},
+             }
+
+         camera_buffer = buffer["cameras"][camera_name]
+         camera_buffer["messages"] += 1
+
+         # Process each app for both camera-level and location-level aggregation
+         agg_apps = result.get("agg_apps", []) or []
+         for app in agg_apps:
+             app_name = app.get("application_name") or app.get("app_name") or "unknown"
+             app_key = app.get("application_key_name") or app.get("application_key") or app_name
+             app_ver = app.get("application_version") or app.get("version") or ""
+
+             # Update camera-level app data
+             if app_key not in camera_buffer["apps"]:
+                 camera_buffer["apps"][app_key] = {
+                     "meta": {
+                         "application_name": app_name,
+                         "application_key_name": app_key,
+                         "application_version": app_ver,
+                     },
+                     "last_input_timestamp": None,
+                     "reset_timestamp": None,
+                     "current_counts": {},
+                     "total_counts": {},
+                 }
+
+             # Update location-level app data
+             if app_key not in buffer["apps"]:
+                 buffer["apps"][app_key] = {
+                     "meta": {
+                         "application_name": app_name,
+                         "application_key_name": app_key,
+                         "application_version": app_ver,
+                     },
+                     "last_input_timestamp": None,
+                     "reset_timestamp": None,
+                     "current_counts": {},
+                     "total_counts": {},
+                 }
+
+             # Extract tracking stats and update both levels
+             tracking_stats = self._extract_tracking_stats_from_app(app)
+             if tracking_stats:
+                 self._update_app_buffer_with_tracking_stats(camera_buffer["apps"][app_key], tracking_stats)
+                 self._update_location_app_buffer_with_tracking_stats(buffer["apps"][app_key], tracking_stats)
+
+             # Incidents are extracted and published once in _add_to_buffers;
+             # doing it here as well would publish every incident twice.
+
+     def _update_app_buffer_with_tracking_stats(self, app_buf: Dict[str, Any], tracking_stats: Dict[str, Any]) -> None:
+         """Update app buffer with tracking stats - shared logic."""
+         input_ts = tracking_stats.get("input_timestamp")
+         reset_ts = tracking_stats.get("reset_timestamp")
+         current_counts = tracking_stats.get("current_counts") or []
+         total_counts = tracking_stats.get("total_counts") or []
+
+         if input_ts:
+             app_buf["last_input_timestamp"] = input_ts
+         if reset_ts is not None:
+             # Keep the last seen reset timestamp only
+             app_buf["reset_timestamp"] = reset_ts
+
+         # Update current counts (take last observed)
+         for item in current_counts:
+             cat = item.get("category")
+             cnt = item.get("count")
+             if cat is not None and cnt is not None:
+                 app_buf["current_counts"][cat] = cnt
+
+         # Update total counts (take max observed to avoid double-counting cumulative totals)
+         for item in total_counts:
+             cat = item.get("category")
+             cnt = item.get("count")
+             if cat is None or cnt is None:
+                 continue
+             existing = app_buf["total_counts"].get(cat)
+             if existing is None or cnt > existing:
+                 app_buf["total_counts"][cat] = cnt
+
+     def _update_location_app_buffer_with_tracking_stats(self, app_buf: Dict[str, Any], tracking_stats: Dict[str, Any]) -> None:
+         """Update location app buffer with tracking stats - aggregated across cameras."""
+         input_ts = tracking_stats.get("input_timestamp")
+         reset_ts = tracking_stats.get("reset_timestamp")
+         current_counts = tracking_stats.get("current_counts") or []
+         total_counts = tracking_stats.get("total_counts") or []
+
+         # For location aggregation, take the latest input timestamp...
+         if input_ts:
+             current_ts = app_buf.get("last_input_timestamp")
+             if not current_ts or input_ts > current_ts:
+                 app_buf["last_input_timestamp"] = input_ts
+
+         # ...and the earliest reset timestamp
+         if reset_ts is not None:
+             current_reset = app_buf.get("reset_timestamp")
+             if not current_reset or reset_ts < current_reset:
+                 app_buf["reset_timestamp"] = reset_ts
+
+         # Aggregate current counts (sum across cameras)
+         for item in current_counts:
+             cat = item.get("category")
+             cnt = item.get("count")
+             if cat is not None and cnt is not None:
+                 existing = app_buf["current_counts"].get(cat, 0)
+                 app_buf["current_counts"][cat] = existing + cnt
+
+         # Aggregate total counts (sum across cameras)
+         for item in total_counts:
+             cat = item.get("category")
+             cnt = item.get("count")
+             if cat is None or cnt is None:
+                 continue
+             existing = app_buf["total_counts"].get(cat, 0)
+             app_buf["total_counts"][cat] = existing + cnt
+
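The two helpers above differ in their merge rule: camera-level buffers keep the max of each cumulative counter, while location-level buffers sum counts across cameras. A small illustration with hypothetical values:

    # Window totals from two cameras at the same location:
    cam_a_totals = {"person": 37}  # max seen on camera_a
    cam_b_totals = {"person": 12}  # max seen on camera_b

    # The location-level view sums across cameras:
    location_totals = {}
    for cam_totals in (cam_a_totals, cam_b_totals):
        for cat, cnt in cam_totals.items():
            location_totals[cat] = location_totals.get(cat, 0) + cnt

    print(location_totals)  # {'person': 49}
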
+     def _extract_and_process_incidents(self, camera_group: str, camera_name: str, app: Dict[str, Any], camera_info: Dict[str, Any]) -> None:
+         """Extract incidents from app data and publish them immediately to the incident_res topic."""
+         # Resolve the app name before the try block so it is always bound in the except handler
+         app_name = app.get("application_name") or app.get("app_name") or "unknown"
+         try:
+             app_key = app.get("application_key_name") or app.get("application_key") or app_name
+             app_ver = app.get("application_version") or app.get("version") or ""
+
+             # Collect incidents from the various places they may appear in the app data
+             incidents = []
+
+             # Check direct incidents in app
+             if "incidents" in app and isinstance(app["incidents"], list):
+                 incidents.extend(app["incidents"])
+
+             # Check incidents in agg_summary
+             agg_summary = app.get("agg_summary", {})
+             if isinstance(agg_summary, dict):
+                 # Look through frame-based structure
+                 for frame_key, frame_data in agg_summary.items():
+                     if isinstance(frame_data, dict) and "incidents" in frame_data:
+                         frame_incidents = frame_data["incidents"]
+                         if isinstance(frame_incidents, list):
+                             incidents.extend(frame_incidents)
+                         elif isinstance(frame_incidents, dict):
+                             # If incidents is a dict, treat values as incidents
+                             for incident_data in frame_incidents.values():
+                                 if isinstance(incident_data, dict):
+                                     incidents.append(incident_data)
+
+             # Check incidents in tracking_stats
+             tracking_stats = self._extract_tracking_stats_from_app(app)
+             if tracking_stats and "incidents" in tracking_stats:
+                 if isinstance(tracking_stats["incidents"], list):
+                     incidents.extend(tracking_stats["incidents"])
+
+             # If we found incidents, publish them immediately
+             if incidents:
+                 self._publish_incidents_immediately(
+                     camera_name, camera_group, camera_info.get("location"),
+                     app_name, app_key, app_ver, incidents
+                 )
+
+         except Exception as exc:
+             self._record_error(f"Failed to extract incidents from app {app_name}: {exc}")
+
+     def _publish_incidents_immediately(self, camera_name: str, camera_group: str, location: Optional[str],
+                                        app_name: str, app_key: str, app_version: str,
+                                        incidents: List[Dict[str, Any]]) -> None:
+         """Publish incidents immediately to the incident_res topic."""
+         try:
+             # Create incident payload according to the specified structure
+             incident_payload = {
+                 "camera_name": camera_name,
+                 "inferencePipelineId": self.inference_pipeline_id,
+                 "camera_group": camera_group,
+                 "location": location or "Unknown Location",
+                 "application_name": app_name,
+                 "application_key_name": app_key,
+                 "application_version": app_version,
+                 "incidents": []
+             }
+
+             # Process each incident to ensure proper structure
+             for incident in incidents:
+                 if not isinstance(incident, dict):
+                     continue
+
+                 # Ensure required incident fields with defaults (start_time default is UTC, matching the Z suffix)
+                 processed_incident = {
+                     "incident_id": incident.get("incident_id", f"inc_{int(time.time())}"),
+                     "incident_type": incident.get("incident_type", "unknown"),
+                     "severity_level": incident.get("severity_level", "low"),
+                     "human_text": incident.get("human_text", "Incident detected"),
+                     "start_time": incident.get("start_time", time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())),
+                     "end_time": incident.get("end_time", ""),
+                     "camera_info": incident.get("camera_info", f"Camera at {location}"),
+                     "level_settings": incident.get("level_settings", "Level 1"),
+                     "content": incident.get("content", ""),  # Base64 image content if available
+                     "alerts": incident.get("alerts", []),
+                     "alert_settings": incident.get("alert_settings", [])
+                 }
+
+                 incident_payload["incidents"].append(processed_incident)
+
+             # Only publish if we have valid incidents
+             if incident_payload["incidents"]:
+                 # Publish incident data via Kafka - non-blocking, with delivery callback
+                 try:
+                     def incident_delivery_callback(err, msg):
+                         if err:
+                             self._record_error(f"Failed to deliver incident message: {err}")
+                         else:
+                             logging.debug(f"Incident message delivered to {msg.topic()} [{msg.partition()}]")
+
+                     self.kafka_producer.produce(
+                         topic="incident_res",
+                         key=f"{camera_name}_{app_key}".encode("utf-8"),
+                         value=json.dumps(incident_payload, separators=(",", ":")).encode("utf-8"),
+                         callback=incident_delivery_callback
+                     )
+
+                     # Non-blocking poll to process delivery callbacks
+                     self.kafka_producer.poll(0)
+
+                     self.stats["incidents_published"] += 1
+                     logging.debug(
+                         f"Published {len(incident_payload['incidents'])} incidents for camera {camera_name}, app {app_name}"
+                     )
+                 except Exception as kafka_exc:
+                     self._record_error(f"Failed to produce incident message to Kafka: {kafka_exc}")
+                     # Continue processing - don't fail the whole analytics flow over incident issues
+
+         except Exception as exc:
+             self._record_error(f"Failed to publish incidents for {camera_name}/{app_name}: {exc}")
+
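The produce-then-poll(0) pattern above recurs for every topic this class writes to: produce() only enqueues the message locally, and poll(0) gives librdkafka a chance to invoke delivery callbacks without blocking. A standalone sketch of the pattern (broker address and topic are placeholders):

    import json
    from confluent_kafka import Producer

    producer = Producer({"bootstrap.servers": "localhost:9092"})  # assumed broker

    def on_delivery(err, msg):
        # Runs from poll()/flush(), never from produce() itself
        if err:
            print(f"delivery failed: {err}")
        else:
            print(f"delivered to {msg.topic()} [{msg.partition()}]")

    producer.produce(
        topic="example_topic",
        value=json.dumps({"hello": "world"}).encode("utf-8"),
        callback=on_delivery,
    )
    producer.poll(0)   # serve pending delivery callbacks, non-blocking
    producer.flush(5)  # at shutdown, wait up to 5s for in-flight messages
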
+     def _extract_tracking_stats_from_app(self, app: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+         # Prefer direct 'tracking_stats' if present
+         if isinstance(app.get("tracking_stats"), dict):
+             return app["tracking_stats"]
+
+         # Otherwise, try the agg_summary structure: pick the latest entry
+         agg_summary = app.get("agg_summary")
+         if isinstance(agg_summary, dict) and agg_summary:
+             # Keys might be frame numbers as strings -> choose max numerically
+             try:
+                 latest_key = max(agg_summary.keys(), key=lambda k: int(str(k)))
+             except Exception:
+                 latest_key = sorted(agg_summary.keys())[-1]
+             entry = agg_summary.get(latest_key) or {}
+             ts = entry.get("tracking_stats")
+             if isinstance(ts, dict):
+                 return ts
+         return None
+
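To make the agg_summary fallback concrete: keys are assumed to be frame numbers rendered as strings, so the extractor compares them numerically. With a hypothetical summary:

    agg_summary = {
        "9":   {"tracking_stats": {"current_counts": [{"category": "person", "count": 2}]}},
        "12":  {"tracking_stats": {"current_counts": [{"category": "person", "count": 3}]}},
        "101": {"tracking_stats": {"current_counts": [{"category": "person", "count": 4}]}},
    }

    latest_key = max(agg_summary.keys(), key=lambda k: int(str(k)))
    print(latest_key)  # "101" -- plain string sorting would have picked "9"
    print(agg_summary[latest_key]["tracking_stats"])
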
+     def _flush_all(self, end_time: float) -> None:
+         # Build and publish summaries per camera
+         with self._lock:
+             items = list(self._buffers.items())
+             location_items = list(self._location_buffers.items())
+             # Reset buffers after copying references
+             self._buffers = {}
+             self._location_buffers = {}
+
+         for (camera_group, camera_name), buf in items:
+             try:
+                 camera_info = buf.get("camera_info", {})
+                 start_time = buf.get("window_start", end_time)
+                 messages = buf.get("messages", 0)
+
+                 agg_apps_output: List[Dict[str, Any]] = []
+                 for app_key, app_buf in buf.get("apps", {}).items():
+                     # Compute per-window delta for current_counts using previous total_counts
+                     curr_total_dict = app_buf.get("total_counts", {}) or {}
+                     prev_key = (camera_group, camera_name, app_key)
+                     prev_total_dict = self._prev_total_counts.get(prev_key, {}) or {}
+
+                     # Delta = curr_total - prev_total per category; a negative
+                     # delta means the counter was reset, so emit curr_total instead
+                     window_delta_dict: Dict[str, int] = {}
+                     for cat, curr_cnt in curr_total_dict.items():
+                         try:
+                             prev_cnt = int(prev_total_dict.get(cat, 0))
+                             curr_cnt_int = int(curr_cnt)
+                             delta = curr_cnt_int - prev_cnt
+                             if delta < 0:
+                                 # Counter reset detected; treat current as the delta for this window
+                                 delta = curr_cnt_int
+                             window_delta_dict[cat] = delta
+                         except Exception:
+                             # Fallback: if parsing fails, emit current as-is
+                             window_delta_dict[cat] = curr_cnt
+
+                     # Convert dicts to lists for output
+                     current_list = [
+                         {"category": cat, "count": cnt}
+                         for cat, cnt in window_delta_dict.items()
+                     ]
+                     total_list = [
+                         {"category": cat, "count": cnt}
+                         for cat, cnt in curr_total_dict.items()
+                     ]
+
+                     agg_apps_output.append(
+                         {
+                             **app_buf["meta"],
+                             "tracking_stats": {
+                                 "input_timestamp": app_buf.get("last_input_timestamp"),
+                                 "reset_timestamp": app_buf.get("reset_timestamp"),
+                                 "current_counts": current_list,
+                                 "total_counts": total_list,
+                             },
+                         }
+                     )
+
+                     # Update previous totals baseline for next window
+                     self._prev_total_counts[prev_key] = dict(curr_total_dict)
+
+                 summary_payload = {
+                     "camera_name": camera_info.get("camera_name", camera_name),
+                     "inferencePipelineId": self.inference_pipeline_id,
+                     "camera_group": camera_info.get("camera_group", camera_group),
+                     "location": camera_info.get("location"),
+                     "agg_apps": agg_apps_output,
+                     "summary_metadata": {
+                         "window_seconds": self.flush_interval_seconds,
+                         "messages_aggregated": messages,
+                         "start_time": start_time,
+                         "end_time": end_time,
+                     },
+                 }
+
+                 # Publish via Kafka (JSON bytes) - non-blocking, with delivery callback
+                 try:
+                     def delivery_callback(err, msg):
+                         if err:
+                             self._record_error(f"Failed to deliver analytics message: {err}")
+                         else:
+                             logging.debug(f"Analytics message delivered to {msg.topic()} [{msg.partition()}]")
+
+                     self.kafka_producer.produce(
+                         topic="results-agg",
+                         key=str(camera_name).encode("utf-8"),
+                         value=json.dumps(summary_payload, separators=(",", ":")).encode("utf-8"),
+                         callback=delivery_callback
+                     )
+
+                     # Non-blocking poll to process delivery callbacks
+                     self.kafka_producer.poll(0)
+
+                     self.stats["summaries_published"] += 1
+                     logging.debug(
+                         f"Published 5-min summary for camera {camera_group}/{camera_name} with {len(agg_apps_output)} apps"
+                     )
+                 except Exception as kafka_exc:
+                     self._record_error(f"Failed to produce analytics message to Kafka: {kafka_exc}")
+                     # Continue processing other cameras even if one fails
+             except Exception as exc:
+                 self._record_error(f"Failed to publish summary for {camera_group}/{camera_name}: {exc}")
+
+         # Flush location aggregation data
+         self._flush_location_data(location_items, end_time)
+
+         # Brief non-blocking flush for delivery
+         try:
+             # Non-blocking poll to handle any pending callbacks
+             self.kafka_producer.poll(0)
+             # Short flush timeout to avoid blocking
+             self.kafka_producer.flush(timeout=2)
+         except Exception as flush_exc:
+             logging.warning(f"Analytics kafka flush error (non-critical): {flush_exc}")
+
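A quick worked example of the per-window delta computed above, factored into a standalone helper that mirrors the reset handling:

    def window_delta(prev_total: int, curr_total: int) -> int:
        delta = curr_total - prev_total
        # A negative delta means the upstream counter was reset mid-window,
        # so the current total is itself the activity for this window.
        return curr_total if delta < 0 else delta

    print(window_delta(30, 37))  # 7 -> 37 seen so far, 30 already reported last window
    print(window_delta(37, 4))   # 4 -> counter reset detected
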
+     def _flush_location_data(self, location_items: List[Tuple[Tuple[str, str], Dict[str, Any]]], end_time: float) -> None:
+         """Flush location aggregation data to the location_raw topic."""
+         for (camera_group, location), buf in location_items:
+             try:
+                 start_time = buf.get("window_start", end_time)
+                 messages = buf.get("messages", 0)
+                 cameras_data = buf.get("cameras", {})
+                 location_apps = buf.get("apps", {})
+
+                 # Build camera stats array
+                 camera_stats = []
+                 for camera_name, camera_data in cameras_data.items():
+                     camera_apps = []
+                     for app_key, app_buf in camera_data.get("apps", {}).items():
+                         # Convert dicts to lists for camera output
+                         current_list = [
+                             {"category": cat, "count": cnt}
+                             for cat, cnt in app_buf.get("current_counts", {}).items()
+                         ]
+                         total_list = [
+                             {"category": cat, "count": cnt}
+                             for cat, cnt in app_buf.get("total_counts", {}).items()
+                         ]
+
+                         camera_apps.append({
+                             **app_buf["meta"],
+                             "tracking_stats": {
+                                 "input_timestamp": app_buf.get("last_input_timestamp"),
+                                 "reset_timestamp": app_buf.get("reset_timestamp"),
+                                 "current_counts": current_list,
+                                 "total_counts": total_list,
+                             },
+                         })
+
+                     camera_stats.append({
+                         "camera_name": camera_name,
+                         "inferencePipelineId": self.inference_pipeline_id,
+                         "camera_group": camera_group,
+                         "location": location,
+                         "agg_apps": camera_apps,
+                         "summary_metadata": {
+                             "window_seconds": self.flush_interval_seconds,
+                             "messages_aggregated": camera_data.get("messages", 0),
+                         },
+                     })
+
+                 # Build location-level app stats
+                 apps_stats = []
+                 for app_key, app_buf in location_apps.items():
+                     # For location aggregation, compute the per-window delta using previous location totals
+                     curr_total_dict = app_buf.get("total_counts", {}) or {}
+                     prev_key = (camera_group, location, app_key)
+                     prev_total_dict = self._prev_location_total_counts.get(prev_key, {}) or {}
+
+                     # Delta = curr_total - prev_total per category; a negative
+                     # delta means the counter was reset, so emit curr_total instead
+                     window_delta_dict: Dict[str, int] = {}
+                     for cat, curr_cnt in curr_total_dict.items():
+                         try:
+                             prev_cnt = int(prev_total_dict.get(cat, 0))
+                             curr_cnt_int = int(curr_cnt)
+                             delta = curr_cnt_int - prev_cnt
+                             if delta < 0:
+                                 # Counter reset detected; treat current as the delta for this window
+                                 delta = curr_cnt_int
+                             window_delta_dict[cat] = delta
+                         except Exception:
+                             # Fallback: if parsing fails, emit current as-is
+                             window_delta_dict[cat] = curr_cnt
+
+                     # Convert dicts to lists for output
+                     current_list = [
+                         {"category": cat, "count": cnt}
+                         for cat, cnt in window_delta_dict.items()
+                     ]
+                     total_list = [
+                         {"category": cat, "count": cnt}
+                         for cat, cnt in curr_total_dict.items()
+                     ]
+
+                     apps_stats.append({
+                         **app_buf["meta"],
+                         "tracking_stats": {
+                             "input_timestamp": app_buf.get("last_input_timestamp"),
+                             "reset_timestamp": app_buf.get("reset_timestamp"),
+                             "current_counts": current_list,
+                             "total_counts": total_list,
+                         },
+                     })
+
+                     # Update previous totals baseline for next window
+                     self._prev_location_total_counts[prev_key] = dict(curr_total_dict)
+
+                 # Build location payload
+                 location_payload = {
+                     "message": "success",
+                     "inferencePipelineId": self.inference_pipeline_id,
+                     "camera_group_name": camera_group,
+                     "camera_group_location": location,
+                     "camera_stats": camera_stats,
+                     "apps_stats": apps_stats,
+                 }
+
+                 # Publish location data via Kafka - non-blocking, with delivery callback
+                 try:
+                     def location_delivery_callback(err, msg):
+                         if err:
+                             self._record_error(f"Failed to deliver location analytics message: {err}")
+                         else:
+                             logging.debug(f"Location analytics message delivered to {msg.topic()} [{msg.partition()}]")
+
+                     self.kafka_producer.produce(
+                         topic="location_raw",
+                         key=f"{camera_group}_{location}".encode("utf-8"),
+                         value=json.dumps(location_payload, separators=(",", ":")).encode("utf-8"),
+                         callback=location_delivery_callback
+                     )
+
+                     # Non-blocking poll to process delivery callbacks
+                     self.kafka_producer.poll(0)
+
+                     self.stats["location_summaries_published"] += 1
+                     logging.debug(
+                         f"Published location summary for {camera_group}/{location} with {len(camera_stats)} cameras and {len(apps_stats)} apps"
+                     )
+                 except Exception as kafka_exc:
+                     self._record_error(f"Failed to produce location analytics message to Kafka: {kafka_exc}")
+                     # Continue processing other locations even if one fails
+
+             except Exception as exc:
+                 self._record_error(f"Failed to publish location summary for {camera_group}/{location}: {exc}")
+
+     def _record_error(self, error_message: str) -> None:
+         with self._lock:
+             self.stats["errors"] += 1
+             self.stats["last_error"] = error_message
+             self.stats["last_error_time"] = time.time()
+         logging.error(f"Analytics summarizer error: {error_message}")
+
+     def get_stats(self) -> Dict[str, Any]:
+         with self._lock:
+             stats = dict(self.stats)
+             if stats.get("start_time"):
+                 stats["uptime_seconds"] = time.time() - stats["start_time"]
+             return stats
+
+     def get_health_status(self) -> Dict[str, Any]:
+         health = {
+             "status": "healthy",
+             "is_running": self._is_running,
+             "errors": self.stats["errors"],
+             "summaries_published": self.stats["summaries_published"],
+             "messages_ingested": self.stats["messages_ingested"],
+         }
+         if (
+             self.stats.get("last_error_time")
+             and (time.time() - self.stats["last_error_time"]) < 60
+         ):
+             health["status"] = "degraded"
+             health["reason"] = f"Recent error: {self.stats.get('last_error')}"
+         if not self._is_running:
+             health["status"] = "unhealthy"
+             health["reason"] = "Summarizer is not running"
+         return health
+
+     def cleanup(self) -> None:
+         try:
+             self.stop()
+         except Exception:
+             pass
+         with self._lock:
+             self._ingest_queue = []
+             self._buffers = {}
+             self._prev_total_counts = {}
+             self._location_buffers = {}
+             self._prev_location_total_counts = {}
+             self._incident_buffers = {}
+         try:
+             if hasattr(self, "kafka_producer") and self.kafka_producer is not None:
+                 # Non-blocking cleanup with timeout
+                 self.kafka_producer.poll(0)  # Process any pending callbacks
+                 self.kafka_producer.flush(timeout=3)  # Short timeout for cleanup
+         except Exception as exc:
+             logging.warning(f"Analytics kafka producer cleanup error (non-critical): {exc}")
+         logging.info("Analytics summarizer cleanup completed")
+
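Taken together, a minimal driver for this class might look like the sketch below. The Session construction is hypothetical (use whatever matrice_common.session.Session actually requires); everything else uses only the public methods defined above:

    from matrice_common.session import Session  # construction details assumed

    session = Session()  # hypothetical; supply credentials/config as required
    summarizer = AnalyticsSummarizer(
        session, inference_pipeline_id="pipeline-xyz", flush_interval_seconds=300
    )

    if summarizer.start():
        try:
            # Feed aggregated camera_results as they are published upstream
            summarizer.ingest_result({
                "camera_info": {"camera_name": "camera_1", "camera_group": "group_a", "location": "Lobby"},
                "agg_apps": [],
            })
            print(summarizer.get_health_status())
        finally:
            summarizer.cleanup()  # stops the worker, clears buffers, flushes the producer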