matrice-analytics 0.1.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. matrice_analytics/__init__.py +28 -0
  2. matrice_analytics/boundary_drawing_internal/README.md +305 -0
  3. matrice_analytics/boundary_drawing_internal/__init__.py +45 -0
  4. matrice_analytics/boundary_drawing_internal/boundary_drawing_internal.py +1207 -0
  5. matrice_analytics/boundary_drawing_internal/boundary_drawing_tool.py +429 -0
  6. matrice_analytics/boundary_drawing_internal/boundary_tool_template.html +1036 -0
  7. matrice_analytics/boundary_drawing_internal/data/.gitignore +12 -0
  8. matrice_analytics/boundary_drawing_internal/example_usage.py +206 -0
  9. matrice_analytics/boundary_drawing_internal/usage/README.md +110 -0
  10. matrice_analytics/boundary_drawing_internal/usage/boundary_drawer_launcher.py +102 -0
  11. matrice_analytics/boundary_drawing_internal/usage/simple_boundary_launcher.py +107 -0
  12. matrice_analytics/post_processing/README.md +455 -0
  13. matrice_analytics/post_processing/__init__.py +732 -0
  14. matrice_analytics/post_processing/advanced_tracker/README.md +650 -0
  15. matrice_analytics/post_processing/advanced_tracker/__init__.py +17 -0
  16. matrice_analytics/post_processing/advanced_tracker/base.py +99 -0
  17. matrice_analytics/post_processing/advanced_tracker/config.py +77 -0
  18. matrice_analytics/post_processing/advanced_tracker/kalman_filter.py +370 -0
  19. matrice_analytics/post_processing/advanced_tracker/matching.py +195 -0
  20. matrice_analytics/post_processing/advanced_tracker/strack.py +230 -0
  21. matrice_analytics/post_processing/advanced_tracker/tracker.py +367 -0
  22. matrice_analytics/post_processing/config.py +146 -0
  23. matrice_analytics/post_processing/core/__init__.py +63 -0
  24. matrice_analytics/post_processing/core/base.py +704 -0
  25. matrice_analytics/post_processing/core/config.py +3291 -0
  26. matrice_analytics/post_processing/core/config_utils.py +925 -0
  27. matrice_analytics/post_processing/face_reg/__init__.py +43 -0
  28. matrice_analytics/post_processing/face_reg/compare_similarity.py +556 -0
  29. matrice_analytics/post_processing/face_reg/embedding_manager.py +950 -0
  30. matrice_analytics/post_processing/face_reg/face_recognition.py +2234 -0
  31. matrice_analytics/post_processing/face_reg/face_recognition_client.py +606 -0
  32. matrice_analytics/post_processing/face_reg/people_activity_logging.py +321 -0
  33. matrice_analytics/post_processing/ocr/__init__.py +0 -0
  34. matrice_analytics/post_processing/ocr/easyocr_extractor.py +250 -0
  35. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/__init__.py +9 -0
  36. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/__init__.py +4 -0
  37. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/cli.py +33 -0
  38. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/dataset_stats.py +139 -0
  39. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/export.py +398 -0
  40. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/train.py +447 -0
  41. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/utils.py +129 -0
  42. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/valid.py +93 -0
  43. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/validate_dataset.py +240 -0
  44. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/visualize_augmentation.py +176 -0
  45. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/cli/visualize_predictions.py +96 -0
  46. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/core/__init__.py +3 -0
  47. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/core/process.py +246 -0
  48. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/core/types.py +60 -0
  49. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/core/utils.py +87 -0
  50. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/inference/__init__.py +3 -0
  51. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/inference/config.py +82 -0
  52. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/inference/hub.py +141 -0
  53. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/inference/plate_recognizer.py +323 -0
  54. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/py.typed +0 -0
  55. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/__init__.py +0 -0
  56. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/data/__init__.py +0 -0
  57. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/data/augmentation.py +101 -0
  58. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/data/dataset.py +97 -0
  59. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/__init__.py +0 -0
  60. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/config.py +114 -0
  61. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/layers.py +553 -0
  62. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/loss.py +55 -0
  63. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/metric.py +86 -0
  64. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/model_builders.py +95 -0
  65. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/model/model_schema.py +395 -0
  66. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/utilities/__init__.py +0 -0
  67. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/utilities/backend_utils.py +38 -0
  68. matrice_analytics/post_processing/ocr/fast_plate_ocr_py38/train/utilities/utils.py +214 -0
  69. matrice_analytics/post_processing/ocr/postprocessing.py +270 -0
  70. matrice_analytics/post_processing/ocr/preprocessing.py +52 -0
  71. matrice_analytics/post_processing/post_processor.py +1175 -0
  72. matrice_analytics/post_processing/test_cases/__init__.py +1 -0
  73. matrice_analytics/post_processing/test_cases/run_tests.py +143 -0
  74. matrice_analytics/post_processing/test_cases/test_advanced_customer_service.py +841 -0
  75. matrice_analytics/post_processing/test_cases/test_basic_counting_tracking.py +523 -0
  76. matrice_analytics/post_processing/test_cases/test_comprehensive.py +531 -0
  77. matrice_analytics/post_processing/test_cases/test_config.py +852 -0
  78. matrice_analytics/post_processing/test_cases/test_customer_service.py +585 -0
  79. matrice_analytics/post_processing/test_cases/test_data_generators.py +583 -0
  80. matrice_analytics/post_processing/test_cases/test_people_counting.py +510 -0
  81. matrice_analytics/post_processing/test_cases/test_processor.py +524 -0
  82. matrice_analytics/post_processing/test_cases/test_usecases.py +165 -0
  83. matrice_analytics/post_processing/test_cases/test_utilities.py +356 -0
  84. matrice_analytics/post_processing/test_cases/test_utils.py +743 -0
  85. matrice_analytics/post_processing/usecases/Histopathological_Cancer_Detection_img.py +604 -0
  86. matrice_analytics/post_processing/usecases/__init__.py +267 -0
  87. matrice_analytics/post_processing/usecases/abandoned_object_detection.py +797 -0
  88. matrice_analytics/post_processing/usecases/advanced_customer_service.py +1601 -0
  89. matrice_analytics/post_processing/usecases/age_detection.py +842 -0
  90. matrice_analytics/post_processing/usecases/age_gender_detection.py +1085 -0
  91. matrice_analytics/post_processing/usecases/anti_spoofing_detection.py +656 -0
  92. matrice_analytics/post_processing/usecases/assembly_line_detection.py +841 -0
  93. matrice_analytics/post_processing/usecases/banana_defect_detection.py +624 -0
  94. matrice_analytics/post_processing/usecases/basic_counting_tracking.py +667 -0
  95. matrice_analytics/post_processing/usecases/blood_cancer_detection_img.py +881 -0
  96. matrice_analytics/post_processing/usecases/car_damage_detection.py +834 -0
  97. matrice_analytics/post_processing/usecases/car_part_segmentation.py +946 -0
  98. matrice_analytics/post_processing/usecases/car_service.py +1601 -0
  99. matrice_analytics/post_processing/usecases/cardiomegaly_classification.py +864 -0
  100. matrice_analytics/post_processing/usecases/cell_microscopy_segmentation.py +897 -0
  101. matrice_analytics/post_processing/usecases/chicken_pose_detection.py +648 -0
  102. matrice_analytics/post_processing/usecases/child_monitoring.py +814 -0
  103. matrice_analytics/post_processing/usecases/color/clip.py +660 -0
  104. matrice_analytics/post_processing/usecases/color/clip_processor/merges.txt +48895 -0
  105. matrice_analytics/post_processing/usecases/color/clip_processor/preprocessor_config.json +28 -0
  106. matrice_analytics/post_processing/usecases/color/clip_processor/special_tokens_map.json +30 -0
  107. matrice_analytics/post_processing/usecases/color/clip_processor/tokenizer.json +245079 -0
  108. matrice_analytics/post_processing/usecases/color/clip_processor/tokenizer_config.json +32 -0
  109. matrice_analytics/post_processing/usecases/color/clip_processor/vocab.json +1 -0
  110. matrice_analytics/post_processing/usecases/color/color_map_utils.py +70 -0
  111. matrice_analytics/post_processing/usecases/color/color_mapper.py +468 -0
  112. matrice_analytics/post_processing/usecases/color_detection.py +1936 -0
  113. matrice_analytics/post_processing/usecases/color_map_utils.py +70 -0
  114. matrice_analytics/post_processing/usecases/concrete_crack_detection.py +827 -0
  115. matrice_analytics/post_processing/usecases/crop_weed_detection.py +781 -0
  116. matrice_analytics/post_processing/usecases/customer_service.py +1008 -0
  117. matrice_analytics/post_processing/usecases/defect_detection_products.py +936 -0
  118. matrice_analytics/post_processing/usecases/distracted_driver_detection.py +822 -0
  119. matrice_analytics/post_processing/usecases/drone_traffic_monitoring.py +585 -0
  120. matrice_analytics/post_processing/usecases/drowsy_driver_detection.py +829 -0
  121. matrice_analytics/post_processing/usecases/dwell_detection.py +829 -0
  122. matrice_analytics/post_processing/usecases/emergency_vehicle_detection.py +827 -0
  123. matrice_analytics/post_processing/usecases/face_emotion.py +813 -0
  124. matrice_analytics/post_processing/usecases/face_recognition.py +827 -0
  125. matrice_analytics/post_processing/usecases/fashion_detection.py +835 -0
  126. matrice_analytics/post_processing/usecases/field_mapping.py +902 -0
  127. matrice_analytics/post_processing/usecases/fire_detection.py +1146 -0
  128. matrice_analytics/post_processing/usecases/flare_analysis.py +836 -0
  129. matrice_analytics/post_processing/usecases/flower_segmentation.py +1006 -0
  130. matrice_analytics/post_processing/usecases/gas_leak_detection.py +837 -0
  131. matrice_analytics/post_processing/usecases/gender_detection.py +832 -0
  132. matrice_analytics/post_processing/usecases/human_activity_recognition.py +871 -0
  133. matrice_analytics/post_processing/usecases/intrusion_detection.py +1672 -0
  134. matrice_analytics/post_processing/usecases/leaf.py +821 -0
  135. matrice_analytics/post_processing/usecases/leaf_disease.py +840 -0
  136. matrice_analytics/post_processing/usecases/leak_detection.py +837 -0
  137. matrice_analytics/post_processing/usecases/license_plate_detection.py +1188 -0
  138. matrice_analytics/post_processing/usecases/license_plate_monitoring.py +1781 -0
  139. matrice_analytics/post_processing/usecases/litter_monitoring.py +717 -0
  140. matrice_analytics/post_processing/usecases/mask_detection.py +869 -0
  141. matrice_analytics/post_processing/usecases/natural_disaster.py +907 -0
  142. matrice_analytics/post_processing/usecases/parking.py +787 -0
  143. matrice_analytics/post_processing/usecases/parking_space_detection.py +822 -0
  144. matrice_analytics/post_processing/usecases/pcb_defect_detection.py +888 -0
  145. matrice_analytics/post_processing/usecases/pedestrian_detection.py +808 -0
  146. matrice_analytics/post_processing/usecases/people_counting.py +706 -0
  147. matrice_analytics/post_processing/usecases/people_counting_bckp.py +1683 -0
  148. matrice_analytics/post_processing/usecases/people_tracking.py +1842 -0
  149. matrice_analytics/post_processing/usecases/pipeline_detection.py +605 -0
  150. matrice_analytics/post_processing/usecases/plaque_segmentation_img.py +874 -0
  151. matrice_analytics/post_processing/usecases/pothole_segmentation.py +915 -0
  152. matrice_analytics/post_processing/usecases/ppe_compliance.py +645 -0
  153. matrice_analytics/post_processing/usecases/price_tag_detection.py +822 -0
  154. matrice_analytics/post_processing/usecases/proximity_detection.py +1901 -0
  155. matrice_analytics/post_processing/usecases/road_lane_detection.py +623 -0
  156. matrice_analytics/post_processing/usecases/road_traffic_density.py +832 -0
  157. matrice_analytics/post_processing/usecases/road_view_segmentation.py +915 -0
  158. matrice_analytics/post_processing/usecases/shelf_inventory_detection.py +583 -0
  159. matrice_analytics/post_processing/usecases/shoplifting_detection.py +822 -0
  160. matrice_analytics/post_processing/usecases/shopping_cart_analysis.py +899 -0
  161. matrice_analytics/post_processing/usecases/skin_cancer_classification_img.py +864 -0
  162. matrice_analytics/post_processing/usecases/smoker_detection.py +833 -0
  163. matrice_analytics/post_processing/usecases/solar_panel.py +810 -0
  164. matrice_analytics/post_processing/usecases/suspicious_activity_detection.py +1030 -0
  165. matrice_analytics/post_processing/usecases/template_usecase.py +380 -0
  166. matrice_analytics/post_processing/usecases/theft_detection.py +648 -0
  167. matrice_analytics/post_processing/usecases/traffic_sign_monitoring.py +724 -0
  168. matrice_analytics/post_processing/usecases/underground_pipeline_defect_detection.py +775 -0
  169. matrice_analytics/post_processing/usecases/underwater_pollution_detection.py +842 -0
  170. matrice_analytics/post_processing/usecases/vehicle_monitoring.py +1029 -0
  171. matrice_analytics/post_processing/usecases/warehouse_object_segmentation.py +899 -0
  172. matrice_analytics/post_processing/usecases/waterbody_segmentation.py +923 -0
  173. matrice_analytics/post_processing/usecases/weapon_detection.py +771 -0
  174. matrice_analytics/post_processing/usecases/weld_defect_detection.py +615 -0
  175. matrice_analytics/post_processing/usecases/wildlife_monitoring.py +898 -0
  176. matrice_analytics/post_processing/usecases/windmill_maintenance.py +834 -0
  177. matrice_analytics/post_processing/usecases/wound_segmentation.py +856 -0
  178. matrice_analytics/post_processing/utils/__init__.py +150 -0
  179. matrice_analytics/post_processing/utils/advanced_counting_utils.py +400 -0
  180. matrice_analytics/post_processing/utils/advanced_helper_utils.py +317 -0
  181. matrice_analytics/post_processing/utils/advanced_tracking_utils.py +461 -0
  182. matrice_analytics/post_processing/utils/alerting_utils.py +213 -0
  183. matrice_analytics/post_processing/utils/category_mapping_utils.py +94 -0
  184. matrice_analytics/post_processing/utils/color_utils.py +592 -0
  185. matrice_analytics/post_processing/utils/counting_utils.py +182 -0
  186. matrice_analytics/post_processing/utils/filter_utils.py +261 -0
  187. matrice_analytics/post_processing/utils/format_utils.py +293 -0
  188. matrice_analytics/post_processing/utils/geometry_utils.py +300 -0
  189. matrice_analytics/post_processing/utils/smoothing_utils.py +358 -0
  190. matrice_analytics/post_processing/utils/tracking_utils.py +234 -0
  191. matrice_analytics/py.typed +0 -0
  192. matrice_analytics-0.1.60.dist-info/METADATA +481 -0
  193. matrice_analytics-0.1.60.dist-info/RECORD +196 -0
  194. matrice_analytics-0.1.60.dist-info/WHEEL +5 -0
  195. matrice_analytics-0.1.60.dist-info/licenses/LICENSE.txt +21 -0
  196. matrice_analytics-0.1.60.dist-info/top_level.txt +1 -0
@@ -0,0 +1,950 @@
1
+
2
+ from typing import Any, Dict, List, Optional, Tuple, NamedTuple
3
+ import time
4
+ import logging
5
+ import threading
6
+ import numpy as np
7
+ from datetime import datetime, timezone
8
+ from dataclasses import dataclass, field
9
+
10
+ from .face_recognition_client import FacialRecognitionClient
11
+
12
+
13
class SearchResult(NamedTuple):
    """Immutable record describing the outcome of a face search.

    The staff information is exposed as individual fields rather than a
    nested object.
    """

    # Employee identifier associated with the result.
    employee_id: str
    # Staff identifier associated with the result.
    staff_id: str
    # Either "known" or "unknown".
    detection_type: str
    # Free-form mapping of additional staff attributes.
    staff_details: Dict[str, Any]
    # Name of the person the result refers to.
    person_name: str
    # Similarity score attached to the result.
    similarity_score: float
21
+
22
+
23
class StaffEmbedding(NamedTuple):
    """Immutable record pairing one embedding vector with its staff data."""

    # Identifier of this embedding.
    embedding_id: str
    # Identifier of the staff member the embedding belongs to.
    staff_id: str
    # The embedding vector as a plain list of floats.
    embedding: List[float]
    # Employee identifier of the staff member.
    employee_id: str
    # Free-form mapping of additional staff attributes.
    staff_details: Dict[str, Any]
    # Whether the record is flagged as active.
    is_active: bool
31
+
32
+
33
@dataclass
class EmbeddingConfig:
    """Tunable settings for embedding search, caching and refresh."""

    # --- Thresholds -------------------------------------------------------
    similarity_threshold: float = 0.35
    confidence_threshold: float = 0.6

    # --- Track-ID cache ---------------------------------------------------
    enable_track_id_cache: bool = True
    cache_max_size: int = 3000
    # Time-to-live of a cache entry, in seconds (1 hour).
    cache_ttl: int = 3600

    # --- Search -----------------------------------------------------------
    search_limit: int = 5
    search_collection: str = "staff_enrollment"

    # --- Background refresh of embeddings ---------------------------------
    enable_background_refresh: bool = True
    # Seconds between background refreshes (43200 s = 12 hours).
    background_refresh_interval: int = 43200
    # Seconds after which cached staff embeddings are considered stale;
    # drives the on-demand refresh check.
    staff_embeddings_cache_ttl: int = 43200
56
+
57
+
58
+ class EmbeddingManager:
59
+ """
60
+ Manages face embeddings, search operations, and caching.
61
+
62
+ CRITICAL INITIALIZATION FLOW:
63
+ 1. __init__() creates the manager but does NOT load embeddings or start background refresh
64
+ 2. External caller MUST call await _load_staff_embeddings() to load embeddings synchronously
65
+ 3. After successful load, caller SHOULD call start_background_refresh() for periodic updates
66
+ 4. The _embeddings_loaded flag tracks whether embeddings are ready for use
67
+ 5. All search operations check _embeddings_loaded before proceeding
68
+
69
+ This design prevents race conditions where:
70
+ - Background thread tries to load while main thread is loading
71
+ - Search operations are called before embeddings are loaded
72
+ - Multiple threads compete for the embeddings_lock during initialization
73
+
74
+ Thread Safety:
75
+ - _embeddings_lock protects embeddings_matrix and embedding_metadata
76
+ - _cache_lock protects track_id_cache
77
+ - _embeddings_loaded is set only after successful load under lock
78
+ """
79
+
80
def __init__(self, config: "EmbeddingConfig", face_client: Optional["FacialRecognitionClient"] = None):
    """Create the manager without loading embeddings or starting threads.

    Args:
        config: Settings object; ``staff_embeddings_cache_ttl`` and
            ``background_refresh_interval`` are read here.
        face_client: Client used later to fetch staff embeddings. May be
            omitted and supplied afterwards via ``set_face_client``.
    """
    self.config = config
    self.face_client = face_client
    self.logger = logging.getLogger(__name__)

    # Track-ID cache: remembers the best result seen for each track.
    # Format: {track_id: {"result": search_result, "similarity_score": float, "timestamp": timestamp}}
    self.track_id_cache = {}

    # Staff embeddings kept locally for similarity search.
    self.staff_embeddings: List["StaffEmbedding"] = []
    self.staff_embeddings_last_update = 0
    # Staleness threshold in seconds, taken from the config and coerced to int.
    self.staff_embeddings_cache_ttl = int(self.config.staff_embeddings_cache_ttl)

    # Matrix form of the embeddings for fast similarity computation, plus
    # the StaffEmbedding records corresponding to the matrix rows.
    self.embeddings_matrix = None
    self.embedding_metadata = []

    # Counter for unknown faces.
    self.unknown_faces_counter = 0

    # Locks guarding the track-ID cache and the embeddings state.
    self._cache_lock = threading.Lock()
    self._embeddings_lock = threading.Lock()

    # Background refresh thread bookkeeping.
    self._refresh_thread = None
    self._is_running = False
    self._stop_event = threading.Event()

    # Becomes True once embeddings have been loaded successfully.
    self._embeddings_loaded = False

    # The background refresh is deliberately NOT started here; it is
    # started later through start_background_refresh() (or
    # set_face_client()) so that it cannot interfere with the initial load.
    self.logger.info(f"EmbeddingManager created - background refresh will start after initial load (interval: {self.config.background_refresh_interval}s)")
117
+
118
def is_ready(self) -> bool:
    """Report whether the manager holds usable embeddings.

    Returns:
        A truthy value only when the loaded flag is set, the embeddings
        matrix exists and there is at least one metadata entry.
    """
    loaded_flag = self._embeddings_loaded
    if not loaded_flag:
        return loaded_flag
    if self.embeddings_matrix is None:
        return False
    return len(self.embedding_metadata) > 0
130
+
131
def get_status(self) -> Dict[str, Any]:
    """Collect a snapshot of the manager's state for debugging/health checks.

    Returns:
        Mapping with the loaded flag, embedding and metadata counts, the
        matrix shape (``None`` when no matrix exists), track-ID cache size,
        last update timestamp, the running flag and the ``is_ready()`` result.
    """
    # The matrix shape is read while holding the embeddings lock.
    with self._embeddings_lock:
        matrix = self.embeddings_matrix
        matrix_shape = None if matrix is None else matrix.shape

    status: Dict[str, Any] = {}
    status["embeddings_loaded"] = self._embeddings_loaded
    status["embeddings_count"] = len(self.staff_embeddings)
    status["matrix_shape"] = matrix_shape
    status["metadata_count"] = len(self.embedding_metadata)
    status["cache_size"] = len(self.track_id_cache)
    status["last_update"] = self.staff_embeddings_last_update
    status["is_running"] = self._is_running
    status["is_ready"] = self.is_ready()
    return status
151
+
152
def set_face_client(self, face_client: "FacialRecognitionClient"):
    """Install the face recognition client and start the refresh if needed.

    Args:
        face_client: Client to use for subsequent embedding loads.

    If background refresh is enabled in the config and no refresh thread is
    marked as running, ``start_background_refresh()`` is invoked.
    """
    self.face_client = face_client

    if self.config.enable_background_refresh and not self._is_running:
        self.start_background_refresh()
        # start_background_refresh() is a no-op when no client is set, so
        # only report success when the running flag actually flipped.
        if self._is_running:
            self.logger.info("Background embedding refresh started after setting face client")
160
+
161
def start_background_refresh(self):
    """Launch the daemon thread that runs ``_run_refresh_loop``.

    Does nothing when a refresh is already marked as running or when no
    face client has been set.
    """
    if self._is_running or not self.face_client:
        return

    self._is_running = True
    self._stop_event.clear()

    worker = threading.Thread(
        name="EmbeddingRefreshThread",
        target=self._run_refresh_loop,
        daemon=True,
    )
    self._refresh_thread = worker
    worker.start()
    self.logger.info("Started background embedding refresh thread")
171
+
172
def stop_background_refresh(self):
    """Signal the refresh thread to stop and wait briefly for it to exit.

    Does nothing when no refresh is marked as running. The join waits at
    most 10 seconds.
    """
    if not self._is_running:
        return

    self.logger.info("Stopping background embedding refresh thread...")
    self._is_running = False
    self._stop_event.set()

    worker = self._refresh_thread
    if worker:
        worker.join(timeout=10.0)
    self.logger.info("Background embedding refresh thread stopped")
181
+
182
def _run_refresh_loop(self):
    """Body of the background thread: load once, then refresh periodically.

    A private asyncio event loop is created for this thread and used to run
    the ``_load_staff_embeddings`` coroutine. After the initial load the
    method sleeps for ``config.background_refresh_interval`` seconds between
    refreshes, waking early when the stop event is set. Exceptions raised by
    a refresh are logged and followed by a back-off before the next attempt;
    they never propagate out of the thread.
    """
    import asyncio

    # Back-off after a failed refresh attempt, in seconds.
    retry_delay_seconds = 60

    # Bound before the try so the cleanup below is safe even if creating the
    # event loop itself fails.
    loop = None
    try:
        # Create a new event loop owned by this thread.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        self.logger.info("Loading initial staff embeddings in background thread...")
        loop.run_until_complete(self._load_staff_embeddings())

        while self._is_running and not self._stop_event.is_set():
            try:
                # Sleep for the refresh interval, waking early on stop.
                if self._stop_event.wait(timeout=self.config.background_refresh_interval):
                    break

                if not self._is_running:
                    break

                self.logger.info("Refreshing staff embeddings from server...")
                success = loop.run_until_complete(self._load_staff_embeddings())

                if success:
                    self.logger.info("Successfully refreshed staff embeddings in background")
                else:
                    self.logger.warning("Failed to refresh staff embeddings in background")

            except Exception as e:
                self.logger.error(f"Error in background embedding refresh loop: {e}", exc_info=True)
                # Back off before retrying, but wait on the stop event rather
                # than sleeping so a stop request is honoured immediately.
                if self._stop_event.wait(timeout=retry_delay_seconds):
                    break

    except Exception as e:
        self.logger.error(f"Fatal error in background refresh thread: {e}", exc_info=True)
    finally:
        if loop is not None:
            try:
                loop.close()
            except Exception:
                # Best-effort cleanup: a failure to close must not mask the
                # reason the thread is ending.
                self.logger.debug("Error closing background refresh event loop", exc_info=True)
        self.logger.info("Background embedding refresh loop ended")
228
+
229
async def _load_staff_embeddings(self) -> bool:
    """Fetch all staff embeddings from the API and rebuild the local cache.

    The response may be a dict carrying the records under ``"data"`` or
    ``"items"``, or a bare list of records. Records that are not dicts, are
    flagged ``isActive`` False, have a missing/empty/non-list or non-numeric
    ``"embedding"``, or whose dimension differs from the first valid record
    are skipped.

    The new state is built in local variables and only published (under
    ``_embeddings_lock``) once at least one valid embedding exists, so a
    failed refresh leaves the previously cached embeddings untouched.

    Returns:
        True when the cache was rebuilt; False when no client is set, the
        response shape is unexpected, no valid embedding was found, or any
        exception occurred (it is logged, never propagated).
    """
    if not self.face_client:
        self.logger.error("Face client not available for loading staff embeddings")
        return False

    try:
        self.logger.info("Loading staff embeddings from API...")
        response = await self.face_client.get_all_staff_embeddings()
        self.logger.debug(
            "Staff embeddings response type=%s, length=%s",
            type(response).__name__,
            len(response) if isinstance(response, list) else "N/A",
        )

        # Accept {"data": [...]} (with or without a success flag),
        # {"items": [...]}, or a raw list.
        if isinstance(response, dict):
            if isinstance(response.get("data"), list):
                embeddings_data = response["data"]
            elif isinstance(response.get("items"), list):
                embeddings_data = response["items"]
            else:
                self.logger.error(f"Unexpected embeddings response shape (dict): keys={list(response.keys())}")
                return False
        elif isinstance(response, list):
            embeddings_data = response
        else:
            self.logger.error(f"Unexpected embeddings response type: {type(response)}")
            return False

        loaded = []  # StaffEmbedding records, one per matrix row
        vectors = []  # cleaned float vectors, same order as `loaded`
        expected_dim: Optional[int] = None
        dims_summary: Dict[int, int] = {}  # observed dimension -> count
        mismatch_examples: List[Tuple[str, int]] = []  # (staffId, dim)

        for item in embeddings_data:
            if not isinstance(item, dict):
                dims_summary[0] = dims_summary.get(0, 0) + 1
                continue
            # Skip records explicitly marked inactive.
            if item.get("isActive") is False:
                continue

            raw_emb = item.get("embedding", [])
            if not isinstance(raw_emb, list) or not raw_emb:
                dims_summary[0] = dims_summary.get(0, 0) + 1
                continue
            dims_summary[len(raw_emb)] = dims_summary.get(len(raw_emb), 0) + 1

            try:
                clean_emb = [float(v) for v in raw_emb]
            except (TypeError, ValueError, OverflowError):
                continue

            # The first valid embedding fixes the dimension for the batch.
            if expected_dim is None:
                expected_dim = len(clean_emb)
            if len(clean_emb) != expected_dim:
                mismatch_examples.append((str(item.get("staffId", "")), len(clean_emb)))
                self.logger.warning(f"Skipping embedding with mismatched dimension: got {len(clean_emb)} expected {expected_dim}")
                continue

            loaded.append(
                StaffEmbedding(
                    embedding_id=item.get("embeddingId", ""),
                    staff_id=item.get("staffId", ""),
                    embedding=clean_emb,
                    employee_id=str(item.get("employeeId", "")),
                    staff_details=item.get("staffDetails", {}),
                    is_active=item.get("isActive", True),
                )
            )
            vectors.append(clean_emb)

        if not vectors:
            error_msg = (
                f"No valid staff embeddings loaded. Observed dimension distribution: {dims_summary}. "
                f"Expected_dim={expected_dim}. Mismatch examples (staffId, dim): {mismatch_examples[:5]}"
            )
            self.logger.error(error_msg)
            return False

        # Row-normalise so cosine similarity reduces to a dot product.
        matrix = np.array(vectors, dtype=np.float32)
        norms = np.linalg.norm(matrix, axis=1, keepdims=True)
        norms[norms == 0] = 1  # avoid division by zero for all-zero rows
        matrix = matrix / norms

        # Publish the new state in one step under the lock.
        with self._embeddings_lock:
            self.staff_embeddings = loaded
            self.embeddings_matrix = matrix
            self.embedding_metadata = loaded.copy()
            self.staff_embeddings_last_update = time.time()
            self._embeddings_loaded = True

        self.logger.info(f"Successfully loaded and cached {len(loaded)} staff embeddings (dim={matrix.shape[1]})")
        self.logger.debug(
            "Embeddings matrix shape: %s, dtype=%s, row0_sum=%.4f, row0_norm=%.4f",
            matrix.shape,
            matrix.dtype,
            float(np.sum(matrix[0])),
            float(np.linalg.norm(matrix[0])),
        )
        return True

    except Exception as e:
        self.logger.error(f"Error loading staff embeddings: {e}", exc_info=True)
        return False
361
+
362
def _should_refresh_embeddings(self) -> bool:
    """Tell whether the cached staff embeddings have outlived their TTL.

    Returns:
        True when more than ``staff_embeddings_cache_ttl`` seconds have
        passed since ``staff_embeddings_last_update``.
    """
    age_seconds = time.time() - self.staff_embeddings_last_update
    return age_seconds > self.staff_embeddings_cache_ttl
366
+
367
def _add_embedding_to_local_cache(self, staff_embedding: "StaffEmbedding"):
    """Append one embedding to the local lists and the similarity matrix.

    The new vector is normalised to unit length (unless its norm is zero)
    and stacked as a new matrix row. The record is only added to
    ``staff_embeddings`` / ``embedding_metadata`` once the new matrix has
    been built, so the metadata lists always stay aligned with the matrix
    rows. Records with an empty embedding, or whose dimension differs from
    the existing matrix, are ignored. Errors are logged, never raised.

    Args:
        staff_embedding: Record whose ``embedding`` and ``staff_id`` are read.
    """
    # NOTE(review): this method does not take _embeddings_lock; whether
    # callers already hold it cannot be seen from here - confirm before
    # adding locking, since the lock is not re-entrant.
    try:
        if not staff_embedding.embedding:
            return

        new_row = np.array([staff_embedding.embedding], dtype=np.float32)
        norm = np.linalg.norm(new_row)
        if norm > 0:
            new_row = new_row / norm

        current = self.embeddings_matrix
        if current is None:
            updated = new_row
        else:
            if current.shape[1] != new_row.shape[1]:
                self.logger.warning(
                    f"Not adding embedding for {staff_embedding.staff_id} to local cache: "
                    f"dimension {new_row.shape[1]} does not match cached dimension {current.shape[1]}"
                )
                return
            updated = np.vstack([current, new_row])

        # Everything that can fail has succeeded; now update all three
        # structures together.
        self.staff_embeddings.append(staff_embedding)
        self.embedding_metadata.append(staff_embedding)
        self.embeddings_matrix = updated

        self.logger.debug(f"Added embedding for {staff_embedding.staff_id} to local cache")

    except Exception as e:
        self.logger.error(f"Error adding embedding to local cache: {e}", exc_info=True)
393
+
394
+ def _find_best_local_match(self, query_embedding: List[float]) -> Optional[Tuple[StaffEmbedding, float]]:
395
+ """Find best matching staff member using optimized matrix operations (thread-safe)."""
396
+ # Check if embeddings are loaded at all
397
+ if not self._embeddings_loaded:
398
+ print(f"ERROR: _find_best_local_match called but embeddings not loaded yet (_embeddings_loaded={self._embeddings_loaded})")
399
+ self.logger.error("Embeddings not loaded - _find_best_local_match cannot proceed")
400
+ return None
401
+
402
+ with self._embeddings_lock:
403
+ if self.embeddings_matrix is None or len(self.embedding_metadata) == 0:
404
+ print(f"ERROR: _find_best_local_match - embeddings_matrix is None={self.embeddings_matrix is None}, metadata_len={len(self.embedding_metadata)}, _embeddings_loaded={self._embeddings_loaded}")
405
+ self.logger.error(f"Embeddings matrix is None despite _embeddings_loaded={self._embeddings_loaded}")
406
+ return None
407
+
408
+ # Create local copies to avoid issues with concurrent modifications
409
+ embeddings_matrix = self.embeddings_matrix.copy() if self.embeddings_matrix is not None else None
410
+ embedding_metadata = self.embedding_metadata.copy()
411
+
412
+ if embeddings_matrix is None:
413
+ print("ERROR: _find_best_local_match - embeddings_matrix copy is None")
414
+ return None
415
+
416
+ try:
417
+ query_array = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
418
+ # Dimension check
419
+ if embeddings_matrix.shape[1] != query_array.shape[1]:
420
+ self.logger.warning(f"Query embedding dim mismatch: query={query_array.shape[1]} staff={embeddings_matrix.shape[1]}")
421
+ print(f"ERROR: DIMENSION MISMATCH - query={query_array.shape[1]} staff={embeddings_matrix.shape[1]}")
422
+ return None
423
+
424
+ # Normalize query embedding
425
+ query_norm = np.linalg.norm(query_array)
426
+ if query_norm == 0:
427
+ return None
428
+ query_array = query_array / query_norm
429
+
430
+ # Compute cosine similarities using matrix multiplication (much faster)
431
+ similarities = np.dot(embeddings_matrix, query_array.T).flatten()
432
+
433
+ # Find the best match
434
+ best_idx = np.argmax(similarities)
435
+ best_similarity = similarities[best_idx]
436
+
437
+ # Check if similarity meets threshold
438
+ if best_similarity >= self.config.similarity_threshold:
439
+ best_staff_embedding = embedding_metadata[best_idx]
440
+ return best_staff_embedding, float(best_similarity)
441
+
442
+ return None
443
+
444
+ except Exception as e:
445
+ self.logger.error(f"Error in local similarity search: {e}", exc_info=True)
446
+ return None
447
+
448
def get_best_similarity(self, query_embedding: List[float]) -> float:
    """Report the highest similarity score for a query, ignoring the threshold.

    Intended for debugging/observability: the query is L2-normalised and
    scored against a copy of ``self.embeddings_matrix`` taken under
    ``self._embeddings_lock``.

    Args:
        query_embedding: Face embedding to score.

    Returns:
        The maximum score as a float, or 0.0 when the cache is empty, the
        dimensions differ, the query has zero norm, or any error occurs.
    """
    with self._embeddings_lock:
        if self.embeddings_matrix is None or len(self.embedding_metadata) == 0:
            return 0.0
        staff_matrix = None if self.embeddings_matrix is None else self.embeddings_matrix.copy()

    if staff_matrix is None:
        return 0.0

    try:
        query_vec = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
        query_dim, staff_dim = query_vec.shape[1], staff_matrix.shape[1]
        if staff_dim != query_dim:
            print(f"ERROR: get_best_similarity DIMENSION MISMATCH - query={query_dim} staff={staff_dim}")
            return 0.0

        magnitude = np.linalg.norm(query_vec)
        if magnitude == 0:
            return 0.0

        scores = np.dot(staff_matrix, (query_vec / magnitude).T).flatten()
        if scores.size > 0:
            return float(np.max(scores))
        return 0.0
    except Exception:
        return 0.0
469
+
470
def extract_embedding_from_detection(self, detection: Dict) -> Tuple[Dict, Optional[List[float]]]:
    """Pull the face embedding out of a detection and validate it.

    An embedding is accepted only when it is a non-empty ``list`` whose items
    are all ``int`` or ``float``. Every rejection is logged as a warning that
    names the detection's ``track_id`` (``'unknown'`` when absent).

    Args:
        detection: Detection dict; the embedding is read from its
            ``"embedding"`` key.

    Returns:
        ``(detection, embedding)`` when the embedding is valid, otherwise
        ``(detection, None)``. The detection is returned unchanged.
    """
    track_label = detection.get('track_id', 'unknown')
    embedding = detection.get("embedding", [])

    # Truthiness is ambiguous for some containers (a multi-element numpy array
    # raises ValueError). Treat those as "present" so the type check below
    # rejects them with a warning instead of crashing the caller.
    try:
        is_missing = not embedding
    except (ValueError, TypeError):
        is_missing = False

    if is_missing:
        self.logger.warning(
            f"Missing embedding in detection: {track_label}"
        )
        return detection, None

    if not isinstance(embedding, list):
        self.logger.warning(
            f"Invalid embedding type {type(embedding)} in detection: {track_label}"
        )
        return detection, None

    # A list that reached this point is non-empty; only its values remain to
    # be checked.
    try:
        if not all(isinstance(val, (int, float)) for val in embedding):
            self.logger.warning(
                f"Non-numeric values in embedding for detection: {track_label}"
            )
            return detection, None
    except Exception as e:
        self.logger.warning(
            f"Error validating embedding values for detection {track_label}: {e}"
        )
        return detection, None

    return detection, embedding
508
+
509
+ # LEGACY (commented out): earlier dict-returning version of _check_track_id_cache, kept for reference. The active implementation is defined below.
510
+ # def _check_track_id_cache(self, track_id: str) -> Optional[Dict]:
511
+ # """
512
+ # Check if a track_id exists in cache.
513
+ # Returns cached result if found, None otherwise.
514
+ # """
515
+ # if not self.config.enable_track_id_cache:
516
+ # return None
517
+ #
518
+ # try:
519
+ # current_time = time.time()
520
+ #
521
+ # # Clean expired entries
522
+ # expired_keys = [
523
+ # key for key, data in self.track_id_cache.items()
524
+ # if current_time - data["timestamp"] > self.config.cache_ttl
525
+ # ]
526
+ # for key in expired_keys:
527
+ # del self.track_id_cache[key]
528
+ #
529
+ # # Check for existing track_id
530
+ # if track_id in self.track_id_cache:
531
+ # self.logger.debug(f"Found cached result for track_id: {track_id}")
532
+ # return self.track_id_cache[track_id]["result"]
533
+ #
534
+ # return None
535
+ # except Exception as e:
536
+ # self.logger.warning(f"Error checking track_id cache: {e}")
537
+ # return None
538
+
539
+ def _check_track_id_cache(self, track_id: str) -> Optional[SearchResult]:
540
+ """
541
+ Check if a track_id exists in cache and return the best result.
542
+ Returns cached SearchResult if found, None otherwise.
543
+ """
544
+ if not self.config.enable_track_id_cache or not track_id:
545
+ return None
546
+
547
+ try:
548
+ with self._cache_lock:
549
+ current_time = time.time()
550
+
551
+ # Clean expired entries
552
+ expired_keys = [
553
+ key for key, data in self.track_id_cache.items()
554
+ if current_time - data["timestamp"] > self.config.cache_ttl
555
+ ]
556
+ for key in expired_keys:
557
+ del self.track_id_cache[key]
558
+
559
+ # Check for existing track_id
560
+ if track_id in self.track_id_cache:
561
+ cached_data = self.track_id_cache[track_id]
562
+ self.logger.debug(f"Found cached result for track_id: {track_id} with similarity: {cached_data['similarity_score']:.3f}")
563
+ return cached_data["result"]
564
+
565
+ return None
566
+ except Exception as e:
567
+ self.logger.warning(f"Error checking track_id cache: {e}")
568
+ return None
569
+
570
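+ # LEGACY (commented out): earlier version of _update_track_id_cache that stored a copy of a result dict, kept for reference. The active implementation is defined below.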
571
+ # def _update_track_id_cache(self, track_id: str, result: Dict):
572
+ # """Update track_id cache with new result."""
573
+ # if not self.config.enable_track_id_cache:
574
+ # return
575
+ #
576
+ # try:
577
+ # # Manage cache size
578
+ # if len(self.track_id_cache) >= self.config.cache_max_size:
579
+ # # Remove oldest entries (simple FIFO)
580
+ # oldest_key = min(
581
+ # self.track_id_cache.keys(),
582
+ # key=lambda k: self.track_id_cache[k]["timestamp"]
583
+ # )
584
+ # del self.track_id_cache[oldest_key]
585
+ #
586
+ # # Add new entry
587
+ # self.track_id_cache[track_id] = {
588
+ # "result": result.copy(),
589
+ # "timestamp": time.time()
590
+ # }
591
+ # except Exception as e:
592
+ # self.logger.warning(f"Error updating track_id cache: {e}")
593
+
594
+ def _update_track_id_cache(self, track_id: str, search_result: SearchResult):
595
+ """
596
+ Update track_id cache with new result.
597
+ Note: Similarity comparison is now handled in the search method.
598
+ """
599
+ if not self.config.enable_track_id_cache or not track_id:
600
+ return
601
+
602
+ try:
603
+ with self._cache_lock:
604
+ current_time = time.time()
605
+ similarity_score = search_result.similarity_score
606
+
607
+ # Manage cache size
608
+ if len(self.track_id_cache) >= self.config.cache_max_size:
609
+ # Remove oldest entries (simple FIFO)
610
+ oldest_key = min(
611
+ self.track_id_cache.keys(),
612
+ key=lambda k: self.track_id_cache[k]["timestamp"]
613
+ )
614
+ del self.track_id_cache[oldest_key]
615
+
616
+ # Update cache entry
617
+ self.track_id_cache[track_id] = {
618
+ "result": search_result,
619
+ "similarity_score": similarity_score,
620
+ "timestamp": current_time
621
+ }
622
+
623
+ self.logger.debug(f"Updated cache for track_id {track_id} with similarity {similarity_score:.3f}")
624
+
625
+ except Exception as e:
626
+ self.logger.warning(f"Error updating track_id cache: {e}")
627
+
628
+ # COMMENTED OUT: Unknown face creation functionality removed
629
+ # def _create_unknown_face_local(self, embedding: List[float], track_id: str = None) -> SearchResult:
630
+ # """Create unknown face entry locally without API call."""
631
+ # try:
632
+ # # Generate unique IDs
633
+ # self.unknown_faces_counter += 1
634
+ # employee_id = f"unknown_{int(time.time())}_{self.unknown_faces_counter}"
635
+ # staff_id = track_id if track_id else f"unknown_{self.unknown_faces_counter}"
636
+ #
637
+ # self.logger.info(f"Creating local unknown face with ID: {employee_id}")
638
+ #
639
+ # # Create SearchResult
640
+ # search_result = SearchResult(
641
+ # employee_id=employee_id,
642
+ # staff_id=staff_id,
643
+ # detection_type="unknown",
644
+ # staff_details={"name": f"Unknown {track_id}"},
645
+ # person_name=f"Unknown {track_id}",
646
+ # similarity_score=0.0
647
+ # )
648
+ #
649
+ # # Add the new unknown embedding to local cache
650
+ # unknown_staff_embedding = StaffEmbedding(
651
+ # embedding_id=f"embedding_{employee_id}",
652
+ # staff_id=staff_id,
653
+ # embedding=embedding,
654
+ # employee_id=employee_id,
655
+ # staff_details={"name": f"Unknown {track_id}"},
656
+ # is_active=True
657
+ # )
658
+ # self._add_embedding_to_local_cache(unknown_staff_embedding)
659
+ #
660
+ # # Cache the result for track_id if caching is enabled
661
+ # if self.config.enable_track_id_cache and track_id:
662
+ # api_result = {
663
+ # "_id": employee_id,
664
+ # "staffId": staff_id,
665
+ # "detectionType": "unknown",
666
+ # "staffDetails": {"name": f"Unknown {track_id}"}
667
+ # }
668
+ # self._update_track_id_cache(track_id, api_result)
669
+ #
670
+ # return search_result
671
+ #
672
+ # except Exception as e:
673
+ # self.logger.error(f"Error creating local unknown face: {e}", exc_info=True)
674
+ # return None
675
+
676
+ def _create_unknown_face_local(self, embedding: List[float], track_id: str = None) -> SearchResult:
677
+ """Unknown face creation disabled - returns None"""
678
+ return None
679
+
680
async def search_face_embedding(self, embedding: List[float], track_id: str = None,
                                location: str = "", timestamp: str = "") -> Optional[SearchResult]:
    """
    Identify a face embedding using the local staff-embedding cache.

    The staff embeddings are reloaded first when the cache is stale or empty,
    then the embedding is matched locally. A local match yields a "known"
    result; otherwise a synthetic "unknown" result (similarity 0.0) is built.
    No remote search API is called.

    When track-id caching is enabled and ``track_id`` is given:
      * "unknown" results are returned as-is. They are never written to the
        cache and never replaced by a cached value, so the track is
        re-evaluated on every call.
      * "known" results are compared with the cached entry for the track and
        the one with the higher similarity is returned (a tie keeps the
        cached entry). The cache is updated whenever the current result wins
        or no entry exists yet.

    Args:
        embedding: Face embedding vector
        track_id: Track ID used as cache key and as staff_id of unknown results
        location: Location identifier (currently unused)
        timestamp: Current timestamp in ISO format (currently unused)

    Returns:
        SearchResult describing the match, or None when no face client is
        configured
    """
    if not self.face_client:
        self.logger.error("Face client not available for embedding search")
        return None

    # Refresh staff embeddings if needed
    if self._should_refresh_embeddings() or self.embeddings_matrix is None:
        self.logger.debug("Staff embeddings cache expired or empty, refreshing...")
        await self._load_staff_embeddings()

    # Always perform the similarity search first
    local_match = self._find_best_local_match(embedding)

    if local_match:
        staff_embedding, similarity_score = local_match
        self.logger.info(f"Local embedding match found - staff_id={staff_embedding.staff_id}, similarity={similarity_score:.3f}, employee_id={staff_embedding.employee_id}")
        self.logger.debug(f"Match details: staff_details={staff_embedding.staff_details}")

        current_search_result = SearchResult(
            employee_id=staff_embedding.employee_id,
            staff_id=staff_embedding.staff_id,
            detection_type="known",
            staff_details=staff_embedding.staff_details,
            person_name=self._extract_person_name(staff_embedding.staff_details),
            similarity_score=similarity_score
        )
    else:
        # Reserve a unique counter value under the lock
        with self._cache_lock:
            self.unknown_faces_counter += 1
            counter_value = self.unknown_faces_counter
        employee_id = f"unknown_{int(time.time())}_{counter_value}"
        staff_id = track_id if track_id else f"unknown_{counter_value}"

        self.logger.info(f"No local match found - creating unknown face entry: employee_id={employee_id}, track_id={track_id}")

        current_search_result = SearchResult(
            employee_id=employee_id,
            staff_id=staff_id,
            detection_type="unknown",
            staff_details={"name": f"Unknown {track_id}"},
            person_name=f"Unknown {track_id}",
            similarity_score=0.0
        )

    # Without caching (or without a track id) the fresh result is final.
    if not (self.config.enable_track_id_cache and track_id):
        return current_search_result

    # Unknown results bypass the cache in both directions: an unknown must
    # never overwrite a cached identity, and the track must be re-checked on
    # every call so it can still be identified later.
    if current_search_result.detection_type == "unknown":
        self.logger.debug(f"Not caching unknown face result for track_id={track_id}")
        return current_search_result

    cached_result = self._check_track_id_cache(track_id)

    if not cached_result:
        self.logger.debug(f"No cached result for track_id: {track_id} - adding known face to cache")
        self._update_track_id_cache(track_id, current_search_result)
        return current_search_result

    cached_similarity = cached_result.similarity_score
    current_similarity = current_search_result.similarity_score

    # A cached unknown is always superseded by a known result (upgrade).
    if cached_result.detection_type == "unknown":
        self.logger.info(f"Upgrading unknown face to known for track_id: {track_id} - similarity: {current_similarity:.3f}")
        self._update_track_id_cache(track_id, current_search_result)
        return current_search_result

    if current_similarity > cached_similarity:
        self.logger.debug(f"New similarity {current_similarity:.3f} > cached {cached_similarity:.3f} for track_id: {track_id} - updating cache")
        self._update_track_id_cache(track_id, current_search_result)
        return current_search_result

    # Cached result is better or equal - keep it
    self.logger.debug(f"Cached similarity {cached_similarity:.3f} >= new {current_similarity:.3f} for track_id: {track_id} - using cached result")
    return cached_result
783
+
784
+ # # API calls are commented out for now
785
+ # try:
786
+ # # TODO: Uncomment this when API is ready
787
+ # # search_results = await self.face_client.search_similar_faces(
788
+ # # face_embedding=embedding,
789
+ # # threshold=self.config.similarity_threshold,
790
+ # # limit=self.config.search_limit,
791
+ # # collection=self.config.search_collection,
792
+ # # location=location,
793
+ # # timestamp=timestamp,
794
+ # # )
795
+ #
796
+ # # # Check if API call was successful
797
+ # # if not search_results.get("success", False):
798
+ # # self.logger.error(
799
+ # # f"API call failed: {search_results.get('message', 'Unknown error')}"
800
+ # # )
801
+ # # # If API fails and no local match, create unknown face locally
802
+ # # return self._create_unknown_face_local(embedding, track_id)
803
+
804
+ # # if not search_results.get("data", []):
805
+ # # # No matches found, create unknown face locally
806
+ # # return self._create_unknown_face_local(embedding, track_id)
807
+
808
+ # # response_data = search_results.get("data", [])
809
+ # # result = response_data[0] # Get first result
810
+ #
811
+ # # For now, create unknown face locally instead of API calls
812
+ # return self._create_unknown_face_local(embedding, track_id)
813
+ #
814
+ # except Exception as e:
815
+ # self.logger.error(f"Error in face embedding search: {e}", exc_info=True)
816
+ # # If any error occurs, create unknown face locally
817
+ # return self._create_unknown_face_local(embedding, track_id)
818
+
819
+ def _extract_person_name(self, staff_details: Dict[str, Any]) -> str:
820
+ """Extract person name from staff details."""
821
+ return str(
822
+ staff_details.get(
823
+ "name",
824
+ staff_details.get("firstName", "Unknown")
825
+ + " "
826
+ + staff_details.get("lastName", "Unknown"),
827
+ )
828
+ )
829
+
830
def _parse_api_result_to_search_result(self, api_result: Dict) -> SearchResult:
    """Convert one API result dict into a ``SearchResult``.

    ``_id``, ``staffId``, ``detectionType`` and ``staffDetails`` are required
    (a missing key raises ``KeyError``); ``score`` is optional and defaults
    to 0.0. The person name is derived from the staff details only for
    "known" detections; every other detection type gets "Unknown".
    """
    # Required keys are read in a fixed order; the first missing one raises.
    employee_id, staff_id, detection_type, staff_details = (
        api_result[key] for key in ("_id", "staffId", "detectionType", "staffDetails")
    )

    if detection_type == "known":
        person_name = self._extract_person_name(staff_details)
    else:
        person_name = "Unknown"

    return SearchResult(
        employee_id=employee_id,
        staff_id=staff_id,
        detection_type=detection_type,
        staff_details=staff_details,
        person_name=person_name,
        similarity_score=api_result.get("score", 0.0)
    )
851
+
852
+ # COMMENTED OUT: Unknown face enrollment functionality removed
853
+ # async def _enroll_unknown_face(self, embedding: List[float], location: str = "", timestamp: str = "", track_id: str = None) -> Optional[SearchResult]:
854
+ # """Enroll unknown face and return SearchResult."""
855
+ # # For now, use local creation instead of API
856
+ # return self._create_unknown_face_local(embedding, track_id)
857
+
858
+ async def _enroll_unknown_face(self, embedding: List[float], location: str = "", timestamp: str = "", track_id: str = None) -> Optional[SearchResult]:
859
+ """Enroll unknown face and return SearchResult."""
860
+ # For now, use local creation instead of API
861
+ # return self._create_unknown_face_local(embedding, track_id)
862
+ return None
863
+
864
+ # TODO: Uncomment when API is ready
865
+ # try:
866
+ # if not timestamp:
867
+ # timestamp = datetime.now(timezone.utc).isoformat()
868
+ #
869
+ # response = await self.face_client.enroll_unknown_person(
870
+ # embedding=embedding,
871
+ # timestamp=timestamp,
872
+ # location=location
873
+ # )
874
+ #
875
+ # if response.get("success", False):
876
+ # data = response.get("data", {})
877
+ # employee_id = data.get("employeeId", "")
878
+ # staff_id = data.get("staffId", "")
879
+ #
880
+ # self.logger.info(f"Successfully enrolled unknown face with ID: {employee_id}")
881
+ #
882
+ # # Create SearchResult
883
+ # search_result = SearchResult(
884
+ # employee_id=employee_id,
885
+ # staff_id=staff_id,
886
+ # detection_type="unknown",
887
+ # staff_details={},
888
+ # person_name="Unknown",
889
+ # similarity_score=0.0
890
+ # )
891
+ #
892
+ # # Add the new unknown embedding to local cache
893
+ # unknown_staff_embedding = StaffEmbedding(
894
+ # embedding_id=data.get("embeddingId", ""),
895
+ # staff_id=staff_id,
896
+ # embedding=embedding,
897
+ # employee_id=employee_id,
898
+ # staff_details={},
899
+ # is_active=True
900
+ # )
901
+ # self._add_embedding_to_local_cache(unknown_staff_embedding)
902
+ #
903
+ # # Cache the result for track_id if caching is enabled
904
+ # if self.config.enable_track_id_cache and track_id:
905
+ # api_result = {
906
+ # "_id": employee_id,
907
+ # "staffId": staff_id,
908
+ # "detectionType": "unknown",
909
+ # "staffDetails": {}
910
+ # }
911
+ # self._update_track_id_cache(track_id, api_result)
912
+ #
913
+ # return search_result
914
+ # else:
915
+ # self.logger.error(f"Failed to enroll unknown face: {response.get('error', 'Unknown error')}")
916
+ # return None
917
+ #
918
+ # except Exception as e:
919
+ # self.logger.error(f"Error enrolling unknown face: {e}", exc_info=True)
920
+ # return None
921
+
922
def update_detection_with_search_result(self, search_result: SearchResult, detection: Dict) -> Dict:
    """Return a copy of ``detection`` enriched with the search result.

    The copy is shallow and the input dict is left untouched. Identity fields
    (``person_id``, ``person_name``, ``recognition_status``, ``employee_id``,
    ``staff_details``, ``similarity_score``) are always set; ``enrolled`` and
    ``category`` depend on the detection type.

    Returns:
        The enriched copy for "known" and "unknown" results. For any other
        detection type a warning is logged and None is returned.
    """
    enriched = detection.copy()
    enriched.update(
        person_id=search_result.staff_id,
        person_name=search_result.person_name,
        recognition_status=search_result.detection_type,
        employee_id=search_result.employee_id,
        staff_details=search_result.staff_details,
        similarity_score=search_result.similarity_score,
    )

    status = search_result.detection_type
    if status == "known":
        enriched["enrolled"] = True
        # Category is "<name with underscores>_<staff id>".
        enriched["category"] = f"{search_result.person_name.replace(' ', '_')}_{search_result.staff_id}"
        return enriched
    if status == "unknown":
        enriched["enrolled"] = False
        enriched["category"] = "unrecognized"
        return enriched

    self.logger.warning(f"Unknown detection type: {search_result.detection_type}")
    return None
944
+
945
+ def __del__(self):
946
+ """Cleanup when object is destroyed"""
947
+ try:
948
+ self.stop_background_refresh()
949
+ except:
950
+ pass