matrice-analytics 0.1.54__py3-none-any.whl → 0.1.55__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of matrice-analytics might be problematic. Click here for more details.

@@ -205,23 +205,58 @@ class EmbeddingManager:
205
205
 
206
206
  self.staff_embeddings = []
207
207
  embeddings_list = []
208
-
208
+ expected_dim: Optional[int] = None
209
+ dims_observed: List[int] = []
210
+ mismatch_examples: List[Tuple[str, int]] = [] # (staffId, dim)
211
+
209
212
  for item in embeddings_data:
210
- # if not item.get("isActive", True): # TODO: Check what is isActive
211
- # continue
212
-
213
+ # Skip items explicitly marked inactive (isActive is False); a missing isActive flag is treated as active
214
+ if isinstance(item, dict) and item.get("isActive") is False:
215
+ continue
216
+
217
+ raw_emb = []
218
+ try:
219
+ raw_emb = item.get("embedding", []) if isinstance(item, dict) else []
220
+ except Exception:
221
+ raw_emb = []
222
+ # Record observed dimension for debugging
223
+ try:
224
+ dims_observed.append(len(raw_emb) if isinstance(raw_emb, list) else 0)
225
+ except Exception:
226
+ dims_observed.append(0)
227
+
228
+ # Validate and coerce embedding list
229
+ if not isinstance(raw_emb, list) or len(raw_emb) == 0:
230
+ continue
231
+ try:
232
+ # Coerce every element to a Python float; an item with any non-numeric value is skipped
233
+ clean_emb = [float(v) for v in raw_emb]
234
+ except Exception:
235
+ continue
236
+
237
+ # Dimension consistency
238
+ if expected_dim is None:
239
+ expected_dim = len(clean_emb)
240
+ if len(clean_emb) != expected_dim:
241
+ # Collect a few examples to aid debugging
242
+ try:
243
+ mismatch_examples.append((str(item.get("staffId", "")), len(clean_emb)))
244
+ except Exception:
245
+ mismatch_examples.append(("", len(clean_emb)))
246
+ self.logger.warning(f"Skipping embedding with mismatched dimension: got {len(clean_emb)} expected {expected_dim}")
247
+ continue
248
+
213
249
  staff_embedding = StaffEmbedding(
214
- embedding_id=item.get("embeddingId", ""),
215
- staff_id=item.get("staffId", ""),
216
- embedding=item.get("embedding", []),
217
- employee_id=str(item.get("employeeId", "")),
218
- staff_details=item.get("staffDetails", {}),
219
- is_active=item.get("isActive", True)
250
+ embedding_id=(item.get("embeddingId", "") if isinstance(item, dict) else ""),
251
+ staff_id=(item.get("staffId", "") if isinstance(item, dict) else ""),
252
+ embedding=clean_emb,
253
+ employee_id=str(item.get("employeeId", "")) if isinstance(item, dict) else "",
254
+ staff_details=(item.get("staffDetails", {}) if isinstance(item, dict) else {}),
255
+ is_active=(item.get("isActive", True) if isinstance(item, dict) else True)
220
256
  )
221
-
222
- if staff_embedding.embedding: # Only add if embedding exists
223
- self.staff_embeddings.append(staff_embedding)
224
- embeddings_list.append(staff_embedding.embedding)
257
+
258
+ self.staff_embeddings.append(staff_embedding)
259
+ embeddings_list.append(clean_emb)
225
260
 
226
261
  # Create numpy matrix for fast similarity computation (thread-safe)
227
262
  with self._embeddings_lock:
@@ -234,12 +269,27 @@ class EmbeddingManager:
234
269
 
235
270
  self.embedding_metadata = self.staff_embeddings.copy()
236
271
  self.staff_embeddings_last_update = time.time()
237
- self.logger.info(f"Successfully loaded and cached {len(self.staff_embeddings)} staff embeddings")
238
- self.logger.debug(f"Embeddings matrix shape: {self.embeddings_matrix.shape}")
272
+ self.logger.info(f"Successfully loaded and cached {len(self.staff_embeddings)} staff embeddings (dim={self.embeddings_matrix.shape[1]})")
273
+ try:
274
+ # Quick sanity metrics
275
+ row0_sum = float(np.sum(self.embeddings_matrix[0])) if self.embeddings_matrix.shape[0] > 0 else 0.0
276
+ self.logger.debug(f"Embeddings matrix shape: {self.embeddings_matrix.shape}, dtype={self.embeddings_matrix.dtype}, row0_sum={row0_sum:.4f}")
277
+ except Exception:
278
+ pass
239
279
  return True
240
280
  else:
241
- self.logger.warning("No active staff embeddings found in API response")
242
- return False
281
+ # Build diagnostics and raise to stop pipeline early with actionable info
282
+ dims_summary: Dict[int, int] = {}
283
+ for d in dims_observed:
284
+ dims_summary[d] = dims_summary.get(d, 0) + 1
285
+ self.logger.error(
286
+ "No valid staff embeddings loaded. Observed dimension distribution: "
287
+ f"{dims_summary}. Expected_dim={expected_dim}. Mismatch examples (staffId, dim): "
288
+ f"{mismatch_examples[:5]}"
289
+ )
290
+ raise RuntimeError(
291
+ f"Failed to load staff embeddings due to dimension mismatch. Observed dims: {dims_summary}"
292
+ )
243
293
 
244
294
  except Exception as e:
245
295
  self.logger.error(f"Error loading staff embeddings: {e}", exc_info=True)
@@ -292,6 +342,10 @@ class EmbeddingManager:
292
342
 
293
343
  try:
294
344
  query_array = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
345
+ # Dimension check
346
+ if embeddings_matrix.shape[1] != query_array.shape[1]:
347
+ self.logger.warning(f"Query embedding dim mismatch: query={query_array.shape[1]} staff={embeddings_matrix.shape[1]}")
348
+ return None
295
349
 
296
350
  # Normalize query embedding
297
351
  query_norm = np.linalg.norm(query_array)
@@ -327,6 +381,8 @@ class EmbeddingManager:
327
381
  return 0.0
328
382
  try:
329
383
  query_array = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
384
+ if embeddings_matrix.shape[1] != query_array.shape[1]:
385
+ return 0.0
330
386
  qn = np.linalg.norm(query_array)
331
387
  if qn == 0:
332
388
  return 0.0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrice_analytics
3
- Version: 0.1.54
3
+ Version: 0.1.55
4
4
  Summary: Common server utilities for Matrice.ai services
5
5
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
6
6
  License-Expression: MIT
@@ -28,7 +28,7 @@ matrice_analytics/post_processing/core/config.py,sha256=uyxWndO-DE9PeGD_h5K3TeB0
28
28
  matrice_analytics/post_processing/core/config_utils.py,sha256=QuAS-_JKSoNOtfUWgr7Alf_wsqODzN2rHlQu-cHRK0s,34311
29
29
  matrice_analytics/post_processing/face_reg/__init__.py,sha256=yntaiGlW9vdjBpPZQXNuovALihJPzRlFyUE88l3MhBA,1364
30
30
  matrice_analytics/post_processing/face_reg/compare_similarity.py,sha256=NlFc8b2a74k0PqSFAbuM_fUbA1BT3pr3VUgvSqRpJzQ,23396
31
- matrice_analytics/post_processing/face_reg/embedding_manager.py,sha256=lt6BSRylvCzPVsKBpYBrDBZ7-LkJOf_aGbsl_5422Ps,37525
31
+ matrice_analytics/post_processing/face_reg/embedding_manager.py,sha256=KhSV-JXykqvhRC1iwvbAyJZrFl3Ntv1eiLE5semKOHE,40533
32
32
  matrice_analytics/post_processing/face_reg/face_recognition.py,sha256=ILkQumMY2ij_QrhJxzLifOv_q5rXh7N4mW94hr6EcgQ,103585
33
33
  matrice_analytics/post_processing/face_reg/face_recognition_client.py,sha256=eF2NYju1uWKXhILndI1rh4_VhWrKSGidui2jjbPQXgM,27596
34
34
  matrice_analytics/post_processing/face_reg/people_activity_logging.py,sha256=vZbIvkK1h3h58ROeF0_ygF3lqr19O2h5222bN8XyIis,13675
@@ -189,8 +189,8 @@ matrice_analytics/post_processing/utils/format_utils.py,sha256=UTF7A5h9j0_S12xH9
189
189
  matrice_analytics/post_processing/utils/geometry_utils.py,sha256=BWfdM6RsdJTTLR1GqkWfdwpjMEjTCJyuBxA4zVGKdfk,9623
190
190
  matrice_analytics/post_processing/utils/smoothing_utils.py,sha256=78U-yucAcjUiZ0NIAc9NOUSIT0PWP1cqyIPA_Fdrjp0,14699
191
191
  matrice_analytics/post_processing/utils/tracking_utils.py,sha256=rWxuotnJ3VLMHIBOud2KLcu4yZfDp7hVPWUtNAq_2xw,8288
192
- matrice_analytics-0.1.54.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
193
- matrice_analytics-0.1.54.dist-info/METADATA,sha256=1RfM8nWJEoj9BnNP4ehKraPM8ZSReG7D2dZ8JYjxqew,14378
194
- matrice_analytics-0.1.54.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
195
- matrice_analytics-0.1.54.dist-info/top_level.txt,sha256=STAPEU-e-rWTerXaspdi76T_eVRSrEfFpURSP7_Dt8E,18
196
- matrice_analytics-0.1.54.dist-info/RECORD,,
192
+ matrice_analytics-0.1.55.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
193
+ matrice_analytics-0.1.55.dist-info/METADATA,sha256=XlOk1g_bwwjiRWoXw5loC_kJd9weHzxZC58JgkhOzhA,14378
194
+ matrice_analytics-0.1.55.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
195
+ matrice_analytics-0.1.55.dist-info/top_level.txt,sha256=STAPEU-e-rWTerXaspdi76T_eVRSrEfFpURSP7_Dt8E,18
196
+ matrice_analytics-0.1.55.dist-info/RECORD,,