matrice-analytics 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of matrice-analytics might be problematic.

@@ -1,41 +1,78 @@
  import numpy as np
  from typing import List, Dict, Tuple, Optional
  from dataclasses import dataclass, field
+ from pathlib import Path
  import cv2
- from scipy.special import softmax
+ import os
+ import io
+ import threading
+ # from scipy.special import softmax
  import requests
  try:
      from transformers import CLIPProcessor
      import onnxruntime as ort
      from PIL import Image
+     from importlib.resources import files as ir_files, as_file as ir_as_file
+
  except:
+     ir_files = None
+     ir_as_file = None
      print("Unable to import onnxruntime")

- def load_model_from_checkpoint(checkpoint_path,local_path):
+ def load_model_from_checkpoint(checkpoint_url: str, providers: Optional[List] = None):
      """
-     Load a model from checkpoint URL
+     Load an ONNX model from a URL directly into memory without writing locally.
+     Enforces the specified providers (e.g., CUDAExecutionProvider) for execution.
      """
      try:
-         print(f"Loading model from checkpoint: {checkpoint_path}")
-
-         # Check if checkpoint is a URL
-         if checkpoint_path.startswith(('http://', 'https://')):
-             # Download checkpoint from URL
-             response = requests.get(checkpoint_path, timeout = (30,200))
-             if response.status_code == 200:
-                 with open(local_path, 'wb') as f:
-                     f.write(response.content)
-                 checkpoint_path = local_path
-                 print(f"Downloaded checkpoint to {local_path}")
-             else:
-                 print(f"Failed to download checkpoint from {checkpoint_path}")
-                 return None
-
-         # Load the model from the checkpoint
-         model = ort.InferenceSession(checkpoint_path, providers=["CUDAExecutionProvider","CPUExecutionProvider"])
-         print(f"{local_path} Model loaded successfully from checkpoint")
+         print(f"Loading model from checkpoint: {checkpoint_url}")
+
+         # Download the checkpoint with streaming
+         response = requests.get(checkpoint_url, stream=True, timeout=(30, 200))
+         response.raise_for_status()
+
+         # Read the content into bytes
+         model_bytes = io.BytesIO()
+         for chunk in response.iter_content(chunk_size=8192):
+             if chunk:
+                 model_bytes.write(chunk)
+         model_bytes.seek(0) # reset pointer to start
+
+         # Prepare session options for performance
+         try:
+             sess_options = ort.SessionOptions()
+             # Enable all graph optimizations
+             sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+             # Conservative thread usage – GPU work dominates
+             sess_options.intra_op_num_threads = 1
+             sess_options.inter_op_num_threads = 1
+         except Exception:
+             sess_options = None
+
+         # Resolve providers
+         available = ort.get_available_providers()
+         print("Available providers:", available)
+         use_providers = ["CUDAExecutionProvider"] #providers or
+
+         # Validate providers and enforce CUDA when requested
+         if any(
+             (isinstance(p, tuple) and p[0] == "CUDAExecutionProvider") or p == "CUDAExecutionProvider"
+             for p in use_providers
+         ):
+             if "CUDAExecutionProvider" not in available:
+                 raise RuntimeError("CUDAExecutionProvider not available in this environment")
+
+         # Load ONNX model from bytes with enforced providers
+         model = ort.InferenceSession(
+             model_bytes.read(),
+             sess_options=sess_options,
+             providers=use_providers,
+         )
+
+         print("Session providers:", model.get_providers())
+         print("Model loaded successfully from checkpoint (in-memory)")
          return model
-
+
      except Exception as e:
          print(f"Error loading model from checkpoint: {e}")
          return None
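
In the new version, the loader takes only a checkpoint URL plus an optional providers list, streams the file into memory, and returns an onnxruntime.InferenceSession (or None on any failure). Note that in this release the providers argument is effectively ignored: CUDAExecutionProvider is hard-coded and a RuntimeError is raised when CUDA is unavailable. A minimal usage sketch (the URL below is a placeholder, not an endpoint shipped with the package):

    # Hypothetical call site for the new signature; requires a CUDA-capable onnxruntime build,
    # otherwise the function logs the error and returns None.
    session = load_model_from_checkpoint("https://example.com/models/clip_image.onnx")
    if session is not None:
        print(session.get_providers())        # expected: ['CUDAExecutionProvider']
        print(session.get_inputs()[0].name)   # inspect the model's input signature
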
@@ -46,7 +83,7 @@ class ClipProcessor:
      def __init__(self,
                   image_model_path: str = 'https://s3.us-west-2.amazonaws.com/testing.resources/datasets/clip_image.onnx',
                   text_model_path: str = 'https://s3.us-west-2.amazonaws.com/testing.resources/datasets/clip_text.onnx',
-                  processor_dir: str = './clip_processor',
+                  processor_dir: Optional[str] = None,
                   providers: Optional[List[str]] = None):

          self.color_category: List[str] = ["black", "white", "yellow", "gray", "red", "blue", "light blue",
@@ -54,12 +91,45 @@ class ClipProcessor:

          self.image_url: str = image_model_path
          self.text_url: str = text_model_path
-         self.processor_path: str = processor_dir
+         # Resolve processor_dir relative to this module, not CWD
+         self.processor_path: str = self._resolve_processor_dir(processor_dir)
+         print("PROCESSOR PATH->", self.processor_path)
+         cwd = os.getcwd()
+         print("Current working directory:", cwd)
+
+         # Determine and enforce providers (prefer CUDA only)
+         try:
+             available = ort.get_available_providers()
+         except Exception:
+             available = []
+
+         if providers is None:
+             if "CUDAExecutionProvider" in available:
+                 self.providers = ["CUDAExecutionProvider"]
+             else:
+                 # Enforce GPU-only per requirement; raise if not available
+                 print("CUDAExecutionProvider not available; ensure CUDA-enabled onnxruntime-gpu is installed and GPU is visible")
+         else:
+             self.providers = providers

-         self.image_sess = load_model_from_checkpoint(self.image_url,"clip_image.onnx")
-         self.text_sess = load_model_from_checkpoint(self.text_url,"clip_text.onnx")
+         # Thread-safety to serialize processing
+         self._lock = threading.Lock()

-         self.processor = CLIPProcessor.from_pretrained(self.processor_path)
+         self.image_sess = load_model_from_checkpoint(self.image_url, providers=self.providers)
+         self.text_sess = load_model_from_checkpoint(self.text_url, providers=self.providers)
+
+
+         # Load CLIPProcessor tokenizer/config from local package data if available
+         self.processor = None
+         try:
+             if self.processor_path and os.path.isdir(self.processor_path):
+                 self.processor = CLIPProcessor.from_pretrained(self.processor_path, local_files_only=True)
+             else:
+                 # Fallback to hub
+                 self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+         except Exception as e:
+             print(f"Falling back to remote CLIPProcessor due to error loading local assets: {e}")
+             self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

          tok = self.processor.tokenizer(self.color_category, padding=True, return_tensors="np")
          ort_inputs_text = {
@@ -73,7 +143,8 @@ class ClipProcessor:
          self.pixel_template = sample["pixel_values"].astype(np.float32)
          self.min_box_size = 32
          self.max_batch = 32
-         self.frame_skip = 2
+         # Classify every frame for stability unless changed by caller
+         self.frame_skip = 1
          self.batch_pixels = np.zeros((self.max_batch, *self.pixel_template.shape[1:]), dtype=np.float32)

          self.records: Dict[int, Dict[str, float]] = {}
@@ -81,107 +152,159 @@ class ClipProcessor:
          self.processed_frames = 0


+     def _resolve_processor_dir(self, processor_dir: Optional[str]) -> str:
+         """
+         Find the absolute path to the bundled 'clip_processor' assets directory in the
+         installed package, independent of current working directory.
+
+         Resolution order:
+         1) Explicit processor_dir if provided.
+         2) Directory next to this file: <module_dir>/clip_processor
+         3) importlib.resources (Python 3.9+): matrice_analytics.post_processing.usecases.color/clip_processor
+         """
+         if processor_dir:
+             return os.path.abspath(processor_dir)
+
+         # 2) Try path next to this file
+         module_dir = Path(__file__).resolve().parent
+         candidate = module_dir / "clip_processor"
+         if candidate.is_dir():
+             return str(candidate)
+
+         # 3) Try importlib.resources if available
+         try:
+             if ir_files is not None:
+                 pkg = "matrice_analytics.post_processing.usecases.color"
+                 res = ir_files(pkg).joinpath("clip_processor")
+                 try:
+                     # If packaged in a zip, materialize to a temp path
+                     with ir_as_file(res) as p:
+                         if Path(p).is_dir():
+                             return str(p)
+                 except Exception:
+                     # If already a concrete path
+                     if res and str(res):
+                         return str(res)
+         except Exception:
+             pass
+
+         # Fallback to CWD-relative (last resort)
+         return os.path.abspath("clip_processor")
+
      def process_color_in_frame(self, detections, input_bytes, zones: Optional[Dict[str, List[List[float]]]], stream_info):
-         boxes = []
-         tracked_ids: List[int] = []
-         frame_number: Optional[int] = None
-         # print(detections)
-         self.frame_idx+=1
-         nparr = np.frombuffer(input_bytes, np.uint8) # convert bytes to numpy array
-         image = cv2.imdecode(nparr, cv2.IMREAD_COLOR) # decode image
-
-         # Step 2: Convert PIL → NumPy array
-         frame = np.array(image)
-         if stream_info:
-             input_settings = stream_info.get("input_settings", {})
-             start_frame = input_settings.get("start_frame")
-             end_frame = input_settings.get("end_frame")
-             if start_frame is not None and end_frame is not None and start_frame == end_frame:
-                 frame_number = start_frame
-
-         for det in detections:
-             bbox = det.get('bounding_box')
-             tid = det.get('track_id')
-             zones = zones if zones else {}
-             for z_name, zone_polygon in zones.items():
-                 if self._is_in_zone(bbox, zone_polygon):
-                     w = bbox['xmax'] - bbox['xmin']
-                     h = bbox['ymax'] - bbox['ymin']
-                     if w >= self.min_box_size and h >= self.max_batch:
-                         boxes.append(bbox)
-                         tracked_ids.append(tid)
-         # print(boxes)
-         # print(tracked_ids)
-         if not boxes:
-             print(f"Frame {self.frame_idx}: No cars in zone")
-             self.processed_frames += 1
-             # print(f"Frame {frame_idx} processedms\n")
-             return
-
-         # print(boxes)
-         # print(tracked_ids)
-         crops_for_model = []
-         map_trackidx_to_cropidx = []
-         for i,(bbox, tid) in enumerate(zip(boxes, tracked_ids)):
-             last_rec = self.records.get(tid)
-             should_classify = False
-             if last_rec is None:
-                 should_classify = True
-             else:
-                 if (self.frame_idx - last_rec.get("last_classified_frame", -999)) >= self.frame_skip:
-                     should_classify = True
-             if should_classify:
-                 x1, y1, x2, y2 = bbox['xmin'], bbox['ymin'], bbox['xmax'], bbox['ymax']
-                 # crop safely
-                 y1c, y2c = max(0, y1), min(frame.shape[0], y2)
-                 x1c, x2c = max(0, x1), min(frame.shape[1], x2)
-                 if y2c - y1c <= 0 or x2c - x1c <= 0:
+         # Serialize processing to avoid concurrent access and potential frame drops
+         with self._lock:
+             print("=== process_color_in_frame called ===")
+             print(f"Number of detections: {len(detections) if detections else 0}")
+             print(f"Input bytes length: {len(input_bytes) if input_bytes else 0}")
+
+             boxes = []
+             tracked_ids: List[int] = []
+             frame_number: Optional[int] = None
+             print(detections)
+             self.frame_idx += 1
+
+             if not detections:
+                 print(f"Frame {self.frame_idx}: No detections provided")
+                 self.processed_frames += 1
+                 return {}
+
+             nparr = np.frombuffer(input_bytes, np.uint8) # convert bytes to numpy array
+             image = cv2.imdecode(nparr, cv2.IMREAD_COLOR) # decode image
+
+             if image is None:
+                 print(f"Frame {self.frame_idx}: Failed to decode image")
+                 self.processed_frames += 1
+                 return {}
+
+             # Step 2: Use decoded frame directly (BGR → RGB performed at crop time)
+             frame = image
+             if stream_info:
+                 input_settings = stream_info.get("input_settings", {})
+                 start_frame = input_settings.get("start_frame")
+                 end_frame = input_settings.get("end_frame")
+                 if start_frame is not None and end_frame is not None and start_frame == end_frame:
+                     frame_number = start_frame
+
+             for det in detections:
+                 bbox = det.get('bounding_box')
+                 tid = det.get('track_id')
+                 if not bbox or not tid:
                      continue
-                 crop = cv2.cvtColor(frame[y1c:y2c, x1c:x2c], cv2.COLOR_BGR2RGB)
-                 pil_img = Image.fromarray(crop).resize((224, 224))
-                 map_trackidx_to_cropidx.append((tid, len(crops_for_model)))
-                 crops_for_model.append(pil_img)
-         # print(crops_for_model)
-         # print(map_trackidx_to_cropidx)
-
-         if crops_for_model:
-             record = {}
-             img_embeds = self.run_image_onnx_on_crops(crops_for_model) # [N, D]
-             # compute similarity with text_embeds (shape [num_labels, D])
-             sims = img_embeds @ self.text_embeds.T # [N, num_labels]
-             # convert to probs
-             probs = np.exp(sims) / np.exp(sims).sum(axis=-1, keepdims=True) # softmax numerically simple
-             # print(probs)
-
-             # assign back to corresponding tracks
-             for (tid, crop_idx) in map_trackidx_to_cropidx:
-                 prob = probs[crop_idx]
-                 # print(prob)
-                 best_idx = int(np.argmax(prob))
-                 best_label = self.color_category[best_idx]
-                 # print(best_label)
-                 best_score = float(prob[best_idx])
-                 # print(best_score)
-
-                 rec = self.records.get(tid)
-                 # if rec is None:
-                 record[tid] = {
-                     "frame": self.frame_idx,
-                     "color": best_label,
-                     "confidence": best_score,
-                     "track_id": tid,
-                     "last_classified_frame": self.frame_idx,
-                 }
-                 # else:
-                 #     # update only if confidence improves
-                 #     if best_score > rec["confidence"]:
-                 #         rec["color"] = best_label
-                 #         rec["confidence"] = best_score
-                 #         rec["frame"] = self.frame_idx
-                 #         rec["last_classified_frame"] = self.frame_idx
-
-
-         return record
+                     w = bbox['xmax'] - bbox['xmin']
+                     h = bbox['ymax'] - bbox['ymin']
+                     if w >= self.min_box_size and h >= self.min_box_size:
+                         boxes.append(bbox)
+                         tracked_ids.append(tid)
+
+             if not boxes:
+                 print(f"Frame {self.frame_idx}: No cars in zone")
+                 self.processed_frames += 1
+                 return {}
+
+             # print(boxes)
+             # print(tracked_ids)
+             crops_for_model = []
+             map_trackidx_to_cropidx = []
+             for i, (bbox, tid) in enumerate(zip(boxes, tracked_ids)):
+                 last_rec = self.records.get(tid)
+                 should_classify = False
+                 if last_rec is None:
+                     should_classify = True
+                 else:
+                     if (self.frame_idx - last_rec.get("last_classified_frame", -999)) >= self.frame_skip:
+                         should_classify = True
+                 if should_classify:
+                     x1, y1, x2, y2 = bbox['xmin'], bbox['ymin'], bbox['xmax'], bbox['ymax']
+                     # crop safely - convert to integers
+                     y1c, y2c = max(0, int(y1)), min(frame.shape[0], int(y2))
+                     x1c, x2c = max(0, int(x1)), min(frame.shape[1], int(x2))
+                     print(f"Cropping bbox: x1c={x1c}, y1c={y1c}, x2c={x2c}, y2c={y2c}, frame_shape={frame.shape}")
+                     if y2c - y1c <= 0 or x2c - x1c <= 0:
+                         print(f"Skipping invalid crop: dimensions {x2c-x1c}x{y2c-y1c}")
+                         continue
+                     crop = cv2.cvtColor(frame[y1c:y2c, x1c:x2c], cv2.COLOR_BGR2RGB)
+                     map_trackidx_to_cropidx.append((tid, len(crops_for_model)))
+                     # Pass raw numpy crop; resize handled in run_image_onnx_on_crops
+                     crops_for_model.append(crop)
+                     # print(f"Added crop for track_id {tid}")
+             # print(crops_for_model)
+
+             record = {} # Initialize record outside the if block
+             if crops_for_model:
+                 img_embeds = self.run_image_onnx_on_crops(crops_for_model) # [N, D]
+                 # compute similarity with text_embeds (shape [num_labels, D])
+                 sims = img_embeds @ self.text_embeds.T # [N, num_labels]
+                 # convert to probs
+                 probs = np.exp(sims) / np.exp(sims).sum(axis=-1, keepdims=True) # softmax numerically simple
+                 # print(probs)
+
+                 # assign back to corresponding tracks
+                 for (tid, crop_idx) in map_trackidx_to_cropidx:
+                     prob = probs[crop_idx]
+                     # print(prob)
+                     best_idx = int(np.argmax(prob))
+                     best_label = self.color_category[best_idx]
+                     # print(best_label)
+                     best_score = float(prob[best_idx])
+                     # print(best_score)
+
+                     rec = self.records.get(tid)
+                     det_info = next((d for d in detections if d.get("track_id") == tid), {})
+                     category_label = det_info.get("category", "unknown")
+                     zone_name = det_info.get("zone_name", "Unknown_Zone")
+                     record[tid] = {
+                         "frame": self.frame_idx,
+                         "color": best_label,
+                         "confidence": best_score,
+                         "track_id": tid,
+                         "object_label": category_label,
+                         "zone_name": zone_name,
+                         "last_classified_frame": self.frame_idx,
+                     }
+                 print(record)
+
+             return record


      def run_image_onnx_on_crops(self, crops):
@@ -203,10 +326,13 @@ class ClipProcessor:
                  print(f"Skipping crop {i}: resize failed ({e})")

          if not valid_crops:
-             print("⚠️ No valid crops to process")
+             print("No valid crops to process")
              return np.zeros((0, self.text_embeds.shape[-1]), dtype=np.float32)

          # Convert all valid crops at once
+
+         #ToDO: Check if the processor and model.run is running on single thread and is uusing GPU. Latency should be <100ms.
+
          pixel_values = self.processor(images=valid_crops, return_tensors="np")["pixel_values"]
          n = pixel_values.shape[0]
          self.batch_pixels[:n] = pixel_values
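
A note on the probability step introduced in this version: with scipy.special.softmax commented out, probabilities are computed inline as np.exp(sims) / np.exp(sims).sum(axis=-1, keepdims=True), which can overflow for large similarity values. A numerically stable equivalent, shown here only as an illustrative sketch (not code from the package), subtracts the per-row maximum before exponentiating:

    import numpy as np

    def stable_softmax(sims: np.ndarray) -> np.ndarray:
        # Subtracting the row-wise max keeps np.exp in range; the output is mathematically
        # identical to softmax(sims) along the last axis.
        shifted = sims - sims.max(axis=-1, keepdims=True)
        exp = np.exp(shifted)
        return exp / exp.sum(axis=-1, keepdims=True)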