OTVision 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,140 +0,0 @@
1
- """
2
- Utils for using iou tracker
3
- """
4
-
5
- # ---------------------------------------------------------
6
- # IOU Tracker
7
- # Copyright (c) 2017 TU Berlin, Communication Systems Group
8
- # Licensed under The MIT License, see
9
- # https://github.com/bochinski/iou-tracker/blob/master/LICENSE
10
- # for details.
11
- # Written by Erik Bochinski
12
- # ---------------------------------------------------------
13
-
14
- from typing import Union
15
-
16
- import numpy as np
17
-
18
-
19
- # TODO: Remove if not needed
20
def nms(
    boxes: np.ndarray,
    scores: np.ndarray,
    overlapThresh: float,
    classes: Union[np.ndarray, None] = None,
) -> Union[tuple[np.ndarray, np.ndarray, np.ndarray], tuple[np.ndarray, np.ndarray]]:
    """Perform non-maximum suppression, based on Malisiewicz et al.

    Args:
        boxes (numpy.ndarray): boxes to process, one [x1, y1, x2, y2] row each.
        scores (numpy.ndarray): corresponding score for each box.
        overlapThresh (float): overlap threshold for boxes to merge.
        classes (numpy.ndarray, optional): class ids for each box.

    Returns:
        (tuple): tuple containing:

            boxes: suppressed boxes
            scores: suppressed scores
            classes: suppressed classes, only if specified
    """
    # Work in floats: the overlap computation below divides box areas.
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")
    if scores.dtype.kind == "i":
        scores = scores.astype("float")

    kept: list = []

    # Box corner coordinates, one vector per corner component.
    left = boxes[:, 0]
    top = boxes[:, 1]
    right = boxes[:, 2]
    bottom = boxes[:, 3]

    # +1 convention: boxes are treated as inclusive pixel ranges.
    areas = (right - left + 1) * (bottom - top + 1)

    # Candidate indices sorted by ascending score; the best remaining
    # candidate therefore always sits at the end of this array.
    order = np.argsort(scores)

    while order.size > 0:
        # Keep the highest-scoring remaining box.
        tail = order.size - 1
        best = order[tail]
        kept.append(best)

        rest = order[:tail]

        # Intersection rectangle of the kept box with every other candidate.
        ix1 = np.maximum(left[best], left[rest])
        iy1 = np.maximum(top[best], top[rest])
        ix2 = np.minimum(right[best], right[rest])
        iy2 = np.minimum(bottom[best], bottom[rest])

        inter_w = np.maximum(0, ix2 - ix1 + 1)
        inter_h = np.maximum(0, iy2 - iy1 + 1)

        # NOTE: ratio of intersection to the *other* box's area
        # (not symmetric IoU) — this matches the original implementation.
        ratio = (inter_w * inter_h) / areas[rest]

        # Drop the kept box and everything overlapping it too strongly.
        drop = np.concatenate(
            (np.array([tail]), np.where(ratio > overlapThresh)[0])
        )
        order = np.delete(order, drop)

    if classes is not None:
        return boxes[kept], scores[kept], classes[kept]
    return boxes[kept], scores[kept]
101
-
102
-
103
def iou(
    bbox1: Union[list[float], tuple[float, float, float, float]],
    bbox2: Union[list[float], tuple[float, float, float, float]],
) -> float:
    """Calculate the intersection-over-union of two bounding boxes.

    Args:
        bbox1 (list or tuple of floats): bounding box in format x1,y1,x2,y2.
        bbox2 (list or tuple of floats): bounding box in format x1,y1,x2,y2.

    Returns:
        float: intersection-over-union of bbox1 and bbox2 in [0.0, 1.0];
            0.0 if the boxes do not overlap.
    """
    # Coerce coordinates to float without rebinding the parameters.
    (x0_1, y0_1, x1_1, y1_1) = (float(value) for value in bbox1)
    (x0_2, y0_2, x1_2, y1_2) = (float(value) for value in bbox2)

    # Overlap rectangle (degenerate if the boxes are disjoint).
    overlap_x0 = max(x0_1, x0_2)
    overlap_y0 = max(y0_1, y0_2)
    overlap_x1 = min(x1_1, x1_2)
    overlap_y1 = min(y1_1, y1_2)

    # No overlap: return a float to honor the declared return type
    # (previously returned int 0).
    if overlap_x1 - overlap_x0 <= 0 or overlap_y1 - overlap_y0 <= 0:
        return 0.0

    # Ratio of the overlap area to the unified area of both boxes.
    size_1 = (x1_1 - x0_1) * (y1_1 - y0_1)
    size_2 = (x1_2 - x0_2) * (y1_2 - y0_2)
    size_intersection = (overlap_x1 - overlap_x0) * (overlap_y1 - overlap_y0)
    size_union = size_1 + size_2 - size_intersection

    return size_intersection / size_union
@@ -1,453 +0,0 @@
1
- import re
2
- from dataclasses import dataclass
3
- from datetime import datetime, timedelta
4
- from pathlib import Path
5
- from typing import Any
6
-
7
- from OTVision import dataformat, version
8
- from OTVision.dataformat import (
9
- CLASS,
10
- CONFIDENCE,
11
- DATA,
12
- DATE_FORMAT,
13
- DETECTIONS,
14
- EXPECTED_DURATION,
15
- FILENAME,
16
- FRAME,
17
- INPUT_FILE_PATH,
18
- INTERPOLATED_DETECTION,
19
- OCCURRENCE,
20
- OTTRACK_VERSION,
21
- RECORDED_START_DATE,
22
- TRACK_ID,
23
- VIDEO,
24
- H,
25
- W,
26
- X,
27
- Y,
28
- )
29
- from OTVision.helpers.date import (
30
- parse_date_string_to_utc_datime,
31
- parse_timestamp_string_to_utc_datetime,
32
- )
33
- from OTVision.helpers.files import (
34
- FULL_FILE_NAME_PATTERN,
35
- HOSTNAME,
36
- InproperFormattedFilename,
37
- read_json,
38
- read_json_bz2_metadata,
39
- )
40
-
41
# Fallback recording start date when the metadata lacks RECORDED_START_DATE.
# NOTE(review): this datetime is naive, while parse_datetime returns
# UTC-aware datetimes — sorting or subtracting groups that mix both would
# raise TypeError; confirm callers never compare the two.
MISSING_START_DATE = datetime(1900, 1, 1)
# Fallback recording length when the metadata lacks EXPECTED_DURATION.
MISSING_EXPECTED_DURATION = timedelta(minutes=15)
43
-
44
-
45
def parse_datetime(date: str | float) -> datetime:
    """Parse a date string or timestamp to a datetime with UTC as timezone.

    Args:
        date (str | float): the date to parse

    Returns:
        datetime: the parsed datetime object with UTC set as timezone
    """
    # A "-" inside a string marks a formatted date; everything else is
    # treated as a UNIX timestamp.
    is_formatted_date = isinstance(date, str) and "-" in date
    if is_formatted_date:
        return parse_date_string_to_utc_datime(date, DATE_FORMAT)
    return parse_timestamp_string_to_utc_datetime(date)
57
-
58
-
59
@dataclass(frozen=True, repr=True)
class Detection:
    """Single detection: classified label, confidence and bounding box."""

    label: str  # classified object type
    conf: float  # detection confidence
    x: float  # bounding box x coordinate
    y: float  # bounding box y coordinate
    w: float  # bounding box width
    h: float  # bounding box height

    def to_dict(self, frame: int, occurrence: datetime, input_file_path: str) -> dict:
        """Serialize for a tracked frame: box data plus frame context."""
        serialized = self.to_otdet()
        serialized.update(
            {
                FRAME: frame,
                OCCURRENCE: occurrence.timestamp(),
                INPUT_FILE_PATH: input_file_path,
                INTERPOLATED_DETECTION: False,
            }
        )
        return serialized

    def to_otdet(self) -> dict:
        """Serialize only the otdet detection attributes."""
        return {
            CLASS: self.label,
            CONFIDENCE: self.conf,
            X: self.x,
            Y: self.y,
            W: self.w,
            H: self.h,
        }
95
-
96
-
97
@dataclass(frozen=True)
class Frame:
    """All detections of a single video frame plus their provenance."""

    frame: int  # frame number within the input file
    occurrence: datetime  # time the frame occurred
    input_file_path: Path  # otdet file the frame was read from
    detections: list[Detection]

    def to_dict(self) -> dict:
        """Serialize the frame and every contained detection."""
        posix_path = self.input_file_path.as_posix()
        return {
            FRAME: self.frame,
            OCCURRENCE: self.occurrence.timestamp(),
            INPUT_FILE_PATH: posix_path,
            DETECTIONS: [
                current.to_dict(self.frame, self.occurrence, posix_path)
                for current in self.detections
            ],
        }

    def derive_frame_number(self, new_frame_number: int) -> "Frame":
        """Copy of this frame carrying a new frame number."""
        return Frame(
            new_frame_number,
            self.occurrence,
            self.input_file_path,
            self.detections,
        )

    def get_output_file(self, with_suffix: str) -> Path:
        """Path of the corresponding output file with the given suffix."""
        return self.input_file_path.with_suffix(with_suffix)
124
-
125
-
126
class FrameGroup:
    """Groups otdet files that belong to one continuous recording.

    Keeps the covered time interval, the recording hostname and per-file
    metadata keyed by the file's POSIX path.
    """

    def __init__(
        self,
        start_date: datetime,
        end_date: datetime,
        hostname: str,
        file: Path | None,
        metadata: dict | None,
    ) -> None:
        # Metadata per file, keyed by the file's POSIX path.
        self._files_metadata: dict[str, dict] = dict()
        self._files: list[Path] = []
        # Both file and metadata must be given to register the file;
        # _merge() creates an empty group by passing None for both.
        if (file is not None) and (metadata is not None):
            self._files_metadata[file.as_posix()] = metadata
            self._files.append(file)

        self.hostname = hostname
        self._start_date = start_date
        self._end_date = end_date

    @property
    def files(self) -> list[Path]:
        # Files contained in this group, in insertion (merge) order.
        return self._files

    def metadata_for(self, file: Path | str) -> dict:
        """Return metadata of the given file (Path or POSIX path string).

        Raises:
            KeyError: if the file is not part of this group.
        """
        if isinstance(file, str):
            return self._files_metadata[file]
        else:
            return self._files_metadata[file.as_posix()]

    def start_date(self) -> datetime:
        # Start of the covered recording interval.
        return self._start_date

    def end_date(self) -> datetime:
        # End of the covered recording interval.
        return self._end_date

    def merge(self, other: "FrameGroup") -> "FrameGroup":
        """Merge this group with another into a new, chronologically
        ordered group; neither input group is modified."""
        if self.start_date() < other.start_date():
            return self._merge(self, other)
        else:
            return self._merge(other, self)

    def _merge(self, first: "FrameGroup", second: "FrameGroup") -> "FrameGroup":
        """Merge two groups, assuming `first` starts no later than `second`.

        Raises:
            ValueError: if the hostnames of both groups differ.
        """
        if first.hostname != second.hostname:
            raise ValueError("Hostname of FrameGroups does not match")
        # Interval spans from the earlier start to the later end.
        merged = FrameGroup(
            start_date=first._start_date,
            end_date=second._end_date,
            hostname=self.hostname,
            file=None,
            metadata=None,
        )

        # Copy file registrations of both inputs into the fresh group.
        merged._files_metadata.update(first._files_metadata)
        merged._files_metadata.update(second._files_metadata)
        merged._files += first.files
        merged._files += second.files

        return merged

    def __repr__(self) -> str:
        return self.__str__()

    def __str__(self) -> str:
        return f"{self._start_date} - {self._end_date}"

    def update_metadata(self, tracker_data: dict[str, dict]) -> None:
        """Write ottrack version and tracking info into each file's metadata.

        Mutates the stored metadata dicts in place.
        """
        for filepath in self.files:
            metadata = self.metadata_for(filepath)
            metadata[OTTRACK_VERSION] = version.ottrack_version()
            metadata[dataformat.TRACKING] = {
                dataformat.OTVISION_VERSION: version.otvision_version(),
                dataformat.FIRST_TRACKED_VIDEO_START: self.start_date().timestamp(),
                dataformat.LAST_TRACKED_VIDEO_END: self.end_date().timestamp(),
                dataformat.TRACKER: tracker_data,
            }
201
-
202
-
203
@dataclass(frozen=True)
class FrameChunk:
    """A parsed otdet file: its path and the frames it contains."""

    file: Path
    frames: list[Frame]

    def start_date(self) -> datetime:
        """Occurrence of the first frame."""
        return self.frames[0].occurrence

    def end_date(self) -> datetime:
        """Occurrence of the last frame."""
        return self.frames[-1].occurrence

    def last_frame_id(self) -> int:
        """Frame number of the last frame."""
        return self.frames[-1].frame

    def get_existing_output_files(self, with_suffix: str) -> list[Path]:
        """Distinct output files with the given suffix that exist on disk."""
        candidates = {
            frame.get_output_file(with_suffix=with_suffix) for frame in self.frames
        }
        return [candidate for candidate in candidates if candidate.is_file()]

    def to_dict(self) -> dict:
        """Serialize as a mapping of frame number to frame data."""
        return {
            DATA: {frame.frame: frame.to_dict() for frame in self.frames},
        }

    def __repr__(self) -> str:
        return self.__str__()

    def __str__(self) -> str:
        return str(self.file)
234
-
235
-
236
class FrameIndexer:
    """Flattens tracked detections and rebases their frame numbers."""

    def reindex(self, frames: dict[str, dict], frame_offset: int) -> list[dict]:
        """Flatten per-track detections, shift frame numbers and sort.

        Consecutive tracks over more than one video must have their frame
        numbers reset to one when splitting; subtracting frame_offset from
        each detection (in place) achieves this.

        Args:
            frames (dict[str, dict]): detections grouped by track.
            frame_offset (int): offset to subtract from every frame number.

        Returns:
            list[dict]: detections sorted by file path, frame and track id.
        """
        flattened = [
            detection
            for track in frames.values()
            for detection in track.values()
        ]
        for detection in flattened:
            detection[FRAME] = detection[FRAME] - frame_offset

        if not flattened:
            return []

        # All detections are expected to stem from a single input file.
        assert len({detection[INPUT_FILE_PATH] for detection in flattened}) == 1

        flattened.sort(
            key=lambda detection: (
                detection[INPUT_FILE_PATH],
                detection[FRAME],
                detection[TRACK_ID],
            )
        )

        return flattened
261
-
262
-
263
class DetectionParser:
    """Converts raw detection dictionaries into Detection instances."""

    def convert(self, data_detections: list[dict[str, str]]) -> list[Detection]:
        """Build one Detection per input dict, coercing numbers to float."""
        return [
            Detection(
                entry[CLASS],
                float(entry[CONFIDENCE]),
                float(entry[X]),
                float(entry[Y]),
                float(entry[W]),
                float(entry[H]),
            )
            for entry in data_detections
        ]
277
-
278
-
279
class FrameChunkParser:
    """Reads otdet files and converts their data section to FrameChunks."""

    @staticmethod
    def parse(
        file_path: Path,
        frame_offset: int = 0,
    ) -> FrameChunk:
        """Read the given otdet file and convert its data section."""
        file_data: dict[int, dict[str, Any]] = read_json(file_path)[DATA]
        return FrameChunkParser.convert(file_data, file_path, frame_offset)

    @staticmethod
    def convert(
        input: dict[int, dict[str, Any]],
        file_path: Path,
        frame_offset: int = 0,
    ) -> FrameChunk:
        """Convert raw frame data into a FrameChunk.

        Frame numbers are shifted by frame_offset and the resulting frames
        are sorted by occurrence, then frame number.
        """
        parser = DetectionParser()
        frames = [
            Frame(
                int(frame_key) + frame_offset,
                occurrence=parse_datetime(frame_value[OCCURRENCE]),
                input_file_path=file_path,
                detections=parser.convert(frame_value[DETECTIONS]),
            )
            for frame_key, frame_value in input.items()
        ]

        frames.sort(key=lambda frame: (frame.occurrence, frame.frame))
        return FrameChunk(file_path, frames)
310
-
311
-
312
class Preprocess:
    """Preprocess otdet file metadata (recording time interval) before running track.
    Input files belonging to the same recording will be merged together.
    The time gap to separate two recordings from each other is defined by
    `self.time_without_frames`.

    Returns:
        Preprocess: preprocessor for tracking
    """

    def __init__(self, time_without_frames: timedelta = timedelta(minutes=1)) -> None:
        # Maximum gap between two files still considered one recording.
        self.time_without_frames = time_without_frames

    def run(self, files: list[Path]) -> list[FrameGroup]:
        """Read metadata of all input files,
        parse the content and merge the frame groups belonging together.

        Args:
            files (list[Path]): list of input files

        Returns:
            list[FrameGroup]: merged frame groups sorted by start date
        """

        groups = self.process(self._read_input(files))
        return sorted(groups, key=lambda r: r.start_date())

    def process(self, input: dict[Path, dict]) -> list[FrameGroup]:
        """Process given otdet files:
        Create FrameGroup for each file then merge frame groups belonging together.

        Args:
            input (dict[Path, dict]): metadata per otdet file path

        Returns:
            list[FrameGroup]: parsed and merged frame groups
        """
        all_groups = [
            self._parse_frame_group(path, metadata) for path, metadata in input.items()
        ]
        if len(all_groups) == 0:
            return []
        return self._merge_groups(all_groups)

    def _read_input(self, files: list[Path]) -> dict[Path, dict]:
        # Only the (bz2-compressed) metadata section is read per file.
        return {path: read_json_bz2_metadata(path) for path in files}

    def _parse_frame_group(self, file_path: Path, metadata: dict) -> FrameGroup:
        """Read and parse metadata of the given file to a FrameGroup
        covering the recording time interval defined by:
        - the recorded start date and
        - the expected duration given in the metadata

        Args:
            file_path (Path): path of otdet file
            metadata (dict): metadata of otdet file

        Returns:
            FrameGroup: parsed input and metadata of the file
        """

        start_date: datetime = self.extract_start_date_from(metadata)
        duration: timedelta = self.extract_expected_duration_from(metadata)
        end_date: datetime = start_date + duration
        hostname = self.get_hostname(metadata)

        return FrameGroup(
            start_date=start_date,
            end_date=end_date,
            file=file_path,
            metadata=metadata,
            hostname=hostname,
        )

    @staticmethod
    def get_hostname(file_metadata: dict) -> str:
        """Retrieve hostname from the given file metadata.

        The hostname is extracted from the recorded video's file name.

        Args:
            file_metadata (dict): metadata content.

        Raises:
            InproperFormattedFilename: if the filename is not formatted as expected, an
                exception will be raised.

        Returns:
            str: the hostname
        """
        video_name = Path(file_metadata[VIDEO][FILENAME]).name
        match = re.search(
            FULL_FILE_NAME_PATTERN,
            video_name,
        )
        if match:
            return match.group(HOSTNAME)

        raise InproperFormattedFilename(
            f"Could not parse {video_name} with pattern: {FULL_FILE_NAME_PATTERN}."
        )

    def _merge_groups(self, all_groups: list[FrameGroup]) -> list[FrameGroup]:
        """Merge frame groups whose start and end times are close to each other. Close
        is defined by `self.time_without_frames`.

        Args:
            all_groups (list[FrameGroup]): list of frame groups to merge

        Returns:
            list[FrameGroup]: list of merged frame groups
        """
        assert len(all_groups) >= 1

        merged_groups = []
        # Process chronologically so only adjacent groups need comparing.
        sorted_groups = sorted(all_groups, key=lambda group: group.start_date())
        last_group = sorted_groups[0]
        for current_group in sorted_groups[1:]:
            if last_group.hostname != current_group.hostname:
                # Different recording device: never merge.
                merged_groups.append(last_group)
                last_group = current_group
            elif (
                timedelta(seconds=0)
                <= (current_group.start_date() - last_group.end_date())
                <= self.time_without_frames
            ):
                # Gap is non-negative and small enough: same recording.
                last_group = last_group.merge(current_group)
            else:
                merged_groups.append(last_group)
                last_group = current_group
        merged_groups.append(last_group)
        return merged_groups

    def extract_start_date_from(self, metadata: dict) -> datetime:
        # Falls back to MISSING_START_DATE if no start date was recorded.
        if RECORDED_START_DATE in metadata[VIDEO].keys():
            recorded_start_date = metadata[VIDEO][RECORDED_START_DATE]
            return parse_datetime(recorded_start_date)
        return MISSING_START_DATE

    def extract_expected_duration_from(self, metadata: dict) -> timedelta:
        # Falls back to MISSING_EXPECTED_DURATION if no duration is given.
        if EXPECTED_DURATION in metadata[VIDEO].keys():
            expected_duration = metadata[VIDEO][EXPECTED_DURATION]
            return timedelta(seconds=int(expected_duration))
        return MISSING_EXPECTED_DURATION