endoreg-db 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. endoreg_db/management/commands/delete_all.py +18 -0
  2. endoreg_db/management/commands/fix_auth_permission.py +20 -0
  3. endoreg_db/management/commands/load_user_groups.py +8 -47
  4. endoreg_db/migrations/0001_initial.py +1 -1
  5. endoreg_db/migrations/0002_rawvideofile.py +26 -0
  6. endoreg_db/migrations/0003_rawvideofile_frames_required.py +18 -0
  7. endoreg_db/migrations/0004_rename_hash_rawvideofile_video_hash.py +18 -0
  8. endoreg_db/migrations/0005_ffmpegmeta_remove_videoimportmeta_center_and_more.py +56 -0
  9. endoreg_db/migrations/0006_rawvideofile_center_alter_videometa_processor.py +25 -0
  10. endoreg_db/migrations/0007_rawvideofile_processor.py +19 -0
  11. endoreg_db/migrations/0008_rename_frames_required_rawvideofile_state_frames_required.py +18 -0
  12. endoreg_db/migrations/0009_sensitivemeta_rawvideofile_sensitive_meta.py +31 -0
  13. endoreg_db/migrations/0010_rename_endoscope_serial_number_sensitivemeta_endoscope_sn.py +18 -0
  14. endoreg_db/migrations/0011_rawvideofile_state_sensitive_data_retrieved.py +18 -0
  15. endoreg_db/migrations/0012_rawvideofile_prediction_dir_and_more.py +109 -0
  16. endoreg_db/models/data_file/__init__.py +4 -1
  17. endoreg_db/models/data_file/base_classes/__init__.py +0 -1
  18. endoreg_db/models/data_file/base_classes/abstract_video.py +1 -0
  19. endoreg_db/models/data_file/import_classes/__init__.py +31 -0
  20. endoreg_db/models/data_file/import_classes/processing_functions.py +269 -0
  21. endoreg_db/models/data_file/import_classes/raw_video.py +341 -0
  22. endoreg_db/models/data_file/metadata/__init__.py +133 -0
  23. endoreg_db/models/data_file/metadata/sensitive_meta.py +13 -0
  24. endoreg_db/models/data_file/video/__init__.py +1 -1
  25. endoreg_db/models/data_file/video/import_meta.py +21 -21
  26. endoreg_db/models/permissions/__init__.py +44 -0
  27. endoreg_db/utils/cropping.py +29 -0
  28. endoreg_db/utils/file_operations.py +30 -0
  29. endoreg_db/utils/legacy_ocr.py +201 -0
  30. endoreg_db/utils/ocr.py +19 -23
  31. endoreg_db/utils/uuid.py +4 -0
  32. endoreg_db/utils/video_metadata.py +2 -2
  33. {endoreg_db-0.2.3.dist-info → endoreg_db-0.3.0.dist-info}/METADATA +7 -1
  34. {endoreg_db-0.2.3.dist-info → endoreg_db-0.3.0.dist-info}/RECORD +36 -13
  35. {endoreg_db-0.2.3.dist-info → endoreg_db-0.3.0.dist-info}/LICENSE +0 -0
  36. {endoreg_db-0.2.3.dist-info → endoreg_db-0.3.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,269 @@
1
+ from .raw_video import RawVideoFile
2
+
3
+ # # Starting point
4
+ # Automated tasks generate RawVideoFile objects in our db.
5
+ # Each object has state_{NAME} attributes.
6
+ # We will create functions which query the db for RawVideoFile
7
+ # objects with specific state_{NAME} attributes.
8
+ # Then, we perform the necessary operations on the RawVideoFile and
9
+ # update the state_{NAME} attributes accordingly.
10
+
11
+ # # Step 1 - Frame Extraction
12
+ # function to query for videos scheduled for frame extraction,
13
+ # these have state_frames_required and state_frames_extracted
14
def get_videos_scheduled_for_frame_extraction():
    """Return the RawVideoFile queryset flagged for frame extraction.

    A video qualifies when frames are required but not yet extracted.
    """
    pending = {
        "state_frames_required": True,
        "state_frames_extracted": False,
    }
    return RawVideoFile.objects.filter(**pending)
19
+
20
def extract_frames_from_video(video: RawVideoFile):
    """Extract frames for *video* and persist the extraction state.

    Delegates the actual ffmpeg work to ``RawVideoFile.extract_frames``
    and then records ``state_frames_extracted`` on the model.
    """
    video.extract_frames()

    video.state_frames_extracted = True
    video.save()

    return video
29
+
30
def extract_frames_from_videos():
    """Run frame extraction for every video scheduled for it."""
    for pending_video in get_videos_scheduled_for_frame_extraction():
        extract_frames_from_video(pending_video)
34
+
35
+ # # Step 2 - OCR
36
+ # function to query for videos scheduled for OCR,
37
+ # these have
38
+ # state_ocr_required = True and state_ocr_completed = False and state_frames_extracted = True
39
def get_videos_scheduled_for_ocr():
    """Return videos ready for OCR.

    Ready means: OCR requested, not yet completed, and frames already
    extracted (OCR runs on the extracted frame images).
    """
    criteria = {
        "state_ocr_required": True,
        "state_ocr_completed": False,
        "state_frames_extracted": True,
    }
    return RawVideoFile.objects.filter(**criteria)
45
+
46
+ # function to set state_frames_required to True for videos
47
+ # which are scheduled for OCR but have not had frames extracted
48
def videos_scheduled_for_ocr_preflight():
    """Flag frame extraction for OCR candidates that have no frames yet.

    Videos awaiting OCR without extracted frames get
    ``state_frames_required`` set so the extraction step picks them up.
    Per-object ``save()`` is used on purpose: the model has custom save
    logic that a bulk ``update()`` would bypass.
    """
    missing_frames = RawVideoFile.objects.filter(
        state_ocr_required=True,
        state_ocr_completed=False,
        state_frames_extracted=False,
    )
    for candidate in missing_frames:
        candidate.state_frames_required = True
        candidate.save()
57
+
58
def perform_ocr_on_video(video: RawVideoFile):
    """Run OCR-based metadata extraction on *video* and mark it done."""
    video.update_text_metadata()

    video.state_ocr_completed = True
    video.save()

    return video
67
+
68
def perform_ocr_on_videos():
    """Run OCR on every video scheduled for it."""
    for scheduled_video in get_videos_scheduled_for_ocr():
        perform_ocr_on_video(scheduled_video)
72
+
73
+
74
+ # # Step 3 - initial Prediction
75
+ # function to query for videos scheduled for initial prediction,
76
+ # these have
77
+ # state_initial_prediction_required = True and state_initial_prediction_completed = False and state_frames_extracted = True
78
def videos_scheduled_for_initial_prediction_preflight():
    """Request frame extraction for prediction candidates lacking frames.

    Prediction needs extracted frames; any video scheduled for initial
    prediction without frames gets ``state_frames_required`` set.
    """
    needs_frames = RawVideoFile.objects.filter(
        state_initial_prediction_required=True,
        state_initial_prediction_completed=False,
        state_frames_extracted=False,
    )
    for candidate in needs_frames:
        candidate.state_frames_required = True
        candidate.save()
87
+
88
def get_videos_scheduled_for_initial_prediction():
    """Return videos ready for initial prediction (frames present, not done)."""
    criteria = {
        "state_initial_prediction_required": True,
        "state_initial_prediction_completed": False,
        "state_frames_extracted": True,
    }
    return RawVideoFile.objects.filter(**criteria)
94
+
95
from pathlib import Path


def get_multilabel_model(model_path: Path, device: str = "cuda"):
    """Load a multilabel classification checkpoint, ready for inference.

    Parameters
    ----------
    model_path : Path
        Path to the ``.ckpt`` checkpoint file.
    device : str, optional
        Device to place the model on. Defaults to ``"cuda"``, matching the
        previously hard-coded ``model.cuda()`` behavior; parameterized so
        CPU-only hosts can pass ``"cpu"``.

    Returns
    -------
    The loaded model in eval mode on *device*.
    """
    # Imported lazily: this heavy dependency is only needed on prediction hosts.
    from agl_predict_endo_frame.model_loader import MultiLabelClassificationNet

    model = MultiLabelClassificationNet.load_from_checkpoint(
        model_path.resolve().as_posix()
    )
    model.to(device)
    model.eval()
    return model
103
+
104
def get_multilabel_classifier(model, verbose: bool = False):
    """Wrap *model* in an ``agl_predict_endo_frame`` Classifier."""
    from agl_predict_endo_frame.predict import Classifier

    return Classifier(model, verbose=verbose)
108
+
109
def get_crops(video, paths):
    """Return one endoscope-ROI crop tuple per frame path.

    The video's endoscope ROI is a dict with ``x``/``y``/``width``/``height``;
    the classifier expects ``(y_min, y_max, x_min, x_max)`` tuples, one per
    frame path.
    """
    roi = video.get_endo_roi()
    y_min, x_min = roi["y"], roi["x"]
    crop = (y_min, y_min + roi["height"], x_min, x_min + roi["width"])
    return [crop] * len(paths)
121
+
122
def perform_initial_prediction_on_video(
    video: RawVideoFile,
    model_path,
    window_size_s,
    min_seq_len_s,
):
    """Run the multilabel classifier over all frames of *video* and persist results.

    Writes one JSON file per result kind (predictions, smooth_predictions,
    binary_predictions, raw_sequences, filtered_sequences) to the paths the
    video object provides, then advances the prediction state flags so the
    import step picks the video up next.

    Bug fixes vs. the previous revision:
    - results were indexed out of the ``result_targets`` *name list* with
      string keys (TypeError) instead of the classifier's ``result_dict``;
    - ``json.dump`` was called without a file handle;
    - ``json`` was never imported.
    """
    import json

    model = get_multilabel_model(model_path)
    classifier = get_multilabel_classifier(model, verbose=True)

    paths = video.get_frame_paths()
    string_paths = [p.resolve().as_posix() for p in paths]
    crops = get_crops(video, string_paths)
    fps = video.get_fps()

    predictions = classifier.pipe(string_paths, crops)
    readable_predictions = [classifier.readable(p) for p in predictions]
    result_dict = classifier.post_process_predictions_serializable(
        readable_predictions,
        window_size_s=window_size_s,
        min_seq_len_s=min_seq_len_s,
        fps=fps,
    )

    # Each result kind goes to the target file path the model object provides.
    target_paths = {
        "predictions": video.get_predictions_path(),
        "smooth_predictions": video.get_smooth_predictions_path(),
        "binary_predictions": video.get_binary_predictions_path(),
        "raw_sequences": video.get_raw_sequences_path(),
        "filtered_sequences": video.get_filtered_sequences_path(),
    }
    for result_key, target_path in target_paths.items():
        with open(target_path, "w") as f:
            json.dump(result_dict[result_key], f)

    # Prediction finished; importing the prediction results is now due.
    video.state_initial_prediction_required = False
    video.state_initial_prediction_completed = True
    video.state_initial_prediction_import_required = True
    video.state_initial_prediction_import_completed = False
    video.save()

    return video
189
+
190
def perform_initial_prediction_on_videos(
    model_path,
    window_size_s,
    min_seq_len_s,
):
    """Run initial prediction for every scheduled video."""
    for scheduled in get_videos_scheduled_for_initial_prediction():
        perform_initial_prediction_on_video(
            scheduled,
            model_path,
            window_size_s,
            min_seq_len_s,
        )
200
+
201
def videos_scheduled_for_prediction_import_preflight():
    """Flag the prediction-import step for videos with finished predictions.

    Previously this set ``state_initial_prediction_required`` — which would
    re-run the (expensive) prediction itself — instead of the import flag
    that ``get_videos_scheduled_for_prediction_import`` keys on.
    """
    videos = RawVideoFile.objects.filter(
        state_initial_prediction_completed=True,
        state_initial_prediction_import_completed=False,
    )
    for video in videos:
        video.state_initial_prediction_import_required = True
        video.save()
209
+
210
def get_videos_scheduled_for_prediction_import():
    """Return videos whose finished predictions still need importing.

    Uses the ``state_initial_prediction_import_*`` fields actually defined
    on ``RawVideoFile``; the previous ``state_prediction_import_*`` names
    are not model fields and made this query raise ``FieldError``.
    """
    return RawVideoFile.objects.filter(
        state_initial_prediction_import_required=True,
        state_initial_prediction_import_completed=False,
        state_initial_prediction_completed=True,
    )
216
+
217
def import_predictions_for_video(video: RawVideoFile):
    """Import prediction results for *video* and update its import state.

    The actual import is still unimplemented (see TODO); the state flags
    now use the ``state_initial_prediction_import_*`` fields defined on the
    model — the previous ``state_prediction_import_*`` names are not model
    fields, so the flags were never persisted to the database.
    """
    # TODO: actually import the prediction JSON files into the database.

    video.state_initial_prediction_import_required = False
    video.state_initial_prediction_import_completed = True
    video.save()

    return video
227
+
228
def import_predictions_for_videos():
    """Import predictions for every video scheduled for import."""
    for scheduled_video in get_videos_scheduled_for_prediction_import():
        import_predictions_for_video(scheduled_video)
232
+
233
+
234
+ # # Step 4 - Delete Frames if not needed anymore
235
+ # function to query for videos scheduled for frame deletion,
236
+ # first we need to set state_frames_required = False for videos with:
237
+ # state_ocr_required = False and state_ocr_completed = True and
238
+ # state_initial_prediction_required = False and state_initial_prediction_completed = True
239
def delete_frames_preflight():
    """Clear ``state_frames_required`` once OCR and prediction are finished.

    Frames are only needed for OCR and initial prediction; when both are
    done (and no longer requested) the frames may be deleted.
    """
    finished = RawVideoFile.objects.filter(
        state_ocr_required=False,
        state_ocr_completed=True,
        state_initial_prediction_required=False,
        state_initial_prediction_completed=True,
    )
    for done_video in finished:
        done_video.state_frames_required = False
        done_video.save()
249
+
250
+ # function to query for videos scheduled for frame deletion,
251
+ # frames should be deleted if state_frames_required = False
252
def get_videos_scheduled_for_frame_deletion():
    """Return videos whose extracted frames can be removed.

    Requires frames to actually be present (``state_frames_extracted``);
    without that filter, every video that simply does not need frames —
    including ones never extracted — was returned and needlessly re-saved
    by the deletion pass.
    """
    return RawVideoFile.objects.filter(
        state_frames_required=False,
        state_frames_extracted=True,
    )
256
+
257
def delete_frames_for_video(video: RawVideoFile):
    """Delete the extracted frames of *video* and clear the extraction flag.

    The previous revision only flipped ``state_frames_extracted`` without
    removing anything from disk; ``RawVideoFile.delete_frames`` performs
    the actual directory removal.
    """
    video.delete_frames()

    # delete_frames() is a no-op when the frame dir is already gone;
    # make the flag consistent either way.
    video.state_frames_extracted = False
    video.save()

    return video
265
+
266
def delete_frames():
    """Delete frames for every video scheduled for frame deletion."""
    for stale_video in get_videos_scheduled_for_frame_deletion():
        delete_frames_for_video(stale_video)
@@ -0,0 +1,341 @@
1
+ from django.db import models
2
+ from pathlib import Path
3
+ from collections import defaultdict, Counter
4
+
5
+ from endoreg_db.utils.hashs import get_video_hash
6
+ from endoreg_db.utils.file_operations import get_uuid_filename
7
+ from endoreg_db.utils.ocr import extract_text_from_rois
8
+
9
+ import shutil
10
+ import os
11
+ import subprocess
12
+
13
+ from ..metadata import VideoMeta, SensitiveMeta
14
+
15
class RawVideoFile(models.Model):
    """A raw (un-anonymized) endoscopy video plus its processing state.

    The ``state_*`` boolean fields drive the asynchronous processing
    pipeline (frame extraction, OCR, prediction, anonymization); worker
    functions query on them and flip them as steps complete.

    Fixes vs. the previous revision:
    - ``get_prediction_dir`` referenced ``elf`` instead of ``self``
      (NameError on every call);
    - ``extract_frames`` checked for existing frames with a hard-coded
      ``*.jpg`` glob, ignoring its ``ext`` parameter;
    - ``get_frame_paths`` crashed on an empty frame directory
      (``zip(*[])`` raises ValueError).
    """

    uuid = models.UUIDField()
    file = models.FileField(upload_to="raw_data/")
    sensitive_meta = models.OneToOneField(
        "SensitiveMeta", on_delete=models.CASCADE, blank=True, null=True
    )
    center = models.ForeignKey("Center", on_delete=models.CASCADE)
    processor = models.ForeignKey(
        "EndoscopyProcessor", on_delete=models.CASCADE, blank=True, null=True
    )
    video_meta = models.OneToOneField(
        "VideoMeta", on_delete=models.CASCADE, blank=True, null=True
    )
    original_file_name = models.CharField(max_length=255)
    video_hash = models.CharField(max_length=255, unique=True)
    uploaded_at = models.DateTimeField(auto_now_add=True)

    # Frame Extraction States
    state_frames_required = models.BooleanField(default=True)
    state_frames_extracted = models.BooleanField(default=False)

    # Video
    ## Prediction
    state_initial_prediction_required = models.BooleanField(default=True)
    state_initial_prediction_completed = models.BooleanField(default=False)
    state_initial_prediction_import_required = models.BooleanField(default=True)
    state_initial_prediction_import_completed = models.BooleanField(default=False)
    ## OCR
    state_ocr_required = models.BooleanField(default=True)
    state_ocr_completed = models.BooleanField(default=False)
    ## Validation
    state_outside_validated = models.BooleanField(default=False)
    state_ocr_result_validated = models.BooleanField(default=False)

    state_sensitive_data_retrieved = models.BooleanField(default=False)

    # Dataset complete? (nullable = "unknown / not yet decided")
    state_histology_required = models.BooleanField(blank=True, null=True)
    state_histology_available = models.BooleanField(default=False)
    state_follow_up_intervention_required = models.BooleanField(blank=True, null=True)
    state_follow_up_intervention_available = models.BooleanField(default=False)
    state_dataset_complete = models.BooleanField(default=False)

    # Finalizing for Upload
    state_anonym_video_required = models.BooleanField(default=True)
    state_anonym_video_performed = models.BooleanField(default=False)
    state_original_reports_deleted = models.BooleanField(default=False)
    state_original_video_deleted = models.BooleanField(default=False)
    state_finalized = models.BooleanField(default=False)

    frame_dir = models.CharField(max_length=255)
    prediction_dir = models.CharField(max_length=255)

    @classmethod
    def create_from_file(
        cls,
        file_path: Path,
        video_dir_parent: Path,
        center_name: str,
        processor_name: str,
        frame_dir_parent: Path,
        save: bool = True,
    ):
        """Ingest a video file: rename to a UUID, move it, and create a record.

        The file is moved from *file_path* into *video_dir_parent* under a
        fresh UUID-based name; a per-video frame directory is created under
        *frame_dir_parent*. Returns the saved ``RawVideoFile``, or ``None``
        when the move failed, the hash changed in transit, or a file with
        the same hash already exists.

        Raises ``Center.DoesNotExist`` / ``EndoscopyProcessor.DoesNotExist``
        when the named center/processor is unknown.
        """
        from endoreg_db.models import Center, EndoscopyProcessor

        print(f"Creating RawVideoFile from {file_path}")
        original_file_name = file_path.name

        # Rename and move: new UUID filename plus a dedicated frame dir.
        new_file_name, uuid = get_uuid_filename(file_path)
        framedir: Path = frame_dir_parent / str(uuid)

        if not framedir.exists():
            framedir.mkdir(parents=True, exist_ok=True)

        if not video_dir_parent.exists():
            video_dir_parent.mkdir(parents=True, exist_ok=True)

        # Hash before the move so transfer integrity can be verified below.
        video_hash = get_video_hash(file_path)

        center = Center.objects.get(name=center_name)
        assert center is not None, "Center must exist"

        processor = EndoscopyProcessor.objects.get(name=processor_name)
        assert processor is not None, "Processor must exist"

        new_filepath = video_dir_parent / new_file_name

        print(f"Moving {file_path} to {new_filepath}")
        shutil.move(file_path.resolve().as_posix(), new_filepath.resolve().as_posix())
        print(f"Moved to {new_filepath}")

        # Make sure the file was transferred correctly and the hash matches.
        if not new_filepath.exists():
            print(f"File {file_path} was not transferred correctly to {new_filepath}")
            return None

        new_hash = get_video_hash(new_filepath)
        if new_hash != video_hash:
            print(f"Hash of file {file_path} is not correct")
            return None

        # Make sure that no other file with the same hash exists.
        if cls.objects.filter(video_hash=video_hash).exists():
            # log and print warning
            print(f"File with hash {video_hash} already exists")
            return None

        print(center)
        # Create a new instance of RawVideoFile.
        raw_video_file = cls(
            uuid=uuid,
            file=new_filepath.resolve().as_posix(),
            center=center,
            processor=processor,
            original_file_name=original_file_name,
            video_hash=video_hash,
            frame_dir=framedir.as_posix(),
        )

        # Save the instance to the database.
        raw_video_file.save()

        return raw_video_file

    def __str__(self):
        return self.file.name

    def get_endo_roi(self):
        """Return the endoscope image ROI dict from the attached VideoMeta."""
        endo_roi = self.video_meta.get_endo_roi()
        return endo_roi

    def save(self, *args, **kwargs):
        """Persist the record, creating VideoMeta on first save if missing."""
        if self.video_meta is None:
            center = self.center
            processor = self.processor
            self.video_meta = VideoMeta.objects.create(
                center=center, processor=processor
            )
            self.video_meta.initialize_ffmpeg_meta(self.file.path)
        super(RawVideoFile, self).save(*args, **kwargs)

    def extract_frames(
        self,
        quality: int = 2,
        frame_dir: Path = None,
        overwrite: bool = False,
        ext="jpg",
    ):
        """
        Extract frames from the video file and save them to the frame_dir.
        For this, ffmpeg must be available in the current environment.

        NOTE: sets ``state_frames_extracted`` in memory but does NOT save;
        callers are expected to persist the state themselves.
        """
        if frame_dir is None:
            frame_dir = Path(self.frame_dir)
        else:
            frame_dir = Path(frame_dir)

        if not frame_dir.exists():
            frame_dir.mkdir(parents=True, exist_ok=True)

        # Fixed: glob by the requested extension, not hard-coded "*.jpg".
        if not overwrite and len(list(frame_dir.glob(f"*.{ext}"))) > 0:
            print(f"Frames already extracted for {self.file.name}")
            return

        video_path = Path(self.file.path).resolve().as_posix()

        frame_path_string = frame_dir.resolve().as_posix()
        command = [
            "ffmpeg",
            "-i",
            video_path,
            "-q:v",
            str(quality),
            os.path.join(frame_path_string, f"frame_%07d.{ext}"),
        ]

        # Ensure FFmpeg is available.
        if not shutil.which("ffmpeg"):
            raise EnvironmentError(
                "FFmpeg could not be found. Ensure it is installed and in your PATH."
            )

        # Extract frames from the video file by executing the command.
        result = subprocess.run(command, capture_output=True, text=True)
        if result.returncode != 0:
            raise Exception(f"Error extracting frames: {result.stderr}")

        self.state_frames_extracted = True

        return f"Frames extracted to {frame_dir} ({frame_path_string}) with quality {quality}"

    def delete_frames(self):
        """
        Delete frames extracted from the video file.
        """
        frame_dir = Path(self.frame_dir)
        if frame_dir.exists():
            shutil.rmtree(frame_dir)
            self.state_frames_extracted = False
            self.save()
            return f"Frames deleted from {frame_dir}"
        else:
            return f"No frames to delete for {self.file.name}"

    def get_frame_path(self, n: int = 0):
        """
        Get the path to the n-th frame extracted from the video file.
        Note that the frame numbering starts at 1 in our naming convention.
        """
        # Adjust index: frame files are 1-based.
        n = n + 1

        frame_dir = Path(self.frame_dir)
        return frame_dir / f"frame_{n:07d}.jpg"

    def get_frame_paths(self):
        """Return all frame paths sorted by frame index, or None if not extracted."""
        if not self.state_frames_extracted:
            return None
        frame_dir = Path(self.frame_dir)
        paths = list(frame_dir.glob("*"))
        if not paths:
            # Fixed: previously zip(*[]) raised ValueError on an empty dir.
            return []
        # Filenames follow "frame_%07d.<ext>"; sort ascending by that index.
        indices = [int(p.stem.split("_")[1]) for p in paths]
        path_index_tuples = sorted(zip(paths, indices), key=lambda x: x[1])
        paths, _ = zip(*path_index_tuples)

        return paths

    def get_prediction_dir(self):
        """Return the directory where prediction JSON files are stored."""
        # Fixed: was ``elf.prediction_dir`` (NameError).
        return Path(self.prediction_dir)

    def get_predictions_path(self, suffix=".json"):
        """Path of the raw per-frame predictions file."""
        pred_dir = self.get_prediction_dir()
        return pred_dir.joinpath("predictions").with_suffix(suffix)

    def get_smooth_predictions_path(self, suffix=".json"):
        """Path of the smoothed predictions file."""
        pred_dir = self.get_prediction_dir()
        return pred_dir.joinpath("smooth_predictions").with_suffix(suffix)

    def get_binary_predictions_path(self, suffix=".json"):
        """Path of the binarized predictions file."""
        pred_dir = self.get_prediction_dir()
        return pred_dir.joinpath("binary_predictions").with_suffix(suffix)

    def get_raw_sequences_path(self, suffix=".json"):
        """Path of the raw label-sequence file."""
        pred_dir = self.get_prediction_dir()
        return pred_dir.joinpath("raw_sequences").with_suffix(suffix)

    def get_filtered_sequences_path(self, suffix=".json"):
        """Path of the filtered label-sequence file."""
        pred_dir = self.get_prediction_dir()
        return pred_dir.joinpath("filtered_sequences").with_suffix(suffix)

    def extract_text_information(self, frame_fraction: float = 0.001):
        """
        Extract text information from the video file.
        Makes sure that frames are extracted and then processes the frames.
        Gets all frames from frame_dir and selects a fraction of them to
        process (at least 1). Returns a dict mapping each processor ROI to
        its most frequent OCR text (or None), or None when no frames exist.
        """
        if not self.state_frames_extracted:
            print(f"Frames not extracted for {self.file.name}")
            return None

        processor = self.processor

        frame_dir = Path(self.frame_dir)
        frames = list(frame_dir.glob("*"))
        n_frames = len(frames)
        n_frames_to_process = max(1, int(frame_fraction * n_frames))

        # Select evenly spaced frames.
        # NOTE(review): assumes n_frames >= 1 (state flag implies frames
        # exist); step would be 0 for an empty dir — confirm upstream.
        frames = frames[:: n_frames // n_frames_to_process]

        # Extract text from each frame into a defaultdict of lists, then
        # keep the most frequent value per ROI.
        rois_texts = defaultdict(list)

        print(f"Processing {n_frames_to_process} frames from {self.file.name}")
        for frame_path in frames[:n_frames_to_process]:
            extracted_texts = extract_text_from_rois(frame_path, processor)
            for roi, text in extracted_texts.items():
                rois_texts[roi].append(text)

        # Get the most frequent non-empty text value for each ROI.
        for key in rois_texts.keys():
            counter = Counter([text for text in rois_texts[key] if text])
            rois_texts[key] = counter.most_common(1)[0][0] if counter else None

        return rois_texts

    def update_text_metadata(self, ocr_frame_fraction=0.001):
        """Run OCR over a frame sample and attach the resulting SensitiveMeta."""
        print(f"Updating metadata for {self.file.name}")
        texts = self.extract_text_information(ocr_frame_fraction)

        # Resulting dict depends on the ROIs defined for this processor type.
        self.sensitive_meta = SensitiveMeta.create_from_dict(texts)
        self.state_sensitive_data_retrieved = True
        self.save()

    def update_video_meta(self):
        """Create or refresh the attached VideoMeta from the video file."""
        video_meta = self.video_meta
        video_path = Path(self.file.path)

        if video_meta is None:
            video_meta = VideoMeta.create_from_video(video_path)
            self.video_meta = video_meta
            self.save()
        else:
            video_meta.update_meta(video_path)

    def get_fps(self):
        """Return the video's frames-per-second, initializing metadata lazily."""
        if self.video_meta is None:
            self.update_video_meta()

        if self.video_meta.ffmpeg_meta is None:
            self.video_meta.initialize_ffmpeg_meta(self.file.path)

        return self.video_meta.get_fps()