endoreg-db 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in the public registry, and is provided for informational purposes only.
Potentially problematic release: this version of endoreg-db might be problematic.
- endoreg_db/management/commands/delete_all.py +18 -0
- endoreg_db/management/commands/fix_auth_permission.py +20 -0
- endoreg_db/management/commands/load_user_groups.py +8 -47
- endoreg_db/migrations/0001_initial.py +1 -1
- endoreg_db/migrations/0002_rawvideofile.py +26 -0
- endoreg_db/migrations/0003_rawvideofile_frames_required.py +18 -0
- endoreg_db/migrations/0004_rename_hash_rawvideofile_video_hash.py +18 -0
- endoreg_db/migrations/0005_ffmpegmeta_remove_videoimportmeta_center_and_more.py +56 -0
- endoreg_db/migrations/0006_rawvideofile_center_alter_videometa_processor.py +25 -0
- endoreg_db/migrations/0007_rawvideofile_processor.py +19 -0
- endoreg_db/migrations/0008_rename_frames_required_rawvideofile_state_frames_required.py +18 -0
- endoreg_db/migrations/0009_sensitivemeta_rawvideofile_sensitive_meta.py +31 -0
- endoreg_db/migrations/0010_rename_endoscope_serial_number_sensitivemeta_endoscope_sn.py +18 -0
- endoreg_db/migrations/0011_rawvideofile_state_sensitive_data_retrieved.py +18 -0
- endoreg_db/migrations/0012_rawvideofile_prediction_dir_and_more.py +109 -0
- endoreg_db/models/data_file/__init__.py +4 -1
- endoreg_db/models/data_file/base_classes/__init__.py +0 -1
- endoreg_db/models/data_file/base_classes/abstract_video.py +1 -0
- endoreg_db/models/data_file/import_classes/__init__.py +31 -0
- endoreg_db/models/data_file/import_classes/processing_functions.py +269 -0
- endoreg_db/models/data_file/import_classes/raw_video.py +341 -0
- endoreg_db/models/data_file/metadata/__init__.py +133 -0
- endoreg_db/models/data_file/metadata/sensitive_meta.py +13 -0
- endoreg_db/models/data_file/video/__init__.py +1 -1
- endoreg_db/models/data_file/video/import_meta.py +21 -21
- endoreg_db/models/permissions/__init__.py +44 -0
- endoreg_db/utils/__init__.py +1 -0
- endoreg_db/utils/cropping.py +29 -0
- endoreg_db/utils/dataloader.py +69 -0
- endoreg_db/utils/file_operations.py +30 -0
- endoreg_db/utils/hashs.py +16 -0
- endoreg_db/utils/legacy_ocr.py +201 -0
- endoreg_db/utils/ocr.py +197 -0
- endoreg_db/utils/uuid.py +4 -0
- endoreg_db/utils/video_metadata.py +87 -0
- {endoreg_db-0.2.2.dist-info → endoreg_db-0.3.0.dist-info}/METADATA +7 -1
- {endoreg_db-0.2.2.dist-info → endoreg_db-0.3.0.dist-info}/RECORD +39 -11
- {endoreg_db-0.2.2.dist-info → endoreg_db-0.3.0.dist-info}/LICENSE +0 -0
- {endoreg_db-0.2.2.dist-info → endoreg_db-0.3.0.dist-info}/WHEEL +0 -0
endoreg_db/models/data_file/metadata/__init__.py (new file)

@@ -0,0 +1,133 @@
from django.db import models
import ffmpeg
from pathlib import Path
from .sensitive_meta import SensitiveMeta

# import endoreg_center_id from django settings
from django.conf import settings

# check if endoreg_center_id is set
if not hasattr(settings, 'ENDOREG_CENTER_ID'):
    ENDOREG_CENTER_ID = 9999
else:
    ENDOREG_CENTER_ID = settings.ENDOREG_CENTER_ID

# VideoMeta
class VideoMeta(models.Model):
    processor = models.ForeignKey('EndoscopyProcessor', on_delete=models.CASCADE, blank=True, null=True)
    endoscope = models.ForeignKey('Endoscope', on_delete=models.CASCADE, blank=True, null=True)
    center = models.ForeignKey('Center', on_delete=models.CASCADE)
    import_meta = models.OneToOneField('VideoImportMeta', on_delete=models.CASCADE, blank=True, null=True)
    ffmpeg_meta = models.OneToOneField('FFMpegMeta', on_delete=models.CASCADE, blank=True, null=True)

    def __str__(self):
        processor_name = self.processor.name if self.processor is not None else "None"
        endoscope_name = self.endoscope.name if self.endoscope is not None else "None"
        center_name = self.center.name if self.center is not None else "None"

        result_html = ""
        result_html += f"Processor: {processor_name}<br>"
        result_html += f"Endoscope: {endoscope_name}<br>"
        result_html += f"Center: {center_name}<br>"

        return result_html

    # import meta should be created when video meta is created
    def save(self, *args, **kwargs):
        if self.import_meta is None:
            self.import_meta = VideoImportMeta.objects.create()
        super(VideoMeta, self).save(*args, **kwargs)

    def initialize_ffmpeg_meta(self, file_path):
        """Initializes FFMpeg metadata for the video file if not already done."""
        self.ffmpeg_meta = FFMpegMeta.create_from_file(Path(file_path))
        self.save()

    def update_meta(self, file_path):
        """Updates the video metadata from the file."""
        self.initialize_ffmpeg_meta(file_path)
        self.save()

    def get_endo_roi(self):
        endo_roi = self.processor.get_roi_endoscope_image()
        return endo_roi

    def get_fps(self):
        if not self.ffmpeg_meta:
            return None

        return self.ffmpeg_meta.frame_rate


class FFMpegMeta(models.Model):
    # Existing fields
    duration = models.FloatField(blank=True, null=True)
    width = models.IntegerField(blank=True, null=True)
    height = models.IntegerField(blank=True, null=True)
    frame_rate = models.FloatField(blank=True, null=True)

    # New fields for comprehensive information
    video_codec = models.CharField(max_length=50, blank=True, null=True)
    audio_codec = models.CharField(max_length=50, blank=True, null=True)
    audio_channels = models.IntegerField(blank=True, null=True)
    audio_sample_rate = models.IntegerField(blank=True, null=True)

    # Existing __str__ method can be updated to include new fields

    @classmethod
    def create_from_file(cls, file_path: Path):
        """Creates an FFMpegMeta instance from a video file using ffmpeg probe."""
        try:
            probe = ffmpeg.probe(str(file_path))
        except ffmpeg.Error as e:
            print(e.stderr)
            return None

        video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
        audio_streams = [stream for stream in probe['streams'] if stream['codec_type'] == 'audio']

        # Check for the existence of a video stream
        if video_stream is None:
            print(f"No video stream found in {file_path}")
            return None

        # Extract and store video metadata
        metadata = {
            'duration': float(video_stream.get('duration', 0)),
            'width': int(video_stream.get('width', 0)),
            'height': int(video_stream.get('height', 0)),
            'frame_rate': float(next(iter(video_stream.get('avg_frame_rate', '').split('/')), 0)),
            'video_codec': video_stream.get('codec_name', ''),
        }

        # If there are audio streams, extract and store audio metadata from the first stream
        if audio_streams:
            first_audio_stream = audio_streams[0]
            metadata.update({
                'audio_codec': first_audio_stream.get('codec_name', ''),
                'audio_channels': int(first_audio_stream.get('channels', 0)),
                'audio_sample_rate': int(first_audio_stream.get('sample_rate', 0)),
            })

        # Create and return the FFMpegMeta instance
        return cls.objects.create(**metadata)


class VideoImportMeta(models.Model):
    video_anonymized = models.BooleanField(default=False)
    video_patient_data_detected = models.BooleanField(default=False)
    outside_detected = models.BooleanField(default=False)
    patient_data_removed = models.BooleanField(default=False)
    outside_removed = models.BooleanField(default=False)

    def __str__(self):
        result_html = ""
        result_html += f"Video anonymized: {self.video_anonymized}<br>"
        result_html += f"Video patient data detected: {self.video_patient_data_detected}<br>"
        result_html += f"Outside detected: {self.outside_detected}<br>"
        result_html += f"Patient data removed: {self.patient_data_removed}<br>"
        result_html += f"Outside removed: {self.outside_removed}<br>"
        return result_html
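One detail worth noting in `FFMpegMeta.create_from_file`: the `frame_rate` value keeps only the numerator of ffprobe's `avg_frame_rate` string, so `"25/1"` becomes `25.0` but `"30000/1001"` becomes `30000.0` rather than roughly `29.97`. Below is a minimal sketch of a more robust parser; `parse_frame_rate` is a hypothetical helper, not part of the package.

```python
# Hypothetical helper (not part of endoreg-db): parse ffprobe's "num/den"
# avg_frame_rate string into frames per second instead of keeping only the numerator.
def parse_frame_rate(avg_frame_rate: str) -> float:
    """Return frames per second from an ffprobe rate string such as '30000/1001'."""
    try:
        num, _, den = avg_frame_rate.partition('/')
        return float(num) / float(den) if den else float(num)
    except (ValueError, ZeroDivisionError):
        return 0.0

assert parse_frame_rate("25/1") == 25.0
assert abs(parse_frame_rate("30000/1001") - 29.97) < 0.001
```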
endoreg_db/models/data_file/metadata/sensitive_meta.py (new file)

@@ -0,0 +1,13 @@
from django.db import models

class SensitiveMeta(models.Model):
    examination_date = models.DateField(blank=True, null=True)
    patient_first_name = models.CharField(max_length=255, blank=True, null=True)
    patient_last_name = models.CharField(max_length=255, blank=True, null=True)
    patient_dob = models.DateField(blank=True, null=True)
    endoscope_type = models.CharField(max_length=255, blank=True, null=True)
    endoscope_sn = models.CharField(max_length=255, blank=True, null=True)

    @classmethod
    def create_from_dict(cls, data: dict):
        return cls.objects.create(**data)
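`SensitiveMeta.create_from_dict` simply forwards a dictionary to `objects.create`, so it pairs naturally with OCR output keyed by the same field names. A usage sketch with invented placeholder values, assuming Django is configured and migrations have been applied:

```python
from datetime import date

from endoreg_db.models.data_file.metadata.sensitive_meta import SensitiveMeta

# Placeholder values for illustration only.
ocr_result = {
    "examination_date": date(2023, 5, 17),
    "patient_first_name": "Anna",
    "patient_last_name": "Example",
    "patient_dob": date(1970, 1, 1),
    "endoscope_type": "CF-H190L",
    "endoscope_sn": "1234567",
}

sensitive_meta = SensitiveMeta.create_from_dict(ocr_result)
```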
endoreg_db/models/data_file/video/import_meta.py

@@ -1,25 +1,25 @@ (the previously active VideoImportMeta definition, which now lives in models/data_file/metadata/__init__.py, is replaced with a commented-out copy; the new file content is shown below)

# from django.db import models

# class VideoImportMeta(models.Model):
#     processor = models.ForeignKey('EndoscopyProcessor', on_delete=models.CASCADE)
#     endoscope = models.ForeignKey('Endoscope', on_delete=models.CASCADE, blank=True, null=True)
#     center = models.ForeignKey('Center', on_delete=models.CASCADE)
#     video_anonymized = models.BooleanField(default=False)
#     video_patient_data_detected = models.BooleanField(default=False)
#     outside_detected = models.BooleanField(default=False)
#     patient_data_removed = models.BooleanField(default=False)
#     outside_removed = models.BooleanField(default=False)

#     def __str__(self):
#         result_html = ""

#         result_html += f"Processor: {self.processor.name}<br>"
#         result_html += f"Endoscope: {self.endoscope.name}<br>"
#         result_html += f"Center: {self.center.name}<br>"
#         result_html += f"Video anonymized: {self.video_anonymized}<br>"
#         result_html += f"Video patient data detected: {self.video_patient_data_detected}<br>"
#         result_html += f"Outside detected: {self.outside_detected}<br>"
#         result_html += f"Patient data removed: {self.patient_data_removed}<br>"
#         result_html += f"Outside removed: {self.outside_removed}<br>"
#         return result_html
endoreg_db/models/permissions/__init__.py (new file)

@@ -0,0 +1,44 @@
from django.contrib.contenttypes.models import ContentType
from django.contrib.auth.models import Permission
from endoreg_db.models import *  # Import your models here
from django.db import transaction

# Step 1: Define model to category mappings
# Assuming every model class name is unique across your entire Django project
model_categories = {
    'SensitiveModel1': 'sensitive',
    'SensitiveModel2': 'sensitive',
    'DevelopmentModel1': 'development',
    'DevelopmentModel2': 'development',
    # Add all models you have, mapping them to either 'sensitive', 'development', or 'all'
}

# Step 2: Define permissions for each category
category_permissions = {
    'sensitive': ['view', 'edit', 'delete'],
    'development': ['view', 'edit'],
    'all': ['view'],
}

@transaction.atomic
def create_permissions_for_all_models():
    for model_class_name, category in model_categories.items():
        model_class = globals().get(model_class_name)
        if model_class is None:
            print(f"Model {model_class_name} not found.")
            continue

        content_type = ContentType.objects.get_for_model(model_class)
        permissions = category_permissions.get(category, [])

        for permission_codename in permissions:
            permission_name = f"Can {permission_codename} {model_class_name}"
            permission, created = Permission.objects.get_or_create(
                codename=f"{permission_codename}_{model_class_name.lower()}",
                defaults={'name': permission_name, 'content_type': content_type},
            )
            if created:
                print(f"Created permission: {permission_name}")

# Run the function to create and assign permissions based on categories
create_permissions_for_all_models()
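Note that the module calls `create_permissions_for_all_models()` at import time, and that the generated codenames follow the `<action>_<modelname>` pattern: `view_*` and `delete_*` overlap with Django's auto-created defaults, while `edit_*` is a custom action alongside Django's built-in `change_*`. A sketch of granting one of the generated permissions to a group; the group name and codename below are hypothetical:

```python
from django.contrib.auth.models import Group, Permission

# Hypothetical group and codename, following the codename pattern used above.
annotators, _ = Group.objects.get_or_create(name="annotators")
view_permission = Permission.objects.get(codename="view_sensitivemodel1")
annotators.permissions.add(view_permission)
```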
endoreg_db/utils/__init__.py (new file)

@@ -0,0 +1 @@
from .dataloader import load_model_data_from_yaml
endoreg_db/utils/cropping.py (new file)

@@ -0,0 +1,29 @@
from PIL import Image

def crop_and_insert(image:Image, x, y, h, w, bg_color=(255, 255, 255)):
    """
    Crops a region from an inverted grayscale image and inserts it into a white image of the same size as the original.

    Parameters:
    - fp: File path or a file object of the original image.
    - x, y: The top-left coordinates of the rectangle to be cropped.
    - h, w: The height and width of the rectangle to be cropped.

    Returns:
    A PIL Image object containing the original image with the specified region replaced.
    """
    # Load the original image
    original_image = image

    # Crop the specified region from the inverted image
    crop_rectangle = (x, y, x + w, y + h)
    cropped_content = original_image.crop(crop_rectangle)

    # Create a new white image of the same size as the original image
    white_background = Image.new('RGB', original_image.size, bg_color)

    # Paste the cropped content onto the white image at the specified location
    white_background.paste(cropped_content, (x, y))

    # The final image can be displayed or saved as needed
    return white_background
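Note that the docstring still mentions an `fp` parameter, but `crop_and_insert` actually takes an already-opened PIL `Image`. A usage sketch with placeholder paths and coordinates:

```python
from PIL import Image

from endoreg_db.utils.cropping import crop_and_insert

# Keep only a 200x80 pixel region starting at (40, 30); everything else is
# replaced by the white background. Paths and coordinates are placeholders.
frame = Image.open("frame_0.jpg")
masked = crop_and_insert(frame, x=40, y=30, h=80, w=200)
masked.save("frame_0_masked.jpg")
```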
endoreg_db/utils/dataloader.py (new file)

@@ -0,0 +1,69 @@
import os
import yaml

def load_model_data_from_yaml(
    command,
    model_name,
    metadata,
    verbose,
):

    if verbose:
        command.stdout.write(f"Start Loading {model_name}")
    model = metadata["model"]
    dir = metadata["dir"]
    foreign_keys = metadata["foreign_keys"]
    foreign_key_models = metadata["foreign_key_models"]

    for file in [f for f in os.listdir(dir) if f.endswith('.yaml')]:
        with open(os.path.join(dir, file), 'r') as f:
            yaml_data = yaml.safe_load(f)

        load_data_with_foreign_keys(
            command,
            model,
            yaml_data,
            foreign_keys,
            foreign_key_models,
            verbose
        )

def load_data_with_foreign_keys(command, model, yaml_data, foreign_keys, foreign_key_models, verbose):
    # Since pathology types is a ManyToMany field, we need to hack arount
    for entry in yaml_data:
        fields = entry.get('fields', {})
        name = fields.pop('name', None)
        many_to_many_tuples = []
        foreign_key_tuples = zip(foreign_keys, foreign_key_models)
        for foreign_key, foreign_key_model in foreign_key_tuples:
            target_natural_key = fields.pop(foreign_key, None)

            if isinstance(target_natural_key, list):
                # the field is a Many to X field.
                fk_objects = [foreign_key_model.objects.get_by_natural_key(_) for _ in target_natural_key]
                fk_tuple = (foreign_key, fk_objects)
                many_to_many_tuples.append(fk_tuple)
                continue
            # Use the natural key to look up the related object
            try:
                obj = foreign_key_model.objects.get_by_natural_key(target_natural_key)
            except model.DoesNotExist:
                command.stderr.write(command.style.ERROR(f'{model.__name__} with natural key {target_natural_key} does not exist. Skipping {name}.'))
                raise Exception(f'{model.__name__} with natural key {target_natural_key} does not exist. Skipping {name}.')

            # Assign the related object to the field
            fields[foreign_key] = obj

        if name:
            obj, created = model.objects.get_or_create(name=name, defaults=fields)
        else:
            obj, created = model.objects.get_or_create(**fields)
        if many_to_many_tuples:
            for fk, fk_objects in many_to_many_tuples:
                getattr(obj, fk).set(fk_objects)

        if created and verbose:
            command.stdout.write(command.style.SUCCESS(f'Created {model.__name__} {name}'))
        elif verbose:
            command.stdout.write(command.style.WARNING(f'Skipped {model.__name__} {name}, already exists'))
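`load_data_with_foreign_keys` expects each YAML file to parse into a list of entries whose `fields` mapping references related objects by natural key: a scalar value is resolved with `get_by_natural_key()`, while a list marks a many-to-many relation that is assigned with `.set()` after `get_or_create`. The `metadata` argument supplies the model class, the data directory, and the parallel `foreign_keys` / `foreign_key_models` lists. An illustration of that shape with invented model and field names (this is not a fixture shipped with the package):

```python
# What yaml.safe_load() is expected to return for one data file.
# All names here are invented for illustration.
yaml_data = [
    {
        "fields": {
            "name": "gastroscopy",                          # used for get_or_create(name=...)
            "center": "university_hospital",                # scalar natural key -> get_by_natural_key()
            "examination_types": ["routine", "emergency"],  # list -> many-to-many, assigned via .set()
        }
    },
]
```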
endoreg_db/utils/file_operations.py (new file)

@@ -0,0 +1,30 @@
import shutil
from pathlib import Path
from endoreg_db.utils.uuid import get_uuid

def get_uuid_filename(file:Path) -> tuple[str, str]:
    """
    Returns a new filename with a uuid
    """
    # Get the file extension
    file_extension = file.suffix
    # Generate a new file name
    uuid = get_uuid()
    new_file_name = f"{uuid}{file_extension}"
    return new_file_name, uuid

def rename_file_uuid(old_file:Path):
    """
    Rename a file by assigning a uuid while preserving file extension. Returns new filepath and uuid
    """
    # Get the file extension
    file_extension = old_file.suffix
    # Generate a new file name
    uuid = get_uuid()
    new_file_name = f"{uuid}{file_extension}"

    # Rename the file
    new_file = old_file.with_name(new_file_name)
    shutil.move(old_file.resolve().as_posix(), new_file.resolve().as_posix())

    return new_file, uuid
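`rename_file_uuid` renames the file on disk via `shutil.move` and returns both the new path and the UUID. A usage sketch with a placeholder path:

```python
from pathlib import Path

from endoreg_db.utils.file_operations import rename_file_uuid

# Placeholder path; the file must exist, since the helper moves it on disk.
new_path, uuid = rename_file_uuid(Path("incoming/report_2023.mp4"))
print(new_path.name)  # e.g. "3f2504e0-4f89-41d3-9a0c-0305e82c3301.mp4"
```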
endoreg_db/utils/hashs.py (new file)

@@ -0,0 +1,16 @@
import hashlib

def get_video_hash(video_path):
    """
    Get the hash of a video file.
    """
    # Open the video file in read-binary mode:
    with open(video_path, 'rb') as f:
        # Create the hash object, passing in the video contents for hashing:
        hash_object = hashlib.sha256(f.read())
    # Get the hexadecimal representation of the hash
    video_hash = hash_object.hexdigest()
    assert len(video_hash) <= 255, "Hash length exceeds 255 characters"

    return video_hash
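`get_video_hash` reads the entire file into memory before hashing, which is fine for short clips but costly for multi-gigabyte recordings. A sketch of an equivalent chunked variant that produces the same SHA-256 digest with constant memory; `get_video_hash_chunked` is our name, not part of the package:

```python
import hashlib

def get_video_hash_chunked(video_path, chunk_size=8 * 1024 * 1024):
    """Same SHA-256 digest as get_video_hash, but reads the file in 8 MiB chunks."""
    hash_object = hashlib.sha256()
    with open(video_path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            hash_object.update(chunk)
    return hash_object.hexdigest()
```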
endoreg_db/utils/legacy_ocr.py (new file)

@@ -0,0 +1,201 @@
import pytesseract
# import cv2
from endoreg_db.models import EndoscopyProcessor
import os
from collections import Counter
from tempfile import TemporaryDirectory
import re
from datetime import datetime
from typing import Dict, List
from icecream import ic
import numpy as np

N_FRAMES_MEAN_OCR = 2

# Helper function to process date strings
def process_date_text(date_text):
    """
    Processes a string of text that represents a date and returns a datetime.date object.

    Args:
        date_text (str): A string of text that represents a date.

    Returns:
        datetime.date: A datetime.date object representing the parsed date, or None if the text cannot be parsed.
    """
    try:
        # Remove any non-digit characters
        date_text_clean = re.sub(r'\D', '', date_text)
        # Reformat to 'ddmmyyyy' if necessary
        if len(date_text_clean) == 8:
            return datetime.strptime(date_text_clean, "%d%m%Y").date()
        elif len(date_text_clean) == 14:
            return datetime.strptime(date_text_clean, "%d%m%Y%H%M%S").date()
    except ValueError:
        # Return None if the text cannot be parsed into a date
        # set date to 1/1/1900
        return datetime.strptime("01011900", "%d%m%Y").date()

# Helper function to process patient names
def process_name_text(name_text):
    """
    Remove all numbers, punctuation, and whitespace from a string of text and return the result.
    """
    name = re.sub(r'[0-9!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\s]+', '', name_text).strip()
    # capitalize first letter of each word
    name = ' '.join([word.capitalize() for word in name.split()])
    return name


# Helper function to process endoscope type text
def process_general_text(endoscope_text):
    """
    This function takes in a string of text from an endoscope and returns a cleaned version of the text.
    """
    return ' '.join(endoscope_text.split())

def roi_values_valid(roi):
    """
    Check if all values in an ROI dictionary are valid (>=0).
    """
    return all([value >= 0 for value in roi.values()])

# Function to extract text from ROIs
def extract_text_from_rois(image_path, processor:EndoscopyProcessor):
    """
    Extracts text from regions of interest (ROIs) in an image using OCR.

    Args:
        image_path (str): The path to the image file.
        processor (EndoscopyProcessor): An instance of the EndoscopyProcessor class.

    Returns:
        dict: A dictionary containing the extracted text for each ROI.
    """
    # Read the image using OpenCV
    image = cv2.imread(image_path)

    # Initialize the dictionary to hold the extracted text
    extracted_texts = {}

    # Define your ROIs and their corresponding post-processing functions in tuples
    rois_with_postprocessing = [
        ('examination_date', processor.get_roi_examination_date, process_date_text),
        ("patient_first_name", processor.get_roi_patient_first_name, process_name_text),
        ('patient_last_name', processor.get_roi_patient_last_name, process_name_text),
        ('patient_dob', processor.get_roi_patient_dob, process_date_text),
        ('endoscope_type', processor.get_roi_endoscope_type, process_general_text),
        ('endoscope_sn', processor.get_roi_endoscopy_sn, process_general_text),
    ]

    # Extract and post-process text for each ROI
    for roi_name, roi_function, post_process in rois_with_postprocessing:
        # Get the ROI dictionary
        roi = roi_function()

        # Check if the ROI has values

        if roi_values_valid(roi):
            # Crop the image to the ROI
            x, y, w, h = roi['x'], roi['y'], roi['width'], roi['height']
            roi_cropped = image[y:y+h, x:x+w]
            # Convert to grayscale
            gray = cv2.cvtColor(roi_cropped, cv2.COLOR_BGR2GRAY)

            # Invert colors for white text on black background
            gray = cv2.bitwise_not(gray)

            # Binarize the image - using Otsu's method
            _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

            # Dilate the image to improve the contour of the pixelated text
            kernel = np.ones((2,2), np.uint8)
            dilation = cv2.dilate(binary, kernel, iterations=1)

            # OCR configuration: Recognize white text on black background without corrections
            config = '--psm 10 -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-üöäÜÖÄß'

            # Use pytesseract to do OCR on the preprocessed ROI
            text = pytesseract.image_to_string(dilation, config=config).strip()

            # Post-process extracted text
            processed_text = post_process(text)
            # processed_text = text

            # Store the processed text in the dictionary
            extracted_texts[roi_name] = processed_text

        else:
            ic(roi_name)
            ic(roi)
            ic("No values for this ROI")

    return extracted_texts


def get_most_frequent_values(rois_texts: Dict[str, List[str]]) -> Dict[str, str]:
    """
    Given a dictionary of ROIs and their corresponding texts, returns a dictionary of the most frequent text for each ROI.

    Args:
        rois_texts: A dictionary where the keys are the names of the ROIs and the values are lists of texts.

    Returns:
        A dictionary where the keys are the names of the ROIs and the values are the most frequent text for each ROI.
    """
    most_frequent = {}
    for key in rois_texts.keys():
        counter = Counter([text for text in rois_texts[key] if text])
        ic(key)
        ic(counter)
        most_frequent[key], _ = counter.most_common(1)[0] if counter else (None, None)
    return most_frequent

def process_video(video_path, processor):
    """
    Processes a video file by extracting text from regions of interest (ROIs) in each frame.

    Args:
        video_path (str): The path to the video file to process.
        processor (OCRProcessor): An instance of the OCRProcessor class that defines the ROIs to extract text from.

    Returns:
        dict: A dictionary containing the most frequent text values extracted from each ROI.
    """
    # Create a temporary directory to store frames
    with TemporaryDirectory() as temp_dir:
        ic(temp_dir)
        # Capture the video
        video = cv2.VideoCapture(video_path)
        success, frame_number = True, 0
        rois_texts = {roi_name: [] for roi_name in processor.get_rois().keys()}
        frames_for_mean_extraction = 0

        while success:
            success, frame = video.read()

            # Check if this is the 200th frame
            if frame_number % 1000 == 0 and success:
                frame_path = os.path.join(temp_dir, f"frame_{frame_number}.jpg")
                cv2.imwrite(frame_path, frame)  # Save the frame as a JPEG file
                # cv2.imwrite(f"_tmp/frame_{frame_number}.jpg", frame)

                # Extract text from ROIs
                extracted_texts = extract_text_from_rois(frame_path, processor)
                ic(extracted_texts)

                # Store the extracted text from each ROI
                for key, text in extracted_texts.items():
                    rois_texts[key].append(text)
                frames_for_mean_extraction += 1

            frame_number += 1

            if frames_for_mean_extraction >= N_FRAMES_MEAN_OCR: break

        # Release the video capture object
        video.release()

        # Get the most frequent values for each ROI
        return get_most_frequent_values(rois_texts)
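As released, this module comments out `import cv2` while the functions below still call `cv2.imread`, `cv2.VideoCapture`, and related APIs, so `extract_text_from_rois` and `process_video` raise `NameError` at call time unless the import is restored (a separate `ocr.py` is also added in this release). Note as well that the comment "Check if this is the 200th frame" sits next to a `frame_number % 1000` check. A usage sketch, assuming OpenCV is imported and a processor with configured ROIs exists; the processor name and video path are hypothetical:

```python
from endoreg_db.models import EndoscopyProcessor
from endoreg_db.utils import legacy_ocr

# Hypothetical processor name and video path.
processor = EndoscopyProcessor.objects.get(name="olympus_cv_1500")
texts = legacy_ocr.process_video("raw_videos/example.mp4", processor)
print(texts)  # e.g. {"examination_date": ..., "patient_first_name": ..., ...}
```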