endoreg-db 0.8.2.7__py3-none-any.whl → 0.8.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of endoreg-db might be problematic. Click here for more details.
- endoreg_db/models/metadata/model_meta.py +14 -0
- endoreg_db/models/metadata/model_meta_logic.py +88 -1
- endoreg_db/services/video_import.py +3 -43
- {endoreg_db-0.8.2.7.dist-info → endoreg_db-0.8.2.9.dist-info}/METADATA +3 -2
- {endoreg_db-0.8.2.7.dist-info → endoreg_db-0.8.2.9.dist-info}/RECORD +7 -7
- {endoreg_db-0.8.2.7.dist-info → endoreg_db-0.8.2.9.dist-info}/WHEEL +0 -0
- {endoreg_db-0.8.2.7.dist-info → endoreg_db-0.8.2.9.dist-info}/licenses/LICENSE +0 -0
|
@@ -128,6 +128,20 @@ class ModelMeta(models.Model):
|
|
|
128
128
|
cls, meta_name, model_name, labelset_name, weights_file,
|
|
129
129
|
requested_version, bump_if_exists, **kwargs
|
|
130
130
|
)
|
|
131
|
+
|
|
132
|
+
@classmethod
def setup_default_from_huggingface(
    cls: Type["ModelMeta"],
    model_id: str,
    task: str = "image-classification",
    labelset_name: Optional[str] = None,
) -> "ModelMeta":
    """
    Download a pretrained model from Hugging Face and create a ModelMeta for it.

    Thin wrapper that delegates to
    ``model_meta_logic.setup_default_from_huggingface_logic``.

    Args:
        model_id: Hugging Face repository id of the model to download.
        task: Kept for API compatibility. The logic helper takes no task
            argument, so this value is intentionally not forwarded.
        labelset_name: Name of the LabelSet to attach. When omitted, the
            logic helper falls back to the first LabelSet in the database.

    Returns:
        Whatever the logic helper returns (the result of
        ``create_from_file_logic``).
    """
    # Imported lazily, matching the original block (NOTE(review): presumably
    # to avoid a circular import between the model and its logic module).
    from . import model_meta_logic as logic

    # The helper's signature is (cls, model_id, labelset_name=None). Passing
    # `task` positionally as well would raise
    # "TypeError: takes from 2 to 3 positional arguments but 4 were given".
    return logic.setup_default_from_huggingface_logic(
        cls,
        model_id,
        labelset_name=labelset_name,
    )
|
|
144
|
+
|
|
131
145
|
|
|
132
146
|
@classmethod
|
|
133
147
|
def get_latest_version_number(cls: Type["ModelMeta"], meta_name: str, model_name: str) -> int:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import shutil
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import Optional, TYPE_CHECKING, Any, Type
|
|
4
|
-
|
|
4
|
+
from huggingface_hub import hf_hub_download
|
|
5
5
|
from django.db import transaction
|
|
6
6
|
|
|
7
7
|
# Assuming ModelMeta, AiModel, LabelSet are importable from the correct locations
|
|
@@ -234,3 +234,90 @@ def get_model_meta_by_name_version_logic(
|
|
|
234
234
|
raise cls.DoesNotExist(
|
|
235
235
|
f"No ModelMeta found for '{meta_name}' and model '{model_name}'."
|
|
236
236
|
)
|
|
237
|
+
|
|
238
|
+
from huggingface_hub import model_info
|
|
239
|
+
import re
|
|
240
|
+
|
|
241
|
+
def infer_default_model_meta_from_hf(model_id: str) -> dict[str, Any]:
    """
    Infer default model metadata for a Hugging Face ``model_id``.

    The Hub is queried for the repository's tags. A ``task:...`` tag selects
    the activation, and an ``architecture:...`` tag (or, failing that, a
    slug derived from the repository name) becomes the metadata name.
    Normalization statistics and input size are always the ColoReg defaults.

    If the Hub lookup fails or yields nothing, the ColoReg segmentation
    defaults are returned instead of propagating the error.

    Args:
        model_id: Hugging Face repository id, e.g. ``"org/model-name"``.

    Returns:
        A dict with the keys ``name``, ``activation``, ``mean``, ``std``,
        ``size_x``, ``size_y`` and ``description``.
    """
    # ColoReg defaults, shared by the fallback and the inferred result.
    default_mean = (0.45211223, 0.27139644, 0.19264949)
    default_std = (0.31418097, 0.21088019, 0.16059452)
    default_size = 716

    # --- 1. Fetch repository info ---
    # model_info() signals "not found" / network / auth problems by raising,
    # not by returning a falsy value. This lookup is best-effort, so any
    # failure is routed into the documented fallback below.
    try:
        info = model_info(model_id)
    except Exception as exc:
        logger.warning(f"Hugging Face lookup for {model_id} failed: {exc}")
        info = None

    if not info:
        logger.info(f"Could not retrieve model info for {model_id}, using ColoReg segmentation defaults.")
        return {
            "name": "wg-lux/colo_segmentation_RegNetX800MF_base",
            "activation": "sigmoid",
            "mean": default_mean,
            "std": default_std,
            "size_x": default_size,
            "size_y": default_size,
            "description": f"Defaults for unknown model {model_id}",
        }

    tags = info.tags or []
    architecture = next((t for t in tags if t.startswith("architecture:")), None)
    task = next((t for t in tags if t.startswith("task:")), None)

    # --- 2. Task-based inference ---
    # Segmentation/detection tasks keep the sigmoid default; only a
    # classification task switches the activation to softmax.
    activation = "sigmoid"
    if task and "segmentation" not in task and "detection" not in task:
        if "classification" in task:
            activation = "softmax"

    # --- 3. Architecture-based inference ---
    if architecture:
        arch = architecture.removeprefix("architecture:")
    else:
        # No architecture tag: derive a slug from the repository name
        # (the part after the last "/"), lower-cased, with every run of
        # characters outside [a-z0-9] replaced by "_".
        repo_name = model_id.split("/")[-1].lower()
        arch = re.sub(r"[^a-z0-9]+", "_", repo_name)

    return {
        "name": arch,
        "activation": activation,
        "mean": default_mean,
        "std": default_std,
        "size_x": default_size,
        "size_y": default_size,
        "description": f"Inferred defaults for {model_id}",
    }
|
|
298
|
+
|
|
299
|
+
def setup_default_from_huggingface_logic(
    cls,
    model_id: str,
    labelset_name: str | None = None,
    *,
    weights_filename: str = "pytorch_model.bin",
):
    """
    Download model weights from Hugging Face and auto-fill ModelMeta fields.

    Args:
        cls: The ModelMeta class, passed through to ``create_from_file_logic``.
        model_id: Hugging Face repository id to download from.
        labelset_name: Name of the LabelSet to use. When empty or ``None``,
            the first LabelSet in the database is used.
        weights_filename: File inside the repository that holds the weights.
            Defaults to ``"pytorch_model.bin"``, the previously hard-coded
            value.

    Returns:
        The result of ``create_from_file_logic``.

    Raises:
        LabelSet.DoesNotExist: If ``labelset_name`` matches no LabelSet, or
            if no name was given and the LabelSet table is empty.
    """
    # NOTE(review): relies on AiModel, LabelSet, WEIGHTS_DIR and
    # create_from_file_logic being available in module scope at runtime -
    # confirm they are not imported under TYPE_CHECKING only.
    meta = infer_default_model_meta_from_hf(model_id)

    # Resolve the label set before downloading, so a missing label set fails
    # before any weights are fetched.
    if labelset_name:
        labelset = LabelSet.objects.get(name=labelset_name)
    else:
        labelset = LabelSet.objects.first()
        if labelset is None:
            # .first() returns None on an empty table; without this check the
            # failure would be an AttributeError on `labelset.name` below.
            raise LabelSet.DoesNotExist(
                "No LabelSet exists; create one or pass labelset_name."
            )

    # Download weights
    weights_path = hf_hub_download(
        repo_id=model_id,
        filename=weights_filename,
        local_dir=WEIGHTS_DIR,
    )

    ai_model, _ = AiModel.objects.get_or_create(name=meta["name"])

    return create_from_file_logic(
        cls,
        meta_name=meta["name"],
        model_name=ai_model.name,
        labelset_name=labelset.name,
        weights_file=weights_path,
        activation=meta["activation"],
        mean=meta["mean"],
        std=meta["std"],
        size_x=meta["size_x"],
        size_y=meta["size_y"],
        description=meta["description"],
    )
|
|
@@ -523,20 +523,7 @@ class VideoImportService():
|
|
|
523
523
|
video = self.current_video
|
|
524
524
|
if video is None:
|
|
525
525
|
self.logger.warning("No VideoFile instance available for fallback anonymization")
|
|
526
|
-
|
|
527
|
-
# Try VideoFile.pipe_2() method if available
|
|
528
|
-
if hasattr(video, 'pipe_2'):
|
|
529
|
-
self.logger.info("Trying VideoFile.pipe_2() method...")
|
|
530
|
-
if video.pipe_2():
|
|
531
|
-
self.logger.info("VideoFile.pipe_2() succeeded")
|
|
532
|
-
self.processing_context['anonymization_completed'] = True
|
|
533
|
-
return
|
|
534
|
-
self.logger.warning("VideoFile.pipe_2() returned False")
|
|
535
|
-
# Try direct anonymization via _anonymize
|
|
536
|
-
if _anonymize(video, delete_original_raw=self.delete_source):
|
|
537
|
-
self.logger.info("VideoFile._anonymize() succeeded")
|
|
538
|
-
self.processing_context['anonymization_completed'] = True
|
|
539
|
-
return
|
|
526
|
+
|
|
540
527
|
|
|
541
528
|
# Strategy 2: Simple copy (no processing, just copy raw to processed)
|
|
542
529
|
self.logger.info("Using simple copy fallback (raw video will be used as 'processed' video)")
|
|
@@ -888,10 +875,8 @@ class VideoImportService():
|
|
|
888
875
|
video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
|
|
889
876
|
cleaned_filename = f"cleaned_{video_filename}"
|
|
890
877
|
cleaned_video_path = Path(raw_video_path).parent / cleaned_filename
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
# Processor roi can be used later to OCR preknown regions.
|
|
878
|
+
|
|
879
|
+
# Processor roi is used later to OCR preknown regions.
|
|
895
880
|
|
|
896
881
|
# Clean video with ROI masking (heavy I/O operation)
|
|
897
882
|
actual_cleaned_path, extracted_metadata = frame_cleaner.clean_video(
|
|
@@ -903,29 +888,6 @@ class VideoImportService():
|
|
|
903
888
|
technique="mask_overlay"
|
|
904
889
|
)
|
|
905
890
|
|
|
906
|
-
# Optional: enrich metadata using TrOCR+LLM on one random extracted frame
|
|
907
|
-
try:
|
|
908
|
-
# Prefer frames belonging to this video (UUID in path), else pick any frame
|
|
909
|
-
frame_candidates = list(RAW_FRAME_DIR.rglob("*.jpg")) + list(RAW_FRAME_DIR.rglob("*.png"))
|
|
910
|
-
video_uuid = str(video.uuid)
|
|
911
|
-
filtered = [p for p in frame_candidates if video_uuid in str(p)] or frame_candidates
|
|
912
|
-
if filtered:
|
|
913
|
-
sample_frame = random.choice(filtered)
|
|
914
|
-
ocr_text = trocr_full_image_ocr(sample_frame)
|
|
915
|
-
if ocr_text:
|
|
916
|
-
llm_metadata = frame_cleaner.extract_metadata(ocr_text)
|
|
917
|
-
if llm_metadata:
|
|
918
|
-
# Merge with already extracted frame-level metadata
|
|
919
|
-
extracted_metadata = frame_cleaner.frame_metadata_extractor.merge_metadata(
|
|
920
|
-
extracted_metadata or {}, llm_metadata
|
|
921
|
-
)
|
|
922
|
-
self.logger.info("LLM metadata extraction (random frame) successful")
|
|
923
|
-
else:
|
|
924
|
-
self.logger.info("LLM metadata extraction (random frame) found no data")
|
|
925
|
-
else:
|
|
926
|
-
self.logger.info("No text extracted by TrOCR on random frame")
|
|
927
|
-
except Exception as e:
|
|
928
|
-
self.logger.error(f"LLM metadata enrichment step failed: {e}")
|
|
929
891
|
|
|
930
892
|
# Store cleaned video path for later use in _cleanup_and_archive
|
|
931
893
|
self.processing_context['cleaned_video_path'] = actual_cleaned_path
|
|
@@ -1048,8 +1010,6 @@ class VideoImportService():
|
|
|
1048
1010
|
self.processed_files.remove(file_path_str)
|
|
1049
1011
|
self.logger.info(f"Removed {file_path_str} from processed files (failed processing)")
|
|
1050
1012
|
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
1013
|
|
|
1054
1014
|
except Exception as e:
|
|
1055
1015
|
self.logger.warning(f"Error during context cleanup: {e}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: endoreg-db
|
|
3
|
-
Version: 0.8.2.7
|
|
3
|
+
Version: 0.8.2.9
|
|
4
4
|
Summary: EndoReg Db Django App
|
|
5
5
|
Project-URL: Homepage, https://info.coloreg.de
|
|
6
6
|
Project-URL: Repository, https://github.com/wg-lux/endoreg-db
|
|
@@ -29,10 +29,11 @@ Requires-Dist: dotenv>=0.9.9
|
|
|
29
29
|
Requires-Dist: faker>=37.6.0
|
|
30
30
|
Requires-Dist: flake8>=7.3.0
|
|
31
31
|
Requires-Dist: gunicorn>=23.0.0
|
|
32
|
+
Requires-Dist: huggingface-hub>=0.35.3
|
|
32
33
|
Requires-Dist: icecream>=2.1.4
|
|
33
34
|
Requires-Dist: librosa==0.11.0
|
|
34
35
|
Requires-Dist: llvmlite>=0.44.0
|
|
35
|
-
Requires-Dist: lx-anonymizer[llm,ocr]>=0.8.
|
|
36
|
+
Requires-Dist: lx-anonymizer[llm,ocr]>=0.8.7
|
|
36
37
|
Requires-Dist: moviepy==2.2.1
|
|
37
38
|
Requires-Dist: mypy>=1.16.0
|
|
38
39
|
Requires-Dist: numpy>=2.2.3
|
|
@@ -461,8 +461,8 @@ endoreg_db/models/medical/risk/risk.py,sha256=g5pgAfCfsvH88nbmX3xsASF3OZgNA-G6NJ
|
|
|
461
461
|
endoreg_db/models/medical/risk/risk_type.py,sha256=kEugcaWSTEWH_Vxq4dcF80Iv1L4_Kk1JKJGQMgz_s0o,1350
|
|
462
462
|
endoreg_db/models/metadata/__init__.py,sha256=8I6oLj3YTmeaPGJpL0AWG5gLwp38QzrEggxSkTisv7c,474
|
|
463
463
|
endoreg_db/models/metadata/frame_ocr_result.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
464
|
-
endoreg_db/models/metadata/model_meta.py,sha256=
|
|
465
|
-
endoreg_db/models/metadata/model_meta_logic.py,sha256=
|
|
464
|
+
endoreg_db/models/metadata/model_meta.py,sha256=AveneX6f4AOQD4R-IQB8UqcqTzKpek2yuEWk7ybtF38,7606
|
|
465
|
+
endoreg_db/models/metadata/model_meta_logic.py,sha256=27mqScxUTJXNUVc6CqAs5dXjspEsh0TWPmlxdJVulGc,12015
|
|
466
466
|
endoreg_db/models/metadata/pdf_meta.py,sha256=BTmpSgqxmPKi0apcNjyrZAS4AFKCPXVdBd6VBeyyv6E,3174
|
|
467
467
|
endoreg_db/models/metadata/sensitive_meta.py,sha256=ekLHrW-b5uYcjfkRd0EW5ncx5ef8Bu-K6msDkpWCAbk,13034
|
|
468
468
|
endoreg_db/models/metadata/sensitive_meta_logic.py,sha256=Oh7ssZQEPfKGfRMF5nXKJpOIxXx-Xibd3rpOu-bQilk,29988
|
|
@@ -600,7 +600,7 @@ endoreg_db/services/pseudonym_service.py,sha256=CJhbtRa6K6SPbphgCZgEMi8AFQtB18CU
|
|
|
600
600
|
endoreg_db/services/requirements_object.py,sha256=290zf8AEbVtCoHhW4Jr7_ud-RvrqYmb1Nz9UBHtTnc0,6164
|
|
601
601
|
endoreg_db/services/segment_sync.py,sha256=YgHvIHkbW4mqCu0ACf3zjRSZnNfxWwt4gh5syUVXuE0,6400
|
|
602
602
|
endoreg_db/services/storage_aware_video_processor.py,sha256=kKFK64vXLeBSVkp1YJonU3gFDTeXZ8C4qb9QZZB99SE,13420
|
|
603
|
-
endoreg_db/services/video_import.py,sha256=
|
|
603
|
+
endoreg_db/services/video_import.py,sha256=PhcOgxU5M4uSEklBXEWHpIaNX-yIYv1rJy-T-fCU8cs,47830
|
|
604
604
|
endoreg_db/tasks/upload_tasks.py,sha256=OJq7DhNwcbWdXzHY8jz5c51BCVkPN5gSWOz-6Fx6W5M,7799
|
|
605
605
|
endoreg_db/tasks/video_ingest.py,sha256=kxFuYkHijINV0VabQKCFVpJRv6eCAw07tviONurDgg8,5265
|
|
606
606
|
endoreg_db/tasks/video_processing_tasks.py,sha256=KjcERRJ1TZzmavBpvr6OsvSTUViU0PR1ECWnEdzu2Js,14140
|
|
@@ -784,7 +784,7 @@ endoreg_db/views/video/video_meta.py,sha256=C1wBMTtQb_yzEUrhFGAy2UHEWMk_CbU75WXX
|
|
|
784
784
|
endoreg_db/views/video/video_processing_history.py,sha256=mhFuS8RG5GV8E-lTtuD0qrq-bIpnUFp8vy9aERfC-J8,770
|
|
785
785
|
endoreg_db/views/video/video_remove_frames.py,sha256=2FmvNrSPM0fUXiBxINN6vBUUDCqDlBkNcGR3WsLDgKo,1696
|
|
786
786
|
endoreg_db/views/video/video_stream.py,sha256=kLyuf0ORTmsLeYUQkTQ6iRYqlIQozWhMMR3Lhfe_trk,12148
|
|
787
|
-
endoreg_db-0.8.2.
|
|
788
|
-
endoreg_db-0.8.2.
|
|
789
|
-
endoreg_db-0.8.2.
|
|
790
|
-
endoreg_db-0.8.2.
|
|
787
|
+
endoreg_db-0.8.2.9.dist-info/METADATA,sha256=2PdVUGEhQwGP1CvM1194ox8-B7Ir5wG70n0iwnZdJQ0,14758
|
|
788
|
+
endoreg_db-0.8.2.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
789
|
+
endoreg_db-0.8.2.9.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
790
|
+
endoreg_db-0.8.2.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|