endoreg-db 0.8.2.7__py3-none-any.whl → 0.8.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

@@ -128,6 +128,20 @@ class ModelMeta(models.Model):
128
128
  cls, meta_name, model_name, labelset_name, weights_file,
129
129
  requested_version, bump_if_exists, **kwargs
130
130
  )
131
+
132
@classmethod
def setup_default_from_huggingface(
    cls: Type["ModelMeta"],
    model_id: str,
    task: str = "image-classification",
    labelset_name: Optional[str] = None,
) -> "ModelMeta":
    """
    Create a ModelMeta from a pretrained Hugging Face model.

    Thin wrapper that delegates to
    ``model_meta_logic.setup_default_from_huggingface_logic``.

    Args:
        model_id: Hugging Face repository id, handed to the logic function.
        task: Kept in the signature for API compatibility. It is currently
            NOT forwarded, because the logic function accepts only
            ``(cls, model_id, labelset_name)``.
        labelset_name: Optional label-set name, forwarded to the logic
            function.

    Returns:
        The object returned by the logic function.
    """
    # NOTE(review): function-scope import, presumably to avoid a circular
    # import between the model and its logic module - confirm.
    from . import model_meta_logic as logic

    # Pass labelset_name by keyword: the previous positional call
    # (cls, model_id, task, labelset_name) supplied one argument more than the
    # logic function accepts and therefore raised TypeError on every call.
    return logic.setup_default_from_huggingface_logic(
        cls, model_id, labelset_name=labelset_name
    )
144
+
131
145
 
132
146
  @classmethod
133
147
  def get_latest_version_number(cls: Type["ModelMeta"], meta_name: str, model_name: str) -> int:
@@ -1,7 +1,7 @@
1
1
  import shutil
2
2
  from pathlib import Path
3
3
  from typing import Optional, TYPE_CHECKING, Any, Type
4
-
4
+ from huggingface_hub import hf_hub_download
5
5
  from django.db import transaction
6
6
 
7
7
  # Assuming ModelMeta, AiModel, LabelSet are importable from the correct locations
@@ -234,3 +234,90 @@ def get_model_meta_by_name_version_logic(
234
234
  raise cls.DoesNotExist(
235
235
  f"No ModelMeta found for '{meta_name}' and model '{model_name}'."
236
236
  )
237
+
238
+ from huggingface_hub import model_info
239
+ import re
240
+
241
def infer_default_model_meta_from_hf(model_id: str) -> dict[str, Any]:
    """
    Infer default model metadata for a Hugging Face ``model_id``.

    The Hub is queried via ``model_info``; the returned tags are scanned for
    an ``architecture:`` and a ``task:`` entry. Normalization statistics and
    input size are always the fixed defaults defined below; only ``name`` and
    ``activation`` depend on the Hub data.

    Args:
        model_id: Hugging Face repository id, e.g. ``"owner/model"``.

    Returns:
        A dict with the keys ``name``, ``activation``, ``mean``, ``std``,
        ``size_x``, ``size_y`` and ``description``. When the model info
        cannot be retrieved, the ColoReg segmentation defaults are returned.
    """
    # Local import so this block does not depend on edits to the file header.
    from huggingface_hub.utils import HfHubHTTPError

    # Single source of truth for the values shared by both return paths.
    defaults: dict[str, Any] = {
        "activation": "sigmoid",
        "mean": (0.45211223, 0.27139644, 0.19264949),
        "std": (0.31418097, 0.21088019, 0.16059452),
        "size_x": 716,
        "size_y": 716,
    }

    # model_info() signals failure by raising, not by returning a falsy
    # value, so the lookup must be guarded for the fallback to be reachable.
    try:
        info = model_info(model_id)
    except (HfHubHTTPError, OSError, ValueError) as exc:
        logger.warning("model_info lookup failed for %s: %s", model_id, exc)
        info = None

    if not info:
        logger.info(f"Could not retrieve model info for {model_id}, using ColoReg segmentation defaults.")
        return {
            "name": "wg-lux/colo_segmentation_RegNetX800MF_base",
            **defaults,
            "description": f"Defaults for unknown model {model_id}",
        }

    tags = info.tags or []

    # First matching tag wins; None when the Hub entry carries no such tag.
    # NOTE(review): it is not verified here that Hub tags use the
    # "architecture:" / "task:" prefixes - confirm against real model cards.
    architecture = next((t for t in tags if t.startswith("architecture:")), None)
    task = next((t for t in tags if t.startswith("task:")), None)

    # Task-based inference: segmentation/detection keep the sigmoid default,
    # classification switches to softmax.
    activation = defaults["activation"]
    if task and "segmentation" not in task and "detection" not in task:
        if "classification" in task:
            activation = "softmax"

    # Architecture-based naming: prefer the explicit tag, otherwise derive a
    # slug from the repository name (part after the last "/").
    if architecture:
        arch = architecture.removeprefix("architecture:")
    else:
        model_name = model_id.split("/")[-1].lower()
        arch = re.sub(r"[^a-z0-9]+", "_", model_name)

    return {
        "name": arch,
        **defaults,
        "activation": activation,
        "description": f"Inferred defaults for {model_id}",
    }
298
+
299
def setup_default_from_huggingface_logic(
    cls,
    model_id: str,
    labelset_name: str | None = None,
    *,
    weights_filename: str = "pytorch_model.bin",
):
    """
    Download model weights from Hugging Face and create a ModelMeta from them.

    Args:
        cls: The ModelMeta class, passed through to ``create_from_file_logic``.
        model_id: Hugging Face repository id to download from.
        labelset_name: Name of the LabelSet to attach. When omitted, the
            first LabelSet returned by the database is used.
        weights_filename: File inside the repository that holds the weights.

    Returns:
        The result of ``create_from_file_logic``.

    Raises:
        LabelSet.DoesNotExist: If ``labelset_name`` matches no LabelSet, or
            if no name was given and the LabelSet table is empty.
    """
    meta = infer_default_model_meta_from_hf(model_id)

    # Resolve the label set before any download or database write, so a
    # missing label set fails fast and leaves nothing half-created.
    if labelset_name:
        labelset = LabelSet.objects.get(name=labelset_name)
    else:
        labelset = LabelSet.objects.first()
        if labelset is None:
            # first() returns None on an empty table; without this guard the
            # failure would surface later as an AttributeError on `.name`.
            raise LabelSet.DoesNotExist(
                "No LabelSet available; create one or pass labelset_name."
            )

    # Download weights
    weights_path = hf_hub_download(
        repo_id=model_id,
        filename=weights_filename,
        local_dir=WEIGHTS_DIR,
    )

    ai_model, _ = AiModel.objects.get_or_create(name=meta["name"])

    return create_from_file_logic(
        cls,
        meta_name=meta["name"],
        model_name=ai_model.name,
        labelset_name=labelset.name,
        weights_file=weights_path,
        activation=meta["activation"],
        mean=meta["mean"],
        std=meta["std"],
        size_x=meta["size_x"],
        size_y=meta["size_y"],
        description=meta["description"],
    )
@@ -523,20 +523,7 @@ class VideoImportService():
523
523
  video = self.current_video
524
524
  if video is None:
525
525
  self.logger.warning("No VideoFile instance available for fallback anonymization")
526
- else:
527
- # Try VideoFile.pipe_2() method if available
528
- if hasattr(video, 'pipe_2'):
529
- self.logger.info("Trying VideoFile.pipe_2() method...")
530
- if video.pipe_2():
531
- self.logger.info("VideoFile.pipe_2() succeeded")
532
- self.processing_context['anonymization_completed'] = True
533
- return
534
- self.logger.warning("VideoFile.pipe_2() returned False")
535
- # Try direct anonymization via _anonymize
536
- if _anonymize(video, delete_original_raw=self.delete_source):
537
- self.logger.info("VideoFile._anonymize() succeeded")
538
- self.processing_context['anonymization_completed'] = True
539
- return
526
+
540
527
 
541
528
  # Strategy 2: Simple copy (no processing, just copy raw to processed)
542
529
  self.logger.info("Using simple copy fallback (raw video will be used as 'processed' video)")
@@ -888,10 +875,8 @@ class VideoImportService():
888
875
  video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
889
876
  cleaned_filename = f"cleaned_{video_filename}"
890
877
  cleaned_video_path = Path(raw_video_path).parent / cleaned_filename
891
-
892
- processor_roi, endoscope_roi = self._get_processor_roi_info(video)
893
-
894
- # Processor roi can be used later to OCR preknown regions.
878
+
879
+ # Processor roi is used later to OCR preknown regions.
895
880
 
896
881
  # Clean video with ROI masking (heavy I/O operation)
897
882
  actual_cleaned_path, extracted_metadata = frame_cleaner.clean_video(
@@ -903,29 +888,6 @@ class VideoImportService():
903
888
  technique="mask_overlay"
904
889
  )
905
890
 
906
- # Optional: enrich metadata using TrOCR+LLM on one random extracted frame
907
- try:
908
- # Prefer frames belonging to this video (UUID in path), else pick any frame
909
- frame_candidates = list(RAW_FRAME_DIR.rglob("*.jpg")) + list(RAW_FRAME_DIR.rglob("*.png"))
910
- video_uuid = str(video.uuid)
911
- filtered = [p for p in frame_candidates if video_uuid in str(p)] or frame_candidates
912
- if filtered:
913
- sample_frame = random.choice(filtered)
914
- ocr_text = trocr_full_image_ocr(sample_frame)
915
- if ocr_text:
916
- llm_metadata = frame_cleaner.extract_metadata(ocr_text)
917
- if llm_metadata:
918
- # Merge with already extracted frame-level metadata
919
- extracted_metadata = frame_cleaner.frame_metadata_extractor.merge_metadata(
920
- extracted_metadata or {}, llm_metadata
921
- )
922
- self.logger.info("LLM metadata extraction (random frame) successful")
923
- else:
924
- self.logger.info("LLM metadata extraction (random frame) found no data")
925
- else:
926
- self.logger.info("No text extracted by TrOCR on random frame")
927
- except Exception as e:
928
- self.logger.error(f"LLM metadata enrichment step failed: {e}")
929
891
 
930
892
  # Store cleaned video path for later use in _cleanup_and_archive
931
893
  self.processing_context['cleaned_video_path'] = actual_cleaned_path
@@ -1048,8 +1010,6 @@ class VideoImportService():
1048
1010
  self.processed_files.remove(file_path_str)
1049
1011
  self.logger.info(f"Removed {file_path_str} from processed files (failed processing)")
1050
1012
 
1051
-
1052
-
1053
1013
 
1054
1014
  except Exception as e:
1055
1015
  self.logger.warning(f"Error during context cleanup: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: endoreg-db
3
- Version: 0.8.2.7
3
+ Version: 0.8.2.9
4
4
  Summary: EndoReg Db Django App
5
5
  Project-URL: Homepage, https://info.coloreg.de
6
6
  Project-URL: Repository, https://github.com/wg-lux/endoreg-db
@@ -29,10 +29,11 @@ Requires-Dist: dotenv>=0.9.9
29
29
  Requires-Dist: faker>=37.6.0
30
30
  Requires-Dist: flake8>=7.3.0
31
31
  Requires-Dist: gunicorn>=23.0.0
32
+ Requires-Dist: huggingface-hub>=0.35.3
32
33
  Requires-Dist: icecream>=2.1.4
33
34
  Requires-Dist: librosa==0.11.0
34
35
  Requires-Dist: llvmlite>=0.44.0
35
- Requires-Dist: lx-anonymizer[llm,ocr]>=0.8.5
36
+ Requires-Dist: lx-anonymizer[llm,ocr]>=0.8.7
36
37
  Requires-Dist: moviepy==2.2.1
37
38
  Requires-Dist: mypy>=1.16.0
38
39
  Requires-Dist: numpy>=2.2.3
@@ -461,8 +461,8 @@ endoreg_db/models/medical/risk/risk.py,sha256=g5pgAfCfsvH88nbmX3xsASF3OZgNA-G6NJ
461
461
  endoreg_db/models/medical/risk/risk_type.py,sha256=kEugcaWSTEWH_Vxq4dcF80Iv1L4_Kk1JKJGQMgz_s0o,1350
462
462
  endoreg_db/models/metadata/__init__.py,sha256=8I6oLj3YTmeaPGJpL0AWG5gLwp38QzrEggxSkTisv7c,474
463
463
  endoreg_db/models/metadata/frame_ocr_result.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
464
- endoreg_db/models/metadata/model_meta.py,sha256=aZH6Bz5Ss874Knvg1b3Kgq6gU8kVzPHXneunZJNF4yw,7111
465
- endoreg_db/models/metadata/model_meta_logic.py,sha256=yiIWbxxykUp6VB_7imRqSXcO0RS5GuoYP83O48TyKws,8987
464
+ endoreg_db/models/metadata/model_meta.py,sha256=AveneX6f4AOQD4R-IQB8UqcqTzKpek2yuEWk7ybtF38,7606
465
+ endoreg_db/models/metadata/model_meta_logic.py,sha256=27mqScxUTJXNUVc6CqAs5dXjspEsh0TWPmlxdJVulGc,12015
466
466
  endoreg_db/models/metadata/pdf_meta.py,sha256=BTmpSgqxmPKi0apcNjyrZAS4AFKCPXVdBd6VBeyyv6E,3174
467
467
  endoreg_db/models/metadata/sensitive_meta.py,sha256=ekLHrW-b5uYcjfkRd0EW5ncx5ef8Bu-K6msDkpWCAbk,13034
468
468
  endoreg_db/models/metadata/sensitive_meta_logic.py,sha256=Oh7ssZQEPfKGfRMF5nXKJpOIxXx-Xibd3rpOu-bQilk,29988
@@ -600,7 +600,7 @@ endoreg_db/services/pseudonym_service.py,sha256=CJhbtRa6K6SPbphgCZgEMi8AFQtB18CU
600
600
  endoreg_db/services/requirements_object.py,sha256=290zf8AEbVtCoHhW4Jr7_ud-RvrqYmb1Nz9UBHtTnc0,6164
601
601
  endoreg_db/services/segment_sync.py,sha256=YgHvIHkbW4mqCu0ACf3zjRSZnNfxWwt4gh5syUVXuE0,6400
602
602
  endoreg_db/services/storage_aware_video_processor.py,sha256=kKFK64vXLeBSVkp1YJonU3gFDTeXZ8C4qb9QZZB99SE,13420
603
- endoreg_db/services/video_import.py,sha256=X20FQkEO5QGcfuacAz8jX1_LW1GhwbF33JGPpmypEyk,50161
603
+ endoreg_db/services/video_import.py,sha256=PhcOgxU5M4uSEklBXEWHpIaNX-yIYv1rJy-T-fCU8cs,47830
604
604
  endoreg_db/tasks/upload_tasks.py,sha256=OJq7DhNwcbWdXzHY8jz5c51BCVkPN5gSWOz-6Fx6W5M,7799
605
605
  endoreg_db/tasks/video_ingest.py,sha256=kxFuYkHijINV0VabQKCFVpJRv6eCAw07tviONurDgg8,5265
606
606
  endoreg_db/tasks/video_processing_tasks.py,sha256=KjcERRJ1TZzmavBpvr6OsvSTUViU0PR1ECWnEdzu2Js,14140
@@ -784,7 +784,7 @@ endoreg_db/views/video/video_meta.py,sha256=C1wBMTtQb_yzEUrhFGAy2UHEWMk_CbU75WXX
784
784
  endoreg_db/views/video/video_processing_history.py,sha256=mhFuS8RG5GV8E-lTtuD0qrq-bIpnUFp8vy9aERfC-J8,770
785
785
  endoreg_db/views/video/video_remove_frames.py,sha256=2FmvNrSPM0fUXiBxINN6vBUUDCqDlBkNcGR3WsLDgKo,1696
786
786
  endoreg_db/views/video/video_stream.py,sha256=kLyuf0ORTmsLeYUQkTQ6iRYqlIQozWhMMR3Lhfe_trk,12148
787
- endoreg_db-0.8.2.7.dist-info/METADATA,sha256=BCQcbq7ZExYZF5gBfmkDflNtysGjAt36WUMKmqfA-48,14719
788
- endoreg_db-0.8.2.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
789
- endoreg_db-0.8.2.7.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
790
- endoreg_db-0.8.2.7.dist-info/RECORD,,
787
+ endoreg_db-0.8.2.9.dist-info/METADATA,sha256=2PdVUGEhQwGP1CvM1194ox8-B7Ir5wG70n0iwnZdJQ0,14758
788
+ endoreg_db-0.8.2.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
789
+ endoreg_db-0.8.2.9.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
790
+ endoreg_db-0.8.2.9.dist-info/RECORD,,