endoreg-db 0.8.4.4__py3-none-any.whl → 0.8.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of endoreg-db might be problematic. Click here for more details.
- endoreg_db/authz/auth.py +74 -0
- endoreg_db/authz/backends.py +168 -0
- endoreg_db/authz/management/commands/list_routes.py +18 -0
- endoreg_db/authz/middleware.py +83 -0
- endoreg_db/authz/permissions.py +127 -0
- endoreg_db/authz/policy.py +218 -0
- endoreg_db/authz/views_auth.py +66 -0
- endoreg_db/config/env.py +13 -8
- endoreg_db/data/__init__.py +8 -31
- endoreg_db/data/_examples/disease.yaml +55 -0
- endoreg_db/data/_examples/disease_classification.yaml +13 -0
- endoreg_db/data/_examples/disease_classification_choice.yaml +62 -0
- endoreg_db/data/_examples/event.yaml +64 -0
- endoreg_db/data/_examples/examination.yaml +72 -0
- endoreg_db/data/_examples/finding/anatomy_colon.yaml +128 -0
- endoreg_db/data/_examples/finding/colonoscopy.yaml +40 -0
- endoreg_db/data/_examples/finding/colonoscopy_bowel_prep.yaml +56 -0
- endoreg_db/data/_examples/finding/complication.yaml +16 -0
- endoreg_db/data/_examples/finding/data.yaml +105 -0
- endoreg_db/data/_examples/finding/examination_setting.yaml +16 -0
- endoreg_db/data/_examples/finding/medication_related.yaml +18 -0
- endoreg_db/data/_examples/finding/outcome.yaml +12 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_bowel_preparation.yaml +68 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_jnet.yaml +22 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_kudo.yaml +25 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_circularity.yaml +20 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_planarity.yaml +24 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_size.yaml +68 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_surface.yaml +20 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_location.yaml +80 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_lst.yaml +21 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_nice.yaml +20 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_paris.yaml +26 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_sano.yaml +22 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_summary.yaml +53 -0
- endoreg_db/data/_examples/finding_classification/complication_generic.yaml +25 -0
- endoreg_db/data/_examples/finding_classification/examination_setting_generic.yaml +40 -0
- endoreg_db/data/_examples/finding_classification/histology_colo.yaml +51 -0
- endoreg_db/data/_examples/finding_classification/intervention_required.yaml +26 -0
- endoreg_db/data/_examples/finding_classification/medication_related.yaml +23 -0
- endoreg_db/data/_examples/finding_classification/visualized.yaml +33 -0
- endoreg_db/data/_examples/finding_classification_choice/bowel_preparation.yaml +78 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_circularity_default.yaml +32 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_jnet.yaml +15 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_kudo.yaml +23 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_lst.yaml +15 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_nice.yaml +17 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_paris.yaml +57 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_planarity_default.yaml +49 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_sano.yaml +14 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_surface_intact_default.yaml +36 -0
- endoreg_db/data/_examples/finding_classification_choice/colonoscopy_location.yaml +229 -0
- endoreg_db/data/_examples/finding_classification_choice/colonoscopy_not_complete_reason.yaml +19 -0
- endoreg_db/data/_examples/finding_classification_choice/colonoscopy_size.yaml +82 -0
- endoreg_db/data/_examples/finding_classification_choice/colonoscopy_summary_worst_finding.yaml +15 -0
- endoreg_db/data/_examples/finding_classification_choice/complication_generic_types.yaml +15 -0
- endoreg_db/data/_examples/finding_classification_choice/examination_setting_generic_types.yaml +15 -0
- endoreg_db/data/_examples/finding_classification_choice/histology.yaml +24 -0
- endoreg_db/data/_examples/finding_classification_choice/histology_polyp.yaml +20 -0
- endoreg_db/data/_examples/finding_classification_choice/outcome.yaml +19 -0
- endoreg_db/data/_examples/finding_classification_choice/yes_no_na.yaml +11 -0
- endoreg_db/data/_examples/finding_classification_type/colonoscopy_basic.yaml +48 -0
- endoreg_db/data/_examples/finding_intervention/endoscopy.yaml +43 -0
- endoreg_db/data/_examples/finding_intervention/endoscopy_colonoscopy.yaml +168 -0
- endoreg_db/data/_examples/finding_intervention/endoscopy_egd.yaml +128 -0
- endoreg_db/data/_examples/finding_intervention/endoscopy_ercp.yaml +32 -0
- endoreg_db/data/_examples/finding_intervention/endoscopy_eus_lower.yaml +9 -0
- endoreg_db/data/_examples/finding_intervention/endoscopy_eus_upper.yaml +36 -0
- endoreg_db/data/_examples/finding_intervention_type/endoscopy.yaml +15 -0
- endoreg_db/data/_examples/finding_type/data.yaml +43 -0
- endoreg_db/data/_examples/requirement/age.yaml +26 -0
- endoreg_db/data/_examples/requirement/colonoscopy_baseline_austria.yaml +45 -0
- endoreg_db/data/_examples/requirement/disease_cardiovascular.yaml +79 -0
- endoreg_db/data/_examples/requirement/disease_classification_choice_cardiovascular.yaml +41 -0
- endoreg_db/data/_examples/requirement/disease_hepatology.yaml +12 -0
- endoreg_db/data/_examples/requirement/disease_misc.yaml +12 -0
- endoreg_db/data/_examples/requirement/disease_renal.yaml +96 -0
- endoreg_db/data/_examples/requirement/endoscopy_bleeding_risk.yaml +59 -0
- endoreg_db/data/_examples/requirement/event_cardiology.yaml +251 -0
- endoreg_db/data/_examples/requirement/event_requirements.yaml +145 -0
- endoreg_db/data/_examples/requirement/finding_colon_polyp.yaml +50 -0
- endoreg_db/data/_examples/requirement/gender.yaml +25 -0
- endoreg_db/data/_examples/requirement/lab_value.yaml +441 -0
- endoreg_db/data/_examples/requirement/medication.yaml +93 -0
- endoreg_db/data/_examples/requirement_operator/age.yaml +13 -0
- endoreg_db/data/_examples/requirement_operator/lab_operators.yaml +129 -0
- endoreg_db/data/_examples/requirement_operator/model_operators.yaml +96 -0
- endoreg_db/data/_examples/requirement_set/01_endoscopy_generic.yaml +48 -0
- endoreg_db/data/_examples/requirement_set/colonoscopy_austria_screening.yaml +57 -0
- endoreg_db/data/_examples/yaml_examples.xlsx +0 -0
- endoreg_db/data/ai_model_meta/default_multilabel_classification.yaml +4 -3
- endoreg_db/data/event_classification/data.yaml +4 -0
- endoreg_db/data/event_classification_choice/data.yaml +9 -0
- endoreg_db/data/finding_classification/colonoscopy_bowel_preparation.yaml +43 -70
- endoreg_db/data/finding_classification/colonoscopy_lesion_size.yaml +22 -52
- endoreg_db/data/finding_classification/colonoscopy_location.yaml +31 -62
- endoreg_db/data/finding_classification/histology_colo.yaml +28 -36
- endoreg_db/data/requirement/colon_polyp_intervention.yaml +49 -0
- endoreg_db/data/requirement/coloreg_colon_polyp.yaml +49 -0
- endoreg_db/data/requirement_set/01_endoscopy_generic.yaml +31 -12
- endoreg_db/data/requirement_set/01_laboratory.yaml +13 -0
- endoreg_db/data/requirement_set/02_endoscopy_bleeding_risk.yaml +46 -0
- endoreg_db/data/requirement_set/90_coloreg.yaml +178 -0
- endoreg_db/data/requirement_set/_old_ +109 -0
- endoreg_db/data/requirement_set_type/data.yaml +21 -0
- endoreg_db/data/setup_config.yaml +4 -4
- endoreg_db/data/tag/requirement_set_tags.yaml +21 -0
- endoreg_db/exceptions.py +5 -2
- endoreg_db/helpers/data_loader.py +1 -1
- endoreg_db/management/commands/create_model_meta_from_huggingface.py +21 -10
- endoreg_db/management/commands/create_multilabel_model_meta.py +299 -129
- endoreg_db/management/commands/import_video.py +9 -10
- endoreg_db/management/commands/import_video_with_classification.py +1 -1
- endoreg_db/management/commands/init_default_ai_model.py +1 -1
- endoreg_db/management/commands/list_routes.py +18 -0
- endoreg_db/management/commands/load_ai_model_data.py +2 -1
- endoreg_db/management/commands/load_center_data.py +12 -12
- endoreg_db/management/commands/load_requirement_data.py +60 -31
- endoreg_db/management/commands/load_requirement_set_tags.py +95 -0
- endoreg_db/management/commands/setup_endoreg_db.py +14 -10
- endoreg_db/management/commands/storage_management.py +271 -203
- endoreg_db/migrations/0001_initial.py +1799 -1300
- endoreg_db/migrations/0002_requirementset_depends_on.py +18 -0
- endoreg_db/migrations/_old/0001_initial.py +1857 -0
- endoreg_db/migrations/_old/0004_employee_city_employee_post_code_employee_street_and_more.py +68 -0
- endoreg_db/migrations/_old/0004_remove_casetemplate_rules_and_more.py +77 -0
- endoreg_db/migrations/_old/0005_merge_20251111_1003.py +14 -0
- endoreg_db/migrations/_old/0006_sensitivemeta_anonymized_text_and_more.py +68 -0
- endoreg_db/migrations/_old/0007_remove_rule_attribute_dtype_remove_rule_rule_type_and_more.py +89 -0
- endoreg_db/migrations/_old/0008_remove_event_event_classification_and_more.py +27 -0
- endoreg_db/migrations/_old/0009_alter_modelmeta_options_and_more.py +21 -0
- endoreg_db/models/__init__.py +78 -123
- endoreg_db/models/administration/__init__.py +21 -42
- endoreg_db/models/administration/ai/active_model.py +2 -2
- endoreg_db/models/administration/ai/ai_model.py +7 -6
- endoreg_db/models/administration/case/__init__.py +1 -15
- endoreg_db/models/administration/case/case.py +3 -3
- endoreg_db/models/administration/case/case_template/__init__.py +2 -14
- endoreg_db/models/administration/case/case_template/case_template.py +2 -124
- endoreg_db/models/administration/case/case_template/case_template_rule.py +2 -268
- endoreg_db/models/administration/case/case_template/case_template_rule_value.py +2 -85
- endoreg_db/models/administration/case/case_template/case_template_type.py +2 -25
- endoreg_db/models/administration/center/center.py +33 -19
- endoreg_db/models/administration/center/center_product.py +12 -9
- endoreg_db/models/administration/center/center_resource.py +25 -19
- endoreg_db/models/administration/center/center_shift.py +21 -17
- endoreg_db/models/administration/center/center_waste.py +16 -8
- endoreg_db/models/administration/person/__init__.py +2 -0
- endoreg_db/models/administration/person/employee/employee.py +10 -5
- endoreg_db/models/administration/person/employee/employee_qualification.py +9 -4
- endoreg_db/models/administration/person/employee/employee_type.py +12 -6
- endoreg_db/models/administration/person/examiner/examiner.py +13 -11
- endoreg_db/models/administration/person/patient/__init__.py +2 -0
- endoreg_db/models/administration/person/patient/patient.py +103 -100
- endoreg_db/models/administration/person/patient/patient_external_id.py +37 -0
- endoreg_db/models/administration/person/person.py +4 -0
- endoreg_db/models/administration/person/profession/__init__.py +8 -4
- endoreg_db/models/administration/person/user/portal_user_information.py +11 -7
- endoreg_db/models/administration/product/product.py +20 -15
- endoreg_db/models/administration/product/product_material.py +17 -18
- endoreg_db/models/administration/product/product_weight.py +12 -8
- endoreg_db/models/administration/product/reference_product.py +23 -55
- endoreg_db/models/administration/qualification/qualification.py +7 -3
- endoreg_db/models/administration/qualification/qualification_type.py +7 -3
- endoreg_db/models/administration/shift/scheduled_days.py +8 -5
- endoreg_db/models/administration/shift/shift.py +16 -12
- endoreg_db/models/administration/shift/shift_type.py +23 -31
- endoreg_db/models/label/__init__.py +7 -8
- endoreg_db/models/label/annotation/image_classification.py +10 -9
- endoreg_db/models/label/annotation/video_segmentation_annotation.py +8 -5
- endoreg_db/models/label/label.py +15 -15
- endoreg_db/models/label/label_set.py +19 -6
- endoreg_db/models/label/label_type.py +1 -1
- endoreg_db/models/label/label_video_segment/_create_from_video.py +5 -8
- endoreg_db/models/label/label_video_segment/label_video_segment.py +76 -102
- endoreg_db/models/label/video_segmentation_label.py +4 -0
- endoreg_db/models/label/video_segmentation_labelset.py +4 -3
- endoreg_db/models/media/frame/frame.py +22 -22
- endoreg_db/models/media/pdf/raw_pdf.py +249 -177
- endoreg_db/models/media/pdf/report_file.py +25 -29
- endoreg_db/models/media/pdf/report_reader/report_reader_config.py +30 -46
- endoreg_db/models/media/pdf/report_reader/report_reader_flag.py +23 -7
- endoreg_db/models/media/video/__init__.py +1 -0
- endoreg_db/models/media/video/create_from_file.py +48 -56
- endoreg_db/models/media/video/pipe_1.py +30 -33
- endoreg_db/models/media/video/pipe_2.py +8 -9
- endoreg_db/models/media/video/video_file.py +359 -204
- endoreg_db/models/media/video/video_file_ai.py +288 -74
- endoreg_db/models/media/video/video_file_anonymize.py +38 -38
- endoreg_db/models/media/video/video_file_frames/__init__.py +3 -1
- endoreg_db/models/media/video/video_file_frames/_bulk_create_frames.py +6 -8
- endoreg_db/models/media/video/video_file_frames/_create_frame_object.py +7 -9
- endoreg_db/models/media/video/video_file_frames/_delete_frames.py +9 -8
- endoreg_db/models/media/video/video_file_frames/_extract_frames.py +38 -45
- endoreg_db/models/media/video/video_file_frames/_get_frame.py +6 -8
- endoreg_db/models/media/video/video_file_frames/_get_frame_number.py +4 -18
- endoreg_db/models/media/video/video_file_frames/_get_frame_path.py +4 -3
- endoreg_db/models/media/video/video_file_frames/_get_frame_paths.py +7 -6
- endoreg_db/models/media/video/video_file_frames/_get_frame_range.py +6 -8
- endoreg_db/models/media/video/video_file_frames/_get_frames.py +6 -8
- endoreg_db/models/media/video/video_file_frames/_initialize_frames.py +15 -25
- endoreg_db/models/media/video/video_file_frames/_manage_frame_range.py +26 -23
- endoreg_db/models/media/video/video_file_frames/_mark_frames_extracted_status.py +23 -14
- endoreg_db/models/media/video/video_file_io.py +109 -62
- endoreg_db/models/media/video/video_file_meta/get_crop_template.py +3 -3
- endoreg_db/models/media/video/video_file_meta/get_endo_roi.py +5 -3
- endoreg_db/models/media/video/video_file_meta/get_fps.py +37 -34
- endoreg_db/models/media/video/video_file_meta/initialize_video_specs.py +19 -25
- endoreg_db/models/media/video/video_file_meta/text_meta.py +41 -38
- endoreg_db/models/media/video/video_file_meta/video_meta.py +14 -7
- endoreg_db/models/media/video/video_file_segments.py +24 -17
- endoreg_db/models/media/video/video_metadata.py +19 -35
- endoreg_db/models/media/video/video_processing.py +96 -95
- endoreg_db/models/medical/contraindication/__init__.py +13 -3
- endoreg_db/models/medical/disease.py +22 -16
- endoreg_db/models/medical/event.py +31 -18
- endoreg_db/models/medical/examination/__init__.py +13 -6
- endoreg_db/models/medical/examination/examination.py +17 -18
- endoreg_db/models/medical/examination/examination_indication.py +26 -25
- endoreg_db/models/medical/examination/examination_time.py +16 -6
- endoreg_db/models/medical/examination/examination_time_type.py +9 -6
- endoreg_db/models/medical/examination/examination_type.py +3 -4
- endoreg_db/models/medical/finding/finding.py +38 -39
- endoreg_db/models/medical/finding/finding_classification.py +37 -48
- endoreg_db/models/medical/finding/finding_intervention.py +27 -22
- endoreg_db/models/medical/finding/finding_type.py +13 -12
- endoreg_db/models/medical/hardware/endoscope.py +20 -26
- endoreg_db/models/medical/hardware/endoscopy_processor.py +2 -2
- endoreg_db/models/medical/laboratory/lab_value.py +62 -91
- endoreg_db/models/medical/medication/medication.py +22 -10
- endoreg_db/models/medical/medication/medication_indication.py +29 -3
- endoreg_db/models/medical/medication/medication_indication_type.py +25 -14
- endoreg_db/models/medical/medication/medication_intake_time.py +31 -19
- endoreg_db/models/medical/medication/medication_schedule.py +27 -16
- endoreg_db/models/medical/organ/__init__.py +15 -12
- endoreg_db/models/medical/patient/medication_examples.py +1 -5
- endoreg_db/models/medical/patient/patient_disease.py +20 -23
- endoreg_db/models/medical/patient/patient_event.py +19 -22
- endoreg_db/models/medical/patient/patient_examination.py +48 -54
- endoreg_db/models/medical/patient/patient_examination_indication.py +16 -14
- endoreg_db/models/medical/patient/patient_finding.py +122 -139
- endoreg_db/models/medical/patient/patient_finding_classification.py +44 -49
- endoreg_db/models/medical/patient/patient_finding_intervention.py +8 -19
- endoreg_db/models/medical/patient/patient_lab_sample.py +28 -23
- endoreg_db/models/medical/patient/patient_lab_value.py +82 -89
- endoreg_db/models/medical/patient/patient_medication.py +27 -38
- endoreg_db/models/medical/patient/patient_medication_schedule.py +28 -36
- endoreg_db/models/medical/risk/risk.py +7 -6
- endoreg_db/models/medical/risk/risk_type.py +8 -5
- endoreg_db/models/metadata/model_meta.py +60 -29
- endoreg_db/models/metadata/model_meta_logic.py +139 -18
- endoreg_db/models/metadata/pdf_meta.py +19 -24
- endoreg_db/models/metadata/sensitive_meta.py +102 -85
- endoreg_db/models/metadata/sensitive_meta_logic.py +383 -43
- endoreg_db/models/metadata/video_meta.py +51 -31
- endoreg_db/models/metadata/video_prediction_logic.py +16 -23
- endoreg_db/models/metadata/video_prediction_meta.py +29 -33
- endoreg_db/models/other/distribution/date_value_distribution.py +89 -29
- endoreg_db/models/other/distribution/multiple_categorical_value_distribution.py +21 -5
- endoreg_db/models/other/distribution/numeric_value_distribution.py +114 -53
- endoreg_db/models/other/distribution/single_categorical_value_distribution.py +4 -3
- endoreg_db/models/other/emission/emission_factor.py +18 -8
- endoreg_db/models/other/gender.py +10 -5
- endoreg_db/models/other/information_source.py +25 -25
- endoreg_db/models/other/material.py +9 -5
- endoreg_db/models/other/resource.py +6 -4
- endoreg_db/models/other/tag.py +10 -5
- endoreg_db/models/other/transport_route.py +13 -8
- endoreg_db/models/other/unit.py +10 -6
- endoreg_db/models/other/waste.py +6 -5
- endoreg_db/models/requirement/requirement.py +580 -272
- endoreg_db/models/requirement/requirement_error.py +85 -0
- endoreg_db/models/requirement/requirement_evaluation/evaluate_with_dependencies.py +268 -0
- endoreg_db/models/requirement/requirement_evaluation/operator_evaluation_models.py +3 -6
- endoreg_db/models/requirement/requirement_evaluation/requirement_type_parser.py +90 -64
- endoreg_db/models/requirement/requirement_operator.py +36 -33
- endoreg_db/models/requirement/requirement_set.py +74 -57
- endoreg_db/models/state/__init__.py +4 -4
- endoreg_db/models/state/abstract.py +2 -2
- endoreg_db/models/state/anonymization.py +12 -0
- endoreg_db/models/state/audit_ledger.py +46 -47
- endoreg_db/models/state/label_video_segment.py +9 -0
- endoreg_db/models/state/raw_pdf.py +40 -46
- endoreg_db/models/state/sensitive_meta.py +6 -2
- endoreg_db/models/state/video.py +58 -53
- endoreg_db/models/upload_job.py +32 -55
- endoreg_db/models/utils.py +1 -2
- endoreg_db/root_urls.py +21 -2
- endoreg_db/serializers/__init__.py +26 -57
- endoreg_db/serializers/anonymization.py +18 -10
- endoreg_db/serializers/meta/report_meta.py +1 -1
- endoreg_db/serializers/meta/sensitive_meta_detail.py +63 -118
- endoreg_db/serializers/misc/__init__.py +1 -1
- endoreg_db/serializers/misc/file_overview.py +33 -91
- endoreg_db/serializers/misc/{vop_patient_data.py → sensitive_patient_data.py} +1 -1
- endoreg_db/serializers/requirements/requirement_sets.py +92 -22
- endoreg_db/serializers/video/segmentation.py +2 -1
- endoreg_db/serializers/video/video_processing_history.py +20 -5
- endoreg_db/serializers/video_examination.py +198 -0
- endoreg_db/services/anonymization.py +75 -73
- endoreg_db/services/lookup_service.py +256 -73
- endoreg_db/services/lookup_store.py +174 -30
- endoreg_db/services/pdf_import.py +711 -310
- endoreg_db/services/storage_aware_video_processor.py +140 -114
- endoreg_db/services/video_import.py +266 -117
- endoreg_db/urls/__init__.py +27 -27
- endoreg_db/urls/label_video_segments.py +2 -0
- endoreg_db/urls/media.py +108 -66
- endoreg_db/urls/root_urls.py +29 -0
- endoreg_db/utils/__init__.py +15 -5
- endoreg_db/utils/ai/multilabel_classification_net.py +116 -20
- endoreg_db/utils/case_generator/__init__.py +3 -0
- endoreg_db/utils/dataloader.py +88 -16
- endoreg_db/utils/defaults/set_default_center.py +32 -0
- endoreg_db/utils/names.py +22 -16
- endoreg_db/utils/permissions.py +2 -1
- endoreg_db/utils/pipelines/process_video_dir.py +1 -1
- endoreg_db/utils/requirement_operator_logic/model_evaluators.py +414 -127
- endoreg_db/utils/setup_config.py +8 -5
- endoreg_db/utils/storage.py +115 -0
- endoreg_db/utils/validate_endo_roi.py +8 -2
- endoreg_db/utils/video/ffmpeg_wrapper.py +184 -188
- endoreg_db/views/__init__.py +5 -12
- endoreg_db/views/anonymization/media_management.py +198 -163
- endoreg_db/views/anonymization/overview.py +4 -1
- endoreg_db/views/anonymization/validate.py +174 -40
- endoreg_db/views/media/__init__.py +2 -0
- endoreg_db/views/media/pdf_media.py +131 -150
- endoreg_db/views/media/sensitive_metadata.py +46 -6
- endoreg_db/views/media/video_media.py +89 -82
- endoreg_db/views/media/video_segments.py +187 -260
- endoreg_db/views/meta/sensitive_meta_detail.py +0 -63
- endoreg_db/views/patient/patient.py +5 -4
- endoreg_db/views/pdf/__init__.py +5 -8
- endoreg_db/views/pdf/pdf_stream.py +186 -0
- endoreg_db/views/pdf/pdf_stream_views.py +0 -127
- endoreg_db/views/pdf/reimport.py +86 -91
- endoreg_db/views/requirement/evaluate.py +188 -187
- endoreg_db/views/requirement/lookup.py +186 -288
- endoreg_db/views/requirement/requirement_utils.py +89 -0
- endoreg_db/views/video/__init__.py +0 -4
- endoreg_db/views/video/correction.py +2 -2
- endoreg_db/views/video/video_examination_viewset.py +202 -289
- {endoreg_db-0.8.4.4.dist-info → endoreg_db-0.8.8.0.dist-info}/METADATA +7 -3
- {endoreg_db-0.8.4.4.dist-info → endoreg_db-0.8.8.0.dist-info}/RECORD +350 -255
- endoreg_db/models/administration/permissions/__init__.py +0 -44
- endoreg_db/models/media/video/refactor_plan.md +0 -0
- endoreg_db/models/media/video/video_file_frames.py +0 -0
- endoreg_db/models/metadata/frame_ocr_result.py +0 -0
- endoreg_db/models/rule/__init__.py +0 -13
- endoreg_db/models/rule/rule.py +0 -27
- endoreg_db/models/rule/rule_applicator.py +0 -224
- endoreg_db/models/rule/rule_attribute_dtype.py +0 -17
- endoreg_db/models/rule/rule_type.py +0 -20
- endoreg_db/models/rule/ruleset.py +0 -17
- endoreg_db/serializers/video/video_metadata.py +0 -105
- endoreg_db/urls/report.py +0 -48
- endoreg_db/urls/video.py +0 -61
- endoreg_db/utils/case_generator/case_generator.py +0 -159
- endoreg_db/utils/case_generator/utils.py +0 -30
- endoreg_db/views/pdf/pdf_media.py +0 -239
- endoreg_db/views/report/__init__.py +0 -9
- endoreg_db/views/report/report_list.py +0 -112
- endoreg_db/views/report/report_with_secure_url.py +0 -28
- endoreg_db/views/report/start_examination.py +0 -7
- endoreg_db/views/video/video_media.py +0 -158
- endoreg_db/views.py +0 -0
- /endoreg_db/data/{requirement_set → _examples/requirement_set}/endoscopy_bleeding_risk.yaml +0 -0
- /endoreg_db/migrations/{0002_add_video_correction_models.py → _old/0002_add_video_correction_models.py} +0 -0
- /endoreg_db/migrations/{0003_add_center_display_name.py → _old/0003_add_center_display_name.py} +0 -0
- {endoreg_db-0.8.4.4.dist-info → endoreg_db-0.8.8.0.dist-info}/WHEEL +0 -0
- {endoreg_db-0.8.4.4.dist-info → endoreg_db-0.8.8.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,38 +4,39 @@
|
|
|
4
4
|
# Class contains classmethod to create object from pdf file
|
|
5
5
|
# objects contains methods to extract text, extract metadata from text and anonymize text from pdf file using agl_report_reader.ReportReader class
|
|
6
6
|
# ------------------------------------------------------------------------------
|
|
7
|
-
import
|
|
8
|
-
|
|
7
|
+
from typing import TYPE_CHECKING, Optional, cast
|
|
8
|
+
|
|
9
9
|
from django.core.exceptions import ValidationError
|
|
10
|
-
from django.core.validators import FileExtensionValidator
|
|
11
10
|
from django.core.files import File
|
|
12
|
-
from
|
|
11
|
+
from django.core.validators import FileExtensionValidator
|
|
12
|
+
from django.db import models
|
|
13
|
+
|
|
13
14
|
from endoreg_db.utils.file_operations import get_uuid_filename
|
|
14
|
-
from typing import TYPE_CHECKING, Optional, Union
|
|
15
|
-
# Use the specific paths from the centralized paths module
|
|
16
|
-
from ...utils import PDF_DIR
|
|
17
15
|
from endoreg_db.utils.hashs import get_pdf_hash
|
|
16
|
+
from endoreg_db.utils.paths import PDF_DIR
|
|
17
|
+
from endoreg_db.utils.storage import (
|
|
18
|
+
delete_field_file,
|
|
19
|
+
ensure_local_file,
|
|
20
|
+
file_exists,
|
|
21
|
+
save_local_file,
|
|
22
|
+
)
|
|
18
23
|
|
|
19
24
|
if TYPE_CHECKING:
|
|
20
|
-
from
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
)
|
|
24
|
-
from .report_file import AnonymExaminationReport
|
|
25
|
-
from ...medical.patient import PatientExamination
|
|
26
|
-
from ...administration import Center
|
|
27
|
-
from ...metadata.pdf_meta import PdfType
|
|
28
|
-
from ...state import RawPdfState
|
|
29
|
-
from ...metadata import SensitiveMeta
|
|
25
|
+
from django.db.models.fields.files import FieldFile
|
|
26
|
+
|
|
27
|
+
from endoreg_db.models.state import RawPdfState
|
|
30
28
|
|
|
31
29
|
# setup logging to pdf_import.log
|
|
32
30
|
import logging
|
|
33
|
-
|
|
34
31
|
from pathlib import Path
|
|
35
32
|
|
|
33
|
+
from ...metadata import SensitiveMeta
|
|
34
|
+
|
|
36
35
|
logger = logging.getLogger("raw_pdf")
|
|
37
36
|
|
|
37
|
+
|
|
38
38
|
class RawPdfFile(models.Model):
|
|
39
|
+
objects = models.Manager()
|
|
39
40
|
# Fields from AbstractPdfFile
|
|
40
41
|
pdf_hash = models.CharField(max_length=255, unique=True)
|
|
41
42
|
pdf_type = models.ForeignKey(
|
|
@@ -49,72 +50,128 @@ class RawPdfFile(models.Model):
|
|
|
49
50
|
on_delete=models.SET_NULL,
|
|
50
51
|
blank=True,
|
|
51
52
|
null=True,
|
|
52
|
-
)
|
|
53
|
+
)
|
|
53
54
|
examination = models.ForeignKey(
|
|
54
55
|
"PatientExamination",
|
|
55
56
|
on_delete=models.SET_NULL,
|
|
56
57
|
blank=True,
|
|
57
58
|
null=True,
|
|
58
59
|
related_name="raw_pdf_files",
|
|
59
|
-
)
|
|
60
|
+
)
|
|
60
61
|
examiner = models.ForeignKey(
|
|
61
62
|
"Examiner",
|
|
62
63
|
on_delete=models.SET_NULL,
|
|
63
64
|
blank=True,
|
|
64
65
|
null=True,
|
|
65
|
-
)
|
|
66
|
+
)
|
|
66
67
|
text = models.TextField(blank=True, null=True)
|
|
67
68
|
date_created = models.DateTimeField(auto_now_add=True)
|
|
68
69
|
date_modified = models.DateTimeField(auto_now=True)
|
|
69
70
|
anonymized = models.BooleanField(default=False, help_text="True if the PDF has been anonymized.")
|
|
70
|
-
|
|
71
|
-
# Fields specific to RawPdfFile (keeping existing related_names)
|
|
72
71
|
file = models.FileField(
|
|
73
72
|
# Use the relative path from the specific PDF_DIR
|
|
74
73
|
upload_to=PDF_DIR.name,
|
|
75
74
|
validators=[FileExtensionValidator(allowed_extensions=["pdf"])],
|
|
76
|
-
)
|
|
77
|
-
|
|
75
|
+
)
|
|
78
76
|
anonymized_file = models.FileField(
|
|
79
77
|
upload_to=PDF_DIR.name,
|
|
80
78
|
validators=[FileExtensionValidator(allowed_extensions=["pdf"])],
|
|
81
79
|
null=True,
|
|
82
80
|
blank=True,
|
|
83
|
-
)
|
|
84
|
-
|
|
81
|
+
)
|
|
85
82
|
state = models.OneToOneField(
|
|
86
83
|
"RawPdfState",
|
|
87
84
|
on_delete=models.SET_NULL,
|
|
88
85
|
blank=True,
|
|
89
86
|
null=True,
|
|
90
87
|
related_name="raw_pdf_file",
|
|
91
|
-
)
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
88
|
+
)
|
|
89
|
+
patient = models.ForeignKey(
|
|
90
|
+
"Patient",
|
|
91
|
+
on_delete=models.SET_NULL,
|
|
92
|
+
blank=True,
|
|
93
|
+
null=True,
|
|
94
|
+
related_name="raw_pdf_files",
|
|
95
|
+
)
|
|
96
|
+
sensitive_meta = models.ForeignKey(
|
|
97
|
+
"SensitiveMeta",
|
|
98
|
+
on_delete=models.SET_NULL,
|
|
99
|
+
related_name="raw_pdf_files",
|
|
100
|
+
null=True,
|
|
101
|
+
blank=True,
|
|
102
|
+
)
|
|
103
|
+
state_report_processing_required = models.BooleanField(default=True)
|
|
104
|
+
state_report_processed = models.BooleanField(default=False)
|
|
105
|
+
raw_meta = models.JSONField(blank=True, null=True)
|
|
106
|
+
anonym_examination_report = models.OneToOneField(
|
|
107
|
+
"AnonymExaminationReport",
|
|
108
|
+
on_delete=models.SET_NULL,
|
|
109
|
+
blank=True,
|
|
110
|
+
null=True,
|
|
111
|
+
related_name="raw_pdf_file",
|
|
112
|
+
)
|
|
113
|
+
anonymized_text = models.TextField(blank=True, null=True)
|
|
114
|
+
|
|
115
|
+
# Type hinting is needed, improve and use correct django types
|
|
116
|
+
if TYPE_CHECKING:
|
|
117
|
+
from endoreg_db.models import (
|
|
118
|
+
AnonymExaminationReport,
|
|
119
|
+
Center,
|
|
120
|
+
Examiner,
|
|
121
|
+
Patient,
|
|
122
|
+
PatientExamination,
|
|
123
|
+
RawPdfState,
|
|
124
|
+
SensitiveMeta,
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
center: models.ForeignKey["Center | None"]
|
|
128
|
+
examination: models.ForeignKey["PatientExamination | None"]
|
|
129
|
+
examiner: models.ForeignKey["Examiner | None"]
|
|
130
|
+
state: models.ForeignKey["RawPdfState | None"]
|
|
131
|
+
patient: models.ForeignKey["Patient | None"]
|
|
132
|
+
sensitive_meta: models.ForeignKey["SensitiveMeta | None"]
|
|
133
|
+
anonym_examination_report: models.OneToOneField["AnonymExaminationReport | None"]
|
|
134
|
+
file = cast(FieldFile, file)
|
|
135
|
+
anonymized_file = cast(FieldFile, anonymized_file)
|
|
136
|
+
|
|
95
137
|
@property
|
|
96
|
-
def
|
|
138
|
+
def uuid(self):
|
|
139
|
+
"""
|
|
140
|
+
Compatibility property - returns pdf_hash as UUID-like identifier.
|
|
141
|
+
|
|
142
|
+
Note: RawPdfFile uses pdf_hash instead of UUID for identification.
|
|
143
|
+
This property exists for API backward compatibility.
|
|
144
|
+
"""
|
|
145
|
+
return self.pdf_hash
|
|
146
|
+
|
|
147
|
+
@property
|
|
148
|
+
def file_path(self) -> Path | None:
|
|
97
149
|
"""
|
|
98
150
|
Returns the file path of the stored PDF file if available; otherwise, returns None.
|
|
99
151
|
"""
|
|
152
|
+
from django.db.models.fields.files import FieldFile
|
|
153
|
+
|
|
100
154
|
# assert self.file has path attribute
|
|
101
|
-
assert isinstance(self.file,
|
|
155
|
+
assert isinstance(self.file, FieldFile)
|
|
102
156
|
if self.file and self.file.name:
|
|
103
157
|
try:
|
|
104
158
|
return Path(self.file.path)
|
|
105
159
|
except (ValueError, AttributeError, NotImplementedError):
|
|
106
160
|
return None
|
|
107
161
|
return None
|
|
108
|
-
|
|
162
|
+
|
|
109
163
|
def set_file_path(self, file_path: Path):
|
|
110
164
|
"""
|
|
111
165
|
Sets the file path of the stored PDF file.
|
|
112
166
|
"""
|
|
113
|
-
|
|
114
|
-
|
|
167
|
+
if not file_path.exists():
|
|
168
|
+
raise FileNotFoundError(f"File path does not exist: {file_path}")
|
|
169
|
+
|
|
170
|
+
save_local_file(self.file, file_path, name=file_path.name, save=False)
|
|
171
|
+
self.save(update_fields=["file"])
|
|
115
172
|
|
|
116
173
|
@property
|
|
117
|
-
def anonymized_file_path(self) -> Path|None:
|
|
174
|
+
def anonymized_file_path(self) -> Path | None:
|
|
118
175
|
"""
|
|
119
176
|
Returns the file path of the anonymized PDF file if available; otherwise, returns None.
|
|
120
177
|
"""
|
|
@@ -124,13 +181,76 @@ class RawPdfFile(models.Model):
|
|
|
124
181
|
except (ValueError, AttributeError, NotImplementedError):
|
|
125
182
|
return None
|
|
126
183
|
return None
|
|
127
|
-
|
|
184
|
+
|
|
128
185
|
def set_anonymized_file_path(self, file_path: Path):
|
|
129
186
|
"""
|
|
130
187
|
Sets the file path of the anonymized PDF file.
|
|
131
188
|
"""
|
|
132
|
-
|
|
133
|
-
|
|
189
|
+
if not file_path.exists():
|
|
190
|
+
raise FileNotFoundError(f"File path does not exist: {file_path}")
|
|
191
|
+
|
|
192
|
+
save_local_file(self.anonymized_file, file_path, name=file_path.name, save=False)
|
|
193
|
+
self.save(update_fields=["anonymized_file"])
|
|
194
|
+
|
|
195
|
+
def get_raw_file_path(self) -> Optional[Path]:
|
|
196
|
+
"""
|
|
197
|
+
Get the path to the raw PDF file, searching common locations.
|
|
198
|
+
|
|
199
|
+
This method attempts to find the original raw PDF file by checking:
|
|
200
|
+
1. Direct hash-based path in raw_pdfs/
|
|
201
|
+
2. Scanning raw_pdfs/ directory for files matching the hash
|
|
202
|
+
3. Checking the file field if it exists
|
|
203
|
+
|
|
204
|
+
Returns:
|
|
205
|
+
Path to raw file if it exists, None otherwise
|
|
206
|
+
"""
|
|
207
|
+
from django.conf import settings
|
|
208
|
+
|
|
209
|
+
# Check if file field already points to a valid file
|
|
210
|
+
if self.file and self.file.name:
|
|
211
|
+
try:
|
|
212
|
+
file_path = Path(self.file.path)
|
|
213
|
+
if file_path.exists():
|
|
214
|
+
logger.debug(f"Found raw PDF via file field: {file_path}")
|
|
215
|
+
return file_path
|
|
216
|
+
except (ValueError, AttributeError, NotImplementedError):
|
|
217
|
+
pass
|
|
218
|
+
|
|
219
|
+
# Define potential raw directories
|
|
220
|
+
raw_dirs = [
|
|
221
|
+
PDF_DIR / "sensitive", # Files might be in sensitive dir
|
|
222
|
+
Path(settings.BASE_DIR) / "data" / "raw_pdfs",
|
|
223
|
+
Path(settings.BASE_DIR) / "data" / "pdfs" / "raw",
|
|
224
|
+
PDF_DIR, # General PDF directory
|
|
225
|
+
]
|
|
226
|
+
|
|
227
|
+
# Check direct hash-based name in each directory
|
|
228
|
+
for raw_dir in raw_dirs:
|
|
229
|
+
if not raw_dir.exists():
|
|
230
|
+
continue
|
|
231
|
+
|
|
232
|
+
hash_path = raw_dir / f"{self.pdf_hash}.pdf"
|
|
233
|
+
if hash_path.exists():
|
|
234
|
+
logger.debug(f"Found raw PDF at: {hash_path}")
|
|
235
|
+
return hash_path
|
|
236
|
+
|
|
237
|
+
# Scan directories for matching hash
|
|
238
|
+
for raw_dir in raw_dirs:
|
|
239
|
+
if not raw_dir.exists():
|
|
240
|
+
continue
|
|
241
|
+
|
|
242
|
+
for file_path in raw_dir.glob("*.pdf"):
|
|
243
|
+
try:
|
|
244
|
+
file_hash = get_pdf_hash(file_path)
|
|
245
|
+
if file_hash == self.pdf_hash:
|
|
246
|
+
logger.debug(f"Found matching PDF by hash: {file_path}")
|
|
247
|
+
return file_path
|
|
248
|
+
except Exception as e:
|
|
249
|
+
logger.debug(f"Error checking {file_path}: {e}")
|
|
250
|
+
continue
|
|
251
|
+
|
|
252
|
+
logger.warning(f"No raw file found for PDF hash: {self.pdf_hash}")
|
|
253
|
+
return None
|
|
134
254
|
|
|
135
255
|
@property
|
|
136
256
|
def file_url(self):
|
|
@@ -141,7 +261,7 @@ class RawPdfFile(models.Model):
|
|
|
141
261
|
return self.file.url if self.file and self.file.name else None
|
|
142
262
|
except (ValueError, AttributeError):
|
|
143
263
|
return None
|
|
144
|
-
|
|
264
|
+
|
|
145
265
|
@property
|
|
146
266
|
def anonymized_file_url(self):
|
|
147
267
|
"""
|
|
@@ -152,45 +272,6 @@ class RawPdfFile(models.Model):
|
|
|
152
272
|
except (ValueError, AttributeError):
|
|
153
273
|
return None
|
|
154
274
|
|
|
155
|
-
patient = models.ForeignKey(
|
|
156
|
-
"Patient",
|
|
157
|
-
on_delete=models.SET_NULL,
|
|
158
|
-
blank=True,
|
|
159
|
-
null=True,
|
|
160
|
-
related_name="raw_pdf_files",
|
|
161
|
-
) # type: ignore
|
|
162
|
-
sensitive_meta = models.ForeignKey(
|
|
163
|
-
"SensitiveMeta",
|
|
164
|
-
on_delete=models.SET_NULL,
|
|
165
|
-
related_name="raw_pdf_files",
|
|
166
|
-
null=True,
|
|
167
|
-
blank=True,
|
|
168
|
-
) # type: ignore
|
|
169
|
-
state_report_processing_required = models.BooleanField(default=True)
|
|
170
|
-
state_report_processed = models.BooleanField(default=False)
|
|
171
|
-
raw_meta = models.JSONField(blank=True, null=True)
|
|
172
|
-
anonym_examination_report = models.OneToOneField(
|
|
173
|
-
"AnonymExaminationReport",
|
|
174
|
-
on_delete=models.SET_NULL,
|
|
175
|
-
blank=True,
|
|
176
|
-
null=True,
|
|
177
|
-
related_name="raw_pdf_file",
|
|
178
|
-
) # type: ignore
|
|
179
|
-
anonymized_text = models.TextField(blank=True, null=True)
|
|
180
|
-
|
|
181
|
-
# Type hinting is needed, improve and use correct django types
|
|
182
|
-
if TYPE_CHECKING:
|
|
183
|
-
file : Optional[Union[models.FieldFile,models.FileField]]
|
|
184
|
-
anonymized_file : Optional[Union[models.FieldFile,models.FileField]]
|
|
185
|
-
pdf_type: Optional[models.ForeignKey]
|
|
186
|
-
examination: Optional[models.ForeignKey["PatientExamination"]]
|
|
187
|
-
examiner: Optional[models.ForeignKey["Examiner"]]
|
|
188
|
-
patient: Optional[models.ForeignKey["Patient"]]
|
|
189
|
-
center: Optional[models.ForeignKey["Center"]]
|
|
190
|
-
anonym_examination_report: Optional[models.OneToOneField["AnonymExaminationReport"]]
|
|
191
|
-
sensitive_meta: Optional[models.ForeignKey["SensitiveMeta"]]
|
|
192
|
-
state: Optional[models.ForeignKey["RawPdfState"]]
|
|
193
|
-
|
|
194
275
|
def __str__(self):
|
|
195
276
|
"""
|
|
196
277
|
Return a string representation of the RawPdfFile, including its PDF hash, type, and center.
|
|
@@ -201,72 +282,57 @@ class RawPdfFile(models.Model):
|
|
|
201
282
|
def delete(self, *args, **kwargs):
|
|
202
283
|
"""
|
|
203
284
|
Deletes the RawPdfFile instance from the database and removes the associated file from storage if it exists.
|
|
204
|
-
|
|
285
|
+
|
|
205
286
|
This method ensures that the physical PDF file is deleted from the file system after the database record is removed. Logs warnings or errors if the file cannot be found or deleted.
|
|
206
287
|
"""
|
|
207
|
-
|
|
208
|
-
if self.
|
|
209
|
-
try:
|
|
210
|
-
if self.file_path:
|
|
211
|
-
os.remove(Path(self.file_path))
|
|
212
|
-
logger.info("Original file removed: %s", self.file)
|
|
213
|
-
except Exception as e:
|
|
214
|
-
logger.warning(f"Could not get file path for {self.file.name} before deletion: {e}")
|
|
215
|
-
if self.anonymized_file:
|
|
216
|
-
try:
|
|
217
|
-
if self.anonymized_file_path:
|
|
218
|
-
os.remove(Path(self.anonymized_file_path))
|
|
219
|
-
logger.info("Anonymized file removed: %s", self.anonymized_file.name)
|
|
220
|
-
except OSError as e:
|
|
221
|
-
logger.error("Error removing anonymized file %s: %s", self.anonymized_file.name, e)
|
|
288
|
+
primary_name = self.file.name if self.file and self.file.name else None
|
|
289
|
+
anonymized_name = self.anonymized_file.name if self.anonymized_file and self.anonymized_file.name else None
|
|
222
290
|
|
|
223
|
-
|
|
291
|
+
if delete_field_file(self.file, missing_ok=True, save=False):
|
|
292
|
+
logger.info("Original file removed from storage: %s", primary_name)
|
|
293
|
+
if delete_field_file(self.anonymized_file, missing_ok=True, save=False):
|
|
294
|
+
logger.info("Anonymized file removed from storage: %s", anonymized_name)
|
|
224
295
|
|
|
296
|
+
super().delete(*args, **kwargs)
|
|
225
297
|
|
|
226
298
|
def validate_metadata_annotation(self, extracted_data_dict: Optional[dict] = None) -> bool:
|
|
227
299
|
"""
|
|
228
300
|
Validate the metadata of the RawPdf instance.
|
|
229
|
-
|
|
301
|
+
|
|
230
302
|
Called after annotation in the frontend, this method deletes the associated active file, updates the sensitive meta data with the user annotated data.
|
|
231
303
|
It also ensures the video file is properly saved after the metadata update.
|
|
232
304
|
"""
|
|
233
|
-
|
|
305
|
+
|
|
234
306
|
if not self.sensitive_meta:
|
|
235
307
|
logger.error("No sensitive meta data associated with this PDF file.")
|
|
236
308
|
return False
|
|
237
|
-
|
|
309
|
+
|
|
238
310
|
if not extracted_data_dict:
|
|
239
311
|
logger.error("No extracted data provided for validation.")
|
|
240
312
|
return False
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
313
|
+
|
|
314
|
+
if extracted_data_dict:
|
|
315
|
+
self.sensitive_meta.update_from_dict(extracted_data_dict)
|
|
316
|
+
else:
|
|
317
|
+
return False
|
|
318
|
+
|
|
245
319
|
# Save the sensitive meta to ensure changes are persisted
|
|
246
320
|
self.sensitive_meta.save()
|
|
247
|
-
|
|
321
|
+
|
|
248
322
|
# Save the RawPdfFile instance to ensure all changes are saved
|
|
249
323
|
self.save()
|
|
250
|
-
|
|
324
|
+
|
|
251
325
|
logger.info(f"Metadata for PDF {self.pk} validated and updated successfully.")
|
|
252
|
-
|
|
253
|
-
if self.file_path:
|
|
254
|
-
try:
|
|
255
|
-
os.unlink(self.file_path) # Delete the original file if it exists
|
|
256
|
-
except OSError as e:
|
|
257
|
-
logger.error(f"Error removing original file {self.file_path}: {e}")
|
|
258
326
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
327
|
+
deleted_original = delete_field_file(self.file, missing_ok=True, save=False)
|
|
328
|
+
deleted_anonymized = delete_field_file(self.anonymized_file, missing_ok=True, save=False)
|
|
329
|
+
self.get_or_create_state().mark_anonymization_validated()
|
|
330
|
+
|
|
331
|
+
if deleted_original or deleted_anonymized:
|
|
332
|
+
self.save(update_fields=["file", "anonymized_file"]) # Persist cleared fields
|
|
264
333
|
|
|
265
|
-
self.save() # Save the model to persist the cleared file fields
|
|
266
|
-
|
|
267
334
|
logger.info(f"Files for PDF {self.pk} deleted successfully.")
|
|
268
335
|
return True
|
|
269
|
-
|
|
270
336
|
|
|
271
337
|
@classmethod
|
|
272
338
|
def create_from_file_initialized(
|
|
@@ -277,18 +343,18 @@ class RawPdfFile(models.Model):
|
|
|
277
343
|
):
|
|
278
344
|
"""
|
|
279
345
|
Creates a RawPdfFile instance from a file and center name, ensuring an associated RawPdfState exists.
|
|
280
|
-
|
|
346
|
+
|
|
281
347
|
Parameters:
|
|
282
348
|
file_path (Path): Path to the source PDF file.
|
|
283
349
|
center_name (str): Name of the center to associate with the PDF.
|
|
284
350
|
delete_source (bool): Whether to delete the source file after processing. Defaults to True.
|
|
285
|
-
|
|
351
|
+
|
|
286
352
|
Returns:
|
|
287
353
|
RawPdfFile: The created or retrieved RawPdfFile instance with an associated RawPdfState.
|
|
288
354
|
"""
|
|
289
355
|
raw_pdf = cls.create_from_file(
|
|
290
356
|
file_path=file_path,
|
|
291
|
-
center_name=center_name,
|
|
357
|
+
center_name=center_name,
|
|
292
358
|
delete_source=delete_source,
|
|
293
359
|
)
|
|
294
360
|
_state = raw_pdf.get_or_create_state()
|
|
@@ -305,18 +371,18 @@ class RawPdfFile(models.Model):
|
|
|
305
371
|
):
|
|
306
372
|
"""
|
|
307
373
|
Creates or retrieves a RawPdfFile instance from a given PDF file path and center name.
|
|
308
|
-
|
|
374
|
+
|
|
309
375
|
If a RawPdfFile with the same PDF hash already exists, verifies the file exists in storage and restores it if missing. Otherwise, creates a new RawPdfFile, assigns the file, and saves it to storage. Optionally deletes the source file after processing.
|
|
310
|
-
|
|
376
|
+
|
|
311
377
|
Parameters:
|
|
312
378
|
file_path (Path): Path to the source PDF file.
|
|
313
379
|
center_name (str): Name of the center to associate with the file.
|
|
314
380
|
save (bool, optional): Deprecated; saving occurs internally.
|
|
315
381
|
delete_source (bool, optional): Whether to delete the source file after processing (default True).
|
|
316
|
-
|
|
382
|
+
|
|
317
383
|
Returns:
|
|
318
384
|
RawPdfFile: The created or retrieved RawPdfFile instance.
|
|
319
|
-
|
|
385
|
+
|
|
320
386
|
Raises:
|
|
321
387
|
FileNotFoundError: If the source file does not exist.
|
|
322
388
|
Center.DoesNotExist: If the specified center is not found.
|
|
@@ -338,28 +404,41 @@ class RawPdfFile(models.Model):
|
|
|
338
404
|
raise ValueError(f"Could not calculate hash for {file_path}") from e
|
|
339
405
|
|
|
340
406
|
# 2. Check if record with this hash already exists
|
|
341
|
-
existing_pdf_file = cls.objects.filter(pdf_hash=pdf_hash).first()
|
|
407
|
+
existing_pdf_file = cls.objects.filter(pdf_hash=pdf_hash).first()
|
|
342
408
|
if existing_pdf_file:
|
|
343
|
-
logger.warning(
|
|
409
|
+
logger.warning(
|
|
410
|
+
"RawPdfFile with hash %s already exists (ID: %s)",
|
|
411
|
+
pdf_hash,
|
|
412
|
+
existing_pdf_file.pk,
|
|
413
|
+
)
|
|
344
414
|
|
|
345
415
|
# Verify physical file exists for the existing record
|
|
346
416
|
try:
|
|
347
417
|
if existing_pdf_file is not None and isinstance(existing_pdf_file, cls):
|
|
348
|
-
|
|
418
|
+
# Use storage API to check existence
|
|
349
419
|
_file = existing_pdf_file.file
|
|
350
420
|
assert _file is not None
|
|
351
421
|
if not _file.storage.exists(_file.name):
|
|
352
|
-
logger.warning(
|
|
422
|
+
logger.warning(
|
|
423
|
+
"File for existing RawPdfFile %s not found in storage at %s. Attempting to restore from source %s",
|
|
424
|
+
pdf_hash,
|
|
425
|
+
_file.name,
|
|
426
|
+
file_path,
|
|
427
|
+
)
|
|
353
428
|
# Re-save the file from the source to potentially fix it
|
|
354
429
|
with file_path.open("rb") as f:
|
|
355
430
|
django_file = File(f, name=Path(_file.name).name) # Use existing name if possible
|
|
356
|
-
existing_pdf_file.file = django_file
|
|
357
|
-
existing_pdf_file.save(update_fields=[
|
|
431
|
+
existing_pdf_file.file = django_file
|
|
432
|
+
existing_pdf_file.save(update_fields=["file"]) # Only update file field
|
|
358
433
|
else:
|
|
359
434
|
pass
|
|
360
435
|
# logger.debug("File for existing RawPdfFile %s already exists in storage.", pdf_hash)
|
|
361
436
|
except Exception as e:
|
|
362
|
-
logger.error(
|
|
437
|
+
logger.error(
|
|
438
|
+
"Error verifying/restoring file for existing record %s: %s",
|
|
439
|
+
pdf_hash,
|
|
440
|
+
e,
|
|
441
|
+
)
|
|
363
442
|
|
|
364
443
|
# Delete the source temp file if requested
|
|
365
444
|
if delete_source:
|
|
@@ -396,7 +475,9 @@ class RawPdfFile(models.Model):
|
|
|
396
475
|
_file = raw_pdf.file
|
|
397
476
|
assert _file is not None
|
|
398
477
|
logger.info(
|
|
399
|
-
"Created and saved new RawPdfFile %s with file %s",
|
|
478
|
+
"Created and saved new RawPdfFile %s with file %s",
|
|
479
|
+
raw_pdf.pk,
|
|
480
|
+
_file.name,
|
|
400
481
|
)
|
|
401
482
|
|
|
402
483
|
if not _file.storage.exists(_file.name):
|
|
@@ -404,9 +485,7 @@ class RawPdfFile(models.Model):
|
|
|
404
485
|
"File was not saved correctly to storage path %s after model save.",
|
|
405
486
|
_file.name,
|
|
406
487
|
)
|
|
407
|
-
raise IOError(
|
|
408
|
-
f"File not found at expected storage path after save: {_file.name}"
|
|
409
|
-
)
|
|
488
|
+
raise IOError(f"File not found at expected storage path after save: {_file.name}")
|
|
410
489
|
|
|
411
490
|
try:
|
|
412
491
|
logger.info("File saved to absolute path: %s", _file.path)
|
|
@@ -428,7 +507,7 @@ class RawPdfFile(models.Model):
|
|
|
428
507
|
except OSError as e:
|
|
429
508
|
logger.error("Error deleting source file %s: %s", file_path, e)
|
|
430
509
|
|
|
431
|
-
# raw_pdf.save() # unnecessary?
|
|
510
|
+
# raw_pdf.save() # unnecessary?
|
|
432
511
|
return raw_pdf
|
|
433
512
|
|
|
434
513
|
def save(self, *args, **kwargs):
|
|
@@ -436,26 +515,20 @@ class RawPdfFile(models.Model):
|
|
|
436
515
|
# This is primarily a fallback if instance created manually without using create_from_file
|
|
437
516
|
"""
|
|
438
517
|
Saves the RawPdfFile instance, ensuring the PDF hash is set and related fields are derived from metadata.
|
|
439
|
-
|
|
518
|
+
|
|
440
519
|
If the PDF hash is missing, attempts to calculate it from the file before saving. Validates that the file has a `.pdf` extension. If related fields such as patient, examination, center, or examiner are unset but available in the associated sensitive metadata, they are populated accordingly before saving.
|
|
441
520
|
"""
|
|
442
521
|
if not self.pk and not self.pdf_hash and self.file:
|
|
443
522
|
try:
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
logger.info(f"Calculated hash during pre-save for {self.file.name}")
|
|
454
|
-
except Exception as e:
|
|
455
|
-
logger.warning("Could not calculate hash before initial save for %s: %s", self.file.name, e)
|
|
456
|
-
# Ensure file is closed if opened
|
|
457
|
-
if hasattr(self.file, 'closed') and not self.file.closed:
|
|
458
|
-
self.file.close()
|
|
523
|
+
with ensure_local_file(self.file) as local_path:
|
|
524
|
+
self.pdf_hash = get_pdf_hash(local_path)
|
|
525
|
+
logger.info("Calculated hash during pre-save for %s", self.file.name)
|
|
526
|
+
except Exception as exc:
|
|
527
|
+
logger.warning(
|
|
528
|
+
"Could not calculate hash before initial save for %s: %s",
|
|
529
|
+
self.file.name,
|
|
530
|
+
exc,
|
|
531
|
+
)
|
|
459
532
|
|
|
460
533
|
if self.file and not self.file.name.endswith(".pdf"):
|
|
461
534
|
raise ValidationError("Only PDF files are allowed")
|
|
@@ -463,18 +536,17 @@ class RawPdfFile(models.Model):
|
|
|
463
536
|
# If hash is still missing after potential creation logic (e.g., direct instantiation)
|
|
464
537
|
# and the file exists in storage, try calculating it from storage path.
|
|
465
538
|
# This is less ideal as it requires the file to be saved first.
|
|
466
|
-
if not self.pdf_hash and self.pk and self.file and
|
|
539
|
+
if not self.pdf_hash and self.pk and self.file and file_exists(self.file):
|
|
467
540
|
try:
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
logger.error("Could not calculate hash during save for existing file %s: %s", self.file.name, e)
|
|
541
|
+
with ensure_local_file(self.file) as local_path:
|
|
542
|
+
logger.warning("Hash missing for saved file %s. Recalculating.", self.file.name)
|
|
543
|
+
self.pdf_hash = get_pdf_hash(local_path)
|
|
544
|
+
except Exception as exc:
|
|
545
|
+
logger.error(
|
|
546
|
+
"Could not calculate hash during save for existing file %s: %s",
|
|
547
|
+
self.file.name,
|
|
548
|
+
exc,
|
|
549
|
+
)
|
|
478
550
|
|
|
479
551
|
# Derive related fields from sensitive_meta if available
|
|
480
552
|
if not self.patient and self.sensitive_meta:
|
|
@@ -483,7 +555,7 @@ class RawPdfFile(models.Model):
|
|
|
483
555
|
self.examination = self.sensitive_meta.pseudo_examination
|
|
484
556
|
if not self.center and self.sensitive_meta:
|
|
485
557
|
self.center = self.sensitive_meta.center
|
|
486
|
-
#TODO Outdated?
|
|
558
|
+
# TODO Outdated?
|
|
487
559
|
# if not self.examiner and self.sensitive_meta and hasattr(self.sensitive_meta, 'pseudo_examiner'):
|
|
488
560
|
# self.examiner = self.sensitive_meta.pseudo_examiner
|
|
489
561
|
|
|
@@ -492,7 +564,7 @@ class RawPdfFile(models.Model):
|
|
|
492
564
|
def get_or_create_state(self) -> "RawPdfState":
|
|
493
565
|
"""
|
|
494
566
|
Retrieve the associated RawPdfState for this RawPdfFile, creating and linking a new one if none exists.
|
|
495
|
-
|
|
567
|
+
|
|
496
568
|
Returns:
|
|
497
569
|
RawPdfState: The existing or newly created RawPdfState instance linked to this RawPdfFile.
|
|
498
570
|
"""
|
|
@@ -514,7 +586,7 @@ class RawPdfFile(models.Model):
|
|
|
514
586
|
# Ensure fallback_file is a Path object.
|
|
515
587
|
"""
|
|
516
588
|
Checks if the stored PDF file exists in storage and attempts to restore it from a fallback file path if missing.
|
|
517
|
-
|
|
589
|
+
|
|
518
590
|
Parameters:
|
|
519
591
|
fallback_file: Path or string representing the fallback file location to restore from if the stored file is missing.
|
|
520
592
|
"""
|
|
@@ -534,7 +606,6 @@ class RawPdfFile(models.Model):
|
|
|
534
606
|
else:
|
|
535
607
|
logger.error(f"Fallback file {fallback_file} does not exist.")
|
|
536
608
|
except Exception as e:
|
|
537
|
-
|
|
538
609
|
logger.error(f"Error during verify_existing_file for {_file.name}: {e}")
|
|
539
610
|
|
|
540
611
|
def process_file(self, text, anonymized_text, report_meta, verbose):
|
|
@@ -562,18 +633,19 @@ class RawPdfFile(models.Model):
|
|
|
562
633
|
for key, value in serializable_report_meta.items():
|
|
563
634
|
if isinstance(value, (datetime, date)):
|
|
564
635
|
serializable_report_meta[key] = value.isoformat()
|
|
565
|
-
|
|
566
|
-
self.raw_meta = serializable_report_meta # Assign the version with string dates
|
|
567
636
|
|
|
568
|
-
|
|
569
|
-
|
|
637
|
+
self.raw_meta = serializable_report_meta # Assign the version with string dates
|
|
638
|
+
|
|
639
|
+
sensitive_meta.save() # Save SensitiveMeta first
|
|
640
|
+
self.save() # Then save RawPdfFile
|
|
570
641
|
|
|
571
642
|
return text, anonymized_text, report_meta
|
|
572
643
|
|
|
573
644
|
def get_report_reader_config(self):
|
|
645
|
+
from warnings import warn
|
|
646
|
+
|
|
574
647
|
from ...administration import Center
|
|
575
648
|
from ...metadata.pdf_meta import PdfType
|
|
576
|
-
from warnings import warn
|
|
577
649
|
|
|
578
650
|
_center = self.center
|
|
579
651
|
assert _center is not None, "Center must be set to get report reader config"
|
|
@@ -604,10 +676,10 @@ class RawPdfFile(models.Model):
|
|
|
604
676
|
}
|
|
605
677
|
|
|
606
678
|
return settings_dict
|
|
607
|
-
|
|
679
|
+
|
|
608
680
|
@staticmethod
|
|
609
681
|
def get_pdf_by_id(pdf_id: int) -> "RawPdfFile":
|
|
610
682
|
try:
|
|
611
683
|
return RawPdfFile.objects.get(pk=pdf_id)
|
|
612
684
|
except RawPdfFile.DoesNotExist:
|
|
613
|
-
raise ValueError(f"PDF with ID {pdf_id} does not exist.")
|
|
685
|
+
raise ValueError(f"PDF with ID {pdf_id} does not exist.")
|