endoreg-db 0.8.6.1__py3-none-any.whl → 0.8.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of endoreg-db might be problematic. Click here for more details.
- endoreg_db/authz/auth.py +74 -0
- endoreg_db/authz/backends.py +168 -0
- endoreg_db/authz/management/commands/list_routes.py +18 -0
- endoreg_db/authz/middleware.py +83 -0
- endoreg_db/authz/permissions.py +127 -0
- endoreg_db/authz/policy.py +218 -0
- endoreg_db/authz/views_auth.py +66 -0
- endoreg_db/config/env.py +13 -8
- endoreg_db/data/__init__.py +8 -31
- endoreg_db/data/_examples/disease.yaml +55 -0
- endoreg_db/data/_examples/disease_classification.yaml +13 -0
- endoreg_db/data/_examples/disease_classification_choice.yaml +62 -0
- endoreg_db/data/_examples/event.yaml +64 -0
- endoreg_db/data/_examples/examination.yaml +72 -0
- endoreg_db/data/_examples/finding/anatomy_colon.yaml +128 -0
- endoreg_db/data/_examples/finding/colonoscopy.yaml +40 -0
- endoreg_db/data/_examples/finding/colonoscopy_bowel_prep.yaml +56 -0
- endoreg_db/data/_examples/finding/complication.yaml +16 -0
- endoreg_db/data/_examples/finding/data.yaml +105 -0
- endoreg_db/data/_examples/finding/examination_setting.yaml +16 -0
- endoreg_db/data/_examples/finding/medication_related.yaml +18 -0
- endoreg_db/data/_examples/finding/outcome.yaml +12 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_bowel_preparation.yaml +68 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_jnet.yaml +22 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_kudo.yaml +25 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_circularity.yaml +20 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_planarity.yaml +24 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_size.yaml +68 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_surface.yaml +20 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_location.yaml +80 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_lst.yaml +21 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_nice.yaml +20 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_paris.yaml +26 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_sano.yaml +22 -0
- endoreg_db/data/_examples/finding_classification/colonoscopy_summary.yaml +53 -0
- endoreg_db/data/_examples/finding_classification/complication_generic.yaml +25 -0
- endoreg_db/data/_examples/finding_classification/examination_setting_generic.yaml +40 -0
- endoreg_db/data/_examples/finding_classification/histology_colo.yaml +51 -0
- endoreg_db/data/_examples/finding_classification/intervention_required.yaml +26 -0
- endoreg_db/data/_examples/finding_classification/medication_related.yaml +23 -0
- endoreg_db/data/_examples/finding_classification/visualized.yaml +33 -0
- endoreg_db/data/_examples/finding_classification_choice/bowel_preparation.yaml +78 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_circularity_default.yaml +32 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_jnet.yaml +15 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_kudo.yaml +23 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_lst.yaml +15 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_nice.yaml +17 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_paris.yaml +57 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_planarity_default.yaml +49 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_sano.yaml +14 -0
- endoreg_db/data/_examples/finding_classification_choice/colon_lesion_surface_intact_default.yaml +36 -0
- endoreg_db/data/_examples/finding_classification_choice/colonoscopy_location.yaml +229 -0
- endoreg_db/data/_examples/finding_classification_choice/colonoscopy_not_complete_reason.yaml +19 -0
- endoreg_db/data/_examples/finding_classification_choice/colonoscopy_size.yaml +82 -0
- endoreg_db/data/_examples/finding_classification_choice/colonoscopy_summary_worst_finding.yaml +15 -0
- endoreg_db/data/_examples/finding_classification_choice/complication_generic_types.yaml +15 -0
- endoreg_db/data/_examples/finding_classification_choice/examination_setting_generic_types.yaml +15 -0
- endoreg_db/data/_examples/finding_classification_choice/histology.yaml +24 -0
- endoreg_db/data/_examples/finding_classification_choice/histology_polyp.yaml +20 -0
- endoreg_db/data/_examples/finding_classification_choice/outcome.yaml +19 -0
- endoreg_db/data/_examples/finding_classification_choice/yes_no_na.yaml +11 -0
- endoreg_db/data/_examples/finding_classification_type/colonoscopy_basic.yaml +48 -0
- endoreg_db/data/_examples/finding_intervention/endoscopy.yaml +43 -0
- endoreg_db/data/_examples/finding_intervention/endoscopy_colonoscopy.yaml +168 -0
- endoreg_db/data/_examples/finding_intervention/endoscopy_egd.yaml +128 -0
- endoreg_db/data/_examples/finding_intervention/endoscopy_ercp.yaml +32 -0
- endoreg_db/data/_examples/finding_intervention/endoscopy_eus_lower.yaml +9 -0
- endoreg_db/data/_examples/finding_intervention/endoscopy_eus_upper.yaml +36 -0
- endoreg_db/data/_examples/finding_intervention_type/endoscopy.yaml +15 -0
- endoreg_db/data/_examples/finding_type/data.yaml +43 -0
- endoreg_db/data/_examples/requirement/age.yaml +26 -0
- endoreg_db/data/_examples/requirement/colonoscopy_baseline_austria.yaml +45 -0
- endoreg_db/data/_examples/requirement/disease_cardiovascular.yaml +79 -0
- endoreg_db/data/_examples/requirement/disease_classification_choice_cardiovascular.yaml +41 -0
- endoreg_db/data/_examples/requirement/disease_hepatology.yaml +12 -0
- endoreg_db/data/_examples/requirement/disease_misc.yaml +12 -0
- endoreg_db/data/_examples/requirement/disease_renal.yaml +96 -0
- endoreg_db/data/_examples/requirement/endoscopy_bleeding_risk.yaml +59 -0
- endoreg_db/data/_examples/requirement/event_cardiology.yaml +251 -0
- endoreg_db/data/_examples/requirement/event_requirements.yaml +145 -0
- endoreg_db/data/_examples/requirement/finding_colon_polyp.yaml +50 -0
- endoreg_db/data/_examples/requirement/gender.yaml +25 -0
- endoreg_db/data/_examples/requirement/lab_value.yaml +441 -0
- endoreg_db/data/_examples/requirement/medication.yaml +93 -0
- endoreg_db/data/_examples/requirement_operator/age.yaml +13 -0
- endoreg_db/data/_examples/requirement_operator/lab_operators.yaml +129 -0
- endoreg_db/data/_examples/requirement_operator/model_operators.yaml +96 -0
- endoreg_db/data/_examples/requirement_set/01_endoscopy_generic.yaml +48 -0
- endoreg_db/data/_examples/requirement_set/colonoscopy_austria_screening.yaml +57 -0
- endoreg_db/data/_examples/yaml_examples.xlsx +0 -0
- endoreg_db/data/ai_model_meta/default_multilabel_classification.yaml +4 -3
- endoreg_db/data/event_classification/data.yaml +4 -0
- endoreg_db/data/event_classification_choice/data.yaml +9 -0
- endoreg_db/data/finding_classification/colonoscopy_bowel_preparation.yaml +43 -70
- endoreg_db/data/finding_classification/colonoscopy_lesion_size.yaml +22 -52
- endoreg_db/data/finding_classification/colonoscopy_location.yaml +31 -62
- endoreg_db/data/finding_classification/histology_colo.yaml +28 -36
- endoreg_db/data/requirement/colon_polyp_intervention.yaml +49 -0
- endoreg_db/data/requirement/coloreg_colon_polyp.yaml +49 -0
- endoreg_db/data/requirement_set/01_endoscopy_generic.yaml +31 -12
- endoreg_db/data/requirement_set/01_laboratory.yaml +13 -0
- endoreg_db/data/requirement_set/02_endoscopy_bleeding_risk.yaml +46 -0
- endoreg_db/data/requirement_set/90_coloreg.yaml +178 -0
- endoreg_db/data/requirement_set/_old_ +109 -0
- endoreg_db/data/requirement_set_type/data.yaml +21 -0
- endoreg_db/data/setup_config.yaml +4 -4
- endoreg_db/data/tag/requirement_set_tags.yaml +21 -0
- endoreg_db/exceptions.py +5 -2
- endoreg_db/helpers/data_loader.py +1 -1
- endoreg_db/management/commands/create_model_meta_from_huggingface.py +21 -10
- endoreg_db/management/commands/create_multilabel_model_meta.py +299 -129
- endoreg_db/management/commands/import_video.py +9 -10
- endoreg_db/management/commands/import_video_with_classification.py +1 -1
- endoreg_db/management/commands/init_default_ai_model.py +1 -1
- endoreg_db/management/commands/list_routes.py +18 -0
- endoreg_db/management/commands/load_center_data.py +12 -12
- endoreg_db/management/commands/load_requirement_data.py +60 -31
- endoreg_db/management/commands/load_requirement_set_tags.py +95 -0
- endoreg_db/management/commands/setup_endoreg_db.py +3 -3
- endoreg_db/management/commands/storage_management.py +271 -203
- endoreg_db/migrations/0001_initial.py +1799 -1300
- endoreg_db/migrations/0002_requirementset_depends_on.py +18 -0
- endoreg_db/migrations/_old/0001_initial.py +1857 -0
- endoreg_db/migrations/_old/0004_employee_city_employee_post_code_employee_street_and_more.py +68 -0
- endoreg_db/migrations/_old/0004_remove_casetemplate_rules_and_more.py +77 -0
- endoreg_db/migrations/_old/0005_merge_20251111_1003.py +14 -0
- endoreg_db/migrations/_old/0006_sensitivemeta_anonymized_text_and_more.py +68 -0
- endoreg_db/migrations/_old/0007_remove_rule_attribute_dtype_remove_rule_rule_type_and_more.py +89 -0
- endoreg_db/migrations/_old/0008_remove_event_event_classification_and_more.py +27 -0
- endoreg_db/migrations/_old/0009_alter_modelmeta_options_and_more.py +21 -0
- endoreg_db/models/__init__.py +78 -123
- endoreg_db/models/administration/__init__.py +21 -42
- endoreg_db/models/administration/ai/active_model.py +2 -2
- endoreg_db/models/administration/ai/ai_model.py +7 -6
- endoreg_db/models/administration/case/__init__.py +1 -15
- endoreg_db/models/administration/case/case.py +3 -3
- endoreg_db/models/administration/case/case_template/__init__.py +2 -14
- endoreg_db/models/administration/case/case_template/case_template.py +2 -124
- endoreg_db/models/administration/case/case_template/case_template_rule.py +2 -268
- endoreg_db/models/administration/case/case_template/case_template_rule_value.py +2 -85
- endoreg_db/models/administration/case/case_template/case_template_type.py +2 -25
- endoreg_db/models/administration/center/center.py +33 -19
- endoreg_db/models/administration/center/center_product.py +12 -9
- endoreg_db/models/administration/center/center_resource.py +25 -19
- endoreg_db/models/administration/center/center_shift.py +21 -17
- endoreg_db/models/administration/center/center_waste.py +16 -8
- endoreg_db/models/administration/person/__init__.py +2 -0
- endoreg_db/models/administration/person/employee/employee.py +10 -5
- endoreg_db/models/administration/person/employee/employee_qualification.py +9 -4
- endoreg_db/models/administration/person/employee/employee_type.py +12 -6
- endoreg_db/models/administration/person/examiner/examiner.py +13 -11
- endoreg_db/models/administration/person/patient/__init__.py +2 -0
- endoreg_db/models/administration/person/patient/patient.py +103 -100
- endoreg_db/models/administration/person/patient/patient_external_id.py +37 -0
- endoreg_db/models/administration/person/person.py +4 -0
- endoreg_db/models/administration/person/profession/__init__.py +8 -4
- endoreg_db/models/administration/person/user/portal_user_information.py +11 -7
- endoreg_db/models/administration/product/product.py +20 -15
- endoreg_db/models/administration/product/product_material.py +17 -18
- endoreg_db/models/administration/product/product_weight.py +12 -8
- endoreg_db/models/administration/product/reference_product.py +23 -55
- endoreg_db/models/administration/qualification/qualification.py +7 -3
- endoreg_db/models/administration/qualification/qualification_type.py +7 -3
- endoreg_db/models/administration/shift/scheduled_days.py +8 -5
- endoreg_db/models/administration/shift/shift.py +16 -12
- endoreg_db/models/administration/shift/shift_type.py +23 -31
- endoreg_db/models/label/__init__.py +7 -8
- endoreg_db/models/label/annotation/image_classification.py +10 -9
- endoreg_db/models/label/annotation/video_segmentation_annotation.py +8 -5
- endoreg_db/models/label/label.py +15 -15
- endoreg_db/models/label/label_set.py +19 -6
- endoreg_db/models/label/label_type.py +1 -1
- endoreg_db/models/label/label_video_segment/_create_from_video.py +5 -8
- endoreg_db/models/label/label_video_segment/label_video_segment.py +76 -102
- endoreg_db/models/label/video_segmentation_label.py +4 -0
- endoreg_db/models/label/video_segmentation_labelset.py +4 -3
- endoreg_db/models/media/frame/frame.py +22 -22
- endoreg_db/models/media/pdf/raw_pdf.py +110 -182
- endoreg_db/models/media/pdf/report_file.py +25 -29
- endoreg_db/models/media/pdf/report_reader/report_reader_config.py +30 -46
- endoreg_db/models/media/pdf/report_reader/report_reader_flag.py +23 -7
- endoreg_db/models/media/video/__init__.py +1 -0
- endoreg_db/models/media/video/create_from_file.py +48 -56
- endoreg_db/models/media/video/pipe_2.py +8 -9
- endoreg_db/models/media/video/video_file.py +150 -108
- endoreg_db/models/media/video/video_file_ai.py +288 -74
- endoreg_db/models/media/video/video_file_anonymize.py +38 -38
- endoreg_db/models/media/video/video_file_frames/__init__.py +3 -1
- endoreg_db/models/media/video/video_file_frames/_bulk_create_frames.py +6 -8
- endoreg_db/models/media/video/video_file_frames/_create_frame_object.py +7 -9
- endoreg_db/models/media/video/video_file_frames/_delete_frames.py +9 -8
- endoreg_db/models/media/video/video_file_frames/_extract_frames.py +38 -45
- endoreg_db/models/media/video/video_file_frames/_get_frame.py +6 -8
- endoreg_db/models/media/video/video_file_frames/_get_frame_number.py +4 -18
- endoreg_db/models/media/video/video_file_frames/_get_frame_path.py +4 -3
- endoreg_db/models/media/video/video_file_frames/_get_frame_paths.py +7 -6
- endoreg_db/models/media/video/video_file_frames/_get_frame_range.py +6 -8
- endoreg_db/models/media/video/video_file_frames/_get_frames.py +6 -8
- endoreg_db/models/media/video/video_file_frames/_initialize_frames.py +15 -25
- endoreg_db/models/media/video/video_file_frames/_manage_frame_range.py +26 -23
- endoreg_db/models/media/video/video_file_frames/_mark_frames_extracted_status.py +23 -14
- endoreg_db/models/media/video/video_file_io.py +109 -62
- endoreg_db/models/media/video/video_file_meta/get_crop_template.py +3 -3
- endoreg_db/models/media/video/video_file_meta/get_endo_roi.py +5 -3
- endoreg_db/models/media/video/video_file_meta/get_fps.py +37 -34
- endoreg_db/models/media/video/video_file_meta/initialize_video_specs.py +19 -25
- endoreg_db/models/media/video/video_file_meta/text_meta.py +41 -38
- endoreg_db/models/media/video/video_file_meta/video_meta.py +14 -7
- endoreg_db/models/media/video/video_file_segments.py +24 -17
- endoreg_db/models/media/video/video_metadata.py +19 -35
- endoreg_db/models/media/video/video_processing.py +96 -95
- endoreg_db/models/medical/contraindication/__init__.py +13 -3
- endoreg_db/models/medical/disease.py +22 -16
- endoreg_db/models/medical/event.py +31 -18
- endoreg_db/models/medical/examination/__init__.py +13 -6
- endoreg_db/models/medical/examination/examination.py +17 -18
- endoreg_db/models/medical/examination/examination_indication.py +26 -25
- endoreg_db/models/medical/examination/examination_time.py +16 -6
- endoreg_db/models/medical/examination/examination_time_type.py +9 -6
- endoreg_db/models/medical/examination/examination_type.py +3 -4
- endoreg_db/models/medical/finding/finding.py +38 -39
- endoreg_db/models/medical/finding/finding_classification.py +37 -48
- endoreg_db/models/medical/finding/finding_intervention.py +27 -22
- endoreg_db/models/medical/finding/finding_type.py +13 -12
- endoreg_db/models/medical/hardware/endoscope.py +20 -26
- endoreg_db/models/medical/hardware/endoscopy_processor.py +2 -2
- endoreg_db/models/medical/laboratory/lab_value.py +62 -91
- endoreg_db/models/medical/medication/medication.py +22 -10
- endoreg_db/models/medical/medication/medication_indication.py +29 -3
- endoreg_db/models/medical/medication/medication_indication_type.py +25 -14
- endoreg_db/models/medical/medication/medication_intake_time.py +31 -19
- endoreg_db/models/medical/medication/medication_schedule.py +27 -16
- endoreg_db/models/medical/organ/__init__.py +15 -12
- endoreg_db/models/medical/patient/medication_examples.py +1 -5
- endoreg_db/models/medical/patient/patient_disease.py +20 -23
- endoreg_db/models/medical/patient/patient_event.py +19 -22
- endoreg_db/models/medical/patient/patient_examination.py +48 -54
- endoreg_db/models/medical/patient/patient_examination_indication.py +16 -14
- endoreg_db/models/medical/patient/patient_finding.py +122 -139
- endoreg_db/models/medical/patient/patient_finding_classification.py +44 -49
- endoreg_db/models/medical/patient/patient_finding_intervention.py +8 -19
- endoreg_db/models/medical/patient/patient_lab_sample.py +28 -23
- endoreg_db/models/medical/patient/patient_lab_value.py +82 -89
- endoreg_db/models/medical/patient/patient_medication.py +27 -38
- endoreg_db/models/medical/patient/patient_medication_schedule.py +28 -36
- endoreg_db/models/medical/risk/risk.py +7 -6
- endoreg_db/models/medical/risk/risk_type.py +8 -5
- endoreg_db/models/metadata/model_meta.py +60 -29
- endoreg_db/models/metadata/model_meta_logic.py +125 -18
- endoreg_db/models/metadata/pdf_meta.py +19 -24
- endoreg_db/models/metadata/sensitive_meta.py +102 -85
- endoreg_db/models/metadata/sensitive_meta_logic.py +192 -173
- endoreg_db/models/metadata/video_meta.py +51 -31
- endoreg_db/models/metadata/video_prediction_logic.py +16 -23
- endoreg_db/models/metadata/video_prediction_meta.py +29 -33
- endoreg_db/models/other/distribution/date_value_distribution.py +89 -29
- endoreg_db/models/other/distribution/multiple_categorical_value_distribution.py +21 -5
- endoreg_db/models/other/distribution/numeric_value_distribution.py +114 -53
- endoreg_db/models/other/distribution/single_categorical_value_distribution.py +4 -3
- endoreg_db/models/other/emission/emission_factor.py +18 -8
- endoreg_db/models/other/gender.py +10 -5
- endoreg_db/models/other/information_source.py +25 -25
- endoreg_db/models/other/material.py +9 -5
- endoreg_db/models/other/resource.py +6 -4
- endoreg_db/models/other/tag.py +10 -5
- endoreg_db/models/other/transport_route.py +13 -8
- endoreg_db/models/other/unit.py +10 -6
- endoreg_db/models/other/waste.py +6 -5
- endoreg_db/models/requirement/requirement.py +580 -272
- endoreg_db/models/requirement/requirement_error.py +85 -0
- endoreg_db/models/requirement/requirement_evaluation/evaluate_with_dependencies.py +268 -0
- endoreg_db/models/requirement/requirement_evaluation/operator_evaluation_models.py +3 -6
- endoreg_db/models/requirement/requirement_evaluation/requirement_type_parser.py +90 -64
- endoreg_db/models/requirement/requirement_operator.py +36 -33
- endoreg_db/models/requirement/requirement_set.py +74 -57
- endoreg_db/models/state/__init__.py +4 -4
- endoreg_db/models/state/abstract.py +2 -2
- endoreg_db/models/state/anonymization.py +12 -0
- endoreg_db/models/state/audit_ledger.py +46 -47
- endoreg_db/models/state/label_video_segment.py +9 -0
- endoreg_db/models/state/raw_pdf.py +40 -46
- endoreg_db/models/state/sensitive_meta.py +6 -2
- endoreg_db/models/state/video.py +58 -53
- endoreg_db/models/upload_job.py +32 -55
- endoreg_db/models/utils.py +1 -2
- endoreg_db/root_urls.py +21 -2
- endoreg_db/serializers/__init__.py +0 -2
- endoreg_db/serializers/anonymization.py +18 -10
- endoreg_db/serializers/meta/report_meta.py +1 -1
- endoreg_db/serializers/meta/sensitive_meta_detail.py +63 -118
- endoreg_db/serializers/misc/file_overview.py +11 -99
- endoreg_db/serializers/requirements/requirement_sets.py +92 -22
- endoreg_db/serializers/video/segmentation.py +2 -1
- endoreg_db/serializers/video/video_processing_history.py +20 -5
- endoreg_db/services/anonymization.py +75 -73
- endoreg_db/services/lookup_service.py +37 -24
- endoreg_db/services/pdf_import.py +166 -68
- endoreg_db/services/storage_aware_video_processor.py +140 -114
- endoreg_db/services/video_import.py +193 -283
- endoreg_db/urls/__init__.py +7 -20
- endoreg_db/urls/media.py +108 -67
- endoreg_db/urls/root_urls.py +29 -0
- endoreg_db/utils/__init__.py +15 -5
- endoreg_db/utils/ai/multilabel_classification_net.py +116 -20
- endoreg_db/utils/case_generator/__init__.py +3 -0
- endoreg_db/utils/dataloader.py +88 -16
- endoreg_db/utils/defaults/set_default_center.py +32 -0
- endoreg_db/utils/names.py +22 -16
- endoreg_db/utils/permissions.py +2 -1
- endoreg_db/utils/pipelines/process_video_dir.py +1 -1
- endoreg_db/utils/requirement_operator_logic/model_evaluators.py +414 -127
- endoreg_db/utils/setup_config.py +8 -5
- endoreg_db/utils/storage.py +115 -0
- endoreg_db/utils/validate_endo_roi.py +8 -2
- endoreg_db/utils/video/ffmpeg_wrapper.py +184 -188
- endoreg_db/views/__init__.py +0 -10
- endoreg_db/views/anonymization/media_management.py +198 -163
- endoreg_db/views/anonymization/overview.py +4 -1
- endoreg_db/views/anonymization/validate.py +174 -40
- endoreg_db/views/media/__init__.py +2 -0
- endoreg_db/views/media/pdf_media.py +131 -152
- endoreg_db/views/media/sensitive_metadata.py +46 -6
- endoreg_db/views/media/video_media.py +89 -82
- endoreg_db/views/media/video_segments.py +2 -3
- endoreg_db/views/meta/sensitive_meta_detail.py +0 -63
- endoreg_db/views/patient/patient.py +5 -4
- endoreg_db/views/pdf/pdf_stream.py +20 -21
- endoreg_db/views/pdf/reimport.py +11 -32
- endoreg_db/views/requirement/evaluate.py +188 -187
- endoreg_db/views/requirement/lookup.py +17 -3
- endoreg_db/views/requirement/requirement_utils.py +89 -0
- endoreg_db/views/video/__init__.py +0 -2
- endoreg_db/views/video/correction.py +2 -2
- {endoreg_db-0.8.6.1.dist-info → endoreg_db-0.8.8.0.dist-info}/METADATA +7 -3
- {endoreg_db-0.8.6.1.dist-info → endoreg_db-0.8.8.0.dist-info}/RECORD +341 -245
- endoreg_db/models/administration/permissions/__init__.py +0 -44
- endoreg_db/models/media/video/video_file_frames.py +0 -0
- endoreg_db/models/metadata/frame_ocr_result.py +0 -0
- endoreg_db/models/rule/__init__.py +0 -13
- endoreg_db/models/rule/rule.py +0 -27
- endoreg_db/models/rule/rule_applicator.py +0 -224
- endoreg_db/models/rule/rule_attribute_dtype.py +0 -17
- endoreg_db/models/rule/rule_type.py +0 -20
- endoreg_db/models/rule/ruleset.py +0 -17
- endoreg_db/serializers/video/video_metadata.py +0 -105
- endoreg_db/urls/report.py +0 -48
- endoreg_db/urls/video.py +0 -61
- endoreg_db/utils/case_generator/case_generator.py +0 -159
- endoreg_db/utils/case_generator/utils.py +0 -30
- endoreg_db/views/report/__init__.py +0 -9
- endoreg_db/views/report/report_list.py +0 -112
- endoreg_db/views/report/report_with_secure_url.py +0 -28
- endoreg_db/views/report/start_examination.py +0 -7
- endoreg_db/views.py +0 -0
- /endoreg_db/data/{requirement_set → _examples/requirement_set}/endoscopy_bleeding_risk.yaml +0 -0
- /endoreg_db/migrations/{0002_add_video_correction_models.py → _old/0002_add_video_correction_models.py} +0 -0
- /endoreg_db/migrations/{0003_add_center_display_name.py → _old/0003_add_center_display_name.py} +0 -0
- /endoreg_db/{models/media/video/refactor_plan.md → views/pdf/pdf_stream_views.py} +0 -0
- {endoreg_db-0.8.6.1.dist-info → endoreg_db-0.8.8.0.dist-info}/WHEEL +0 -0
- {endoreg_db-0.8.6.1.dist-info → endoreg_db-0.8.8.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,7 +2,9 @@
|
|
|
2
2
|
PDF import service module.
|
|
3
3
|
|
|
4
4
|
Provides high-level functions for importing and anonymizing PDF files,
|
|
5
|
-
combining RawPdfFile creation with text extraction and anonymization.
|
|
5
|
+
combining RawPdfFile creation with text extraction and anonymization using lx anonymizer.
|
|
6
|
+
|
|
7
|
+
All Fields should be overwritten from anonymizer defaults except for the center which is given.
|
|
6
8
|
"""
|
|
7
9
|
|
|
8
10
|
import errno
|
|
@@ -16,8 +18,10 @@ from contextlib import contextmanager
|
|
|
16
18
|
from datetime import date, datetime
|
|
17
19
|
from pathlib import Path
|
|
18
20
|
from typing import TYPE_CHECKING, Union
|
|
19
|
-
|
|
21
|
+
import subprocess
|
|
20
22
|
from django.db import transaction
|
|
23
|
+
from django.core.exceptions import ObjectDoesNotExist
|
|
24
|
+
import lx_anonymizer
|
|
21
25
|
|
|
22
26
|
from endoreg_db.models import SensitiveMeta
|
|
23
27
|
from endoreg_db.models.media.pdf.raw_pdf import RawPdfFile
|
|
@@ -44,7 +48,7 @@ class PdfImportService:
|
|
|
44
48
|
"""
|
|
45
49
|
|
|
46
50
|
def __init__(
|
|
47
|
-
self, allow_meta_overwrite: bool =
|
|
51
|
+
self, allow_meta_overwrite: bool = True, processing_mode: str = "blackening"
|
|
48
52
|
):
|
|
49
53
|
"""
|
|
50
54
|
Initialize the PDF import service.
|
|
@@ -68,7 +72,14 @@ class PdfImportService:
|
|
|
68
72
|
|
|
69
73
|
# Central PDF instance management
|
|
70
74
|
self.current_pdf = None
|
|
75
|
+
self.current_pdf_state = None
|
|
71
76
|
self.processing_context = {}
|
|
77
|
+
self.original_path = None
|
|
78
|
+
|
|
79
|
+
self.DEFAULT_PATIENT_FIRST_NAME = "Patient"
|
|
80
|
+
self.DEFAULT_PATIENT_LAST_NAME = "Unknown"
|
|
81
|
+
self.DEFAULT_PATIENT_DOB = date(1990, 1, 1)
|
|
82
|
+
self.DEFAULT_CENTER_NAME = "university_hospital_wuerzburg"
|
|
72
83
|
|
|
73
84
|
@classmethod
|
|
74
85
|
def with_blackening(cls, allow_meta_overwrite: bool = False) -> "PdfImportService":
|
|
@@ -137,6 +148,7 @@ class PdfImportService:
|
|
|
137
148
|
fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
|
|
138
149
|
else:
|
|
139
150
|
# Another worker is processing this file
|
|
151
|
+
|
|
140
152
|
raise ValueError(f"File already being processed: {path}")
|
|
141
153
|
|
|
142
154
|
os.write(fd, b"lock")
|
|
@@ -200,6 +212,10 @@ class PdfImportService:
|
|
|
200
212
|
shutil.move(str(source), str(target))
|
|
201
213
|
else:
|
|
202
214
|
raise
|
|
215
|
+
lock_path = Path(str(source) + ".lock")
|
|
216
|
+
if lock_path.exists():
|
|
217
|
+
lock_path.unlink()
|
|
218
|
+
|
|
203
219
|
return target
|
|
204
220
|
|
|
205
221
|
def _ensure_state(self, pdf_file: "RawPdfFile"):
|
|
@@ -209,14 +225,10 @@ class PdfImportService:
|
|
|
209
225
|
if hasattr(pdf_file, "get_or_create_state"):
|
|
210
226
|
state = pdf_file.get_or_create_state()
|
|
211
227
|
pdf_file.state = state
|
|
228
|
+
self.current_pdf_state = state
|
|
229
|
+
assert isinstance(self.current_pdf_state, RawPdfState)
|
|
212
230
|
return state
|
|
213
|
-
|
|
214
|
-
try:
|
|
215
|
-
state, _ = pdf_file.get_or_create_state(raw_pdf_file=pdf_file)
|
|
216
|
-
pdf_file.state = state
|
|
217
|
-
return state
|
|
218
|
-
except Exception:
|
|
219
|
-
return None
|
|
231
|
+
|
|
220
232
|
|
|
221
233
|
def _ensure_report_reading_available(self):
|
|
222
234
|
"""
|
|
@@ -266,7 +278,7 @@ class PdfImportService:
|
|
|
266
278
|
self._report_reader_class = None
|
|
267
279
|
return False, None
|
|
268
280
|
|
|
269
|
-
def _ensure_default_patient_data(self, pdf_instance: "RawPdfFile"
|
|
281
|
+
def _ensure_default_patient_data(self, pdf_instance: "RawPdfFile") -> None:
|
|
270
282
|
"""
|
|
271
283
|
Ensure PDF has minimum required patient data in SensitiveMeta.
|
|
272
284
|
Creates default values if data is missing after text processing.
|
|
@@ -289,15 +301,18 @@ class PdfImportService:
|
|
|
289
301
|
|
|
290
302
|
# Create default SensitiveMeta with placeholder data
|
|
291
303
|
default_data = {
|
|
292
|
-
"patient_first_name":
|
|
293
|
-
"patient_last_name":
|
|
294
|
-
"patient_dob":
|
|
295
|
-
"examination_date": date.today(),
|
|
296
|
-
"center_name":
|
|
297
|
-
|
|
298
|
-
|
|
304
|
+
"patient_first_name": self.DEFAULT_PATIENT_FIRST_NAME,
|
|
305
|
+
"patient_last_name": self.DEFAULT_PATIENT_LAST_NAME,
|
|
306
|
+
"patient_dob": self.DEFAULT_PATIENT_DOB,
|
|
307
|
+
"examination_date": date.today(), # today is intentionally *not* a constant
|
|
308
|
+
"center_name": (
|
|
309
|
+
pdf_file.center.name
|
|
310
|
+
if pdf_file.center
|
|
311
|
+
else self.DEFAULT_CENTER_NAME
|
|
312
|
+
),
|
|
299
313
|
}
|
|
300
314
|
|
|
315
|
+
|
|
301
316
|
try:
|
|
302
317
|
sensitive_meta = SensitiveMeta.create_from_dict(default_data)
|
|
303
318
|
pdf_file.sensitive_meta = sensitive_meta
|
|
@@ -316,7 +331,7 @@ class PdfImportService:
|
|
|
316
331
|
center_name: str,
|
|
317
332
|
delete_source: bool = False,
|
|
318
333
|
retry: bool = False,
|
|
319
|
-
) -> "RawPdfFile":
|
|
334
|
+
) -> "RawPdfFile | None":
|
|
320
335
|
"""
|
|
321
336
|
Import a PDF file and anonymize it using ReportReader.
|
|
322
337
|
Uses centralized PDF instance management pattern.
|
|
@@ -354,8 +369,7 @@ class PdfImportService:
|
|
|
354
369
|
logger.warning(
|
|
355
370
|
f"No PDF instance created for {file_path}, returning None"
|
|
356
371
|
)
|
|
357
|
-
|
|
358
|
-
|
|
372
|
+
raise ObjectDoesNotExist
|
|
359
373
|
# Step 3: Setup processing environment
|
|
360
374
|
self._setup_processing_environment()
|
|
361
375
|
|
|
@@ -371,7 +385,7 @@ class PdfImportService:
|
|
|
371
385
|
# Handle "File already being processed" case specifically
|
|
372
386
|
if "already being processed" in str(e):
|
|
373
387
|
logger.info(f"Skipping file {file_path}: {e}")
|
|
374
|
-
return
|
|
388
|
+
return
|
|
375
389
|
else:
|
|
376
390
|
logger.error(f"PDF import failed for {file_path}: {e}")
|
|
377
391
|
self._cleanup_on_error()
|
|
@@ -405,6 +419,7 @@ class PdfImportService:
|
|
|
405
419
|
"metadata_processed": False,
|
|
406
420
|
"anonymization_completed": False,
|
|
407
421
|
}
|
|
422
|
+
self.original_path = Path(file_path)
|
|
408
423
|
|
|
409
424
|
# Check if already processed (only during current session to prevent race conditions)
|
|
410
425
|
if str(file_path) in self.processed_files:
|
|
@@ -497,8 +512,17 @@ class PdfImportService:
|
|
|
497
512
|
def _setup_processing_environment(self):
|
|
498
513
|
"""Setup processing environment and state."""
|
|
499
514
|
original_path = self.processing_context.get("file_path")
|
|
500
|
-
|
|
515
|
+
if not original_path or not self.current_pdf:
|
|
516
|
+
try:
|
|
517
|
+
self.current_pdf = RawPdfFile.objects.get(pdf_hash=self.processing_context["file_hash"])
|
|
518
|
+
self.original_path = Path(str(self.current_pdf.file.path))
|
|
519
|
+
|
|
520
|
+
except RawPdfFile.DoesNotExist:
|
|
521
|
+
raise RuntimeError("Processing environment setup failed")
|
|
501
522
|
# Create sensitive file copy
|
|
523
|
+
if original_path is None or not isinstance(original_path, (str, Path)):
|
|
524
|
+
logger.error(f"No original path: {original_path!r}")
|
|
525
|
+
return
|
|
502
526
|
self.create_sensitive_file(self.current_pdf, original_path)
|
|
503
527
|
|
|
504
528
|
# Update file path to point to sensitive copy
|
|
@@ -525,13 +549,17 @@ class PdfImportService:
|
|
|
525
549
|
|
|
526
550
|
def _process_text_and_metadata(self):
|
|
527
551
|
"""Process text extraction and metadata using ReportReader."""
|
|
528
|
-
report_reading_available,
|
|
529
|
-
|
|
552
|
+
report_reading_available, ReportReaderCls = self._ensure_report_reading_available()
|
|
553
|
+
try:
|
|
554
|
+
assert ReportReaderCls is not None and report_reading_available
|
|
555
|
+
assert self.current_pdf is not None
|
|
556
|
+
except AssertionError as e:
|
|
557
|
+
logger.error(f"PDF Import failed on Error:{e} Ensure the pdf was passed correctly and report reading is available in function _process_text_and_metadata() ")
|
|
530
558
|
if not report_reading_available:
|
|
531
559
|
logger.warning("Report reading not available (lx_anonymizer not found)")
|
|
532
560
|
self._mark_processing_incomplete("no_report_reader")
|
|
533
|
-
return
|
|
534
|
-
|
|
561
|
+
return
|
|
562
|
+
assert self.current_pdf is not None
|
|
535
563
|
if not self.current_pdf.file:
|
|
536
564
|
logger.warning("No file available for text processing")
|
|
537
565
|
self._mark_processing_incomplete("no_file")
|
|
@@ -541,9 +569,10 @@ class PdfImportService:
|
|
|
541
569
|
logger.info(
|
|
542
570
|
f"Starting text extraction and metadata processing with ReportReader (mode: {self.processing_mode})..."
|
|
543
571
|
)
|
|
572
|
+
ReportReaderCls = lx_anonymizer.ReportReader
|
|
544
573
|
|
|
545
574
|
# Initialize ReportReader
|
|
546
|
-
report_reader =
|
|
575
|
+
report_reader = ReportReaderCls(
|
|
547
576
|
report_root_path=str(path_utils.STORAGE_DIR),
|
|
548
577
|
locale="de_DE",
|
|
549
578
|
text_date_format="%d.%m.%Y",
|
|
@@ -567,7 +596,7 @@ class PdfImportService:
|
|
|
567
596
|
# Setup anonymized directory
|
|
568
597
|
anonymized_dir = path_utils.PDF_DIR / "anonymized"
|
|
569
598
|
anonymized_dir.mkdir(parents=True, exist_ok=True)
|
|
570
|
-
|
|
599
|
+
assert self.current_pdf is not None
|
|
571
600
|
# Generate output path for anonymized PDF
|
|
572
601
|
pdf_hash = self.current_pdf.pdf_hash
|
|
573
602
|
anonymized_output_path = anonymized_dir / f"{pdf_hash}_anonymized.pdf"
|
|
@@ -702,7 +731,8 @@ class PdfImportService:
|
|
|
702
731
|
"examiner_first_name": "examiner_first_name",
|
|
703
732
|
"examiner_last_name": "examiner_last_name",
|
|
704
733
|
"endoscope_type": "endoscope_type",
|
|
705
|
-
"casenumber": "
|
|
734
|
+
"casenumber": "casenumber",
|
|
735
|
+
"center_name": "center_name",
|
|
706
736
|
}
|
|
707
737
|
|
|
708
738
|
# Update fields with extracted information
|
|
@@ -727,9 +757,9 @@ class PdfImportService:
|
|
|
727
757
|
# Configurable overwrite policy
|
|
728
758
|
should_overwrite = (
|
|
729
759
|
self.allow_meta_overwrite
|
|
730
|
-
or
|
|
731
|
-
or old_value in ["Patient", "Unknown"]
|
|
760
|
+
or self._is_placeholder_value(sm_field, old_value)
|
|
732
761
|
)
|
|
762
|
+
|
|
733
763
|
if new_value and should_overwrite:
|
|
734
764
|
setattr(sm, sm_field, new_value)
|
|
735
765
|
updated_fields.append(sm_field)
|
|
@@ -835,8 +865,9 @@ class PdfImportService:
|
|
|
835
865
|
|
|
836
866
|
# Mark state as anonymized immediately; this keeps downstream flows working
|
|
837
867
|
state = self._ensure_state(self.current_pdf)
|
|
838
|
-
|
|
839
|
-
|
|
868
|
+
|
|
869
|
+
if state and not state.processing_started:
|
|
870
|
+
state.mark_processing_started()
|
|
840
871
|
|
|
841
872
|
logger.info(
|
|
842
873
|
"Updated anonymized_file reference to: %s",
|
|
@@ -944,15 +975,66 @@ class PdfImportService:
|
|
|
944
975
|
|
|
945
976
|
def _cleanup_on_error(self):
|
|
946
977
|
"""Cleanup processing context on error."""
|
|
978
|
+
original_path = self.original_path
|
|
947
979
|
try:
|
|
948
980
|
if self.current_pdf and hasattr(self.current_pdf, "state"):
|
|
949
981
|
state = self._ensure_state(self.current_pdf)
|
|
982
|
+
raw_file_path = self.current_pdf.get_raw_file_path()
|
|
983
|
+
if raw_file_path is not None and original_path is not None:
|
|
984
|
+
# Ensure reprocessing for next attempt by restoring original file
|
|
985
|
+
shutil.copy2(str(raw_file_path), str(original_path))
|
|
986
|
+
|
|
987
|
+
# Ensure no two files can remain
|
|
988
|
+
if raw_file_path == original_path and raw_file_path is not None and original_path is not None:
|
|
989
|
+
os.remove(str(raw_file_path))
|
|
990
|
+
|
|
991
|
+
|
|
992
|
+
# Remove Lock file also
|
|
993
|
+
lock_path = Path(str(path_utils.PDF_DIR) + ".lock")
|
|
994
|
+
try:
|
|
995
|
+
if lock_path.exists():
|
|
996
|
+
lock_path.unlink()
|
|
997
|
+
logger.info("Removed lock file during quarantine: %s", lock_path)
|
|
998
|
+
except Exception as e:
|
|
999
|
+
logger.warning("Could not remove lock file during quarantine: %s", e)
|
|
1000
|
+
|
|
1001
|
+
|
|
950
1002
|
if state and self.processing_context.get("processing_started"):
|
|
951
1003
|
state.text_meta_extracted = False
|
|
952
1004
|
state.pdf_meta_extracted = False
|
|
953
1005
|
state.sensitive_meta_processed = False
|
|
1006
|
+
state.anonymized = False
|
|
954
1007
|
state.save()
|
|
955
1008
|
logger.debug("Updated PDF state to indicate processing failure")
|
|
1009
|
+
else:
|
|
1010
|
+
# 🔧 Early failure: no current_pdf (or no state).
|
|
1011
|
+
# In this case we want to make sure we don't leave stray files
|
|
1012
|
+
# under PDF_DIR or PDF_DIR/sensitive.
|
|
1013
|
+
|
|
1014
|
+
pdf_dir = self._get_pdf_dir()
|
|
1015
|
+
if pdf_dir and pdf_dir.exists():
|
|
1016
|
+
for candidate_dir in (pdf_dir, pdf_dir / "sensitive"):
|
|
1017
|
+
if candidate_dir.exists():
|
|
1018
|
+
for candidate in candidate_dir.glob("*.pdf"):
|
|
1019
|
+
# Don't delete the original ingress file
|
|
1020
|
+
if (
|
|
1021
|
+
original_path is not None
|
|
1022
|
+
and candidate.resolve() == Path(original_path).resolve()
|
|
1023
|
+
):
|
|
1024
|
+
continue
|
|
1025
|
+
try:
|
|
1026
|
+
candidate.unlink()
|
|
1027
|
+
logger.debug(
|
|
1028
|
+
"Removed stray PDF during early error cleanup: %s",
|
|
1029
|
+
candidate,
|
|
1030
|
+
)
|
|
1031
|
+
except Exception as e:
|
|
1032
|
+
logger.warning(
|
|
1033
|
+
"Failed to remove stray PDF %s: %s",
|
|
1034
|
+
candidate,
|
|
1035
|
+
e,
|
|
1036
|
+
)
|
|
1037
|
+
|
|
956
1038
|
except Exception as e:
|
|
957
1039
|
logger.warning(f"Error during cleanup: {e}")
|
|
958
1040
|
finally:
|
|
@@ -975,6 +1057,25 @@ class PdfImportService:
|
|
|
975
1057
|
"Failed to remove sensitive copy during error cleanup: %s",
|
|
976
1058
|
cleanup_exc,
|
|
977
1059
|
)
|
|
1060
|
+
pdf_dir = self._get_pdf_dir()
|
|
1061
|
+
if original_path and pdf_dir:
|
|
1062
|
+
# Try to remove any extra file that was created during import
|
|
1063
|
+
# Simplest heuristic: same basename as original, but in pdf dir or pdf/sensitive dir
|
|
1064
|
+
for candidate_dir in (pdf_dir, pdf_dir / "sensitive"):
|
|
1065
|
+
candidate = candidate_dir / original_path.name
|
|
1066
|
+
if candidate.exists() and candidate != original_path:
|
|
1067
|
+
try:
|
|
1068
|
+
candidate.unlink()
|
|
1069
|
+
logger.debug(
|
|
1070
|
+
"Removed stray PDF copy during early error cleanup: %s",
|
|
1071
|
+
candidate,
|
|
1072
|
+
)
|
|
1073
|
+
except Exception as e:
|
|
1074
|
+
logger.warning(
|
|
1075
|
+
"Failed to remove stray PDF copy %s: %s",
|
|
1076
|
+
candidate,
|
|
1077
|
+
e,
|
|
1078
|
+
)
|
|
978
1079
|
|
|
979
1080
|
# Always clean up processed files set to prevent blocks
|
|
980
1081
|
file_path = self.processing_context.get("file_path")
|
|
@@ -985,32 +1086,10 @@ class PdfImportService:
|
|
|
985
1086
|
)
|
|
986
1087
|
|
|
987
1088
|
try:
|
|
988
|
-
original_path = self.processing_context.get("original_file_path")
|
|
989
|
-
logger.debug(
|
|
990
|
-
"PDF cleanup original path: %s (%s)",
|
|
991
|
-
original_path,
|
|
992
|
-
type(original_path),
|
|
993
|
-
)
|
|
994
1089
|
raw_dir = (
|
|
995
1090
|
original_path.parent if isinstance(original_path, Path) else None
|
|
996
1091
|
)
|
|
997
|
-
|
|
998
|
-
isinstance(original_path, Path)
|
|
999
|
-
and original_path.exists()
|
|
1000
|
-
and not self.processing_context.get("sensitive_copy_created")
|
|
1001
|
-
):
|
|
1002
|
-
try:
|
|
1003
|
-
original_path.unlink()
|
|
1004
|
-
logger.info(
|
|
1005
|
-
"Removed original file %s during error cleanup",
|
|
1006
|
-
original_path,
|
|
1007
|
-
)
|
|
1008
|
-
except Exception as remove_exc:
|
|
1009
|
-
logger.warning(
|
|
1010
|
-
"Could not remove original file %s during error cleanup: %s",
|
|
1011
|
-
original_path,
|
|
1012
|
-
remove_exc,
|
|
1013
|
-
)
|
|
1092
|
+
|
|
1014
1093
|
pdf_dir = self._get_pdf_dir()
|
|
1015
1094
|
if not pdf_dir and raw_dir:
|
|
1016
1095
|
base_dir = raw_dir.parent
|
|
@@ -1181,7 +1260,7 @@ class PdfImportService:
|
|
|
1181
1260
|
|
|
1182
1261
|
def check_storage_capacity(
|
|
1183
1262
|
self, file_path: Union[Path, str], storage_root, min_required_space
|
|
1184
|
-
) ->
|
|
1263
|
+
) -> bool:
|
|
1185
1264
|
"""
|
|
1186
1265
|
Check if there is sufficient storage capacity for the PDF file.
|
|
1187
1266
|
|
|
@@ -1221,7 +1300,7 @@ class PdfImportService:
|
|
|
1221
1300
|
return True
|
|
1222
1301
|
|
|
1223
1302
|
def create_sensitive_file(
|
|
1224
|
-
self, pdf_instance: "RawPdfFile"
|
|
1303
|
+
self, pdf_instance: "RawPdfFile", file_path: Union[Path, str]
|
|
1225
1304
|
) -> None:
|
|
1226
1305
|
"""
|
|
1227
1306
|
Create a copy of the PDF file in the sensitive directory and update the file reference.
|
|
@@ -1304,10 +1383,10 @@ class PdfImportService:
|
|
|
1304
1383
|
|
|
1305
1384
|
def archive_or_quarantine_file(
|
|
1306
1385
|
self,
|
|
1307
|
-
pdf_instance: "RawPdfFile"
|
|
1308
|
-
source_file_path: Union[Path, str]
|
|
1309
|
-
quarantine_reason: str
|
|
1310
|
-
is_pdf_problematic: bool
|
|
1386
|
+
pdf_instance: "RawPdfFile",
|
|
1387
|
+
source_file_path: Union[Path, str],
|
|
1388
|
+
quarantine_reason: str,
|
|
1389
|
+
is_pdf_problematic: bool,
|
|
1311
1390
|
) -> bool:
|
|
1312
1391
|
"""
|
|
1313
1392
|
Archive or quarantine file based on the state of the PDF processing.
|
|
@@ -1328,9 +1407,9 @@ class PdfImportService:
|
|
|
1328
1407
|
if source_file_path
|
|
1329
1408
|
else self.processing_context.get("file_path")
|
|
1330
1409
|
)
|
|
1331
|
-
quarantine_reason = quarantine_reason or self.processing_context.get(
|
|
1410
|
+
quarantine_reason = str(quarantine_reason or self.processing_context.get(
|
|
1332
1411
|
"error_reason"
|
|
1333
|
-
)
|
|
1412
|
+
))
|
|
1334
1413
|
|
|
1335
1414
|
if not pdf_file:
|
|
1336
1415
|
raise ValueError("No PDF instance available for archiving/quarantine")
|
|
@@ -1355,9 +1434,6 @@ class PdfImportService:
|
|
|
1355
1434
|
quarantine_path = quarantine_dir / f"{pdf_file.pdf_hash}.pdf"
|
|
1356
1435
|
try:
|
|
1357
1436
|
shutil.move(file_path, quarantine_path)
|
|
1358
|
-
pdf_file.quarantine_reason = (
|
|
1359
|
-
quarantine_reason or "File processing failed"
|
|
1360
|
-
)
|
|
1361
1437
|
pdf_file.save(update_fields=["quarantine_reason"])
|
|
1362
1438
|
logger.info(f"Moved problematic PDF to quarantine: {quarantine_path}")
|
|
1363
1439
|
return True
|
|
@@ -1380,3 +1456,25 @@ class PdfImportService:
|
|
|
1380
1456
|
except Exception as e:
|
|
1381
1457
|
logger.error(f"Failed to archive PDF {pdf_file.pdf_hash}: {e}")
|
|
1382
1458
|
return False
|
|
1459
|
+
|
|
1460
|
+
def _is_placeholder_value(self, field_name: str, value) -> bool:
|
|
1461
|
+
"""Return True if a SensitiveMeta field still has a dummy/default value."""
|
|
1462
|
+
if value is None:
|
|
1463
|
+
return True
|
|
1464
|
+
|
|
1465
|
+
# String placeholders
|
|
1466
|
+
if isinstance(value, str):
|
|
1467
|
+
if value in {self.DEFAULT_PATIENT_FIRST_NAME, self.DEFAULT_PATIENT_LAST_NAME}:
|
|
1468
|
+
return True
|
|
1469
|
+
|
|
1470
|
+
# Date placeholders
|
|
1471
|
+
if isinstance(value, date):
|
|
1472
|
+
# Default DOB
|
|
1473
|
+
if field_name == "patient_dob" and value == self.DEFAULT_PATIENT_DOB:
|
|
1474
|
+
return True
|
|
1475
|
+
# "Today" exam date created as fallback – allow anonymizer to override
|
|
1476
|
+
if field_name == "examination_date" and value == date.today():
|
|
1477
|
+
return True
|
|
1478
|
+
|
|
1479
|
+
return False
|
|
1480
|
+
|