mindee 3.19.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -1
- data/CHANGELOG.md +28 -0
- data/README.md +42 -52
- data/Steepfile +31 -0
- data/bin/mindee.rb +30 -62
- data/docs/advanced_file_operations.md +111 -0
- data/docs/code_samples/bank_account_details_v1.txt +5 -0
- data/docs/code_samples/bank_account_details_v2.txt +5 -0
- data/docs/code_samples/bank_check_v1.txt +5 -0
- data/docs/code_samples/{us_driver_license_v1.txt → bank_statement_fr_v2_async.txt} +6 -1
- data/docs/code_samples/barcode_reader_v1.txt +5 -0
- data/docs/code_samples/bill_of_lading_v1_async.txt +6 -1
- data/docs/code_samples/business_card_v1_async.txt +6 -1
- data/docs/code_samples/carte_grise_v1.txt +5 -0
- data/docs/code_samples/cropper_v1.txt +5 -0
- data/docs/code_samples/default.txt +8 -2
- data/docs/code_samples/default_async.txt +7 -2
- data/docs/code_samples/delivery_notes_v1_async.txt +6 -1
- data/docs/code_samples/driver_license_v1_async.txt +6 -1
- data/docs/code_samples/energy_bill_fra_v1_async.txt +6 -1
- data/docs/code_samples/expense_receipts_v5.txt +7 -1
- data/docs/code_samples/expense_receipts_v5_async.txt +6 -1
- data/docs/code_samples/financial_document_v1.txt +7 -1
- data/docs/code_samples/financial_document_v1_async.txt +6 -1
- data/docs/code_samples/french_healthcard_v1_async.txt +6 -1
- data/docs/code_samples/idcard_fr_v1.txt +5 -0
- data/docs/code_samples/idcard_fr_v2.txt +5 -0
- data/docs/code_samples/ind_passport_v1_async.txt +6 -1
- data/docs/code_samples/international_id_v2_async.txt +6 -1
- data/docs/code_samples/invoice_splitter_v1_async.txt +6 -1
- data/docs/code_samples/invoices_v4.txt +7 -1
- data/docs/code_samples/invoices_v4_async.txt +6 -1
- data/docs/code_samples/license_plates_v1.txt +5 -0
- data/docs/code_samples/multi_receipts_detector_v1.txt +5 -0
- data/docs/code_samples/nutrition_facts_v1_async.txt +6 -1
- data/docs/code_samples/passport_v1.txt +5 -0
- data/docs/code_samples/payslip_fra_v2_async.txt +6 -1
- data/docs/code_samples/payslip_fra_v3_async.txt +6 -1
- data/docs/code_samples/resume_v1_async.txt +6 -1
- data/docs/code_samples/us_healthcare_cards_v1_async.txt +6 -1
- data/docs/code_samples/us_mail_v3_async.txt +6 -1
- data/docs/code_samples/us_w9_v1.txt +5 -0
- data/docs/getting_started.md +50 -118
- data/docs/{barcode_reader_v1.md → global_products/barcode_reader_v1.md} +26 -9
- data/docs/{bill_of_lading_v1.md → global_products/bill_of_lading_v1.md} +62 -37
- data/docs/{business_card_v1.md → global_products/business_card_v1.md} +35 -10
- data/docs/{cropper_v1.md → global_products/cropper_v1.md} +32 -13
- data/docs/{delivery_notes_v1.md → global_products/delivery_notes_v1.md} +35 -10
- data/docs/{driver_license_v1.md → global_products/driver_license_v1.md} +65 -9
- data/docs/{expense_receipts_v5.md → global_products/expense_receipts_v5.md} +57 -21
- data/docs/{financial_document_v1.md → global_products/financial_document_v1.md} +70 -33
- data/docs/{international_id_v2.md → global_products/international_id_v2.md} +39 -13
- data/docs/{invoice_splitter_v1.md → global_products/invoice_splitter_v1.md} +25 -6
- data/docs/{invoices_v4.md → global_products/invoices_v4.md} +70 -33
- data/docs/{multi_receipts_detector_v1.md → global_products/multi_receipts_detector_v1.md} +32 -13
- data/docs/{nutrition_facts_v1.md → global_products/nutrition_facts_v1.md} +103 -78
- data/docs/{passport_v1.md → global_products/passport_v1.md} +26 -9
- data/docs/{resume_v1.md → global_products/resume_v1.md} +72 -46
- data/docs/{generated_v1.md → global_products/universal.md} +39 -19
- data/docs/global_products.md +6 -0
- data/docs/loading_a_document.md +316 -0
- data/docs/{bank_account_details_v2.md → localized_products/bank_account_details_v2.md} +24 -7
- data/docs/{bank_check_v1.md → localized_products/bank_check_v1.md} +37 -18
- data/docs/localized_products/bank_statement_fr_v2.md +269 -0
- data/docs/{carte_grise_v1.md → localized_products/carte_grise_v1.md} +25 -8
- data/docs/{energy_bill_fra_v1.md → localized_products/energy_bill_fra_v1.md} +70 -45
- data/docs/{french_healthcard_v1.md → localized_products/french_healthcard_v1.md} +36 -11
- data/docs/{idcard_fr_v2.md → localized_products/idcard_fr_v2.md} +34 -16
- data/docs/{ind_passport_v1.md → localized_products/ind_passport_v1.md} +37 -11
- data/docs/{license_plates_v1.md → localized_products/license_plates_v1.md} +25 -8
- data/docs/{payslip_fra_v3.md → localized_products/payslip_fra_v3.md} +80 -55
- data/docs/{us_healthcare_cards_v1.md → localized_products/us_healthcare_cards_v1.md} +37 -12
- data/docs/{us_mail_v3.md → localized_products/us_mail_v3.md} +49 -24
- data/docs/{us_w9_v1.md → localized_products/us_w9_v1.md} +64 -45
- data/docs/localized_products.md +6 -0
- data/examples/auto_invoice_splitter_extraction.rb +5 -5
- data/examples/auto_multi_receipts_detector_extraction.rb +4 -5
- data/lib/mindee/client.rb +354 -209
- data/lib/mindee/errors/mindee_error.rb +17 -0
- data/lib/mindee/errors/mindee_http_error.rb +36 -0
- data/lib/mindee/errors/mindee_input_error.rb +30 -0
- data/lib/mindee/errors.rb +5 -0
- data/lib/mindee/extraction/multi_receipts_extractor.rb +28 -1
- data/lib/mindee/extraction.rb +0 -3
- data/lib/mindee/geometry/polygon.rb +1 -1
- data/lib/mindee/geometry/utils.rb +1 -1
- data/lib/mindee/http/endpoint.rb +48 -40
- data/lib/mindee/http/{error.rb → http_error_handler.rb} +5 -35
- data/lib/mindee/http/response_validation.rb +2 -2
- data/lib/mindee/http/workflow_endpoint.rb +12 -12
- data/lib/mindee/http.rb +1 -1
- data/lib/mindee/{extraction/common → image}/extracted_image.rb +22 -18
- data/lib/mindee/{extraction/common → image}/image_extractor.rb +20 -78
- data/lib/mindee/image/image_utils.rb +68 -7
- data/lib/mindee/image.rb +2 -0
- data/lib/mindee/input/local_response.rb +8 -8
- data/lib/mindee/input/sources/base64_input_source.rb +6 -6
- data/lib/mindee/input/sources/bytes_input_source.rb +3 -3
- data/lib/mindee/input/sources/file_input_source.rb +3 -3
- data/lib/mindee/input/sources/local_input_source.rb +43 -41
- data/lib/mindee/input/sources/path_input_source.rb +3 -3
- data/lib/mindee/input/sources/url_input_source.rb +11 -8
- data/lib/mindee/logging/logger.rb +16 -0
- data/lib/mindee/logging.rb +3 -0
- data/lib/mindee/parsing/common/api_request.rb +36 -0
- data/lib/mindee/parsing/common/api_response.rb +6 -96
- data/lib/mindee/parsing/common/document.rb +12 -8
- data/lib/mindee/parsing/common/execution.rb +4 -2
- data/lib/mindee/parsing/common/execution_priority.rb +6 -0
- data/lib/mindee/parsing/common/extras/cropper_extra.rb +2 -1
- data/lib/mindee/parsing/common/extras/extras.rb +9 -4
- data/lib/mindee/parsing/common/extras/full_text_ocr_extra.rb +1 -0
- data/lib/mindee/parsing/common/inference.rb +23 -4
- data/lib/mindee/parsing/common/job.rb +46 -0
- data/lib/mindee/parsing/common/ocr/mvision_v1.rb +6 -6
- data/lib/mindee/parsing/common/ocr/ocr.rb +29 -29
- data/lib/mindee/parsing/common/page.rb +5 -3
- data/lib/mindee/parsing/common/prediction.rb +2 -0
- data/lib/mindee/parsing/common/workflow_response.rb +28 -0
- data/lib/mindee/parsing/common.rb +3 -0
- data/lib/mindee/parsing/standard/abstract_field.rb +72 -0
- data/lib/mindee/parsing/standard/amount_field.rb +2 -2
- data/lib/mindee/parsing/standard/base_field.rb +5 -84
- data/lib/mindee/parsing/standard/boolean_field.rb +4 -3
- data/lib/mindee/parsing/standard/classification_field.rb +1 -1
- data/lib/mindee/parsing/standard/company_registration_field.rb +7 -2
- data/lib/mindee/parsing/standard/date_field.rb +2 -2
- data/lib/mindee/parsing/standard/feature_field.rb +24 -0
- data/lib/mindee/parsing/standard/payment_details_field.rb +2 -2
- data/lib/mindee/parsing/standard/position_field.rb +3 -1
- data/lib/mindee/parsing/standard/string_field.rb +1 -1
- data/lib/mindee/parsing/standard/tax_field.rb +1 -1
- data/lib/mindee/parsing/standard.rb +1 -0
- data/lib/mindee/parsing/{generated/generated_list_field.rb → universal/universal_list_field.rb} +12 -12
- data/lib/mindee/parsing/{generated/generated_object_field.rb → universal/universal_object_field.rb} +23 -14
- data/lib/mindee/parsing/universal.rb +4 -0
- data/lib/mindee/parsing.rb +1 -2
- data/lib/mindee/{extraction/pdf_extractor → pdf}/extracted_pdf.rb +15 -12
- data/lib/mindee/pdf/pdf_compressor.rb +12 -10
- data/lib/mindee/{extraction/pdf_extractor → pdf}/pdf_extractor.rb +20 -17
- data/lib/mindee/pdf/pdf_processor.rb +12 -22
- data/lib/mindee/pdf/pdf_tools.rb +52 -6
- data/lib/mindee/pdf.rb +2 -0
- data/lib/mindee/product/.rubocop.yml +3 -0
- data/lib/mindee/product/barcode_reader/barcode_reader_v1.rb +9 -3
- data/lib/mindee/product/barcode_reader/barcode_reader_v1_document.rb +5 -5
- data/lib/mindee/product/barcode_reader/barcode_reader_v1_page.rb +9 -5
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1.rb +9 -3
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier.rb +1 -1
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_item.rb +13 -7
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_items.rb +62 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_consignee.rb +1 -1
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_document.rb +44 -17
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_notify_party.rb +1 -1
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_page.rb +9 -5
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_shipper.rb +1 -1
- data/lib/mindee/product/business_card/business_card_v1.rb +9 -3
- data/lib/mindee/product/business_card/business_card_v1_document.rb +40 -13
- data/lib/mindee/product/business_card/business_card_v1_page.rb +9 -5
- data/lib/mindee/product/cropper/cropper_v1.rb +10 -4
- data/lib/mindee/product/cropper/cropper_v1_page.rb +11 -7
- data/lib/mindee/product/delivery_note/delivery_note_v1.rb +9 -3
- data/lib/mindee/product/delivery_note/delivery_note_v1_document.rb +29 -8
- data/lib/mindee/product/delivery_note/delivery_note_v1_page.rb +9 -5
- data/lib/mindee/product/driver_license/driver_license_v1.rb +9 -3
- data/lib/mindee/product/driver_license/driver_license_v1_document.rb +44 -14
- data/lib/mindee/product/driver_license/driver_license_v1_page.rb +9 -5
- data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +9 -3
- data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +3 -3
- data/lib/mindee/product/eu/license_plate/license_plate_v1_page.rb +9 -5
- data/lib/mindee/product/financial_document/financial_document_v1.rb +9 -3
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +110 -42
- data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +21 -11
- data/lib/mindee/product/financial_document/financial_document_v1_line_items.rb +62 -0
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +9 -5
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +9 -3
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +7 -4
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_page.rb +9 -5
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +9 -3
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +1 -1
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +11 -5
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rb +9 -5
- data/lib/mindee/product/fr/bank_statement/bank_statement_v2.rb +47 -0
- data/lib/mindee/product/fr/bank_statement/{bank_statement_v1_document.rb → bank_statement_v2_document.rb} +62 -26
- data/lib/mindee/product/fr/bank_statement/bank_statement_v2_page.rb +38 -0
- data/lib/mindee/product/fr/bank_statement/{bank_statement_v1_transaction.rb → bank_statement_v2_transaction.rb} +17 -6
- data/lib/mindee/product/fr/bank_statement/bank_statement_v2_transactions.rb +54 -0
- data/lib/mindee/product/fr/carte_grise/carte_grise_v1.rb +9 -3
- data/lib/mindee/product/fr/carte_grise/carte_grise_v1_document.rb +51 -42
- data/lib/mindee/product/fr/carte_grise/carte_grise_v1_page.rb +9 -5
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1.rb +9 -3
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_document.rb +56 -30
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_consumer.rb +1 -1
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_supplier.rb +1 -1
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usage.rb +13 -7
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usages.rb +65 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_meter_detail.rb +1 -1
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_page.rb +9 -5
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_subscription.rb +13 -7
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_subscriptions.rb +65 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contribution.rb +13 -7
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contributions.rb +65 -0
- data/lib/mindee/product/fr/health_card/health_card_v1.rb +9 -3
- data/lib/mindee/product/fr/health_card/health_card_v1_document.rb +15 -6
- data/lib/mindee/product/fr/health_card/health_card_v1_page.rb +9 -5
- data/lib/mindee/product/fr/id_card/id_card_v1.rb +9 -3
- data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +33 -12
- data/lib/mindee/product/fr/id_card/id_card_v1_page.rb +14 -7
- data/lib/mindee/product/fr/id_card/id_card_v2.rb +9 -3
- data/lib/mindee/product/fr/id_card/id_card_v2_document.rb +50 -17
- data/lib/mindee/product/fr/id_card/id_card_v2_page.rb +18 -8
- data/lib/mindee/product/fr/payslip/payslip_v2.rb +9 -3
- data/lib/mindee/product/fr/payslip/payslip_v2_bank_account_detail.rb +1 -1
- data/lib/mindee/product/fr/payslip/payslip_v2_document.rb +29 -14
- data/lib/mindee/product/fr/payslip/payslip_v2_employee.rb +1 -1
- data/lib/mindee/product/fr/payslip/payslip_v2_employer.rb +1 -1
- data/lib/mindee/product/fr/payslip/payslip_v2_employment.rb +3 -2
- data/lib/mindee/product/fr/payslip/payslip_v2_page.rb +9 -5
- data/lib/mindee/product/fr/payslip/payslip_v2_pay_detail.rb +33 -11
- data/lib/mindee/product/fr/payslip/payslip_v2_pay_period.rb +1 -1
- data/lib/mindee/product/fr/payslip/payslip_v2_pto.rb +13 -4
- data/lib/mindee/product/fr/payslip/payslip_v2_salary_detail.rb +13 -7
- data/lib/mindee/product/fr/payslip/payslip_v2_salary_details.rb +63 -0
- data/lib/mindee/product/fr/payslip/payslip_v3.rb +9 -3
- data/lib/mindee/product/fr/payslip/payslip_v3_bank_account_detail.rb +1 -1
- data/lib/mindee/product/fr/payslip/payslip_v3_document.rb +31 -19
- data/lib/mindee/product/fr/payslip/payslip_v3_employee.rb +1 -1
- data/lib/mindee/product/fr/payslip/payslip_v3_employer.rb +1 -1
- data/lib/mindee/product/fr/payslip/payslip_v3_employment.rb +1 -1
- data/lib/mindee/product/fr/payslip/payslip_v3_page.rb +9 -5
- data/lib/mindee/product/fr/payslip/payslip_v3_paid_time_off.rb +13 -7
- data/lib/mindee/product/fr/payslip/payslip_v3_paid_time_offs.rb +65 -0
- data/lib/mindee/product/fr/payslip/payslip_v3_pay_detail.rb +33 -11
- data/lib/mindee/product/fr/payslip/payslip_v3_pay_period.rb +1 -1
- data/lib/mindee/product/fr/payslip/payslip_v3_salary_detail.rb +17 -9
- data/lib/mindee/product/fr/payslip/payslip_v3_salary_details.rb +65 -0
- data/lib/mindee/product/ind/indian_passport/indian_passport_v1.rb +9 -3
- data/lib/mindee/product/ind/indian_passport/indian_passport_v1_document.rb +87 -24
- data/lib/mindee/product/ind/indian_passport/indian_passport_v1_page.rb +9 -5
- data/lib/mindee/product/international_id/international_id_v2.rb +9 -3
- data/lib/mindee/product/international_id/international_id_v2_document.rb +62 -20
- data/lib/mindee/product/international_id/international_id_v2_page.rb +9 -5
- data/lib/mindee/product/invoice/invoice_v4.rb +9 -3
- data/lib/mindee/product/invoice/invoice_v4_document.rb +90 -36
- data/lib/mindee/product/invoice/invoice_v4_line_item.rb +21 -11
- data/lib/mindee/product/invoice/invoice_v4_line_items.rb +62 -0
- data/lib/mindee/product/invoice/invoice_v4_page.rb +9 -5
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +8 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +2 -2
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +1 -1
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1.rb +9 -3
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +3 -3
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +9 -5
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1.rb +9 -3
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_added_sugar.rb +7 -4
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_calorie.rb +7 -4
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_cholesterol.rb +7 -4
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_dietary_fiber.rb +7 -4
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_document.rb +58 -20
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrient.rb +13 -7
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrients.rb +68 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_page.rb +9 -5
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_protein.rb +7 -4
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_saturated_fat.rb +7 -4
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_serving_size.rb +3 -2
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_sodium.rb +7 -4
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_carbohydrate.rb +7 -4
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_fat.rb +7 -4
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_sugar.rb +7 -4
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_trans_fat.rb +7 -4
- data/lib/mindee/product/passport/passport_v1.rb +9 -3
- data/lib/mindee/product/passport/passport_v1_document.rb +37 -13
- data/lib/mindee/product/passport/passport_v1_page.rb +9 -5
- data/lib/mindee/product/receipt/receipt_v5.rb +9 -3
- data/lib/mindee/product/receipt/receipt_v5_document.rb +54 -24
- data/lib/mindee/product/receipt/receipt_v5_line_item.rb +13 -7
- data/lib/mindee/product/receipt/receipt_v5_line_items.rb +54 -0
- data/lib/mindee/product/receipt/receipt_v5_page.rb +9 -5
- data/lib/mindee/product/resume/resume_v1.rb +9 -3
- data/lib/mindee/product/resume/resume_v1_certificate.rb +1 -1
- data/lib/mindee/product/resume/resume_v1_certificates.rb +58 -0
- data/lib/mindee/product/resume/resume_v1_document.rb +60 -47
- data/lib/mindee/product/resume/resume_v1_education.rb +1 -1
- data/lib/mindee/product/resume/resume_v1_educations.rb +64 -0
- data/lib/mindee/product/resume/resume_v1_language.rb +1 -1
- data/lib/mindee/product/resume/resume_v1_languages.rb +54 -0
- data/lib/mindee/product/resume/resume_v1_page.rb +9 -5
- data/lib/mindee/product/resume/resume_v1_professional_experience.rb +1 -1
- data/lib/mindee/product/resume/resume_v1_professional_experiences.rb +68 -0
- data/lib/mindee/product/resume/resume_v1_social_networks_url.rb +1 -1
- data/lib/mindee/product/resume/resume_v1_social_networks_urls.rb +54 -0
- data/lib/mindee/product/universal/universal.rb +46 -0
- data/lib/mindee/product/universal/universal_document.rb +32 -0
- data/lib/mindee/product/{generated/generated_v1_page.rb → universal/universal_page.rb} +14 -14
- data/lib/mindee/product/{generated/generated_v1_prediction.rb → universal/universal_prediction.rb} +32 -24
- data/lib/mindee/product/us/bank_check/bank_check_v1.rb +9 -3
- data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +20 -8
- data/lib/mindee/product/us/bank_check/bank_check_v1_page.rb +15 -8
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1.rb +9 -3
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copay.rb +5 -3
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copays.rb +52 -0
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_document.rb +46 -19
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_page.rb +9 -5
- data/lib/mindee/product/us/us_mail/us_mail_v3.rb +9 -3
- data/lib/mindee/product/us/us_mail/us_mail_v3_document.rb +20 -12
- data/lib/mindee/product/us/us_mail/us_mail_v3_page.rb +9 -5
- data/lib/mindee/product/us/us_mail/us_mail_v3_recipient_address.rb +2 -2
- data/lib/mindee/product/us/us_mail/us_mail_v3_recipient_addresses.rb +65 -0
- data/lib/mindee/product/us/us_mail/us_mail_v3_sender_address.rb +1 -1
- data/lib/mindee/product/us/w9/w9_v1.rb +10 -4
- data/lib/mindee/product/us/w9/w9_v1_page.rb +49 -18
- data/lib/mindee/product.rb +2 -8
- data/lib/mindee/version.rb +2 -2
- data/lib/mindee.rb +22 -9
- data/mindee.gemspec +19 -17
- data/sig/custom/marcel.rbs +3 -0
- data/sig/custom/mini_magick.rbs +24 -0
- data/sig/custom/net_http.rbs +34 -0
- data/sig/custom/origami.rbs +54 -0
- data/sig/mindee/client.rbs +63 -0
- data/sig/mindee/errors/mindee_error.rbs +13 -0
- data/sig/mindee/errors/mindee_http_error.rbs +16 -0
- data/sig/mindee/errors/mindee_input_error.rbs +17 -0
- data/sig/mindee/extraction/multi_receipts_extractor.rbs +6 -0
- data/sig/mindee/geometry/min_max.rbs +10 -0
- data/sig/mindee/geometry/point.rbs +13 -0
- data/sig/mindee/geometry/polygon.rbs +9 -0
- data/sig/mindee/geometry/quadrilateral.rbs +18 -0
- data/sig/mindee/geometry/utils.rbs +13 -0
- data/sig/mindee/http/endpoint.rbs +26 -0
- data/sig/mindee/http/http_error_handler.rbs +10 -0
- data/sig/mindee/http/response_validation.rbs +10 -0
- data/sig/mindee/http/workflow_endpoint.rbs +14 -0
- data/sig/mindee/image/extracted_image.rbs +15 -0
- data/sig/mindee/image/image_compressor.rbs +8 -0
- data/sig/mindee/image/image_extractor.rbs +13 -0
- data/sig/mindee/image/image_utils.rbs +19 -0
- data/sig/mindee/input/local_response.rbs +13 -0
- data/sig/mindee/input/sources/base64_input_source.rbs +11 -0
- data/sig/mindee/input/sources/bytes_input_source.rbs +10 -0
- data/sig/mindee/input/sources/file_input_source.rbs +10 -0
- data/sig/mindee/input/sources/local_input_source.rbs +24 -0
- data/sig/mindee/input/sources/path_input_source.rbs +10 -0
- data/sig/mindee/input/sources/url_input_source.rbs +20 -0
- data/sig/mindee/logging/logger.rbs +9 -0
- data/sig/mindee/parsing/common/api_request.rbs +15 -0
- data/sig/mindee/parsing/common/api_response.rbs +27 -0
- data/sig/mindee/parsing/common/document.rbs +20 -0
- data/sig/mindee/parsing/common/execution.rbs +23 -0
- data/sig/mindee/parsing/common/execution_file.rbs +12 -0
- data/sig/mindee/parsing/common/execution_priority.rbs +13 -0
- data/sig/mindee/parsing/common/extras/cropper_extra.rbs +14 -0
- data/sig/mindee/parsing/common/extras/extras.rbs +18 -0
- data/sig/mindee/parsing/common/extras/full_text_ocr_extra.rbs +15 -0
- data/sig/mindee/parsing/common/inference.rbs +19 -0
- data/sig/mindee/parsing/common/job.rbs +16 -0
- data/sig/mindee/parsing/common/ocr/mvision_v1.rbs +15 -0
- data/sig/mindee/parsing/common/ocr/ocr.rbs +39 -0
- data/sig/mindee/parsing/common/orientation.rbs +12 -0
- data/sig/mindee/parsing/common/page.rbs +15 -0
- data/sig/mindee/parsing/common/prediction.rbs +11 -0
- data/sig/mindee/parsing/common/product.rbs +13 -0
- data/sig/mindee/parsing/common/workflow_response.rbs +13 -0
- data/sig/mindee/parsing/standard/abstract_field.rbs +19 -0
- data/sig/mindee/parsing/standard/amount_field.rbs +12 -0
- data/sig/mindee/parsing/standard/base_field.rbs +12 -0
- data/sig/mindee/parsing/standard/boolean_field.rbs +12 -0
- data/sig/mindee/parsing/standard/classification_field.rbs +10 -0
- data/sig/mindee/parsing/standard/company_registration_field.rbs +14 -0
- data/sig/mindee/parsing/standard/date_field.rbs +14 -0
- data/sig/mindee/parsing/standard/feature_field.rbs +10 -0
- data/sig/mindee/parsing/standard/locale_field.rbs +16 -0
- data/sig/mindee/parsing/standard/payment_details_field.rbs +15 -0
- data/sig/mindee/parsing/standard/position_field.rbs +17 -0
- data/sig/mindee/parsing/standard/string_field.rbs +12 -0
- data/sig/mindee/parsing/standard/tax_field.rbs +23 -0
- data/sig/mindee/parsing/universal/universal_list_field.rbs +17 -0
- data/sig/mindee/parsing/universal/universal_object_field.rbs +23 -0
- data/sig/mindee/pdf/extracted_pdf.rbs +15 -0
- data/sig/mindee/pdf/pdf_compressor.rbs +15 -0
- data/sig/mindee/pdf/pdf_extractor.rbs +16 -0
- data/sig/mindee/pdf/pdf_processor.rbs +12 -0
- data/sig/mindee/pdf/pdf_tools.rbs +23 -0
- data/sig/mindee/product/barcode_reader/barcode_reader_v1.rbs +11 -0
- data/sig/mindee/product/barcode_reader/barcode_reader_v1_document.rbs +14 -0
- data/sig/mindee/product/barcode_reader/barcode_reader_v1_page.rbs +15 -0
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1.rbs +11 -0
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_carrier.rbs +14 -0
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_item.rbs +19 -0
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_items.rbs +13 -0
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_consignee.rbs +15 -0
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_document.rbs +25 -0
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_notify_party.rbs +15 -0
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_page.rbs +15 -0
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_shipper.rbs +15 -0
- data/sig/mindee/product/business_card/business_card_v1.rbs +11 -0
- data/sig/mindee/product/business_card/business_card_v1_document.rbs +23 -0
- data/sig/mindee/product/business_card/business_card_v1_page.rbs +15 -0
- data/sig/mindee/product/cropper/cropper_v1.rbs +11 -0
- data/sig/mindee/product/cropper/cropper_v1_document.rbs +12 -0
- data/sig/mindee/product/cropper/cropper_v1_page.rbs +16 -0
- data/sig/mindee/product/delivery_note/delivery_note_v1.rbs +11 -0
- data/sig/mindee/product/delivery_note/delivery_note_v1_document.rbs +19 -0
- data/sig/mindee/product/delivery_note/delivery_note_v1_page.rbs +15 -0
- data/sig/mindee/product/driver_license/driver_license_v1.rbs +11 -0
- data/sig/mindee/product/driver_license/driver_license_v1_document.rbs +25 -0
- data/sig/mindee/product/driver_license/driver_license_v1_page.rbs +15 -0
- data/sig/mindee/product/eu/license_plate/license_plate_v1.rbs +13 -0
- data/sig/mindee/product/eu/license_plate/license_plate_v1_document.rbs +15 -0
- data/sig/mindee/product/eu/license_plate/license_plate_v1_page.rbs +17 -0
- data/sig/mindee/product/financial_document/financial_document_v1.rbs +11 -0
- data/sig/mindee/product/financial_document/financial_document_v1_document.rbs +46 -0
- data/sig/mindee/product/financial_document/financial_document_v1_line_item.rbs +21 -0
- data/sig/mindee/product/financial_document/financial_document_v1_line_items.rbs +13 -0
- data/sig/mindee/product/financial_document/financial_document_v1_page.rbs +15 -0
- data/sig/mindee/product/fr/bank_account_details/bank_account_details_v1.rbs +13 -0
- data/sig/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rbs +17 -0
- data/sig/mindee/product/fr/bank_account_details/bank_account_details_v1_page.rbs +17 -0
- data/sig/mindee/product/fr/bank_account_details/bank_account_details_v2.rbs +13 -0
- data/sig/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rbs +17 -0
- data/sig/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rbs +18 -0
- data/sig/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rbs +17 -0
- data/sig/mindee/product/fr/bank_statement/bank_statement_v2.rbs +13 -0
- data/sig/mindee/product/fr/bank_statement/bank_statement_v2_document.rbs +29 -0
- data/sig/mindee/product/fr/bank_statement/bank_statement_v2_page.rbs +17 -0
- data/sig/mindee/product/fr/bank_statement/bank_statement_v2_transaction.rbs +18 -0
- data/sig/mindee/product/fr/bank_statement/bank_statement_v2_transactions.rbs +15 -0
- data/sig/mindee/product/fr/carte_grise/carte_grise_v1.rbs +13 -0
- data/sig/mindee/product/fr/carte_grise/carte_grise_v1_document.rbs +55 -0
- data/sig/mindee/product/fr/carte_grise/carte_grise_v1_page.rbs +17 -0
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1.rbs +13 -0
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_document.rbs +34 -0
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_energy_consumer.rbs +15 -0
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_energy_supplier.rbs +15 -0
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_energy_usage.rbs +21 -0
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_energy_usages.rbs +15 -0
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_meter_detail.rbs +16 -0
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_page.rbs +17 -0
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_subscription.rbs +21 -0
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_subscriptions.rbs +15 -0
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contribution.rbs +21 -0
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contributions.rbs +15 -0
- data/sig/mindee/product/fr/health_card/health_card_v1.rbs +13 -0
- data/sig/mindee/product/fr/health_card/health_card_v1_document.rbs +18 -0
- data/sig/mindee/product/fr/health_card/health_card_v1_page.rbs +17 -0
- data/sig/mindee/product/fr/id_card/id_card_v1.rbs +13 -0
- data/sig/mindee/product/fr/id_card/id_card_v1_document.rbs +24 -0
- data/sig/mindee/product/fr/id_card/id_card_v1_page.rbs +18 -0
- data/sig/mindee/product/fr/id_card/id_card_v2.rbs +13 -0
- data/sig/mindee/product/fr/id_card/id_card_v2_document.rbs +29 -0
- data/sig/mindee/product/fr/id_card/id_card_v2_page.rbs +19 -0
- data/sig/mindee/product/fr/payslip/payslip_v2.rbs +13 -0
- data/sig/mindee/product/fr/payslip/payslip_v2_bank_account_detail.rbs +16 -0
- data/sig/mindee/product/fr/payslip/payslip_v2_document.rbs +24 -0
- data/sig/mindee/product/fr/payslip/payslip_v2_employee.rbs +20 -0
- data/sig/mindee/product/fr/payslip/payslip_v2_employer.rbs +20 -0
- data/sig/mindee/product/fr/payslip/payslip_v2_employment.rbs +19 -0
- data/sig/mindee/product/fr/payslip/payslip_v2_page.rbs +17 -0
- data/sig/mindee/product/fr/payslip/payslip_v2_pay_detail.rbs +23 -0
- data/sig/mindee/product/fr/payslip/payslip_v2_pay_period.rbs +18 -0
- data/sig/mindee/product/fr/payslip/payslip_v2_pto.rbs +16 -0
- data/sig/mindee/product/fr/payslip/payslip_v2_salary_detail.rbs +19 -0
- data/sig/mindee/product/fr/payslip/payslip_v2_salary_details.rbs +15 -0
- data/sig/mindee/product/fr/payslip/payslip_v3.rbs +13 -0
- data/sig/mindee/product/fr/payslip/payslip_v3_bank_account_detail.rbs +16 -0
- data/sig/mindee/product/fr/payslip/payslip_v3_document.rbs +26 -0
- data/sig/mindee/product/fr/payslip/payslip_v3_employee.rbs +20 -0
- data/sig/mindee/product/fr/payslip/payslip_v3_employer.rbs +20 -0
- data/sig/mindee/product/fr/payslip/payslip_v3_employment.rbs +20 -0
- data/sig/mindee/product/fr/payslip/payslip_v3_page.rbs +17 -0
- data/sig/mindee/product/fr/payslip/payslip_v3_paid_time_off.rbs +20 -0
- data/sig/mindee/product/fr/payslip/payslip_v3_paid_time_offs.rbs +15 -0
- data/sig/mindee/product/fr/payslip/payslip_v3_pay_detail.rbs +23 -0
- data/sig/mindee/product/fr/payslip/payslip_v3_pay_period.rbs +18 -0
- data/sig/mindee/product/fr/payslip/payslip_v3_salary_detail.rbs +20 -0
- data/sig/mindee/product/fr/payslip/payslip_v3_salary_details.rbs +15 -0
- data/sig/mindee/product/ind/indian_passport/indian_passport_v1.rbs +13 -0
- data/sig/mindee/product/ind/indian_passport/indian_passport_v1_document.rbs +37 -0
- data/sig/mindee/product/ind/indian_passport/indian_passport_v1_page.rbs +17 -0
- data/sig/mindee/product/international_id/international_id_v2.rbs +11 -0
- data/sig/mindee/product/international_id/international_id_v2_document.rbs +29 -0
- data/sig/mindee/product/international_id/international_id_v2_page.rbs +15 -0
- data/sig/mindee/product/invoice/invoice_v4.rbs +11 -0
- data/sig/mindee/product/invoice/invoice_v4_document.rbs +40 -0
- data/sig/mindee/product/invoice/invoice_v4_line_item.rbs +21 -0
- data/sig/mindee/product/invoice/invoice_v4_line_items.rbs +13 -0
- data/sig/mindee/product/invoice/invoice_v4_page.rbs +15 -0
- data/sig/mindee/product/invoice_splitter/invoice_splitter_v1.rbs +14 -0
- data/sig/mindee/product/invoice_splitter/invoice_splitter_v1_document.rbs +19 -0
- data/sig/mindee/product/invoice_splitter/invoice_splitter_v1_page.rbs +13 -0
- data/sig/mindee/product/multi_receipts_detector/multi_receipts_detector_v1.rbs +11 -0
- data/sig/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs +13 -0
- data/sig/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_page.rbs +15 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1.rbs +11 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_added_sugar.rbs +14 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_calorie.rbs +14 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_cholesterol.rbs +14 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_dietary_fiber.rbs +14 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_document.rbs +28 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrient.rbs +18 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrients.rbs +13 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_page.rbs +15 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_protein.rbs +14 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_saturated_fat.rbs +14 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_serving_size.rbs +13 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_sodium.rbs +15 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_carbohydrate.rbs +14 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_fat.rbs +14 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_sugar.rbs +14 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_trans_fat.rbs +14 -0
- data/sig/mindee/product/passport/passport_v1.rbs +11 -0
- data/sig/mindee/product/passport/passport_v1_document.rbs +23 -0
- data/sig/mindee/product/passport/passport_v1_page.rbs +15 -0
- data/sig/mindee/product/receipt/receipt_v5.rbs +11 -0
- data/sig/mindee/product/receipt/receipt_v5_document.rbs +31 -0
- data/sig/mindee/product/receipt/receipt_v5_line_item.rbs +17 -0
- data/sig/mindee/product/receipt/receipt_v5_line_items.rbs +13 -0
- data/sig/mindee/product/receipt/receipt_v5_page.rbs +15 -0
- data/sig/mindee/product/resume/resume_v1.rbs +11 -0
- data/sig/mindee/product/resume/resume_v1_certificate.rbs +17 -0
- data/sig/mindee/product/resume/resume_v1_certificates.rbs +13 -0
- data/sig/mindee/product/resume/resume_v1_document.rbs +39 -0
- data/sig/mindee/product/resume/resume_v1_education.rbs +20 -0
- data/sig/mindee/product/resume/resume_v1_educations.rbs +13 -0
- data/sig/mindee/product/resume/resume_v1_language.rbs +15 -0
- data/sig/mindee/product/resume/resume_v1_languages.rbs +13 -0
- data/sig/mindee/product/resume/resume_v1_page.rbs +15 -0
- data/sig/mindee/product/resume/resume_v1_professional_experience.rbs +22 -0
- data/sig/mindee/product/resume/resume_v1_professional_experiences.rbs +13 -0
- data/sig/mindee/product/resume/resume_v1_social_networks_url.rbs +15 -0
- data/sig/mindee/product/resume/resume_v1_social_networks_urls.rbs +13 -0
- data/sig/mindee/product/universal/universal.rbs +14 -0
- data/sig/mindee/product/universal/universal_document.rbs +10 -0
- data/sig/mindee/product/universal/universal_page.rbs +14 -0
- data/sig/mindee/product/universal/universal_prediction.rbs +19 -0
- data/sig/mindee/product/us/bank_check/bank_check_v1.rbs +13 -0
- data/sig/mindee/product/us/bank_check/bank_check_v1_document.rbs +20 -0
- data/sig/mindee/product/us/bank_check/bank_check_v1_page.rbs +19 -0
- data/sig/mindee/product/us/healthcare_card/healthcare_card_v1.rbs +13 -0
- data/sig/mindee/product/us/healthcare_card/healthcare_card_v1_copay.rbs +17 -0
- data/sig/mindee/product/us/healthcare_card/healthcare_card_v1_copays.rbs +15 -0
- data/sig/mindee/product/us/healthcare_card/healthcare_card_v1_document.rbs +28 -0
- data/sig/mindee/product/us/healthcare_card/healthcare_card_v1_page.rbs +17 -0
- data/sig/mindee/product/us/us_mail/us_mail_v3.rbs +13 -0
- data/sig/mindee/product/us/us_mail/us_mail_v3_document.rbs +21 -0
- data/sig/mindee/product/us/us_mail/us_mail_v3_page.rbs +17 -0
- data/sig/mindee/product/us/us_mail/us_mail_v3_recipient_address.rbs +23 -0
- data/sig/mindee/product/us/us_mail/us_mail_v3_recipient_addresses.rbs +15 -0
- data/sig/mindee/product/us/us_mail/us_mail_v3_sender_address.rbs +18 -0
- data/sig/mindee/product/us/w9/w9_v1.rbs +13 -0
- data/sig/mindee/product/us/w9/w9_v1_document.rbs +14 -0
- data/sig/mindee/product/us/w9/w9_v1_page.rbs +29 -0
- data/sig/mindee/version.rbs +6 -0
- data/sig/mindee.rbs +59 -0
- metadata +395 -103
- data/docs/bank_statement_fr_v1.md +0 -178
- data/docs/code_samples/bank_statement_fr_v1_async.txt +0 -19
- data/docs/code_samples/custom_v1.txt +0 -33
- data/docs/code_samples/eu_driver_license_v1.txt +0 -19
- data/docs/code_samples/expense_receipts_v4.txt +0 -19
- data/docs/code_samples/proof_of_address_v1.txt +0 -19
- data/docs/code_samples/us_mail_v2_async.txt +0 -19
- data/docs/code_samples/workflow_execution.txt +0 -29
- data/docs/custom_v1.md +0 -111
- data/docs/payslip_fra_v2.md +0 -294
- data/lib/mindee/extraction/common.rb +0 -4
- data/lib/mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.rb +0 -32
- data/lib/mindee/extraction/pdf_extractor.rb +0 -4
- data/lib/mindee/extraction/tax_extractor/ocr_extractor.rb +0 -110
- data/lib/mindee/extraction/tax_extractor/tax_extractor.rb +0 -338
- data/lib/mindee/extraction/tax_extractor.rb +0 -3
- data/lib/mindee/parsing/custom/classification_field.rb +0 -28
- data/lib/mindee/parsing/custom/list_field.rb +0 -78
- data/lib/mindee/parsing/custom.rb +0 -4
- data/lib/mindee/parsing/generated.rb +0 -4
- data/lib/mindee/product/custom/custom_v1.rb +0 -36
- data/lib/mindee/product/custom/custom_v1_document.rb +0 -60
- data/lib/mindee/product/custom/custom_v1_page.rb +0 -32
- data/lib/mindee/product/eu/driver_license/driver_license_v1.rb +0 -41
- data/lib/mindee/product/eu/driver_license/driver_license_v1_document.rb +0 -88
- data/lib/mindee/product/eu/driver_license/driver_license_v1_page.rb +0 -53
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1.rb +0 -41
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1_page.rb +0 -34
- data/lib/mindee/product/generated/generated_v1.rb +0 -38
- data/lib/mindee/product/generated/generated_v1_document.rb +0 -35
- data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +0 -39
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +0 -83
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_page.rb +0 -32
- data/lib/mindee/product/receipt/receipt_v4.rb +0 -36
- data/lib/mindee/product/receipt/receipt_v4_document.rb +0 -86
- data/lib/mindee/product/receipt/receipt_v4_page.rb +0 -32
- data/lib/mindee/product/us/driver_license/driver_license_v1.rb +0 -41
- data/lib/mindee/product/us/driver_license/driver_license_v1_document.rb +0 -113
- data/lib/mindee/product/us/driver_license/driver_license_v1_page.rb +0 -53
- data/lib/mindee/product/us/us_mail/us_mail_v2.rb +0 -41
- data/lib/mindee/product/us/us_mail/us_mail_v2_document.rb +0 -100
- data/lib/mindee/product/us/us_mail/us_mail_v2_page.rb +0 -34
- data/lib/mindee/product/us/us_mail/us_mail_v2_recipient_address.rb +0 -105
- data/lib/mindee/product/us/us_mail/us_mail_v2_sender_address.rb +0 -66
@@ -4,20 +4,19 @@ require 'mini_magick'
|
|
4
4
|
require 'origami'
|
5
5
|
require 'stringio'
|
6
6
|
require 'tempfile'
|
7
|
-
require_relative '
|
7
|
+
require_relative '../input/sources'
|
8
8
|
require_relative 'extracted_image'
|
9
9
|
|
10
10
|
module Mindee
|
11
11
|
# Image Extraction Module.
|
12
|
-
module
|
12
|
+
module Image
|
13
13
|
# Image Extraction wrapper class.
|
14
14
|
module ImageExtractor
|
15
|
+
# Attaches an image as a new page in a PdfDocument object.
|
16
|
+
#
|
17
|
+
# @param [StringIO] input_buffer Input buffer. Only supports JPEG.
|
18
|
+
# @return [Origami::PDF] A PdfDocument handle.
|
15
19
|
def self.attach_image_as_new_file(input_buffer, format: 'jpg')
|
16
|
-
# Attaches an image as a new page in a PdfDocument object.
|
17
|
-
#
|
18
|
-
# @param [StringIO] input_buffer Input buffer. Only supports JPEG.
|
19
|
-
# @return [Origami::PDF] A PdfDocument handle.
|
20
|
-
|
21
20
|
magick_image = MiniMagick::Image.read(input_buffer)
|
22
21
|
# NOTE: some jpeg images get rendered as three different versions of themselves per output if the format isn't
|
23
22
|
# converted.
|
@@ -35,9 +34,9 @@ module Mindee
|
|
35
34
|
# @param [Integer] page_id ID of the Page to extract from.
|
36
35
|
# @param [Array<Array<Mindee::Geometry::Point>>, Array<Mindee::Geometry::Quadrangle>] polygons List of coordinates
|
37
36
|
# to extract.
|
38
|
-
# @return [Array<Mindee::
|
37
|
+
# @return [Array<Mindee::Image::ExtractedImage>] Extracted Images.
|
39
38
|
def self.extract_multiple_images_from_source(input_source, page_id, polygons)
|
40
|
-
new_stream =
|
39
|
+
new_stream = load_input_source_pdf_page_as_stringio(input_source, page_id)
|
41
40
|
new_stream.seek(0)
|
42
41
|
|
43
42
|
extract_images_from_polygons(input_source, new_stream, page_id, polygons)
|
@@ -49,13 +48,13 @@ module Mindee
|
|
49
48
|
# @param [StringIO] pdf_stream Buffer of the PDF.
|
50
49
|
# @param [Integer] page_id Page ID.
|
51
50
|
# @param [Array<Mindee::Geometry::Point, Mindee::Geometry::Polygon, Mindee::Geometry::Quadrangle>] polygons
|
52
|
-
# @return [Array<Mindee::
|
51
|
+
# @return [Array<Mindee::Image::ExtractedImage>] Extracted Images.
|
53
52
|
def self.extract_images_from_polygons(input_source, pdf_stream, page_id, polygons)
|
54
53
|
extracted_elements = []
|
55
54
|
|
56
55
|
polygons.each_with_index do |polygon, element_id|
|
57
|
-
polygon = normalize_polygon(polygon)
|
58
|
-
page_content = read_page_content(pdf_stream)
|
56
|
+
polygon = ImageUtils.normalize_polygon(polygon)
|
57
|
+
page_content = ImageUtils.read_page_content(pdf_stream)
|
59
58
|
|
60
59
|
min_max_x = Geometry.get_min_max_x([
|
61
60
|
polygon.top_left,
|
@@ -69,16 +68,16 @@ module Mindee
|
|
69
68
|
polygon.top_right,
|
70
69
|
polygon.bottom_left,
|
71
70
|
])
|
72
|
-
file_extension = determine_file_extension(input_source)
|
73
|
-
cropped_image = crop_image(page_content, min_max_x, min_max_y)
|
71
|
+
file_extension = ImageUtils.determine_file_extension(input_source)
|
72
|
+
cropped_image = ImageUtils.crop_image(page_content, min_max_x, min_max_y)
|
74
73
|
if file_extension == 'pdf'
|
75
74
|
cropped_image.format('jpg')
|
76
75
|
else
|
77
|
-
cropped_image.format(file_extension)
|
76
|
+
cropped_image.format(file_extension.to_s)
|
78
77
|
end
|
79
78
|
|
80
79
|
buffer = StringIO.new
|
81
|
-
write_image_to_buffer(cropped_image, buffer)
|
80
|
+
ImageUtils.write_image_to_buffer(cropped_image, buffer)
|
82
81
|
file_name = "#{input_source.filename}_page#{page_id}-#{element_id}.#{file_extension}"
|
83
82
|
|
84
83
|
extracted_elements << create_extracted_image(buffer, file_name, page_id, element_id)
|
@@ -87,73 +86,16 @@ module Mindee
|
|
87
86
|
extracted_elements
|
88
87
|
end
|
89
88
|
|
90
|
-
# Retrieves the bounding box of a polygon.
|
91
|
-
#
|
92
|
-
# @param [Array<Point>, Mindee::Geometry::Polygon] polygon
|
93
|
-
def self.normalize_polygon(polygon)
|
94
|
-
if polygon.is_a?(Mindee::Geometry::Polygon)
|
95
|
-
Mindee::Geometry.get_bounding_box(polygon)
|
96
|
-
else
|
97
|
-
polygon
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
# Loads a buffer into a MiniMagick Image.
|
102
|
-
#
|
103
|
-
# @param [StringIO] pdf_stream Buffer containg the PDF
|
104
|
-
# @return [MiniMagick::Image] a valid MiniMagick image handle.
|
105
|
-
def self.read_page_content(pdf_stream)
|
106
|
-
pdf_stream.rewind
|
107
|
-
MiniMagick::Image.read(pdf_stream)
|
108
|
-
end
|
109
|
-
|
110
|
-
# Crops a MiniMagick Image from a the given bounding box.
|
111
|
-
#
|
112
|
-
# @param [MiniMagick::Image] image Input Image.
|
113
|
-
# @param [Mindee::Geometry::MinMax] min_max_x minimum & maximum values for the x coordinates.
|
114
|
-
# @param [Mindee::Geometry::MinMax] min_max_y minimum & maximum values for the y coordinates.
|
115
|
-
def self.crop_image(image, min_max_x, min_max_y)
|
116
|
-
width = image[:width].to_i
|
117
|
-
height = image[:height].to_i
|
118
|
-
|
119
|
-
image.format('jpg')
|
120
|
-
new_width = (min_max_x.max - min_max_x.min) * width
|
121
|
-
new_height = (min_max_y.max - min_max_y.min) * height
|
122
|
-
image.crop("#{new_width}x#{new_height}+#{min_max_x.min * width}+#{min_max_y.min * height}")
|
123
|
-
|
124
|
-
image
|
125
|
-
end
|
126
|
-
|
127
|
-
# Writes a MiniMagick::Image to a buffer.
|
128
|
-
#
|
129
|
-
# @param [MiniMagick::Image] image a valid MiniMagick image.
|
130
|
-
# @param [StringIO] buffer
|
131
|
-
def self.write_image_to_buffer(image, buffer)
|
132
|
-
image.write(buffer)
|
133
|
-
end
|
134
|
-
|
135
|
-
# Retrieves the file extension from the main file to apply it to the extracted images. Note: coerces pdf as jpg.
|
136
|
-
#
|
137
|
-
# @param [Mindee::Input::Source::LocalInputSource] input_source Local input source.
|
138
|
-
# @return [String] A valid file extension.
|
139
|
-
def self.determine_file_extension(input_source)
|
140
|
-
if input_source.pdf? || input_source.filename.downcase.end_with?('pdf')
|
141
|
-
'jpg'
|
142
|
-
else
|
143
|
-
File.extname(input_source.filename).strip.downcase[1..]
|
144
|
-
end
|
145
|
-
end
|
146
|
-
|
147
89
|
# Generates an ExtractedImage.
|
148
90
|
#
|
149
91
|
# @param [StringIO] buffer Buffer containing the image.
|
150
92
|
# @param [String] file_name Name for the file.
|
151
|
-
# @param [Object] page_id ID of the page the file was
|
93
|
+
# @param [Object] page_id ID of the page the file was universal from.
|
152
94
|
# @param [Object] element_id ID of the element of a given page.
|
153
95
|
def self.create_extracted_image(buffer, file_name, page_id, element_id)
|
154
96
|
buffer.rewind
|
155
97
|
ExtractedImage.new(
|
156
|
-
Mindee::Input::Source::BytesInputSource.new(buffer.read, file_name),
|
98
|
+
Mindee::Input::Source::BytesInputSource.new(buffer.read.to_s, file_name),
|
157
99
|
page_id,
|
158
100
|
element_id
|
159
101
|
)
|
@@ -163,11 +105,11 @@ module Mindee
|
|
163
105
|
#
|
164
106
|
# @param input_file [LocalInputSource] Local input.
|
165
107
|
# @param [Integer] page_id Page ID.
|
166
|
-
# @return [
|
167
|
-
def self.
|
108
|
+
# @return [StringIO] A valid PdfDocument handle.
|
109
|
+
def self.load_input_source_pdf_page_as_stringio(input_file, page_id)
|
168
110
|
input_file.io_stream.rewind
|
169
111
|
if input_file.pdf?
|
170
|
-
Mindee::PDF::
|
112
|
+
Mindee::PDF::PDFProcessor.get_page(Origami::PDF.read(input_file.io_stream), page_id)
|
171
113
|
else
|
172
114
|
input_file.io_stream
|
173
115
|
end
|
@@ -30,13 +30,14 @@ module Mindee
|
|
30
30
|
# @param [MiniMagick::Image, StringIO, File, Tempfile] image The input image
|
31
31
|
# @return [MiniMagick::Image]
|
32
32
|
def self.to_image(image)
|
33
|
-
if image.
|
33
|
+
if image.is_a?(MiniMagick::Image)
|
34
|
+
image
|
35
|
+
elsif image.is_a?(StringIO) || image.is_a?(IO) || image.is_a?(File) || image.is_a?(Tempfile)
|
34
36
|
image.rewind
|
35
37
|
MiniMagick::Image.read(image)
|
36
|
-
elsif image.is_a?(MiniMagick::Image)
|
37
|
-
image
|
38
38
|
else
|
39
|
-
|
39
|
+
img_class = image.class ? image.class.to_s : 'unknown format'
|
40
|
+
raise Errors::MindeeImageError, "Expected an I/O object or a MiniMagick::Image. '#{img_class}' given instead."
|
40
41
|
end
|
41
42
|
end
|
42
43
|
|
@@ -59,7 +60,7 @@ module Mindee
|
|
59
60
|
# @param max_width [Integer] Maximum width. If not specified, the horizontal ratio will remain the same.
|
60
61
|
# @param max_height [Integer] Maximum height. If not specified, the vertical ratio will remain the same.
|
61
62
|
def self.calculate_new_dimensions(original, max_width: nil, max_height: nil)
|
62
|
-
raise 'Provided image could not be processed for resizing.' if original.nil?
|
63
|
+
raise Errors::MindeeImageError, 'Provided image could not be processed for resizing.' if original.nil?
|
63
64
|
|
64
65
|
return [original.width, original.height] if max_width.nil? && max_height.nil?
|
65
66
|
|
@@ -68,8 +69,8 @@ module Mindee
|
|
68
69
|
|
69
70
|
scale_factor = [width_ratio, height_ratio].min
|
70
71
|
|
71
|
-
new_width = (original.width * scale_factor).to_i
|
72
|
-
new_height = (original.height * scale_factor).to_i
|
72
|
+
new_width = (original.width.to_f * scale_factor).to_i
|
73
|
+
new_height = (original.height.to_f * scale_factor).to_i
|
73
74
|
|
74
75
|
[new_width, new_height]
|
75
76
|
end
|
@@ -99,6 +100,66 @@ module Mindee
|
|
99
100
|
compressed_image.quality image_quality.to_s
|
100
101
|
compressed_image
|
101
102
|
end
|
103
|
+
|
104
|
+
# Retrieves the bounding box of a polygon.
|
105
|
+
#
|
106
|
+
# @param [Array<Point>, Mindee::Geometry::Polygon] polygon
|
107
|
+
def self.normalize_polygon(polygon)
|
108
|
+
if polygon.is_a?(Mindee::Geometry::Polygon) ||
|
109
|
+
(polygon.is_a?(Array) && polygon[0].is_a?(Mindee::Geometry::Point))
|
110
|
+
Mindee::Geometry.get_bounding_box(polygon)
|
111
|
+
elsif polygon.is_a?(Mindee::Geometry::Quadrilateral)
|
112
|
+
polygon
|
113
|
+
else
|
114
|
+
raise Errors::MindeeGeometryError, 'Provided polygon has an invalid type.'
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
# Loads a buffer into a MiniMagick Image.
|
119
|
+
#
|
120
|
+
# @param [StringIO] pdf_stream Buffer containg the PDF
|
121
|
+
# @return [MiniMagick::Image] a valid MiniMagick image handle.
|
122
|
+
def self.read_page_content(pdf_stream)
|
123
|
+
pdf_stream.rewind
|
124
|
+
MiniMagick::Image.read(pdf_stream)
|
125
|
+
end
|
126
|
+
|
127
|
+
# Crops a MiniMagick Image from a the given bounding box.
|
128
|
+
#
|
129
|
+
# @param [MiniMagick::Image] image Input Image.
|
130
|
+
# @param [Mindee::Geometry::MinMax] min_max_x minimum & maximum values for the x coordinates.
|
131
|
+
# @param [Mindee::Geometry::MinMax] min_max_y minimum & maximum values for the y coordinates.
|
132
|
+
def self.crop_image(image, min_max_x, min_max_y)
|
133
|
+
width = image[:width].to_i
|
134
|
+
height = image[:height].to_i
|
135
|
+
|
136
|
+
image.format('jpg')
|
137
|
+
new_width = (min_max_x.max - min_max_x.min) * width
|
138
|
+
new_height = (min_max_y.max - min_max_y.min) * height
|
139
|
+
image.crop("#{new_width}x#{new_height}+#{min_max_x.min * width}+#{min_max_y.min * height}")
|
140
|
+
|
141
|
+
image
|
142
|
+
end
|
143
|
+
|
144
|
+
# Writes a MiniMagick::Image to a buffer.
|
145
|
+
#
|
146
|
+
# @param [MiniMagick::Image] image a valid MiniMagick image.
|
147
|
+
# @param [StringIO] buffer
|
148
|
+
def self.write_image_to_buffer(image, buffer)
|
149
|
+
image.write(buffer)
|
150
|
+
end
|
151
|
+
|
152
|
+
# Retrieves the file extension from the main file to apply it to the extracted images. Note: coerces pdf as jpg.
|
153
|
+
#
|
154
|
+
# @param [Mindee::Input::Source::LocalInputSource] input_source Local input source.
|
155
|
+
# @return [String, nil] A valid file extension.
|
156
|
+
def self.determine_file_extension(input_source)
|
157
|
+
if input_source.pdf? || input_source.filename.downcase.end_with?('pdf')
|
158
|
+
'jpg'
|
159
|
+
else
|
160
|
+
File.extname(input_source.filename.to_s).strip.downcase[1..].to_s
|
161
|
+
end
|
162
|
+
end
|
102
163
|
end
|
103
164
|
end
|
104
165
|
end
|
data/lib/mindee/image.rb
CHANGED
@@ -16,18 +16,18 @@ module Mindee
|
|
16
16
|
def initialize(input_file)
|
17
17
|
case input_file
|
18
18
|
when IO, StringIO, File, Tempfile
|
19
|
-
str_stripped = input_file.read.gsub(%r{[\r\n]}, '')
|
19
|
+
str_stripped = input_file.read.to_s.gsub(%r{[\r\n]}, '')
|
20
20
|
@file = StringIO.new(str_stripped)
|
21
21
|
@file.rewind
|
22
22
|
when Pathname, String
|
23
|
-
@file = if Pathname(input_file).exist?
|
24
|
-
StringIO.new(File.read(input_file, encoding: 'utf-8').gsub(%r{[\r\n]}, ''))
|
23
|
+
@file = if Pathname(input_file.to_s).exist?
|
24
|
+
StringIO.new(File.read(input_file.to_s, encoding: 'utf-8').gsub(%r{[\r\n]}, ''))
|
25
25
|
else
|
26
|
-
StringIO.new(input_file.gsub(%r{[\r\n]}, ''))
|
26
|
+
StringIO.new(input_file.to_s.gsub(%r{[\r\n]}, ''))
|
27
27
|
end
|
28
28
|
@file.rewind
|
29
29
|
else
|
30
|
-
raise "Incompatible type for input '#{input_file.class}'."
|
30
|
+
raise Errors::MindeeInputError, "Incompatible type for input '#{input_file.class}'."
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
@@ -38,7 +38,7 @@ module Mindee
|
|
38
38
|
file_str = @file.read
|
39
39
|
JSON.parse(file_str, object_class: Hash)
|
40
40
|
rescue JSON::ParserError
|
41
|
-
raise "File is not a valid dict. #{file_str}"
|
41
|
+
raise Errors::MindeeInputError, "File is not a valid dict. #{file_str}"
|
42
42
|
end
|
43
43
|
|
44
44
|
# Processes the secret key
|
@@ -56,14 +56,14 @@ module Mindee
|
|
56
56
|
@file.rewind
|
57
57
|
mac = OpenSSL::HMAC.hexdigest(algorithm, self.class.process_secret_key(secret_key), @file.read)
|
58
58
|
rescue StandardError
|
59
|
-
raise 'Could not get HMAC signature from payload.'
|
59
|
+
raise Errors::MindeeInputError, 'Could not get HMAC signature from payload.'
|
60
60
|
end
|
61
61
|
mac
|
62
62
|
end
|
63
63
|
|
64
64
|
# @param secret_key [String] Secret key, either a string or a byte/byte array.
|
65
65
|
# @param signature [String]
|
66
|
-
# @return [
|
66
|
+
# @return [bool]
|
67
67
|
def valid_hmac_signature?(secret_key, signature)
|
68
68
|
signature == get_hmac_signature(secret_key)
|
69
69
|
end
|
@@ -9,17 +9,17 @@ module Mindee
|
|
9
9
|
class Base64InputSource < LocalInputSource
|
10
10
|
# @param base64_string [String]
|
11
11
|
# @param filename [String]
|
12
|
-
# @param
|
13
|
-
def initialize(base64_string, filename,
|
14
|
-
io_stream = StringIO.new(base64_string.unpack1('m*'))
|
12
|
+
# @param repair_pdf [bool]
|
13
|
+
def initialize(base64_string, filename, repair_pdf: false)
|
14
|
+
io_stream = StringIO.new(base64_string.unpack1('m*').to_s)
|
15
15
|
io_stream.set_encoding Encoding::BINARY
|
16
|
-
super(io_stream, filename,
|
16
|
+
super(io_stream, filename, repair_pdf: repair_pdf)
|
17
17
|
end
|
18
18
|
|
19
19
|
# Overload of the same function to prevent a base64 from being re-encoded.
|
20
|
-
# @param close [
|
20
|
+
# @param close [bool]
|
21
21
|
# @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
|
22
|
-
def
|
22
|
+
def read_contents(close: true)
|
23
23
|
@io_stream.seek(0)
|
24
24
|
data = @io_stream.read
|
25
25
|
@io_stream.close if close
|
@@ -9,11 +9,11 @@ module Mindee
|
|
9
9
|
class BytesInputSource < LocalInputSource
|
10
10
|
# @param raw_bytes [String]
|
11
11
|
# @param filename [String]
|
12
|
-
# @param
|
13
|
-
def initialize(raw_bytes, filename,
|
12
|
+
# @param repair_pdf [bool]
|
13
|
+
def initialize(raw_bytes, filename, repair_pdf: false)
|
14
14
|
io_stream = StringIO.new(raw_bytes)
|
15
15
|
io_stream.set_encoding Encoding::BINARY
|
16
|
-
super(io_stream, filename,
|
16
|
+
super(io_stream, filename, repair_pdf: repair_pdf)
|
17
17
|
end
|
18
18
|
end
|
19
19
|
end
|
@@ -9,10 +9,10 @@ module Mindee
|
|
9
9
|
class FileInputSource < LocalInputSource
|
10
10
|
# @param input_file [File]
|
11
11
|
# @param filename [String]
|
12
|
-
# @param
|
13
|
-
def initialize(input_file, filename,
|
12
|
+
# @param repair_pdf [bool]
|
13
|
+
def initialize(input_file, filename, repair_pdf: false)
|
14
14
|
io_stream = input_file
|
15
|
-
super(io_stream, filename,
|
15
|
+
super(io_stream, filename, repair_pdf: repair_pdf)
|
16
16
|
end
|
17
17
|
end
|
18
18
|
end
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'stringio'
|
4
4
|
require 'marcel'
|
5
|
+
require 'fileutils'
|
5
6
|
|
6
7
|
require_relative '../../pdf'
|
7
8
|
require_relative '../../image'
|
@@ -20,29 +21,6 @@ module Mindee
|
|
20
21
|
'image/webp',
|
21
22
|
].freeze
|
22
23
|
|
23
|
-
# Standard error for invalid mime types
|
24
|
-
class MimeTypeError < StandardError
|
25
|
-
end
|
26
|
-
|
27
|
-
# Error sent if the file's mimetype isn't allowed
|
28
|
-
class InvalidMimeTypeError < MimeTypeError
|
29
|
-
# @return [String]
|
30
|
-
attr_reader :invalid_mimetype
|
31
|
-
|
32
|
-
# @param mime_type [String]
|
33
|
-
def initialize(mime_type)
|
34
|
-
@invalid_mimetype = mime_type
|
35
|
-
super("'#{@invalid_mimetype}' mime type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}")
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
# Error sent if a pdf file couldn't be fixed
|
40
|
-
class UnfixablePDFError < MimeTypeError
|
41
|
-
def initialize
|
42
|
-
super("Corrupted PDF couldn't be repaired.")
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
24
|
# Base class for loading documents.
|
47
25
|
class LocalInputSource
|
48
26
|
# @return [String]
|
@@ -52,27 +30,31 @@ module Mindee
|
|
52
30
|
# @return [StringIO]
|
53
31
|
attr_reader :io_stream
|
54
32
|
|
55
|
-
# @param io_stream [StringIO]
|
33
|
+
# @param io_stream [StringIO, File]
|
56
34
|
# @param filename [String]
|
57
|
-
# @param
|
58
|
-
def initialize(io_stream, filename,
|
35
|
+
# @param repair_pdf [bool]
|
36
|
+
def initialize(io_stream, filename, repair_pdf: false)
|
59
37
|
@io_stream = io_stream
|
60
38
|
@filename = filename
|
61
|
-
@file_mimetype = if
|
39
|
+
@file_mimetype = if repair_pdf
|
62
40
|
Marcel::MimeType.for @io_stream
|
63
41
|
else
|
64
42
|
Marcel::MimeType.for @io_stream, name: @filename
|
65
43
|
end
|
66
|
-
|
44
|
+
if ALLOWED_MIME_TYPES.include? @file_mimetype
|
45
|
+
logger.debug("Loaded new input #{@filename} from #{self.class}")
|
46
|
+
return
|
47
|
+
end
|
67
48
|
|
68
|
-
if filename.end_with?('.pdf') &&
|
49
|
+
if filename.end_with?('.pdf') && repair_pdf
|
69
50
|
rescue_broken_pdf(@io_stream)
|
70
51
|
@file_mimetype = Marcel::MimeType.for @io_stream
|
71
52
|
|
53
|
+
logger.debug("Loaded new input #{@filename} from #{self.class}")
|
72
54
|
return if ALLOWED_MIME_TYPES.include? @file_mimetype
|
73
55
|
end
|
74
56
|
|
75
|
-
raise
|
57
|
+
raise Errors::MindeeMimeTypeError, @file_mimetype.to_s
|
76
58
|
end
|
77
59
|
|
78
60
|
# Attempts to fix pdf files if mimetype is rejected.
|
@@ -81,7 +63,7 @@ module Mindee
|
|
81
63
|
# @param stream [StringIO]
|
82
64
|
def rescue_broken_pdf(stream)
|
83
65
|
stream.gets('%PDF-')
|
84
|
-
raise
|
66
|
+
raise Errors::MindeePDFError if stream.eof? || stream.pos > 500
|
85
67
|
|
86
68
|
stream.pos = stream.pos - 5
|
87
69
|
data = stream.read
|
@@ -97,7 +79,7 @@ module Mindee
|
|
97
79
|
end
|
98
80
|
|
99
81
|
# Parses a PDF file according to provided options.
|
100
|
-
# @param options [
|
82
|
+
# @param options [PageOptions, nil] Page cutting/merge options:
|
101
83
|
#
|
102
84
|
# * `:page_indexes` Zero-based list of page indexes.
|
103
85
|
# * `:operation` Operation to apply on the document, given the `page_indexes specified:
|
@@ -106,25 +88,45 @@ module Mindee
|
|
106
88
|
# * `:on_min_pages` Apply the operation only if document has at least this many pages.
|
107
89
|
def process_pdf(options)
|
108
90
|
@io_stream.seek(0)
|
109
|
-
@io_stream =
|
91
|
+
@io_stream = PDF::PDFProcessor.parse(@io_stream, options)
|
110
92
|
end
|
111
93
|
|
112
94
|
# Reads a document.
|
113
|
-
# @param close [
|
95
|
+
# @param close [bool]
|
114
96
|
# @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
|
115
|
-
def
|
97
|
+
def read_contents(close: true)
|
98
|
+
logger.debug("Reading data from: #{@filename}")
|
116
99
|
@io_stream.seek(0)
|
117
100
|
# Avoids needlessly re-packing some files
|
118
101
|
data = @io_stream.read
|
102
|
+
@io_stream.rewind
|
119
103
|
@io_stream.close if close
|
120
104
|
['document', data, { filename: Mindee::Input::Source.convert_to_unicode_escape(@filename) }]
|
121
105
|
end
|
122
106
|
|
123
|
-
|
107
|
+
# Write the file to a given path. Uses the initial file name by default.
|
108
|
+
# @param path [String] Path to write the file to.
|
109
|
+
def write_to_file(path)
|
110
|
+
full_path = if File.directory?(path) || path.end_with?('/')
|
111
|
+
File.join(path, @filename)
|
112
|
+
else
|
113
|
+
path
|
114
|
+
end
|
115
|
+
FileUtils.mkdir_p(File.dirname(full_path))
|
116
|
+
@io_stream.rewind
|
117
|
+
File.binwrite(full_path, @io_stream.read)
|
118
|
+
logger.debug("Wrote file successfully to #{full_path}")
|
119
|
+
@io_stream.rewind
|
120
|
+
end
|
121
|
+
|
122
|
+
# Returns the page count for a document.
|
123
|
+
# Defaults to one for images.
|
124
|
+
# @return [Integer]
|
125
|
+
def count_pages
|
124
126
|
return 1 unless pdf?
|
125
127
|
|
126
128
|
@io_stream.seek(0)
|
127
|
-
pdf_processor = Mindee::PDF::
|
129
|
+
pdf_processor = Mindee::PDF::PDFProcessor.open_pdf(@io_stream)
|
128
130
|
pdf_processor.pages.size
|
129
131
|
end
|
130
132
|
|
@@ -132,10 +134,10 @@ module Mindee
|
|
132
134
|
# @param [Integer] quality Quality of the output file.
|
133
135
|
# @param [Integer, nil] max_width Maximum width (Ignored for PDFs).
|
134
136
|
# @param [Integer, nil] max_height Maximum height (Ignored for PDFs).
|
135
|
-
# @param [
|
137
|
+
# @param [bool] force_source_text Whether to force the operation on PDFs with source text.
|
136
138
|
# This will attempt to re-render PDF text over the rasterized original. If disabled, ignored the operation.
|
137
139
|
# WARNING: this operation is strongly discouraged.
|
138
|
-
# @param [
|
140
|
+
# @param [bool] disable_source_text If the PDF has source text, whether to re-apply it to the original or
|
139
141
|
# not. Needs force_source_text to work.
|
140
142
|
def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true)
|
141
143
|
buffer = if pdf?
|
@@ -158,7 +160,7 @@ module Mindee
|
|
158
160
|
end
|
159
161
|
|
160
162
|
# Checks whether the file has source text if it is a pdf. False otherwise
|
161
|
-
# @return [
|
163
|
+
# @return [bool] True if the file is a PDF and has source text.
|
162
164
|
def source_text?
|
163
165
|
Mindee::PDF::PDFTools.source_text?(@io_stream)
|
164
166
|
end
|
@@ -171,7 +173,7 @@ module Mindee
|
|
171
173
|
unicode_escape_string = ''.dup
|
172
174
|
string.each_char do |char|
|
173
175
|
unicode_escape_string << if char.bytesize > 1
|
174
|
-
"\\u#{char.unpack1('U')
|
176
|
+
"\\u#{format('%04x', char.unpack1('U'))}"
|
175
177
|
else
|
176
178
|
char
|
177
179
|
end
|
@@ -9,10 +9,10 @@ module Mindee
|
|
9
9
|
# Load a document from a path.
|
10
10
|
class PathInputSource < LocalInputSource
|
11
11
|
# @param filepath [String]
|
12
|
-
# @param
|
13
|
-
def initialize(filepath,
|
12
|
+
# @param repair_pdf [bool]
|
13
|
+
def initialize(filepath, repair_pdf: false)
|
14
14
|
io_stream = File.open(filepath, 'rb')
|
15
|
-
super(io_stream, File.basename(filepath),
|
15
|
+
super(io_stream, File.basename(filepath), repair_pdf: repair_pdf)
|
16
16
|
end
|
17
17
|
end
|
18
18
|
end
|
@@ -3,17 +3,20 @@
|
|
3
3
|
require 'net/http'
|
4
4
|
require 'uri'
|
5
5
|
require 'fileutils'
|
6
|
+
require_relative '../../logging'
|
6
7
|
|
7
8
|
module Mindee
|
8
9
|
module Input
|
9
10
|
module Source
|
10
11
|
# Load a remote document from a file url.
|
11
|
-
class
|
12
|
+
class URLInputSource
|
12
13
|
# @return [String]
|
13
14
|
attr_reader :url
|
14
15
|
|
15
16
|
def initialize(url)
|
16
|
-
raise 'URL must be HTTPS' unless url.start_with? 'https://'
|
17
|
+
raise Errors::MindeeInputError, 'URL must be HTTPS' unless url.start_with? 'https://'
|
18
|
+
|
19
|
+
logger.debug("URL input: #{url}")
|
17
20
|
|
18
21
|
@url = url
|
19
22
|
end
|
@@ -27,7 +30,7 @@ module Mindee
|
|
27
30
|
# @param token [String, nil] Optional token for JWT-based authentication.
|
28
31
|
# @param max_redirects [Integer] Maximum amount of redirects to follow.
|
29
32
|
# @return [String] The full path of the saved file.
|
30
|
-
def
|
33
|
+
def write_to_file(path, filename: nil, username: nil, password: nil, token: nil, max_redirects: 3)
|
31
34
|
response_body = fetch_file_content(username: username, password: password, token: token,
|
32
35
|
max_redirects: max_redirects)
|
33
36
|
|
@@ -53,7 +56,7 @@ module Mindee
|
|
53
56
|
max_redirects: max_redirects)
|
54
57
|
bytes = StringIO.new(response_body)
|
55
58
|
|
56
|
-
BytesInputSource.new(bytes.read, filename)
|
59
|
+
BytesInputSource.new(bytes.read || '', filename || '')
|
57
60
|
end
|
58
61
|
|
59
62
|
# Fetches the file content from the URL.
|
@@ -72,9 +75,9 @@ module Mindee
|
|
72
75
|
|
73
76
|
response = make_request(uri, request, max_redirects)
|
74
77
|
if response.code.to_i > 299
|
75
|
-
raise "Failed to download file: HTTP status code #{response.code}"
|
78
|
+
raise Errors::MindeeAPIError, "Failed to download file: HTTP status code #{response.code}"
|
76
79
|
elsif response.code.to_i < 200
|
77
|
-
raise "Failed to download file: Invalid response code #{response.code}."
|
80
|
+
raise Errors::MindeeAPIError, "Failed to download file: Invalid response code #{response.code}."
|
78
81
|
end
|
79
82
|
|
80
83
|
response.body
|
@@ -83,7 +86,7 @@ module Mindee
|
|
83
86
|
private
|
84
87
|
|
85
88
|
def extract_filename_from_url(uri)
|
86
|
-
filename = File.basename(uri.path)
|
89
|
+
filename = File.basename(uri.path.to_s)
|
87
90
|
filename.empty? ? '' : filename
|
88
91
|
end
|
89
92
|
|
@@ -100,7 +103,7 @@ module Mindee
|
|
100
103
|
response = http.request(request)
|
101
104
|
if response.is_a?(Net::HTTPRedirection) && max_redirects.positive?
|
102
105
|
location = response['location']
|
103
|
-
raise 'No location in redirection header.' if location.nil?
|
106
|
+
raise Errors::MindeeInputError, 'No location in redirection header.' if location.nil?
|
104
107
|
|
105
108
|
new_uri = URI.parse(location)
|
106
109
|
request = Net::HTTP::Get.new(new_uri)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'logger'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
# Mindee logging module.
|
7
|
+
module Logging
|
8
|
+
@logger = Logger.new($stdout)
|
9
|
+
log_level = ENV.fetch('MINDEE_LOG_LEVEL', 'WARN')
|
10
|
+
@logger.level = Logger.const_get(log_level)
|
11
|
+
|
12
|
+
class << self
|
13
|
+
attr_accessor :logger
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|