mindee 4.13.0 → 5.0.0.rc1
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.editorconfig +24 -0
- data/.pre-commit-config.yaml +36 -0
- data/.rubocop.yml +3 -2
- data/CHANGELOG.md +50 -3
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +9 -1
- data/README.md +7 -7
- data/Rakefile +21 -15
- data/Steepfile +0 -1
- data/bin/mindee.rb +19 -118
- data/bin/v1/parser.rb +153 -0
- data/bin/v1/products.rb +88 -0
- data/bin/v2/parser.rb +235 -0
- data/bin/v2/products.rb +34 -0
- data/docs/code_samples/bank_account_details_v1.txt +2 -2
- data/docs/code_samples/bank_account_details_v2.txt +2 -2
- data/docs/code_samples/bank_statement_fr_v2_async.txt +2 -2
- data/docs/code_samples/barcode_reader_v1.txt +2 -2
- data/docs/code_samples/cropper_v1.txt +2 -2
- data/docs/code_samples/default.txt +2 -2
- data/docs/code_samples/default_async.txt +2 -3
- data/docs/code_samples/expense_receipts_v5.txt +2 -2
- data/docs/code_samples/expense_receipts_v5_async.txt +2 -2
- data/docs/code_samples/financial_document_v1.txt +2 -2
- data/docs/code_samples/financial_document_v1_async.txt +2 -2
- data/docs/code_samples/idcard_fr_v1.txt +2 -2
- data/docs/code_samples/idcard_fr_v2.txt +2 -2
- data/docs/code_samples/international_id_v2_async.txt +2 -2
- data/docs/code_samples/invoice_splitter_v1_async.txt +2 -2
- data/docs/code_samples/invoices_v4.txt +2 -2
- data/docs/code_samples/invoices_v4_async.txt +2 -2
- data/docs/code_samples/multi_receipts_detector_v1.txt +2 -2
- data/docs/code_samples/passport_v1.txt +2 -2
- data/docs/code_samples/resume_v1_async.txt +2 -2
- data/docs/code_samples/v2_classification.txt +1 -1
- data/docs/code_samples/v2_crop.txt +2 -2
- data/docs/code_samples/v2_extraction.txt +3 -3
- data/docs/code_samples/v2_extraction_webhook.txt +3 -3
- data/docs/code_samples/v2_ocr.txt +2 -2
- data/docs/code_samples/v2_split.txt +1 -1
- data/docs/code_samples/workflow_execution.txt +2 -2
- data/docs/code_samples/workflow_polling.txt +2 -3
- data/examples/auto_invoice_splitter_extraction.rb +6 -6
- data/examples/auto_multi_receipts_detector_extraction.rb +4 -4
- data/lib/mindee/dependencies.rb +29 -0
- data/lib/mindee/{errors → error}/mindee_error.rb +1 -1
- data/lib/mindee/{errors → error}/mindee_http_error.rb +1 -1
- data/lib/mindee/{errors → error}/mindee_http_error_v2.rb +5 -5
- data/lib/mindee/{errors → error}/mindee_http_unknown_error_v2.rb +1 -1
- data/lib/mindee/{errors → error}/mindee_input_error.rb +2 -2
- data/lib/mindee/error.rb +6 -0
- data/lib/mindee/geometry/polygon.rb +2 -2
- data/lib/mindee/http/.rubocop.yml +0 -1
- data/lib/mindee/http/http_error_handler.rb +7 -7
- data/lib/mindee/http/response_validation.rb +3 -2
- data/lib/mindee/http.rb +0 -4
- data/lib/mindee/image/extracted_image.rb +25 -12
- data/lib/mindee/image/image_extractor.rb +17 -17
- data/lib/mindee/image/image_utils.rb +3 -3
- data/lib/mindee/input/base_parameters.rb +5 -2
- data/lib/mindee/input/local_response.rb +6 -8
- data/lib/mindee/input/sources/local_input_source.rb +18 -14
- data/lib/mindee/input/sources/path_input_source.rb +1 -1
- data/lib/mindee/input/sources/url_input_source.rb +4 -4
- data/lib/mindee/input.rb +0 -2
- data/lib/mindee/logging/logger.rb +9 -1
- data/lib/mindee/page_options.rb +1 -1
- data/lib/mindee/pdf/extracted_pdf.rb +55 -45
- data/lib/mindee/pdf/pdf_compressor.rb +2 -0
- data/lib/mindee/pdf/pdf_extractor.rb +95 -94
- data/lib/mindee/pdf/pdf_processor.rb +6 -1
- data/lib/mindee/pdf/pdf_tools.rb +19 -0
- data/lib/mindee/v1/client.rb +490 -0
- data/lib/mindee/v1/extraction/multi_receipts_extractor.rb +32 -0
- data/lib/mindee/v1/http/.rubocop.yml +7 -0
- data/lib/mindee/v1/http/endpoint.rb +221 -0
- data/lib/mindee/v1/http/workflow_endpoint.rb +93 -0
- data/lib/mindee/v1/http.rb +4 -0
- data/lib/mindee/v1/parsing/common/api_request.rb +38 -0
- data/lib/mindee/v1/parsing/common/api_response.rb +63 -0
- data/lib/mindee/v1/parsing/common/document.rb +86 -0
- data/lib/mindee/v1/parsing/common/execution.rb +78 -0
- data/lib/mindee/v1/parsing/common/execution_file.rb +26 -0
- data/lib/mindee/v1/parsing/common/execution_priority.rb +38 -0
- data/lib/mindee/v1/parsing/common/extras/cropper_extra.rb +32 -0
- data/lib/mindee/v1/parsing/common/extras/extras.rb +62 -0
- data/lib/mindee/v1/parsing/common/extras/full_text_ocr_extra.rb +35 -0
- data/lib/mindee/v1/parsing/common/extras/rag_extra.rb +28 -0
- data/lib/mindee/v1/parsing/common/inference.rb +69 -0
- data/lib/mindee/v1/parsing/common/job.rb +48 -0
- data/lib/mindee/v1/parsing/common/ocr/mvision_v1.rb +52 -0
- data/lib/mindee/v1/parsing/common/ocr/ocr.rb +180 -0
- data/lib/mindee/v1/parsing/common/orientation.rb +28 -0
- data/lib/mindee/v1/parsing/common/page.rb +49 -0
- data/lib/mindee/v1/parsing/common/prediction.rb +19 -0
- data/lib/mindee/v1/parsing/common/product.rb +26 -0
- data/lib/mindee/v1/parsing/common/workflow_response.rb +30 -0
- data/lib/mindee/v1/parsing/standard/abstract_field.rb +74 -0
- data/lib/mindee/v1/parsing/standard/address_field.rb +51 -0
- data/lib/mindee/v1/parsing/standard/amount_field.rb +28 -0
- data/lib/mindee/v1/parsing/standard/base_field.rb +30 -0
- data/lib/mindee/v1/parsing/standard/boolean_field.rb +29 -0
- data/lib/mindee/v1/parsing/standard/classification_field.rb +18 -0
- data/lib/mindee/v1/parsing/standard/company_registration_field.rb +45 -0
- data/lib/mindee/v1/parsing/standard/date_field.rb +40 -0
- data/lib/mindee/v1/parsing/standard/feature_field.rb +26 -0
- data/lib/mindee/v1/parsing/standard/locale_field.rb +52 -0
- data/lib/mindee/v1/parsing/standard/payment_details_field.rb +44 -0
- data/lib/mindee/v1/parsing/standard/position_field.rb +61 -0
- data/lib/mindee/v1/parsing/standard/string_field.rb +26 -0
- data/lib/mindee/v1/parsing/standard/tax_field.rb +110 -0
- data/lib/mindee/v1/parsing/universal/universal_list_field.rb +60 -0
- data/lib/mindee/v1/parsing/universal/universal_object_field.rb +123 -0
- data/lib/mindee/{product → v1/product}/.rubocop.yml +1 -2
- data/lib/mindee/{product/fr/id_card/id_card_v1.rb → v1/product/barcode_reader/barcode_reader_v1.rb} +12 -12
- data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb +47 -0
- data/lib/mindee/{product/fr/bank_statement/bank_statement_v2_page.rb → v1/product/barcode_reader/barcode_reader_v1_page.rb} +10 -10
- data/lib/mindee/{product/us/bank_check/bank_check_v1.rb → v1/product/cropper/cropper_v1.rb} +12 -12
- data/lib/mindee/v1/product/cropper/cropper_v1_document.rb +15 -0
- data/lib/mindee/v1/product/cropper/cropper_v1_page.rb +55 -0
- data/lib/mindee/{product/ind/indian_passport/indian_passport_v1.rb → v1/product/financial_document/financial_document_v1.rb} +13 -13
- data/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb +329 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb +124 -0
- data/lib/mindee/{product/fr/energy_bill/energy_bill_v1_subscriptions.rb → v1/product/financial_document/financial_document_v1_line_items.rb} +19 -20
- data/lib/mindee/v1/product/financial_document/financial_document_v1_page.rb +38 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rb +40 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb +63 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb +60 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rb +40 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb +169 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rb +40 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb +78 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rb +56 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v1.rb +49 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb +106 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v1_page.rb +57 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v2.rb +49 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb +143 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v2_page.rb +65 -0
- data/lib/mindee/{product/fr/payslip/payslip_v2.rb → v1/product/international_id/international_id_v2.rb} +12 -12
- data/lib/mindee/v1/product/international_id/international_id_v2_document.rb +164 -0
- data/lib/mindee/{product/fr/energy_bill/energy_bill_v1_page.rb → v1/product/international_id/international_id_v2_page.rb} +10 -10
- data/lib/mindee/{product/fr/payslip/payslip_v3.rb → v1/product/invoice/invoice_v4.rb} +14 -14
- data/lib/mindee/v1/product/invoice/invoice_v4_document.rb +300 -0
- data/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb +124 -0
- data/lib/mindee/{product/fr/energy_bill/energy_bill_v1_taxes_and_contributions.rb → v1/product/invoice/invoice_v4_line_items.rb} +19 -20
- data/lib/mindee/{product/fr/payslip/payslip_v2_page.rb → v1/product/invoice/invoice_v4_page.rb} +10 -10
- data/lib/mindee/{product/fr/energy_bill/energy_bill_v1.rb → v1/product/invoice_splitter/invoice_splitter_v1.rb} +12 -12
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb +66 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +58 -0
- data/lib/mindee/{product/us/healthcare_card/healthcare_card_v1_copays.rb → v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb} +10 -12
- data/lib/mindee/{product/fr/health_card/health_card_v1_page.rb → v1/product/invoice_splitter/invoice_splitter_v1_page.rb} +10 -10
- data/lib/mindee/{product/fr/bank_account_details/bank_account_details_v1.rb → v1/product/multi_receipts_detector/multi_receipts_detector_v1.rb} +12 -12
- data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +38 -0
- data/lib/mindee/{product/fr/bank_account_details/bank_account_details_v1_page.rb → v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb} +10 -10
- data/lib/mindee/{product/fr/carte_grise/carte_grise_v1.rb → v1/product/passport/passport_v1.rb} +12 -12
- data/lib/mindee/{product/fr/id_card/id_card_v1_document.rb → v1/product/passport/passport_v1_document.rb} +42 -33
- data/lib/mindee/{product/fr/carte_grise/carte_grise_v1_page.rb → v1/product/passport/passport_v1_page.rb} +10 -10
- data/lib/mindee/v1/product/receipt/receipt_v5.rb +47 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_document.rb +187 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb +88 -0
- data/lib/mindee/{product/fr/bank_statement/bank_statement_v2_transactions.rb → v1/product/receipt/receipt_v5_line_items.rb} +14 -12
- data/lib/mindee/{product/fr/payslip/payslip_v3_page.rb → v1/product/receipt/receipt_v5_page.rb} +10 -10
- data/lib/mindee/{product/us/us_mail/us_mail_v3.rb → v1/product/resume/resume_v1.rb} +13 -13
- data/lib/mindee/v1/product/resume/resume_v1_certificate.rb +82 -0
- data/lib/mindee/{product/fr/payslip/payslip_v2_salary_details.rb → v1/product/resume/resume_v1_certificates.rb} +20 -23
- data/lib/mindee/v1/product/resume/resume_v1_document.rb +340 -0
- data/lib/mindee/v1/product/resume/resume_v1_education.rb +106 -0
- data/lib/mindee/{product/us/us_mail/us_mail_v3_recipient_addresses.rb → v1/product/resume/resume_v1_educations.rb} +24 -23
- data/lib/mindee/v1/product/resume/resume_v1_language.rb +66 -0
- data/lib/mindee/{product/fr/payslip/payslip_v3_paid_time_offs.rb → v1/product/resume/resume_v1_languages.rb} +16 -25
- data/lib/mindee/{product/us/us_mail/us_mail_v3_page.rb → v1/product/resume/resume_v1_page.rb} +10 -10
- data/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb +122 -0
- data/lib/mindee/{product/fr/energy_bill/energy_bill_v1_energy_usages.rb → v1/product/resume/resume_v1_professional_experiences.rb} +26 -25
- data/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb +66 -0
- data/lib/mindee/v1/product/resume/resume_v1_social_networks_urls.rb +56 -0
- data/lib/mindee/{product/fr/bank_statement/bank_statement_v2.rb → v1/product/universal/universal.rb} +16 -15
- data/lib/mindee/v1/product/universal/universal_document.rb +35 -0
- data/lib/mindee/v1/product/universal/universal_page.rb +54 -0
- data/lib/mindee/v1/product/universal/universal_prediction.rb +128 -0
- data/lib/mindee/{product.rb → v1/product.rb} +0 -14
- data/lib/mindee/v1.rb +7 -0
- data/lib/mindee/v2/client.rb +132 -0
- data/lib/mindee/v2/file_operations/crop.rb +51 -0
- data/lib/mindee/v2/file_operations/crop_files.rb +25 -0
- data/lib/mindee/v2/file_operations/split.rb +37 -0
- data/lib/mindee/v2/file_operations/split_files.rb +25 -0
- data/lib/mindee/v2/file_operations.rb +6 -0
- data/lib/mindee/v2/http/.rubocop.yml +7 -0
- data/lib/mindee/v2/http/api_v2_settings.rb +65 -0
- data/lib/mindee/v2/http/mindee_api_v2.rb +230 -0
- data/lib/mindee/v2/http.rb +4 -0
- data/lib/mindee/v2/parsing/base_inference.rb +6 -6
- data/lib/mindee/v2/parsing/base_response.rb +3 -1
- data/lib/mindee/{parsing/v2 → v2/parsing}/common_response.rb +6 -4
- data/lib/mindee/{parsing/v2 → v2/parsing}/error_item.rb +2 -2
- data/lib/mindee/{parsing/v2 → v2/parsing}/error_response.rb +2 -2
- data/lib/mindee/{parsing/v2 → v2/parsing}/field/base_field.rb +3 -3
- data/lib/mindee/{parsing/v2 → v2/parsing}/field/field_confidence.rb +2 -2
- data/lib/mindee/{parsing/v2 → v2/parsing}/field/field_location.rb +2 -2
- data/lib/mindee/{parsing/v2 → v2/parsing}/field/inference_fields.rb +3 -3
- data/lib/mindee/{parsing/v2 → v2/parsing}/field/list_field.rb +5 -5
- data/lib/mindee/{parsing/v2 → v2/parsing}/field/object_field.rb +11 -8
- data/lib/mindee/{parsing/v2 → v2/parsing}/field/simple_field.rb +2 -2
- data/lib/mindee/{parsing/v2 → v2/parsing}/inference_active_options.rb +2 -2
- data/lib/mindee/{parsing/v2 → v2/parsing}/inference_file.rb +2 -2
- data/lib/mindee/{parsing/v2 → v2/parsing}/inference_job.rb +2 -2
- data/lib/mindee/{parsing/v2 → v2/parsing}/inference_model.rb +2 -2
- data/lib/mindee/{parsing/v2 → v2/parsing}/job.rb +3 -3
- data/lib/mindee/{parsing/v2 → v2/parsing}/job_response.rb +2 -2
- data/lib/mindee/{parsing/v2 → v2/parsing}/job_webhook.rb +2 -3
- data/lib/mindee/{parsing/v2 → v2/parsing}/rag_metadata.rb +2 -2
- data/lib/mindee/{parsing/v2 → v2/parsing}/raw_text.rb +6 -4
- data/lib/mindee/{parsing/v2 → v2/parsing}/raw_text_page.rb +4 -2
- data/lib/mindee/v2/parsing/search/pagination_metadata.rb +44 -0
- data/lib/mindee/v2/parsing/search/search_model.rb +38 -0
- data/lib/mindee/v2/parsing/search/search_models.rb +34 -0
- data/lib/mindee/v2/parsing/search/search_response.rb +38 -0
- data/lib/mindee/v2/parsing/search.rb +6 -0
- data/lib/mindee/v2/parsing.rb +16 -0
- data/lib/mindee/v2/product/base_product.rb +1 -0
- data/lib/mindee/v2/product/classification/classification.rb +1 -0
- data/lib/mindee/v2/product/classification/classification_inference.rb +1 -0
- data/lib/mindee/v2/product/classification/params/classification_parameters.rb +1 -0
- data/lib/mindee/v2/product/crop/crop.rb +1 -0
- data/lib/mindee/v2/product/crop/crop_item.rb +12 -2
- data/lib/mindee/v2/product/crop/crop_response.rb +8 -0
- data/lib/mindee/v2/product/crop/params/crop_parameters.rb +1 -0
- data/lib/mindee/v2/product/extraction/extraction.rb +3 -1
- data/lib/mindee/v2/product/extraction/extraction_inference.rb +19 -4
- data/lib/mindee/v2/product/extraction/extraction_response.rb +11 -2
- data/lib/mindee/v2/product/extraction/extraction_result.rb +31 -2
- data/lib/mindee/v2/product/extraction/params/data_schema.rb +51 -0
- data/lib/mindee/v2/product/extraction/params/data_schema_field.rb +69 -0
- data/lib/mindee/v2/product/extraction/params/data_schema_replace.rb +39 -0
- data/lib/mindee/v2/product/extraction/params/extraction_parameters.rb +110 -3
- data/lib/mindee/v2/product/ocr/ocr.rb +6 -5
- data/lib/mindee/v2/product/ocr/ocr_inference.rb +4 -4
- data/lib/mindee/v2/product/ocr/ocr_page.rb +5 -5
- data/lib/mindee/v2/product/ocr/ocr_response.rb +4 -4
- data/lib/mindee/v2/product/ocr/ocr_result.rb +4 -4
- data/lib/mindee/v2/product/ocr/ocr_word.rb +2 -2
- data/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +5 -4
- data/lib/mindee/v2/product/split/params/split_parameters.rb +2 -0
- data/lib/mindee/v2/product/split/split_range.rb +8 -0
- data/lib/mindee/v2/product/split/split_response.rb +8 -0
- data/lib/mindee/v2.rb +5 -2
- data/lib/mindee/version.rb +1 -1
- data/lib/mindee.rb +59 -23
- data/mindee-lite.gemspec +36 -0
- data/mindee.gemspec +13 -13
- data/sig/custom/marcel.rbs +1 -1
- data/sig/custom/net_http.rbs +1 -1
- data/sig/custom/origami.rbs +1 -1
- data/sig/mindee/dependencies.rbs +13 -0
- data/sig/mindee/{errors → error}/mindee_error.rbs +2 -2
- data/sig/mindee/{errors → error}/mindee_http_error.rbs +3 -3
- data/sig/mindee/{errors → error}/mindee_http_error_v2.rbs +4 -4
- data/sig/mindee/{errors → error}/mindee_http_unknown_error_v2.rbs +2 -2
- data/sig/mindee/{errors → error}/mindee_input_error.rbs +2 -2
- data/sig/mindee/http/http_error_handler.rbs +4 -4
- data/sig/mindee/image/extracted_image.rbs +8 -2
- data/sig/mindee/image/image_extractor.rbs +2 -2
- data/sig/mindee/input/base_parameters.rbs +1 -1
- data/sig/mindee/input/local_response.rbs +1 -1
- data/sig/mindee/input/sources/local_input_source.rbs +0 -1
- data/sig/mindee/pdf/extracted_pdf.rbs +11 -9
- data/sig/mindee/pdf/pdf_extractor.rbs +12 -10
- data/sig/mindee/pdf/pdf_tools.rbs +2 -0
- data/sig/mindee/v1/client.rbs +84 -0
- data/sig/mindee/v1/extraction/multi_receipts_extractor.rbs +8 -0
- data/sig/mindee/v1/http/endpoint.rbs +41 -0
- data/sig/mindee/v1/http/workflow_endpoint.rbs +22 -0
- data/sig/mindee/v1/parsing/common/api_request.rbs +22 -0
- data/sig/mindee/v1/parsing/common/api_response.rbs +31 -0
- data/sig/mindee/v1/parsing/common/document.rbs +32 -0
- data/sig/mindee/v1/parsing/common/execution.rbs +26 -0
- data/sig/mindee/v1/parsing/common/execution_file.rbs +16 -0
- data/sig/mindee/v1/parsing/common/execution_priority.rbs +16 -0
- data/sig/mindee/v1/parsing/common/extras/cropper_extra.rbs +18 -0
- data/sig/mindee/v1/parsing/common/extras/extras.rbs +24 -0
- data/sig/mindee/v1/parsing/common/extras/full_text_ocr_extra.rbs +22 -0
- data/sig/mindee/v1/parsing/common/extras/rag_extra.rbs +19 -0
- data/sig/mindee/v1/parsing/common/inference.rbs +31 -0
- data/sig/mindee/v1/parsing/common/job.rbs +24 -0
- data/sig/mindee/v1/parsing/common/ocr/mvision_v1.rbs +20 -0
- data/sig/mindee/v1/parsing/common/ocr/ocr.rbs +56 -0
- data/sig/mindee/v1/parsing/common/orientation.rbs +15 -0
- data/sig/mindee/v1/parsing/common/page.rbs +19 -0
- data/sig/mindee/v1/parsing/common/prediction.rbs +14 -0
- data/sig/mindee/v1/parsing/common/product.rbs +16 -0
- data/sig/mindee/v1/parsing/common/workflow_response.rbs +22 -0
- data/sig/mindee/v1/parsing/standard/abstract_field.rbs +30 -0
- data/sig/mindee/v1/parsing/standard/address_field.rbs +28 -0
- data/sig/mindee/v1/parsing/standard/amount_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/base_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/boolean_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/classification_field.rbs +12 -0
- data/sig/mindee/v1/parsing/standard/company_registration_field.rbs +20 -0
- data/sig/mindee/v1/parsing/standard/date_field.rbs +20 -0
- data/sig/mindee/v1/parsing/standard/feature_field.rbs +12 -0
- data/sig/mindee/v1/parsing/standard/locale_field.rbs +24 -0
- data/sig/mindee/v1/parsing/standard/payment_details_field.rbs +19 -0
- data/sig/mindee/v1/parsing/standard/position_field.rbs +26 -0
- data/sig/mindee/v1/parsing/standard/string_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/tax_field.rbs +33 -0
- data/sig/mindee/v1/parsing/universal/universal_list_field.rbs +21 -0
- data/sig/mindee/v1/parsing/universal/universal_object_field.rbs +38 -0
- data/sig/mindee/{product → v1/product}/barcode_reader/barcode_reader_v1.rbs +4 -2
- data/sig/mindee/{product → v1/product}/barcode_reader/barcode_reader_v1_document.rbs +4 -2
- data/sig/mindee/{product → v1/product}/barcode_reader/barcode_reader_v1_page.rbs +4 -2
- data/sig/mindee/{product → v1/product}/cropper/cropper_v1.rbs +4 -2
- data/sig/mindee/{product → v1/product}/cropper/cropper_v1_document.rbs +4 -2
- data/sig/mindee/{product → v1/product}/cropper/cropper_v1_page.rbs +6 -3
- data/sig/mindee/{product → v1/product}/financial_document/financial_document_v1.rbs +4 -2
- data/sig/mindee/{product → v1/product}/financial_document/financial_document_v1_document.rbs +4 -2
- data/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs +35 -0
- data/sig/mindee/{product → v1/product}/financial_document/financial_document_v1_line_items.rbs +3 -1
- data/sig/mindee/{product → v1/product}/financial_document/financial_document_v1_page.rbs +4 -2
- data/sig/mindee/{product → v1/product}/fr/bank_account_details/bank_account_details_v1.rbs +4 -2
- data/sig/mindee/{product → v1/product}/fr/bank_account_details/bank_account_details_v1_document.rbs +4 -2
- data/sig/mindee/{product → v1/product}/fr/bank_account_details/bank_account_details_v1_page.rbs +4 -2
- data/sig/mindee/{product → v1/product}/fr/bank_account_details/bank_account_details_v2.rbs +4 -2
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs +25 -0
- data/sig/mindee/{product → v1/product}/fr/bank_account_details/bank_account_details_v2_document.rbs +4 -2
- data/sig/mindee/{product → v1/product}/fr/bank_account_details/bank_account_details_v2_page.rbs +4 -2
- data/sig/mindee/{product → v1/product}/fr/bank_statement/bank_statement_v2.rbs +4 -2
- data/sig/mindee/{product → v1/product}/fr/bank_statement/bank_statement_v2_document.rbs +4 -2
- data/sig/mindee/{product → v1/product}/fr/bank_statement/bank_statement_v2_page.rbs +4 -2
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs +27 -0
- data/sig/mindee/{product → v1/product}/fr/bank_statement/bank_statement_v2_transactions.rbs +3 -1
- data/sig/mindee/v1/product/fr/id_card/id_card_v1.rbs +15 -0
- data/sig/mindee/{product → v1/product}/fr/id_card/id_card_v1_document.rbs +4 -2
- data/sig/mindee/{product → v1/product}/fr/id_card/id_card_v1_page.rbs +4 -2
- data/sig/mindee/{product → v1/product}/fr/id_card/id_card_v2.rbs +4 -2
- data/sig/mindee/{product → v1/product}/fr/id_card/id_card_v2_document.rbs +4 -2
- data/sig/mindee/{product → v1/product}/fr/id_card/id_card_v2_page.rbs +4 -2
- data/sig/mindee/{product → v1/product}/international_id/international_id_v2.rbs +4 -2
- data/sig/mindee/{product → v1/product}/international_id/international_id_v2_document.rbs +4 -2
- data/sig/mindee/{product → v1/product}/international_id/international_id_v2_page.rbs +4 -2
- data/sig/mindee/{product → v1/product}/invoice/invoice_v4.rbs +4 -2
- data/sig/mindee/{product → v1/product}/invoice/invoice_v4_document.rbs +4 -2
- data/sig/mindee/{product/fr/energy_bill/energy_bill_v1_taxes_and_contribution.rbs → v1/product/invoice/invoice_v4_line_item.rbs} +22 -8
- data/sig/mindee/{product → v1/product}/invoice/invoice_v4_line_items.rbs +3 -1
- data/sig/mindee/{product → v1/product}/invoice/invoice_v4_page.rbs +4 -2
- data/sig/mindee/{product → v1/product}/invoice_splitter/invoice_splitter_v1.rbs +4 -2
- data/sig/mindee/{product → v1/product}/invoice_splitter/invoice_splitter_v1_document.rbs +5 -3
- data/sig/mindee/{product/us/healthcare_card/healthcare_card_v1_copay.rbs → v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs} +11 -7
- data/sig/mindee/{product → v1/product}/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rbs +3 -1
- data/sig/mindee/{product → v1/product}/invoice_splitter/invoice_splitter_v1_page.rbs +4 -2
- data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs +14 -0
- data/sig/mindee/{product → v1/product}/multi_receipts_detector/multi_receipts_detector_v1_document.rbs +5 -3
- data/sig/mindee/{product → v1/product}/multi_receipts_detector/multi_receipts_detector_v1_page.rbs +4 -2
- data/sig/mindee/{product → v1/product}/passport/passport_v1.rbs +4 -2
- data/sig/mindee/{product → v1/product}/passport/passport_v1_document.rbs +4 -2
- data/sig/mindee/{product → v1/product}/passport/passport_v1_page.rbs +4 -2
- data/sig/mindee/{product → v1/product}/receipt/receipt_v5.rbs +4 -2
- data/sig/mindee/{product → v1/product}/receipt/receipt_v5_document.rbs +4 -2
- data/sig/mindee/{product/fr/bank_statement/bank_statement_v2_transaction.rbs → v1/product/receipt/receipt_v5_line_item.rbs} +16 -7
- data/sig/mindee/{product → v1/product}/receipt/receipt_v5_line_items.rbs +3 -1
- data/sig/mindee/{product → v1/product}/receipt/receipt_v5_page.rbs +4 -2
- data/sig/mindee/{product → v1/product}/resume/resume_v1.rbs +4 -2
- data/sig/mindee/v1/product/resume/resume_v1_certificate.rbs +27 -0
- data/sig/mindee/{product/fr/payslip/payslip_v3_paid_time_offs.rbs → v1/product/resume/resume_v1_certificates.rbs} +6 -4
- data/sig/mindee/v1/product/resume/resume_v1_document.rbs +69 -0
- data/sig/mindee/v1/product/resume/resume_v1_education.rbs +33 -0
- data/sig/mindee/{product/fr/payslip/payslip_v2_salary_details.rbs → v1/product/resume/resume_v1_educations.rbs} +6 -4
- data/sig/mindee/v1/product/resume/resume_v1_language.rbs +23 -0
- data/sig/mindee/{product/fr/payslip/payslip_v3_salary_details.rbs → v1/product/resume/resume_v1_languages.rbs} +6 -4
- data/sig/mindee/{product/us/us_mail/us_mail_v3_page.rbs → v1/product/resume/resume_v1_page.rbs} +8 -6
- data/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs +37 -0
- data/sig/mindee/{product/fr/energy_bill/energy_bill_v1_subscriptions.rbs → v1/product/resume/resume_v1_professional_experiences.rbs} +6 -4
- data/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs +23 -0
- data/sig/mindee/{product/fr/energy_bill/energy_bill_v1_energy_usages.rbs → v1/product/resume/resume_v1_social_networks_urls.rbs} +6 -4
- data/sig/mindee/{product → v1/product}/universal/universal.rbs +4 -2
- data/sig/mindee/{product → v1/product}/universal/universal_document.rbs +4 -2
- data/sig/mindee/v1/product/universal/universal_page.rbs +18 -0
- data/sig/mindee/v1/product/universal/universal_prediction.rbs +30 -0
- data/sig/mindee/v2/client.rbs +29 -0
- data/sig/mindee/v2/file_operation/crop.rbs +10 -0
- data/sig/mindee/v2/file_operation/crop_files.rbs +9 -0
- data/sig/mindee/v2/file_operation/split.rbs +11 -0
- data/sig/mindee/v2/file_operation/split_files.rbs +9 -0
- data/sig/mindee/v2/http/api_v2_settings.rbs +27 -0
- data/sig/mindee/v2/http/mindee_api_v2.rbs +52 -0
- data/sig/mindee/v2/parsing/base_inference.rbs +3 -3
- data/sig/mindee/v2/parsing/base_response.rbs +1 -1
- data/sig/mindee/v2/parsing/common_response.rbs +12 -0
- data/sig/mindee/v2/parsing/error_item.rbs +13 -0
- data/sig/mindee/{parsing/v2 → v2/parsing}/error_response.rbs +4 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/field/base_field.rbs +3 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/field/field_confidence.rbs +3 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/field/field_location.rbs +3 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/field/inference_fields.rbs +3 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/field/list_field.rbs +3 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/field/object_field.rbs +3 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/field/simple_field.rbs +3 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/inference_active_options.rbs +6 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/inference_file.rbs +5 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/inference_job.rbs +3 -2
- data/sig/mindee/{parsing/v2 → v2/parsing}/inference_model.rbs +4 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/job.rbs +4 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/job_response.rbs +5 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/job_webhook.rbs +5 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/rag_metadata.rbs +3 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/raw_text.rbs +3 -3
- data/sig/mindee/{parsing/v2 → v2/parsing}/raw_text_page.rbs +2 -2
- data/sig/mindee/v2/parsing/search/pagination_metadata.rbs +20 -0
- data/sig/mindee/v2/parsing/search/search_model.rbs +19 -0
- data/sig/mindee/v2/parsing/search/search_response.rbs +17 -0
- data/sig/mindee/v2/parsing/search_models.rbs +14 -0
- data/sig/mindee/v2/product/classification/classification_response.rbs +1 -1
- data/sig/mindee/v2/product/crop/crop_item.rbs +4 -1
- data/sig/mindee/v2/product/crop/crop_response.rbs +3 -1
- data/sig/mindee/v2/product/extraction/extraction.rbs +3 -0
- data/sig/mindee/v2/product/extraction/extraction_inference.rbs +5 -0
- data/sig/mindee/v2/product/extraction/extraction_response.rbs +8 -2
- data/sig/mindee/v2/product/extraction/extraction_result.rbs +9 -1
- data/sig/mindee/v2/product/extraction/params/data_schema.rbs +21 -0
- data/sig/mindee/v2/product/extraction/params/data_schema_field.rbs +29 -0
- data/sig/mindee/v2/product/extraction/params/data_schema_replace.rbs +21 -0
- data/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs +27 -1
- data/sig/mindee/v2/product/ocr/ocr.rbs +2 -2
- data/sig/mindee/v2/product/ocr/ocr_inference.rbs +3 -3
- data/sig/mindee/v2/product/ocr/ocr_page.rbs +3 -3
- data/sig/mindee/v2/product/ocr/ocr_response.rbs +6 -6
- data/sig/mindee/v2/product/ocr/ocr_result.rbs +3 -3
- data/sig/mindee/v2/product/ocr/ocr_word.rbs +2 -2
- data/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs +3 -3
- data/sig/mindee/v2/product/split/split_range.rbs +4 -1
- data/sig/mindee/v2/product/split/split_response.rbs +3 -1
- data/sig/mindee.rbs +12 -9
- metadata +360 -568
- data/bin/cli_products.rb +0 -166
- data/docs/code_samples/bank_check_v1.txt +0 -24
- data/docs/code_samples/bill_of_lading_v1_async.txt +0 -24
- data/docs/code_samples/business_card_v1_async.txt +0 -24
- data/docs/code_samples/carte_grise_v1.txt +0 -24
- data/docs/code_samples/delivery_notes_v1_async.txt +0 -24
- data/docs/code_samples/driver_license_v1_async.txt +0 -24
- data/docs/code_samples/energy_bill_fra_v1_async.txt +0 -24
- data/docs/code_samples/french_healthcard_v1_async.txt +0 -24
- data/docs/code_samples/ind_passport_v1_async.txt +0 -24
- data/docs/code_samples/nutrition_facts_v1_async.txt +0 -24
- data/docs/code_samples/payslip_fra_v2_async.txt +0 -24
- data/docs/code_samples/payslip_fra_v3_async.txt +0 -24
- data/docs/code_samples/us_healthcare_cards_v1_async.txt +0 -24
- data/docs/code_samples/us_mail_v3_async.txt +0 -24
- data/lib/mindee/client.rb +0 -488
- data/lib/mindee/client_v2.rb +0 -161
- data/lib/mindee/errors.rb +0 -6
- data/lib/mindee/extraction/multi_receipts_extractor.rb +0 -30
- data/lib/mindee/http/api_settings_v2.rb +0 -63
- data/lib/mindee/http/endpoint.rb +0 -219
- data/lib/mindee/http/mindee_api_v2.rb +0 -215
- data/lib/mindee/http/workflow_endpoint.rb +0 -91
- data/lib/mindee/input/data_schema.rb +0 -126
- data/lib/mindee/input/inference_parameters.rb +0 -119
- data/lib/mindee/parsing/common/api_request.rb +0 -36
- data/lib/mindee/parsing/common/api_response.rb +0 -61
- data/lib/mindee/parsing/common/document.rb +0 -84
- data/lib/mindee/parsing/common/execution.rb +0 -75
- data/lib/mindee/parsing/common/execution_file.rb +0 -24
- data/lib/mindee/parsing/common/execution_priority.rb +0 -36
- data/lib/mindee/parsing/common/extras/cropper_extra.rb +0 -30
- data/lib/mindee/parsing/common/extras/extras.rb +0 -58
- data/lib/mindee/parsing/common/extras/full_text_ocr_extra.rb +0 -33
- data/lib/mindee/parsing/common/extras/rag_extra.rb +0 -26
- data/lib/mindee/parsing/common/inference.rb +0 -67
- data/lib/mindee/parsing/common/job.rb +0 -46
- data/lib/mindee/parsing/common/ocr/mvision_v1.rb +0 -50
- data/lib/mindee/parsing/common/ocr/ocr.rb +0 -178
- data/lib/mindee/parsing/common/orientation.rb +0 -26
- data/lib/mindee/parsing/common/page.rb +0 -47
- data/lib/mindee/parsing/common/prediction.rb +0 -17
- data/lib/mindee/parsing/common/product.rb +0 -24
- data/lib/mindee/parsing/common/workflow_response.rb +0 -28
- data/lib/mindee/parsing/standard/abstract_field.rb +0 -72
- data/lib/mindee/parsing/standard/address_field.rb +0 -49
- data/lib/mindee/parsing/standard/amount_field.rb +0 -26
- data/lib/mindee/parsing/standard/base_field.rb +0 -28
- data/lib/mindee/parsing/standard/boolean_field.rb +0 -27
- data/lib/mindee/parsing/standard/classification_field.rb +0 -16
- data/lib/mindee/parsing/standard/company_registration_field.rb +0 -43
- data/lib/mindee/parsing/standard/date_field.rb +0 -38
- data/lib/mindee/parsing/standard/feature_field.rb +0 -24
- data/lib/mindee/parsing/standard/locale_field.rb +0 -50
- data/lib/mindee/parsing/standard/payment_details_field.rb +0 -42
- data/lib/mindee/parsing/standard/position_field.rb +0 -59
- data/lib/mindee/parsing/standard/string_field.rb +0 -24
- data/lib/mindee/parsing/standard/tax_field.rb +0 -108
- data/lib/mindee/parsing/universal/universal_list_field.rb +0 -58
- data/lib/mindee/parsing/universal/universal_object_field.rb +0 -121
- data/lib/mindee/parsing/v2/inference.rb +0 -44
- data/lib/mindee/parsing/v2/inference_response.rb +0 -33
- data/lib/mindee/parsing/v2/inference_result.rb +0 -42
- data/lib/mindee/parsing/v2.rb +0 -16
- data/lib/mindee/product/barcode_reader/barcode_reader_v1.rb +0 -45
- data/lib/mindee/product/barcode_reader/barcode_reader_v1_document.rb +0 -44
- data/lib/mindee/product/barcode_reader/barcode_reader_v1_page.rb +0 -36
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1.rb +0 -45
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier.rb +0 -52
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_item.rb +0 -101
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_items.rb +0 -62
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_consignee.rb +0 -58
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_document.rb +0 -163
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_notify_party.rb +0 -58
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_page.rb +0 -36
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_shipper.rb +0 -58
- data/lib/mindee/product/business_card/business_card_v1.rb +0 -45
- data/lib/mindee/product/business_card/business_card_v1_document.rb +0 -112
- data/lib/mindee/product/business_card/business_card_v1_page.rb +0 -36
- data/lib/mindee/product/cropper/cropper_v1.rb +0 -45
- data/lib/mindee/product/cropper/cropper_v1_document.rb +0 -13
- data/lib/mindee/product/cropper/cropper_v1_page.rb +0 -53
- data/lib/mindee/product/delivery_note/delivery_note_v1.rb +0 -45
- data/lib/mindee/product/delivery_note/delivery_note_v1_document.rb +0 -82
- data/lib/mindee/product/delivery_note/delivery_note_v1_page.rb +0 -36
- data/lib/mindee/product/driver_license/driver_license_v1.rb +0 -45
- data/lib/mindee/product/driver_license/driver_license_v1_document.rb +0 -121
- data/lib/mindee/product/driver_license/driver_license_v1_page.rb +0 -36
- data/lib/mindee/product/financial_document/financial_document_v1.rb +0 -45
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +0 -326
- data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +0 -121
- data/lib/mindee/product/financial_document/financial_document_v1_line_items.rb +0 -62
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +0 -36
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +0 -46
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +0 -47
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +0 -60
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +0 -57
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rb +0 -38
- data/lib/mindee/product/fr/bank_statement/bank_statement_v2_document.rb +0 -166
- data/lib/mindee/product/fr/bank_statement/bank_statement_v2_transaction.rb +0 -75
- data/lib/mindee/product/fr/carte_grise/carte_grise_v1_document.rb +0 -242
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_document.rb +0 -265
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_consumer.rb +0 -48
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_supplier.rb +0 -48
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usage.rb +0 -121
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_meter_detail.rb +0 -54
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_subscription.rb +0 -103
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contribution.rb +0 -103
- data/lib/mindee/product/fr/health_card/health_card_v1.rb +0 -47
- data/lib/mindee/product/fr/health_card/health_card_v1_document.rb +0 -61
- data/lib/mindee/product/fr/id_card/id_card_v1_page.rb +0 -55
- data/lib/mindee/product/fr/id_card/id_card_v2.rb +0 -47
- data/lib/mindee/product/fr/id_card/id_card_v2_document.rb +0 -140
- data/lib/mindee/product/fr/id_card/id_card_v2_page.rb +0 -63
- data/lib/mindee/product/fr/payslip/payslip_v2_bank_account_detail.rb +0 -54
- data/lib/mindee/product/fr/payslip/payslip_v2_document.rb +0 -143
- data/lib/mindee/product/fr/payslip/payslip_v2_employee.rb +0 -78
- data/lib/mindee/product/fr/payslip/payslip_v2_employer.rb +0 -78
- data/lib/mindee/product/fr/payslip/payslip_v2_employment.rb +0 -73
- data/lib/mindee/product/fr/payslip/payslip_v2_pay_detail.rb +0 -122
- data/lib/mindee/product/fr/payslip/payslip_v2_pay_period.rb +0 -66
- data/lib/mindee/product/fr/payslip/payslip_v2_pto.rb +0 -65
- data/lib/mindee/product/fr/payslip/payslip_v2_salary_detail.rb +0 -87
- data/lib/mindee/product/fr/payslip/payslip_v3_bank_account_detail.rb +0 -54
- data/lib/mindee/product/fr/payslip/payslip_v3_document.rb +0 -178
- data/lib/mindee/product/fr/payslip/payslip_v3_employee.rb +0 -78
- data/lib/mindee/product/fr/payslip/payslip_v3_employer.rb +0 -78
- data/lib/mindee/product/fr/payslip/payslip_v3_employment.rb +0 -78
- data/lib/mindee/product/fr/payslip/payslip_v3_paid_time_off.rb +0 -95
- data/lib/mindee/product/fr/payslip/payslip_v3_pay_detail.rb +0 -122
- data/lib/mindee/product/fr/payslip/payslip_v3_pay_period.rb +0 -66
- data/lib/mindee/product/fr/payslip/payslip_v3_salary_detail.rb +0 -97
- data/lib/mindee/product/fr/payslip/payslip_v3_salary_details.rb +0 -65
- data/lib/mindee/product/ind/indian_passport/indian_passport_v1_document.rb +0 -206
- data/lib/mindee/product/ind/indian_passport/indian_passport_v1_page.rb +0 -38
- data/lib/mindee/product/international_id/international_id_v2.rb +0 -45
- data/lib/mindee/product/international_id/international_id_v2_document.rb +0 -161
- data/lib/mindee/product/international_id/international_id_v2_page.rb +0 -36
- data/lib/mindee/product/invoice/invoice_v4.rb +0 -45
- data/lib/mindee/product/invoice/invoice_v4_document.rb +0 -297
- data/lib/mindee/product/invoice/invoice_v4_line_item.rb +0 -121
- data/lib/mindee/product/invoice/invoice_v4_line_items.rb +0 -62
- data/lib/mindee/product/invoice/invoice_v4_page.rb +0 -36
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +0 -45
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +0 -63
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +0 -55
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +0 -48
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +0 -36
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1.rb +0 -45
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +0 -35
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +0 -36
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1.rb +0 -45
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_added_sugar.rb +0 -55
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_calorie.rb +0 -55
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_cholesterol.rb +0 -55
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_dietary_fiber.rb +0 -55
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_document.rb +0 -211
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrient.rb +0 -93
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrients.rb +0 -68
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_page.rb +0 -36
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_protein.rb +0 -55
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_saturated_fat.rb +0 -55
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_serving_size.rb +0 -47
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_sodium.rb +0 -61
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_carbohydrate.rb +0 -55
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_fat.rb +0 -55
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_sugar.rb +0 -55
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_trans_fat.rb +0 -55
- data/lib/mindee/product/passport/passport_v1.rb +0 -45
- data/lib/mindee/product/passport/passport_v1_document.rb +0 -109
- data/lib/mindee/product/passport/passport_v1_page.rb +0 -36
- data/lib/mindee/product/receipt/receipt_v5.rb +0 -45
- data/lib/mindee/product/receipt/receipt_v5_document.rb +0 -184
- data/lib/mindee/product/receipt/receipt_v5_line_item.rb +0 -85
- data/lib/mindee/product/receipt/receipt_v5_line_items.rb +0 -54
- data/lib/mindee/product/receipt/receipt_v5_page.rb +0 -36
- data/lib/mindee/product/resume/resume_v1.rb +0 -45
- data/lib/mindee/product/resume/resume_v1_certificate.rb +0 -79
- data/lib/mindee/product/resume/resume_v1_certificates.rb +0 -58
- data/lib/mindee/product/resume/resume_v1_document.rb +0 -337
- data/lib/mindee/product/resume/resume_v1_education.rb +0 -103
- data/lib/mindee/product/resume/resume_v1_educations.rb +0 -64
- data/lib/mindee/product/resume/resume_v1_language.rb +0 -63
- data/lib/mindee/product/resume/resume_v1_languages.rb +0 -54
- data/lib/mindee/product/resume/resume_v1_page.rb +0 -36
- data/lib/mindee/product/resume/resume_v1_professional_experience.rb +0 -119
- data/lib/mindee/product/resume/resume_v1_professional_experiences.rb +0 -68
- data/lib/mindee/product/resume/resume_v1_social_networks_url.rb +0 -63
- data/lib/mindee/product/resume/resume_v1_social_networks_urls.rb +0 -54
- data/lib/mindee/product/universal/universal.rb +0 -46
- data/lib/mindee/product/universal/universal_document.rb +0 -32
- data/lib/mindee/product/universal/universal_page.rb +0 -51
- data/lib/mindee/product/universal/universal_prediction.rb +0 -122
- data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +0 -74
- data/lib/mindee/product/us/bank_check/bank_check_v1_page.rb +0 -64
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1.rb +0 -47
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copay.rb +0 -67
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_document.rb +0 -167
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_page.rb +0 -38
- data/lib/mindee/product/us/us_mail/us_mail_v3_document.rb +0 -115
- data/lib/mindee/product/us/us_mail/us_mail_v3_recipient_address.rb +0 -113
- data/lib/mindee/product/us/us_mail/us_mail_v3_sender_address.rb +0 -66
- data/sig/mindee/client.rbs +0 -61
- data/sig/mindee/client_v2.rbs +0 -21
- data/sig/mindee/extraction/multi_receipts_extractor.rbs +0 -6
- data/sig/mindee/http/api_settings_v2.rbs +0 -23
- data/sig/mindee/http/endpoint.rbs +0 -29
- data/sig/mindee/http/mindee_api_v2.rbs +0 -38
- data/sig/mindee/http/workflow_endpoint.rbs +0 -20
- data/sig/mindee/input/data_schema.rbs +0 -34
- data/sig/mindee/input/inference_parameters.rbs +0 -32
- data/sig/mindee/parsing/common/api_request.rbs +0 -15
- data/sig/mindee/parsing/common/api_response.rbs +0 -27
- data/sig/mindee/parsing/common/document.rbs +0 -20
- data/sig/mindee/parsing/common/execution.rbs +0 -24
- data/sig/mindee/parsing/common/execution_file.rbs +0 -12
- data/sig/mindee/parsing/common/execution_priority.rbs +0 -13
- data/sig/mindee/parsing/common/extras/cropper_extra.rbs +0 -15
- data/sig/mindee/parsing/common/extras/extras.rbs +0 -19
- data/sig/mindee/parsing/common/extras/full_text_ocr_extra.rbs +0 -16
- data/sig/mindee/parsing/common/extras/rag_extra.rbs +0 -15
- data/sig/mindee/parsing/common/inference.rbs +0 -26
- data/sig/mindee/parsing/common/job.rbs +0 -16
- data/sig/mindee/parsing/common/ocr/mvision_v1.rbs +0 -15
- data/sig/mindee/parsing/common/ocr/ocr.rbs +0 -41
- data/sig/mindee/parsing/common/orientation.rbs +0 -13
- data/sig/mindee/parsing/common/page.rbs +0 -16
- data/sig/mindee/parsing/common/prediction.rbs +0 -11
- data/sig/mindee/parsing/common/product.rbs +0 -13
- data/sig/mindee/parsing/common/workflow_response.rbs +0 -17
- data/sig/mindee/parsing/standard/abstract_field.rbs +0 -19
- data/sig/mindee/parsing/standard/address_field.rbs +0 -18
- data/sig/mindee/parsing/standard/amount_field.rbs +0 -14
- data/sig/mindee/parsing/standard/base_field.rbs +0 -12
- data/sig/mindee/parsing/standard/boolean_field.rbs +0 -12
- data/sig/mindee/parsing/standard/classification_field.rbs +0 -10
- data/sig/mindee/parsing/standard/company_registration_field.rbs +0 -15
- data/sig/mindee/parsing/standard/date_field.rbs +0 -14
- data/sig/mindee/parsing/standard/feature_field.rbs +0 -10
- data/sig/mindee/parsing/standard/locale_field.rbs +0 -16
- data/sig/mindee/parsing/standard/payment_details_field.rbs +0 -16
- data/sig/mindee/parsing/standard/position_field.rbs +0 -17
- data/sig/mindee/parsing/standard/string_field.rbs +0 -12
- data/sig/mindee/parsing/standard/tax_field.rbs +0 -24
- data/sig/mindee/parsing/universal/universal_list_field.rbs +0 -16
- data/sig/mindee/parsing/universal/universal_object_field.rbs +0 -29
- data/sig/mindee/parsing/v2/common_response.rbs +0 -11
- data/sig/mindee/parsing/v2/error_item.rbs +0 -13
- data/sig/mindee/parsing/v2/inference.rbs +0 -18
- data/sig/mindee/parsing/v2/inference_response.rbs +0 -21
- data/sig/mindee/parsing/v2/inference_result.rbs +0 -15
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1.rbs +0 -11
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_carrier.rbs +0 -14
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_item.rbs +0 -19
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_items.rbs +0 -13
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_consignee.rbs +0 -15
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_document.rbs +0 -25
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_notify_party.rbs +0 -15
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_page.rbs +0 -15
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_shipper.rbs +0 -15
- data/sig/mindee/product/business_card/business_card_v1.rbs +0 -11
- data/sig/mindee/product/business_card/business_card_v1_document.rbs +0 -23
- data/sig/mindee/product/business_card/business_card_v1_page.rbs +0 -15
- data/sig/mindee/product/delivery_note/delivery_note_v1.rbs +0 -11
- data/sig/mindee/product/delivery_note/delivery_note_v1_document.rbs +0 -19
- data/sig/mindee/product/delivery_note/delivery_note_v1_page.rbs +0 -15
- data/sig/mindee/product/driver_license/driver_license_v1.rbs +0 -11
- data/sig/mindee/product/driver_license/driver_license_v1_document.rbs +0 -25
- data/sig/mindee/product/driver_license/driver_license_v1_page.rbs +0 -15
- data/sig/mindee/product/financial_document/financial_document_v1_line_item.rbs +0 -21
- data/sig/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rbs +0 -17
- data/sig/mindee/product/fr/carte_grise/carte_grise_v1.rbs +0 -13
- data/sig/mindee/product/fr/carte_grise/carte_grise_v1_document.rbs +0 -55
- data/sig/mindee/product/fr/carte_grise/carte_grise_v1_page.rbs +0 -17
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1.rbs +0 -13
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_document.rbs +0 -34
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_energy_consumer.rbs +0 -15
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_energy_supplier.rbs +0 -15
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_energy_usage.rbs +0 -23
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_meter_detail.rbs +0 -16
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_page.rbs +0 -17
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_subscription.rbs +0 -21
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contributions.rbs +0 -15
- data/sig/mindee/product/fr/health_card/health_card_v1.rbs +0 -13
- data/sig/mindee/product/fr/health_card/health_card_v1_document.rbs +0 -18
- data/sig/mindee/product/fr/health_card/health_card_v1_page.rbs +0 -17
- data/sig/mindee/product/fr/id_card/id_card_v1.rbs +0 -13
- data/sig/mindee/product/fr/payslip/payslip_v2.rbs +0 -13
- data/sig/mindee/product/fr/payslip/payslip_v2_bank_account_detail.rbs +0 -16
- data/sig/mindee/product/fr/payslip/payslip_v2_document.rbs +0 -24
- data/sig/mindee/product/fr/payslip/payslip_v2_employee.rbs +0 -20
- data/sig/mindee/product/fr/payslip/payslip_v2_employer.rbs +0 -20
- data/sig/mindee/product/fr/payslip/payslip_v2_employment.rbs +0 -19
- data/sig/mindee/product/fr/payslip/payslip_v2_page.rbs +0 -17
- data/sig/mindee/product/fr/payslip/payslip_v2_pay_detail.rbs +0 -23
- data/sig/mindee/product/fr/payslip/payslip_v2_pay_period.rbs +0 -18
- data/sig/mindee/product/fr/payslip/payslip_v2_pto.rbs +0 -16
- data/sig/mindee/product/fr/payslip/payslip_v2_salary_detail.rbs +0 -19
- data/sig/mindee/product/fr/payslip/payslip_v3.rbs +0 -13
- data/sig/mindee/product/fr/payslip/payslip_v3_bank_account_detail.rbs +0 -16
- data/sig/mindee/product/fr/payslip/payslip_v3_document.rbs +0 -26
- data/sig/mindee/product/fr/payslip/payslip_v3_employee.rbs +0 -20
- data/sig/mindee/product/fr/payslip/payslip_v3_employer.rbs +0 -20
- data/sig/mindee/product/fr/payslip/payslip_v3_employment.rbs +0 -20
- data/sig/mindee/product/fr/payslip/payslip_v3_page.rbs +0 -17
- data/sig/mindee/product/fr/payslip/payslip_v3_paid_time_off.rbs +0 -20
- data/sig/mindee/product/fr/payslip/payslip_v3_pay_detail.rbs +0 -23
- data/sig/mindee/product/fr/payslip/payslip_v3_pay_period.rbs +0 -18
- data/sig/mindee/product/fr/payslip/payslip_v3_salary_detail.rbs +0 -20
- data/sig/mindee/product/ind/indian_passport/indian_passport_v1.rbs +0 -13
- data/sig/mindee/product/ind/indian_passport/indian_passport_v1_document.rbs +0 -37
- data/sig/mindee/product/ind/indian_passport/indian_passport_v1_page.rbs +0 -17
- data/sig/mindee/product/invoice/invoice_v4_line_item.rbs +0 -21
- data/sig/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs +0 -14
- data/sig/mindee/product/multi_receipts_detector/multi_receipts_detector_v1.rbs +0 -11
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1.rbs +0 -11
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_added_sugar.rbs +0 -14
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_calorie.rbs +0 -14
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_cholesterol.rbs +0 -14
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_dietary_fiber.rbs +0 -14
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_document.rbs +0 -28
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrient.rbs +0 -18
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrients.rbs +0 -13
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_page.rbs +0 -15
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_protein.rbs +0 -14
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_saturated_fat.rbs +0 -14
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_serving_size.rbs +0 -13
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_sodium.rbs +0 -15
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_carbohydrate.rbs +0 -14
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_fat.rbs +0 -14
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_sugar.rbs +0 -14
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_trans_fat.rbs +0 -14
- data/sig/mindee/product/receipt/receipt_v5_line_item.rbs +0 -17
- data/sig/mindee/product/resume/resume_v1_certificate.rbs +0 -17
- data/sig/mindee/product/resume/resume_v1_certificates.rbs +0 -13
- data/sig/mindee/product/resume/resume_v1_document.rbs +0 -39
- data/sig/mindee/product/resume/resume_v1_education.rbs +0 -20
- data/sig/mindee/product/resume/resume_v1_educations.rbs +0 -13
- data/sig/mindee/product/resume/resume_v1_language.rbs +0 -15
- data/sig/mindee/product/resume/resume_v1_languages.rbs +0 -13
- data/sig/mindee/product/resume/resume_v1_page.rbs +0 -15
- data/sig/mindee/product/resume/resume_v1_professional_experience.rbs +0 -22
- data/sig/mindee/product/resume/resume_v1_professional_experiences.rbs +0 -13
- data/sig/mindee/product/resume/resume_v1_social_networks_url.rbs +0 -15
- data/sig/mindee/product/resume/resume_v1_social_networks_urls.rbs +0 -13
- data/sig/mindee/product/universal/universal_page.rbs +0 -14
- data/sig/mindee/product/universal/universal_prediction.rbs +0 -19
- data/sig/mindee/product/us/bank_check/bank_check_v1.rbs +0 -13
- data/sig/mindee/product/us/bank_check/bank_check_v1_document.rbs +0 -20
- data/sig/mindee/product/us/bank_check/bank_check_v1_page.rbs +0 -19
- data/sig/mindee/product/us/healthcare_card/healthcare_card_v1.rbs +0 -13
- data/sig/mindee/product/us/healthcare_card/healthcare_card_v1_copays.rbs +0 -15
- data/sig/mindee/product/us/healthcare_card/healthcare_card_v1_document.rbs +0 -30
- data/sig/mindee/product/us/healthcare_card/healthcare_card_v1_page.rbs +0 -17
- data/sig/mindee/product/us/us_mail/us_mail_v3.rbs +0 -13
- data/sig/mindee/product/us/us_mail/us_mail_v3_document.rbs +0 -21
- data/sig/mindee/product/us/us_mail/us_mail_v3_recipient_address.rbs +0 -23
- data/sig/mindee/product/us/us_mail/us_mail_v3_recipient_addresses.rbs +0 -15
- data/sig/mindee/product/us/us_mail/us_mail_v3_sender_address.rbs +0 -18
- data/sig/mindee/product/us/w9/w9_v1.rbs +0 -13
- data/sig/mindee/product/us/w9/w9_v1_document.rbs +0 -14
- data/sig/mindee/product/us/w9/w9_v1_page.rbs +0 -29
- /data/lib/mindee/{extraction.rb → v1/extraction.rb} +0 -0
- /data/lib/mindee/{parsing → v1/parsing}/common/extras.rb +0 -0
- /data/lib/mindee/{parsing → v1/parsing}/common/ocr.rb +0 -0
- /data/lib/mindee/{parsing → v1/parsing}/common.rb +0 -0
- /data/lib/mindee/{parsing → v1/parsing}/standard.rb +0 -0
- /data/lib/mindee/{parsing → v1/parsing}/universal.rb +0 -0
- /data/lib/mindee/{parsing.rb → v1/parsing.rb} +0 -0
- /data/lib/mindee/{parsing/v2 → v2/parsing}/field.rb +0 -0
|
@@ -31,7 +31,7 @@ module Mindee
|
|
|
31
31
|
polling_options: nil,
|
|
32
32
|
close_file: true
|
|
33
33
|
)
|
|
34
|
-
raise
|
|
34
|
+
raise Error::MindeeInputError, 'Model ID is required.' if model_id.empty? || model_id.nil?
|
|
35
35
|
|
|
36
36
|
@model_id = model_id
|
|
37
37
|
@file_alias = file_alias
|
|
@@ -54,6 +54,9 @@ module Mindee
|
|
|
54
54
|
self.class.slug
|
|
55
55
|
end
|
|
56
56
|
|
|
57
|
+
# Load from a hash
|
|
58
|
+
# @param [Hash] params Parameters to provide as a hash.
|
|
59
|
+
# @return [BaseParameters]
|
|
57
60
|
def self.from_hash(params: {})
|
|
58
61
|
load_from_hash(params: params)
|
|
59
62
|
new(
|
|
@@ -72,7 +75,7 @@ module Mindee
|
|
|
72
75
|
params.transform_keys!(&:to_sym)
|
|
73
76
|
|
|
74
77
|
if params.empty? || params[:model_id].nil? || params[:model_id].empty?
|
|
75
|
-
raise
|
|
78
|
+
raise Error::MindeeInputError, 'Model ID is required.'
|
|
76
79
|
end
|
|
77
80
|
|
|
78
81
|
polling_options_input = params.fetch(:page_options, PollingOptions.new)
|
|
@@ -27,7 +27,7 @@ module Mindee
|
|
|
27
27
|
end
|
|
28
28
|
@file.rewind
|
|
29
29
|
else
|
|
30
|
-
raise
|
|
30
|
+
raise Error::MindeeInputError, "Incompatible type for input '#{input_file.class}'."
|
|
31
31
|
end
|
|
32
32
|
end
|
|
33
33
|
|
|
@@ -38,7 +38,7 @@ module Mindee
|
|
|
38
38
|
file_str = @file.read or raise 'File could not be read'
|
|
39
39
|
JSON.parse(file_str, object_class: Hash)
|
|
40
40
|
rescue JSON::ParserError
|
|
41
|
-
raise
|
|
41
|
+
raise Error::MindeeInputError, "File is not a valid dict. #{file_str}"
|
|
42
42
|
end
|
|
43
43
|
|
|
44
44
|
# Processes the secret key
|
|
@@ -57,7 +57,7 @@ module Mindee
|
|
|
57
57
|
mac = OpenSSL::HMAC.hexdigest(algorithm, self.class.process_secret_key(secret_key),
|
|
58
58
|
@file.read || raise('File could not be read'))
|
|
59
59
|
rescue StandardError
|
|
60
|
-
raise
|
|
60
|
+
raise Error::MindeeInputError, 'Could not get HMAC signature from payload.'
|
|
61
61
|
end
|
|
62
62
|
mac
|
|
63
63
|
end
|
|
@@ -70,12 +70,10 @@ module Mindee
|
|
|
70
70
|
end
|
|
71
71
|
|
|
72
72
|
# Deserializes a loaded response
|
|
73
|
-
# @param response_class [Parsing::
|
|
74
|
-
# @return [Parsing::
|
|
73
|
+
# @param response_class [Class<V2::Parsing::BaseResponse>] class to return.
|
|
74
|
+
# @return [V2::Parsing::JobResponse, Mindee::V2::Parsing::BaseResponse]
|
|
75
75
|
def deserialize_response(response_class)
|
|
76
|
-
response_class.new(as_hash) # : Mindee::Parsing::
|
|
77
|
-
rescue StandardError
|
|
78
|
-
raise Errors::MindeeInputError, 'Invalid response provided.'
|
|
76
|
+
response_class.new(as_hash) # : Mindee::V2::Parsing::JobResponse | Mindee::V2::Parsing::BaseResponse
|
|
79
77
|
end
|
|
80
78
|
end
|
|
81
79
|
end
|
|
@@ -4,8 +4,9 @@ require 'stringio'
|
|
|
4
4
|
require 'marcel'
|
|
5
5
|
require 'fileutils'
|
|
6
6
|
|
|
7
|
-
require_relative '../../
|
|
8
|
-
require_relative '../../
|
|
7
|
+
require_relative '../../dependencies'
|
|
8
|
+
require_relative '../../pdf' if Mindee::Dependencies.all_deps_available?
|
|
9
|
+
require_relative '../../image' if Mindee::Dependencies.all_deps_available?
|
|
9
10
|
|
|
10
11
|
module Mindee
|
|
11
12
|
module Input
|
|
@@ -53,7 +54,7 @@ module Mindee
|
|
|
53
54
|
return if ALLOWED_MIME_TYPES.include? @file_mimetype
|
|
54
55
|
end
|
|
55
56
|
|
|
56
|
-
raise
|
|
57
|
+
raise Error::MindeeMimeTypeError, @file_mimetype.to_s
|
|
57
58
|
end
|
|
58
59
|
|
|
59
60
|
# @deprecated See {#fix_pdf!} or {#self.fix_pdf} instead.
|
|
@@ -69,7 +70,7 @@ module Mindee
|
|
|
69
70
|
# Attempts to fix the PDF data in the file.
|
|
70
71
|
# @param maximum_offset [Integer] Maximum offset to look for the PDF header.
|
|
71
72
|
# @return [void]
|
|
72
|
-
# @raise [Mindee::
|
|
73
|
+
# @raise [Mindee::Error::MindeePDFError]
|
|
73
74
|
def fix_pdf!(maximum_offset: 500)
|
|
74
75
|
@io_stream = LocalInputSource.fix_pdf(@io_stream, maximum_offset: maximum_offset)
|
|
75
76
|
@io_stream.rewind
|
|
@@ -80,11 +81,11 @@ module Mindee
|
|
|
80
81
|
# @param stream [StringIO] The stream to fix.
|
|
81
82
|
# @param maximum_offset [Integer] Maximum offset to look for the PDF header.
|
|
82
83
|
# @return [StringIO] The fixed stream.
|
|
83
|
-
# @raise [Mindee::
|
|
84
|
+
# @raise [Mindee::Error::MindeePDFError]
|
|
84
85
|
def self.fix_pdf(stream, maximum_offset: 500)
|
|
85
86
|
out_stream = StringIO.new
|
|
86
87
|
stream.gets('%PDF-')
|
|
87
|
-
raise
|
|
88
|
+
raise Error::MindeePDFError if stream.eof? || stream.pos > maximum_offset
|
|
88
89
|
|
|
89
90
|
stream.pos = stream.pos - 5
|
|
90
91
|
out_stream << stream.read
|
|
@@ -142,6 +143,9 @@ module Mindee
|
|
|
142
143
|
# Defaults to one for images.
|
|
143
144
|
# @return [Integer]
|
|
144
145
|
def page_count
|
|
146
|
+
unless Mindee::Dependencies.all_deps_available?
|
|
147
|
+
raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
|
|
148
|
+
end
|
|
145
149
|
return 1 unless pdf?
|
|
146
150
|
|
|
147
151
|
@io_stream.seek(0)
|
|
@@ -149,14 +153,6 @@ module Mindee
|
|
|
149
153
|
pdf_processor.pages.size
|
|
150
154
|
end
|
|
151
155
|
|
|
152
|
-
# Returns the page count for a document.
|
|
153
|
-
# Defaults to one for images.
|
|
154
|
-
# @return [Integer]
|
|
155
|
-
# @deprecated Use {#page_count} instead.
|
|
156
|
-
def count_pages
|
|
157
|
-
page_count
|
|
158
|
-
end
|
|
159
|
-
|
|
160
156
|
# Compresses the file, according to the provided info.
|
|
161
157
|
# @param [Integer] quality Quality of the output file.
|
|
162
158
|
# @param [Integer, nil] max_width Maximum width (Ignored for PDFs).
|
|
@@ -167,6 +163,10 @@ module Mindee
|
|
|
167
163
|
# @param [bool] disable_source_text If the PDF has source text, whether to re-apply it to the original or
|
|
168
164
|
# not. Needs force_source_text to work.
|
|
169
165
|
def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true)
|
|
166
|
+
unless Mindee::Dependencies.all_deps_available?
|
|
167
|
+
raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
|
|
168
|
+
end
|
|
169
|
+
|
|
170
170
|
buffer = if pdf?
|
|
171
171
|
Mindee::PDF::PDFCompressor.compress_pdf(
|
|
172
172
|
@io_stream,
|
|
@@ -189,6 +189,10 @@ module Mindee
|
|
|
189
189
|
# Checks whether the file has source text if it is a pdf. `false` otherwise
|
|
190
190
|
# @return [bool] `true` if the file is a PDF and has source text.
|
|
191
191
|
def source_text?
|
|
192
|
+
unless Mindee::Dependencies.all_deps_available?
|
|
193
|
+
raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
|
|
194
|
+
end
|
|
195
|
+
|
|
192
196
|
Mindee::PDF::PDFTools.source_text?(@io_stream)
|
|
193
197
|
end
|
|
194
198
|
end
|
|
@@ -11,7 +11,7 @@ module Mindee
|
|
|
11
11
|
# @param filepath [String]
|
|
12
12
|
# @param repair_pdf [bool]
|
|
13
13
|
def initialize(filepath, repair_pdf: false)
|
|
14
|
-
io_stream = File.
|
|
14
|
+
io_stream = File.new(filepath, 'rb')
|
|
15
15
|
super(io_stream, File.basename(filepath), repair_pdf: repair_pdf)
|
|
16
16
|
end
|
|
17
17
|
end
|
|
@@ -14,7 +14,7 @@ module Mindee
|
|
|
14
14
|
attr_reader :url
|
|
15
15
|
|
|
16
16
|
def initialize(url)
|
|
17
|
-
raise
|
|
17
|
+
raise Error::MindeeInputError, 'URL must be HTTPS' unless url.start_with? 'https://'
|
|
18
18
|
|
|
19
19
|
logger.debug("URL input: #{url}")
|
|
20
20
|
|
|
@@ -75,9 +75,9 @@ module Mindee
|
|
|
75
75
|
|
|
76
76
|
response = make_request(uri, request, max_redirects)
|
|
77
77
|
if response.code.to_i > 299
|
|
78
|
-
raise
|
|
78
|
+
raise Error::MindeeAPIError, "Failed to download file: HTTP status code #{response.code}"
|
|
79
79
|
elsif response.code.to_i < 200
|
|
80
|
-
raise
|
|
80
|
+
raise Error::MindeeAPIError, "Failed to download file: Invalid response code #{response.code}."
|
|
81
81
|
end
|
|
82
82
|
|
|
83
83
|
response.body
|
|
@@ -103,7 +103,7 @@ module Mindee
|
|
|
103
103
|
response = http.request(request)
|
|
104
104
|
if response.is_a?(Net::HTTPRedirection) && max_redirects.positive?
|
|
105
105
|
location = response['location']
|
|
106
|
-
raise
|
|
106
|
+
raise Error::MindeeInputError, 'No location in redirection header.' if location.nil?
|
|
107
107
|
|
|
108
108
|
new_uri = URI.parse(location)
|
|
109
109
|
request = Net::HTTP::Get.new(new_uri)
|
data/lib/mindee/input.rb
CHANGED
|
@@ -5,8 +5,16 @@ require 'logger'
|
|
|
5
5
|
module Mindee
|
|
6
6
|
# Mindee logging module.
|
|
7
7
|
module Logging
|
|
8
|
-
@logger = Logger.new($stdout)
|
|
9
8
|
log_level = ENV.fetch('MINDEE_LOG_LEVEL', 'WARN')
|
|
9
|
+
log_output = ENV.fetch('MINDEE_LOG_OUTPUT', 'stderr')
|
|
10
|
+
@logger = if log_output == 'stderr'
|
|
11
|
+
Logger.new($stderr)
|
|
12
|
+
elsif log_output == 'stdout'
|
|
13
|
+
Logger.new($stdout)
|
|
14
|
+
else
|
|
15
|
+
warn "Invalid MINDEE_LOG_OUTPUT='#{log_output}', defaulting to 'stderr'"
|
|
16
|
+
Logger.new($stderr)
|
|
17
|
+
end
|
|
10
18
|
@logger.level = Logger.const_get(log_level)
|
|
11
19
|
|
|
12
20
|
class << self
|
data/lib/mindee/page_options.rb
CHANGED
|
@@ -3,57 +3,67 @@
|
|
|
3
3
|
module Mindee
|
|
4
4
|
# PDF Extraction Module.
|
|
5
5
|
module PDF
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
attr_reader :pdf_bytes
|
|
12
|
-
|
|
13
|
-
# Name of the file.
|
|
14
|
-
# @return [String]
|
|
15
|
-
attr_reader :filename
|
|
16
|
-
|
|
17
|
-
# @param pdf_bytes [StringIO]
|
|
18
|
-
# @param filename [String]
|
|
19
|
-
def initialize(pdf_bytes, filename)
|
|
20
|
-
@pdf_bytes = pdf_bytes
|
|
21
|
-
@filename = filename
|
|
22
|
-
end
|
|
6
|
+
# An extracted sub-Pdf.
|
|
7
|
+
class ExtractedPDF
|
|
8
|
+
# Byte contents of the pdf
|
|
9
|
+
# @return [StringIO]
|
|
10
|
+
attr_reader :pdf_bytes
|
|
23
11
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
12
|
+
# Name of the file.
|
|
13
|
+
# @return [String]
|
|
14
|
+
attr_reader :filename
|
|
15
|
+
|
|
16
|
+
# @param pdf_stream [StringIO, File]
|
|
17
|
+
# @param filename [String]
|
|
18
|
+
def initialize(pdf_stream, filename)
|
|
19
|
+
@filename = filename
|
|
32
20
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
raise Errors::MindeePDFError, 'Invalid save path provided' unless File.exist?(
|
|
39
|
-
File.expand_path('..', output_path)
|
|
40
|
-
) && !override
|
|
41
|
-
|
|
42
|
-
if File.extname(output_path).downcase == 'pdf'
|
|
43
|
-
base_path = File.expand_path('..', output_path)
|
|
44
|
-
output_path = File.expand_path("#{File.basename(output_path)}.pdf", base_path)
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
File.write(output_path, @pdf_bytes)
|
|
21
|
+
if pdf_stream.is_a?(File)
|
|
22
|
+
pdf_stream.rewind
|
|
23
|
+
@pdf_bytes = StringIO.new(pdf_stream.read)
|
|
24
|
+
else
|
|
25
|
+
@pdf_bytes = pdf_stream
|
|
48
26
|
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Retrieves the page count for a given pdf.
|
|
30
|
+
# @return [Integer]
|
|
31
|
+
def page_count
|
|
32
|
+
current_pdf = Mindee::PDF::PDFProcessor.open_pdf(pdf_bytes)
|
|
33
|
+
current_pdf.pages.size
|
|
34
|
+
rescue TypeError, Origami::InvalidPDFError
|
|
35
|
+
raise Error::MindeePDFError, 'Could not retrieve page count from Extracted PDF object.'
|
|
36
|
+
end
|
|
49
37
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
38
|
+
# Writes the contents of the current PDF object to a file.
|
|
39
|
+
# @param output_path [String] Path to write to.
|
|
40
|
+
# @param override [bool] Whether to override the destination file.
|
|
41
|
+
def write_to_file(output_path, override: false)
|
|
42
|
+
raise Error::MindeePDFError, 'Provided path is not a file' if File.directory?(output_path)
|
|
43
|
+
raise Error::MindeePDFError, 'Invalid save path provided' unless File.exist?(
|
|
44
|
+
File.expand_path('..', output_path)
|
|
45
|
+
) && !override
|
|
54
46
|
|
|
55
|
-
|
|
47
|
+
if File.extname(output_path).downcase == 'pdf'
|
|
48
|
+
base_path = File.expand_path('..', output_path)
|
|
49
|
+
output_path = File.expand_path("#{File.basename(output_path)}.pdf", base_path)
|
|
56
50
|
end
|
|
51
|
+
|
|
52
|
+
@pdf_bytes.rewind if @pdf_bytes.respond_to?(:rewind)
|
|
53
|
+
File.binwrite(output_path, @pdf_bytes.read.to_s)
|
|
54
|
+
@pdf_bytes.rewind if @pdf_bytes.respond_to?(:rewind)
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Returns the current PDF object as a usable BytesInputSource.
|
|
58
|
+
# @return [Mindee::Input::Source::BytesInputSource]
|
|
59
|
+
def as_input_source
|
|
60
|
+
raise Error::MindeePDFError, 'Bytes object is nil.' if @pdf_bytes.nil?
|
|
61
|
+
|
|
62
|
+
@pdf_bytes.rewind if @pdf_bytes.respond_to?(:rewind)
|
|
63
|
+
data = @pdf_bytes.read || ''
|
|
64
|
+
@pdf_bytes.rewind if @pdf_bytes.respond_to?(:rewind)
|
|
65
|
+
|
|
66
|
+
Mindee::Input::Source::BytesInputSource.new(data, @filename)
|
|
57
67
|
end
|
|
58
68
|
end
|
|
59
69
|
end
|
|
@@ -3,118 +3,119 @@
|
|
|
3
3
|
module Mindee
|
|
4
4
|
# Pdf Extraction Module.
|
|
5
5
|
module PDF
|
|
6
|
-
# Pdf
|
|
7
|
-
|
|
8
|
-
#
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
@filename = local_input.filename
|
|
13
|
-
if local_input.pdf?
|
|
14
|
-
@source_pdf = local_input.io_stream
|
|
15
|
-
else
|
|
16
|
-
pdf_image = Image::ImageExtractor.attach_image_as_new_file(local_input.io_stream)
|
|
17
|
-
io_buffer = StringIO.new
|
|
18
|
-
pdf_image.save(io_buffer)
|
|
19
|
-
|
|
20
|
-
@source_pdf = io_buffer
|
|
21
|
-
end
|
|
6
|
+
# Pdf extraction class.
|
|
7
|
+
class PDFExtractor
|
|
8
|
+
# @param local_input [Mindee::Input::Source::LocalInputSource]
|
|
9
|
+
def initialize(local_input)
|
|
10
|
+
unless Mindee::Dependencies.all_deps_available?
|
|
11
|
+
raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
|
|
22
12
|
end
|
|
23
13
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
14
|
+
@filename = local_input.filename
|
|
15
|
+
if local_input.pdf?
|
|
16
|
+
@source_pdf = local_input.io_stream
|
|
17
|
+
else
|
|
18
|
+
pdf_image = Image::ImageExtractor.attach_image_as_new_file(local_input.io_stream)
|
|
19
|
+
io_buffer = StringIO.new
|
|
20
|
+
pdf_image.save(io_buffer)
|
|
21
|
+
|
|
22
|
+
@source_pdf = io_buffer
|
|
28
23
|
end
|
|
24
|
+
end
|
|
29
25
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
page_indexes: page_indexes,
|
|
36
|
-
})
|
|
26
|
+
# Retrieves the page count for the Pdf object.
|
|
27
|
+
# @return [Integer]
|
|
28
|
+
def page_count
|
|
29
|
+
Mindee::PDF::PDFProcessor.open_pdf(@source_pdf).pages.size
|
|
30
|
+
end
|
|
37
31
|
|
|
38
|
-
|
|
39
|
-
|
|
32
|
+
# Creates a new Pdf from pages and saves it into a buffer.
|
|
33
|
+
# @param page_indexes [Array<Integer>] List of page numbers to use for merging in the original Pdf.
|
|
34
|
+
# @return [StringIO] The buffer containing the new Pdf.
|
|
35
|
+
def cut_pages(page_indexes)
|
|
36
|
+
options = PageOptions.new(params: {
|
|
37
|
+
page_indexes: page_indexes,
|
|
38
|
+
})
|
|
40
39
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
40
|
+
Mindee::PDF::PDFProcessor.parse(@source_pdf, options)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Extract the sub-documents from the main pdf, based on the given list of page indexes.
|
|
44
|
+
# @param page_indexes [Array<Array<Integer>>] List of page number lists, each one used to build a sub-document from the original Pdf.
|
|
45
|
+
# @return [Array<Mindee::PDF::ExtractedPDF>] The extracted sub-documents, one per list of page indexes.
|
|
46
|
+
def extract_sub_documents(page_indexes)
|
|
47
|
+
extracted_pdfs = [] # @type var extracted_pdfs: Array[Mindee::PDF::ExtractedPDF]
|
|
48
|
+
extension = File.extname(@filename)
|
|
49
|
+
basename = File.basename(@filename, extension)
|
|
50
|
+
page_indexes.each do |page_index_list|
|
|
51
|
+
if page_index_list.nil? || page_index_list.empty?
|
|
52
|
+
raise Error::MindeePDFError, "Empty indexes aren't allowed for extraction #{page_index_list}"
|
|
53
|
+
end
|
|
52
54
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
end
|
|
55
|
+
page_index_list.each do |page_index|
|
|
56
|
+
if (page_index > page_count) || page_index.negative?
|
|
57
|
+
raise Error::MindeePDFError,
|
|
58
|
+
"Index #{page_index} is out of range."
|
|
58
59
|
end
|
|
59
|
-
formatted_max_index = format('%03d', page_index_list[page_index_list.length - 1] + 1).to_s
|
|
60
|
-
field_filename = "#{basename}_#{format('%03d',
|
|
61
|
-
page_index_list[0] + 1)}-#{formatted_max_index}#{extension}"
|
|
62
|
-
extracted_pdf = Mindee::PDF::PDFExtractor::ExtractedPDF.new(cut_pages(page_index_list),
|
|
63
|
-
field_filename)
|
|
64
|
-
extracted_pdfs << extracted_pdf
|
|
65
60
|
end
|
|
66
|
-
|
|
61
|
+
formatted_max_index = format('%03d', page_index_list[-1] + 1).to_s
|
|
62
|
+
field_filename = "#{basename}_#{format('%03d',
|
|
63
|
+
page_index_list[0] + 1)}-#{formatted_max_index}#{extension}"
|
|
64
|
+
extracted_pdf = Mindee::PDF::ExtractedPDF.new(cut_pages(page_index_list),
|
|
65
|
+
field_filename)
|
|
66
|
+
extracted_pdfs << extracted_pdf
|
|
67
67
|
end
|
|
68
|
+
extracted_pdfs
|
|
69
|
+
end
|
|
68
70
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
|
72
|
+
# rubocop:disable Metrics/PerceivedComplexity
|
|
71
73
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
74
|
+
# Extracts invoices as complete PDFs from the document.
|
|
75
|
+
# @param page_indexes [Array<Array<Integer>, InvoiceSplitterV1InvoicePageGroup>]
|
|
76
|
+
# @param strict [bool]
|
|
77
|
+
# @return [Array<Mindee::PDF::ExtractedPDF>]
|
|
78
|
+
def extract_invoices(page_indexes, strict: false)
|
|
79
|
+
raise Error::MindeePDFError, 'No indexes provided.' if page_indexes.empty?
|
|
78
80
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
end
|
|
106
|
-
previous_confidence = confidence
|
|
81
|
+
if page_indexes[0].is_a?(Array) && page_indexes[0].all?(Integer)
|
|
82
|
+
page_indexes_as_array = page_indexes # @type var page_indexes : Array[Array[Integer]]
|
|
83
|
+
return extract_sub_documents(page_indexes_as_array)
|
|
84
|
+
end
|
|
85
|
+
p_ids = page_indexes # @type var page_indexes: Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroups
|
|
86
|
+
return extract_sub_documents(p_ids.map(&:page_indexes)) unless strict
|
|
87
|
+
|
|
88
|
+
correct_page_indexes = [] # @type var correct_page_indexes: Array[Array[Integer]]
|
|
89
|
+
current_list = [] # @type var current_list: Array[Integer]
|
|
90
|
+
previous_confidence = nil
|
|
91
|
+
p_ids.each_with_index do |p_i, i|
|
|
92
|
+
page_index = p_i # @type var page_index: Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup
|
|
93
|
+
confidence = page_index.confidence.to_f
|
|
94
|
+
page_list = page_index.page_indexes
|
|
95
|
+
|
|
96
|
+
if confidence >= 0.5 && previous_confidence.nil?
|
|
97
|
+
current_list = page_list
|
|
98
|
+
elsif confidence >= 0.5 && i < p_ids.length - 1
|
|
99
|
+
correct_page_indexes << current_list
|
|
100
|
+
current_list = page_list
|
|
101
|
+
elsif confidence < 0.5 && i == p_ids.length - 1
|
|
102
|
+
current_list.concat page_list
|
|
103
|
+
correct_page_indexes << current_list
|
|
104
|
+
else
|
|
105
|
+
correct_page_indexes << current_list
|
|
106
|
+
correct_page_indexes << page_list
|
|
107
107
|
end
|
|
108
|
-
|
|
108
|
+
previous_confidence = confidence
|
|
109
109
|
end
|
|
110
|
+
extract_sub_documents(correct_page_indexes)
|
|
111
|
+
end
|
|
110
112
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
|
114
|
+
# rubocop:enable Metrics/PerceivedComplexity
|
|
113
115
|
|
|
114
|
-
|
|
116
|
+
private
|
|
115
117
|
|
|
116
|
-
|
|
117
|
-
end
|
|
118
|
+
attr_reader :source_pdf, :filename
|
|
118
119
|
end
|
|
119
120
|
end
|
|
120
121
|
end
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Mindee::Dependencies.require_all_deps!
|
|
4
4
|
require 'origami'
|
|
5
5
|
require_relative 'pdf_tools'
|
|
6
6
|
|
|
@@ -61,6 +61,11 @@ module Mindee
|
|
|
61
61
|
# @param io_stream [StringIO]
|
|
62
62
|
# @return [Origami::PDF]
|
|
63
63
|
def self.open_pdf(io_stream)
|
|
64
|
+
unless PDFTools.pdf_header?(io_stream)
|
|
65
|
+
raise Origami::InvalidPDFError,
|
|
66
|
+
'Input stream does not contain a PDF header.'
|
|
67
|
+
end
|
|
68
|
+
|
|
64
69
|
pdf_parser = Origami::PDF::LinearParser.new({ verbosity: Origami::Parser::VERBOSE_QUIET })
|
|
65
70
|
io_stream.seek(0)
|
|
66
71
|
pdf_parser.parse(io_stream)
|
data/lib/mindee/pdf/pdf_tools.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
Mindee::Dependencies.require_all_deps!
|
|
3
4
|
require 'origami'
|
|
4
5
|
|
|
5
6
|
module Mindee
|
|
@@ -53,10 +54,28 @@ module Mindee
|
|
|
53
54
|
text_operators.any? { |op| data.include?(op) }
|
|
54
55
|
end
|
|
55
56
|
|
|
57
|
+
# Checks whether a stream contains a PDF header near the beginning.
|
|
58
|
+
# @param [StringIO] io_stream Binary-encoded stream.
|
|
59
|
+
# @param [Integer] maximum_offset Maximum allowed offset to find '%PDF-'.
|
|
60
|
+
# @return [bool] `true` when the stream appears to be a PDF.
|
|
61
|
+
def self.pdf_header?(io_stream, maximum_offset: 500)
|
|
62
|
+
initial_pos = nil
|
|
63
|
+
initial_pos = io_stream.pos if io_stream.respond_to?(:pos)
|
|
64
|
+
io_stream.seek(0)
|
|
65
|
+
io_stream.gets('%PDF-')
|
|
66
|
+
!(io_stream.eof? || io_stream.pos > maximum_offset)
|
|
67
|
+
rescue TypeError, IOError, SystemCallError
|
|
68
|
+
false
|
|
69
|
+
ensure
|
|
70
|
+
io_stream.seek(initial_pos) if !initial_pos.nil? && io_stream.respond_to?(:seek)
|
|
71
|
+
end
|
|
72
|
+
|
|
56
73
|
# Checks whether the file has source text. Returns false if the file isn't a PDF.
|
|
57
74
|
# @param [StringIO] pdf_data A binary-encoded stream representing the PDF file.
|
|
58
75
|
# @return [bool] `true` if the pdf has source text, false otherwise.
|
|
59
76
|
def self.source_text?(pdf_data)
|
|
77
|
+
return false unless pdf_header?(pdf_data)
|
|
78
|
+
|
|
60
79
|
begin
|
|
61
80
|
pdf_data.rewind
|
|
62
81
|
pdf = Origami::PDF.read(pdf_data)
|