mindee-lite 5.0.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +24 -0
- data/.gitattributes +14 -0
- data/.gitignore +76 -0
- data/.gitmodules +3 -0
- data/.pre-commit-config.yaml +36 -0
- data/.rubocop.yml +49 -0
- data/.yardopts +4 -0
- data/CHANGELOG.md +515 -0
- data/CODE_OF_CONDUCT.md +129 -0
- data/CONTRIBUTING.md +107 -0
- data/Gemfile +14 -0
- data/LICENSE +21 -0
- data/README.md +42 -0
- data/Rakefile +40 -0
- data/Steepfile +30 -0
- data/bin/console +14 -0
- data/bin/mindee.rb +30 -0
- data/bin/v1/parser.rb +153 -0
- data/bin/v1/products.rb +88 -0
- data/bin/v2/parser.rb +235 -0
- data/bin/v2/products.rb +34 -0
- data/docs/code_samples/bank_account_details_v1.txt +24 -0
- data/docs/code_samples/bank_account_details_v2.txt +24 -0
- data/docs/code_samples/bank_statement_fr_v2_async.txt +24 -0
- data/docs/code_samples/barcode_reader_v1.txt +24 -0
- data/docs/code_samples/cropper_v1.txt +21 -0
- data/docs/code_samples/default.txt +30 -0
- data/docs/code_samples/default_async.txt +29 -0
- data/docs/code_samples/expense_receipts_v5.txt +25 -0
- data/docs/code_samples/expense_receipts_v5_async.txt +24 -0
- data/docs/code_samples/financial_document_v1.txt +25 -0
- data/docs/code_samples/financial_document_v1_async.txt +24 -0
- data/docs/code_samples/idcard_fr_v1.txt +24 -0
- data/docs/code_samples/idcard_fr_v2.txt +24 -0
- data/docs/code_samples/international_id_v2_async.txt +24 -0
- data/docs/code_samples/invoice_splitter_v1_async.txt +24 -0
- data/docs/code_samples/invoices_v4.txt +25 -0
- data/docs/code_samples/invoices_v4_async.txt +24 -0
- data/docs/code_samples/multi_receipts_detector_v1.txt +24 -0
- data/docs/code_samples/passport_v1.txt +24 -0
- data/docs/code_samples/resume_v1_async.txt +24 -0
- data/docs/code_samples/v2_classification.txt +30 -0
- data/docs/code_samples/v2_crop.txt +30 -0
- data/docs/code_samples/v2_extraction.txt +42 -0
- data/docs/code_samples/v2_extraction_webhook.txt +45 -0
- data/docs/code_samples/v2_ocr.txt +30 -0
- data/docs/code_samples/v2_split.txt +30 -0
- data/docs/code_samples/workflow_execution.txt +28 -0
- data/docs/code_samples/workflow_polling.txt +35 -0
- data/examples/auto_invoice_splitter_extraction.rb +48 -0
- data/examples/auto_multi_receipts_detector_extraction.rb +30 -0
- data/lib/mindee/dependency.rb +29 -0
- data/lib/mindee/error/mindee_error.rb +17 -0
- data/lib/mindee/error/mindee_http_error.rb +36 -0
- data/lib/mindee/error/mindee_http_error_v2.rb +45 -0
- data/lib/mindee/error/mindee_http_unknown_error_v2.rb +18 -0
- data/lib/mindee/error/mindee_input_error.rb +30 -0
- data/lib/mindee/error.rb +6 -0
- data/lib/mindee/geometry/min_max.rb +23 -0
- data/lib/mindee/geometry/point.rb +41 -0
- data/lib/mindee/geometry/polygon.rb +37 -0
- data/lib/mindee/geometry/quadrilateral.rb +50 -0
- data/lib/mindee/geometry/utils.rb +88 -0
- data/lib/mindee/geometry.rb +7 -0
- data/lib/mindee/http/.rubocop.yml +7 -0
- data/lib/mindee/http/http_error_handler.rb +106 -0
- data/lib/mindee/http/response_validation.rb +81 -0
- data/lib/mindee/http.rb +3 -0
- data/lib/mindee/image/extracted_image.rb +89 -0
- data/lib/mindee/image/image_compressor.rb +29 -0
- data/lib/mindee/image/image_extractor.rb +118 -0
- data/lib/mindee/image/image_utils.rb +165 -0
- data/lib/mindee/image.rb +6 -0
- data/lib/mindee/input/base_parameters.rb +149 -0
- data/lib/mindee/input/local_response.rb +80 -0
- data/lib/mindee/input/polling_options.rb +26 -0
- data/lib/mindee/input/sources/base64_input_source.rb +31 -0
- data/lib/mindee/input/sources/bytes_input_source.rb +21 -0
- data/lib/mindee/input/sources/file_input_source.rb +20 -0
- data/lib/mindee/input/sources/local_input_source.rb +216 -0
- data/lib/mindee/input/sources/path_input_source.rb +20 -0
- data/lib/mindee/input/sources/url_input_source.rb +130 -0
- data/lib/mindee/input/sources.rb +8 -0
- data/lib/mindee/input.rb +4 -0
- data/lib/mindee/logging/logger.rb +24 -0
- data/lib/mindee/logging.rb +3 -0
- data/lib/mindee/page_options.rb +24 -0
- data/lib/mindee/pdf/extracted_pdf.rb +70 -0
- data/lib/mindee/pdf/pdf_compressor.rb +121 -0
- data/lib/mindee/pdf/pdf_extractor.rb +121 -0
- data/lib/mindee/pdf/pdf_processor.rb +91 -0
- data/lib/mindee/pdf/pdf_tools.rb +201 -0
- data/lib/mindee/pdf.rb +7 -0
- data/lib/mindee/v1/client.rb +490 -0
- data/lib/mindee/v1/extraction/multi_receipts_extractor.rb +32 -0
- data/lib/mindee/v1/extraction.rb +3 -0
- data/lib/mindee/v1/http/.rubocop.yml +7 -0
- data/lib/mindee/v1/http/endpoint.rb +221 -0
- data/lib/mindee/v1/http/workflow_endpoint.rb +93 -0
- data/lib/mindee/v1/http.rb +4 -0
- data/lib/mindee/v1/parsing/common/api_request.rb +38 -0
- data/lib/mindee/v1/parsing/common/api_response.rb +63 -0
- data/lib/mindee/v1/parsing/common/document.rb +86 -0
- data/lib/mindee/v1/parsing/common/execution.rb +78 -0
- data/lib/mindee/v1/parsing/common/execution_file.rb +26 -0
- data/lib/mindee/v1/parsing/common/execution_priority.rb +38 -0
- data/lib/mindee/v1/parsing/common/extras/cropper_extra.rb +32 -0
- data/lib/mindee/v1/parsing/common/extras/extras.rb +62 -0
- data/lib/mindee/v1/parsing/common/extras/full_text_ocr_extra.rb +35 -0
- data/lib/mindee/v1/parsing/common/extras/rag_extra.rb +28 -0
- data/lib/mindee/v1/parsing/common/extras.rb +6 -0
- data/lib/mindee/v1/parsing/common/inference.rb +69 -0
- data/lib/mindee/v1/parsing/common/job.rb +48 -0
- data/lib/mindee/v1/parsing/common/ocr/mvision_v1.rb +52 -0
- data/lib/mindee/v1/parsing/common/ocr/ocr.rb +180 -0
- data/lib/mindee/v1/parsing/common/ocr.rb +3 -0
- data/lib/mindee/v1/parsing/common/orientation.rb +28 -0
- data/lib/mindee/v1/parsing/common/page.rb +49 -0
- data/lib/mindee/v1/parsing/common/prediction.rb +19 -0
- data/lib/mindee/v1/parsing/common/product.rb +26 -0
- data/lib/mindee/v1/parsing/common/workflow_response.rb +30 -0
- data/lib/mindee/v1/parsing/common.rb +15 -0
- data/lib/mindee/v1/parsing/standard/abstract_field.rb +74 -0
- data/lib/mindee/v1/parsing/standard/address_field.rb +51 -0
- data/lib/mindee/v1/parsing/standard/amount_field.rb +28 -0
- data/lib/mindee/v1/parsing/standard/base_field.rb +30 -0
- data/lib/mindee/v1/parsing/standard/boolean_field.rb +29 -0
- data/lib/mindee/v1/parsing/standard/classification_field.rb +18 -0
- data/lib/mindee/v1/parsing/standard/company_registration_field.rb +45 -0
- data/lib/mindee/v1/parsing/standard/date_field.rb +40 -0
- data/lib/mindee/v1/parsing/standard/feature_field.rb +26 -0
- data/lib/mindee/v1/parsing/standard/locale_field.rb +52 -0
- data/lib/mindee/v1/parsing/standard/payment_details_field.rb +44 -0
- data/lib/mindee/v1/parsing/standard/position_field.rb +61 -0
- data/lib/mindee/v1/parsing/standard/string_field.rb +26 -0
- data/lib/mindee/v1/parsing/standard/tax_field.rb +110 -0
- data/lib/mindee/v1/parsing/standard.rb +15 -0
- data/lib/mindee/v1/parsing/universal/universal_list_field.rb +60 -0
- data/lib/mindee/v1/parsing/universal/universal_object_field.rb +123 -0
- data/lib/mindee/v1/parsing/universal.rb +4 -0
- data/lib/mindee/v1/parsing.rb +5 -0
- data/lib/mindee/v1/product/.rubocop.yml +12 -0
- data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1.rb +47 -0
- data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb +47 -0
- data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rb +38 -0
- data/lib/mindee/v1/product/cropper/cropper_v1.rb +47 -0
- data/lib/mindee/v1/product/cropper/cropper_v1_document.rb +15 -0
- data/lib/mindee/v1/product/cropper/cropper_v1_page.rb +55 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1.rb +47 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb +329 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb +124 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1_line_items.rb +64 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1_page.rb +38 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rb +40 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb +63 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb +60 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rb +40 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb +169 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rb +40 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb +78 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rb +56 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v1.rb +49 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb +106 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v1_page.rb +57 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v2.rb +49 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb +143 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v2_page.rb +65 -0
- data/lib/mindee/v1/product/international_id/international_id_v2.rb +47 -0
- data/lib/mindee/v1/product/international_id/international_id_v2_document.rb +164 -0
- data/lib/mindee/v1/product/international_id/international_id_v2_page.rb +38 -0
- data/lib/mindee/v1/product/invoice/invoice_v4.rb +47 -0
- data/lib/mindee/v1/product/invoice/invoice_v4_document.rb +300 -0
- data/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb +124 -0
- data/lib/mindee/v1/product/invoice/invoice_v4_line_items.rb +64 -0
- data/lib/mindee/v1/product/invoice/invoice_v4_page.rb +38 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rb +47 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb +66 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +58 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +50 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rb +38 -0
- data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rb +47 -0
- data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +38 -0
- data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +38 -0
- data/lib/mindee/v1/product/passport/passport_v1.rb +47 -0
- data/lib/mindee/v1/product/passport/passport_v1_document.rb +112 -0
- data/lib/mindee/v1/product/passport/passport_v1_page.rb +38 -0
- data/lib/mindee/v1/product/receipt/receipt_v5.rb +47 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_document.rb +187 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb +88 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_line_items.rb +56 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_page.rb +38 -0
- data/lib/mindee/v1/product/resume/resume_v1.rb +47 -0
- data/lib/mindee/v1/product/resume/resume_v1_certificate.rb +82 -0
- data/lib/mindee/v1/product/resume/resume_v1_certificates.rb +60 -0
- data/lib/mindee/v1/product/resume/resume_v1_document.rb +340 -0
- data/lib/mindee/v1/product/resume/resume_v1_education.rb +106 -0
- data/lib/mindee/v1/product/resume/resume_v1_educations.rb +66 -0
- data/lib/mindee/v1/product/resume/resume_v1_language.rb +66 -0
- data/lib/mindee/v1/product/resume/resume_v1_languages.rb +56 -0
- data/lib/mindee/v1/product/resume/resume_v1_page.rb +38 -0
- data/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb +122 -0
- data/lib/mindee/v1/product/resume/resume_v1_professional_experiences.rb +70 -0
- data/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb +66 -0
- data/lib/mindee/v1/product/resume/resume_v1_social_networks_urls.rb +56 -0
- data/lib/mindee/v1/product/universal/universal.rb +48 -0
- data/lib/mindee/v1/product/universal/universal_document.rb +35 -0
- data/lib/mindee/v1/product/universal/universal_page.rb +54 -0
- data/lib/mindee/v1/product/universal/universal_prediction.rb +128 -0
- data/lib/mindee/v1/product.rb +18 -0
- data/lib/mindee/v1.rb +7 -0
- data/lib/mindee/v2/client.rb +132 -0
- data/lib/mindee/v2/file_operation/crop.rb +51 -0
- data/lib/mindee/v2/file_operation/crop_files.rb +25 -0
- data/lib/mindee/v2/file_operation/split.rb +37 -0
- data/lib/mindee/v2/file_operation/split_files.rb +25 -0
- data/lib/mindee/v2/file_operation.rb +6 -0
- data/lib/mindee/v2/http/.rubocop.yml +7 -0
- data/lib/mindee/v2/http/api_v2_settings.rb +65 -0
- data/lib/mindee/v2/http/mindee_api_v2.rb +230 -0
- data/lib/mindee/v2/http.rb +4 -0
- data/lib/mindee/v2/parsing/base_inference.rb +44 -0
- data/lib/mindee/v2/parsing/base_response.rb +15 -0
- data/lib/mindee/v2/parsing/common_response.rb +20 -0
- data/lib/mindee/v2/parsing/error_item.rb +21 -0
- data/lib/mindee/v2/parsing/error_response.rb +51 -0
- data/lib/mindee/v2/parsing/field/base_field.rb +63 -0
- data/lib/mindee/v2/parsing/field/field_confidence.rb +128 -0
- data/lib/mindee/v2/parsing/field/field_location.rb +33 -0
- data/lib/mindee/v2/parsing/field/inference_fields.rb +105 -0
- data/lib/mindee/v2/parsing/field/list_field.rb +79 -0
- data/lib/mindee/v2/parsing/field/object_field.rb +138 -0
- data/lib/mindee/v2/parsing/field/simple_field.rb +60 -0
- data/lib/mindee/v2/parsing/field.rb +9 -0
- data/lib/mindee/v2/parsing/inference_active_options.rb +67 -0
- data/lib/mindee/v2/parsing/inference_file.rb +38 -0
- data/lib/mindee/v2/parsing/inference_job.rb +25 -0
- data/lib/mindee/v2/parsing/inference_model.rb +30 -0
- data/lib/mindee/v2/parsing/job.rb +93 -0
- data/lib/mindee/v2/parsing/job_response.rb +30 -0
- data/lib/mindee/v2/parsing/job_webhook.rb +59 -0
- data/lib/mindee/v2/parsing/rag_metadata.rb +17 -0
- data/lib/mindee/v2/parsing/raw_text.rb +27 -0
- data/lib/mindee/v2/parsing/raw_text_page.rb +24 -0
- data/lib/mindee/v2/parsing/search/pagination_metadata.rb +44 -0
- data/lib/mindee/v2/parsing/search/search_model.rb +38 -0
- data/lib/mindee/v2/parsing/search/search_models.rb +34 -0
- data/lib/mindee/v2/parsing/search/search_response.rb +38 -0
- data/lib/mindee/v2/parsing/search.rb +6 -0
- data/lib/mindee/v2/parsing.rb +16 -0
- data/lib/mindee/v2/product/base_product.rb +28 -0
- data/lib/mindee/v2/product/classification/classification.rb +20 -0
- data/lib/mindee/v2/product/classification/classification_classifier.rb +25 -0
- data/lib/mindee/v2/product/classification/classification_inference.rb +35 -0
- data/lib/mindee/v2/product/classification/classification_response.rb +32 -0
- data/lib/mindee/v2/product/classification/classification_result.rb +27 -0
- data/lib/mindee/v2/product/classification/params/classification_parameters.rb +47 -0
- data/lib/mindee/v2/product/crop/crop.rb +20 -0
- data/lib/mindee/v2/product/crop/crop_inference.rb +34 -0
- data/lib/mindee/v2/product/crop/crop_item.rb +39 -0
- data/lib/mindee/v2/product/crop/crop_response.rb +40 -0
- data/lib/mindee/v2/product/crop/crop_result.rb +34 -0
- data/lib/mindee/v2/product/crop/params/crop_parameters.rb +47 -0
- data/lib/mindee/v2/product/extraction/extraction.rb +21 -0
- data/lib/mindee/v2/product/extraction/extraction_inference.rb +40 -0
- data/lib/mindee/v2/product/extraction/extraction_response.rb +32 -0
- data/lib/mindee/v2/product/extraction/extraction_result.rb +44 -0
- data/lib/mindee/v2/product/extraction/params/data_schema.rb +51 -0
- data/lib/mindee/v2/product/extraction/params/data_schema_field.rb +69 -0
- data/lib/mindee/v2/product/extraction/params/data_schema_replace.rb +39 -0
- data/lib/mindee/v2/product/extraction/params/extraction_parameters.rb +125 -0
- data/lib/mindee/v2/product/ocr/ocr.rb +20 -0
- data/lib/mindee/v2/product/ocr/ocr_inference.rb +34 -0
- data/lib/mindee/v2/product/ocr/ocr_page.rb +33 -0
- data/lib/mindee/v2/product/ocr/ocr_response.rb +32 -0
- data/lib/mindee/v2/product/ocr/ocr_result.rb +34 -0
- data/lib/mindee/v2/product/ocr/ocr_word.rb +29 -0
- data/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +47 -0
- data/lib/mindee/v2/product/split/params/split_parameters.rb +48 -0
- data/lib/mindee/v2/product/split/split.rb +19 -0
- data/lib/mindee/v2/product/split/split_inference.rb +34 -0
- data/lib/mindee/v2/product/split/split_range.rb +38 -0
- data/lib/mindee/v2/product/split/split_response.rb +40 -0
- data/lib/mindee/v2/product/split/split_result.rb +34 -0
- data/lib/mindee/v2/product.rb +7 -0
- data/lib/mindee/v2.rb +7 -0
- data/lib/mindee/version.rb +26 -0
- data/lib/mindee.rb +135 -0
- data/mindee-lite.gemspec +36 -0
- data/mindee.gemspec +44 -0
- data/sig/custom/marcel.rbs +3 -0
- data/sig/custom/mini_magick.rbs +31 -0
- data/sig/custom/net_http.rbs +43 -0
- data/sig/custom/origami.rbs +59 -0
- data/sig/mindee/dependency.rbs +13 -0
- data/sig/mindee/error/mindee_error.rbs +13 -0
- data/sig/mindee/error/mindee_http_error.rbs +17 -0
- data/sig/mindee/error/mindee_http_error_v2.rbs +15 -0
- data/sig/mindee/error/mindee_http_unknown_error_v2.rbs +9 -0
- data/sig/mindee/error/mindee_input_error.rbs +18 -0
- data/sig/mindee/geometry/min_max.rbs +11 -0
- data/sig/mindee/geometry/point.rbs +14 -0
- data/sig/mindee/geometry/polygon.rbs +12 -0
- data/sig/mindee/geometry/quadrilateral.rbs +15 -0
- data/sig/mindee/geometry/utils.rbs +13 -0
- data/sig/mindee/http/http_error_handler.rbs +15 -0
- data/sig/mindee/http/response_validation.rbs +11 -0
- data/sig/mindee/image/extracted_image.rbs +21 -0
- data/sig/mindee/image/image_compressor.rbs +8 -0
- data/sig/mindee/image/image_extractor.rbs +13 -0
- data/sig/mindee/image/image_utils.rbs +19 -0
- data/sig/mindee/input/base_parameters.rbs +35 -0
- data/sig/mindee/input/local_response.rbs +14 -0
- data/sig/mindee/input/polling_options.rbs +12 -0
- data/sig/mindee/input/sources/base64_input_source.rbs +11 -0
- data/sig/mindee/input/sources/bytes_input_source.rbs +10 -0
- data/sig/mindee/input/sources/file_input_source.rbs +10 -0
- data/sig/mindee/input/sources/local_input_source.rbs +30 -0
- data/sig/mindee/input/sources/path_input_source.rbs +10 -0
- data/sig/mindee/input/sources/url_input_source.rbs +20 -0
- data/sig/mindee/logging/logger.rbs +11 -0
- data/sig/mindee/page_options.rbs +11 -0
- data/sig/mindee/pdf/extracted_pdf.rbs +17 -0
- data/sig/mindee/pdf/pdf_compressor.rbs +15 -0
- data/sig/mindee/pdf/pdf_extractor.rbs +19 -0
- data/sig/mindee/pdf/pdf_processor.rbs +12 -0
- data/sig/mindee/pdf/pdf_tools.rbs +31 -0
- data/sig/mindee/v1/client.rbs +84 -0
- data/sig/mindee/v1/extraction/multi_receipts_extractor.rbs +8 -0
- data/sig/mindee/v1/http/endpoint.rbs +41 -0
- data/sig/mindee/v1/http/workflow_endpoint.rbs +22 -0
- data/sig/mindee/v1/parsing/common/api_request.rbs +22 -0
- data/sig/mindee/v1/parsing/common/api_response.rbs +31 -0
- data/sig/mindee/v1/parsing/common/document.rbs +32 -0
- data/sig/mindee/v1/parsing/common/execution.rbs +26 -0
- data/sig/mindee/v1/parsing/common/execution_file.rbs +16 -0
- data/sig/mindee/v1/parsing/common/execution_priority.rbs +16 -0
- data/sig/mindee/v1/parsing/common/extras/cropper_extra.rbs +18 -0
- data/sig/mindee/v1/parsing/common/extras/extras.rbs +24 -0
- data/sig/mindee/v1/parsing/common/extras/full_text_ocr_extra.rbs +22 -0
- data/sig/mindee/v1/parsing/common/extras/rag_extra.rbs +19 -0
- data/sig/mindee/v1/parsing/common/inference.rbs +31 -0
- data/sig/mindee/v1/parsing/common/job.rbs +24 -0
- data/sig/mindee/v1/parsing/common/ocr/mvision_v1.rbs +20 -0
- data/sig/mindee/v1/parsing/common/ocr/ocr.rbs +56 -0
- data/sig/mindee/v1/parsing/common/orientation.rbs +15 -0
- data/sig/mindee/v1/parsing/common/page.rbs +19 -0
- data/sig/mindee/v1/parsing/common/prediction.rbs +14 -0
- data/sig/mindee/v1/parsing/common/product.rbs +16 -0
- data/sig/mindee/v1/parsing/common/workflow_response.rbs +22 -0
- data/sig/mindee/v1/parsing/standard/abstract_field.rbs +30 -0
- data/sig/mindee/v1/parsing/standard/address_field.rbs +28 -0
- data/sig/mindee/v1/parsing/standard/amount_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/base_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/boolean_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/classification_field.rbs +12 -0
- data/sig/mindee/v1/parsing/standard/company_registration_field.rbs +20 -0
- data/sig/mindee/v1/parsing/standard/date_field.rbs +20 -0
- data/sig/mindee/v1/parsing/standard/feature_field.rbs +12 -0
- data/sig/mindee/v1/parsing/standard/locale_field.rbs +24 -0
- data/sig/mindee/v1/parsing/standard/payment_details_field.rbs +19 -0
- data/sig/mindee/v1/parsing/standard/position_field.rbs +26 -0
- data/sig/mindee/v1/parsing/standard/string_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/tax_field.rbs +33 -0
- data/sig/mindee/v1/parsing/universal/universal_list_field.rbs +21 -0
- data/sig/mindee/v1/parsing/universal/universal_object_field.rbs +38 -0
- data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1.rbs +13 -0
- data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rbs +16 -0
- data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/cropper/cropper_v1.rbs +13 -0
- data/sig/mindee/v1/product/cropper/cropper_v1_document.rbs +14 -0
- data/sig/mindee/v1/product/cropper/cropper_v1_page.rbs +19 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1.rbs +13 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1_document.rbs +49 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs +35 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1_line_items.rbs +15 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rbs +15 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rbs +19 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rbs +19 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rbs +15 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs +25 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rbs +20 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rbs +19 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2.rbs +15 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rbs +31 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rbs +19 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs +27 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rbs +17 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v1.rbs +15 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v1_document.rbs +26 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v1_page.rbs +20 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v2.rbs +15 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v2_document.rbs +31 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v2_page.rbs +21 -0
- data/sig/mindee/v1/product/international_id/international_id_v2.rbs +13 -0
- data/sig/mindee/v1/product/international_id/international_id_v2_document.rbs +31 -0
- data/sig/mindee/v1/product/international_id/international_id_v2_page.rbs +17 -0
- data/sig/mindee/v1/product/invoice/invoice_v4.rbs +13 -0
- data/sig/mindee/v1/product/invoice/invoice_v4_document.rbs +45 -0
- data/sig/mindee/v1/product/invoice/invoice_v4_line_item.rbs +35 -0
- data/sig/mindee/v1/product/invoice/invoice_v4_line_items.rbs +15 -0
- data/sig/mindee/v1/product/invoice/invoice_v4_page.rbs +17 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rbs +13 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rbs +17 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs +21 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rbs +15 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs +14 -0
- data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs +15 -0
- data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/passport/passport_v1.rbs +13 -0
- data/sig/mindee/v1/product/passport/passport_v1_document.rbs +25 -0
- data/sig/mindee/v1/product/passport/passport_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/receipt/receipt_v5.rbs +13 -0
- data/sig/mindee/v1/product/receipt/receipt_v5_document.rbs +33 -0
- data/sig/mindee/v1/product/receipt/receipt_v5_line_item.rbs +27 -0
- data/sig/mindee/v1/product/receipt/receipt_v5_line_items.rbs +15 -0
- data/sig/mindee/v1/product/receipt/receipt_v5_page.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1.rbs +13 -0
- data/sig/mindee/v1/product/resume/resume_v1_certificate.rbs +27 -0
- data/sig/mindee/v1/product/resume/resume_v1_certificates.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1_document.rbs +69 -0
- data/sig/mindee/v1/product/resume/resume_v1_education.rbs +33 -0
- data/sig/mindee/v1/product/resume/resume_v1_educations.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1_language.rbs +23 -0
- data/sig/mindee/v1/product/resume/resume_v1_languages.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1_page.rbs +19 -0
- data/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs +37 -0
- data/sig/mindee/v1/product/resume/resume_v1_professional_experiences.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs +23 -0
- data/sig/mindee/v1/product/resume/resume_v1_social_networks_urls.rbs +17 -0
- data/sig/mindee/v1/product/universal/universal.rbs +16 -0
- data/sig/mindee/v1/product/universal/universal_document.rbs +12 -0
- data/sig/mindee/v1/product/universal/universal_page.rbs +18 -0
- data/sig/mindee/v1/product/universal/universal_prediction.rbs +30 -0
- data/sig/mindee/v2/client.rbs +29 -0
- data/sig/mindee/v2/file_operation/crop.rbs +10 -0
- data/sig/mindee/v2/file_operation/crop_files.rbs +9 -0
- data/sig/mindee/v2/file_operation/split.rbs +11 -0
- data/sig/mindee/v2/file_operation/split_files.rbs +9 -0
- data/sig/mindee/v2/http/api_v2_settings.rbs +27 -0
- data/sig/mindee/v2/http/mindee_api_v2.rbs +52 -0
- data/sig/mindee/v2/parsing/base_inference.rbs +18 -0
- data/sig/mindee/v2/parsing/base_response.rbs +11 -0
- data/sig/mindee/v2/parsing/common_response.rbs +12 -0
- data/sig/mindee/v2/parsing/error_item.rbs +13 -0
- data/sig/mindee/v2/parsing/error_response.rbs +20 -0
- data/sig/mindee/v2/parsing/field/base_field.rbs +17 -0
- data/sig/mindee/v2/parsing/field/field_confidence.rbs +30 -0
- data/sig/mindee/v2/parsing/field/field_location.rbs +16 -0
- data/sig/mindee/v2/parsing/field/inference_fields.rbs +20 -0
- data/sig/mindee/v2/parsing/field/list_field.rbs +23 -0
- data/sig/mindee/v2/parsing/field/object_field.rbs +27 -0
- data/sig/mindee/v2/parsing/field/simple_field.rbs +16 -0
- data/sig/mindee/v2/parsing/inference_active_options.rbs +26 -0
- data/sig/mindee/v2/parsing/inference_file.rbs +17 -0
- data/sig/mindee/v2/parsing/inference_job.rbs +13 -0
- data/sig/mindee/v2/parsing/inference_model.rbs +12 -0
- data/sig/mindee/v2/parsing/job.rbs +24 -0
- data/sig/mindee/v2/parsing/job_response.rbs +14 -0
- data/sig/mindee/v2/parsing/job_webhook.rbs +19 -0
- data/sig/mindee/v2/parsing/rag_metadata.rbs +13 -0
- data/sig/mindee/v2/parsing/raw_text.rbs +12 -0
- data/sig/mindee/v2/parsing/raw_text_page.rbs +11 -0
- data/sig/mindee/v2/parsing/search/pagination_metadata.rbs +20 -0
- data/sig/mindee/v2/parsing/search/search_model.rbs +19 -0
- data/sig/mindee/v2/parsing/search/search_response.rbs +17 -0
- data/sig/mindee/v2/parsing/search_models.rbs +14 -0
- data/sig/mindee/v2/product/base_product.rbs +19 -0
- data/sig/mindee/v2/product/classification/classification.rbs +10 -0
- data/sig/mindee/v2/product/classification/classification_classifier.rbs +15 -0
- data/sig/mindee/v2/product/classification/classification_inference.rbs +15 -0
- data/sig/mindee/v2/product/classification/classification_response.rbs +23 -0
- data/sig/mindee/v2/product/classification/classification_result.rbs +15 -0
- data/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs +23 -0
- data/sig/mindee/v2/product/crop/crop.rbs +10 -0
- data/sig/mindee/v2/product/crop/crop_inference.rbs +14 -0
- data/sig/mindee/v2/product/crop/crop_item.rbs +18 -0
- data/sig/mindee/v2/product/crop/crop_response.rbs +25 -0
- data/sig/mindee/v2/product/crop/crop_result.rbs +14 -0
- data/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs +23 -0
- data/sig/mindee/v2/product/extraction/extraction.rbs +15 -0
- data/sig/mindee/v2/product/extraction/extraction_inference.rbs +19 -0
- data/sig/mindee/v2/product/extraction/extraction_response.rbs +24 -0
- data/sig/mindee/v2/product/extraction/extraction_result.rbs +18 -0
- data/sig/mindee/v2/product/extraction/params/data_schema.rbs +21 -0
- data/sig/mindee/v2/product/extraction/params/data_schema_field.rbs +29 -0
- data/sig/mindee/v2/product/extraction/params/data_schema_replace.rbs +21 -0
- data/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs +38 -0
- data/sig/mindee/v2/product/ocr/ocr.rbs +10 -0
- data/sig/mindee/v2/product/ocr/ocr_inference.rbs +14 -0
- data/sig/mindee/v2/product/ocr/ocr_page.rbs +15 -0
- data/sig/mindee/v2/product/ocr/ocr_response.rbs +23 -0
- data/sig/mindee/v2/product/ocr/ocr_result.rbs +14 -0
- data/sig/mindee/v2/product/ocr/ocr_word.rbs +15 -0
- data/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs +24 -0
- data/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs +23 -0
- data/sig/mindee/v2/product/split/split.rbs +10 -0
- data/sig/mindee/v2/product/split/split_inference.rbs +14 -0
- data/sig/mindee/v2/product/split/split_range.rbs +18 -0
- data/sig/mindee/v2/product/split/split_response.rbs +25 -0
- data/sig/mindee/v2/product/split/split_result.rbs +14 -0
- data/sig/mindee/version.rbs +6 -0
- data/sig/mindee.rbs +62 -0
- metadata +600 -0
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../input'
|
|
4
|
+
require_relative 'http'
|
|
5
|
+
require_relative '../logging'
|
|
6
|
+
require_relative '../page_options'
|
|
7
|
+
require_relative 'parsing/common/api_response'
|
|
8
|
+
require_relative 'parsing/common/job'
|
|
9
|
+
require_relative 'parsing/common/workflow_response'
|
|
10
|
+
require_relative 'product'
|
|
11
|
+
|
|
12
|
+
# Default owner for products.
|
|
13
|
+
OTS_OWNER = 'mindee'
|
|
14
|
+
|
|
15
|
+
module Mindee
|
|
16
|
+
module V1
|
|
17
|
+
# Class for configuration options in parse calls.
|
|
18
|
+
#
|
|
19
|
+
# @!attribute all_words [bool] Whether to include the full text for each page.
|
|
20
|
+
# This performs a full OCR operation on the server and will increase response time.
|
|
21
|
+
# @!attribute full_text [bool] Whether to include the full OCR text response in compatible APIs.
|
|
22
|
+
# This performs a full OCR operation on the server and may increase response time.
|
|
23
|
+
# @!attribute close_file [bool] Whether to `close()` the file after parsing it.
|
|
24
|
+
# Set to false if you need to access the file after this operation.
|
|
25
|
+
# @!attribute page_options [PageOptions, Hash, nil] Page cutting/merge options:
|
|
26
|
+
# * `:page_indexes` Zero-based list of page indexes.
|
|
27
|
+
# * `:operation` Operation to apply on the document, given the specified page indexes:
|
|
28
|
+
# * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
|
|
29
|
+
# * `:REMOVE` - remove the specified pages, and keep all others.
|
|
30
|
+
# * `:on_min_pages` Apply the operation only if the document has at least this many pages.
|
|
31
|
+
# @!attribute cropper [bool] Whether to include cropper results for each page.
|
|
32
|
+
# This performs a cropping operation on the server and will increase response time.
|
|
33
|
+
# @!attribute initial_delay_sec [Numeric] Initial delay before polling. Defaults to 2.
|
|
34
|
+
# @!attribute delay_sec [Numeric] Delay between polling attempts. Defaults to 1.5.
|
|
35
|
+
# @!attribute max_retries [Integer] Maximum number of retries. Defaults to 80.
|
|
36
|
+
class ParseOptions
  attr_accessor :all_words, :full_text, :close_file, :page_options, :cropper, :rag,
                :workflow_id, :initial_delay_sec, :delay_sec, :max_retries

  # Builds the option set from a hash, falling back to a default for every missing key.
  #
  # @param params [Hash] Option values; keys may be strings or symbols (they are symbolized first).
  def initialize(params: {})
    settings = params.transform_keys(&:to_sym)

    @all_words = settings.fetch(:all_words, false)
    @full_text = settings.fetch(:full_text, false)
    @close_file = settings.fetch(:close_file, true)

    # Anything that is not already a PageOptions (including nil) is handed to PageOptions.new.
    page_opts = settings.fetch(:page_options, nil)
    @page_options = page_opts.is_a?(PageOptions) ? page_opts : PageOptions.new(params: page_opts)

    @cropper = settings.fetch(:cropper, false)
    @rag = settings.fetch(:rag, false)
    @workflow_id = settings.fetch(:workflow_id, nil)

    # Polling settings, read by the async auto-polling code path.
    @initial_delay_sec = settings.fetch(:initial_delay_sec, 2)
    @delay_sec = settings.fetch(:delay_sec, 1.5)
    @max_retries = settings.fetch(:max_retries, 80)
  end
end
|
|
56
|
+
|
|
57
|
+
# Class for configuration options in workflow executions.
|
|
58
|
+
#
|
|
59
|
+
# @!attribute document_alias [String, nil] Alias to give to the document.
|
|
60
|
+
# @!attribute priority [Symbol, nil] Priority to give to the document.
|
|
61
|
+
# @!attribute full_text [bool] Whether to include the full OCR text response in compatible APIs.
|
|
62
|
+
# This performs a full OCR operation on the server and may increase response time.
|
|
63
|
+
# @!attribute public_url [String, nil] A unique, encrypted URL for accessing the document validation interface
|
|
64
|
+
# without requiring authentication.
|
|
65
|
+
# @!attribute rag [bool, nil] Whether to enable Retrieval-Augmented Generation.
|
|
66
|
+
# @!attribute page_options [PageOptions, Hash, nil] Page cutting/merge options:
|
|
67
|
+
# * `:page_indexes` Zero-based list of page indexes.
|
|
68
|
+
# * `:operation` Operation to apply on the document, given the specified page indexes:
|
|
69
|
+
# * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
|
|
70
|
+
# * `:REMOVE` - remove the specified pages, and keep all others.
|
|
71
|
+
# * `:on_min_pages` Apply the operation only if the document has at least this many pages.
|
|
72
|
+
# @!attribute close_file [bool, nil] Whether to close the file after sending it. Defaults to true.
|
|
73
|
+
class WorkflowOptions
  attr_accessor :document_alias, :priority, :full_text, :public_url, :page_options, :rag, :close_file

  # Builds the option set from a hash, falling back to a default for every missing key.
  #
  # @param params [Hash] Option values; keys may be strings or symbols (they are symbolized first).
  def initialize(params: {})
    settings = params.transform_keys(&:to_sym)

    @document_alias = settings.fetch(:document_alias, nil)
    @priority = settings.fetch(:priority, nil)
    @full_text = settings.fetch(:full_text, false)
    @public_url = settings.fetch(:public_url, nil)
    @rag = settings.fetch(:rag, nil)

    # Anything that is not already a PageOptions (including nil) is handed to PageOptions.new.
    page_opts = settings.fetch(:page_options, nil)
    @page_options = page_opts.is_a?(PageOptions) ? page_opts : PageOptions.new(params: page_opts)

    @close_file = settings.fetch(:close_file, true)
  end
end
|
|
89
|
+
|
|
90
|
+
# Mindee API Client.
|
|
91
|
+
# See: https://developers.mindee.com/docs
|
|
92
|
+
class Client
|
|
93
|
+
# @param api_key [String]
|
|
94
|
+
def initialize(api_key: '')
  # Stored as given; it is converted with `to_s` when an endpoint object is built.
  @api_key = api_key
end
|
|
97
|
+
|
|
98
|
+
# Enqueue a document for parsing and automatically try to retrieve it if needed.
|
|
99
|
+
#
|
|
100
|
+
# Accepts options either as a Hash or as a ParseOptions instance.
|
|
101
|
+
#
|
|
102
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
|
|
103
|
+
# @param product_class [Mindee::Inference] The class of the product.
|
|
104
|
+
# @param endpoint [Mindee::V1::HTTP::Endpoint, nil] Endpoint of the API.
|
|
105
|
+
# @param options [Hash] A hash of options to configure the parsing behavior. Possible keys:
|
|
106
|
+
# * `:all_words` [bool] Whether to extract all the words on each page.
|
|
107
|
+
# This performs a full OCR operation on the server and will increase response time.
|
|
108
|
+
# * `:full_text` [bool] Whether to include the full OCR text response in compatible APIs.
|
|
109
|
+
# This performs a full OCR operation on the server and may increase response time.
|
|
110
|
+
# * `:close_file` [bool] Whether to `close()` the file after parsing it.
|
|
111
|
+
# Set to false if you need to access the file after this operation.
|
|
112
|
+
# * `:page_options` [Hash, nil] Page cutting/merge options:
|
|
113
|
+
# - `:page_indexes` [Array<Integer>] Zero-based list of page indexes.
|
|
114
|
+
# - `:operation` [Symbol] Operation to apply on the document, given the `page_indexes` specified:
|
|
115
|
+
# - `:KEEP_ONLY` - keep only the specified pages, and remove all others.
|
|
116
|
+
# - `:REMOVE` - remove the specified pages, and keep all others.
|
|
117
|
+
# - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages.
|
|
118
|
+
# * `:cropper` [bool, nil] Whether to include cropper results for each page.
|
|
119
|
+
# This performs a cropping operation on the server and will increase response time.
|
|
120
|
+
# * `:initial_delay_sec` [Numeric] Initial delay before polling. Defaults to 2.
|
|
121
|
+
# * `:delay_sec` [Numeric] Delay between polling attempts. Defaults to 1.5.
|
|
122
|
+
# * `:max_retries` [Integer] Maximum number of retries. Defaults to 80.
|
|
123
|
+
# @param enqueue [bool] Whether to enqueue the file.
|
|
124
|
+
# @return [Mindee::V1::Parsing::Common::ApiResponse]
|
|
125
|
+
def parse(input_source, product_class, endpoint: nil, options: {}, enqueue: true)
  parse_opts = normalize_parse_options(options)

  # Page operations are only attempted on local sources.
  local_source = input_source.is_a?(Input::Source::LocalInputSource)
  process_pdf_if_required(input_source, parse_opts) if local_source

  endpoint ||= initialize_endpoint(product_class)

  # Fall back to a synchronous call when polling was declined or the product has no async mode.
  return parse_sync(input_source, product_class, endpoint, parse_opts) unless enqueue && product_class.has_async

  enqueue_and_parse(input_source, product_class, endpoint, parse_opts)
end
|
|
136
|
+
|
|
137
|
+
# Call prediction API on a document and parse the results.
|
|
138
|
+
#
|
|
139
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
|
|
140
|
+
# @param product_class [Mindee::Inference] class of the product
|
|
141
|
+
# @param endpoint [Mindee::V1::HTTP::Endpoint, nil] Endpoint of the API.
|
|
142
|
+
# @param options [Hash] A hash of options to configure the parsing behavior. Possible keys:
|
|
143
|
+
# * `:all_words` [bool] Whether to extract all the words on each page.
|
|
144
|
+
# This performs a full OCR operation on the server and will increase response time.
|
|
145
|
+
# * `:full_text` [bool] Whether to include the full OCR text response in compatible APIs.
|
|
146
|
+
# This performs a full OCR operation on the server and may increase response time.
|
|
147
|
+
# * `:close_file` [bool] Whether to `close()` the file after parsing it.
|
|
148
|
+
# Set to false if you need to access the file after this operation.
|
|
149
|
+
# * `:page_options` [Hash, nil] Page cutting/merge options:
|
|
150
|
+
# - `:page_indexes` [Array<Integer>] Zero-based list of page indexes.
|
|
151
|
+
# - `:operation` [Symbol] Operation to apply on the document, given the `page_indexes` specified:
|
|
152
|
+
# - `:KEEP_ONLY` - keep only the specified pages, and remove all others.
|
|
153
|
+
# - `:REMOVE` - remove the specified pages, and keep all others.
|
|
154
|
+
# - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages.
|
|
155
|
+
# * `:cropper` [bool, nil] Whether to include cropper results for each page.
|
|
156
|
+
# This performs a cropping operation on the server and will increase response time.
|
|
157
|
+
# @return [Mindee::V1::Parsing::Common::ApiResponse]
|
|
158
|
+
def parse_sync(input_source, product_class, endpoint, options)
  logger.debug("Parsing document as '#{endpoint.url_root}'")

  # `predict` hands back two values: the parsed prediction and the raw HTTP payload.
  prediction, raw_http = endpoint.predict(input_source, options)

  response_class = Mindee::V1::Parsing::Common::ApiResponse
  response_class.new(product_class, prediction, raw_http)
end
|
|
168
|
+
|
|
169
|
+
# Enqueue a document for async parsing
|
|
170
|
+
#
|
|
171
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
|
|
172
|
+
# The source of the input document (local file or URL).
|
|
173
|
+
# @param product_class [Mindee::Inference] The class of the product.
|
|
174
|
+
# @param options [Hash] A hash of options to configure the enqueue behavior. Possible keys:
|
|
175
|
+
# * `:endpoint` [V1::HTTP::Endpoint, nil] Endpoint of the API.
|
|
176
|
+
# Doesn't need to be set in the case of OTS APIs.
|
|
177
|
+
# * `:all_words` [bool] Whether to extract all the words on each page.
|
|
178
|
+
# This performs a full OCR operation on the server and will increase response time.
|
|
179
|
+
# * `:full_text` [bool] Whether to include the full OCR text response in compatible APIs.
|
|
180
|
+
# This performs a full OCR operation on the server and may increase response time.
|
|
181
|
+
# * `:close_file` [bool] Whether to `close()` the file after parsing it.
|
|
182
|
+
# Set to false if you need to access the file after this operation.
|
|
183
|
+
# * `:page_options` [Hash, nil] Page cutting/merge options:
|
|
184
|
+
# - `:page_indexes` [Array<Integer>] Zero-based list of page indexes.
|
|
185
|
+
# - `:operation` [Symbol] Operation to apply on the document, given the `page_indexes` specified:
|
|
186
|
+
# - `:KEEP_ONLY` - keep only the specified pages, and remove all others.
|
|
187
|
+
# - `:REMOVE` - remove the specified pages, and keep all others.
|
|
188
|
+
# - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages.
|
|
189
|
+
# * `:cropper` [bool] Whether to include cropper results for each page.
|
|
190
|
+
# This performs a cropping operation on the server and will increase response time.
|
|
191
|
+
# * `:rag` [bool] Whether to enable Retrieval-Augmented Generation. Only works if a Workflow ID is provided.
|
|
192
|
+
# * `:workflow_id` [String, nil] ID of the workflow to use.
|
|
193
|
+
# @param endpoint [Mindee::V1::HTTP::Endpoint] Endpoint of the API.
|
|
194
|
+
# @return [Mindee::V1::Parsing::Common::ApiResponse]
|
|
195
|
+
def enqueue(input_source, product_class, endpoint: nil, options: {})
  parse_opts = normalize_parse_options(options)
  # Build the product's default endpoint when the caller did not supply one.
  endpoint = initialize_endpoint(product_class) unless endpoint
  logger.debug("Enqueueing document as '#{endpoint.url_root}'")

  # `predict_async` hands back two values: the parsed prediction and the raw HTTP payload.
  prediction, raw_http = endpoint.predict_async(input_source, parse_opts)

  response_class = Mindee::V1::Parsing::Common::ApiResponse
  response_class.new(product_class, prediction, raw_http)
end
|
|
206
|
+
|
|
207
|
+
# Parses a queued document
|
|
208
|
+
#
|
|
209
|
+
# @param job_id [String] ID of the job (queue) to poll from
|
|
210
|
+
# @param product_class [Mindee::Inference] class of the product
|
|
211
|
+
# @param endpoint [V1::HTTP::Endpoint, nil] Endpoint of the API
|
|
212
|
+
# Doesn't need to be set in the case of OTS APIs.
|
|
213
|
+
#
|
|
214
|
+
# @return [Mindee::V1::Parsing::Common::ApiResponse]
|
|
215
|
+
def parse_queued(job_id, product_class, endpoint: nil)
  # Only a nil endpoint triggers the default endpoint lookup.
  endpoint = endpoint.nil? ? initialize_endpoint(product_class) : endpoint
  logger.debug("Fetching queued document as '#{endpoint.url_root}'")

  prediction, raw_http = endpoint.parse_async(job_id)

  response_class = Mindee::V1::Parsing::Common::ApiResponse
  response_class.new(product_class, prediction, raw_http)
end
|
|
221
|
+
|
|
222
|
+
# Enqueue a document for async parsing and automatically try to retrieve it
|
|
223
|
+
#
|
|
224
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
|
|
225
|
+
# The source of the input document (local file or URL).
|
|
226
|
+
# @param product_class [Mindee::Inference] The class of the product.
|
|
227
|
+
# @param options [Hash] A hash of options to configure the parsing behavior. Possible keys:
|
|
228
|
+
# * `:endpoint` [V1::HTTP::Endpoint, nil] Endpoint of the API.
|
|
229
|
+
# Doesn't need to be set in the case of OTS APIs.
|
|
230
|
+
# * `:all_words` [bool] Whether to extract all the words on each page.
|
|
231
|
+
# This performs a full OCR operation on the server and will increase response time.
|
|
232
|
+
# * `:full_text` [bool] Whether to include the full OCR text response in compatible APIs.
|
|
233
|
+
# This performs a full OCR operation on the server and may increase response time.
|
|
234
|
+
# * `:close_file` [bool] Whether to `close()` the file after parsing it.
|
|
235
|
+
# Set to false if you need to access the file after this operation.
|
|
236
|
+
# * `:page_options` [Hash, nil] Page cutting/merge options:
|
|
237
|
+
# - `:page_indexes` [Array<Integer>] Zero-based list of page indexes.
|
|
238
|
+
# - `:operation` [Symbol] Operation to apply on the document, given the `page_indexes` specified:
|
|
239
|
+
# - `:KEEP_ONLY` - keep only the specified pages, and remove all others.
|
|
240
|
+
# - `:REMOVE` - remove the specified pages, and keep all others.
|
|
241
|
+
# - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages.
|
|
242
|
+
# * `:cropper` [bool, nil] Whether to include cropper results for each page.
|
|
243
|
+
# This performs a cropping operation on the server and will increase response time.
|
|
244
|
+
# * `:rag` [bool] Whether to enable Retrieval-Augmented Generation. Only works if a Workflow ID is provided.
|
|
245
|
+
# * `:workflow_id` [String, nil] ID of the workflow to use.
|
|
246
|
+
# * `:initial_delay_sec` [Numeric] Initial delay before polling. Defaults to 2.
|
|
247
|
+
# * `:delay_sec` [Numeric] Delay between polling attempts. Defaults to 1.5.
|
|
248
|
+
# * `:max_retries` [Integer] Maximum number of retries. Defaults to 80.
|
|
249
|
+
# @param endpoint [Mindee::V1::HTTP::Endpoint] Endpoint of the API.
|
|
250
|
+
# @return [Mindee::V1::Parsing::Common::ApiResponse]
|
|
251
|
+
def enqueue_and_parse(input_source, product_class, endpoint, options)
  validate_async_params(options.initial_delay_sec, options.delay_sec, options.max_retries)
  enqueue_res = enqueue(input_source, product_class, endpoint: endpoint, options: options)
  job = enqueue_res.job
  raise Error::MindeeAPIError, 'Expected job to be present' unless job

  job_id = job.id

  # First poll happens after the initial delay and counts as attempt number one.
  sleep(options.initial_delay_sec)
  polling_attempts = 1
  logger.debug("Successfully enqueued document with job id: '#{job_id}'")
  queue_res = parse_queued(job_id, product_class, endpoint: endpoint)
  queue_res_job = queue_res.job
  raise Error::MindeeAPIError, 'Expected job to be present' unless queue_res_job

  # Statuses for which polling should continue.
  valid_statuses = [
    Mindee::V1::Parsing::Common::JobStatus::WAITING,
    Mindee::V1::Parsing::Common::JobStatus::PROCESSING,
  ]
  # @type var valid_statuses: Array[(:waiting | :processing | :completed | :failed)]
  while valid_statuses.include?(queue_res_job.status) && polling_attempts < options.max_retries
    logger.debug("Polling server for parsing result with job id: '#{job_id}'. Attempt #{polling_attempts}")
    sleep(options.delay_sec)
    queue_res = parse_queued(job_id, product_class, endpoint: endpoint)
    queue_res_job = queue_res.job
    raise Error::MindeeAPIError, 'Expected job to be present' unless queue_res_job

    polling_attempts += 1
  end

  return queue_res if queue_res_job.status == Mindee::V1::Parsing::Common::JobStatus::COMPLETED

  if valid_statuses.include?(queue_res_job.status)
    # Still pending after the last allowed attempt: a genuine timeout.
    # Time slept is the initial delay plus one `delay_sec` pause per re-poll; the first poll has no extra pause.
    elapsed = options.initial_delay_sec + ((polling_attempts - 1) * options.delay_sec.to_f)
    raise Error::MindeeAPIError,
          "Asynchronous parsing request timed out after #{elapsed} seconds (#{polling_attempts} tries)"
  end

  # The job left the pending states without completing, so this is not a timeout.
  raise Error::MindeeAPIError,
        "Asynchronous parsing request ended with status '#{queue_res_job.status}' (#{polling_attempts} tries)"
end
|
|
283
|
+
|
|
284
|
+
# Sends a document to a workflow.
|
|
285
|
+
#
|
|
286
|
+
# Accepts options either as a Hash or as a WorkflowOptions instance.
|
|
287
|
+
#
|
|
288
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
|
|
289
|
+
# @param workflow_id [String]
|
|
290
|
+
# @param options [Hash, WorkflowOptions] Options to configure workflow behavior. Possible keys:
|
|
291
|
+
# * `document_alias` [String, nil] Alias to give to the document.
|
|
292
|
+
# * `priority` [Symbol, nil] Priority to give to the document.
|
|
293
|
+
# * `full_text` [bool] Whether to include the full OCR text response in compatible APIs.
|
|
294
|
+
# * `rag` [bool, nil] Whether to enable Retrieval-Augmented Generation.
|
|
295
|
+
#
|
|
296
|
+
# * `public_url` [String, nil] A unique, encrypted URL for accessing the document validation interface without
|
|
297
|
+
# requiring authentication.
|
|
298
|
+
# * `page_options` [Hash, nil] Page cutting/merge options:
|
|
299
|
+
# * `:page_indexes` Zero-based list of page indexes.
|
|
300
|
+
# * `:operation` Operation to apply on the document, given the `page_indexes` specified:
|
|
301
|
+
# * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
|
|
302
|
+
# * `:REMOVE` - remove the specified pages, and keep all others.
|
|
303
|
+
# * `:on_min_pages` Apply the operation only if document has at least this many pages.
|
|
304
|
+
# @return [Mindee::V1::Parsing::Common::WorkflowResponse]
|
|
305
|
+
def execute_workflow(input_source, workflow_id, options: {})
  # Wrap plain hashes; an existing WorkflowOptions is used untouched.
  workflow_opts = options
  workflow_opts = WorkflowOptions.new(params: options) unless options.is_a?(WorkflowOptions)

  # Page operations are only attempted on local sources.
  local_source = input_source.is_a?(Input::Source::LocalInputSource)
  process_pdf_if_required(input_source, workflow_opts) if workflow_opts.respond_to?(:page_options) && local_source

  workflow_endpoint = V1::HTTP::WorkflowEndpoint.new(workflow_id, api_key: @api_key.to_s)
  logger.debug("Sending document to workflow '#{workflow_id}'")

  prediction, raw_http = workflow_endpoint.execute_workflow(input_source, workflow_opts)

  response_class = Mindee::V1::Parsing::Common::WorkflowResponse
  response_class.new(V1::Product::Universal::Universal, prediction, raw_http)
end
|
|
321
|
+
|
|
322
|
+
# Load a prediction.
|
|
323
|
+
#
|
|
324
|
+
# @param product_class [Mindee::Inference] class of the product
|
|
325
|
+
# @param local_response [Mindee::Input::LocalResponse]
|
|
326
|
+
# @return [Mindee::V1::Parsing::Common::ApiResponse]
|
|
327
|
+
def load_prediction(product_class, local_response)
  raise Error::MindeeAPIError, 'Expected LocalResponse to not be nil.' if local_response.nil?

  # No `|| {}` fallback here: a missing hash must reach the guard below instead of
  # being silently replaced by an empty payload.
  response_hash = local_response.as_hash
  raise Error::MindeeAPIError, 'Expected LocalResponse#as_hash to return a hash.' if response_hash.nil?

  Mindee::V1::Parsing::Common::ApiResponse.new(product_class, response_hash, response_hash.to_json)
rescue KeyError, Error::MindeeAPIError
  # Both guard failures above and missing keys during response construction
  # surface to the caller as a single input error.
  raise Error::MindeeInputError, 'No prediction found in local response.'
end
|
|
337
|
+
|
|
338
|
+
# Load a document from an absolute path, as a string.
|
|
339
|
+
# @param input_path [String] Path of file to open
|
|
340
|
+
# @param repair_pdf [bool] Attempts to fix broken pdf if true
|
|
341
|
+
# @return [Mindee::Input::Source::PathInputSource]
|
|
342
|
+
def source_from_path(input_path, repair_pdf: false)
  # Thin factory: both arguments are forwarded unchanged to PathInputSource.
  Input::Source::PathInputSource.new(input_path, repair_pdf: repair_pdf)
end
|
|
345
|
+
|
|
346
|
+
# Load a document from raw bytes.
|
|
347
|
+
# @param input_bytes [String] Encoding::BINARY byte input
|
|
348
|
+
# @param filename [String] The name of the file (without the path)
|
|
349
|
+
# @param repair_pdf [bool] Attempts to fix broken pdf if true
|
|
350
|
+
# @return [Mindee::Input::Source::BytesInputSource]
|
|
351
|
+
def source_from_bytes(input_bytes, filename, repair_pdf: false)
  # Thin factory: all arguments are forwarded unchanged to BytesInputSource.
  Input::Source::BytesInputSource.new(input_bytes, filename, repair_pdf: repair_pdf)
end
|
|
354
|
+
|
|
355
|
+
# Load a document from a base64 encoded string.
|
|
356
|
+
# @param base64_string [String] Input to parse as base64 string
|
|
357
|
+
# @param filename [String] The name of the file (without the path)
|
|
358
|
+
# @param repair_pdf [bool] Attempts to fix broken pdf if true
|
|
359
|
+
# @return [Mindee::Input::Source::Base64InputSource]
|
|
360
|
+
def source_from_b64string(base64_string, filename, repair_pdf: false)
  # Thin factory: all arguments are forwarded unchanged to Base64InputSource.
  Input::Source::Base64InputSource.new(base64_string, filename, repair_pdf: repair_pdf)
end
|
|
363
|
+
|
|
364
|
+
# Load a document from a normal Ruby `File`.
|
|
365
|
+
# @param input_file [File] Input file handle
|
|
366
|
+
# @param filename [String] The name of the file (without the path)
|
|
367
|
+
# @param repair_pdf [bool] Attempts to fix broken pdf if true
|
|
368
|
+
# @return [Mindee::Input::Source::FileInputSource]
|
|
369
|
+
def source_from_file(input_file, filename, repair_pdf: false)
  # Thin factory: all arguments are forwarded unchanged to FileInputSource.
  Input::Source::FileInputSource.new(input_file, filename, repair_pdf: repair_pdf)
end
|
|
372
|
+
|
|
373
|
+
# Load a document from a secure remote source (HTTPS).
|
|
374
|
+
# @param url [String] URL of the file
|
|
375
|
+
# @return [Mindee::Input::Source::URLInputSource]
|
|
376
|
+
def source_from_url(url)
  # Thin factory: the URL is forwarded unchanged to URLInputSource.
  Input::Source::URLInputSource.new(url)
end
|
|
379
|
+
|
|
380
|
+
# Creates a custom endpoint with the given values.
|
|
381
|
+
# Do not set for standard (off the shelf) endpoints.
|
|
382
|
+
#
|
|
383
|
+
# @param endpoint_name [String] For custom endpoints, the "API name" field in the "Settings" page of the
|
|
384
|
+
# API Builder. Do not set for standard (off the shelf) endpoints.
|
|
385
|
+
#
|
|
386
|
+
# @param account_name [String] For custom endpoints, your account or organization username on the API Builder.
|
|
387
|
+
# This is normally not required unless you have a custom endpoint which has the same name as a
|
|
388
|
+
# standard (off the shelf) endpoint.
|
|
389
|
+
# @param version [String] For custom endpoints, version of the product
|
|
390
|
+
# @return [Mindee::V1::HTTP::Endpoint]
|
|
391
|
+
def create_endpoint(endpoint_name: '', account_name: '', version: '')
  # Always built against the Universal product class; initialize_endpoint raises a
  # configuration error for that class when endpoint_name is nil or empty.
  initialize_endpoint(
    Mindee::V1::Product::Universal::Universal,
    endpoint_name: endpoint_name,
    account_name: account_name,
    version: version
  )
end
|
|
399
|
+
|
|
400
|
+
# Validates the parameters for async auto-polling
|
|
401
|
+
# @param initial_delay_sec [Numeric] initial delay before polling
|
|
402
|
+
# @param delay_sec [Numeric] delay between polling attempts
|
|
403
|
+
# @param max_retries [Integer, nil] maximum amount of retries.
|
|
404
|
+
def validate_async_params(initial_delay_sec, delay_sec, max_retries)
  # Lower bounds accepted for each polling setting.
  delay_floor = 1
  initial_delay_floor = 1
  retries_floor = 2

  # Checks run in a fixed order: polling delay, then initial delay, then retry count.
  if delay_sec < delay_floor
    message = "Cannot set auto-poll delay to less than #{delay_floor} second(s)"
    raise ArgumentError, message
  end

  if initial_delay_sec < initial_delay_floor
    message = "Cannot set initial parsing delay to less than #{initial_delay_floor} second(s)"
    raise ArgumentError, message
  end

  return unless max_retries < retries_floor

  raise ArgumentError, "Cannot set auto-poll retries to less than #{retries_floor}"
end
|
|
419
|
+
|
|
420
|
+
# Creates an endpoint with the given values. Raises an error if the endpoint is invalid.
|
|
421
|
+
# @param product_class [Mindee::V1::Parsing::Common::Inference] class of the product
|
|
422
|
+
#
|
|
423
|
+
# @param endpoint_name [String] For custom endpoints, the "API name" field in the "Settings" page of the
|
|
424
|
+
# API Builder. Do not set for standard (off the shelf) endpoints.
|
|
425
|
+
#
|
|
426
|
+
# @param account_name [String] For custom endpoints, your account or organization username on the API Builder.
|
|
427
|
+
# This is normally not required unless you have a custom endpoint which has the same name as a
|
|
428
|
+
# standard (off the shelf) endpoint.
|
|
429
|
+
# @param version [String] For custom endpoints, version of the product.
|
|
430
|
+
# @return [Mindee::V1::HTTP::Endpoint]
|
|
431
|
+
def initialize_endpoint(product_class, endpoint_name: '', account_name: '', version: '')
  # The Universal product has no endpoint name of its own, so one must be supplied.
  name_missing = endpoint_name.nil? || endpoint_name.empty?
  if name_missing && product_class == Mindee::V1::Product::Universal::Universal
    raise Mindee::Error::MindeeConfigurationError, 'Missing argument endpoint_name when using custom class'
  end

  # Fill in defaults for whatever the caller left blank.
  resolved_name = fix_endpoint_name(product_class, endpoint_name)
  resolved_account = fix_account_name(account_name)
  resolved_version = fix_version(product_class, version)

  V1::HTTP::Endpoint.new(resolved_account, resolved_name, resolved_version, api_key: @api_key.to_s)
end
|
|
442
|
+
|
|
443
|
+
def fix_endpoint_name(product_class, endpoint_name)
  # A provided name wins; otherwise the product class supplies it. Always returned as a String.
  return endpoint_name.to_s unless endpoint_name.nil? || endpoint_name.empty?

  product_class.endpoint_name.to_s
end
|
|
446
|
+
|
|
447
|
+
def fix_account_name(account_name)
  # A provided account name is returned untouched.
  return account_name unless account_name.nil? || account_name.empty?

  # Nothing provided: log the fallback and use the default owner.
  logger.info("No account name provided, #{OTS_OWNER} will be used by default.")
  OTS_OWNER
end
|
|
455
|
+
|
|
456
|
+
def fix_version(product_class, version)
  # An explicit, non-empty version always wins.
  version_given = !(version.nil? || version.empty?)
  return version if version_given

  # Otherwise use the version declared on the product class, when it has one.
  class_version = product_class.endpoint_version
  return class_version || '' unless class_version.nil? || class_version.to_s.empty?

  logger.debug('No version provided for a custom build, will attempt to poll version 1 by default.')
  '1'
end
|
|
465
|
+
|
|
466
|
+
# If needed, converts the parsing options provided as a hash into a proper ParseOptions object.
|
|
467
|
+
# @param options [Hash, ParseOptions] Options.
|
|
468
|
+
# @return [ParseOptions]
|
|
469
|
+
def normalize_parse_options(options)
  # An existing ParseOptions is passed through as the same object; anything else is wrapped.
  options.is_a?(ParseOptions) ? options : ParseOptions.new(params: options)
end
|
|
474
|
+
|
|
475
|
+
# Processes a PDF if parameters were provided.
|
|
476
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
|
|
477
|
+
# @param opts [ParseOptions]
|
|
478
|
+
def process_pdf_if_required(input_source, opts)
  # Each guard bails out with nil; they are evaluated in this order.
  return unless input_source.is_a?(Mindee::Input::Source::LocalInputSource)
  return unless opts.page_options.on_min_pages
  return unless input_source.pdf?

  page_options = opts.page_options
  input_source.process_pdf(page_options)
end
|
|
485
|
+
|
|
486
|
+
private :parse_sync, :validate_async_params, :initialize_endpoint, :fix_endpoint_name, :fix_version,
|
|
487
|
+
:fix_account_name, :process_pdf_if_required, :normalize_parse_options
|
|
488
|
+
end
|
|
489
|
+
end
|
|
490
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
  module V1
    # Custom extraction module
    module Extraction
      # Multi-receipts extraction.
      # Cuts every detected receipt out of each page of a multi-receipts document.
      #
      # @param input_source [LocalInputSource] Local Input Source to extract sub-receipts from.
      # @param inference [Inference] Results of the inference.
      # @return [Array<ExtractedImage>] Extracted receipt images, in page order.
      # @raise [Error::MindeeInputError] When the document-level prediction has no receipts entry.
      def self.extract_receipts(input_source, inference)
        images = [] # @type var images: Array[Image::ExtractedImage]

        document_receipts = inference.prediction.receipts
        unless document_receipts
          raise Error::MindeeInputError,
                'No possible receipts candidates found for Multi-Receipts extraction.'
        end

        page_indexes = 0...input_source.page_count
        page_indexes.each do |page_index|
          page_receipts = inference.pages[page_index].prediction.receipts
          bounding_boxes = page_receipts.map(&:bounding_box)
          # The image extractor is called with a one-based page number.
          page_images = Mindee::Image::ImageExtractor.extract_multiple_images_from_source(
            input_source, page_index + 1, bounding_boxes
          )
          images.concat(page_images)
        end

        images
      end
    end
  end
end
|