mindee-lite 5.0.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +24 -0
- data/.gitattributes +14 -0
- data/.gitignore +76 -0
- data/.gitmodules +3 -0
- data/.pre-commit-config.yaml +36 -0
- data/.rubocop.yml +49 -0
- data/.yardopts +4 -0
- data/CHANGELOG.md +515 -0
- data/CODE_OF_CONDUCT.md +129 -0
- data/CONTRIBUTING.md +107 -0
- data/Gemfile +14 -0
- data/LICENSE +21 -0
- data/README.md +42 -0
- data/Rakefile +40 -0
- data/Steepfile +30 -0
- data/bin/console +14 -0
- data/bin/mindee.rb +30 -0
- data/bin/v1/parser.rb +153 -0
- data/bin/v1/products.rb +88 -0
- data/bin/v2/parser.rb +235 -0
- data/bin/v2/products.rb +34 -0
- data/docs/code_samples/bank_account_details_v1.txt +24 -0
- data/docs/code_samples/bank_account_details_v2.txt +24 -0
- data/docs/code_samples/bank_statement_fr_v2_async.txt +24 -0
- data/docs/code_samples/barcode_reader_v1.txt +24 -0
- data/docs/code_samples/cropper_v1.txt +21 -0
- data/docs/code_samples/default.txt +30 -0
- data/docs/code_samples/default_async.txt +29 -0
- data/docs/code_samples/expense_receipts_v5.txt +25 -0
- data/docs/code_samples/expense_receipts_v5_async.txt +24 -0
- data/docs/code_samples/financial_document_v1.txt +25 -0
- data/docs/code_samples/financial_document_v1_async.txt +24 -0
- data/docs/code_samples/idcard_fr_v1.txt +24 -0
- data/docs/code_samples/idcard_fr_v2.txt +24 -0
- data/docs/code_samples/international_id_v2_async.txt +24 -0
- data/docs/code_samples/invoice_splitter_v1_async.txt +24 -0
- data/docs/code_samples/invoices_v4.txt +25 -0
- data/docs/code_samples/invoices_v4_async.txt +24 -0
- data/docs/code_samples/multi_receipts_detector_v1.txt +24 -0
- data/docs/code_samples/passport_v1.txt +24 -0
- data/docs/code_samples/resume_v1_async.txt +24 -0
- data/docs/code_samples/v2_classification.txt +30 -0
- data/docs/code_samples/v2_crop.txt +30 -0
- data/docs/code_samples/v2_extraction.txt +42 -0
- data/docs/code_samples/v2_extraction_webhook.txt +45 -0
- data/docs/code_samples/v2_ocr.txt +30 -0
- data/docs/code_samples/v2_split.txt +30 -0
- data/docs/code_samples/workflow_execution.txt +28 -0
- data/docs/code_samples/workflow_polling.txt +35 -0
- data/examples/auto_invoice_splitter_extraction.rb +48 -0
- data/examples/auto_multi_receipts_detector_extraction.rb +30 -0
- data/lib/mindee/dependency.rb +29 -0
- data/lib/mindee/error/mindee_error.rb +17 -0
- data/lib/mindee/error/mindee_http_error.rb +36 -0
- data/lib/mindee/error/mindee_http_error_v2.rb +45 -0
- data/lib/mindee/error/mindee_http_unknown_error_v2.rb +18 -0
- data/lib/mindee/error/mindee_input_error.rb +30 -0
- data/lib/mindee/error.rb +6 -0
- data/lib/mindee/geometry/min_max.rb +23 -0
- data/lib/mindee/geometry/point.rb +41 -0
- data/lib/mindee/geometry/polygon.rb +37 -0
- data/lib/mindee/geometry/quadrilateral.rb +50 -0
- data/lib/mindee/geometry/utils.rb +88 -0
- data/lib/mindee/geometry.rb +7 -0
- data/lib/mindee/http/.rubocop.yml +7 -0
- data/lib/mindee/http/http_error_handler.rb +106 -0
- data/lib/mindee/http/response_validation.rb +81 -0
- data/lib/mindee/http.rb +3 -0
- data/lib/mindee/image/extracted_image.rb +89 -0
- data/lib/mindee/image/image_compressor.rb +29 -0
- data/lib/mindee/image/image_extractor.rb +118 -0
- data/lib/mindee/image/image_utils.rb +165 -0
- data/lib/mindee/image.rb +6 -0
- data/lib/mindee/input/base_parameters.rb +149 -0
- data/lib/mindee/input/local_response.rb +80 -0
- data/lib/mindee/input/polling_options.rb +26 -0
- data/lib/mindee/input/sources/base64_input_source.rb +31 -0
- data/lib/mindee/input/sources/bytes_input_source.rb +21 -0
- data/lib/mindee/input/sources/file_input_source.rb +20 -0
- data/lib/mindee/input/sources/local_input_source.rb +216 -0
- data/lib/mindee/input/sources/path_input_source.rb +20 -0
- data/lib/mindee/input/sources/url_input_source.rb +130 -0
- data/lib/mindee/input/sources.rb +8 -0
- data/lib/mindee/input.rb +4 -0
- data/lib/mindee/logging/logger.rb +24 -0
- data/lib/mindee/logging.rb +3 -0
- data/lib/mindee/page_options.rb +24 -0
- data/lib/mindee/pdf/extracted_pdf.rb +70 -0
- data/lib/mindee/pdf/pdf_compressor.rb +121 -0
- data/lib/mindee/pdf/pdf_extractor.rb +121 -0
- data/lib/mindee/pdf/pdf_processor.rb +91 -0
- data/lib/mindee/pdf/pdf_tools.rb +201 -0
- data/lib/mindee/pdf.rb +7 -0
- data/lib/mindee/v1/client.rb +490 -0
- data/lib/mindee/v1/extraction/multi_receipts_extractor.rb +32 -0
- data/lib/mindee/v1/extraction.rb +3 -0
- data/lib/mindee/v1/http/.rubocop.yml +7 -0
- data/lib/mindee/v1/http/endpoint.rb +221 -0
- data/lib/mindee/v1/http/workflow_endpoint.rb +93 -0
- data/lib/mindee/v1/http.rb +4 -0
- data/lib/mindee/v1/parsing/common/api_request.rb +38 -0
- data/lib/mindee/v1/parsing/common/api_response.rb +63 -0
- data/lib/mindee/v1/parsing/common/document.rb +86 -0
- data/lib/mindee/v1/parsing/common/execution.rb +78 -0
- data/lib/mindee/v1/parsing/common/execution_file.rb +26 -0
- data/lib/mindee/v1/parsing/common/execution_priority.rb +38 -0
- data/lib/mindee/v1/parsing/common/extras/cropper_extra.rb +32 -0
- data/lib/mindee/v1/parsing/common/extras/extras.rb +62 -0
- data/lib/mindee/v1/parsing/common/extras/full_text_ocr_extra.rb +35 -0
- data/lib/mindee/v1/parsing/common/extras/rag_extra.rb +28 -0
- data/lib/mindee/v1/parsing/common/extras.rb +6 -0
- data/lib/mindee/v1/parsing/common/inference.rb +69 -0
- data/lib/mindee/v1/parsing/common/job.rb +48 -0
- data/lib/mindee/v1/parsing/common/ocr/mvision_v1.rb +52 -0
- data/lib/mindee/v1/parsing/common/ocr/ocr.rb +180 -0
- data/lib/mindee/v1/parsing/common/ocr.rb +3 -0
- data/lib/mindee/v1/parsing/common/orientation.rb +28 -0
- data/lib/mindee/v1/parsing/common/page.rb +49 -0
- data/lib/mindee/v1/parsing/common/prediction.rb +19 -0
- data/lib/mindee/v1/parsing/common/product.rb +26 -0
- data/lib/mindee/v1/parsing/common/workflow_response.rb +30 -0
- data/lib/mindee/v1/parsing/common.rb +15 -0
- data/lib/mindee/v1/parsing/standard/abstract_field.rb +74 -0
- data/lib/mindee/v1/parsing/standard/address_field.rb +51 -0
- data/lib/mindee/v1/parsing/standard/amount_field.rb +28 -0
- data/lib/mindee/v1/parsing/standard/base_field.rb +30 -0
- data/lib/mindee/v1/parsing/standard/boolean_field.rb +29 -0
- data/lib/mindee/v1/parsing/standard/classification_field.rb +18 -0
- data/lib/mindee/v1/parsing/standard/company_registration_field.rb +45 -0
- data/lib/mindee/v1/parsing/standard/date_field.rb +40 -0
- data/lib/mindee/v1/parsing/standard/feature_field.rb +26 -0
- data/lib/mindee/v1/parsing/standard/locale_field.rb +52 -0
- data/lib/mindee/v1/parsing/standard/payment_details_field.rb +44 -0
- data/lib/mindee/v1/parsing/standard/position_field.rb +61 -0
- data/lib/mindee/v1/parsing/standard/string_field.rb +26 -0
- data/lib/mindee/v1/parsing/standard/tax_field.rb +110 -0
- data/lib/mindee/v1/parsing/standard.rb +15 -0
- data/lib/mindee/v1/parsing/universal/universal_list_field.rb +60 -0
- data/lib/mindee/v1/parsing/universal/universal_object_field.rb +123 -0
- data/lib/mindee/v1/parsing/universal.rb +4 -0
- data/lib/mindee/v1/parsing.rb +5 -0
- data/lib/mindee/v1/product/.rubocop.yml +12 -0
- data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1.rb +47 -0
- data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb +47 -0
- data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rb +38 -0
- data/lib/mindee/v1/product/cropper/cropper_v1.rb +47 -0
- data/lib/mindee/v1/product/cropper/cropper_v1_document.rb +15 -0
- data/lib/mindee/v1/product/cropper/cropper_v1_page.rb +55 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1.rb +47 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb +329 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb +124 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1_line_items.rb +64 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1_page.rb +38 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rb +40 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb +63 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb +60 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rb +40 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb +169 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rb +40 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb +78 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rb +56 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v1.rb +49 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb +106 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v1_page.rb +57 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v2.rb +49 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb +143 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v2_page.rb +65 -0
- data/lib/mindee/v1/product/international_id/international_id_v2.rb +47 -0
- data/lib/mindee/v1/product/international_id/international_id_v2_document.rb +164 -0
- data/lib/mindee/v1/product/international_id/international_id_v2_page.rb +38 -0
- data/lib/mindee/v1/product/invoice/invoice_v4.rb +47 -0
- data/lib/mindee/v1/product/invoice/invoice_v4_document.rb +300 -0
- data/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb +124 -0
- data/lib/mindee/v1/product/invoice/invoice_v4_line_items.rb +64 -0
- data/lib/mindee/v1/product/invoice/invoice_v4_page.rb +38 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rb +47 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb +66 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +58 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +50 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rb +38 -0
- data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rb +47 -0
- data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +38 -0
- data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +38 -0
- data/lib/mindee/v1/product/passport/passport_v1.rb +47 -0
- data/lib/mindee/v1/product/passport/passport_v1_document.rb +112 -0
- data/lib/mindee/v1/product/passport/passport_v1_page.rb +38 -0
- data/lib/mindee/v1/product/receipt/receipt_v5.rb +47 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_document.rb +187 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb +88 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_line_items.rb +56 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_page.rb +38 -0
- data/lib/mindee/v1/product/resume/resume_v1.rb +47 -0
- data/lib/mindee/v1/product/resume/resume_v1_certificate.rb +82 -0
- data/lib/mindee/v1/product/resume/resume_v1_certificates.rb +60 -0
- data/lib/mindee/v1/product/resume/resume_v1_document.rb +340 -0
- data/lib/mindee/v1/product/resume/resume_v1_education.rb +106 -0
- data/lib/mindee/v1/product/resume/resume_v1_educations.rb +66 -0
- data/lib/mindee/v1/product/resume/resume_v1_language.rb +66 -0
- data/lib/mindee/v1/product/resume/resume_v1_languages.rb +56 -0
- data/lib/mindee/v1/product/resume/resume_v1_page.rb +38 -0
- data/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb +122 -0
- data/lib/mindee/v1/product/resume/resume_v1_professional_experiences.rb +70 -0
- data/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb +66 -0
- data/lib/mindee/v1/product/resume/resume_v1_social_networks_urls.rb +56 -0
- data/lib/mindee/v1/product/universal/universal.rb +48 -0
- data/lib/mindee/v1/product/universal/universal_document.rb +35 -0
- data/lib/mindee/v1/product/universal/universal_page.rb +54 -0
- data/lib/mindee/v1/product/universal/universal_prediction.rb +128 -0
- data/lib/mindee/v1/product.rb +18 -0
- data/lib/mindee/v1.rb +7 -0
- data/lib/mindee/v2/client.rb +132 -0
- data/lib/mindee/v2/file_operation/crop.rb +51 -0
- data/lib/mindee/v2/file_operation/crop_files.rb +25 -0
- data/lib/mindee/v2/file_operation/split.rb +37 -0
- data/lib/mindee/v2/file_operation/split_files.rb +25 -0
- data/lib/mindee/v2/file_operation.rb +6 -0
- data/lib/mindee/v2/http/.rubocop.yml +7 -0
- data/lib/mindee/v2/http/api_v2_settings.rb +65 -0
- data/lib/mindee/v2/http/mindee_api_v2.rb +230 -0
- data/lib/mindee/v2/http.rb +4 -0
- data/lib/mindee/v2/parsing/base_inference.rb +44 -0
- data/lib/mindee/v2/parsing/base_response.rb +15 -0
- data/lib/mindee/v2/parsing/common_response.rb +20 -0
- data/lib/mindee/v2/parsing/error_item.rb +21 -0
- data/lib/mindee/v2/parsing/error_response.rb +51 -0
- data/lib/mindee/v2/parsing/field/base_field.rb +63 -0
- data/lib/mindee/v2/parsing/field/field_confidence.rb +128 -0
- data/lib/mindee/v2/parsing/field/field_location.rb +33 -0
- data/lib/mindee/v2/parsing/field/inference_fields.rb +105 -0
- data/lib/mindee/v2/parsing/field/list_field.rb +79 -0
- data/lib/mindee/v2/parsing/field/object_field.rb +138 -0
- data/lib/mindee/v2/parsing/field/simple_field.rb +60 -0
- data/lib/mindee/v2/parsing/field.rb +9 -0
- data/lib/mindee/v2/parsing/inference_active_options.rb +67 -0
- data/lib/mindee/v2/parsing/inference_file.rb +38 -0
- data/lib/mindee/v2/parsing/inference_job.rb +25 -0
- data/lib/mindee/v2/parsing/inference_model.rb +30 -0
- data/lib/mindee/v2/parsing/job.rb +93 -0
- data/lib/mindee/v2/parsing/job_response.rb +30 -0
- data/lib/mindee/v2/parsing/job_webhook.rb +59 -0
- data/lib/mindee/v2/parsing/rag_metadata.rb +17 -0
- data/lib/mindee/v2/parsing/raw_text.rb +27 -0
- data/lib/mindee/v2/parsing/raw_text_page.rb +24 -0
- data/lib/mindee/v2/parsing/search/pagination_metadata.rb +44 -0
- data/lib/mindee/v2/parsing/search/search_model.rb +38 -0
- data/lib/mindee/v2/parsing/search/search_models.rb +34 -0
- data/lib/mindee/v2/parsing/search/search_response.rb +38 -0
- data/lib/mindee/v2/parsing/search.rb +6 -0
- data/lib/mindee/v2/parsing.rb +16 -0
- data/lib/mindee/v2/product/base_product.rb +28 -0
- data/lib/mindee/v2/product/classification/classification.rb +20 -0
- data/lib/mindee/v2/product/classification/classification_classifier.rb +25 -0
- data/lib/mindee/v2/product/classification/classification_inference.rb +35 -0
- data/lib/mindee/v2/product/classification/classification_response.rb +32 -0
- data/lib/mindee/v2/product/classification/classification_result.rb +27 -0
- data/lib/mindee/v2/product/classification/params/classification_parameters.rb +47 -0
- data/lib/mindee/v2/product/crop/crop.rb +20 -0
- data/lib/mindee/v2/product/crop/crop_inference.rb +34 -0
- data/lib/mindee/v2/product/crop/crop_item.rb +39 -0
- data/lib/mindee/v2/product/crop/crop_response.rb +40 -0
- data/lib/mindee/v2/product/crop/crop_result.rb +34 -0
- data/lib/mindee/v2/product/crop/params/crop_parameters.rb +47 -0
- data/lib/mindee/v2/product/extraction/extraction.rb +21 -0
- data/lib/mindee/v2/product/extraction/extraction_inference.rb +40 -0
- data/lib/mindee/v2/product/extraction/extraction_response.rb +32 -0
- data/lib/mindee/v2/product/extraction/extraction_result.rb +44 -0
- data/lib/mindee/v2/product/extraction/params/data_schema.rb +51 -0
- data/lib/mindee/v2/product/extraction/params/data_schema_field.rb +69 -0
- data/lib/mindee/v2/product/extraction/params/data_schema_replace.rb +39 -0
- data/lib/mindee/v2/product/extraction/params/extraction_parameters.rb +125 -0
- data/lib/mindee/v2/product/ocr/ocr.rb +20 -0
- data/lib/mindee/v2/product/ocr/ocr_inference.rb +34 -0
- data/lib/mindee/v2/product/ocr/ocr_page.rb +33 -0
- data/lib/mindee/v2/product/ocr/ocr_response.rb +32 -0
- data/lib/mindee/v2/product/ocr/ocr_result.rb +34 -0
- data/lib/mindee/v2/product/ocr/ocr_word.rb +29 -0
- data/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +47 -0
- data/lib/mindee/v2/product/split/params/split_parameters.rb +48 -0
- data/lib/mindee/v2/product/split/split.rb +19 -0
- data/lib/mindee/v2/product/split/split_inference.rb +34 -0
- data/lib/mindee/v2/product/split/split_range.rb +38 -0
- data/lib/mindee/v2/product/split/split_response.rb +40 -0
- data/lib/mindee/v2/product/split/split_result.rb +34 -0
- data/lib/mindee/v2/product.rb +7 -0
- data/lib/mindee/v2.rb +7 -0
- data/lib/mindee/version.rb +26 -0
- data/lib/mindee.rb +135 -0
- data/mindee-lite.gemspec +36 -0
- data/mindee.gemspec +44 -0
- data/sig/custom/marcel.rbs +3 -0
- data/sig/custom/mini_magick.rbs +31 -0
- data/sig/custom/net_http.rbs +43 -0
- data/sig/custom/origami.rbs +59 -0
- data/sig/mindee/dependency.rbs +13 -0
- data/sig/mindee/error/mindee_error.rbs +13 -0
- data/sig/mindee/error/mindee_http_error.rbs +17 -0
- data/sig/mindee/error/mindee_http_error_v2.rbs +15 -0
- data/sig/mindee/error/mindee_http_unknown_error_v2.rbs +9 -0
- data/sig/mindee/error/mindee_input_error.rbs +18 -0
- data/sig/mindee/geometry/min_max.rbs +11 -0
- data/sig/mindee/geometry/point.rbs +14 -0
- data/sig/mindee/geometry/polygon.rbs +12 -0
- data/sig/mindee/geometry/quadrilateral.rbs +15 -0
- data/sig/mindee/geometry/utils.rbs +13 -0
- data/sig/mindee/http/http_error_handler.rbs +15 -0
- data/sig/mindee/http/response_validation.rbs +11 -0
- data/sig/mindee/image/extracted_image.rbs +21 -0
- data/sig/mindee/image/image_compressor.rbs +8 -0
- data/sig/mindee/image/image_extractor.rbs +13 -0
- data/sig/mindee/image/image_utils.rbs +19 -0
- data/sig/mindee/input/base_parameters.rbs +35 -0
- data/sig/mindee/input/local_response.rbs +14 -0
- data/sig/mindee/input/polling_options.rbs +12 -0
- data/sig/mindee/input/sources/base64_input_source.rbs +11 -0
- data/sig/mindee/input/sources/bytes_input_source.rbs +10 -0
- data/sig/mindee/input/sources/file_input_source.rbs +10 -0
- data/sig/mindee/input/sources/local_input_source.rbs +30 -0
- data/sig/mindee/input/sources/path_input_source.rbs +10 -0
- data/sig/mindee/input/sources/url_input_source.rbs +20 -0
- data/sig/mindee/logging/logger.rbs +11 -0
- data/sig/mindee/page_options.rbs +11 -0
- data/sig/mindee/pdf/extracted_pdf.rbs +17 -0
- data/sig/mindee/pdf/pdf_compressor.rbs +15 -0
- data/sig/mindee/pdf/pdf_extractor.rbs +19 -0
- data/sig/mindee/pdf/pdf_processor.rbs +12 -0
- data/sig/mindee/pdf/pdf_tools.rbs +31 -0
- data/sig/mindee/v1/client.rbs +84 -0
- data/sig/mindee/v1/extraction/multi_receipts_extractor.rbs +8 -0
- data/sig/mindee/v1/http/endpoint.rbs +41 -0
- data/sig/mindee/v1/http/workflow_endpoint.rbs +22 -0
- data/sig/mindee/v1/parsing/common/api_request.rbs +22 -0
- data/sig/mindee/v1/parsing/common/api_response.rbs +31 -0
- data/sig/mindee/v1/parsing/common/document.rbs +32 -0
- data/sig/mindee/v1/parsing/common/execution.rbs +26 -0
- data/sig/mindee/v1/parsing/common/execution_file.rbs +16 -0
- data/sig/mindee/v1/parsing/common/execution_priority.rbs +16 -0
- data/sig/mindee/v1/parsing/common/extras/cropper_extra.rbs +18 -0
- data/sig/mindee/v1/parsing/common/extras/extras.rbs +24 -0
- data/sig/mindee/v1/parsing/common/extras/full_text_ocr_extra.rbs +22 -0
- data/sig/mindee/v1/parsing/common/extras/rag_extra.rbs +19 -0
- data/sig/mindee/v1/parsing/common/inference.rbs +31 -0
- data/sig/mindee/v1/parsing/common/job.rbs +24 -0
- data/sig/mindee/v1/parsing/common/ocr/mvision_v1.rbs +20 -0
- data/sig/mindee/v1/parsing/common/ocr/ocr.rbs +56 -0
- data/sig/mindee/v1/parsing/common/orientation.rbs +15 -0
- data/sig/mindee/v1/parsing/common/page.rbs +19 -0
- data/sig/mindee/v1/parsing/common/prediction.rbs +14 -0
- data/sig/mindee/v1/parsing/common/product.rbs +16 -0
- data/sig/mindee/v1/parsing/common/workflow_response.rbs +22 -0
- data/sig/mindee/v1/parsing/standard/abstract_field.rbs +30 -0
- data/sig/mindee/v1/parsing/standard/address_field.rbs +28 -0
- data/sig/mindee/v1/parsing/standard/amount_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/base_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/boolean_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/classification_field.rbs +12 -0
- data/sig/mindee/v1/parsing/standard/company_registration_field.rbs +20 -0
- data/sig/mindee/v1/parsing/standard/date_field.rbs +20 -0
- data/sig/mindee/v1/parsing/standard/feature_field.rbs +12 -0
- data/sig/mindee/v1/parsing/standard/locale_field.rbs +24 -0
- data/sig/mindee/v1/parsing/standard/payment_details_field.rbs +19 -0
- data/sig/mindee/v1/parsing/standard/position_field.rbs +26 -0
- data/sig/mindee/v1/parsing/standard/string_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/tax_field.rbs +33 -0
- data/sig/mindee/v1/parsing/universal/universal_list_field.rbs +21 -0
- data/sig/mindee/v1/parsing/universal/universal_object_field.rbs +38 -0
- data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1.rbs +13 -0
- data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rbs +16 -0
- data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/cropper/cropper_v1.rbs +13 -0
- data/sig/mindee/v1/product/cropper/cropper_v1_document.rbs +14 -0
- data/sig/mindee/v1/product/cropper/cropper_v1_page.rbs +19 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1.rbs +13 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1_document.rbs +49 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs +35 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1_line_items.rbs +15 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rbs +15 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rbs +19 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rbs +19 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rbs +15 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs +25 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rbs +20 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rbs +19 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2.rbs +15 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rbs +31 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rbs +19 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs +27 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rbs +17 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v1.rbs +15 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v1_document.rbs +26 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v1_page.rbs +20 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v2.rbs +15 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v2_document.rbs +31 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v2_page.rbs +21 -0
- data/sig/mindee/v1/product/international_id/international_id_v2.rbs +13 -0
- data/sig/mindee/v1/product/international_id/international_id_v2_document.rbs +31 -0
- data/sig/mindee/v1/product/international_id/international_id_v2_page.rbs +17 -0
- data/sig/mindee/v1/product/invoice/invoice_v4.rbs +13 -0
- data/sig/mindee/v1/product/invoice/invoice_v4_document.rbs +45 -0
- data/sig/mindee/v1/product/invoice/invoice_v4_line_item.rbs +35 -0
- data/sig/mindee/v1/product/invoice/invoice_v4_line_items.rbs +15 -0
- data/sig/mindee/v1/product/invoice/invoice_v4_page.rbs +17 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rbs +13 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rbs +17 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs +21 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rbs +15 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs +14 -0
- data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs +15 -0
- data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/passport/passport_v1.rbs +13 -0
- data/sig/mindee/v1/product/passport/passport_v1_document.rbs +25 -0
- data/sig/mindee/v1/product/passport/passport_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/receipt/receipt_v5.rbs +13 -0
- data/sig/mindee/v1/product/receipt/receipt_v5_document.rbs +33 -0
- data/sig/mindee/v1/product/receipt/receipt_v5_line_item.rbs +27 -0
- data/sig/mindee/v1/product/receipt/receipt_v5_line_items.rbs +15 -0
- data/sig/mindee/v1/product/receipt/receipt_v5_page.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1.rbs +13 -0
- data/sig/mindee/v1/product/resume/resume_v1_certificate.rbs +27 -0
- data/sig/mindee/v1/product/resume/resume_v1_certificates.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1_document.rbs +69 -0
- data/sig/mindee/v1/product/resume/resume_v1_education.rbs +33 -0
- data/sig/mindee/v1/product/resume/resume_v1_educations.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1_language.rbs +23 -0
- data/sig/mindee/v1/product/resume/resume_v1_languages.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1_page.rbs +19 -0
- data/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs +37 -0
- data/sig/mindee/v1/product/resume/resume_v1_professional_experiences.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs +23 -0
- data/sig/mindee/v1/product/resume/resume_v1_social_networks_urls.rbs +17 -0
- data/sig/mindee/v1/product/universal/universal.rbs +16 -0
- data/sig/mindee/v1/product/universal/universal_document.rbs +12 -0
- data/sig/mindee/v1/product/universal/universal_page.rbs +18 -0
- data/sig/mindee/v1/product/universal/universal_prediction.rbs +30 -0
- data/sig/mindee/v2/client.rbs +29 -0
- data/sig/mindee/v2/file_operation/crop.rbs +10 -0
- data/sig/mindee/v2/file_operation/crop_files.rbs +9 -0
- data/sig/mindee/v2/file_operation/split.rbs +11 -0
- data/sig/mindee/v2/file_operation/split_files.rbs +9 -0
- data/sig/mindee/v2/http/api_v2_settings.rbs +27 -0
- data/sig/mindee/v2/http/mindee_api_v2.rbs +52 -0
- data/sig/mindee/v2/parsing/base_inference.rbs +18 -0
- data/sig/mindee/v2/parsing/base_response.rbs +11 -0
- data/sig/mindee/v2/parsing/common_response.rbs +12 -0
- data/sig/mindee/v2/parsing/error_item.rbs +13 -0
- data/sig/mindee/v2/parsing/error_response.rbs +20 -0
- data/sig/mindee/v2/parsing/field/base_field.rbs +17 -0
- data/sig/mindee/v2/parsing/field/field_confidence.rbs +30 -0
- data/sig/mindee/v2/parsing/field/field_location.rbs +16 -0
- data/sig/mindee/v2/parsing/field/inference_fields.rbs +20 -0
- data/sig/mindee/v2/parsing/field/list_field.rbs +23 -0
- data/sig/mindee/v2/parsing/field/object_field.rbs +27 -0
- data/sig/mindee/v2/parsing/field/simple_field.rbs +16 -0
- data/sig/mindee/v2/parsing/inference_active_options.rbs +26 -0
- data/sig/mindee/v2/parsing/inference_file.rbs +17 -0
- data/sig/mindee/v2/parsing/inference_job.rbs +13 -0
- data/sig/mindee/v2/parsing/inference_model.rbs +12 -0
- data/sig/mindee/v2/parsing/job.rbs +24 -0
- data/sig/mindee/v2/parsing/job_response.rbs +14 -0
- data/sig/mindee/v2/parsing/job_webhook.rbs +19 -0
- data/sig/mindee/v2/parsing/rag_metadata.rbs +13 -0
- data/sig/mindee/v2/parsing/raw_text.rbs +12 -0
- data/sig/mindee/v2/parsing/raw_text_page.rbs +11 -0
- data/sig/mindee/v2/parsing/search/pagination_metadata.rbs +20 -0
- data/sig/mindee/v2/parsing/search/search_model.rbs +19 -0
- data/sig/mindee/v2/parsing/search/search_response.rbs +17 -0
- data/sig/mindee/v2/parsing/search_models.rbs +14 -0
- data/sig/mindee/v2/product/base_product.rbs +19 -0
- data/sig/mindee/v2/product/classification/classification.rbs +10 -0
- data/sig/mindee/v2/product/classification/classification_classifier.rbs +15 -0
- data/sig/mindee/v2/product/classification/classification_inference.rbs +15 -0
- data/sig/mindee/v2/product/classification/classification_response.rbs +23 -0
- data/sig/mindee/v2/product/classification/classification_result.rbs +15 -0
- data/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs +23 -0
- data/sig/mindee/v2/product/crop/crop.rbs +10 -0
- data/sig/mindee/v2/product/crop/crop_inference.rbs +14 -0
- data/sig/mindee/v2/product/crop/crop_item.rbs +18 -0
- data/sig/mindee/v2/product/crop/crop_response.rbs +25 -0
- data/sig/mindee/v2/product/crop/crop_result.rbs +14 -0
- data/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs +23 -0
- data/sig/mindee/v2/product/extraction/extraction.rbs +15 -0
- data/sig/mindee/v2/product/extraction/extraction_inference.rbs +19 -0
- data/sig/mindee/v2/product/extraction/extraction_response.rbs +24 -0
- data/sig/mindee/v2/product/extraction/extraction_result.rbs +18 -0
- data/sig/mindee/v2/product/extraction/params/data_schema.rbs +21 -0
- data/sig/mindee/v2/product/extraction/params/data_schema_field.rbs +29 -0
- data/sig/mindee/v2/product/extraction/params/data_schema_replace.rbs +21 -0
- data/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs +38 -0
- data/sig/mindee/v2/product/ocr/ocr.rbs +10 -0
- data/sig/mindee/v2/product/ocr/ocr_inference.rbs +14 -0
- data/sig/mindee/v2/product/ocr/ocr_page.rbs +15 -0
- data/sig/mindee/v2/product/ocr/ocr_response.rbs +23 -0
- data/sig/mindee/v2/product/ocr/ocr_result.rbs +14 -0
- data/sig/mindee/v2/product/ocr/ocr_word.rbs +15 -0
- data/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs +24 -0
- data/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs +23 -0
- data/sig/mindee/v2/product/split/split.rbs +10 -0
- data/sig/mindee/v2/product/split/split_inference.rbs +14 -0
- data/sig/mindee/v2/product/split/split_range.rbs +18 -0
- data/sig/mindee/v2/product/split/split_response.rbs +25 -0
- data/sig/mindee/v2/product/split/split_result.rbs +14 -0
- data/sig/mindee/version.rbs +6 -0
- data/sig/mindee.rbs +62 -0
- metadata +600 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
  # Various helper functions for geometry.
  module Geometry
    # A relative set of coordinates (X, Y) on the document.
    class Point
      # @return [Float] X coordinate.
      attr_reader :x
      # @return [Float] Y coordinate.
      attr_reader :y

      # rubocop:disable Naming/MethodParameterName

      # @param x [Float]
      # @param y [Float]
      def initialize(x, y)
        @x = x
        @y = y
      end
      # rubocop:enable Naming/MethodParameterName

      # Array-style access to the coordinates: index 0 is X, index 1 is Y.
      #
      # @param key [Integer] 0 for the X coordinate, 1 for the Y coordinate.
      # @return [Float]
      # @raise [ArgumentError] if +key+ is neither 0 nor 1.
      def [](key)
        case key
        when 0 then @x
        when 1 then @y
        else
          # `throw` is catch/throw control flow, not error signalling: without a
          # matching `catch` it only surfaces as an UncaughtThrowError wrapping
          # the tag. Raise a real ArgumentError instead. UncaughtThrowError is a
          # subclass of ArgumentError, so `rescue ArgumentError` keeps working.
          raise ArgumentError, '0 or 1 only'
        end
      end

      # @return [String] Point as a string, formatted as "(x,y)".
      def to_s
        "(#{@x},#{@y})"
      end
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
# Various helper functions & classes for geometry.
|
|
5
|
+
module Geometry
|
|
6
|
+
# Contains any number of vertex coordinates (Points).
|
|
7
|
+
class Polygon < Array
  # Builds the polygon from raw coordinate pairs.
  # @param server_response [Array<Array<Float>>] Raw coordinates from the server response; each entry is read
  #   through its indexes 0 (X) and 1 (Y). NOTE(review): previously documented as a Hash - confirm against callers.
  def initialize(server_response)
    # `map` already returns the converted list, no need for a side-effect accumulator.
    super(server_response.map { |point| Point.new(point[0], point[1]) })
  end

  # Get the central point (centroid) of the polygon.
  # @return [Mindee::Geometry::Point]
  def centroid
    Geometry.get_centroid(self)
  end

  # Determine if the Point's Y coordinate lies within the vertical extent (min/max Y) of the Polygon.
  # @param point [Mindee::Geometry::Point]
  # @return [bool]
  def point_in_y?(point)
    min_max = Geometry.get_min_max_y(self)
    point.y.between?(min_max.min, min_max.max)
  end

  # @return [String] Polygon as a string.
  def to_s
    "(#{join(', ')})"
  end
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
# Various helper functions & classes for geometry.
|
|
5
|
+
module Geometry
|
|
6
|
+
# Contains exactly 4 relative vertices coordinates (Points).
|
|
7
|
+
class Quadrilateral
  # @return [Mindee::Geometry::Point]
  attr_reader :top_left
  # @return [Mindee::Geometry::Point]
  attr_reader :top_right
  # @return [Mindee::Geometry::Point]
  attr_reader :bottom_right
  # @return [Mindee::Geometry::Point]
  attr_reader :bottom_left

  # @param top_left [Mindee::Geometry::Point]
  # @param top_right [Mindee::Geometry::Point]
  # @param bottom_right [Mindee::Geometry::Point]
  # @param bottom_left [Mindee::Geometry::Point]
  def initialize(top_left, top_right, bottom_right, bottom_left)
    @top_left = top_left
    @top_right = top_right
    @bottom_right = bottom_right
    @bottom_left = bottom_left
  end

  # Index-style access to the corners: top-left, top-right, bottom-right, bottom-left.
  # @param key [Integer] Corner index, from 0 to 3.
  # @return [Mindee::Geometry::Point]
  # @raise [ArgumentError] If the key is not between 0 and 3.
  def [](key)
    case key
    when 0 then @top_left
    when 1 then @top_right
    when 2 then @bottom_right
    when 3 then @bottom_left
    else
      # `throw` without a matching `catch` only surfaced as an UncaughtThrowError; raise a proper error.
      raise ArgumentError, '0, 1, 2, 3 only'
    end
  end

  # A quadrilateral has four corners, always.
  # @return [Integer]
  def size
    4
  end
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
# Various helper functions for geometry.
|
|
5
|
+
module Geometry
|
|
6
|
+
# Transform a prediction into a Quadrilateral.
|
|
7
|
+
# @param prediction [Array<Array<Float>>]
|
|
8
|
+
# @return [Mindee::Geometry::Quadrilateral]
|
|
9
|
+
def self.quadrilateral_from_prediction(prediction)
  # `throw` is the catch/throw primitive and only surfaced as an UncaughtThrowError here;
  # an ArgumentError states the actual problem.
  unless prediction.size == 4
    raise ArgumentError, "Prediction must have exactly 4 points, found #{prediction.size}"
  end

  # Only `size` and `[]` are used on the prediction and on each of its entries.
  corners = (0..3).map { |index| Point.new(prediction[index][0], prediction[index][1]) }
  Quadrilateral.new(*corners)
end
|
|
19
|
+
|
|
20
|
+
# Gets the points of a bounding box for a given set of points
|
|
21
|
+
# @param vertices [Array<Mindee::Geometry::Point>]
|
|
22
|
+
# @return [Array<Float>]
|
|
23
|
+
def self.get_bbox(vertices)
  # Collect each axis separately, then read the extremes: [x_min, y_min, x_max, y_max].
  horizontal = []
  vertical = []
  vertices.each do |vertex|
    horizontal << vertex.x
    vertical << vertex.y
  end
  [horizontal.min, vertical.min, horizontal.max, vertical.max]
end
|
|
28
|
+
|
|
29
|
+
# Creates the bounding box for a given set of points
|
|
30
|
+
# @param vertices [Array<Mindee::Geometry::Point>]
|
|
31
|
+
# @return [Mindee::Geometry::Quadrilateral]
|
|
32
|
+
def self.get_bounding_box(vertices)
  left, top, right, bottom = get_bbox(vertices)
  # Corner order expected by Quadrilateral: top-left, top-right, bottom-right, bottom-left.
  corners = [[left, top], [right, top], [right, bottom], [left, bottom]]
  Quadrilateral.new(*corners.map { |x, y| Point.new(x, y) })
end
|
|
41
|
+
|
|
42
|
+
# Get the central point (centroid) given a sequence of points.
|
|
43
|
+
# @param points [Array<Mindee::Geometry::Point>]
|
|
44
|
+
# @return [Mindee::Geometry::Point]
|
|
45
|
+
def self.get_centroid(points)
  vertices_count = points.size
  # An empty list already failed with ZeroDivisionError (0 / 0 on Integers); keep that contract explicit.
  raise ZeroDivisionError, 'Cannot compute the centroid of an empty set of points.' if vertices_count.zero?

  x_sum = points.map(&:x).sum
  y_sum = points.map(&:y).sum
  # `fdiv` always performs a floating-point division: `/` silently truncated the result
  # whenever the coordinates happened to be Integers.
  Point.new(x_sum.fdiv(vertices_count), y_sum.fdiv(vertices_count))
end
|
|
51
|
+
|
|
52
|
+
# Get the maximum and minimum Y value given a sequence of points.
|
|
53
|
+
# @param points [Array<Mindee::Geometry::Point>]
|
|
54
|
+
# @return [Mindee::Geometry::MinMax]
|
|
55
|
+
def self.get_min_max_y(points)
  # Gather every Y coordinate, then wrap the extremes in a MinMax.
  vertical = points.map { |point| point.y }
  lowest = vertical.min
  highest = vertical.max
  MinMax.new(lowest, highest)
end
|
|
59
|
+
|
|
60
|
+
# Get the maximum and minimum X value given a sequence of points.
|
|
61
|
+
# @param points [Array<Mindee::Geometry::Point>]
|
|
62
|
+
# @return [Mindee::Geometry::MinMax]
|
|
63
|
+
def self.get_min_max_x(points)
  # Gather every X coordinate, then wrap the extremes in a MinMax.
  horizontal = points.map { |point| point.x }
  lowest = horizontal.min
  highest = horizontal.max
  MinMax.new(lowest, highest)
end
|
|
67
|
+
|
|
68
|
+
# Checks whether a set of coordinates is below another on the page, with a slight margin for the lateral value.
|
|
69
|
+
# @param candidate [Array<Mindee::Geometry::Point>] Polygon to check
|
|
70
|
+
# @param anchor [Array<Mindee::Geometry::Point>] Reference polygon
|
|
71
|
+
# @param margin_left [Float] Margin tolerance on the left of the anchor
|
|
72
|
+
# @param margin_right [Float] Margin tolerance on the right of the anchor
|
|
73
|
+
# @return [bool]
|
|
74
|
+
def self.below?(candidate, anchor, margin_left, margin_right)
  # A candidate starting higher on the page than the anchor cannot be below it.
  return false if Geometry.get_min_max_y(candidate).min < Geometry.get_min_max_y(anchor).min

  # Compute each horizontal extent once instead of re-scanning the points for every comparison.
  candidate_x = Geometry.get_min_max_x(candidate)
  anchor_x = Geometry.get_min_max_x(anchor)

  # Margins are relative: they widen the anchor's span by a fraction of its own edge coordinate.
  left_limit = anchor_x.min - (anchor_x.min * margin_left)
  right_limit = anchor_x.max + (anchor_x.max * margin_right)

  return false if candidate_x.min < left_limit
  return false if candidate_x.max > right_limit

  true
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require_relative '../error/mindee_http_error'
|
|
5
|
+
|
|
6
|
+
module Mindee
|
|
7
|
+
module HTTP
|
|
8
|
+
# Mindee HTTP error module.
|
|
9
|
+
module ErrorHandler
|
|
10
|
+
module_function
|
|
11
|
+
|
|
12
|
+
# Extracts the HTTP error from the response hash, or the job error if there is one.
|
|
13
|
+
# @param response [Hash] dictionary response retrieved by the server
|
|
14
|
+
def extract_error(response)
  # Non-hash payloads (e.g. a raw string body) carry no structured error.
  return unless response.respond_to?(:each_pair)

  # The API-level error takes precedence over the job-level one.
  [%w[api_request error], %w[job error]].each do |path|
    error = response.dig(*path)
    # `dig` returns nil when a key is missing: calling `empty?` directly on it raised a NoMethodError.
    return error unless error.nil? || error.empty?
  end
  nil
end
|
|
23
|
+
|
|
24
|
+
# Creates an error object based on what's retrieved from a request.
|
|
25
|
+
# @param response [Hash] dictionary response retrieved by the server
|
|
26
|
+
def create_error_obj(response)
  error_obj = extract_error(response)
  return error_obj unless error_obj.nil?

  # No structured error was found: fall back to recognizing known server messages.
  # Each entry is [marker looked up in the response, error code, error message].
  known_errors = [
    ['Maximum pdf pages', 'TooManyPages', 'Maximum amount of pdf pages reached.'],
    ['Max file size is', 'FileTooLarge', 'Maximum file size reached.'],
    ['Invalid file type', 'InvalidFiletype', 'Invalid file type.'],
    ['Gateway timeout', 'RequestTimeout', 'Request timed out.'],
    ['Too Many Requests', 'TooManyRequests', 'Too Many Requests.'],
  ]

  # A parsed body may be nil or a scalar (e.g. JSON `null`), which cannot be searched at all.
  searchable = response.respond_to?(:include?)
  _, code, message = known_errors.find { |marker, _, _| searchable && response.include?(marker) }

  {
    'code' => code || 'UnknownError',
    'message' => message || 'Server sent back an unexpected reply.',
    'details' => response,
  }
end
|
|
70
|
+
|
|
71
|
+
# Creates an appropriate HTTP error exception, based on retrieved http error code
|
|
72
|
+
# @param url [String] the url of the product
|
|
73
|
+
# @param response [Hash] dictionary response retrieved by the server
|
|
74
|
+
def handle_error(url, response)
  status = response.code.to_i
  # A body that is not valid JSON is kept as raw text so it can still be reported.
  payload = begin
    JSON.parse(response.body, object_class: Hash)
  rescue JSON::ParserError
    response.body.to_s
  end
  details = create_error_obj(payload) || {}

  # Client errors (4xx) and server errors (5xx) get dedicated classes, anything else the generic one.
  error_class = if (400..499).cover?(status)
                  Error::MindeeHTTPClientError
                elsif (500..599).cover?(status)
                  Error::MindeeHTTPServerError
                else
                  Error::MindeeHTTPError
                end
  error_class.new(details, url, status)
end
|
|
91
|
+
|
|
92
|
+
# Creates an appropriate HTTP error exception for a V2 API response, based on retrieved http error code.
|
|
93
|
+
# @param hashed_response [Hash] dictionary response retrieved by the server
|
|
94
|
+
def generate_v2_error(hashed_response)
  code = hashed_response[:code].to_i
  payload = if hashed_response.key?(:status)
              # The server provided a structured error: forward it with string keys.
              hashed_response.transform_keys(&:to_s)
            elsif (200..399).cover?(code)
              # A success-range code reaching this point cannot be explained.
              { 'status' => -1, 'detail' => 'Unknown Error.' }
            else
              { 'status' => code, 'detail' => 'No details available.' }
            end
  Error::MindeeHTTPErrorV2.new(payload)
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'net/http'
|
|
5
|
+
|
|
6
|
+
module Mindee
|
|
7
|
+
module HTTP
|
|
8
|
+
# Module dedicated to the validation & sanitizing of HTTP responses.
|
|
9
|
+
module ResponseValidation
|
|
10
|
+
# Checks if the synchronous response is valid. Returns True if the response is valid.
|
|
11
|
+
# @param [Net::HTTPResponse] response
|
|
12
|
+
# @return [bool]
|
|
13
|
+
def self.valid_sync_response?(response)
  return false unless response.code.to_i.between?(200, 399)

  # The body only has to be parseable here; its content is not inspected.
  parseable = begin
    JSON.parse(response.body, object_class: Hash)
    true
  rescue StandardError
    false
  end
  parseable
end
|
|
23
|
+
|
|
24
|
+
# Checks if a V2 response is valid.
|
|
25
|
+
# @param [Net::HTTPResponse] response
|
|
26
|
+
# @return [bool]
|
|
27
|
+
def self.valid_v2_response?(response)
  return false unless valid_sync_response?(response)

  payload = JSON.parse(response.body, object_class: Hash)
  job_status = payload.dig('job', 'status').to_s
  return false if job_status == 'Failed'

  # A missing job error is fine; a present one must be empty.
  job_error = payload.dig('job', 'error')
  return true unless job_error

  job_error.empty?
end
|
|
42
|
+
|
|
43
|
+
# Checks if the asynchronous response is valid. Also checks if it is a valid synchronous response.
|
|
44
|
+
# Returns true if the response is valid.
|
|
45
|
+
# @param [Net::HTTPResponse] response
|
|
46
|
+
# @return [bool]
|
|
47
|
+
def self.valid_async_response?(response)
  return false unless valid_sync_response?(response)

  # Asynchronous calls accept a narrower status range than synchronous ones.
  return false unless response.code.to_i.between?(200, 302)

  payload = JSON.parse(response.body, object_class: Hash)
  job_status = payload.dig('job', 'status')
  return false if job_status == Mindee::V1::Parsing::Common::JobStatus::FAILURE

  # A missing job error is fine; a present one must be empty.
  job_error = payload.dig('job', 'error')
  return false if job_error && !job_error.empty?

  true
end
|
|
60
|
+
|
|
61
|
+
# Checks and corrects the response object depending on the possible kinds of returns.
|
|
62
|
+
# @param response [Net::HTTPResponse]
|
|
63
|
+
def self.clean_request!(response)
  status = response.code.to_i
  return response if status < 200 || status > 302 # : Net::HTTPResponse
  return response if response.body.empty?

  payload = JSON.parse(response.body, object_class: Hash)

  # The body may report an error status even though the HTTP layer answered successfully:
  # surface that status on the response object itself.
  api_status = payload.dig('api_request', 'status_code')
  response.instance_variable_set(:@code, payload['api_request']['status_code'].to_s) if api_status.to_i > 399

  # NOTE(review): `empty?` is called on the job error without a nil check, so a payload lacking
  # job/error raises NoMethodError here - confirm whether callers guarantee its presence.
  job_error = payload.dig('job', 'error')
  unless job_error.empty?
    job_status = payload.dig('job', 'status').downcase
    return if job_status != Mindee::V1::Parsing::Common::JobStatus::FAILURE.to_s
  end

  # Every remaining case is reported as a server error.
  response.instance_variable_set(:@code, '500')
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
data/lib/mindee/http.rb
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../input/sources'
|
|
4
|
+
require_relative '../logging'
|
|
5
|
+
|
|
6
|
+
module Mindee
|
|
7
|
+
# Image Extraction Module.
|
|
8
|
+
module Image
|
|
9
|
+
# Generic class for image extraction.
|
|
10
|
+
class ExtractedImage
  # @return [Integer] ID of the page the image was extracted from.
  attr_reader :page_id

  # @return [Integer] ID of the element on a given page (0 when none was provided).
  attr_reader :element_id

  # @return [StringIO] In-memory copy of the file's content.
  attr_reader :buffer

  # @return [String] Internal name for the file.
  attr_reader :filename

  # Copies the input source content into a buffer and settles on an internal file name.
  #
  # @param input_source [LocalInputSource, BytesInputSource] Local source for input.
  # @param page_id [Integer] ID of the page the element was found on.
  # @param element_id [Integer, nil] ID of the element in a page.
  # @param preserve_input_filename [Boolean] If true, keep the input source filename as-is.
  def initialize(input_source, page_id, element_id, preserve_input_filename: false)
    @buffer = StringIO.new(input_source.io_stream.read.to_s)
    @buffer.rewind

    @filename = if preserve_input_filename
                  input_source.filename.to_s
                else
                  derived_filename(input_source, page_id, element_id)
                end
    @page_id = page_id
    if element_id.nil?
      @element_id = 0
    else
      @element_id = element_id
    end
  end

  # Saves the document to a file.
  #
  # @param output_path [String] Path to save the file to.
  # @param file_format [String, nil] Optional MiniMagick-compatible format for the file. Inferred from the file
  #   extension if not provided.
  # @raise [MindeeImageError] If the format cannot be inferred or the file cannot be written.
  def write_to_file(output_path, file_format = nil)
    resolved_path = Pathname.new(File.expand_path(output_path))
    if file_format.nil?
      inferred_format = resolved_path.extname.delete('.')
      raise Error::MindeeImageError, 'Invalid file format.' if inferred_format.empty?

      file_format = inferred_format.upcase
    end
    begin
      @buffer.rewind
      image = MiniMagick::Image.read(@buffer)
      image.format file_format.to_s.downcase
      image.write resolved_path.to_s
      logger.info("File saved successfully to '#{resolved_path}'")
    rescue StandardError
      raise Error::MindeeImageError, "Could not save file '#{output_path}'. " \
                                     'Is the provided file path valid?.'
    end
  end

  # Return the file as a Mindee-compatible input source.
  #
  # @return [Mindee::Input::Source::BytesInputSource] Source built from the buffer content and the file name.
  def as_source
    @buffer.rewind
    content = @buffer.read || ''
    Mindee::Input::Source::BytesInputSource.new(content, @filename)
  end

  # Same as {#as_source}.
  #
  # @return [Mindee::Input::Source::BytesInputSource]
  def as_input_source
    as_source
  end

  private

  # Builds "<base>_p<page>_<element><ext>" from the source name; images taken from a PDF are named as JPEGs.
  #
  # @param input_source [LocalInputSource, BytesInputSource]
  # @param page_id [Integer]
  # @param element_id [Integer, nil]
  # @return [String]
  def derived_filename(input_source, page_id, element_id)
    extension = input_source.pdf? ? '.jpg' : File.extname(input_source.filename)
    base_name = File.basename(input_source.filename, File.extname(input_source.filename))
    "#{base_name}_p#{page_id}_#{element_id}#{extension}"
  end
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
# Image processing module.
|
|
5
|
+
module Image
|
|
6
|
+
# Image compressor module to handle image compression.
|
|
7
|
+
module ImageCompressor
|
|
8
|
+
# Resize and/or compress an image. This assumes the ratio was provided beforehand.
|
|
9
|
+
# @param image [MiniMagick::Image, StringIO] Input image.
|
|
10
|
+
# @param quality [Integer, nil] Quality of the final file.
|
|
11
|
+
# @param max_width [Integer, nil] Maximum width. If not specified, the horizontal ratio will remain the same.
|
|
12
|
+
# @param max_height [Integer, nil] Maximum height. If not specified, the vertical ratio will remain the same.
|
|
13
|
+
# @return [StringIO]
|
|
14
|
+
def self.compress_image(image, quality: 85, max_width: nil, max_height: nil)
  working_image = ImageUtils.to_image(image)
  working_image.format 'jpg'

  target_width, target_height = ImageUtils.calculate_new_dimensions(
    working_image, max_width: max_width, max_height: max_height
  )
  # Resizing is skipped when neither dimension came back.
  needs_resize = target_width || target_height
  ImageUtils.resize_image(working_image, target_width, target_height) if needs_resize

  ImageUtils.compress_image_quality(working_image, quality)
  ImageUtils.image_to_stringio(working_image)
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
Mindee::Dependency.require_all_deps!
|
|
4
|
+
require 'mini_magick'
|
|
5
|
+
require 'origami'
|
|
6
|
+
require 'stringio'
|
|
7
|
+
require 'tempfile'
|
|
8
|
+
require_relative '../input/sources'
|
|
9
|
+
require_relative 'extracted_image'
|
|
10
|
+
|
|
11
|
+
module Mindee
|
|
12
|
+
# Image Extraction Module.
|
|
13
|
+
module Image
|
|
14
|
+
# Image Extraction wrapper module.
|
|
15
|
+
module ImageExtractor
|
|
16
|
+
# Attaches an image as a new page in a PdfDocument object.
|
|
17
|
+
#
|
|
18
|
+
# @param [StringIO] input_buffer Input buffer. Only supports JPEG.
|
|
19
|
+
# @return [Origami::PDF] A PdfDocument handle.
|
|
20
|
+
def self.attach_image_as_new_file(input_buffer, format: 'jpg')
  image = MiniMagick::Image.read(input_buffer)
  # NOTE: We force format consolidation to a single format to avoid frames being interpreted as the final output.
  image.format(format)

  horizontal_density = image.resolution[0]
  # Convert from default 300 DPI to 72.
  pdf_density = horizontal_density.to_f / 4.166666
  image.format('pdf', 0, { density: pdf_density.to_s })

  pdf_bytes = image.to_blob
  Origami::PDF.read(StringIO.new(pdf_bytes))
end
|
|
29
|
+
|
|
30
|
+
# Extracts multiple images from a given local input source.
|
|
31
|
+
#
|
|
32
|
+
# @param [Input::Source::LocalInputSource] input_source
|
|
33
|
+
# @param [Integer] page_id ID of the Page to extract from.
|
|
34
|
+
# @param [Array<Array<Geometry::Point>>, Array<Geometry::Quadrilateral>] polygons List of coordinates to extract.
|
|
35
|
+
# @return [Array<Image::ExtractedImage>] Extracted Images.
|
|
36
|
+
def self.extract_multiple_images_from_source(input_source, page_id, polygons)
  # Load the requested page and make sure it is read from its beginning.
  page_stream = load_input_source_pdf_page_as_stringio(input_source, page_id).tap { |stream| stream.seek(0) }
  extract_images_from_polygons(input_source, page_stream, page_id, polygons)
end
|
|
42
|
+
|
|
43
|
+
# Extracts images from their positions on a file (as polygons).
|
|
44
|
+
#
|
|
45
|
+
# @param [Input::Source::LocalInputSource] input_source Local input source.
|
|
46
|
+
# @param [StringIO] pdf_stream Buffer of the PDF.
|
|
47
|
+
# @param [Integer] page_id Page ID.
|
|
48
|
+
# @param [Array<Geometry::Point, Geometry::Polygon, Geometry::Quadrilateral>] polygons
|
|
49
|
+
# @return [Array<Image::ExtractedImage>] Extracted Images.
|
|
50
|
+
def self.extract_images_from_polygons(input_source, pdf_stream, page_id, polygons)
  # One extracted image per polygon; the polygon's position in the list is its element ID.
  polygons.each_with_index.map do |raw_polygon, element_id|
    quad = ImageUtils.normalize_polygon(raw_polygon)
    # The page content is read again for every polygon.
    page_content = ImageUtils.read_page_content(pdf_stream)

    corners = [quad.top_left, quad.bottom_right, quad.top_right, quad.bottom_left]
    x_bounds = Geometry.get_min_max_x(corners)
    y_bounds = Geometry.get_min_max_y(corners)

    file_extension = ImageUtils.determine_file_extension(input_source)
    cropped_image = ImageUtils.crop_image(page_content, x_bounds, y_bounds)
    # Crops taken from a PDF page are stored as JPEG; otherwise the source format is kept.
    cropped_image.format(file_extension == 'pdf' ? 'jpg' : file_extension.to_s)

    buffer = StringIO.new
    ImageUtils.write_image_to_buffer(cropped_image, buffer)
    file_name = "#{input_source.filename}_page#{page_id}-#{element_id}.#{file_extension}"

    create_extracted_image(buffer, file_name, page_id, element_id)
  end
end
|
|
86
|
+
|
|
87
|
+
# Generates an ExtractedImage.
|
|
88
|
+
#
|
|
89
|
+
# @param [StringIO] buffer Buffer containing the image.
|
|
90
|
+
# @param [String] file_name Name for the file.
|
|
91
|
+
# @param [Integer] page_id ID of the page the file was extracted from.
|
|
92
|
+
# @param [Integer] element_id ID of the element of a given page.
|
|
93
|
+
def self.create_extracted_image(buffer, file_name, page_id, element_id)
  buffer.rewind
  image_bytes = buffer.read.to_s
  source = Input::Source::BytesInputSource.new(image_bytes, file_name)
  # The file name was already built by the caller, so it must be kept as-is.
  ExtractedImage.new(source, page_id, element_id, preserve_input_filename: true)
end
|
|
102
|
+
|
|
103
|
+
# Loads a single_page from an image file or a pdf document.
|
|
104
|
+
#
|
|
105
|
+
# @param input_file [LocalInputSource] Local input.
|
|
106
|
+
# @param [Integer] page_id Page ID.
|
|
107
|
+
# @return [StringIO] A valid PdfDocument handle.
|
|
108
|
+
def self.load_input_source_pdf_page_as_stringio(input_file, page_id)
  input_file.io_stream.rewind
  # Non-PDF inputs are returned as their own (rewound) stream.
  return input_file.io_stream unless input_file.pdf?

  PDF::PDFProcessor.get_page(Origami::PDF.read(input_file.io_stream), page_id)
end
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
end
|