mindee-lite 5.0.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +24 -0
- data/.gitattributes +14 -0
- data/.gitignore +76 -0
- data/.gitmodules +3 -0
- data/.pre-commit-config.yaml +36 -0
- data/.rubocop.yml +49 -0
- data/.yardopts +4 -0
- data/CHANGELOG.md +515 -0
- data/CODE_OF_CONDUCT.md +129 -0
- data/CONTRIBUTING.md +107 -0
- data/Gemfile +14 -0
- data/LICENSE +21 -0
- data/README.md +42 -0
- data/Rakefile +40 -0
- data/Steepfile +30 -0
- data/bin/console +14 -0
- data/bin/mindee.rb +30 -0
- data/bin/v1/parser.rb +153 -0
- data/bin/v1/products.rb +88 -0
- data/bin/v2/parser.rb +235 -0
- data/bin/v2/products.rb +34 -0
- data/docs/code_samples/bank_account_details_v1.txt +24 -0
- data/docs/code_samples/bank_account_details_v2.txt +24 -0
- data/docs/code_samples/bank_statement_fr_v2_async.txt +24 -0
- data/docs/code_samples/barcode_reader_v1.txt +24 -0
- data/docs/code_samples/cropper_v1.txt +21 -0
- data/docs/code_samples/default.txt +30 -0
- data/docs/code_samples/default_async.txt +29 -0
- data/docs/code_samples/expense_receipts_v5.txt +25 -0
- data/docs/code_samples/expense_receipts_v5_async.txt +24 -0
- data/docs/code_samples/financial_document_v1.txt +25 -0
- data/docs/code_samples/financial_document_v1_async.txt +24 -0
- data/docs/code_samples/idcard_fr_v1.txt +24 -0
- data/docs/code_samples/idcard_fr_v2.txt +24 -0
- data/docs/code_samples/international_id_v2_async.txt +24 -0
- data/docs/code_samples/invoice_splitter_v1_async.txt +24 -0
- data/docs/code_samples/invoices_v4.txt +25 -0
- data/docs/code_samples/invoices_v4_async.txt +24 -0
- data/docs/code_samples/multi_receipts_detector_v1.txt +24 -0
- data/docs/code_samples/passport_v1.txt +24 -0
- data/docs/code_samples/resume_v1_async.txt +24 -0
- data/docs/code_samples/v2_classification.txt +30 -0
- data/docs/code_samples/v2_crop.txt +30 -0
- data/docs/code_samples/v2_extraction.txt +42 -0
- data/docs/code_samples/v2_extraction_webhook.txt +45 -0
- data/docs/code_samples/v2_ocr.txt +30 -0
- data/docs/code_samples/v2_split.txt +30 -0
- data/docs/code_samples/workflow_execution.txt +28 -0
- data/docs/code_samples/workflow_polling.txt +35 -0
- data/examples/auto_invoice_splitter_extraction.rb +48 -0
- data/examples/auto_multi_receipts_detector_extraction.rb +30 -0
- data/lib/mindee/dependency.rb +29 -0
- data/lib/mindee/error/mindee_error.rb +17 -0
- data/lib/mindee/error/mindee_http_error.rb +36 -0
- data/lib/mindee/error/mindee_http_error_v2.rb +45 -0
- data/lib/mindee/error/mindee_http_unknown_error_v2.rb +18 -0
- data/lib/mindee/error/mindee_input_error.rb +30 -0
- data/lib/mindee/error.rb +6 -0
- data/lib/mindee/geometry/min_max.rb +23 -0
- data/lib/mindee/geometry/point.rb +41 -0
- data/lib/mindee/geometry/polygon.rb +37 -0
- data/lib/mindee/geometry/quadrilateral.rb +50 -0
- data/lib/mindee/geometry/utils.rb +88 -0
- data/lib/mindee/geometry.rb +7 -0
- data/lib/mindee/http/.rubocop.yml +7 -0
- data/lib/mindee/http/http_error_handler.rb +106 -0
- data/lib/mindee/http/response_validation.rb +81 -0
- data/lib/mindee/http.rb +3 -0
- data/lib/mindee/image/extracted_image.rb +89 -0
- data/lib/mindee/image/image_compressor.rb +29 -0
- data/lib/mindee/image/image_extractor.rb +118 -0
- data/lib/mindee/image/image_utils.rb +165 -0
- data/lib/mindee/image.rb +6 -0
- data/lib/mindee/input/base_parameters.rb +149 -0
- data/lib/mindee/input/local_response.rb +80 -0
- data/lib/mindee/input/polling_options.rb +26 -0
- data/lib/mindee/input/sources/base64_input_source.rb +31 -0
- data/lib/mindee/input/sources/bytes_input_source.rb +21 -0
- data/lib/mindee/input/sources/file_input_source.rb +20 -0
- data/lib/mindee/input/sources/local_input_source.rb +216 -0
- data/lib/mindee/input/sources/path_input_source.rb +20 -0
- data/lib/mindee/input/sources/url_input_source.rb +130 -0
- data/lib/mindee/input/sources.rb +8 -0
- data/lib/mindee/input.rb +4 -0
- data/lib/mindee/logging/logger.rb +24 -0
- data/lib/mindee/logging.rb +3 -0
- data/lib/mindee/page_options.rb +24 -0
- data/lib/mindee/pdf/extracted_pdf.rb +70 -0
- data/lib/mindee/pdf/pdf_compressor.rb +121 -0
- data/lib/mindee/pdf/pdf_extractor.rb +121 -0
- data/lib/mindee/pdf/pdf_processor.rb +91 -0
- data/lib/mindee/pdf/pdf_tools.rb +201 -0
- data/lib/mindee/pdf.rb +7 -0
- data/lib/mindee/v1/client.rb +490 -0
- data/lib/mindee/v1/extraction/multi_receipts_extractor.rb +32 -0
- data/lib/mindee/v1/extraction.rb +3 -0
- data/lib/mindee/v1/http/.rubocop.yml +7 -0
- data/lib/mindee/v1/http/endpoint.rb +221 -0
- data/lib/mindee/v1/http/workflow_endpoint.rb +93 -0
- data/lib/mindee/v1/http.rb +4 -0
- data/lib/mindee/v1/parsing/common/api_request.rb +38 -0
- data/lib/mindee/v1/parsing/common/api_response.rb +63 -0
- data/lib/mindee/v1/parsing/common/document.rb +86 -0
- data/lib/mindee/v1/parsing/common/execution.rb +78 -0
- data/lib/mindee/v1/parsing/common/execution_file.rb +26 -0
- data/lib/mindee/v1/parsing/common/execution_priority.rb +38 -0
- data/lib/mindee/v1/parsing/common/extras/cropper_extra.rb +32 -0
- data/lib/mindee/v1/parsing/common/extras/extras.rb +62 -0
- data/lib/mindee/v1/parsing/common/extras/full_text_ocr_extra.rb +35 -0
- data/lib/mindee/v1/parsing/common/extras/rag_extra.rb +28 -0
- data/lib/mindee/v1/parsing/common/extras.rb +6 -0
- data/lib/mindee/v1/parsing/common/inference.rb +69 -0
- data/lib/mindee/v1/parsing/common/job.rb +48 -0
- data/lib/mindee/v1/parsing/common/ocr/mvision_v1.rb +52 -0
- data/lib/mindee/v1/parsing/common/ocr/ocr.rb +180 -0
- data/lib/mindee/v1/parsing/common/ocr.rb +3 -0
- data/lib/mindee/v1/parsing/common/orientation.rb +28 -0
- data/lib/mindee/v1/parsing/common/page.rb +49 -0
- data/lib/mindee/v1/parsing/common/prediction.rb +19 -0
- data/lib/mindee/v1/parsing/common/product.rb +26 -0
- data/lib/mindee/v1/parsing/common/workflow_response.rb +30 -0
- data/lib/mindee/v1/parsing/common.rb +15 -0
- data/lib/mindee/v1/parsing/standard/abstract_field.rb +74 -0
- data/lib/mindee/v1/parsing/standard/address_field.rb +51 -0
- data/lib/mindee/v1/parsing/standard/amount_field.rb +28 -0
- data/lib/mindee/v1/parsing/standard/base_field.rb +30 -0
- data/lib/mindee/v1/parsing/standard/boolean_field.rb +29 -0
- data/lib/mindee/v1/parsing/standard/classification_field.rb +18 -0
- data/lib/mindee/v1/parsing/standard/company_registration_field.rb +45 -0
- data/lib/mindee/v1/parsing/standard/date_field.rb +40 -0
- data/lib/mindee/v1/parsing/standard/feature_field.rb +26 -0
- data/lib/mindee/v1/parsing/standard/locale_field.rb +52 -0
- data/lib/mindee/v1/parsing/standard/payment_details_field.rb +44 -0
- data/lib/mindee/v1/parsing/standard/position_field.rb +61 -0
- data/lib/mindee/v1/parsing/standard/string_field.rb +26 -0
- data/lib/mindee/v1/parsing/standard/tax_field.rb +110 -0
- data/lib/mindee/v1/parsing/standard.rb +15 -0
- data/lib/mindee/v1/parsing/universal/universal_list_field.rb +60 -0
- data/lib/mindee/v1/parsing/universal/universal_object_field.rb +123 -0
- data/lib/mindee/v1/parsing/universal.rb +4 -0
- data/lib/mindee/v1/parsing.rb +5 -0
- data/lib/mindee/v1/product/.rubocop.yml +12 -0
- data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1.rb +47 -0
- data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb +47 -0
- data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rb +38 -0
- data/lib/mindee/v1/product/cropper/cropper_v1.rb +47 -0
- data/lib/mindee/v1/product/cropper/cropper_v1_document.rb +15 -0
- data/lib/mindee/v1/product/cropper/cropper_v1_page.rb +55 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1.rb +47 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb +329 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb +124 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1_line_items.rb +64 -0
- data/lib/mindee/v1/product/financial_document/financial_document_v1_page.rb +38 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rb +40 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb +63 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb +60 -0
- data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rb +40 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2.rb +49 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb +169 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rb +40 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb +78 -0
- data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rb +56 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v1.rb +49 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb +106 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v1_page.rb +57 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v2.rb +49 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb +143 -0
- data/lib/mindee/v1/product/fr/id_card/id_card_v2_page.rb +65 -0
- data/lib/mindee/v1/product/international_id/international_id_v2.rb +47 -0
- data/lib/mindee/v1/product/international_id/international_id_v2_document.rb +164 -0
- data/lib/mindee/v1/product/international_id/international_id_v2_page.rb +38 -0
- data/lib/mindee/v1/product/invoice/invoice_v4.rb +47 -0
- data/lib/mindee/v1/product/invoice/invoice_v4_document.rb +300 -0
- data/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb +124 -0
- data/lib/mindee/v1/product/invoice/invoice_v4_line_items.rb +64 -0
- data/lib/mindee/v1/product/invoice/invoice_v4_page.rb +38 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rb +47 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb +66 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +58 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +50 -0
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rb +38 -0
- data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rb +47 -0
- data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +38 -0
- data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +38 -0
- data/lib/mindee/v1/product/passport/passport_v1.rb +47 -0
- data/lib/mindee/v1/product/passport/passport_v1_document.rb +112 -0
- data/lib/mindee/v1/product/passport/passport_v1_page.rb +38 -0
- data/lib/mindee/v1/product/receipt/receipt_v5.rb +47 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_document.rb +187 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb +88 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_line_items.rb +56 -0
- data/lib/mindee/v1/product/receipt/receipt_v5_page.rb +38 -0
- data/lib/mindee/v1/product/resume/resume_v1.rb +47 -0
- data/lib/mindee/v1/product/resume/resume_v1_certificate.rb +82 -0
- data/lib/mindee/v1/product/resume/resume_v1_certificates.rb +60 -0
- data/lib/mindee/v1/product/resume/resume_v1_document.rb +340 -0
- data/lib/mindee/v1/product/resume/resume_v1_education.rb +106 -0
- data/lib/mindee/v1/product/resume/resume_v1_educations.rb +66 -0
- data/lib/mindee/v1/product/resume/resume_v1_language.rb +66 -0
- data/lib/mindee/v1/product/resume/resume_v1_languages.rb +56 -0
- data/lib/mindee/v1/product/resume/resume_v1_page.rb +38 -0
- data/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb +122 -0
- data/lib/mindee/v1/product/resume/resume_v1_professional_experiences.rb +70 -0
- data/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb +66 -0
- data/lib/mindee/v1/product/resume/resume_v1_social_networks_urls.rb +56 -0
- data/lib/mindee/v1/product/universal/universal.rb +48 -0
- data/lib/mindee/v1/product/universal/universal_document.rb +35 -0
- data/lib/mindee/v1/product/universal/universal_page.rb +54 -0
- data/lib/mindee/v1/product/universal/universal_prediction.rb +128 -0
- data/lib/mindee/v1/product.rb +18 -0
- data/lib/mindee/v1.rb +7 -0
- data/lib/mindee/v2/client.rb +132 -0
- data/lib/mindee/v2/file_operation/crop.rb +51 -0
- data/lib/mindee/v2/file_operation/crop_files.rb +25 -0
- data/lib/mindee/v2/file_operation/split.rb +37 -0
- data/lib/mindee/v2/file_operation/split_files.rb +25 -0
- data/lib/mindee/v2/file_operation.rb +6 -0
- data/lib/mindee/v2/http/.rubocop.yml +7 -0
- data/lib/mindee/v2/http/api_v2_settings.rb +65 -0
- data/lib/mindee/v2/http/mindee_api_v2.rb +230 -0
- data/lib/mindee/v2/http.rb +4 -0
- data/lib/mindee/v2/parsing/base_inference.rb +44 -0
- data/lib/mindee/v2/parsing/base_response.rb +15 -0
- data/lib/mindee/v2/parsing/common_response.rb +20 -0
- data/lib/mindee/v2/parsing/error_item.rb +21 -0
- data/lib/mindee/v2/parsing/error_response.rb +51 -0
- data/lib/mindee/v2/parsing/field/base_field.rb +63 -0
- data/lib/mindee/v2/parsing/field/field_confidence.rb +128 -0
- data/lib/mindee/v2/parsing/field/field_location.rb +33 -0
- data/lib/mindee/v2/parsing/field/inference_fields.rb +105 -0
- data/lib/mindee/v2/parsing/field/list_field.rb +79 -0
- data/lib/mindee/v2/parsing/field/object_field.rb +138 -0
- data/lib/mindee/v2/parsing/field/simple_field.rb +60 -0
- data/lib/mindee/v2/parsing/field.rb +9 -0
- data/lib/mindee/v2/parsing/inference_active_options.rb +67 -0
- data/lib/mindee/v2/parsing/inference_file.rb +38 -0
- data/lib/mindee/v2/parsing/inference_job.rb +25 -0
- data/lib/mindee/v2/parsing/inference_model.rb +30 -0
- data/lib/mindee/v2/parsing/job.rb +93 -0
- data/lib/mindee/v2/parsing/job_response.rb +30 -0
- data/lib/mindee/v2/parsing/job_webhook.rb +59 -0
- data/lib/mindee/v2/parsing/rag_metadata.rb +17 -0
- data/lib/mindee/v2/parsing/raw_text.rb +27 -0
- data/lib/mindee/v2/parsing/raw_text_page.rb +24 -0
- data/lib/mindee/v2/parsing/search/pagination_metadata.rb +44 -0
- data/lib/mindee/v2/parsing/search/search_model.rb +38 -0
- data/lib/mindee/v2/parsing/search/search_models.rb +34 -0
- data/lib/mindee/v2/parsing/search/search_response.rb +38 -0
- data/lib/mindee/v2/parsing/search.rb +6 -0
- data/lib/mindee/v2/parsing.rb +16 -0
- data/lib/mindee/v2/product/base_product.rb +28 -0
- data/lib/mindee/v2/product/classification/classification.rb +20 -0
- data/lib/mindee/v2/product/classification/classification_classifier.rb +25 -0
- data/lib/mindee/v2/product/classification/classification_inference.rb +35 -0
- data/lib/mindee/v2/product/classification/classification_response.rb +32 -0
- data/lib/mindee/v2/product/classification/classification_result.rb +27 -0
- data/lib/mindee/v2/product/classification/params/classification_parameters.rb +47 -0
- data/lib/mindee/v2/product/crop/crop.rb +20 -0
- data/lib/mindee/v2/product/crop/crop_inference.rb +34 -0
- data/lib/mindee/v2/product/crop/crop_item.rb +39 -0
- data/lib/mindee/v2/product/crop/crop_response.rb +40 -0
- data/lib/mindee/v2/product/crop/crop_result.rb +34 -0
- data/lib/mindee/v2/product/crop/params/crop_parameters.rb +47 -0
- data/lib/mindee/v2/product/extraction/extraction.rb +21 -0
- data/lib/mindee/v2/product/extraction/extraction_inference.rb +40 -0
- data/lib/mindee/v2/product/extraction/extraction_response.rb +32 -0
- data/lib/mindee/v2/product/extraction/extraction_result.rb +44 -0
- data/lib/mindee/v2/product/extraction/params/data_schema.rb +51 -0
- data/lib/mindee/v2/product/extraction/params/data_schema_field.rb +69 -0
- data/lib/mindee/v2/product/extraction/params/data_schema_replace.rb +39 -0
- data/lib/mindee/v2/product/extraction/params/extraction_parameters.rb +125 -0
- data/lib/mindee/v2/product/ocr/ocr.rb +20 -0
- data/lib/mindee/v2/product/ocr/ocr_inference.rb +34 -0
- data/lib/mindee/v2/product/ocr/ocr_page.rb +33 -0
- data/lib/mindee/v2/product/ocr/ocr_response.rb +32 -0
- data/lib/mindee/v2/product/ocr/ocr_result.rb +34 -0
- data/lib/mindee/v2/product/ocr/ocr_word.rb +29 -0
- data/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +47 -0
- data/lib/mindee/v2/product/split/params/split_parameters.rb +48 -0
- data/lib/mindee/v2/product/split/split.rb +19 -0
- data/lib/mindee/v2/product/split/split_inference.rb +34 -0
- data/lib/mindee/v2/product/split/split_range.rb +38 -0
- data/lib/mindee/v2/product/split/split_response.rb +40 -0
- data/lib/mindee/v2/product/split/split_result.rb +34 -0
- data/lib/mindee/v2/product.rb +7 -0
- data/lib/mindee/v2.rb +7 -0
- data/lib/mindee/version.rb +26 -0
- data/lib/mindee.rb +135 -0
- data/mindee-lite.gemspec +36 -0
- data/mindee.gemspec +44 -0
- data/sig/custom/marcel.rbs +3 -0
- data/sig/custom/mini_magick.rbs +31 -0
- data/sig/custom/net_http.rbs +43 -0
- data/sig/custom/origami.rbs +59 -0
- data/sig/mindee/dependency.rbs +13 -0
- data/sig/mindee/error/mindee_error.rbs +13 -0
- data/sig/mindee/error/mindee_http_error.rbs +17 -0
- data/sig/mindee/error/mindee_http_error_v2.rbs +15 -0
- data/sig/mindee/error/mindee_http_unknown_error_v2.rbs +9 -0
- data/sig/mindee/error/mindee_input_error.rbs +18 -0
- data/sig/mindee/geometry/min_max.rbs +11 -0
- data/sig/mindee/geometry/point.rbs +14 -0
- data/sig/mindee/geometry/polygon.rbs +12 -0
- data/sig/mindee/geometry/quadrilateral.rbs +15 -0
- data/sig/mindee/geometry/utils.rbs +13 -0
- data/sig/mindee/http/http_error_handler.rbs +15 -0
- data/sig/mindee/http/response_validation.rbs +11 -0
- data/sig/mindee/image/extracted_image.rbs +21 -0
- data/sig/mindee/image/image_compressor.rbs +8 -0
- data/sig/mindee/image/image_extractor.rbs +13 -0
- data/sig/mindee/image/image_utils.rbs +19 -0
- data/sig/mindee/input/base_parameters.rbs +35 -0
- data/sig/mindee/input/local_response.rbs +14 -0
- data/sig/mindee/input/polling_options.rbs +12 -0
- data/sig/mindee/input/sources/base64_input_source.rbs +11 -0
- data/sig/mindee/input/sources/bytes_input_source.rbs +10 -0
- data/sig/mindee/input/sources/file_input_source.rbs +10 -0
- data/sig/mindee/input/sources/local_input_source.rbs +30 -0
- data/sig/mindee/input/sources/path_input_source.rbs +10 -0
- data/sig/mindee/input/sources/url_input_source.rbs +20 -0
- data/sig/mindee/logging/logger.rbs +11 -0
- data/sig/mindee/page_options.rbs +11 -0
- data/sig/mindee/pdf/extracted_pdf.rbs +17 -0
- data/sig/mindee/pdf/pdf_compressor.rbs +15 -0
- data/sig/mindee/pdf/pdf_extractor.rbs +19 -0
- data/sig/mindee/pdf/pdf_processor.rbs +12 -0
- data/sig/mindee/pdf/pdf_tools.rbs +31 -0
- data/sig/mindee/v1/client.rbs +84 -0
- data/sig/mindee/v1/extraction/multi_receipts_extractor.rbs +8 -0
- data/sig/mindee/v1/http/endpoint.rbs +41 -0
- data/sig/mindee/v1/http/workflow_endpoint.rbs +22 -0
- data/sig/mindee/v1/parsing/common/api_request.rbs +22 -0
- data/sig/mindee/v1/parsing/common/api_response.rbs +31 -0
- data/sig/mindee/v1/parsing/common/document.rbs +32 -0
- data/sig/mindee/v1/parsing/common/execution.rbs +26 -0
- data/sig/mindee/v1/parsing/common/execution_file.rbs +16 -0
- data/sig/mindee/v1/parsing/common/execution_priority.rbs +16 -0
- data/sig/mindee/v1/parsing/common/extras/cropper_extra.rbs +18 -0
- data/sig/mindee/v1/parsing/common/extras/extras.rbs +24 -0
- data/sig/mindee/v1/parsing/common/extras/full_text_ocr_extra.rbs +22 -0
- data/sig/mindee/v1/parsing/common/extras/rag_extra.rbs +19 -0
- data/sig/mindee/v1/parsing/common/inference.rbs +31 -0
- data/sig/mindee/v1/parsing/common/job.rbs +24 -0
- data/sig/mindee/v1/parsing/common/ocr/mvision_v1.rbs +20 -0
- data/sig/mindee/v1/parsing/common/ocr/ocr.rbs +56 -0
- data/sig/mindee/v1/parsing/common/orientation.rbs +15 -0
- data/sig/mindee/v1/parsing/common/page.rbs +19 -0
- data/sig/mindee/v1/parsing/common/prediction.rbs +14 -0
- data/sig/mindee/v1/parsing/common/product.rbs +16 -0
- data/sig/mindee/v1/parsing/common/workflow_response.rbs +22 -0
- data/sig/mindee/v1/parsing/standard/abstract_field.rbs +30 -0
- data/sig/mindee/v1/parsing/standard/address_field.rbs +28 -0
- data/sig/mindee/v1/parsing/standard/amount_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/base_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/boolean_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/classification_field.rbs +12 -0
- data/sig/mindee/v1/parsing/standard/company_registration_field.rbs +20 -0
- data/sig/mindee/v1/parsing/standard/date_field.rbs +20 -0
- data/sig/mindee/v1/parsing/standard/feature_field.rbs +12 -0
- data/sig/mindee/v1/parsing/standard/locale_field.rbs +24 -0
- data/sig/mindee/v1/parsing/standard/payment_details_field.rbs +19 -0
- data/sig/mindee/v1/parsing/standard/position_field.rbs +26 -0
- data/sig/mindee/v1/parsing/standard/string_field.rbs +16 -0
- data/sig/mindee/v1/parsing/standard/tax_field.rbs +33 -0
- data/sig/mindee/v1/parsing/universal/universal_list_field.rbs +21 -0
- data/sig/mindee/v1/parsing/universal/universal_object_field.rbs +38 -0
- data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1.rbs +13 -0
- data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rbs +16 -0
- data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/cropper/cropper_v1.rbs +13 -0
- data/sig/mindee/v1/product/cropper/cropper_v1_document.rbs +14 -0
- data/sig/mindee/v1/product/cropper/cropper_v1_page.rbs +19 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1.rbs +13 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1_document.rbs +49 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs +35 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1_line_items.rbs +15 -0
- data/sig/mindee/v1/product/financial_document/financial_document_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rbs +15 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rbs +19 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rbs +19 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rbs +15 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs +25 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rbs +20 -0
- data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rbs +19 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2.rbs +15 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rbs +31 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rbs +19 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs +27 -0
- data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rbs +17 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v1.rbs +15 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v1_document.rbs +26 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v1_page.rbs +20 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v2.rbs +15 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v2_document.rbs +31 -0
- data/sig/mindee/v1/product/fr/id_card/id_card_v2_page.rbs +21 -0
- data/sig/mindee/v1/product/international_id/international_id_v2.rbs +13 -0
- data/sig/mindee/v1/product/international_id/international_id_v2_document.rbs +31 -0
- data/sig/mindee/v1/product/international_id/international_id_v2_page.rbs +17 -0
- data/sig/mindee/v1/product/invoice/invoice_v4.rbs +13 -0
- data/sig/mindee/v1/product/invoice/invoice_v4_document.rbs +45 -0
- data/sig/mindee/v1/product/invoice/invoice_v4_line_item.rbs +35 -0
- data/sig/mindee/v1/product/invoice/invoice_v4_line_items.rbs +15 -0
- data/sig/mindee/v1/product/invoice/invoice_v4_page.rbs +17 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rbs +13 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rbs +17 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs +21 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rbs +15 -0
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs +14 -0
- data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs +15 -0
- data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/passport/passport_v1.rbs +13 -0
- data/sig/mindee/v1/product/passport/passport_v1_document.rbs +25 -0
- data/sig/mindee/v1/product/passport/passport_v1_page.rbs +17 -0
- data/sig/mindee/v1/product/receipt/receipt_v5.rbs +13 -0
- data/sig/mindee/v1/product/receipt/receipt_v5_document.rbs +33 -0
- data/sig/mindee/v1/product/receipt/receipt_v5_line_item.rbs +27 -0
- data/sig/mindee/v1/product/receipt/receipt_v5_line_items.rbs +15 -0
- data/sig/mindee/v1/product/receipt/receipt_v5_page.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1.rbs +13 -0
- data/sig/mindee/v1/product/resume/resume_v1_certificate.rbs +27 -0
- data/sig/mindee/v1/product/resume/resume_v1_certificates.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1_document.rbs +69 -0
- data/sig/mindee/v1/product/resume/resume_v1_education.rbs +33 -0
- data/sig/mindee/v1/product/resume/resume_v1_educations.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1_language.rbs +23 -0
- data/sig/mindee/v1/product/resume/resume_v1_languages.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1_page.rbs +19 -0
- data/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs +37 -0
- data/sig/mindee/v1/product/resume/resume_v1_professional_experiences.rbs +17 -0
- data/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs +23 -0
- data/sig/mindee/v1/product/resume/resume_v1_social_networks_urls.rbs +17 -0
- data/sig/mindee/v1/product/universal/universal.rbs +16 -0
- data/sig/mindee/v1/product/universal/universal_document.rbs +12 -0
- data/sig/mindee/v1/product/universal/universal_page.rbs +18 -0
- data/sig/mindee/v1/product/universal/universal_prediction.rbs +30 -0
- data/sig/mindee/v2/client.rbs +29 -0
- data/sig/mindee/v2/file_operation/crop.rbs +10 -0
- data/sig/mindee/v2/file_operation/crop_files.rbs +9 -0
- data/sig/mindee/v2/file_operation/split.rbs +11 -0
- data/sig/mindee/v2/file_operation/split_files.rbs +9 -0
- data/sig/mindee/v2/http/api_v2_settings.rbs +27 -0
- data/sig/mindee/v2/http/mindee_api_v2.rbs +52 -0
- data/sig/mindee/v2/parsing/base_inference.rbs +18 -0
- data/sig/mindee/v2/parsing/base_response.rbs +11 -0
- data/sig/mindee/v2/parsing/common_response.rbs +12 -0
- data/sig/mindee/v2/parsing/error_item.rbs +13 -0
- data/sig/mindee/v2/parsing/error_response.rbs +20 -0
- data/sig/mindee/v2/parsing/field/base_field.rbs +17 -0
- data/sig/mindee/v2/parsing/field/field_confidence.rbs +30 -0
- data/sig/mindee/v2/parsing/field/field_location.rbs +16 -0
- data/sig/mindee/v2/parsing/field/inference_fields.rbs +20 -0
- data/sig/mindee/v2/parsing/field/list_field.rbs +23 -0
- data/sig/mindee/v2/parsing/field/object_field.rbs +27 -0
- data/sig/mindee/v2/parsing/field/simple_field.rbs +16 -0
- data/sig/mindee/v2/parsing/inference_active_options.rbs +26 -0
- data/sig/mindee/v2/parsing/inference_file.rbs +17 -0
- data/sig/mindee/v2/parsing/inference_job.rbs +13 -0
- data/sig/mindee/v2/parsing/inference_model.rbs +12 -0
- data/sig/mindee/v2/parsing/job.rbs +24 -0
- data/sig/mindee/v2/parsing/job_response.rbs +14 -0
- data/sig/mindee/v2/parsing/job_webhook.rbs +19 -0
- data/sig/mindee/v2/parsing/rag_metadata.rbs +13 -0
- data/sig/mindee/v2/parsing/raw_text.rbs +12 -0
- data/sig/mindee/v2/parsing/raw_text_page.rbs +11 -0
- data/sig/mindee/v2/parsing/search/pagination_metadata.rbs +20 -0
- data/sig/mindee/v2/parsing/search/search_model.rbs +19 -0
- data/sig/mindee/v2/parsing/search/search_response.rbs +17 -0
- data/sig/mindee/v2/parsing/search_models.rbs +14 -0
- data/sig/mindee/v2/product/base_product.rbs +19 -0
- data/sig/mindee/v2/product/classification/classification.rbs +10 -0
- data/sig/mindee/v2/product/classification/classification_classifier.rbs +15 -0
- data/sig/mindee/v2/product/classification/classification_inference.rbs +15 -0
- data/sig/mindee/v2/product/classification/classification_response.rbs +23 -0
- data/sig/mindee/v2/product/classification/classification_result.rbs +15 -0
- data/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs +23 -0
- data/sig/mindee/v2/product/crop/crop.rbs +10 -0
- data/sig/mindee/v2/product/crop/crop_inference.rbs +14 -0
- data/sig/mindee/v2/product/crop/crop_item.rbs +18 -0
- data/sig/mindee/v2/product/crop/crop_response.rbs +25 -0
- data/sig/mindee/v2/product/crop/crop_result.rbs +14 -0
- data/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs +23 -0
- data/sig/mindee/v2/product/extraction/extraction.rbs +15 -0
- data/sig/mindee/v2/product/extraction/extraction_inference.rbs +19 -0
- data/sig/mindee/v2/product/extraction/extraction_response.rbs +24 -0
- data/sig/mindee/v2/product/extraction/extraction_result.rbs +18 -0
- data/sig/mindee/v2/product/extraction/params/data_schema.rbs +21 -0
- data/sig/mindee/v2/product/extraction/params/data_schema_field.rbs +29 -0
- data/sig/mindee/v2/product/extraction/params/data_schema_replace.rbs +21 -0
- data/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs +38 -0
- data/sig/mindee/v2/product/ocr/ocr.rbs +10 -0
- data/sig/mindee/v2/product/ocr/ocr_inference.rbs +14 -0
- data/sig/mindee/v2/product/ocr/ocr_page.rbs +15 -0
- data/sig/mindee/v2/product/ocr/ocr_response.rbs +23 -0
- data/sig/mindee/v2/product/ocr/ocr_result.rbs +14 -0
- data/sig/mindee/v2/product/ocr/ocr_word.rbs +15 -0
- data/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs +24 -0
- data/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs +23 -0
- data/sig/mindee/v2/product/split/split.rbs +10 -0
- data/sig/mindee/v2/product/split/split_inference.rbs +14 -0
- data/sig/mindee/v2/product/split/split_range.rbs +18 -0
- data/sig/mindee/v2/product/split/split_response.rbs +25 -0
- data/sig/mindee/v2/product/split/split_result.rbs +14 -0
- data/sig/mindee/version.rbs +6 -0
- data/sig/mindee.rbs +62 -0
- metadata +600 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'stringio'
|
|
4
|
+
require 'marcel'
|
|
5
|
+
require 'fileutils'
|
|
6
|
+
|
|
7
|
+
require_relative '../../dependency'
|
|
8
|
+
require_relative '../../pdf' if Mindee::Dependency.all_deps_available?
|
|
9
|
+
require_relative '../../image' if Mindee::Dependency.all_deps_available?
|
|
10
|
+
|
|
11
|
+
module Mindee
|
|
12
|
+
module Input
|
|
13
|
+
# Document source handling.
|
|
14
|
+
module Source
|
|
15
|
+
# Mime types accepted by the server.
|
|
16
|
+
# Frozen so the shared list cannot be modified once the file is loaded.
ALLOWED_MIME_TYPES = %w[
  application/pdf
  image/heic
  image/png
  image/jpeg
  image/tiff
  image/webp
].freeze
|
|
24
|
+
|
|
25
|
+
# Base class for loading documents.
|
|
26
|
+
class LocalInputSource
|
|
27
|
+
# @return [String]
|
|
28
|
+
attr_reader :filename
|
|
29
|
+
# @return [String]
|
|
30
|
+
attr_reader :file_mimetype
|
|
31
|
+
# @return [StringIO, File]
|
|
32
|
+
attr_reader :io_stream
|
|
33
|
+
|
|
34
|
+
# @param io_stream [StringIO, File]
|
|
35
|
+
# @param filename [String]
|
|
36
|
+
# @param repair_pdf [Boolean] Attempt to fix the PDF data if the detected MIME type is not accepted.
|
|
37
|
+
def initialize(io_stream, filename, repair_pdf: false)
|
|
38
|
+
@io_stream = io_stream
|
|
39
|
+
@filename = filename
|
|
40
|
+
@file_mimetype = if repair_pdf
|
|
41
|
+
Marcel::MimeType.for @io_stream
|
|
42
|
+
else
|
|
43
|
+
Marcel::MimeType.for @io_stream, name: @filename
|
|
44
|
+
end
|
|
45
|
+
if ALLOWED_MIME_TYPES.include? @file_mimetype
|
|
46
|
+
logger.debug("Loaded new input #{@filename} from #{self.class}")
|
|
47
|
+
return
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
if filename.end_with?('.pdf') && repair_pdf
|
|
51
|
+
fix_pdf!
|
|
52
|
+
|
|
53
|
+
logger.debug("Loaded new input #{@filename} from #{self.class}")
|
|
54
|
+
return if ALLOWED_MIME_TYPES.include? @file_mimetype
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
raise Error::MindeeMimeTypeError, @file_mimetype.to_s
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Deprecated entry point: ignores its argument and delegates to {#fix_pdf!}.
# @deprecated See {#fix_pdf!} or {#self.fix_pdf} instead.
def rescue_broken_pdf(_unused)
  fix_pdf!
end
|
|
64
|
+
|
|
65
|
+
# Tells whether the detected MIME type is `application/pdf`.
# @return [bool]
def pdf?
  @file_mimetype.to_s.eql?('application/pdf')
end
|
|
69
|
+
|
|
70
|
+
# Repairs the PDF data held by this source, in place.
# The stream is replaced by the repaired one, rewound, and the MIME type is detected again from it.
# @param maximum_offset [Integer] Maximum offset to look for the PDF header.
# @return [void]
# @raise [Mindee::Error::MindeePDFError]
def fix_pdf!(maximum_offset: 500)
  repaired = LocalInputSource.fix_pdf(@io_stream, maximum_offset: maximum_offset)
  @io_stream = repaired
  @io_stream.rewind
  @file_mimetype = Marcel::MimeType.for(@io_stream)
end
|
|
79
|
+
|
|
80
|
+
# Attempts to fix the PDF data in the given stream by dropping everything that precedes
# the `%PDF-` header.
# The search always starts at the beginning of the stream, whatever its current position.
# @param stream [StringIO, File] The stream to fix.
# @param maximum_offset [Integer] Maximum stream position, measured right after the header,
#   at which the header may be found.
# @return [StringIO] The fixed stream, rewound to its start.
# @raise [Mindee::Error::MindeePDFError] if no header is found within the allowed offset.
def self.fix_pdf(stream, maximum_offset: 500)
  header = '%PDF-'
  # A previous reader may have left the stream anywhere; offsets are relative to the start.
  stream.rewind
  leading = stream.gets(header)
  # `gets` returns everything up to EOF when the separator is missing, so check what was read
  # instead of relying on `eof?` (which is also true for a stream ending right after the header).
  header_found = !leading.nil? && leading.end_with?(header)
  raise Error::MindeePDFError if !header_found || stream.pos > maximum_offset

  stream.pos -= header.bytesize
  out_stream = StringIO.new
  out_stream << stream.read
  out_stream.rewind
  out_stream
end
|
|
93
|
+
|
|
94
|
+
# Cuts a PDF file according to provided options; the current stream is replaced by the result.
# @param options [PageOptions, nil] Page cutting/merge options:
#
#  * `:page_indexes` Zero-based list of page indexes.
#  * `:operation` Operation to apply on the document, given the `page_indexes` specified:
#      * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
#      * `:REMOVE` - remove the specified pages, and keep all others.
#  * `:on_min_pages` Apply the operation only if document has at least this many pages.
def apply_page_options(options)
  @io_stream.seek(0)
  processed = PDF::PDFProcessor.parse(@io_stream, options)
  @io_stream = processed
end
|
|
106
|
+
|
|
107
|
+
# Deprecated alias of {#apply_page_options}; forwards `options` unchanged.
# @deprecated Use {#apply_page_options} instead.
# @see #apply_page_options
def process_pdf(options)
  apply_page_options options
end
|
|
112
|
+
|
|
113
|
+
# Reads the whole document from the start of the stream.
# @param close [bool] Whether to close the stream once its content has been read.
# @return [Array] A pair: the data read, and a Hash holding the escaped `:filename`.
def read_contents(close: true)
  logger.debug("Reading data from: #{@filename}")
  @io_stream.seek(0)
  # Avoids needlessly re-packing some files
  contents = @io_stream.read
  @io_stream.rewind
  @io_stream.close if close
  metadata = { filename: Mindee::Input::Source.convert_to_unicode_escape(@filename) }
  [contents, metadata]
end
|
|
125
|
+
|
|
126
|
+
# Writes the document to disk, creating missing parent directories.
# @param path [String, nil] Destination. A directory (existing, or any path ending in `/`)
#   receives a file named after the original file name; `nil` or an empty string writes the
#   original file name into the current directory; any other value is used as the file path.
# @return [Integer] `0`, the result of rewinding the stream.
def write_to_file(path = nil)
  target = path.to_s
  if target.empty?
    # Honors the documented default instead of trying to write onto the current directory itself.
    target = @filename
  elsif target.end_with?('/') || File.directory?(target)
    target = File.join(target, @filename)
  end

  full_path = File.expand_path(target)
  FileUtils.mkdir_p(File.dirname(full_path))
  @io_stream.rewind
  File.binwrite(full_path, @io_stream.read || '')
  logger.debug("Wrote file successfully to #{full_path}")
  @io_stream.rewind
end
|
|
141
|
+
|
|
142
|
+
# Counts the pages of the document; anything that is not a PDF counts as a single page.
# @return [Integer]
# @raise [NotImplementedError] if the optional dependencies are not available.
def page_count
  deps_ready = Mindee::Dependency.all_deps_available?
  raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR unless deps_ready
  return 1 unless pdf?

  @io_stream.seek(0)
  Mindee::PDF::PDFProcessor.open_pdf(@io_stream).pages.size
end
|
|
155
|
+
|
|
156
|
+
# Compresses the document in place: PDFs go through the PDF compressor, anything else through
# the image compressor. The stream is replaced by the compressed one and rewound.
# @param [Integer] quality Quality of the output file.
# @param [Integer, nil] max_width Maximum width (Ignored for PDFs).
# @param [Integer, nil] max_height Maximum height (Ignored for PDFs).
# @param [bool] force_source_text Whether to force the operation on PDFs with source text.
#   This will attempt to re-render PDF text over the rasterized original.
#   WARNING: this operation is strongly discouraged.
# @param [bool] disable_source_text If the PDF has source text, whether to re-apply it to the original or
#   not. Needs force_source_text to work.
# @raise [NotImplementedError] if the optional dependencies are not available.
def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true)
  deps_ready = Mindee::Dependency.all_deps_available?
  raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR unless deps_ready

  @io_stream = if pdf?
                 pdf_options = {
                   quality: quality,
                   force_source_text_compression: force_source_text,
                   disable_source_text: disable_source_text,
                 }
                 Mindee::PDF::PDFCompressor.compress_pdf(@io_stream, **pdf_options)
               else
                 image_options = { quality: quality, max_width: max_width, max_height: max_height }
                 Mindee::Image::ImageCompressor.compress_image(@io_stream, **image_options)
               end
  @io_stream.rewind
end
|
|
188
|
+
|
|
189
|
+
# Asks the PDF tools whether the current stream contains source text.
# @return [bool] The result of `Mindee::PDF::PDFTools.source_text?` for this stream.
# @raise [NotImplementedError] if the optional dependencies are not available.
def source_text?
  deps_ready = Mindee::Dependency.all_deps_available?
  raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR unless deps_ready

  Mindee::PDF::PDFTools.source_text?(@io_stream)
end
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
# Replaces every multi-byte character by a `\u` escape of its code point (lowercase hex,
# zero-padded to at least four digits). Single-byte characters are kept as is.
# @param string [String]
# @return [String] The escaped copy.
def self.convert_to_unicode_escape(string)
  string.each_char.with_object(+'') do |char, escaped|
    escaped << (char.bytesize > 1 ? "\\u#{format('%04x', char.unpack1('U'))}" : char)
  end
end
|
|
214
|
+
end
|
|
215
|
+
end
|
|
216
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'stringio'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
module Input
|
|
7
|
+
# Document source handling.
|
|
8
|
+
module Source
|
|
9
|
+
# Load a document from a path.
class PathInputSource < LocalInputSource
  # Opens the file in binary mode and hands the open handle to {LocalInputSource}.
  # @param filepath [String]
  # @param repair_pdf [bool]
  def initialize(filepath, repair_pdf: false)
    io_stream = File.new(filepath, 'rb')
    begin
      super(io_stream, File.basename(filepath), repair_pdf: repair_pdf)
    rescue StandardError
      # Construction failed, so nobody will ever close this handle: release it before re-raising.
      io_stream.close unless io_stream.closed?
      raise
    end
  end
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'net/http'
|
|
4
|
+
require 'uri'
|
|
5
|
+
require 'fileutils'
|
|
6
|
+
require_relative '../../logging'
|
|
7
|
+
|
|
8
|
+
module Mindee
|
|
9
|
+
module Input
|
|
10
|
+
module Source
|
|
11
|
+
# Load a remote document from a file url.
|
|
12
|
+
class URLInputSource
|
|
13
|
+
# @return [String]
|
|
14
|
+
attr_reader :url
|
|
15
|
+
|
|
16
|
+
# @param url [String] URL of the remote document; must start with `https://`.
# @raise [Mindee::Error::MindeeInputError] if the URL does not start with `https://`.
def initialize(url)
  secure = url.start_with?('https://')
  raise Error::MindeeInputError, 'URL must be HTTPS' unless secure

  logger.debug("URL input: #{url}")
  @url = url
end
|
|
23
|
+
|
|
24
|
+
# Downloads the file from the URL and saves it under the specified directory.
# Missing directories are created, and the content is written in binary mode.
#
# @param path [String] Directory to save the file to.
# @param filename [String, nil] Optional name to give to the file.
# @param username [String, nil] Optional username for authentication.
# @param password [String, nil] Optional password for authentication.
# @param token [String, nil] Optional token for JWT-based authentication.
# @param max_redirects [Integer] Maximum amount of redirects to follow.
# @return [String] The full path of the saved file.
def write_to_file(path, filename: nil, username: nil, password: nil, token: nil, max_redirects: 3)
  response_body = fetch_file_content(username: username, password: password, token: token,
                                     max_redirects: max_redirects)

  filename = fill_filename(filename)

  full_path = File.join(path.chomp('/'), filename)
  FileUtils.mkdir_p(File.dirname(full_path))
  # The payload is a document (PDF/image): text mode could alter it on some platforms.
  File.binwrite(full_path, response_body.to_s)

  full_path
end
|
|
44
|
+
|
|
45
|
+
# Downloads the file from the url, and returns a BytesInputSource wrapper object for it.
#
# @param filename [String, nil] Optional name to give to the file.
# @param username [String, nil] Optional username for authentication.
# @param password [String, nil] Optional password for authentication.
# @param token [String, nil] Optional token for JWT-based authentication.
# @param max_redirects [Integer] Maximum amount of redirects to follow.
# @return [BytesInputSource] Input source built from the downloaded bytes.
def as_local_input_source(filename: nil, username: nil, password: nil, token: nil, max_redirects: 3)
  filename = fill_filename(filename)
  response_body = fetch_file_content(username: username, password: password, token: token,
                                     max_redirects: max_redirects)

  # A response without a body yields an empty document instead of a TypeError; the copy keeps
  # the input source independent from the response object.
  BytesInputSource.new(response_body.to_s.dup, filename)
end
|
|
61
|
+
|
|
62
|
+
# Fetches the file content from the URL with a GET request.
# When both a token and a username/password pair are given, the basic credentials are set last.
#
# @param username [String, nil] Optional username for authentication.
# @param password [String, nil] Optional password for authentication.
# @param token [String, nil] Optional token for JWT-based authentication.
# @param max_redirects [Integer] Maximum amount of redirects to follow.
# @return [String] The downloaded file content.
# @raise [Mindee::Error::MindeeAPIError] if the final status code is outside of the 200-299 range.
def fetch_file_content(username: nil, password: nil, token: nil, max_redirects: 3)
  uri = URI.parse(@url)
  request = Net::HTTP::Get.new(uri)
  request['Authorization'] = "Bearer #{token}" if token
  request.basic_auth(username, password) if username && password

  response = make_request(uri, request, max_redirects)
  status = response.code.to_i
  raise Error::MindeeAPIError, "Failed to download file: HTTP status code #{response.code}" if status > 299
  raise Error::MindeeAPIError, "Failed to download file: Invalid response code #{response.code}." if status < 200

  response.body
end
|
|
85
|
+
|
|
86
|
+
private
|
|
87
|
+
|
|
88
|
+
# Extracts the last path segment of the given URI.
# @param uri [URI::Generic]
# @return [String] Base name of the URI path; empty when the URI has no path.
def extract_filename_from_url(uri)
  File.basename(uri.path.to_s)
end
|
|
92
|
+
|
|
93
|
+
# Picks the file name to use: the given one, else the last segment of the URL path.
# A name that is empty or has no extension is replaced by a generated one.
# @param filename [String, nil]
# @return [String]
def fill_filename(filename)
  candidate = filename || extract_filename_from_url(URI.parse(@url))
  return candidate unless candidate.empty? || File.extname(candidate).empty?

  generate_file_name(extension: get_file_extension(candidate))
end
|
|
100
|
+
|
|
101
|
+
# Performs the request over TLS and follows redirections, up to `max_redirects` times.
# Each redirection is followed with a fresh GET request built from the `Location` header, which
# may be absolute or relative to the URI that was just requested.
# @param uri [URI::HTTP] Target of the request.
# @param request [Net::HTTPRequest] Request to send.
# @param max_redirects [Integer] Remaining amount of redirects to follow.
# @return [Net::HTTPResponse] The last response received.
# @raise [Mindee::Error::MindeeInputError] if a redirection carries no `Location` header.
def make_request(uri, request, max_redirects)
  # The connection is closed before any redirection is followed.
  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) { |http| http.request(request) }
  return response unless response.is_a?(Net::HTTPRedirection) && max_redirects.positive?

  location = response['location']
  raise Error::MindeeInputError, 'No location in redirection header.' if location.nil?

  # `merge` resolves relative references against the current URI and keeps absolute ones as is.
  new_uri = uri.merge(location)
  make_request(new_uri, Net::HTTP::Get.new(new_uri), max_redirects - 1)
end
|
|
116
|
+
|
|
117
|
+
# Returns the lowercased extension of a file name (leading dot included).
# @param filename [String]
# @return [String, nil] `nil` when the name has no extension.
def get_file_extension(filename)
  extension = File.extname(filename)
  return nil if extension.empty?

  extension.downcase
end
|
|
121
|
+
|
|
122
|
+
# Builds a temporary file name from the current time and eight random base-36 characters.
# @param extension [String, nil] Extension to append, leading dot included. Defaults to `.tmp`.
# @return [String]
def generate_file_name(extension: nil)
  suffix = extension || '.tmp'
  token = 8.times.map { rand(36).to_s(36) }.join
  timestamp = Time.now.strftime('%Y-%m-%d_%H-%M-%S')
  "mindee_temp_#{timestamp}_#{token}#{suffix}"
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
end
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'sources/local_input_source'
|
|
4
|
+
require_relative 'sources/bytes_input_source'
|
|
5
|
+
require_relative 'sources/base64_input_source'
|
|
6
|
+
require_relative 'sources/file_input_source'
|
|
7
|
+
require_relative 'sources/path_input_source'
|
|
8
|
+
require_relative 'sources/url_input_source'
|
data/lib/mindee/input.rb
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'logger'
|
|
4
|
+
|
|
5
|
+
module Mindee
  # Mindee logging module.
  #
  # The shared logger is configured once, at load time, from two environment variables:
  # * `MINDEE_LOG_LEVEL`: name of a `Logger::Severity` level, case-insensitive (default: `WARN`).
  # * `MINDEE_LOG_OUTPUT`: `stderr` (default) or `stdout`.
  # An invalid value triggers a warning and falls back to the default instead of failing the load.
  module Logging
    class << self
      attr_accessor :logger

      private

      # Converts a level name into its `Logger::Severity` value.
      # @param name [String] Level name, e.g. `DEBUG` or `info`.
      # @return [Integer] The matching severity, or `Logger::WARN` when the name is unknown.
      def resolve_level(name)
        key = name.to_s.strip.upcase.to_sym
        if Logger::Severity.constants.include?(key)
          severity = Logger::Severity.const_get(key)
          return severity if severity.is_a?(Integer)
        end

        warn "Invalid MINDEE_LOG_LEVEL='#{name}', defaulting to 'WARN'"
        Logger::WARN
      end
    end

    log_level = ENV.fetch('MINDEE_LOG_LEVEL', 'WARN')
    log_output = ENV.fetch('MINDEE_LOG_OUTPUT', 'stderr')
    @logger = if log_output == 'stderr'
                Logger.new($stderr)
              elsif log_output == 'stdout'
                Logger.new($stdout)
              else
                warn "Invalid MINDEE_LOG_OUTPUT='#{log_output}', defaulting to 'stderr'"
                Logger.new($stderr)
              end
    @logger.level = resolve_level(log_level)
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
  # Page selection options for parse calls.
  # @!attribute page_indexes [Array[Integer]] Zero-based list of page indexes.
  # @!attribute operation [:KEEP_ONLY, :REMOVE] Operation to apply on the document, given the specified page indexes:
  #  * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
  #  * `:REMOVE` - remove the specified pages, and keep all others.
  # @!attribute on_min_pages [Integer, nil] Apply the operation only if the document has at least this many pages.
  class PageOptions
    attr_accessor :page_indexes, :operation, :on_min_pages

    # @param params [Hash, nil] Options keyed by Symbol or String; missing keys fall back to
    #   an empty index list, `:KEEP_ONLY` and `nil` respectively.
    def initialize(params: {})
      params ||= {} # : Hash[Symbol, untyped]
      options = params.transform_keys(&:to_sym)
      @page_indexes = options.fetch(
        :page_indexes,
        [] # : Array[Integer]
      )
      @operation = options.fetch(:operation, :KEEP_ONLY)
      @on_min_pages = options[:on_min_pages]
    end
  end
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
# PDF Extraction Module.
|
|
5
|
+
module PDF
|
|
6
|
+
# An extracted sub-Pdf.
|
|
7
|
+
class ExtractedPDF
|
|
8
|
+
# Byte contents of the pdf
|
|
9
|
+
# @return [StringIO]
|
|
10
|
+
attr_reader :pdf_bytes
|
|
11
|
+
|
|
12
|
+
# Name of the file.
|
|
13
|
+
# @return [String]
|
|
14
|
+
attr_reader :filename
|
|
15
|
+
|
|
16
|
+
# @param pdf_stream [StringIO, File]
|
|
17
|
+
# @param filename [String]
|
|
18
|
+
def initialize(pdf_stream, filename)
|
|
19
|
+
@filename = filename
|
|
20
|
+
|
|
21
|
+
if pdf_stream.is_a?(File)
|
|
22
|
+
pdf_stream.rewind
|
|
23
|
+
@pdf_bytes = StringIO.new(pdf_stream.read)
|
|
24
|
+
else
|
|
25
|
+
@pdf_bytes = pdf_stream
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Opens the stored bytes as a PDF and counts its pages.
# @return [Integer]
# @raise [Mindee::Error::MindeePDFError] if the bytes cannot be opened as a PDF.
def page_count
  Mindee::PDF::PDFProcessor.open_pdf(pdf_bytes).pages.size
rescue TypeError, Origami::InvalidPDFError
  raise Error::MindeePDFError, 'Could not retrieve page count from Extracted PDF object.'
end
|
|
37
|
+
|
|
38
|
+
# Writes the contents of the current PDF object to a file.
# The parent directory of `output_path` must already exist; an existing file is replaced.
# @param output_path [String] Path to write to. Used as given: no extension is appended.
# @param override [bool] Whether to override the destination file.
#   NOTE(review): kept for interface compatibility. An existing destination is replaced whatever
#   its value, as was already the case with the default — confirm the intended semantics.
# @raise [Mindee::Error::MindeePDFError] if the path is a directory, or if its parent is missing.
def write_to_file(output_path, override: false) # rubocop:disable Lint/UnusedMethodArgument
  raise Error::MindeePDFError, 'Provided path is not a file' if File.directory?(output_path)

  # The former guard also tested `!override`, which made every `override: true` call fail.
  parent_directory = File.expand_path('..', output_path)
  raise Error::MindeePDFError, 'Invalid save path provided' unless File.exist?(parent_directory)

  @pdf_bytes.rewind if @pdf_bytes.respond_to?(:rewind)
  File.binwrite(output_path, @pdf_bytes.read.to_s)
  @pdf_bytes.rewind if @pdf_bytes.respond_to?(:rewind)
end
|
|
56
|
+
|
|
57
|
+
# Builds a BytesInputSource from the stored bytes, leaving the stream rewound when it supports it.
# @return [Mindee::Input::Source::BytesInputSource]
# @raise [Mindee::Error::MindeePDFError] if no bytes are stored.
def as_input_source
  raise Error::MindeePDFError, 'Bytes object is nil.' if @pdf_bytes.nil?

  rewindable = @pdf_bytes.respond_to?(:rewind)
  @pdf_bytes.rewind if rewindable
  payload = @pdf_bytes.read || ''
  @pdf_bytes.rewind if rewindable

  Mindee::Input::Source::BytesInputSource.new(payload, @filename)
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
Mindee::Dependency.require_all_deps!
|
|
4
|
+
require 'pdf-reader'
|
|
5
|
+
|
|
6
|
+
# Shorthand for pdf-reader's PDF namespace, to avoid mixups with the local Origami fork.
|
|
7
|
+
PDFReader = PDF
|
|
8
|
+
|
|
9
|
+
module Mindee
|
|
10
|
+
module PDF
|
|
11
|
+
# Compressor module to handle PDF compression.
|
|
12
|
+
module PDFCompressor
|
|
13
|
+
# Compresses each page of a provided PDF stream.
# When source text is detected and `force_source_text_compression` isn't set, the operation is
# skipped and the input stream is returned untouched.
# @param pdf_data [StringIO] StringIO handle of the file.
# @param quality [Integer] Compression quality (70-100 for most JPG images in the test dataset).
# @param force_source_text_compression [bool] If true, compresses PDFs that contain source text too.
# @param disable_source_text [bool] If true, doesn't re-apply source text to the compressed PDF.
# @return [StringIO] The compressed PDF, or `pdf_data` itself when the operation is skipped.
def self.compress_pdf(pdf_data, quality: 85, force_source_text_compression: false, disable_source_text: true)
  if PDFTools.source_text?(pdf_data)
    unless force_source_text_compression
      logger.warn('Source-text detected in input PDF. Aborting operation.')
      return pdf_data
    end

    # Text is only written back when `disable_source_text` is false, so that is the case the
    # "experimental" warning applies to.
    if disable_source_text
      logger.warn('Source-file contains text, but disable_source_text flag is set. ' \
                  'Resulting file will not contain any embedded text.')
    else
      logger.warn('Re-writing PDF source-text is an EXPERIMENTAL feature.')
    end
  end

  pdf_data.rewind
  pdf = Origami::PDF.read(pdf_data)
  pages = process_pdf_pages(pdf, quality)

  output_pdf = create_output_pdf(pages, disable_source_text, pdf_data)

  output_stream = StringIO.new
  output_pdf.save(output_stream)
  output_stream
end
|
|
43
|
+
|
|
44
|
+
# Runs {process_pdf_page} on every page of the PDF, in order.
# @param pdf [Origami::PDF] The Origami PDF object to process.
# @param quality [Integer] Compression quality.
# @return [Array<Origami::Page>] Processed pages.
def self.process_pdf_pages(pdf, quality)
  pdf.pages.each_with_index.map do |page, index|
    page_stream = Mindee::PDF::PDFProcessor.get_page(pdf, index)
    process_pdf_page(page_stream, index, quality, page[:MediaBox])
  end
end
|
|
54
|
+
|
|
55
|
+
# Assembles a new PDF from the processed pages, optionally re-injecting the source text first.
# @param pages [Array<Origami::Page>] Processed pages. The array is reordered in place.
# @param disable_source_text [bool] Whether to disable source text.
# @param pdf_data [StringIO] Original PDF data.
# @return [Origami::PDF] Output PDF object.
def self.create_output_pdf(pages, disable_source_text, pdf_data)
  # NOTE(review): with two pages or more, the first page is moved to the end before the pages
  # are appended; the reason is not visible from here — confirm before touching.
  pages.rotate!(1) if pages.count >= 2
  inject_text(pdf_data, pages) unless disable_source_text

  pages.each_with_object(Origami::PDF.new) { |page, document| document.append_page(page) }
end
|
|
70
|
+
|
|
71
|
+
# Extracts text from a source text PDF, and injects it into a newly-created one.
# For each text run, a text object (`BT ... ET`) is appended to the content of the page at the
# same index. Pages of the original beyond the length of `pages` are ignored.
# @param pdf_data [StringIO] Stream representation of the PDF.
# @param pages [Array<Origami::Page>] Array of pages containing the rasterized version of the initial pages.
def self.inject_text(pdf_data, pages)
  reader = PDFReader::Reader.new(pdf_data)

  reader.pages.each_with_index do |original_page, index|
    break if index >= pages.length

    receiver = PDFReader::Reader::PageTextReceiver.new
    original_page.walk(receiver)

    text_objects = receiver.runs.map do |text_run|
      # `\`, `(` and `)` are special inside a PDF literal string: unescaped, they would end the
      # string early or corrupt the content stream.
      escaped_text = text_run.text.to_s.gsub(/[\\()]/) { |char| "\\#{char}" }
      "BT\n/F1 #{text_run.font_size} Tf\n#{text_run.origin.x} #{text_run.origin.y} Td\n(#{escaped_text}) Tj\nET\n"
    end
    next if text_objects.empty?

    pages[index].Contents.data += text_objects.join
  end
end
|
|
97
|
+
|
|
98
|
+
# Takes in a page stream, rasterizes it into an image, and applies the result onto a new Origami PDF page.
# @param page_stream [StringIO] Stream representation of a single page from the initial PDF.
# @param page_index [Integer] Index of the current page; used to name the image XObject (`X<index + 1>`).
# @param image_quality [Integer] Quality to apply to the rasterized page.
# @param media_box [Array<Integer>, nil] Extracted media box from the page. Can be nil.
# @return [Origami::Page]
def self.process_pdf_page(page_stream, page_index, image_quality, media_box)
  image = Mindee::Image::ImageUtils.pdf_to_magick_image(page_stream, image_quality)
  width, height = Mindee::Image::ImageUtils.calculate_dimensions_from_media_box(image, media_box)

  xobject = PDF::PDFTools.create_xobject(image)
  PDF::PDFTools.set_xobject_properties(xobject, image)

  name = "X#{page_index + 1}"
  page = Origami::Page.new
  PDF::PDFTools.add_content_to_page(page, name, width, height)
  page.add_xobject(xobject, name)
  PDF::PDFTools.set_page_dimensions(page, width, height)
  page
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
end
|