mindee-lite 5.0.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (510) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +24 -0
  3. data/.gitattributes +14 -0
  4. data/.gitignore +76 -0
  5. data/.gitmodules +3 -0
  6. data/.pre-commit-config.yaml +36 -0
  7. data/.rubocop.yml +49 -0
  8. data/.yardopts +4 -0
  9. data/CHANGELOG.md +515 -0
  10. data/CODE_OF_CONDUCT.md +129 -0
  11. data/CONTRIBUTING.md +107 -0
  12. data/Gemfile +14 -0
  13. data/LICENSE +21 -0
  14. data/README.md +42 -0
  15. data/Rakefile +40 -0
  16. data/Steepfile +30 -0
  17. data/bin/console +14 -0
  18. data/bin/mindee.rb +30 -0
  19. data/bin/v1/parser.rb +153 -0
  20. data/bin/v1/products.rb +88 -0
  21. data/bin/v2/parser.rb +235 -0
  22. data/bin/v2/products.rb +34 -0
  23. data/docs/code_samples/bank_account_details_v1.txt +24 -0
  24. data/docs/code_samples/bank_account_details_v2.txt +24 -0
  25. data/docs/code_samples/bank_statement_fr_v2_async.txt +24 -0
  26. data/docs/code_samples/barcode_reader_v1.txt +24 -0
  27. data/docs/code_samples/cropper_v1.txt +21 -0
  28. data/docs/code_samples/default.txt +30 -0
  29. data/docs/code_samples/default_async.txt +29 -0
  30. data/docs/code_samples/expense_receipts_v5.txt +25 -0
  31. data/docs/code_samples/expense_receipts_v5_async.txt +24 -0
  32. data/docs/code_samples/financial_document_v1.txt +25 -0
  33. data/docs/code_samples/financial_document_v1_async.txt +24 -0
  34. data/docs/code_samples/idcard_fr_v1.txt +24 -0
  35. data/docs/code_samples/idcard_fr_v2.txt +24 -0
  36. data/docs/code_samples/international_id_v2_async.txt +24 -0
  37. data/docs/code_samples/invoice_splitter_v1_async.txt +24 -0
  38. data/docs/code_samples/invoices_v4.txt +25 -0
  39. data/docs/code_samples/invoices_v4_async.txt +24 -0
  40. data/docs/code_samples/multi_receipts_detector_v1.txt +24 -0
  41. data/docs/code_samples/passport_v1.txt +24 -0
  42. data/docs/code_samples/resume_v1_async.txt +24 -0
  43. data/docs/code_samples/v2_classification.txt +30 -0
  44. data/docs/code_samples/v2_crop.txt +30 -0
  45. data/docs/code_samples/v2_extraction.txt +42 -0
  46. data/docs/code_samples/v2_extraction_webhook.txt +45 -0
  47. data/docs/code_samples/v2_ocr.txt +30 -0
  48. data/docs/code_samples/v2_split.txt +30 -0
  49. data/docs/code_samples/workflow_execution.txt +28 -0
  50. data/docs/code_samples/workflow_polling.txt +35 -0
  51. data/examples/auto_invoice_splitter_extraction.rb +48 -0
  52. data/examples/auto_multi_receipts_detector_extraction.rb +30 -0
  53. data/lib/mindee/dependency.rb +29 -0
  54. data/lib/mindee/error/mindee_error.rb +17 -0
  55. data/lib/mindee/error/mindee_http_error.rb +36 -0
  56. data/lib/mindee/error/mindee_http_error_v2.rb +45 -0
  57. data/lib/mindee/error/mindee_http_unknown_error_v2.rb +18 -0
  58. data/lib/mindee/error/mindee_input_error.rb +30 -0
  59. data/lib/mindee/error.rb +6 -0
  60. data/lib/mindee/geometry/min_max.rb +23 -0
  61. data/lib/mindee/geometry/point.rb +41 -0
  62. data/lib/mindee/geometry/polygon.rb +37 -0
  63. data/lib/mindee/geometry/quadrilateral.rb +50 -0
  64. data/lib/mindee/geometry/utils.rb +88 -0
  65. data/lib/mindee/geometry.rb +7 -0
  66. data/lib/mindee/http/.rubocop.yml +7 -0
  67. data/lib/mindee/http/http_error_handler.rb +106 -0
  68. data/lib/mindee/http/response_validation.rb +81 -0
  69. data/lib/mindee/http.rb +3 -0
  70. data/lib/mindee/image/extracted_image.rb +89 -0
  71. data/lib/mindee/image/image_compressor.rb +29 -0
  72. data/lib/mindee/image/image_extractor.rb +118 -0
  73. data/lib/mindee/image/image_utils.rb +165 -0
  74. data/lib/mindee/image.rb +6 -0
  75. data/lib/mindee/input/base_parameters.rb +149 -0
  76. data/lib/mindee/input/local_response.rb +80 -0
  77. data/lib/mindee/input/polling_options.rb +26 -0
  78. data/lib/mindee/input/sources/base64_input_source.rb +31 -0
  79. data/lib/mindee/input/sources/bytes_input_source.rb +21 -0
  80. data/lib/mindee/input/sources/file_input_source.rb +20 -0
  81. data/lib/mindee/input/sources/local_input_source.rb +216 -0
  82. data/lib/mindee/input/sources/path_input_source.rb +20 -0
  83. data/lib/mindee/input/sources/url_input_source.rb +130 -0
  84. data/lib/mindee/input/sources.rb +8 -0
  85. data/lib/mindee/input.rb +4 -0
  86. data/lib/mindee/logging/logger.rb +24 -0
  87. data/lib/mindee/logging.rb +3 -0
  88. data/lib/mindee/page_options.rb +24 -0
  89. data/lib/mindee/pdf/extracted_pdf.rb +70 -0
  90. data/lib/mindee/pdf/pdf_compressor.rb +121 -0
  91. data/lib/mindee/pdf/pdf_extractor.rb +121 -0
  92. data/lib/mindee/pdf/pdf_processor.rb +91 -0
  93. data/lib/mindee/pdf/pdf_tools.rb +201 -0
  94. data/lib/mindee/pdf.rb +7 -0
  95. data/lib/mindee/v1/client.rb +490 -0
  96. data/lib/mindee/v1/extraction/multi_receipts_extractor.rb +32 -0
  97. data/lib/mindee/v1/extraction.rb +3 -0
  98. data/lib/mindee/v1/http/.rubocop.yml +7 -0
  99. data/lib/mindee/v1/http/endpoint.rb +221 -0
  100. data/lib/mindee/v1/http/workflow_endpoint.rb +93 -0
  101. data/lib/mindee/v1/http.rb +4 -0
  102. data/lib/mindee/v1/parsing/common/api_request.rb +38 -0
  103. data/lib/mindee/v1/parsing/common/api_response.rb +63 -0
  104. data/lib/mindee/v1/parsing/common/document.rb +86 -0
  105. data/lib/mindee/v1/parsing/common/execution.rb +78 -0
  106. data/lib/mindee/v1/parsing/common/execution_file.rb +26 -0
  107. data/lib/mindee/v1/parsing/common/execution_priority.rb +38 -0
  108. data/lib/mindee/v1/parsing/common/extras/cropper_extra.rb +32 -0
  109. data/lib/mindee/v1/parsing/common/extras/extras.rb +62 -0
  110. data/lib/mindee/v1/parsing/common/extras/full_text_ocr_extra.rb +35 -0
  111. data/lib/mindee/v1/parsing/common/extras/rag_extra.rb +28 -0
  112. data/lib/mindee/v1/parsing/common/extras.rb +6 -0
  113. data/lib/mindee/v1/parsing/common/inference.rb +69 -0
  114. data/lib/mindee/v1/parsing/common/job.rb +48 -0
  115. data/lib/mindee/v1/parsing/common/ocr/mvision_v1.rb +52 -0
  116. data/lib/mindee/v1/parsing/common/ocr/ocr.rb +180 -0
  117. data/lib/mindee/v1/parsing/common/ocr.rb +3 -0
  118. data/lib/mindee/v1/parsing/common/orientation.rb +28 -0
  119. data/lib/mindee/v1/parsing/common/page.rb +49 -0
  120. data/lib/mindee/v1/parsing/common/prediction.rb +19 -0
  121. data/lib/mindee/v1/parsing/common/product.rb +26 -0
  122. data/lib/mindee/v1/parsing/common/workflow_response.rb +30 -0
  123. data/lib/mindee/v1/parsing/common.rb +15 -0
  124. data/lib/mindee/v1/parsing/standard/abstract_field.rb +74 -0
  125. data/lib/mindee/v1/parsing/standard/address_field.rb +51 -0
  126. data/lib/mindee/v1/parsing/standard/amount_field.rb +28 -0
  127. data/lib/mindee/v1/parsing/standard/base_field.rb +30 -0
  128. data/lib/mindee/v1/parsing/standard/boolean_field.rb +29 -0
  129. data/lib/mindee/v1/parsing/standard/classification_field.rb +18 -0
  130. data/lib/mindee/v1/parsing/standard/company_registration_field.rb +45 -0
  131. data/lib/mindee/v1/parsing/standard/date_field.rb +40 -0
  132. data/lib/mindee/v1/parsing/standard/feature_field.rb +26 -0
  133. data/lib/mindee/v1/parsing/standard/locale_field.rb +52 -0
  134. data/lib/mindee/v1/parsing/standard/payment_details_field.rb +44 -0
  135. data/lib/mindee/v1/parsing/standard/position_field.rb +61 -0
  136. data/lib/mindee/v1/parsing/standard/string_field.rb +26 -0
  137. data/lib/mindee/v1/parsing/standard/tax_field.rb +110 -0
  138. data/lib/mindee/v1/parsing/standard.rb +15 -0
  139. data/lib/mindee/v1/parsing/universal/universal_list_field.rb +60 -0
  140. data/lib/mindee/v1/parsing/universal/universal_object_field.rb +123 -0
  141. data/lib/mindee/v1/parsing/universal.rb +4 -0
  142. data/lib/mindee/v1/parsing.rb +5 -0
  143. data/lib/mindee/v1/product/.rubocop.yml +12 -0
  144. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1.rb +47 -0
  145. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb +47 -0
  146. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rb +38 -0
  147. data/lib/mindee/v1/product/cropper/cropper_v1.rb +47 -0
  148. data/lib/mindee/v1/product/cropper/cropper_v1_document.rb +15 -0
  149. data/lib/mindee/v1/product/cropper/cropper_v1_page.rb +55 -0
  150. data/lib/mindee/v1/product/financial_document/financial_document_v1.rb +47 -0
  151. data/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb +329 -0
  152. data/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb +124 -0
  153. data/lib/mindee/v1/product/financial_document/financial_document_v1_line_items.rb +64 -0
  154. data/lib/mindee/v1/product/financial_document/financial_document_v1_page.rb +38 -0
  155. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rb +49 -0
  156. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb +49 -0
  157. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rb +40 -0
  158. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rb +49 -0
  159. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb +63 -0
  160. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb +60 -0
  161. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rb +40 -0
  162. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2.rb +49 -0
  163. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb +169 -0
  164. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rb +40 -0
  165. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb +78 -0
  166. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rb +56 -0
  167. data/lib/mindee/v1/product/fr/id_card/id_card_v1.rb +49 -0
  168. data/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb +106 -0
  169. data/lib/mindee/v1/product/fr/id_card/id_card_v1_page.rb +57 -0
  170. data/lib/mindee/v1/product/fr/id_card/id_card_v2.rb +49 -0
  171. data/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb +143 -0
  172. data/lib/mindee/v1/product/fr/id_card/id_card_v2_page.rb +65 -0
  173. data/lib/mindee/v1/product/international_id/international_id_v2.rb +47 -0
  174. data/lib/mindee/v1/product/international_id/international_id_v2_document.rb +164 -0
  175. data/lib/mindee/v1/product/international_id/international_id_v2_page.rb +38 -0
  176. data/lib/mindee/v1/product/invoice/invoice_v4.rb +47 -0
  177. data/lib/mindee/v1/product/invoice/invoice_v4_document.rb +300 -0
  178. data/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb +124 -0
  179. data/lib/mindee/v1/product/invoice/invoice_v4_line_items.rb +64 -0
  180. data/lib/mindee/v1/product/invoice/invoice_v4_page.rb +38 -0
  181. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rb +47 -0
  182. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb +66 -0
  183. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +58 -0
  184. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +50 -0
  185. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rb +38 -0
  186. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rb +47 -0
  187. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +38 -0
  188. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +38 -0
  189. data/lib/mindee/v1/product/passport/passport_v1.rb +47 -0
  190. data/lib/mindee/v1/product/passport/passport_v1_document.rb +112 -0
  191. data/lib/mindee/v1/product/passport/passport_v1_page.rb +38 -0
  192. data/lib/mindee/v1/product/receipt/receipt_v5.rb +47 -0
  193. data/lib/mindee/v1/product/receipt/receipt_v5_document.rb +187 -0
  194. data/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb +88 -0
  195. data/lib/mindee/v1/product/receipt/receipt_v5_line_items.rb +56 -0
  196. data/lib/mindee/v1/product/receipt/receipt_v5_page.rb +38 -0
  197. data/lib/mindee/v1/product/resume/resume_v1.rb +47 -0
  198. data/lib/mindee/v1/product/resume/resume_v1_certificate.rb +82 -0
  199. data/lib/mindee/v1/product/resume/resume_v1_certificates.rb +60 -0
  200. data/lib/mindee/v1/product/resume/resume_v1_document.rb +340 -0
  201. data/lib/mindee/v1/product/resume/resume_v1_education.rb +106 -0
  202. data/lib/mindee/v1/product/resume/resume_v1_educations.rb +66 -0
  203. data/lib/mindee/v1/product/resume/resume_v1_language.rb +66 -0
  204. data/lib/mindee/v1/product/resume/resume_v1_languages.rb +56 -0
  205. data/lib/mindee/v1/product/resume/resume_v1_page.rb +38 -0
  206. data/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb +122 -0
  207. data/lib/mindee/v1/product/resume/resume_v1_professional_experiences.rb +70 -0
  208. data/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb +66 -0
  209. data/lib/mindee/v1/product/resume/resume_v1_social_networks_urls.rb +56 -0
  210. data/lib/mindee/v1/product/universal/universal.rb +48 -0
  211. data/lib/mindee/v1/product/universal/universal_document.rb +35 -0
  212. data/lib/mindee/v1/product/universal/universal_page.rb +54 -0
  213. data/lib/mindee/v1/product/universal/universal_prediction.rb +128 -0
  214. data/lib/mindee/v1/product.rb +18 -0
  215. data/lib/mindee/v1.rb +7 -0
  216. data/lib/mindee/v2/client.rb +132 -0
  217. data/lib/mindee/v2/file_operation/crop.rb +51 -0
  218. data/lib/mindee/v2/file_operation/crop_files.rb +25 -0
  219. data/lib/mindee/v2/file_operation/split.rb +37 -0
  220. data/lib/mindee/v2/file_operation/split_files.rb +25 -0
  221. data/lib/mindee/v2/file_operation.rb +6 -0
  222. data/lib/mindee/v2/http/.rubocop.yml +7 -0
  223. data/lib/mindee/v2/http/api_v2_settings.rb +65 -0
  224. data/lib/mindee/v2/http/mindee_api_v2.rb +230 -0
  225. data/lib/mindee/v2/http.rb +4 -0
  226. data/lib/mindee/v2/parsing/base_inference.rb +44 -0
  227. data/lib/mindee/v2/parsing/base_response.rb +15 -0
  228. data/lib/mindee/v2/parsing/common_response.rb +20 -0
  229. data/lib/mindee/v2/parsing/error_item.rb +21 -0
  230. data/lib/mindee/v2/parsing/error_response.rb +51 -0
  231. data/lib/mindee/v2/parsing/field/base_field.rb +63 -0
  232. data/lib/mindee/v2/parsing/field/field_confidence.rb +128 -0
  233. data/lib/mindee/v2/parsing/field/field_location.rb +33 -0
  234. data/lib/mindee/v2/parsing/field/inference_fields.rb +105 -0
  235. data/lib/mindee/v2/parsing/field/list_field.rb +79 -0
  236. data/lib/mindee/v2/parsing/field/object_field.rb +138 -0
  237. data/lib/mindee/v2/parsing/field/simple_field.rb +60 -0
  238. data/lib/mindee/v2/parsing/field.rb +9 -0
  239. data/lib/mindee/v2/parsing/inference_active_options.rb +67 -0
  240. data/lib/mindee/v2/parsing/inference_file.rb +38 -0
  241. data/lib/mindee/v2/parsing/inference_job.rb +25 -0
  242. data/lib/mindee/v2/parsing/inference_model.rb +30 -0
  243. data/lib/mindee/v2/parsing/job.rb +93 -0
  244. data/lib/mindee/v2/parsing/job_response.rb +30 -0
  245. data/lib/mindee/v2/parsing/job_webhook.rb +59 -0
  246. data/lib/mindee/v2/parsing/rag_metadata.rb +17 -0
  247. data/lib/mindee/v2/parsing/raw_text.rb +27 -0
  248. data/lib/mindee/v2/parsing/raw_text_page.rb +24 -0
  249. data/lib/mindee/v2/parsing/search/pagination_metadata.rb +44 -0
  250. data/lib/mindee/v2/parsing/search/search_model.rb +38 -0
  251. data/lib/mindee/v2/parsing/search/search_models.rb +34 -0
  252. data/lib/mindee/v2/parsing/search/search_response.rb +38 -0
  253. data/lib/mindee/v2/parsing/search.rb +6 -0
  254. data/lib/mindee/v2/parsing.rb +16 -0
  255. data/lib/mindee/v2/product/base_product.rb +28 -0
  256. data/lib/mindee/v2/product/classification/classification.rb +20 -0
  257. data/lib/mindee/v2/product/classification/classification_classifier.rb +25 -0
  258. data/lib/mindee/v2/product/classification/classification_inference.rb +35 -0
  259. data/lib/mindee/v2/product/classification/classification_response.rb +32 -0
  260. data/lib/mindee/v2/product/classification/classification_result.rb +27 -0
  261. data/lib/mindee/v2/product/classification/params/classification_parameters.rb +47 -0
  262. data/lib/mindee/v2/product/crop/crop.rb +20 -0
  263. data/lib/mindee/v2/product/crop/crop_inference.rb +34 -0
  264. data/lib/mindee/v2/product/crop/crop_item.rb +39 -0
  265. data/lib/mindee/v2/product/crop/crop_response.rb +40 -0
  266. data/lib/mindee/v2/product/crop/crop_result.rb +34 -0
  267. data/lib/mindee/v2/product/crop/params/crop_parameters.rb +47 -0
  268. data/lib/mindee/v2/product/extraction/extraction.rb +21 -0
  269. data/lib/mindee/v2/product/extraction/extraction_inference.rb +40 -0
  270. data/lib/mindee/v2/product/extraction/extraction_response.rb +32 -0
  271. data/lib/mindee/v2/product/extraction/extraction_result.rb +44 -0
  272. data/lib/mindee/v2/product/extraction/params/data_schema.rb +51 -0
  273. data/lib/mindee/v2/product/extraction/params/data_schema_field.rb +69 -0
  274. data/lib/mindee/v2/product/extraction/params/data_schema_replace.rb +39 -0
  275. data/lib/mindee/v2/product/extraction/params/extraction_parameters.rb +125 -0
  276. data/lib/mindee/v2/product/ocr/ocr.rb +20 -0
  277. data/lib/mindee/v2/product/ocr/ocr_inference.rb +34 -0
  278. data/lib/mindee/v2/product/ocr/ocr_page.rb +33 -0
  279. data/lib/mindee/v2/product/ocr/ocr_response.rb +32 -0
  280. data/lib/mindee/v2/product/ocr/ocr_result.rb +34 -0
  281. data/lib/mindee/v2/product/ocr/ocr_word.rb +29 -0
  282. data/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +47 -0
  283. data/lib/mindee/v2/product/split/params/split_parameters.rb +48 -0
  284. data/lib/mindee/v2/product/split/split.rb +19 -0
  285. data/lib/mindee/v2/product/split/split_inference.rb +34 -0
  286. data/lib/mindee/v2/product/split/split_range.rb +38 -0
  287. data/lib/mindee/v2/product/split/split_response.rb +40 -0
  288. data/lib/mindee/v2/product/split/split_result.rb +34 -0
  289. data/lib/mindee/v2/product.rb +7 -0
  290. data/lib/mindee/v2.rb +7 -0
  291. data/lib/mindee/version.rb +26 -0
  292. data/lib/mindee.rb +135 -0
  293. data/mindee-lite.gemspec +36 -0
  294. data/mindee.gemspec +44 -0
  295. data/sig/custom/marcel.rbs +3 -0
  296. data/sig/custom/mini_magick.rbs +31 -0
  297. data/sig/custom/net_http.rbs +43 -0
  298. data/sig/custom/origami.rbs +59 -0
  299. data/sig/mindee/dependency.rbs +13 -0
  300. data/sig/mindee/error/mindee_error.rbs +13 -0
  301. data/sig/mindee/error/mindee_http_error.rbs +17 -0
  302. data/sig/mindee/error/mindee_http_error_v2.rbs +15 -0
  303. data/sig/mindee/error/mindee_http_unknown_error_v2.rbs +9 -0
  304. data/sig/mindee/error/mindee_input_error.rbs +18 -0
  305. data/sig/mindee/geometry/min_max.rbs +11 -0
  306. data/sig/mindee/geometry/point.rbs +14 -0
  307. data/sig/mindee/geometry/polygon.rbs +12 -0
  308. data/sig/mindee/geometry/quadrilateral.rbs +15 -0
  309. data/sig/mindee/geometry/utils.rbs +13 -0
  310. data/sig/mindee/http/http_error_handler.rbs +15 -0
  311. data/sig/mindee/http/response_validation.rbs +11 -0
  312. data/sig/mindee/image/extracted_image.rbs +21 -0
  313. data/sig/mindee/image/image_compressor.rbs +8 -0
  314. data/sig/mindee/image/image_extractor.rbs +13 -0
  315. data/sig/mindee/image/image_utils.rbs +19 -0
  316. data/sig/mindee/input/base_parameters.rbs +35 -0
  317. data/sig/mindee/input/local_response.rbs +14 -0
  318. data/sig/mindee/input/polling_options.rbs +12 -0
  319. data/sig/mindee/input/sources/base64_input_source.rbs +11 -0
  320. data/sig/mindee/input/sources/bytes_input_source.rbs +10 -0
  321. data/sig/mindee/input/sources/file_input_source.rbs +10 -0
  322. data/sig/mindee/input/sources/local_input_source.rbs +30 -0
  323. data/sig/mindee/input/sources/path_input_source.rbs +10 -0
  324. data/sig/mindee/input/sources/url_input_source.rbs +20 -0
  325. data/sig/mindee/logging/logger.rbs +11 -0
  326. data/sig/mindee/page_options.rbs +11 -0
  327. data/sig/mindee/pdf/extracted_pdf.rbs +17 -0
  328. data/sig/mindee/pdf/pdf_compressor.rbs +15 -0
  329. data/sig/mindee/pdf/pdf_extractor.rbs +19 -0
  330. data/sig/mindee/pdf/pdf_processor.rbs +12 -0
  331. data/sig/mindee/pdf/pdf_tools.rbs +31 -0
  332. data/sig/mindee/v1/client.rbs +84 -0
  333. data/sig/mindee/v1/extraction/multi_receipts_extractor.rbs +8 -0
  334. data/sig/mindee/v1/http/endpoint.rbs +41 -0
  335. data/sig/mindee/v1/http/workflow_endpoint.rbs +22 -0
  336. data/sig/mindee/v1/parsing/common/api_request.rbs +22 -0
  337. data/sig/mindee/v1/parsing/common/api_response.rbs +31 -0
  338. data/sig/mindee/v1/parsing/common/document.rbs +32 -0
  339. data/sig/mindee/v1/parsing/common/execution.rbs +26 -0
  340. data/sig/mindee/v1/parsing/common/execution_file.rbs +16 -0
  341. data/sig/mindee/v1/parsing/common/execution_priority.rbs +16 -0
  342. data/sig/mindee/v1/parsing/common/extras/cropper_extra.rbs +18 -0
  343. data/sig/mindee/v1/parsing/common/extras/extras.rbs +24 -0
  344. data/sig/mindee/v1/parsing/common/extras/full_text_ocr_extra.rbs +22 -0
  345. data/sig/mindee/v1/parsing/common/extras/rag_extra.rbs +19 -0
  346. data/sig/mindee/v1/parsing/common/inference.rbs +31 -0
  347. data/sig/mindee/v1/parsing/common/job.rbs +24 -0
  348. data/sig/mindee/v1/parsing/common/ocr/mvision_v1.rbs +20 -0
  349. data/sig/mindee/v1/parsing/common/ocr/ocr.rbs +56 -0
  350. data/sig/mindee/v1/parsing/common/orientation.rbs +15 -0
  351. data/sig/mindee/v1/parsing/common/page.rbs +19 -0
  352. data/sig/mindee/v1/parsing/common/prediction.rbs +14 -0
  353. data/sig/mindee/v1/parsing/common/product.rbs +16 -0
  354. data/sig/mindee/v1/parsing/common/workflow_response.rbs +22 -0
  355. data/sig/mindee/v1/parsing/standard/abstract_field.rbs +30 -0
  356. data/sig/mindee/v1/parsing/standard/address_field.rbs +28 -0
  357. data/sig/mindee/v1/parsing/standard/amount_field.rbs +16 -0
  358. data/sig/mindee/v1/parsing/standard/base_field.rbs +16 -0
  359. data/sig/mindee/v1/parsing/standard/boolean_field.rbs +16 -0
  360. data/sig/mindee/v1/parsing/standard/classification_field.rbs +12 -0
  361. data/sig/mindee/v1/parsing/standard/company_registration_field.rbs +20 -0
  362. data/sig/mindee/v1/parsing/standard/date_field.rbs +20 -0
  363. data/sig/mindee/v1/parsing/standard/feature_field.rbs +12 -0
  364. data/sig/mindee/v1/parsing/standard/locale_field.rbs +24 -0
  365. data/sig/mindee/v1/parsing/standard/payment_details_field.rbs +19 -0
  366. data/sig/mindee/v1/parsing/standard/position_field.rbs +26 -0
  367. data/sig/mindee/v1/parsing/standard/string_field.rbs +16 -0
  368. data/sig/mindee/v1/parsing/standard/tax_field.rbs +33 -0
  369. data/sig/mindee/v1/parsing/universal/universal_list_field.rbs +21 -0
  370. data/sig/mindee/v1/parsing/universal/universal_object_field.rbs +38 -0
  371. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1.rbs +13 -0
  372. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rbs +16 -0
  373. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rbs +17 -0
  374. data/sig/mindee/v1/product/cropper/cropper_v1.rbs +13 -0
  375. data/sig/mindee/v1/product/cropper/cropper_v1_document.rbs +14 -0
  376. data/sig/mindee/v1/product/cropper/cropper_v1_page.rbs +19 -0
  377. data/sig/mindee/v1/product/financial_document/financial_document_v1.rbs +13 -0
  378. data/sig/mindee/v1/product/financial_document/financial_document_v1_document.rbs +49 -0
  379. data/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs +35 -0
  380. data/sig/mindee/v1/product/financial_document/financial_document_v1_line_items.rbs +15 -0
  381. data/sig/mindee/v1/product/financial_document/financial_document_v1_page.rbs +17 -0
  382. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rbs +15 -0
  383. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rbs +19 -0
  384. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rbs +19 -0
  385. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rbs +15 -0
  386. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs +25 -0
  387. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rbs +20 -0
  388. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rbs +19 -0
  389. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2.rbs +15 -0
  390. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rbs +31 -0
  391. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rbs +19 -0
  392. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs +27 -0
  393. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rbs +17 -0
  394. data/sig/mindee/v1/product/fr/id_card/id_card_v1.rbs +15 -0
  395. data/sig/mindee/v1/product/fr/id_card/id_card_v1_document.rbs +26 -0
  396. data/sig/mindee/v1/product/fr/id_card/id_card_v1_page.rbs +20 -0
  397. data/sig/mindee/v1/product/fr/id_card/id_card_v2.rbs +15 -0
  398. data/sig/mindee/v1/product/fr/id_card/id_card_v2_document.rbs +31 -0
  399. data/sig/mindee/v1/product/fr/id_card/id_card_v2_page.rbs +21 -0
  400. data/sig/mindee/v1/product/international_id/international_id_v2.rbs +13 -0
  401. data/sig/mindee/v1/product/international_id/international_id_v2_document.rbs +31 -0
  402. data/sig/mindee/v1/product/international_id/international_id_v2_page.rbs +17 -0
  403. data/sig/mindee/v1/product/invoice/invoice_v4.rbs +13 -0
  404. data/sig/mindee/v1/product/invoice/invoice_v4_document.rbs +45 -0
  405. data/sig/mindee/v1/product/invoice/invoice_v4_line_item.rbs +35 -0
  406. data/sig/mindee/v1/product/invoice/invoice_v4_line_items.rbs +15 -0
  407. data/sig/mindee/v1/product/invoice/invoice_v4_page.rbs +17 -0
  408. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rbs +13 -0
  409. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rbs +17 -0
  410. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs +21 -0
  411. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rbs +15 -0
  412. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rbs +17 -0
  413. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs +14 -0
  414. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs +15 -0
  415. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rbs +17 -0
  416. data/sig/mindee/v1/product/passport/passport_v1.rbs +13 -0
  417. data/sig/mindee/v1/product/passport/passport_v1_document.rbs +25 -0
  418. data/sig/mindee/v1/product/passport/passport_v1_page.rbs +17 -0
  419. data/sig/mindee/v1/product/receipt/receipt_v5.rbs +13 -0
  420. data/sig/mindee/v1/product/receipt/receipt_v5_document.rbs +33 -0
  421. data/sig/mindee/v1/product/receipt/receipt_v5_line_item.rbs +27 -0
  422. data/sig/mindee/v1/product/receipt/receipt_v5_line_items.rbs +15 -0
  423. data/sig/mindee/v1/product/receipt/receipt_v5_page.rbs +17 -0
  424. data/sig/mindee/v1/product/resume/resume_v1.rbs +13 -0
  425. data/sig/mindee/v1/product/resume/resume_v1_certificate.rbs +27 -0
  426. data/sig/mindee/v1/product/resume/resume_v1_certificates.rbs +17 -0
  427. data/sig/mindee/v1/product/resume/resume_v1_document.rbs +69 -0
  428. data/sig/mindee/v1/product/resume/resume_v1_education.rbs +33 -0
  429. data/sig/mindee/v1/product/resume/resume_v1_educations.rbs +17 -0
  430. data/sig/mindee/v1/product/resume/resume_v1_language.rbs +23 -0
  431. data/sig/mindee/v1/product/resume/resume_v1_languages.rbs +17 -0
  432. data/sig/mindee/v1/product/resume/resume_v1_page.rbs +19 -0
  433. data/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs +37 -0
  434. data/sig/mindee/v1/product/resume/resume_v1_professional_experiences.rbs +17 -0
  435. data/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs +23 -0
  436. data/sig/mindee/v1/product/resume/resume_v1_social_networks_urls.rbs +17 -0
  437. data/sig/mindee/v1/product/universal/universal.rbs +16 -0
  438. data/sig/mindee/v1/product/universal/universal_document.rbs +12 -0
  439. data/sig/mindee/v1/product/universal/universal_page.rbs +18 -0
  440. data/sig/mindee/v1/product/universal/universal_prediction.rbs +30 -0
  441. data/sig/mindee/v2/client.rbs +29 -0
  442. data/sig/mindee/v2/file_operation/crop.rbs +10 -0
  443. data/sig/mindee/v2/file_operation/crop_files.rbs +9 -0
  444. data/sig/mindee/v2/file_operation/split.rbs +11 -0
  445. data/sig/mindee/v2/file_operation/split_files.rbs +9 -0
  446. data/sig/mindee/v2/http/api_v2_settings.rbs +27 -0
  447. data/sig/mindee/v2/http/mindee_api_v2.rbs +52 -0
  448. data/sig/mindee/v2/parsing/base_inference.rbs +18 -0
  449. data/sig/mindee/v2/parsing/base_response.rbs +11 -0
  450. data/sig/mindee/v2/parsing/common_response.rbs +12 -0
  451. data/sig/mindee/v2/parsing/error_item.rbs +13 -0
  452. data/sig/mindee/v2/parsing/error_response.rbs +20 -0
  453. data/sig/mindee/v2/parsing/field/base_field.rbs +17 -0
  454. data/sig/mindee/v2/parsing/field/field_confidence.rbs +30 -0
  455. data/sig/mindee/v2/parsing/field/field_location.rbs +16 -0
  456. data/sig/mindee/v2/parsing/field/inference_fields.rbs +20 -0
  457. data/sig/mindee/v2/parsing/field/list_field.rbs +23 -0
  458. data/sig/mindee/v2/parsing/field/object_field.rbs +27 -0
  459. data/sig/mindee/v2/parsing/field/simple_field.rbs +16 -0
  460. data/sig/mindee/v2/parsing/inference_active_options.rbs +26 -0
  461. data/sig/mindee/v2/parsing/inference_file.rbs +17 -0
  462. data/sig/mindee/v2/parsing/inference_job.rbs +13 -0
  463. data/sig/mindee/v2/parsing/inference_model.rbs +12 -0
  464. data/sig/mindee/v2/parsing/job.rbs +24 -0
  465. data/sig/mindee/v2/parsing/job_response.rbs +14 -0
  466. data/sig/mindee/v2/parsing/job_webhook.rbs +19 -0
  467. data/sig/mindee/v2/parsing/rag_metadata.rbs +13 -0
  468. data/sig/mindee/v2/parsing/raw_text.rbs +12 -0
  469. data/sig/mindee/v2/parsing/raw_text_page.rbs +11 -0
  470. data/sig/mindee/v2/parsing/search/pagination_metadata.rbs +20 -0
  471. data/sig/mindee/v2/parsing/search/search_model.rbs +19 -0
  472. data/sig/mindee/v2/parsing/search/search_response.rbs +17 -0
  473. data/sig/mindee/v2/parsing/search_models.rbs +14 -0
  474. data/sig/mindee/v2/product/base_product.rbs +19 -0
  475. data/sig/mindee/v2/product/classification/classification.rbs +10 -0
  476. data/sig/mindee/v2/product/classification/classification_classifier.rbs +15 -0
  477. data/sig/mindee/v2/product/classification/classification_inference.rbs +15 -0
  478. data/sig/mindee/v2/product/classification/classification_response.rbs +23 -0
  479. data/sig/mindee/v2/product/classification/classification_result.rbs +15 -0
  480. data/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs +23 -0
  481. data/sig/mindee/v2/product/crop/crop.rbs +10 -0
  482. data/sig/mindee/v2/product/crop/crop_inference.rbs +14 -0
  483. data/sig/mindee/v2/product/crop/crop_item.rbs +18 -0
  484. data/sig/mindee/v2/product/crop/crop_response.rbs +25 -0
  485. data/sig/mindee/v2/product/crop/crop_result.rbs +14 -0
  486. data/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs +23 -0
  487. data/sig/mindee/v2/product/extraction/extraction.rbs +15 -0
  488. data/sig/mindee/v2/product/extraction/extraction_inference.rbs +19 -0
  489. data/sig/mindee/v2/product/extraction/extraction_response.rbs +24 -0
  490. data/sig/mindee/v2/product/extraction/extraction_result.rbs +18 -0
  491. data/sig/mindee/v2/product/extraction/params/data_schema.rbs +21 -0
  492. data/sig/mindee/v2/product/extraction/params/data_schema_field.rbs +29 -0
  493. data/sig/mindee/v2/product/extraction/params/data_schema_replace.rbs +21 -0
  494. data/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs +38 -0
  495. data/sig/mindee/v2/product/ocr/ocr.rbs +10 -0
  496. data/sig/mindee/v2/product/ocr/ocr_inference.rbs +14 -0
  497. data/sig/mindee/v2/product/ocr/ocr_page.rbs +15 -0
  498. data/sig/mindee/v2/product/ocr/ocr_response.rbs +23 -0
  499. data/sig/mindee/v2/product/ocr/ocr_result.rbs +14 -0
  500. data/sig/mindee/v2/product/ocr/ocr_word.rbs +15 -0
  501. data/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs +24 -0
  502. data/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs +23 -0
  503. data/sig/mindee/v2/product/split/split.rbs +10 -0
  504. data/sig/mindee/v2/product/split/split_inference.rbs +14 -0
  505. data/sig/mindee/v2/product/split/split_range.rbs +18 -0
  506. data/sig/mindee/v2/product/split/split_response.rbs +25 -0
  507. data/sig/mindee/v2/product/split/split_result.rbs +14 -0
  508. data/sig/mindee/version.rbs +6 -0
  509. data/sig/mindee.rbs +62 -0
  510. metadata +600 -0
@@ -0,0 +1,165 @@
1
+ # frozen_string_literal: true
2
+
3
module Mindee
  # Image processing module.
  module Image
    # Miscellaneous image operations.
    module ImageUtils
      # Resizes the provided MiniMagick image using whichever dimensions are given.
      # When both dimensions are nil, nothing is done and nil is returned.
      # @param image [MiniMagick::Image] MiniMagick image handle.
      # @param width [Integer, nil] Width to comply with.
      # @param height [Integer, nil] Height to comply with.
      def self.resize_image(image, width, height)
        if width && height
          image.resize "#{width}x#{height}"
        elsif width
          image.resize width.to_s
        elsif height
          image.resize "x#{height}"
        end
      end

      # Applies a quality setting to the provided MiniMagick image.
      # @param image [MiniMagick::Image] MiniMagick image handle.
      # @param quality [Integer] Quality to apply to the image. This is independent of a JPG's base quality.
      def self.compress_image_quality(image, quality)
        image.quality quality.to_s
      end

      # Coerces the input into a MiniMagick image.
      # Mostly here so that IDEs don't get confused on the type (@type annotation fails sometimes.)
      # @param [MiniMagick::Image, StringIO, IO, File, Tempfile] image The input image. I/O objects are rewound
      #   before being read.
      # @return [MiniMagick::Image]
      # @raise [Error::MindeeImageError] if the input is neither an I/O object nor a MiniMagick image.
      def self.to_image(image)
        case image
        when MiniMagick::Image
          image
        when StringIO, IO, File, Tempfile
          image.rewind
          MiniMagick::Image.read(image)
        else
          # An object always has a class, so the class name can be interpolated directly.
          raise Error::MindeeImageError,
                "Expected an I/O object or a MiniMagick::Image. '#{image.class}' given instead."
        end
      end

      # Converts a MiniMagick image into a StringIO holding the image encoded in the requested format.
      # @param image [MiniMagick::Image] the input image.
      # @param format [String] Format parameter, left open for the future, but should be JPEG for current use-cases.
      # @return [StringIO] Buffer positioned at its start.
      def self.image_to_stringio(image, format = 'JPEG')
        image.format format
        stringio = StringIO.new(image.to_blob)
        stringio.rewind

        stringio
      end

      # Computes the dimensions an image would have once scaled to fit within the provided bounds.
      # The same scale factor is applied to both sides, and results are truncated to integers.
      # @param [MiniMagick::Image] original Input MiniMagick image.
      # @param max_width [Integer, nil] Maximum width. If not specified, only the height constrains the scale.
      # @param max_height [Integer, nil] Maximum height. If not specified, only the width constrains the scale.
      # @return [Array<Integer>] New width & height. The original dimensions are returned when no bound is given.
      # @raise [Error::MindeeImageError] if no image is provided.
      def self.calculate_new_dimensions(original, max_width: nil, max_height: nil)
        raise Error::MindeeImageError, 'Provided image could not be processed for resizing.' if original.nil?

        return [original.width, original.height] if max_width.nil? && max_height.nil?

        # A missing bound never constrains the scale, hence the infinite ratio.
        width_ratio = max_width ? max_width.to_f / original.width : Float::INFINITY
        height_ratio = max_height ? max_height.to_f / original.height : Float::INFINITY
        scale_factor = [width_ratio, height_ratio].min

        [
          (original.width.to_f * scale_factor).to_i,
          (original.height.to_f * scale_factor).to_i,
        ]
      end

      # Computes the Height & Width from a page's media box. Falls back to the size of the initial image.
      # @param image [MiniMagick::Image] The initial image that will fit into the page.
      # @param media_box [Array<Integer>, nil] Media box; its third & fourth entries are used as width & height.
      # @return [Array<Integer>]
      def self.calculate_dimensions_from_media_box(image, media_box)
        return [image[:width].to_i, image[:height].to_i] if media_box.nil? || media_box.none?

        [
          media_box[2]&.to_i || image[:width].to_i,
          media_box[3]&.to_i || image[:height].to_i,
        ]
      end

      # Transforms a PDF into a MagickImage. This is currently used for single-page PDFs.
      # @param pdf_stream [StringIO] Input stream.
      # @param image_quality [Integer] Quality to apply to the image.
      # @return [MiniMagick::Image] Image converted to 'jpg' with the requested quality applied.
      def self.pdf_to_magick_image(pdf_stream, image_quality)
        # Rewind first (as the other readers of this module do) so an already-consumed stream is read in full.
        pdf_stream.rewind
        compressed_image = MiniMagick::Image.read(pdf_stream.read)
        compressed_image.format('jpg')
        compressed_image.quality image_quality.to_s
        compressed_image
      end

      # Retrieves the bounding box of a polygon. A quadrilateral is returned as-is.
      #
      # @param [Array<Point>, Mindee::Geometry::Polygon, Mindee::Geometry::Quadrilateral] polygon
      # @raise [Error::MindeeGeometryError] if the polygon has an unsupported type.
      def self.normalize_polygon(polygon)
        if polygon.is_a?(Mindee::Geometry::Polygon) ||
           (polygon.is_a?(Array) && polygon[0].is_a?(Mindee::Geometry::Point))
          Mindee::Geometry.get_bounding_box(polygon)
        elsif polygon.is_a?(Mindee::Geometry::Quadrilateral)
          polygon
        else
          raise Error::MindeeGeometryError, 'Provided polygon has an invalid type.'
        end
      end

      # Loads a buffer into a MiniMagick Image.
      #
      # @param [StringIO] pdf_stream Buffer containing the PDF. It is rewound before being read.
      # @return [MiniMagick::Image] a valid MiniMagick image handle.
      def self.read_page_content(pdf_stream)
        pdf_stream.rewind
        MiniMagick::Image.read(pdf_stream)
      end

      # Crops a MiniMagick Image from the given bounding box. The image is also converted to 'jpg'.
      # The min/max values are multiplied by the image's width & height to obtain the crop geometry.
      #
      # @param [MiniMagick::Image] image Input Image.
      # @param [Mindee::Geometry::MinMax] min_max_x minimum & maximum values for the x coordinates.
      # @param [Mindee::Geometry::MinMax] min_max_y minimum & maximum values for the y coordinates.
      # @return [MiniMagick::Image] The same image handle that was provided.
      def self.crop_image(image, min_max_x, min_max_y)
        width = image[:width].to_i
        height = image[:height].to_i

        image.format('jpg')
        new_width = (min_max_x.max - min_max_x.min) * width
        new_height = (min_max_y.max - min_max_y.min) * height
        image.crop("#{new_width}x#{new_height}+#{min_max_x.min * width}+#{min_max_y.min * height}")

        image
      end

      # Writes a MiniMagick::Image to a buffer.
      #
      # @param [MiniMagick::Image] image a valid MiniMagick image.
      # @param [StringIO] buffer
      def self.write_image_to_buffer(image, buffer)
        image.write(buffer)
      end

      # Retrieves the file extension from the main file to apply it to the extracted images. Note: coerces pdf as jpg.
      #
      # @param [Mindee::Input::Source::LocalInputSource] input_source Local input source.
      # @return [String] Lowercase extension without its leading dot; empty when the filename has none.
      def self.determine_file_extension(input_source)
        # Coerce once so that a missing filename yields an empty extension instead of a NoMethodError.
        filename = input_source.filename.to_s
        return 'jpg' if input_source.pdf? || filename.downcase.end_with?('pdf')

        File.extname(filename).strip.downcase[1..].to_s
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'image/extracted_image'
4
+ require_relative 'image/image_compressor'
5
+ require_relative 'image/image_extractor'
6
+ require_relative 'image/image_utils'
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+
3
module Mindee
  module Input
    # Base class for parameters accepted by all V2 endpoints.
    class BaseParameters
      # @return [String] ID of the model (required).
      attr_reader :model_id

      # @return [String, nil] Optional alias for the file.
      attr_reader :file_alias

      # @return [Array<String>] List of Webhooks IDs to propagate the API response to (empty when none given).
      attr_reader :webhook_ids

      # @return [PollingOptions] Options for polling. Set only if having timeout issues.
      attr_reader :polling_options

      # @return [Boolean] Whether to close the file after parsing.
      attr_reader :close_file

      # @param [String] model_id ID of the model
      # @param [String, nil] file_alias File alias, if applicable.
      # @param [Array<String>, nil] webhook_ids List of webhook IDs to propagate the API response to.
      # @param [Hash, PollingOptions, nil] polling_options Options for polling. Set only if having timeout issues.
      # @param [Boolean, nil] close_file Whether to close the file after parsing. nil is treated as true.
      # @raise [Error::MindeeInputError] if the model ID is nil or empty.
      def initialize(
        model_id,
        file_alias: nil,
        webhook_ids: nil,
        polling_options: nil,
        close_file: true
      )
        # nil must be tested first: calling `empty?` on nil would raise a NoMethodError.
        raise Error::MindeeInputError, 'Model ID is required.' if model_id.nil? || model_id.empty?

        @model_id = model_id
        @file_alias = file_alias
        @webhook_ids = webhook_ids || []
        @polling_options = get_clean_polling_options(polling_options)
        @close_file = close_file.nil? || close_file
      end

      # @return [String] Slug for the endpoint.
      # @raise [NotImplementedError] when called on BaseParameters itself.
      def self.slug
        if self == BaseParameters
          raise NotImplementedError, 'Cannot access `slug` directly on the BaseParameters class.'
        end

        ''
      end

      # @return [String] Slug for the endpoint.
      def slug
        self.class.slug
      end

      # Load from a hash
      # @param [Hash] params Parameters to provide as a hash. Its keys are symbolized in place.
      # @return [BaseParameters]
      # @raise [Error::MindeeInputError] if the model ID is missing.
      def self.from_hash(params: {})
        load_from_hash(params: params)
        new(
          params[:model_id],
          file_alias: params[:file_alias],
          webhook_ids: params[:webhook_ids],
          polling_options: params[:polling_options],
          close_file: params[:close_file]
        )
      end

      # Normalizes a parameter hash: symbolizes its keys in place and checks that a model ID is present.
      # Polling options are left untouched here; the constructor turns them into a PollingOptions object.
      # @param [Hash] params Parameters to provide as a hash.
      # @return [Hash] The same hash, with symbol keys.
      # @raise [Error::MindeeInputError] if the model ID is missing or empty.
      def self.load_from_hash(params: {})
        params.transform_keys!(&:to_sym)

        if params.empty? || params[:model_id].nil? || params[:model_id].empty?
          raise Error::MindeeInputError, 'Model ID is required.'
        end

        params
      end

      # Appends base form data to the provided array.
      # @param [Array] form_data Array of form fields
      # @return [Array] The array that was provided, with the file alias & webhook IDs appended when present.
      def append_form_data(form_data)
        form_data.push(['file_alias', @file_alias]) if @file_alias
        webhook_ids = @webhook_ids || []
        form_data.push(['webhook_ids', webhook_ids.join(',')]) unless webhook_ids.empty?
        form_data
      end

      # Validates the parameters for async auto-polling
      # @raise [ArgumentError] if a delay or the retry count is below its minimum.
      def validate_async_params
        min_delay_sec = 1
        min_initial_delay_sec = 1
        min_retries = 2

        if @polling_options.delay_sec < min_delay_sec
          raise ArgumentError,
                "Cannot set auto-poll delay to less than #{min_delay_sec} second(s)"
        end
        if @polling_options.initial_delay_sec < min_initial_delay_sec
          raise ArgumentError,
                "Cannot set initial parsing delay to less than #{min_initial_delay_sec} second(s)"
        end
        return unless @polling_options.max_retries < min_retries

        raise ArgumentError,
              "Cannot set auto-poll retries to less than #{min_retries}"
      end

      private

      # Builds a proper polling options object, potentially from a hash.
      # @param [Hash, PollingOptions, nil] polling_options Polling options.
      # @return [PollingOptions] The provided object when it already is a PollingOptions, an object built from
      #   the hash (string or symbol keys) when one is given, default options otherwise.
      def get_clean_polling_options(polling_options)
        case polling_options
        when PollingOptions
          # Keep the caller's settings instead of replacing them with defaults.
          polling_options
        when Hash
          options = polling_options.transform_keys(&:to_sym)
          PollingOptions.new(
            initial_delay_sec: options.fetch(:initial_delay_sec, 2.0),
            delay_sec: options.fetch(:delay_sec, 1.5),
            max_retries: options.fetch(:max_retries, 80)
          )
        else
          PollingOptions.new
        end
      end
    end
  end
end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'openssl'
5
+ require 'stringio'
6
+ require 'pathname'
7
+ require 'tempfile'
8
+
9
module Mindee
  module Input
    # Response loaded locally.
    class LocalResponse
      # @return [StringIO] Buffer holding the payload, stripped of its line breaks.
      attr_reader :file

      # @param input_file [File, Tempfile, IO, StringIO, String, Pathname] The input file, which can be a StringIO.
      #   A String or Pathname is read as a file when it points to an existing path, and is used as the payload
      #   itself otherwise.
      # @raise [Error::MindeeInputError] if the input type is not supported.
      def initialize(input_file)
        payload = case input_file
                  when IO, StringIO, File, Tempfile
                    input_file.read.to_s
                  when Pathname, String
                    path = input_file.to_s
                    Pathname(path).exist? ? File.read(path, encoding: 'utf-8') : path
                  else
                    raise Error::MindeeInputError, "Incompatible type for input '#{input_file.class}'."
                  end
        @file = StringIO.new(strip_line_breaks(payload))
        @file.rewind
      end

      # Returns the file as a hash.
      # @return [Hash]
      # @raise [Error::MindeeInputError] if the payload is not valid JSON.
      def as_hash
        @file.rewind
        file_str = @file.read
        raise 'File could not be read' if file_str.nil?

        JSON.parse(file_str, object_class: Hash)
      rescue JSON::ParserError
        raise Error::MindeeInputError, "File is not a valid dict. #{file_str}"
      end

      # Processes the secret key
      # @param secret_key [String] the secret key as plain text.
      # @return [String] The key encoded as UTF-8 when it is a String; the input is returned untouched otherwise.
      def self.process_secret_key(secret_key)
        secret_key.is_a?(String) ? secret_key.encode('utf-8') : secret_key
      end

      # Computes the hexadecimal HMAC-SHA256 signature of the payload.
      # @param [String] secret_key Secret key, either a string or a byte/byte array.
      # @return [String]
      # @raise [Error::MindeeInputError] if the signature could not be computed.
      def get_hmac_signature(secret_key)
        algorithm = OpenSSL::Digest.new('sha256')
        begin
          @file.rewind
          mac = OpenSSL::HMAC.hexdigest(algorithm, self.class.process_secret_key(secret_key),
                                        @file.read || raise('File could not be read'))
        rescue StandardError
          raise Error::MindeeInputError, 'Could not get HMAC signature from payload.'
        end
        mac
      end

      # Checks a signature against the HMAC signature of the payload.
      # @param secret_key [String] Secret key, either a string or a byte/byte array.
      # @param signature [String] Signature to match
      # @return [bool]
      def valid_hmac_signature?(secret_key, signature)
        expected = get_hmac_signature(secret_key)
        return false unless signature.is_a?(String)
        # Older OpenSSL bindings do not ship the helper; fall back to a plain comparison there.
        return signature == expected unless OpenSSL.respond_to?(:secure_compare)

        # The signature comes from outside: compare in constant time so that timing does not reveal
        # how much of the expected value was matched.
        OpenSSL.secure_compare(expected, signature)
      end

      # Deserializes a loaded response
      # @param response_class [Class<V2::Parsing::BaseResponse>] class to return.
      # @return [V2::Parsing::JobResponse, Mindee::V2::Parsing::BaseResponse]
      def deserialize_response(response_class)
        response_class.new(as_hash) # : Mindee::V2::Parsing::JobResponse | Mindee::V2::Parsing::BaseResponse
      end

      private

      # Removes every carriage return & line feed from the payload.
      # @param payload [String]
      # @return [String]
      def strip_line_breaks(payload)
        payload.gsub(%r{[\r\n]}, '')
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module Mindee
  module Input
    # Value holder for the timing settings used when polling asynchronously.
    class PollingOptions
      # @return [Integer, Float] Seconds to wait before the first polling attempt.
      attr_reader :initial_delay_sec

      # @return [Integer, Float] Seconds to wait between two polling attempts.
      attr_reader :delay_sec

      # @return [Integer] Total number of polling attempts.
      attr_reader :max_retries

      # Stores the provided values as they are given.
      # @param initial_delay_sec [Integer, Float] Initial delay before the first attempt (default: 2.0).
      # @param delay_sec [Integer, Float] Delay between attempts (default: 1.5).
      # @param max_retries [Integer] Maximum number of retries (default: 80).
      def initialize(initial_delay_sec: 2.0, delay_sec: 1.5, max_retries: 80)
        @max_retries = max_retries
        @delay_sec = delay_sec
        @initial_delay_sec = initial_delay_sec
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'stringio'
4
+
5
module Mindee
  module Input
    module Source
      # Input source built from a base64-encoded string.
      class Base64InputSource < LocalInputSource
        # Decodes the base64 string into a binary-encoded StringIO, then hands it to the parent constructor.
        # @param base64_string [String]
        # @param filename [String]
        # @param repair_pdf [bool]
        def initialize(base64_string, filename, repair_pdf: false)
          decoded = base64_string.unpack1('m*').to_s
          io_stream = StringIO.new(decoded)
          io_stream.set_encoding Encoding::BINARY
          super(io_stream, filename, repair_pdf: repair_pdf)
        end

        # Reads the whole stream from its start and returns it base64-encoded, alongside the escaped filename.
        # NOTE(review): relies on @io_stream & @filename, presumably set by the parent class - confirm.
        # @param close [bool] Whether to close the stream once it has been read.
        # @return [Array<[String, aBinaryString ], [Hash, nil] >]
        def read_contents(close: true)
          @io_stream.seek(0)
          data = @io_stream.read
          @io_stream.close if close

          encoded = [data].pack('m')
          metadata = { filename: Source.convert_to_unicode_escape(@filename) }
          [encoded, metadata]
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'stringio'
4
+
5
module Mindee
  module Input
    module Source
      # Input source built from raw bytes.
      class BytesInputSource < LocalInputSource
        # Wraps the bytes into a binary-encoded StringIO, then hands it to the parent constructor.
        # @param raw_bytes [String]
        # @param filename [String]
        # @param repair_pdf [bool]
        def initialize(raw_bytes, filename, repair_pdf: false)
          binary_stream = StringIO.new(raw_bytes)
          binary_stream.set_encoding(Encoding::BINARY)
          super(binary_stream, filename, repair_pdf: repair_pdf)
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'stringio'
4
+
5
module Mindee
  module Input
    module Source
      # Input source built from an already opened file handle.
      class FileInputSource < LocalInputSource
        # Passes the file handle, untouched, to the parent constructor as the I/O stream.
        # @param input_file [File]
        # @param filename [String]
        # @param repair_pdf [bool]
        def initialize(input_file, filename, repair_pdf: false)
          super(input_file, filename, repair_pdf: repair_pdf)
        end
      end
    end
  end
end