mindee-lite 5.0.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (510) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +24 -0
  3. data/.gitattributes +14 -0
  4. data/.gitignore +76 -0
  5. data/.gitmodules +3 -0
  6. data/.pre-commit-config.yaml +36 -0
  7. data/.rubocop.yml +49 -0
  8. data/.yardopts +4 -0
  9. data/CHANGELOG.md +515 -0
  10. data/CODE_OF_CONDUCT.md +129 -0
  11. data/CONTRIBUTING.md +107 -0
  12. data/Gemfile +14 -0
  13. data/LICENSE +21 -0
  14. data/README.md +42 -0
  15. data/Rakefile +40 -0
  16. data/Steepfile +30 -0
  17. data/bin/console +14 -0
  18. data/bin/mindee.rb +30 -0
  19. data/bin/v1/parser.rb +153 -0
  20. data/bin/v1/products.rb +88 -0
  21. data/bin/v2/parser.rb +235 -0
  22. data/bin/v2/products.rb +34 -0
  23. data/docs/code_samples/bank_account_details_v1.txt +24 -0
  24. data/docs/code_samples/bank_account_details_v2.txt +24 -0
  25. data/docs/code_samples/bank_statement_fr_v2_async.txt +24 -0
  26. data/docs/code_samples/barcode_reader_v1.txt +24 -0
  27. data/docs/code_samples/cropper_v1.txt +21 -0
  28. data/docs/code_samples/default.txt +30 -0
  29. data/docs/code_samples/default_async.txt +29 -0
  30. data/docs/code_samples/expense_receipts_v5.txt +25 -0
  31. data/docs/code_samples/expense_receipts_v5_async.txt +24 -0
  32. data/docs/code_samples/financial_document_v1.txt +25 -0
  33. data/docs/code_samples/financial_document_v1_async.txt +24 -0
  34. data/docs/code_samples/idcard_fr_v1.txt +24 -0
  35. data/docs/code_samples/idcard_fr_v2.txt +24 -0
  36. data/docs/code_samples/international_id_v2_async.txt +24 -0
  37. data/docs/code_samples/invoice_splitter_v1_async.txt +24 -0
  38. data/docs/code_samples/invoices_v4.txt +25 -0
  39. data/docs/code_samples/invoices_v4_async.txt +24 -0
  40. data/docs/code_samples/multi_receipts_detector_v1.txt +24 -0
  41. data/docs/code_samples/passport_v1.txt +24 -0
  42. data/docs/code_samples/resume_v1_async.txt +24 -0
  43. data/docs/code_samples/v2_classification.txt +30 -0
  44. data/docs/code_samples/v2_crop.txt +30 -0
  45. data/docs/code_samples/v2_extraction.txt +42 -0
  46. data/docs/code_samples/v2_extraction_webhook.txt +45 -0
  47. data/docs/code_samples/v2_ocr.txt +30 -0
  48. data/docs/code_samples/v2_split.txt +30 -0
  49. data/docs/code_samples/workflow_execution.txt +28 -0
  50. data/docs/code_samples/workflow_polling.txt +35 -0
  51. data/examples/auto_invoice_splitter_extraction.rb +48 -0
  52. data/examples/auto_multi_receipts_detector_extraction.rb +30 -0
  53. data/lib/mindee/dependency.rb +29 -0
  54. data/lib/mindee/error/mindee_error.rb +17 -0
  55. data/lib/mindee/error/mindee_http_error.rb +36 -0
  56. data/lib/mindee/error/mindee_http_error_v2.rb +45 -0
  57. data/lib/mindee/error/mindee_http_unknown_error_v2.rb +18 -0
  58. data/lib/mindee/error/mindee_input_error.rb +30 -0
  59. data/lib/mindee/error.rb +6 -0
  60. data/lib/mindee/geometry/min_max.rb +23 -0
  61. data/lib/mindee/geometry/point.rb +41 -0
  62. data/lib/mindee/geometry/polygon.rb +37 -0
  63. data/lib/mindee/geometry/quadrilateral.rb +50 -0
  64. data/lib/mindee/geometry/utils.rb +88 -0
  65. data/lib/mindee/geometry.rb +7 -0
  66. data/lib/mindee/http/.rubocop.yml +7 -0
  67. data/lib/mindee/http/http_error_handler.rb +106 -0
  68. data/lib/mindee/http/response_validation.rb +81 -0
  69. data/lib/mindee/http.rb +3 -0
  70. data/lib/mindee/image/extracted_image.rb +89 -0
  71. data/lib/mindee/image/image_compressor.rb +29 -0
  72. data/lib/mindee/image/image_extractor.rb +118 -0
  73. data/lib/mindee/image/image_utils.rb +165 -0
  74. data/lib/mindee/image.rb +6 -0
  75. data/lib/mindee/input/base_parameters.rb +149 -0
  76. data/lib/mindee/input/local_response.rb +80 -0
  77. data/lib/mindee/input/polling_options.rb +26 -0
  78. data/lib/mindee/input/sources/base64_input_source.rb +31 -0
  79. data/lib/mindee/input/sources/bytes_input_source.rb +21 -0
  80. data/lib/mindee/input/sources/file_input_source.rb +20 -0
  81. data/lib/mindee/input/sources/local_input_source.rb +216 -0
  82. data/lib/mindee/input/sources/path_input_source.rb +20 -0
  83. data/lib/mindee/input/sources/url_input_source.rb +130 -0
  84. data/lib/mindee/input/sources.rb +8 -0
  85. data/lib/mindee/input.rb +4 -0
  86. data/lib/mindee/logging/logger.rb +24 -0
  87. data/lib/mindee/logging.rb +3 -0
  88. data/lib/mindee/page_options.rb +24 -0
  89. data/lib/mindee/pdf/extracted_pdf.rb +70 -0
  90. data/lib/mindee/pdf/pdf_compressor.rb +121 -0
  91. data/lib/mindee/pdf/pdf_extractor.rb +121 -0
  92. data/lib/mindee/pdf/pdf_processor.rb +91 -0
  93. data/lib/mindee/pdf/pdf_tools.rb +201 -0
  94. data/lib/mindee/pdf.rb +7 -0
  95. data/lib/mindee/v1/client.rb +490 -0
  96. data/lib/mindee/v1/extraction/multi_receipts_extractor.rb +32 -0
  97. data/lib/mindee/v1/extraction.rb +3 -0
  98. data/lib/mindee/v1/http/.rubocop.yml +7 -0
  99. data/lib/mindee/v1/http/endpoint.rb +221 -0
  100. data/lib/mindee/v1/http/workflow_endpoint.rb +93 -0
  101. data/lib/mindee/v1/http.rb +4 -0
  102. data/lib/mindee/v1/parsing/common/api_request.rb +38 -0
  103. data/lib/mindee/v1/parsing/common/api_response.rb +63 -0
  104. data/lib/mindee/v1/parsing/common/document.rb +86 -0
  105. data/lib/mindee/v1/parsing/common/execution.rb +78 -0
  106. data/lib/mindee/v1/parsing/common/execution_file.rb +26 -0
  107. data/lib/mindee/v1/parsing/common/execution_priority.rb +38 -0
  108. data/lib/mindee/v1/parsing/common/extras/cropper_extra.rb +32 -0
  109. data/lib/mindee/v1/parsing/common/extras/extras.rb +62 -0
  110. data/lib/mindee/v1/parsing/common/extras/full_text_ocr_extra.rb +35 -0
  111. data/lib/mindee/v1/parsing/common/extras/rag_extra.rb +28 -0
  112. data/lib/mindee/v1/parsing/common/extras.rb +6 -0
  113. data/lib/mindee/v1/parsing/common/inference.rb +69 -0
  114. data/lib/mindee/v1/parsing/common/job.rb +48 -0
  115. data/lib/mindee/v1/parsing/common/ocr/mvision_v1.rb +52 -0
  116. data/lib/mindee/v1/parsing/common/ocr/ocr.rb +180 -0
  117. data/lib/mindee/v1/parsing/common/ocr.rb +3 -0
  118. data/lib/mindee/v1/parsing/common/orientation.rb +28 -0
  119. data/lib/mindee/v1/parsing/common/page.rb +49 -0
  120. data/lib/mindee/v1/parsing/common/prediction.rb +19 -0
  121. data/lib/mindee/v1/parsing/common/product.rb +26 -0
  122. data/lib/mindee/v1/parsing/common/workflow_response.rb +30 -0
  123. data/lib/mindee/v1/parsing/common.rb +15 -0
  124. data/lib/mindee/v1/parsing/standard/abstract_field.rb +74 -0
  125. data/lib/mindee/v1/parsing/standard/address_field.rb +51 -0
  126. data/lib/mindee/v1/parsing/standard/amount_field.rb +28 -0
  127. data/lib/mindee/v1/parsing/standard/base_field.rb +30 -0
  128. data/lib/mindee/v1/parsing/standard/boolean_field.rb +29 -0
  129. data/lib/mindee/v1/parsing/standard/classification_field.rb +18 -0
  130. data/lib/mindee/v1/parsing/standard/company_registration_field.rb +45 -0
  131. data/lib/mindee/v1/parsing/standard/date_field.rb +40 -0
  132. data/lib/mindee/v1/parsing/standard/feature_field.rb +26 -0
  133. data/lib/mindee/v1/parsing/standard/locale_field.rb +52 -0
  134. data/lib/mindee/v1/parsing/standard/payment_details_field.rb +44 -0
  135. data/lib/mindee/v1/parsing/standard/position_field.rb +61 -0
  136. data/lib/mindee/v1/parsing/standard/string_field.rb +26 -0
  137. data/lib/mindee/v1/parsing/standard/tax_field.rb +110 -0
  138. data/lib/mindee/v1/parsing/standard.rb +15 -0
  139. data/lib/mindee/v1/parsing/universal/universal_list_field.rb +60 -0
  140. data/lib/mindee/v1/parsing/universal/universal_object_field.rb +123 -0
  141. data/lib/mindee/v1/parsing/universal.rb +4 -0
  142. data/lib/mindee/v1/parsing.rb +5 -0
  143. data/lib/mindee/v1/product/.rubocop.yml +12 -0
  144. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1.rb +47 -0
  145. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb +47 -0
  146. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rb +38 -0
  147. data/lib/mindee/v1/product/cropper/cropper_v1.rb +47 -0
  148. data/lib/mindee/v1/product/cropper/cropper_v1_document.rb +15 -0
  149. data/lib/mindee/v1/product/cropper/cropper_v1_page.rb +55 -0
  150. data/lib/mindee/v1/product/financial_document/financial_document_v1.rb +47 -0
  151. data/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb +329 -0
  152. data/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb +124 -0
  153. data/lib/mindee/v1/product/financial_document/financial_document_v1_line_items.rb +64 -0
  154. data/lib/mindee/v1/product/financial_document/financial_document_v1_page.rb +38 -0
  155. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rb +49 -0
  156. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb +49 -0
  157. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rb +40 -0
  158. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rb +49 -0
  159. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb +63 -0
  160. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb +60 -0
  161. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rb +40 -0
  162. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2.rb +49 -0
  163. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb +169 -0
  164. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rb +40 -0
  165. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb +78 -0
  166. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rb +56 -0
  167. data/lib/mindee/v1/product/fr/id_card/id_card_v1.rb +49 -0
  168. data/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb +106 -0
  169. data/lib/mindee/v1/product/fr/id_card/id_card_v1_page.rb +57 -0
  170. data/lib/mindee/v1/product/fr/id_card/id_card_v2.rb +49 -0
  171. data/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb +143 -0
  172. data/lib/mindee/v1/product/fr/id_card/id_card_v2_page.rb +65 -0
  173. data/lib/mindee/v1/product/international_id/international_id_v2.rb +47 -0
  174. data/lib/mindee/v1/product/international_id/international_id_v2_document.rb +164 -0
  175. data/lib/mindee/v1/product/international_id/international_id_v2_page.rb +38 -0
  176. data/lib/mindee/v1/product/invoice/invoice_v4.rb +47 -0
  177. data/lib/mindee/v1/product/invoice/invoice_v4_document.rb +300 -0
  178. data/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb +124 -0
  179. data/lib/mindee/v1/product/invoice/invoice_v4_line_items.rb +64 -0
  180. data/lib/mindee/v1/product/invoice/invoice_v4_page.rb +38 -0
  181. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rb +47 -0
  182. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb +66 -0
  183. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +58 -0
  184. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +50 -0
  185. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rb +38 -0
  186. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rb +47 -0
  187. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +38 -0
  188. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +38 -0
  189. data/lib/mindee/v1/product/passport/passport_v1.rb +47 -0
  190. data/lib/mindee/v1/product/passport/passport_v1_document.rb +112 -0
  191. data/lib/mindee/v1/product/passport/passport_v1_page.rb +38 -0
  192. data/lib/mindee/v1/product/receipt/receipt_v5.rb +47 -0
  193. data/lib/mindee/v1/product/receipt/receipt_v5_document.rb +187 -0
  194. data/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb +88 -0
  195. data/lib/mindee/v1/product/receipt/receipt_v5_line_items.rb +56 -0
  196. data/lib/mindee/v1/product/receipt/receipt_v5_page.rb +38 -0
  197. data/lib/mindee/v1/product/resume/resume_v1.rb +47 -0
  198. data/lib/mindee/v1/product/resume/resume_v1_certificate.rb +82 -0
  199. data/lib/mindee/v1/product/resume/resume_v1_certificates.rb +60 -0
  200. data/lib/mindee/v1/product/resume/resume_v1_document.rb +340 -0
  201. data/lib/mindee/v1/product/resume/resume_v1_education.rb +106 -0
  202. data/lib/mindee/v1/product/resume/resume_v1_educations.rb +66 -0
  203. data/lib/mindee/v1/product/resume/resume_v1_language.rb +66 -0
  204. data/lib/mindee/v1/product/resume/resume_v1_languages.rb +56 -0
  205. data/lib/mindee/v1/product/resume/resume_v1_page.rb +38 -0
  206. data/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb +122 -0
  207. data/lib/mindee/v1/product/resume/resume_v1_professional_experiences.rb +70 -0
  208. data/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb +66 -0
  209. data/lib/mindee/v1/product/resume/resume_v1_social_networks_urls.rb +56 -0
  210. data/lib/mindee/v1/product/universal/universal.rb +48 -0
  211. data/lib/mindee/v1/product/universal/universal_document.rb +35 -0
  212. data/lib/mindee/v1/product/universal/universal_page.rb +54 -0
  213. data/lib/mindee/v1/product/universal/universal_prediction.rb +128 -0
  214. data/lib/mindee/v1/product.rb +18 -0
  215. data/lib/mindee/v1.rb +7 -0
  216. data/lib/mindee/v2/client.rb +132 -0
  217. data/lib/mindee/v2/file_operation/crop.rb +51 -0
  218. data/lib/mindee/v2/file_operation/crop_files.rb +25 -0
  219. data/lib/mindee/v2/file_operation/split.rb +37 -0
  220. data/lib/mindee/v2/file_operation/split_files.rb +25 -0
  221. data/lib/mindee/v2/file_operation.rb +6 -0
  222. data/lib/mindee/v2/http/.rubocop.yml +7 -0
  223. data/lib/mindee/v2/http/api_v2_settings.rb +65 -0
  224. data/lib/mindee/v2/http/mindee_api_v2.rb +230 -0
  225. data/lib/mindee/v2/http.rb +4 -0
  226. data/lib/mindee/v2/parsing/base_inference.rb +44 -0
  227. data/lib/mindee/v2/parsing/base_response.rb +15 -0
  228. data/lib/mindee/v2/parsing/common_response.rb +20 -0
  229. data/lib/mindee/v2/parsing/error_item.rb +21 -0
  230. data/lib/mindee/v2/parsing/error_response.rb +51 -0
  231. data/lib/mindee/v2/parsing/field/base_field.rb +63 -0
  232. data/lib/mindee/v2/parsing/field/field_confidence.rb +128 -0
  233. data/lib/mindee/v2/parsing/field/field_location.rb +33 -0
  234. data/lib/mindee/v2/parsing/field/inference_fields.rb +105 -0
  235. data/lib/mindee/v2/parsing/field/list_field.rb +79 -0
  236. data/lib/mindee/v2/parsing/field/object_field.rb +138 -0
  237. data/lib/mindee/v2/parsing/field/simple_field.rb +60 -0
  238. data/lib/mindee/v2/parsing/field.rb +9 -0
  239. data/lib/mindee/v2/parsing/inference_active_options.rb +67 -0
  240. data/lib/mindee/v2/parsing/inference_file.rb +38 -0
  241. data/lib/mindee/v2/parsing/inference_job.rb +25 -0
  242. data/lib/mindee/v2/parsing/inference_model.rb +30 -0
  243. data/lib/mindee/v2/parsing/job.rb +93 -0
  244. data/lib/mindee/v2/parsing/job_response.rb +30 -0
  245. data/lib/mindee/v2/parsing/job_webhook.rb +59 -0
  246. data/lib/mindee/v2/parsing/rag_metadata.rb +17 -0
  247. data/lib/mindee/v2/parsing/raw_text.rb +27 -0
  248. data/lib/mindee/v2/parsing/raw_text_page.rb +24 -0
  249. data/lib/mindee/v2/parsing/search/pagination_metadata.rb +44 -0
  250. data/lib/mindee/v2/parsing/search/search_model.rb +38 -0
  251. data/lib/mindee/v2/parsing/search/search_models.rb +34 -0
  252. data/lib/mindee/v2/parsing/search/search_response.rb +38 -0
  253. data/lib/mindee/v2/parsing/search.rb +6 -0
  254. data/lib/mindee/v2/parsing.rb +16 -0
  255. data/lib/mindee/v2/product/base_product.rb +28 -0
  256. data/lib/mindee/v2/product/classification/classification.rb +20 -0
  257. data/lib/mindee/v2/product/classification/classification_classifier.rb +25 -0
  258. data/lib/mindee/v2/product/classification/classification_inference.rb +35 -0
  259. data/lib/mindee/v2/product/classification/classification_response.rb +32 -0
  260. data/lib/mindee/v2/product/classification/classification_result.rb +27 -0
  261. data/lib/mindee/v2/product/classification/params/classification_parameters.rb +47 -0
  262. data/lib/mindee/v2/product/crop/crop.rb +20 -0
  263. data/lib/mindee/v2/product/crop/crop_inference.rb +34 -0
  264. data/lib/mindee/v2/product/crop/crop_item.rb +39 -0
  265. data/lib/mindee/v2/product/crop/crop_response.rb +40 -0
  266. data/lib/mindee/v2/product/crop/crop_result.rb +34 -0
  267. data/lib/mindee/v2/product/crop/params/crop_parameters.rb +47 -0
  268. data/lib/mindee/v2/product/extraction/extraction.rb +21 -0
  269. data/lib/mindee/v2/product/extraction/extraction_inference.rb +40 -0
  270. data/lib/mindee/v2/product/extraction/extraction_response.rb +32 -0
  271. data/lib/mindee/v2/product/extraction/extraction_result.rb +44 -0
  272. data/lib/mindee/v2/product/extraction/params/data_schema.rb +51 -0
  273. data/lib/mindee/v2/product/extraction/params/data_schema_field.rb +69 -0
  274. data/lib/mindee/v2/product/extraction/params/data_schema_replace.rb +39 -0
  275. data/lib/mindee/v2/product/extraction/params/extraction_parameters.rb +125 -0
  276. data/lib/mindee/v2/product/ocr/ocr.rb +20 -0
  277. data/lib/mindee/v2/product/ocr/ocr_inference.rb +34 -0
  278. data/lib/mindee/v2/product/ocr/ocr_page.rb +33 -0
  279. data/lib/mindee/v2/product/ocr/ocr_response.rb +32 -0
  280. data/lib/mindee/v2/product/ocr/ocr_result.rb +34 -0
  281. data/lib/mindee/v2/product/ocr/ocr_word.rb +29 -0
  282. data/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +47 -0
  283. data/lib/mindee/v2/product/split/params/split_parameters.rb +48 -0
  284. data/lib/mindee/v2/product/split/split.rb +19 -0
  285. data/lib/mindee/v2/product/split/split_inference.rb +34 -0
  286. data/lib/mindee/v2/product/split/split_range.rb +38 -0
  287. data/lib/mindee/v2/product/split/split_response.rb +40 -0
  288. data/lib/mindee/v2/product/split/split_result.rb +34 -0
  289. data/lib/mindee/v2/product.rb +7 -0
  290. data/lib/mindee/v2.rb +7 -0
  291. data/lib/mindee/version.rb +26 -0
  292. data/lib/mindee.rb +135 -0
  293. data/mindee-lite.gemspec +36 -0
  294. data/mindee.gemspec +44 -0
  295. data/sig/custom/marcel.rbs +3 -0
  296. data/sig/custom/mini_magick.rbs +31 -0
  297. data/sig/custom/net_http.rbs +43 -0
  298. data/sig/custom/origami.rbs +59 -0
  299. data/sig/mindee/dependency.rbs +13 -0
  300. data/sig/mindee/error/mindee_error.rbs +13 -0
  301. data/sig/mindee/error/mindee_http_error.rbs +17 -0
  302. data/sig/mindee/error/mindee_http_error_v2.rbs +15 -0
  303. data/sig/mindee/error/mindee_http_unknown_error_v2.rbs +9 -0
  304. data/sig/mindee/error/mindee_input_error.rbs +18 -0
  305. data/sig/mindee/geometry/min_max.rbs +11 -0
  306. data/sig/mindee/geometry/point.rbs +14 -0
  307. data/sig/mindee/geometry/polygon.rbs +12 -0
  308. data/sig/mindee/geometry/quadrilateral.rbs +15 -0
  309. data/sig/mindee/geometry/utils.rbs +13 -0
  310. data/sig/mindee/http/http_error_handler.rbs +15 -0
  311. data/sig/mindee/http/response_validation.rbs +11 -0
  312. data/sig/mindee/image/extracted_image.rbs +21 -0
  313. data/sig/mindee/image/image_compressor.rbs +8 -0
  314. data/sig/mindee/image/image_extractor.rbs +13 -0
  315. data/sig/mindee/image/image_utils.rbs +19 -0
  316. data/sig/mindee/input/base_parameters.rbs +35 -0
  317. data/sig/mindee/input/local_response.rbs +14 -0
  318. data/sig/mindee/input/polling_options.rbs +12 -0
  319. data/sig/mindee/input/sources/base64_input_source.rbs +11 -0
  320. data/sig/mindee/input/sources/bytes_input_source.rbs +10 -0
  321. data/sig/mindee/input/sources/file_input_source.rbs +10 -0
  322. data/sig/mindee/input/sources/local_input_source.rbs +30 -0
  323. data/sig/mindee/input/sources/path_input_source.rbs +10 -0
  324. data/sig/mindee/input/sources/url_input_source.rbs +20 -0
  325. data/sig/mindee/logging/logger.rbs +11 -0
  326. data/sig/mindee/page_options.rbs +11 -0
  327. data/sig/mindee/pdf/extracted_pdf.rbs +17 -0
  328. data/sig/mindee/pdf/pdf_compressor.rbs +15 -0
  329. data/sig/mindee/pdf/pdf_extractor.rbs +19 -0
  330. data/sig/mindee/pdf/pdf_processor.rbs +12 -0
  331. data/sig/mindee/pdf/pdf_tools.rbs +31 -0
  332. data/sig/mindee/v1/client.rbs +84 -0
  333. data/sig/mindee/v1/extraction/multi_receipts_extractor.rbs +8 -0
  334. data/sig/mindee/v1/http/endpoint.rbs +41 -0
  335. data/sig/mindee/v1/http/workflow_endpoint.rbs +22 -0
  336. data/sig/mindee/v1/parsing/common/api_request.rbs +22 -0
  337. data/sig/mindee/v1/parsing/common/api_response.rbs +31 -0
  338. data/sig/mindee/v1/parsing/common/document.rbs +32 -0
  339. data/sig/mindee/v1/parsing/common/execution.rbs +26 -0
  340. data/sig/mindee/v1/parsing/common/execution_file.rbs +16 -0
  341. data/sig/mindee/v1/parsing/common/execution_priority.rbs +16 -0
  342. data/sig/mindee/v1/parsing/common/extras/cropper_extra.rbs +18 -0
  343. data/sig/mindee/v1/parsing/common/extras/extras.rbs +24 -0
  344. data/sig/mindee/v1/parsing/common/extras/full_text_ocr_extra.rbs +22 -0
  345. data/sig/mindee/v1/parsing/common/extras/rag_extra.rbs +19 -0
  346. data/sig/mindee/v1/parsing/common/inference.rbs +31 -0
  347. data/sig/mindee/v1/parsing/common/job.rbs +24 -0
  348. data/sig/mindee/v1/parsing/common/ocr/mvision_v1.rbs +20 -0
  349. data/sig/mindee/v1/parsing/common/ocr/ocr.rbs +56 -0
  350. data/sig/mindee/v1/parsing/common/orientation.rbs +15 -0
  351. data/sig/mindee/v1/parsing/common/page.rbs +19 -0
  352. data/sig/mindee/v1/parsing/common/prediction.rbs +14 -0
  353. data/sig/mindee/v1/parsing/common/product.rbs +16 -0
  354. data/sig/mindee/v1/parsing/common/workflow_response.rbs +22 -0
  355. data/sig/mindee/v1/parsing/standard/abstract_field.rbs +30 -0
  356. data/sig/mindee/v1/parsing/standard/address_field.rbs +28 -0
  357. data/sig/mindee/v1/parsing/standard/amount_field.rbs +16 -0
  358. data/sig/mindee/v1/parsing/standard/base_field.rbs +16 -0
  359. data/sig/mindee/v1/parsing/standard/boolean_field.rbs +16 -0
  360. data/sig/mindee/v1/parsing/standard/classification_field.rbs +12 -0
  361. data/sig/mindee/v1/parsing/standard/company_registration_field.rbs +20 -0
  362. data/sig/mindee/v1/parsing/standard/date_field.rbs +20 -0
  363. data/sig/mindee/v1/parsing/standard/feature_field.rbs +12 -0
  364. data/sig/mindee/v1/parsing/standard/locale_field.rbs +24 -0
  365. data/sig/mindee/v1/parsing/standard/payment_details_field.rbs +19 -0
  366. data/sig/mindee/v1/parsing/standard/position_field.rbs +26 -0
  367. data/sig/mindee/v1/parsing/standard/string_field.rbs +16 -0
  368. data/sig/mindee/v1/parsing/standard/tax_field.rbs +33 -0
  369. data/sig/mindee/v1/parsing/universal/universal_list_field.rbs +21 -0
  370. data/sig/mindee/v1/parsing/universal/universal_object_field.rbs +38 -0
  371. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1.rbs +13 -0
  372. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rbs +16 -0
  373. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rbs +17 -0
  374. data/sig/mindee/v1/product/cropper/cropper_v1.rbs +13 -0
  375. data/sig/mindee/v1/product/cropper/cropper_v1_document.rbs +14 -0
  376. data/sig/mindee/v1/product/cropper/cropper_v1_page.rbs +19 -0
  377. data/sig/mindee/v1/product/financial_document/financial_document_v1.rbs +13 -0
  378. data/sig/mindee/v1/product/financial_document/financial_document_v1_document.rbs +49 -0
  379. data/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs +35 -0
  380. data/sig/mindee/v1/product/financial_document/financial_document_v1_line_items.rbs +15 -0
  381. data/sig/mindee/v1/product/financial_document/financial_document_v1_page.rbs +17 -0
  382. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rbs +15 -0
  383. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rbs +19 -0
  384. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rbs +19 -0
  385. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rbs +15 -0
  386. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs +25 -0
  387. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rbs +20 -0
  388. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rbs +19 -0
  389. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2.rbs +15 -0
  390. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rbs +31 -0
  391. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rbs +19 -0
  392. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs +27 -0
  393. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rbs +17 -0
  394. data/sig/mindee/v1/product/fr/id_card/id_card_v1.rbs +15 -0
  395. data/sig/mindee/v1/product/fr/id_card/id_card_v1_document.rbs +26 -0
  396. data/sig/mindee/v1/product/fr/id_card/id_card_v1_page.rbs +20 -0
  397. data/sig/mindee/v1/product/fr/id_card/id_card_v2.rbs +15 -0
  398. data/sig/mindee/v1/product/fr/id_card/id_card_v2_document.rbs +31 -0
  399. data/sig/mindee/v1/product/fr/id_card/id_card_v2_page.rbs +21 -0
  400. data/sig/mindee/v1/product/international_id/international_id_v2.rbs +13 -0
  401. data/sig/mindee/v1/product/international_id/international_id_v2_document.rbs +31 -0
  402. data/sig/mindee/v1/product/international_id/international_id_v2_page.rbs +17 -0
  403. data/sig/mindee/v1/product/invoice/invoice_v4.rbs +13 -0
  404. data/sig/mindee/v1/product/invoice/invoice_v4_document.rbs +45 -0
  405. data/sig/mindee/v1/product/invoice/invoice_v4_line_item.rbs +35 -0
  406. data/sig/mindee/v1/product/invoice/invoice_v4_line_items.rbs +15 -0
  407. data/sig/mindee/v1/product/invoice/invoice_v4_page.rbs +17 -0
  408. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rbs +13 -0
  409. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rbs +17 -0
  410. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs +21 -0
  411. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rbs +15 -0
  412. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rbs +17 -0
  413. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs +14 -0
  414. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs +15 -0
  415. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rbs +17 -0
  416. data/sig/mindee/v1/product/passport/passport_v1.rbs +13 -0
  417. data/sig/mindee/v1/product/passport/passport_v1_document.rbs +25 -0
  418. data/sig/mindee/v1/product/passport/passport_v1_page.rbs +17 -0
  419. data/sig/mindee/v1/product/receipt/receipt_v5.rbs +13 -0
  420. data/sig/mindee/v1/product/receipt/receipt_v5_document.rbs +33 -0
  421. data/sig/mindee/v1/product/receipt/receipt_v5_line_item.rbs +27 -0
  422. data/sig/mindee/v1/product/receipt/receipt_v5_line_items.rbs +15 -0
  423. data/sig/mindee/v1/product/receipt/receipt_v5_page.rbs +17 -0
  424. data/sig/mindee/v1/product/resume/resume_v1.rbs +13 -0
  425. data/sig/mindee/v1/product/resume/resume_v1_certificate.rbs +27 -0
  426. data/sig/mindee/v1/product/resume/resume_v1_certificates.rbs +17 -0
  427. data/sig/mindee/v1/product/resume/resume_v1_document.rbs +69 -0
  428. data/sig/mindee/v1/product/resume/resume_v1_education.rbs +33 -0
  429. data/sig/mindee/v1/product/resume/resume_v1_educations.rbs +17 -0
  430. data/sig/mindee/v1/product/resume/resume_v1_language.rbs +23 -0
  431. data/sig/mindee/v1/product/resume/resume_v1_languages.rbs +17 -0
  432. data/sig/mindee/v1/product/resume/resume_v1_page.rbs +19 -0
  433. data/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs +37 -0
  434. data/sig/mindee/v1/product/resume/resume_v1_professional_experiences.rbs +17 -0
  435. data/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs +23 -0
  436. data/sig/mindee/v1/product/resume/resume_v1_social_networks_urls.rbs +17 -0
  437. data/sig/mindee/v1/product/universal/universal.rbs +16 -0
  438. data/sig/mindee/v1/product/universal/universal_document.rbs +12 -0
  439. data/sig/mindee/v1/product/universal/universal_page.rbs +18 -0
  440. data/sig/mindee/v1/product/universal/universal_prediction.rbs +30 -0
  441. data/sig/mindee/v2/client.rbs +29 -0
  442. data/sig/mindee/v2/file_operation/crop.rbs +10 -0
  443. data/sig/mindee/v2/file_operation/crop_files.rbs +9 -0
  444. data/sig/mindee/v2/file_operation/split.rbs +11 -0
  445. data/sig/mindee/v2/file_operation/split_files.rbs +9 -0
  446. data/sig/mindee/v2/http/api_v2_settings.rbs +27 -0
  447. data/sig/mindee/v2/http/mindee_api_v2.rbs +52 -0
  448. data/sig/mindee/v2/parsing/base_inference.rbs +18 -0
  449. data/sig/mindee/v2/parsing/base_response.rbs +11 -0
  450. data/sig/mindee/v2/parsing/common_response.rbs +12 -0
  451. data/sig/mindee/v2/parsing/error_item.rbs +13 -0
  452. data/sig/mindee/v2/parsing/error_response.rbs +20 -0
  453. data/sig/mindee/v2/parsing/field/base_field.rbs +17 -0
  454. data/sig/mindee/v2/parsing/field/field_confidence.rbs +30 -0
  455. data/sig/mindee/v2/parsing/field/field_location.rbs +16 -0
  456. data/sig/mindee/v2/parsing/field/inference_fields.rbs +20 -0
  457. data/sig/mindee/v2/parsing/field/list_field.rbs +23 -0
  458. data/sig/mindee/v2/parsing/field/object_field.rbs +27 -0
  459. data/sig/mindee/v2/parsing/field/simple_field.rbs +16 -0
  460. data/sig/mindee/v2/parsing/inference_active_options.rbs +26 -0
  461. data/sig/mindee/v2/parsing/inference_file.rbs +17 -0
  462. data/sig/mindee/v2/parsing/inference_job.rbs +13 -0
  463. data/sig/mindee/v2/parsing/inference_model.rbs +12 -0
  464. data/sig/mindee/v2/parsing/job.rbs +24 -0
  465. data/sig/mindee/v2/parsing/job_response.rbs +14 -0
  466. data/sig/mindee/v2/parsing/job_webhook.rbs +19 -0
  467. data/sig/mindee/v2/parsing/rag_metadata.rbs +13 -0
  468. data/sig/mindee/v2/parsing/raw_text.rbs +12 -0
  469. data/sig/mindee/v2/parsing/raw_text_page.rbs +11 -0
  470. data/sig/mindee/v2/parsing/search/pagination_metadata.rbs +20 -0
  471. data/sig/mindee/v2/parsing/search/search_model.rbs +19 -0
  472. data/sig/mindee/v2/parsing/search/search_response.rbs +17 -0
  473. data/sig/mindee/v2/parsing/search_models.rbs +14 -0
  474. data/sig/mindee/v2/product/base_product.rbs +19 -0
  475. data/sig/mindee/v2/product/classification/classification.rbs +10 -0
  476. data/sig/mindee/v2/product/classification/classification_classifier.rbs +15 -0
  477. data/sig/mindee/v2/product/classification/classification_inference.rbs +15 -0
  478. data/sig/mindee/v2/product/classification/classification_response.rbs +23 -0
  479. data/sig/mindee/v2/product/classification/classification_result.rbs +15 -0
  480. data/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs +23 -0
  481. data/sig/mindee/v2/product/crop/crop.rbs +10 -0
  482. data/sig/mindee/v2/product/crop/crop_inference.rbs +14 -0
  483. data/sig/mindee/v2/product/crop/crop_item.rbs +18 -0
  484. data/sig/mindee/v2/product/crop/crop_response.rbs +25 -0
  485. data/sig/mindee/v2/product/crop/crop_result.rbs +14 -0
  486. data/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs +23 -0
  487. data/sig/mindee/v2/product/extraction/extraction.rbs +15 -0
  488. data/sig/mindee/v2/product/extraction/extraction_inference.rbs +19 -0
  489. data/sig/mindee/v2/product/extraction/extraction_response.rbs +24 -0
  490. data/sig/mindee/v2/product/extraction/extraction_result.rbs +18 -0
  491. data/sig/mindee/v2/product/extraction/params/data_schema.rbs +21 -0
  492. data/sig/mindee/v2/product/extraction/params/data_schema_field.rbs +29 -0
  493. data/sig/mindee/v2/product/extraction/params/data_schema_replace.rbs +21 -0
  494. data/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs +38 -0
  495. data/sig/mindee/v2/product/ocr/ocr.rbs +10 -0
  496. data/sig/mindee/v2/product/ocr/ocr_inference.rbs +14 -0
  497. data/sig/mindee/v2/product/ocr/ocr_page.rbs +15 -0
  498. data/sig/mindee/v2/product/ocr/ocr_response.rbs +23 -0
  499. data/sig/mindee/v2/product/ocr/ocr_result.rbs +14 -0
  500. data/sig/mindee/v2/product/ocr/ocr_word.rbs +15 -0
  501. data/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs +24 -0
  502. data/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs +23 -0
  503. data/sig/mindee/v2/product/split/split.rbs +10 -0
  504. data/sig/mindee/v2/product/split/split_inference.rbs +14 -0
  505. data/sig/mindee/v2/product/split/split_range.rbs +18 -0
  506. data/sig/mindee/v2/product/split/split_response.rbs +25 -0
  507. data/sig/mindee/v2/product/split/split_result.rbs +14 -0
  508. data/sig/mindee/version.rbs +6 -0
  509. data/sig/mindee.rbs +62 -0
  510. metadata +600 -0
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Mindee
  # Various helper functions for geometry.
  module Geometry
    # A relative set of coordinates (X, Y) on the document.
    class Point
      # @return [Float] Horizontal coordinate.
      attr_reader :x
      # @return [Float] Vertical coordinate.
      attr_reader :y

      # rubocop:disable Naming/MethodParameterName

      # @param x [Float]
      # @param y [Float]
      def initialize(x, y)
        @x = x
        @y = y
      end
      # rubocop:enable Naming/MethodParameterName

      # Array-style accessor: index 0 is the X coordinate, index 1 is the Y coordinate.
      # @param key [Integer] 0 or 1.
      # @return [Float]
      # @raise [ArgumentError] If the key is neither 0 nor 1.
      def [](key)
        case key
        when 0 then @x
        when 1 then @y
        else
          # `throw` is Ruby's catch/throw jump, not an exception raise: without a matching `catch` it
          # only surfaces as an UncaughtThrowError with a wrapped message. Raise its superclass
          # directly so `rescue ArgumentError` keeps working and the message stays readable.
          raise ArgumentError, '0 or 1 only'
        end
      end

      # @return [String] Point as a string, formatted as "(x,y)".
      def to_s
        "(#{@x},#{@y})"
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module Mindee
  # Various helper functions & classes for geometry.
  module Geometry
    # An Array of Points holding any number of vertex coordinates.
    class Polygon < Array
      # Builds one Point per entry of the raw data; every entry is read as `entry[0]` (X) and `entry[1]` (Y).
      # @param server_response [Enumerable] Raw coordinates as sent back by the server.
      def initialize(server_response)
        vertices = [] # @type var vertices: Array[Mindee::Geometry::Point]
        server_response.each { |raw| vertices.push(Point.new(raw[0], raw[1])) }
        super(vertices)
      end

      # Central point (centroid) of the polygon, as computed by Geometry.get_centroid.
      # @return [Mindee::Geometry::Point]
      def centroid
        Geometry.get_centroid(self)
      end

      # Tells whether the Y coordinate of a Point lies within the polygon's vertical extent (bounds included).
      # @param point [Mindee::Geometry::Point]
      # @return [bool]
      def point_in_y?(point)
        y_range = Geometry.get_min_max_y(self)
        point.y.between?(y_range.min, y_range.max)
      end

      # @return [String] Polygon as a string: the vertices joined by ", " and wrapped in parentheses.
      def to_s
        inner = join(', ')
        "(#{inner})"
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module Mindee
  # Various helper functions & classes for geometry.
  module Geometry
    # Contains exactly 4 relative vertices coordinates (Points).
    class Quadrilateral
      # @return [Mindee::Geometry::Point]
      attr_reader :top_left
      # @return [Mindee::Geometry::Point]
      attr_reader :top_right
      # @return [Mindee::Geometry::Point]
      attr_reader :bottom_right
      # @return [Mindee::Geometry::Point]
      attr_reader :bottom_left

      # @param top_left [Mindee::Geometry::Point]
      # @param top_right [Mindee::Geometry::Point]
      # @param bottom_right [Mindee::Geometry::Point]
      # @param bottom_left [Mindee::Geometry::Point]
      def initialize(top_left, top_right, bottom_right, bottom_left)
        @top_left = top_left
        @top_right = top_right
        @bottom_right = bottom_right
        @bottom_left = bottom_left
      end

      # Array-style accessor over the corners, in the order top-left (0), top-right (1),
      # bottom-right (2), bottom-left (3).
      # @param key [Integer] Corner index, from 0 to 3.
      # @return [Mindee::Geometry::Point]
      # @raise [ArgumentError] If the key is not one of 0, 1, 2, 3.
      def [](key)
        case key
        when 0 then @top_left
        when 1 then @top_right
        when 2 then @bottom_right
        when 3 then @bottom_left
        else
          # `throw` without a matching `catch` only produces an UncaughtThrowError with a wrapped
          # message; raise its superclass directly so the failure is a regular, readable exception.
          raise ArgumentError, '0, 1, 2, 3 only'
        end
      end

      # A quadrilateral has four corners, always.
      # @return [Integer]
      def size
        4
      end
    end
  end
end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
module Mindee
  # Various helper functions for geometry.
  module Geometry
    # Transform a prediction into a Quadrilateral.
    # @param prediction [Array<Array<Float>>] Exactly four [x, y] pairs.
    # @return [Mindee::Geometry::Quadrilateral]
    # @raise [ArgumentError] If the prediction does not hold exactly 4 points.
    def self.quadrilateral_from_prediction(prediction)
      # Raise a regular exception: `throw` without a matching `catch` would only surface as an
      # UncaughtThrowError wrapping this message.
      unless prediction.size == 4
        raise ArgumentError, "Prediction must have exactly 4 points, found #{prediction.size}"
      end

      corners = (0..3).map { |index| Point.new(prediction[index][0], prediction[index][1]) }
      Quadrilateral.new(*corners)
    end

    # Gets the points of a bounding box for a given set of points
    # @param vertices [Array<Mindee::Geometry::Point>]
    # @return [Array<Float>] The values `[x_min, y_min, x_max, y_max]`.
    def self.get_bbox(vertices)
      x_coords = vertices.map(&:x)
      y_coords = vertices.map(&:y)
      [x_coords.min, y_coords.min, x_coords.max, y_coords.max]
    end

    # Creates the bounding box for a given set of points
    # @param vertices [Array<Mindee::Geometry::Point>]
    # @return [Mindee::Geometry::Quadrilateral] Corners given as (x_min, y_min), (x_max, y_min),
    #   (x_max, y_max), (x_min, y_max).
    def self.get_bounding_box(vertices)
      x_min, y_min, x_max, y_max = get_bbox(vertices)
      Quadrilateral.new(
        Point.new(x_min, y_min),
        Point.new(x_max, y_min),
        Point.new(x_max, y_max),
        Point.new(x_min, y_max)
      )
    end

    # Get the central point (centroid) given a sequence of points.
    # The centroid is the arithmetic mean of the X values and of the Y values.
    # @param points [Array<Mindee::Geometry::Point>]
    # @return [Mindee::Geometry::Point]
    def self.get_centroid(points)
      vertices_count = points.size
      x_sum = points.map(&:x).sum
      y_sum = points.map(&:y).sum
      Point.new(x_sum / vertices_count, y_sum / vertices_count)
    end

    # Get the maximum and minimum Y value given a sequence of points.
    # @param points [Array<Mindee::Geometry::Point>]
    # @return [Mindee::Geometry::MinMax]
    def self.get_min_max_y(points)
      coords = points.map(&:y)
      MinMax.new(coords.min, coords.max)
    end

    # Get the maximum and minimum X value given a sequence of points.
    # @param points [Array<Mindee::Geometry::Point>]
    # @return [Mindee::Geometry::MinMax]
    def self.get_min_max_x(points)
      coords = points.map(&:x)
      MinMax.new(coords.min, coords.max)
    end

    # Checks whether a set of coordinates is below another on the page, with a slight margin for the lateral value.
    # Each margin is applied as a fraction of the matching anchor X bound (bound * margin).
    # @param candidate [Array<Mindee::Geometry::Point>] Polygon to check
    # @param anchor [Array<Mindee::Geometry::Point>] Reference polygon
    # @param margin_left [Float] Margin tolerance on the left of the anchor
    # @param margin_right [Float] Margin tolerance on the right of the anchor
    # @return [bool]
    def self.below?(candidate, anchor, margin_left, margin_right)
      # The candidate must not start above the top of the anchor.
      return false if get_min_max_y(candidate).min < get_min_max_y(anchor).min

      # Compute each horizontal extent once instead of once per comparison.
      candidate_x = get_min_max_x(candidate)
      anchor_x = get_min_max_x(anchor)
      return false if candidate_x.min < anchor_x.min - (anchor_x.min * margin_left)
      return false if candidate_x.max > anchor_x.max + (anchor_x.max * margin_right)

      true
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'geometry/min_max'
4
+ require_relative 'geometry/point'
5
+ require_relative 'geometry/quadrilateral'
6
+ require_relative 'geometry/polygon'
7
+ require_relative 'geometry/utils'
@@ -0,0 +1,7 @@
1
+ inherit_from: ../../../.rubocop.yml
2
+
3
+ Metrics/CyclomaticComplexity:
4
+ Max: 10
5
+
6
+ Metrics/PerceivedComplexity:
7
+ Max: 10
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require_relative '../error/mindee_http_error'
5
+
6
module Mindee
  module HTTP
    # Mindee HTTP error module.
    module ErrorHandler
      module_function

      # Known plain-text error markers, as [marker, error code, error message] triples.
      # Messages are kept verbatim (including the 'amound' spelling) so existing callers see the same text.
      FALLBACK_ERRORS = [
        ['Maximum pdf pages', 'TooManyPages', 'Maximum amound of pdf pages reached.'],
        ['Max file size is', 'FileTooLarge', 'Maximum file size reached.'],
        ['Invalid file type', 'InvalidFiletype', 'Invalid file type.'],
        ['Gateway timeout', 'RequestTimeout', 'Request timed out.'],
        ['Too Many Requests', 'TooManyRequests', 'Too Many Requests.'],
      ].freeze

      # Extracts the HTTP error from the response hash, or the job error if there is one.
      # A missing or empty error entry is skipped instead of raising NoMethodError on `nil.empty?`.
      # @param response [Hash, Object] Parsed server reply; anything that is not hash-like yields nil.
      # @return [Object, nil] The first non-empty value among `api_request.error` and `job.error`, in that order.
      def extract_error(response)
        return unless response.respond_to?(:each_pair)

        %w[api_request job].each do |section|
          error = response.dig(section, 'error')
          next if error.nil? || (error.respond_to?(:empty?) && error.empty?)

          return error
        end
        nil
      end

      # Creates an error object based on what's retrieved from a request.
      # When the reply carries no error entry, a fallback hash is built by looking for known markers
      # through `response.include?`.
      # @param response [Hash, String, nil] Parsed server reply, or the raw body when it was not valid JSON.
      # @return [Object] The extracted error, or a hash with the keys 'code', 'message' and 'details'.
      def create_error_obj(response)
        error_obj = extract_error(response)
        return error_obj unless error_obj.nil?

        # `respond_to?` guards against bodies parsed to nil or a number, which have no `include?`.
        known = FALLBACK_ERRORS.find do |marker, _code, _message|
          response.respond_to?(:include?) && response.include?(marker)
        end
        code, message = known ? known.last(2) : ['UnknownError', 'Server sent back an unexpected reply.']
        {
          'code' => code,
          'message' => message,
          'details' => response,
        }
      end

      # Creates an appropriate HTTP error exception, based on retrieved http error code
      # @param url [String] the url of the product
      # @param response [#code, #body] HTTP response sent back by the server (its `code` and `body` are read).
      # @return [Error::MindeeHTTPError] Client error for 4xx codes, server error for 5xx, generic otherwise.
      def handle_error(url, response)
        code = response.code.to_i
        begin
          parsed_hash = JSON.parse(response.body, object_class: Hash)
        rescue JSON::ParserError
          # Not JSON: keep the raw text so the known plain-text markers can still be matched.
          parsed_hash = response.body.to_s
        end
        error_obj = create_error_obj(parsed_hash)
        case code
        when 400..499
          Error::MindeeHTTPClientError.new(error_obj || {}, url, code)
        when 500..599
          Error::MindeeHTTPServerError.new(error_obj || {}, url, code)
        else
          Error::MindeeHTTPError.new(error_obj || {}, url, code)
        end
      end

      # Creates an appropriate HTTP error exception for a V2 API response, based on retrieved http error code.
      # @param hashed_response [Hash] dictionary response retrieved by the server, read through the
      #   symbol keys `:code` and `:status`.
      # @return [Error::MindeeHTTPErrorV2]
      def generate_v2_error(hashed_response)
        code = hashed_response[:code].to_i
        if hashed_response.key?(:status)
          Error::MindeeHTTPErrorV2.new(hashed_response.transform_keys(&:to_s))
        elsif code < 200 || code > 399
          Error::MindeeHTTPErrorV2.new({ 'status' => code, 'detail' => 'No details available.' })
        else
          Error::MindeeHTTPErrorV2.new({ 'status' => -1, 'detail' => 'Unknown Error.' })
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'net/http'
5
+
6
module Mindee
  module HTTP
    # Module dedicated to the validation & sanitizing of HTTP responses.
    module ResponseValidation
      # Checks if the synchronous response is valid: the status code is within 200..399 and the body
      # can be parsed as JSON.
      # @param [Net::HTTPResponse] response
      # @return [bool]
      def self.valid_sync_response?(response)
        return false unless (200..399).cover?(response.code.to_i)

        begin
          JSON.parse(response.body, object_class: Hash)
        rescue StandardError
          return false
        end
        true
      end

      # Checks if a V2 response is valid: a valid synchronous response whose body is a JSON object,
      # whose job status is not 'Failed' and whose job error is absent or empty.
      # @param [Net::HTTPResponse] response
      # @return [bool]
      def self.valid_v2_response?(response)
        return false unless valid_sync_response?(response)

        hashed_response = JSON.parse(response.body, object_class: Hash)
        # Valid JSON is not necessarily an object ("[]", "null", ...): such a body cannot be dug into.
        return false unless hashed_response.is_a?(Hash)

        return false if hashed_response.dig('job', 'status').to_s == 'Failed'

        job_error = hashed_response.dig('job', 'error')
        return false if job_error && !job_error.empty?

        true
      end

      # Checks if the asynchronous response is valid. Also checks if it is a valid synchronous response.
      # Returns true if the response is valid.
      # @param [Net::HTTPResponse] response
      # @return [bool]
      def self.valid_async_response?(response)
        return false unless valid_sync_response?(response)

        return false unless (200..302).cover?(response.code.to_i)

        hashed_response = JSON.parse(response.body, object_class: Hash)
        # Valid JSON is not necessarily an object ("[]", "null", ...): such a body cannot be dug into.
        return false unless hashed_response.is_a?(Hash)

        return false if hashed_response.dig('job', 'status') == Mindee::V1::Parsing::Common::JobStatus::FAILURE

        job_error = hashed_response.dig('job', 'error')
        return false if job_error && !job_error.empty?

        true
      end

      # Checks and corrects the response object depending on the possible kinds of returns.
      # The status code stored on the response is overwritten in place.
      # @param response [Net::HTTPResponse]
      def self.clean_request!(response)
        return response if (response.code.to_i < 200) || (response.code.to_i > 302) # : Net::HTTPResponse

        return response if response.body.empty?

        hashed_response = JSON.parse(response.body, object_class: Hash)
        # Trust the status code reported inside the payload when it signals an error.
        if hashed_response.dig('api_request', 'status_code').to_i > 399
          response.instance_variable_set(:@code, hashed_response['api_request']['status_code'].to_s)
        end

        # NOTE(review): as written, the code is forced to '500' whenever the job error is empty OR the
        # job status equals the failure status, and a body without `job.error` / `job.status` raises
        # NoMethodError here. Behavior is left untouched; confirm the intended condition with callers.
        return if !hashed_response.dig('job', 'error').empty? &&
                  (hashed_response.dig('job',
                                       'status').downcase != Mindee::V1::Parsing::Common::JobStatus::FAILURE.to_s)

        response.instance_variable_set(:@code, '500')
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'http/http_error_handler'
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../input/sources'
4
+ require_relative '../logging'
5
+
6
module Mindee
  # Image Extraction Module.
  module Image
    # An image extracted from a page, kept in memory together with its identifiers and a file name.
    class ExtractedImage
      # ID of the page the image was extracted from.
      attr_reader :page_id

      # ID of the element on a given page.
      attr_reader :element_id

      # Buffer object of the file's content.
      attr_reader :buffer

      # Internal name for the file.
      attr_reader :filename

      # Copies the content of the input source into a rewound StringIO buffer and picks the file name.
      #
      # @param input_source [LocalInputSource, BytesInputSource] Local source for input.
      # @param page_id [Integer] ID of the page the element was found on.
      # @param element_id [Integer, nil] ID of the element in a page; stored as 0 when nil.
      # @param preserve_input_filename [Boolean] If true, keep the input source filename as-is.
      def initialize(input_source, page_id, element_id, preserve_input_filename: false)
        @buffer = StringIO.new(input_source.io_stream.read.to_s)
        @buffer.rewind

        @filename = if preserve_input_filename
                      input_source.filename.to_s
                    else
                      derived_filename(input_source, page_id, element_id)
                    end
        @page_id = page_id
        @element_id = if element_id.nil?
                        0
                      else
                        element_id
                      end
      end

      # Saves the document to a file.
      #
      # @param output_path [String] Path to save the file to.
      # @param file_format [String, nil] Optional MiniMagick-compatible format for the file. Inferred from file
      #   extension if not provided.
      # @raise [Error::MindeeImageError] If no format can be inferred, or if the file could not be written.
      def write_to_file(output_path, file_format = nil)
        resolved_path = Pathname.new(File.expand_path(output_path))
        if file_format.nil?
          bare_extension = resolved_path.extname.delete('.')
          raise Error::MindeeImageError, 'Invalid file format.' if bare_extension.empty?

          file_format = bare_extension.upcase
        end
        begin
          @buffer.rewind
          image = MiniMagick::Image.read(@buffer)
          image.format(file_format.to_s.downcase)
          image.write(resolved_path.to_s)
          logger.info("File saved successfully to '#{resolved_path}'")
        rescue StandardError
          # Any failure while reading, converting or writing is reported as a single image error.
          raise Error::MindeeImageError, "Could not save file '#{output_path}'. " \
                                         'Is the provided file path valid?.'
        end
      end

      # Return the file as a Mindee-compatible bytes input source, named after the internal file name.
      #
      # @return [Mindee::Input::Source::BytesInputSource]
      def as_source
        @buffer.rewind
        content = @buffer.read || ''
        Mindee::Input::Source::BytesInputSource.new(content, @filename)
      end

      # Same as #as_source.
      #
      # @return [Mindee::Input::Source::BytesInputSource]
      def as_input_source
        as_source
      end

      private

      # Builds "<base name>_p<page_id>_<element_id><extension>"; PDF sources get a '.jpg' extension.
      def derived_filename(input_source, page_id, element_id)
        source_extension = File.extname(input_source.filename)
        extension = input_source.pdf? ? '.jpg' : source_extension
        base_name = File.basename(input_source.filename, source_extension)
        "#{base_name}_p#{page_id}_#{element_id}#{extension}"
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module Mindee
  # Image processing module.
  module Image
    # Image compressor module to handle image compression.
    module ImageCompressor
      # Converts an image to JPEG, optionally resizes it, then applies the requested quality.
      # @param image [MiniMagick::Image, StringIO] Input image.
      # @param quality [Integer, nil] Quality of the final file.
      # @param max_width [Integer, nil] Maximum width, forwarded to ImageUtils.calculate_new_dimensions.
      # @param max_height [Integer, nil] Maximum height, forwarded to ImageUtils.calculate_new_dimensions.
      # @return [StringIO]
      def self.compress_image(image, quality: 85, max_width: nil, max_height: nil)
        working_image = ImageUtils.to_image(image)
        working_image.format('jpg')

        target_width, target_height = ImageUtils.calculate_new_dimensions(
          working_image, max_width: max_width, max_height: max_height
        )
        # Only resize when at least one target dimension was computed.
        if target_width || target_height
          ImageUtils.resize_image(working_image, target_width, target_height)
        end

        ImageUtils.compress_image_quality(working_image, quality)
        ImageUtils.image_to_stringio(working_image)
      end
    end
  end
end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ Mindee::Dependency.require_all_deps!
4
+ require 'mini_magick'
5
+ require 'origami'
6
+ require 'stringio'
7
+ require 'tempfile'
8
+ require_relative '../input/sources'
9
+ require_relative 'extracted_image'
10
+
11
module Mindee
  # Image Extraction Module.
  module Image
    # Image Extraction wrapper module.
    module ImageExtractor
      # Converts an image into a PDF document through MiniMagick and loads it with Origami.
      #
      # @param [StringIO] input_buffer Input buffer holding the image.
      # @param [String] format Format the image is consolidated to before the PDF conversion.
      # @return [Origami::PDF] A PdfDocument handle.
      def self.attach_image_as_new_file(input_buffer, format: 'jpg')
        source_image = MiniMagick::Image.read(input_buffer)
        # NOTE: We force format consolidation to a single format to avoid frames being interpreted as the final output.
        source_image.format(format)
        horizontal_density = source_image.resolution[0].to_f
        pdf_density = horizontal_density / 4.166666 # Convert from default 300 DPI to 72.
        source_image.format('pdf', 0, { density: pdf_density.to_s })
        pdf_bytes = StringIO.new(source_image.to_blob)
        Origami::PDF.read(pdf_bytes)
      end

      # Extracts multiple images from a given local input source.
      #
      # @param [Input::Source::LocalInputSource] input_source
      # @param [Integer] page_id ID of the Page to extract from.
      # @param [Array<Array<Geometry::Point>>, Array<Geometry::Quadrilateral>] polygons List of coordinates to extract.
      # @return [Array<Image::ExtractedImage>] Extracted Images.
      def self.extract_multiple_images_from_source(input_source, page_id, polygons)
        page_stream = load_input_source_pdf_page_as_stringio(input_source, page_id)
        page_stream.seek(0)

        extract_images_from_polygons(input_source, page_stream, page_id, polygons)
      end

      # Extracts images from their positions on a file (as polygons).
      # The position of each polygon in the list is used as its element ID.
      #
      # @param [Input::Source::LocalInputSource] input_source Local input source.
      # @param [StringIO] pdf_stream Buffer of the PDF.
      # @param [Integer] page_id Page ID.
      # @param [Array<Geometry::Point, Geometry::Polygon, Geometry::Quadrilateral>] polygons
      # @return [Array<Image::ExtractedImage>] Extracted Images.
      def self.extract_images_from_polygons(input_source, pdf_stream, page_id, polygons)
        polygons.each_with_index.map do |raw_polygon, element_id|
          quad = ImageUtils.normalize_polygon(raw_polygon)
          page_content = ImageUtils.read_page_content(pdf_stream)

          corners = [quad.top_left, quad.bottom_right, quad.top_right, quad.bottom_left]
          x_bounds = Geometry.get_min_max_x(corners)
          y_bounds = Geometry.get_min_max_y(corners)

          file_extension = ImageUtils.determine_file_extension(input_source)
          cropped_image = ImageUtils.crop_image(page_content, x_bounds, y_bounds)
          # Crops taken from a PDF are stored as JPEG; other sources keep their own format.
          cropped_image.format(file_extension == 'pdf' ? 'jpg' : file_extension.to_s)

          buffer = StringIO.new
          ImageUtils.write_image_to_buffer(cropped_image, buffer)
          # NOTE(review): for PDF sources the name still ends in ".pdf" although the content was
          # formatted as JPEG just above - confirm this is intended.
          file_name = "#{input_source.filename}_page#{page_id}-#{element_id}.#{file_extension}"

          create_extracted_image(buffer, file_name, page_id, element_id)
        end
      end

      # Generates an ExtractedImage.
      #
      # @param [StringIO] buffer Buffer containing the image.
      # @param [String] file_name Name for the file.
      # @param [Integer] page_id ID of the page the file was extracted from.
      # @param [Integer] element_id ID of the element of a given page.
      # @return [Image::ExtractedImage]
      def self.create_extracted_image(buffer, file_name, page_id, element_id)
        buffer.rewind
        bytes_source = Input::Source::BytesInputSource.new(buffer.read.to_s, file_name)
        ExtractedImage.new(bytes_source, page_id, element_id, preserve_input_filename: true)
      end

      # Loads a single page from an image file or a pdf document.
      # For a PDF the page is fetched through PDF::PDFProcessor.get_page; otherwise the stream of the
      # input is returned as-is, rewound.
      #
      # @param input_file [LocalInputSource] Local input.
      # @param [Integer] page_id Page ID.
      # @return [StringIO] Stream holding the page.
      def self.load_input_source_pdf_page_as_stringio(input_file, page_id)
        input_file.io_stream.rewind
        return input_file.io_stream unless input_file.pdf?

        PDF::PDFProcessor.get_page(Origami::PDF.read(input_file.io_stream), page_id)
      end
    end
  end
end