mindee-lite 5.0.0.beta1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (510) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +24 -0
  3. data/.gitattributes +14 -0
  4. data/.gitignore +76 -0
  5. data/.gitmodules +3 -0
  6. data/.pre-commit-config.yaml +36 -0
  7. data/.rubocop.yml +49 -0
  8. data/.yardopts +4 -0
  9. data/CHANGELOG.md +515 -0
  10. data/CODE_OF_CONDUCT.md +129 -0
  11. data/CONTRIBUTING.md +107 -0
  12. data/Gemfile +14 -0
  13. data/LICENSE +21 -0
  14. data/README.md +42 -0
  15. data/Rakefile +40 -0
  16. data/Steepfile +30 -0
  17. data/bin/console +14 -0
  18. data/bin/mindee.rb +30 -0
  19. data/bin/v1/parser.rb +153 -0
  20. data/bin/v1/products.rb +88 -0
  21. data/bin/v2/parser.rb +235 -0
  22. data/bin/v2/products.rb +34 -0
  23. data/docs/code_samples/bank_account_details_v1.txt +24 -0
  24. data/docs/code_samples/bank_account_details_v2.txt +24 -0
  25. data/docs/code_samples/bank_statement_fr_v2_async.txt +24 -0
  26. data/docs/code_samples/barcode_reader_v1.txt +24 -0
  27. data/docs/code_samples/cropper_v1.txt +21 -0
  28. data/docs/code_samples/default.txt +30 -0
  29. data/docs/code_samples/default_async.txt +29 -0
  30. data/docs/code_samples/expense_receipts_v5.txt +25 -0
  31. data/docs/code_samples/expense_receipts_v5_async.txt +24 -0
  32. data/docs/code_samples/financial_document_v1.txt +25 -0
  33. data/docs/code_samples/financial_document_v1_async.txt +24 -0
  34. data/docs/code_samples/idcard_fr_v1.txt +24 -0
  35. data/docs/code_samples/idcard_fr_v2.txt +24 -0
  36. data/docs/code_samples/international_id_v2_async.txt +24 -0
  37. data/docs/code_samples/invoice_splitter_v1_async.txt +24 -0
  38. data/docs/code_samples/invoices_v4.txt +25 -0
  39. data/docs/code_samples/invoices_v4_async.txt +24 -0
  40. data/docs/code_samples/multi_receipts_detector_v1.txt +24 -0
  41. data/docs/code_samples/passport_v1.txt +24 -0
  42. data/docs/code_samples/resume_v1_async.txt +24 -0
  43. data/docs/code_samples/v2_classification.txt +30 -0
  44. data/docs/code_samples/v2_crop.txt +30 -0
  45. data/docs/code_samples/v2_extraction.txt +42 -0
  46. data/docs/code_samples/v2_extraction_webhook.txt +45 -0
  47. data/docs/code_samples/v2_ocr.txt +30 -0
  48. data/docs/code_samples/v2_split.txt +30 -0
  49. data/docs/code_samples/workflow_execution.txt +28 -0
  50. data/docs/code_samples/workflow_polling.txt +35 -0
  51. data/examples/auto_invoice_splitter_extraction.rb +48 -0
  52. data/examples/auto_multi_receipts_detector_extraction.rb +30 -0
  53. data/lib/mindee/dependency.rb +29 -0
  54. data/lib/mindee/error/mindee_error.rb +17 -0
  55. data/lib/mindee/error/mindee_http_error.rb +36 -0
  56. data/lib/mindee/error/mindee_http_error_v2.rb +45 -0
  57. data/lib/mindee/error/mindee_http_unknown_error_v2.rb +18 -0
  58. data/lib/mindee/error/mindee_input_error.rb +30 -0
  59. data/lib/mindee/error.rb +6 -0
  60. data/lib/mindee/geometry/min_max.rb +23 -0
  61. data/lib/mindee/geometry/point.rb +41 -0
  62. data/lib/mindee/geometry/polygon.rb +37 -0
  63. data/lib/mindee/geometry/quadrilateral.rb +50 -0
  64. data/lib/mindee/geometry/utils.rb +88 -0
  65. data/lib/mindee/geometry.rb +7 -0
  66. data/lib/mindee/http/.rubocop.yml +7 -0
  67. data/lib/mindee/http/http_error_handler.rb +106 -0
  68. data/lib/mindee/http/response_validation.rb +81 -0
  69. data/lib/mindee/http.rb +3 -0
  70. data/lib/mindee/image/extracted_image.rb +89 -0
  71. data/lib/mindee/image/image_compressor.rb +29 -0
  72. data/lib/mindee/image/image_extractor.rb +118 -0
  73. data/lib/mindee/image/image_utils.rb +165 -0
  74. data/lib/mindee/image.rb +6 -0
  75. data/lib/mindee/input/base_parameters.rb +149 -0
  76. data/lib/mindee/input/local_response.rb +80 -0
  77. data/lib/mindee/input/polling_options.rb +26 -0
  78. data/lib/mindee/input/sources/base64_input_source.rb +31 -0
  79. data/lib/mindee/input/sources/bytes_input_source.rb +21 -0
  80. data/lib/mindee/input/sources/file_input_source.rb +20 -0
  81. data/lib/mindee/input/sources/local_input_source.rb +216 -0
  82. data/lib/mindee/input/sources/path_input_source.rb +20 -0
  83. data/lib/mindee/input/sources/url_input_source.rb +130 -0
  84. data/lib/mindee/input/sources.rb +8 -0
  85. data/lib/mindee/input.rb +4 -0
  86. data/lib/mindee/logging/logger.rb +24 -0
  87. data/lib/mindee/logging.rb +3 -0
  88. data/lib/mindee/page_options.rb +24 -0
  89. data/lib/mindee/pdf/extracted_pdf.rb +70 -0
  90. data/lib/mindee/pdf/pdf_compressor.rb +121 -0
  91. data/lib/mindee/pdf/pdf_extractor.rb +121 -0
  92. data/lib/mindee/pdf/pdf_processor.rb +91 -0
  93. data/lib/mindee/pdf/pdf_tools.rb +201 -0
  94. data/lib/mindee/pdf.rb +7 -0
  95. data/lib/mindee/v1/client.rb +490 -0
  96. data/lib/mindee/v1/extraction/multi_receipts_extractor.rb +32 -0
  97. data/lib/mindee/v1/extraction.rb +3 -0
  98. data/lib/mindee/v1/http/.rubocop.yml +7 -0
  99. data/lib/mindee/v1/http/endpoint.rb +221 -0
  100. data/lib/mindee/v1/http/workflow_endpoint.rb +93 -0
  101. data/lib/mindee/v1/http.rb +4 -0
  102. data/lib/mindee/v1/parsing/common/api_request.rb +38 -0
  103. data/lib/mindee/v1/parsing/common/api_response.rb +63 -0
  104. data/lib/mindee/v1/parsing/common/document.rb +86 -0
  105. data/lib/mindee/v1/parsing/common/execution.rb +78 -0
  106. data/lib/mindee/v1/parsing/common/execution_file.rb +26 -0
  107. data/lib/mindee/v1/parsing/common/execution_priority.rb +38 -0
  108. data/lib/mindee/v1/parsing/common/extras/cropper_extra.rb +32 -0
  109. data/lib/mindee/v1/parsing/common/extras/extras.rb +62 -0
  110. data/lib/mindee/v1/parsing/common/extras/full_text_ocr_extra.rb +35 -0
  111. data/lib/mindee/v1/parsing/common/extras/rag_extra.rb +28 -0
  112. data/lib/mindee/v1/parsing/common/extras.rb +6 -0
  113. data/lib/mindee/v1/parsing/common/inference.rb +69 -0
  114. data/lib/mindee/v1/parsing/common/job.rb +48 -0
  115. data/lib/mindee/v1/parsing/common/ocr/mvision_v1.rb +52 -0
  116. data/lib/mindee/v1/parsing/common/ocr/ocr.rb +180 -0
  117. data/lib/mindee/v1/parsing/common/ocr.rb +3 -0
  118. data/lib/mindee/v1/parsing/common/orientation.rb +28 -0
  119. data/lib/mindee/v1/parsing/common/page.rb +49 -0
  120. data/lib/mindee/v1/parsing/common/prediction.rb +19 -0
  121. data/lib/mindee/v1/parsing/common/product.rb +26 -0
  122. data/lib/mindee/v1/parsing/common/workflow_response.rb +30 -0
  123. data/lib/mindee/v1/parsing/common.rb +15 -0
  124. data/lib/mindee/v1/parsing/standard/abstract_field.rb +74 -0
  125. data/lib/mindee/v1/parsing/standard/address_field.rb +51 -0
  126. data/lib/mindee/v1/parsing/standard/amount_field.rb +28 -0
  127. data/lib/mindee/v1/parsing/standard/base_field.rb +30 -0
  128. data/lib/mindee/v1/parsing/standard/boolean_field.rb +29 -0
  129. data/lib/mindee/v1/parsing/standard/classification_field.rb +18 -0
  130. data/lib/mindee/v1/parsing/standard/company_registration_field.rb +45 -0
  131. data/lib/mindee/v1/parsing/standard/date_field.rb +40 -0
  132. data/lib/mindee/v1/parsing/standard/feature_field.rb +26 -0
  133. data/lib/mindee/v1/parsing/standard/locale_field.rb +52 -0
  134. data/lib/mindee/v1/parsing/standard/payment_details_field.rb +44 -0
  135. data/lib/mindee/v1/parsing/standard/position_field.rb +61 -0
  136. data/lib/mindee/v1/parsing/standard/string_field.rb +26 -0
  137. data/lib/mindee/v1/parsing/standard/tax_field.rb +110 -0
  138. data/lib/mindee/v1/parsing/standard.rb +15 -0
  139. data/lib/mindee/v1/parsing/universal/universal_list_field.rb +60 -0
  140. data/lib/mindee/v1/parsing/universal/universal_object_field.rb +123 -0
  141. data/lib/mindee/v1/parsing/universal.rb +4 -0
  142. data/lib/mindee/v1/parsing.rb +5 -0
  143. data/lib/mindee/v1/product/.rubocop.yml +12 -0
  144. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1.rb +47 -0
  145. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb +47 -0
  146. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rb +38 -0
  147. data/lib/mindee/v1/product/cropper/cropper_v1.rb +47 -0
  148. data/lib/mindee/v1/product/cropper/cropper_v1_document.rb +15 -0
  149. data/lib/mindee/v1/product/cropper/cropper_v1_page.rb +55 -0
  150. data/lib/mindee/v1/product/financial_document/financial_document_v1.rb +47 -0
  151. data/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb +329 -0
  152. data/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb +124 -0
  153. data/lib/mindee/v1/product/financial_document/financial_document_v1_line_items.rb +64 -0
  154. data/lib/mindee/v1/product/financial_document/financial_document_v1_page.rb +38 -0
  155. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rb +49 -0
  156. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb +49 -0
  157. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rb +40 -0
  158. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rb +49 -0
  159. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb +63 -0
  160. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb +60 -0
  161. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rb +40 -0
  162. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2.rb +49 -0
  163. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb +169 -0
  164. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rb +40 -0
  165. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb +78 -0
  166. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rb +56 -0
  167. data/lib/mindee/v1/product/fr/id_card/id_card_v1.rb +49 -0
  168. data/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb +106 -0
  169. data/lib/mindee/v1/product/fr/id_card/id_card_v1_page.rb +57 -0
  170. data/lib/mindee/v1/product/fr/id_card/id_card_v2.rb +49 -0
  171. data/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb +143 -0
  172. data/lib/mindee/v1/product/fr/id_card/id_card_v2_page.rb +65 -0
  173. data/lib/mindee/v1/product/international_id/international_id_v2.rb +47 -0
  174. data/lib/mindee/v1/product/international_id/international_id_v2_document.rb +164 -0
  175. data/lib/mindee/v1/product/international_id/international_id_v2_page.rb +38 -0
  176. data/lib/mindee/v1/product/invoice/invoice_v4.rb +47 -0
  177. data/lib/mindee/v1/product/invoice/invoice_v4_document.rb +300 -0
  178. data/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb +124 -0
  179. data/lib/mindee/v1/product/invoice/invoice_v4_line_items.rb +64 -0
  180. data/lib/mindee/v1/product/invoice/invoice_v4_page.rb +38 -0
  181. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rb +47 -0
  182. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb +66 -0
  183. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +58 -0
  184. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +50 -0
  185. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rb +38 -0
  186. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rb +47 -0
  187. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +38 -0
  188. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +38 -0
  189. data/lib/mindee/v1/product/passport/passport_v1.rb +47 -0
  190. data/lib/mindee/v1/product/passport/passport_v1_document.rb +112 -0
  191. data/lib/mindee/v1/product/passport/passport_v1_page.rb +38 -0
  192. data/lib/mindee/v1/product/receipt/receipt_v5.rb +47 -0
  193. data/lib/mindee/v1/product/receipt/receipt_v5_document.rb +187 -0
  194. data/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb +88 -0
  195. data/lib/mindee/v1/product/receipt/receipt_v5_line_items.rb +56 -0
  196. data/lib/mindee/v1/product/receipt/receipt_v5_page.rb +38 -0
  197. data/lib/mindee/v1/product/resume/resume_v1.rb +47 -0
  198. data/lib/mindee/v1/product/resume/resume_v1_certificate.rb +82 -0
  199. data/lib/mindee/v1/product/resume/resume_v1_certificates.rb +60 -0
  200. data/lib/mindee/v1/product/resume/resume_v1_document.rb +340 -0
  201. data/lib/mindee/v1/product/resume/resume_v1_education.rb +106 -0
  202. data/lib/mindee/v1/product/resume/resume_v1_educations.rb +66 -0
  203. data/lib/mindee/v1/product/resume/resume_v1_language.rb +66 -0
  204. data/lib/mindee/v1/product/resume/resume_v1_languages.rb +56 -0
  205. data/lib/mindee/v1/product/resume/resume_v1_page.rb +38 -0
  206. data/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb +122 -0
  207. data/lib/mindee/v1/product/resume/resume_v1_professional_experiences.rb +70 -0
  208. data/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb +66 -0
  209. data/lib/mindee/v1/product/resume/resume_v1_social_networks_urls.rb +56 -0
  210. data/lib/mindee/v1/product/universal/universal.rb +48 -0
  211. data/lib/mindee/v1/product/universal/universal_document.rb +35 -0
  212. data/lib/mindee/v1/product/universal/universal_page.rb +54 -0
  213. data/lib/mindee/v1/product/universal/universal_prediction.rb +128 -0
  214. data/lib/mindee/v1/product.rb +18 -0
  215. data/lib/mindee/v1.rb +7 -0
  216. data/lib/mindee/v2/client.rb +132 -0
  217. data/lib/mindee/v2/file_operation/crop.rb +51 -0
  218. data/lib/mindee/v2/file_operation/crop_files.rb +25 -0
  219. data/lib/mindee/v2/file_operation/split.rb +37 -0
  220. data/lib/mindee/v2/file_operation/split_files.rb +25 -0
  221. data/lib/mindee/v2/file_operation.rb +6 -0
  222. data/lib/mindee/v2/http/.rubocop.yml +7 -0
  223. data/lib/mindee/v2/http/api_v2_settings.rb +65 -0
  224. data/lib/mindee/v2/http/mindee_api_v2.rb +230 -0
  225. data/lib/mindee/v2/http.rb +4 -0
  226. data/lib/mindee/v2/parsing/base_inference.rb +44 -0
  227. data/lib/mindee/v2/parsing/base_response.rb +15 -0
  228. data/lib/mindee/v2/parsing/common_response.rb +20 -0
  229. data/lib/mindee/v2/parsing/error_item.rb +21 -0
  230. data/lib/mindee/v2/parsing/error_response.rb +51 -0
  231. data/lib/mindee/v2/parsing/field/base_field.rb +63 -0
  232. data/lib/mindee/v2/parsing/field/field_confidence.rb +128 -0
  233. data/lib/mindee/v2/parsing/field/field_location.rb +33 -0
  234. data/lib/mindee/v2/parsing/field/inference_fields.rb +105 -0
  235. data/lib/mindee/v2/parsing/field/list_field.rb +79 -0
  236. data/lib/mindee/v2/parsing/field/object_field.rb +138 -0
  237. data/lib/mindee/v2/parsing/field/simple_field.rb +60 -0
  238. data/lib/mindee/v2/parsing/field.rb +9 -0
  239. data/lib/mindee/v2/parsing/inference_active_options.rb +67 -0
  240. data/lib/mindee/v2/parsing/inference_file.rb +38 -0
  241. data/lib/mindee/v2/parsing/inference_job.rb +25 -0
  242. data/lib/mindee/v2/parsing/inference_model.rb +30 -0
  243. data/lib/mindee/v2/parsing/job.rb +93 -0
  244. data/lib/mindee/v2/parsing/job_response.rb +30 -0
  245. data/lib/mindee/v2/parsing/job_webhook.rb +59 -0
  246. data/lib/mindee/v2/parsing/rag_metadata.rb +17 -0
  247. data/lib/mindee/v2/parsing/raw_text.rb +27 -0
  248. data/lib/mindee/v2/parsing/raw_text_page.rb +24 -0
  249. data/lib/mindee/v2/parsing/search/pagination_metadata.rb +44 -0
  250. data/lib/mindee/v2/parsing/search/search_model.rb +38 -0
  251. data/lib/mindee/v2/parsing/search/search_models.rb +34 -0
  252. data/lib/mindee/v2/parsing/search/search_response.rb +38 -0
  253. data/lib/mindee/v2/parsing/search.rb +6 -0
  254. data/lib/mindee/v2/parsing.rb +16 -0
  255. data/lib/mindee/v2/product/base_product.rb +28 -0
  256. data/lib/mindee/v2/product/classification/classification.rb +20 -0
  257. data/lib/mindee/v2/product/classification/classification_classifier.rb +25 -0
  258. data/lib/mindee/v2/product/classification/classification_inference.rb +35 -0
  259. data/lib/mindee/v2/product/classification/classification_response.rb +32 -0
  260. data/lib/mindee/v2/product/classification/classification_result.rb +27 -0
  261. data/lib/mindee/v2/product/classification/params/classification_parameters.rb +47 -0
  262. data/lib/mindee/v2/product/crop/crop.rb +20 -0
  263. data/lib/mindee/v2/product/crop/crop_inference.rb +34 -0
  264. data/lib/mindee/v2/product/crop/crop_item.rb +39 -0
  265. data/lib/mindee/v2/product/crop/crop_response.rb +40 -0
  266. data/lib/mindee/v2/product/crop/crop_result.rb +34 -0
  267. data/lib/mindee/v2/product/crop/params/crop_parameters.rb +47 -0
  268. data/lib/mindee/v2/product/extraction/extraction.rb +21 -0
  269. data/lib/mindee/v2/product/extraction/extraction_inference.rb +40 -0
  270. data/lib/mindee/v2/product/extraction/extraction_response.rb +32 -0
  271. data/lib/mindee/v2/product/extraction/extraction_result.rb +44 -0
  272. data/lib/mindee/v2/product/extraction/params/data_schema.rb +51 -0
  273. data/lib/mindee/v2/product/extraction/params/data_schema_field.rb +69 -0
  274. data/lib/mindee/v2/product/extraction/params/data_schema_replace.rb +39 -0
  275. data/lib/mindee/v2/product/extraction/params/extraction_parameters.rb +125 -0
  276. data/lib/mindee/v2/product/ocr/ocr.rb +20 -0
  277. data/lib/mindee/v2/product/ocr/ocr_inference.rb +34 -0
  278. data/lib/mindee/v2/product/ocr/ocr_page.rb +33 -0
  279. data/lib/mindee/v2/product/ocr/ocr_response.rb +32 -0
  280. data/lib/mindee/v2/product/ocr/ocr_result.rb +34 -0
  281. data/lib/mindee/v2/product/ocr/ocr_word.rb +29 -0
  282. data/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +47 -0
  283. data/lib/mindee/v2/product/split/params/split_parameters.rb +48 -0
  284. data/lib/mindee/v2/product/split/split.rb +19 -0
  285. data/lib/mindee/v2/product/split/split_inference.rb +34 -0
  286. data/lib/mindee/v2/product/split/split_range.rb +38 -0
  287. data/lib/mindee/v2/product/split/split_response.rb +40 -0
  288. data/lib/mindee/v2/product/split/split_result.rb +34 -0
  289. data/lib/mindee/v2/product.rb +7 -0
  290. data/lib/mindee/v2.rb +7 -0
  291. data/lib/mindee/version.rb +26 -0
  292. data/lib/mindee.rb +135 -0
  293. data/mindee-lite.gemspec +36 -0
  294. data/mindee.gemspec +44 -0
  295. data/sig/custom/marcel.rbs +3 -0
  296. data/sig/custom/mini_magick.rbs +31 -0
  297. data/sig/custom/net_http.rbs +43 -0
  298. data/sig/custom/origami.rbs +59 -0
  299. data/sig/mindee/dependency.rbs +13 -0
  300. data/sig/mindee/error/mindee_error.rbs +13 -0
  301. data/sig/mindee/error/mindee_http_error.rbs +17 -0
  302. data/sig/mindee/error/mindee_http_error_v2.rbs +15 -0
  303. data/sig/mindee/error/mindee_http_unknown_error_v2.rbs +9 -0
  304. data/sig/mindee/error/mindee_input_error.rbs +18 -0
  305. data/sig/mindee/geometry/min_max.rbs +11 -0
  306. data/sig/mindee/geometry/point.rbs +14 -0
  307. data/sig/mindee/geometry/polygon.rbs +12 -0
  308. data/sig/mindee/geometry/quadrilateral.rbs +15 -0
  309. data/sig/mindee/geometry/utils.rbs +13 -0
  310. data/sig/mindee/http/http_error_handler.rbs +15 -0
  311. data/sig/mindee/http/response_validation.rbs +11 -0
  312. data/sig/mindee/image/extracted_image.rbs +21 -0
  313. data/sig/mindee/image/image_compressor.rbs +8 -0
  314. data/sig/mindee/image/image_extractor.rbs +13 -0
  315. data/sig/mindee/image/image_utils.rbs +19 -0
  316. data/sig/mindee/input/base_parameters.rbs +35 -0
  317. data/sig/mindee/input/local_response.rbs +14 -0
  318. data/sig/mindee/input/polling_options.rbs +12 -0
  319. data/sig/mindee/input/sources/base64_input_source.rbs +11 -0
  320. data/sig/mindee/input/sources/bytes_input_source.rbs +10 -0
  321. data/sig/mindee/input/sources/file_input_source.rbs +10 -0
  322. data/sig/mindee/input/sources/local_input_source.rbs +30 -0
  323. data/sig/mindee/input/sources/path_input_source.rbs +10 -0
  324. data/sig/mindee/input/sources/url_input_source.rbs +20 -0
  325. data/sig/mindee/logging/logger.rbs +11 -0
  326. data/sig/mindee/page_options.rbs +11 -0
  327. data/sig/mindee/pdf/extracted_pdf.rbs +17 -0
  328. data/sig/mindee/pdf/pdf_compressor.rbs +15 -0
  329. data/sig/mindee/pdf/pdf_extractor.rbs +19 -0
  330. data/sig/mindee/pdf/pdf_processor.rbs +12 -0
  331. data/sig/mindee/pdf/pdf_tools.rbs +31 -0
  332. data/sig/mindee/v1/client.rbs +84 -0
  333. data/sig/mindee/v1/extraction/multi_receipts_extractor.rbs +8 -0
  334. data/sig/mindee/v1/http/endpoint.rbs +41 -0
  335. data/sig/mindee/v1/http/workflow_endpoint.rbs +22 -0
  336. data/sig/mindee/v1/parsing/common/api_request.rbs +22 -0
  337. data/sig/mindee/v1/parsing/common/api_response.rbs +31 -0
  338. data/sig/mindee/v1/parsing/common/document.rbs +32 -0
  339. data/sig/mindee/v1/parsing/common/execution.rbs +26 -0
  340. data/sig/mindee/v1/parsing/common/execution_file.rbs +16 -0
  341. data/sig/mindee/v1/parsing/common/execution_priority.rbs +16 -0
  342. data/sig/mindee/v1/parsing/common/extras/cropper_extra.rbs +18 -0
  343. data/sig/mindee/v1/parsing/common/extras/extras.rbs +24 -0
  344. data/sig/mindee/v1/parsing/common/extras/full_text_ocr_extra.rbs +22 -0
  345. data/sig/mindee/v1/parsing/common/extras/rag_extra.rbs +19 -0
  346. data/sig/mindee/v1/parsing/common/inference.rbs +31 -0
  347. data/sig/mindee/v1/parsing/common/job.rbs +24 -0
  348. data/sig/mindee/v1/parsing/common/ocr/mvision_v1.rbs +20 -0
  349. data/sig/mindee/v1/parsing/common/ocr/ocr.rbs +56 -0
  350. data/sig/mindee/v1/parsing/common/orientation.rbs +15 -0
  351. data/sig/mindee/v1/parsing/common/page.rbs +19 -0
  352. data/sig/mindee/v1/parsing/common/prediction.rbs +14 -0
  353. data/sig/mindee/v1/parsing/common/product.rbs +16 -0
  354. data/sig/mindee/v1/parsing/common/workflow_response.rbs +22 -0
  355. data/sig/mindee/v1/parsing/standard/abstract_field.rbs +30 -0
  356. data/sig/mindee/v1/parsing/standard/address_field.rbs +28 -0
  357. data/sig/mindee/v1/parsing/standard/amount_field.rbs +16 -0
  358. data/sig/mindee/v1/parsing/standard/base_field.rbs +16 -0
  359. data/sig/mindee/v1/parsing/standard/boolean_field.rbs +16 -0
  360. data/sig/mindee/v1/parsing/standard/classification_field.rbs +12 -0
  361. data/sig/mindee/v1/parsing/standard/company_registration_field.rbs +20 -0
  362. data/sig/mindee/v1/parsing/standard/date_field.rbs +20 -0
  363. data/sig/mindee/v1/parsing/standard/feature_field.rbs +12 -0
  364. data/sig/mindee/v1/parsing/standard/locale_field.rbs +24 -0
  365. data/sig/mindee/v1/parsing/standard/payment_details_field.rbs +19 -0
  366. data/sig/mindee/v1/parsing/standard/position_field.rbs +26 -0
  367. data/sig/mindee/v1/parsing/standard/string_field.rbs +16 -0
  368. data/sig/mindee/v1/parsing/standard/tax_field.rbs +33 -0
  369. data/sig/mindee/v1/parsing/universal/universal_list_field.rbs +21 -0
  370. data/sig/mindee/v1/parsing/universal/universal_object_field.rbs +38 -0
  371. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1.rbs +13 -0
  372. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rbs +16 -0
  373. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rbs +17 -0
  374. data/sig/mindee/v1/product/cropper/cropper_v1.rbs +13 -0
  375. data/sig/mindee/v1/product/cropper/cropper_v1_document.rbs +14 -0
  376. data/sig/mindee/v1/product/cropper/cropper_v1_page.rbs +19 -0
  377. data/sig/mindee/v1/product/financial_document/financial_document_v1.rbs +13 -0
  378. data/sig/mindee/v1/product/financial_document/financial_document_v1_document.rbs +49 -0
  379. data/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs +35 -0
  380. data/sig/mindee/v1/product/financial_document/financial_document_v1_line_items.rbs +15 -0
  381. data/sig/mindee/v1/product/financial_document/financial_document_v1_page.rbs +17 -0
  382. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rbs +15 -0
  383. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rbs +19 -0
  384. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rbs +19 -0
  385. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rbs +15 -0
  386. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs +25 -0
  387. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rbs +20 -0
  388. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rbs +19 -0
  389. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2.rbs +15 -0
  390. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rbs +31 -0
  391. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rbs +19 -0
  392. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs +27 -0
  393. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rbs +17 -0
  394. data/sig/mindee/v1/product/fr/id_card/id_card_v1.rbs +15 -0
  395. data/sig/mindee/v1/product/fr/id_card/id_card_v1_document.rbs +26 -0
  396. data/sig/mindee/v1/product/fr/id_card/id_card_v1_page.rbs +20 -0
  397. data/sig/mindee/v1/product/fr/id_card/id_card_v2.rbs +15 -0
  398. data/sig/mindee/v1/product/fr/id_card/id_card_v2_document.rbs +31 -0
  399. data/sig/mindee/v1/product/fr/id_card/id_card_v2_page.rbs +21 -0
  400. data/sig/mindee/v1/product/international_id/international_id_v2.rbs +13 -0
  401. data/sig/mindee/v1/product/international_id/international_id_v2_document.rbs +31 -0
  402. data/sig/mindee/v1/product/international_id/international_id_v2_page.rbs +17 -0
  403. data/sig/mindee/v1/product/invoice/invoice_v4.rbs +13 -0
  404. data/sig/mindee/v1/product/invoice/invoice_v4_document.rbs +45 -0
  405. data/sig/mindee/v1/product/invoice/invoice_v4_line_item.rbs +35 -0
  406. data/sig/mindee/v1/product/invoice/invoice_v4_line_items.rbs +15 -0
  407. data/sig/mindee/v1/product/invoice/invoice_v4_page.rbs +17 -0
  408. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rbs +13 -0
  409. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rbs +17 -0
  410. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs +21 -0
  411. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rbs +15 -0
  412. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rbs +17 -0
  413. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs +14 -0
  414. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs +15 -0
  415. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rbs +17 -0
  416. data/sig/mindee/v1/product/passport/passport_v1.rbs +13 -0
  417. data/sig/mindee/v1/product/passport/passport_v1_document.rbs +25 -0
  418. data/sig/mindee/v1/product/passport/passport_v1_page.rbs +17 -0
  419. data/sig/mindee/v1/product/receipt/receipt_v5.rbs +13 -0
  420. data/sig/mindee/v1/product/receipt/receipt_v5_document.rbs +33 -0
  421. data/sig/mindee/v1/product/receipt/receipt_v5_line_item.rbs +27 -0
  422. data/sig/mindee/v1/product/receipt/receipt_v5_line_items.rbs +15 -0
  423. data/sig/mindee/v1/product/receipt/receipt_v5_page.rbs +17 -0
  424. data/sig/mindee/v1/product/resume/resume_v1.rbs +13 -0
  425. data/sig/mindee/v1/product/resume/resume_v1_certificate.rbs +27 -0
  426. data/sig/mindee/v1/product/resume/resume_v1_certificates.rbs +17 -0
  427. data/sig/mindee/v1/product/resume/resume_v1_document.rbs +69 -0
  428. data/sig/mindee/v1/product/resume/resume_v1_education.rbs +33 -0
  429. data/sig/mindee/v1/product/resume/resume_v1_educations.rbs +17 -0
  430. data/sig/mindee/v1/product/resume/resume_v1_language.rbs +23 -0
  431. data/sig/mindee/v1/product/resume/resume_v1_languages.rbs +17 -0
  432. data/sig/mindee/v1/product/resume/resume_v1_page.rbs +19 -0
  433. data/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs +37 -0
  434. data/sig/mindee/v1/product/resume/resume_v1_professional_experiences.rbs +17 -0
  435. data/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs +23 -0
  436. data/sig/mindee/v1/product/resume/resume_v1_social_networks_urls.rbs +17 -0
  437. data/sig/mindee/v1/product/universal/universal.rbs +16 -0
  438. data/sig/mindee/v1/product/universal/universal_document.rbs +12 -0
  439. data/sig/mindee/v1/product/universal/universal_page.rbs +18 -0
  440. data/sig/mindee/v1/product/universal/universal_prediction.rbs +30 -0
  441. data/sig/mindee/v2/client.rbs +29 -0
  442. data/sig/mindee/v2/file_operation/crop.rbs +10 -0
  443. data/sig/mindee/v2/file_operation/crop_files.rbs +9 -0
  444. data/sig/mindee/v2/file_operation/split.rbs +11 -0
  445. data/sig/mindee/v2/file_operation/split_files.rbs +9 -0
  446. data/sig/mindee/v2/http/api_v2_settings.rbs +27 -0
  447. data/sig/mindee/v2/http/mindee_api_v2.rbs +52 -0
  448. data/sig/mindee/v2/parsing/base_inference.rbs +18 -0
  449. data/sig/mindee/v2/parsing/base_response.rbs +11 -0
  450. data/sig/mindee/v2/parsing/common_response.rbs +12 -0
  451. data/sig/mindee/v2/parsing/error_item.rbs +13 -0
  452. data/sig/mindee/v2/parsing/error_response.rbs +20 -0
  453. data/sig/mindee/v2/parsing/field/base_field.rbs +17 -0
  454. data/sig/mindee/v2/parsing/field/field_confidence.rbs +30 -0
  455. data/sig/mindee/v2/parsing/field/field_location.rbs +16 -0
  456. data/sig/mindee/v2/parsing/field/inference_fields.rbs +20 -0
  457. data/sig/mindee/v2/parsing/field/list_field.rbs +23 -0
  458. data/sig/mindee/v2/parsing/field/object_field.rbs +27 -0
  459. data/sig/mindee/v2/parsing/field/simple_field.rbs +16 -0
  460. data/sig/mindee/v2/parsing/inference_active_options.rbs +26 -0
  461. data/sig/mindee/v2/parsing/inference_file.rbs +17 -0
  462. data/sig/mindee/v2/parsing/inference_job.rbs +13 -0
  463. data/sig/mindee/v2/parsing/inference_model.rbs +12 -0
  464. data/sig/mindee/v2/parsing/job.rbs +24 -0
  465. data/sig/mindee/v2/parsing/job_response.rbs +14 -0
  466. data/sig/mindee/v2/parsing/job_webhook.rbs +19 -0
  467. data/sig/mindee/v2/parsing/rag_metadata.rbs +13 -0
  468. data/sig/mindee/v2/parsing/raw_text.rbs +12 -0
  469. data/sig/mindee/v2/parsing/raw_text_page.rbs +11 -0
  470. data/sig/mindee/v2/parsing/search/pagination_metadata.rbs +20 -0
  471. data/sig/mindee/v2/parsing/search/search_model.rbs +19 -0
  472. data/sig/mindee/v2/parsing/search/search_response.rbs +17 -0
  473. data/sig/mindee/v2/parsing/search_models.rbs +14 -0
  474. data/sig/mindee/v2/product/base_product.rbs +19 -0
  475. data/sig/mindee/v2/product/classification/classification.rbs +10 -0
  476. data/sig/mindee/v2/product/classification/classification_classifier.rbs +15 -0
  477. data/sig/mindee/v2/product/classification/classification_inference.rbs +15 -0
  478. data/sig/mindee/v2/product/classification/classification_response.rbs +23 -0
  479. data/sig/mindee/v2/product/classification/classification_result.rbs +15 -0
  480. data/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs +23 -0
  481. data/sig/mindee/v2/product/crop/crop.rbs +10 -0
  482. data/sig/mindee/v2/product/crop/crop_inference.rbs +14 -0
  483. data/sig/mindee/v2/product/crop/crop_item.rbs +18 -0
  484. data/sig/mindee/v2/product/crop/crop_response.rbs +25 -0
  485. data/sig/mindee/v2/product/crop/crop_result.rbs +14 -0
  486. data/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs +23 -0
  487. data/sig/mindee/v2/product/extraction/extraction.rbs +15 -0
  488. data/sig/mindee/v2/product/extraction/extraction_inference.rbs +19 -0
  489. data/sig/mindee/v2/product/extraction/extraction_response.rbs +24 -0
  490. data/sig/mindee/v2/product/extraction/extraction_result.rbs +18 -0
  491. data/sig/mindee/v2/product/extraction/params/data_schema.rbs +21 -0
  492. data/sig/mindee/v2/product/extraction/params/data_schema_field.rbs +29 -0
  493. data/sig/mindee/v2/product/extraction/params/data_schema_replace.rbs +21 -0
  494. data/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs +38 -0
  495. data/sig/mindee/v2/product/ocr/ocr.rbs +10 -0
  496. data/sig/mindee/v2/product/ocr/ocr_inference.rbs +14 -0
  497. data/sig/mindee/v2/product/ocr/ocr_page.rbs +15 -0
  498. data/sig/mindee/v2/product/ocr/ocr_response.rbs +23 -0
  499. data/sig/mindee/v2/product/ocr/ocr_result.rbs +14 -0
  500. data/sig/mindee/v2/product/ocr/ocr_word.rbs +15 -0
  501. data/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs +24 -0
  502. data/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs +23 -0
  503. data/sig/mindee/v2/product/split/split.rbs +10 -0
  504. data/sig/mindee/v2/product/split/split_inference.rbs +14 -0
  505. data/sig/mindee/v2/product/split/split_range.rbs +18 -0
  506. data/sig/mindee/v2/product/split/split_response.rbs +25 -0
  507. data/sig/mindee/v2/product/split/split_result.rbs +14 -0
  508. data/sig/mindee/version.rbs +6 -0
  509. data/sig/mindee.rbs +62 -0
  510. metadata +600 -0
@@ -0,0 +1,490 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../input'
4
+ require_relative 'http'
5
+ require_relative '../logging'
6
+ require_relative '../page_options'
7
+ require_relative 'parsing/common/api_response'
8
+ require_relative 'parsing/common/job'
9
+ require_relative 'parsing/common/workflow_response'
10
+ require_relative 'product'
11
+
12
+ # Default owner for products.
13
+ OTS_OWNER = 'mindee'
14
+
15
+ module Mindee
16
+ module V1
17
+ # Class for configuration options in parse calls.
18
+ #
19
+ # @!attribute all_words [bool] Whether to include the full text for each page.
20
+ # This performs a full OCR operation on the server and will increase response time.
21
+ # @!attribute full_text [bool] Whether to include the full OCR text response in compatible APIs.
22
+ # This performs a full OCR operation on the server and may increase response time.
23
+ # @!attribute close_file [bool] Whether to `close()` the file after parsing it.
24
+ # Set to false if you need to access the file after this operation.
25
+ # @!attribute page_options [PageOptions, Hash, nil] Page cutting/merge options:
26
+ # * `:page_indexes` Zero-based list of page indexes.
27
+ # * `:operation` Operation to apply on the document, given the specified page indexes:
28
+ # * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
29
+ # * `:REMOVE` - remove the specified pages, and keep all others.
30
+ # * `:on_min_pages` Apply the operation only if the document has at least this many pages.
31
+ # @!attribute cropper [bool] Whether to include cropper results for each page.
32
+ # This performs a cropping operation on the server and will increase response time.
33
+ # @!attribute initial_delay_sec [Numeric] Initial delay before polling. Defaults to 2.
34
+ # @!attribute delay_sec [Numeric] Delay between polling attempts. Defaults to 1.5.
35
+ # @!attribute max_retries [Integer] Maximum number of retries. Defaults to 80.
36
+ class ParseOptions
37
+ attr_accessor :all_words, :full_text, :close_file, :page_options, :cropper, :rag,
38
+ :workflow_id, :initial_delay_sec, :delay_sec, :max_retries
39
+
40
+ def initialize(params: {})
41
+ params = params.transform_keys(&:to_sym)
42
+ @all_words = params.fetch(:all_words, false)
43
+ @full_text = params.fetch(:full_text, false)
44
+ @close_file = params.fetch(:close_file, true)
45
+ raw_page_options = params.fetch(:page_options, nil)
46
+ raw_page_options = PageOptions.new(params: raw_page_options) unless raw_page_options.is_a?(PageOptions)
47
+ @page_options = raw_page_options
48
+ @cropper = params.fetch(:cropper, false)
49
+ @rag = params.fetch(:rag, false)
50
+ @workflow_id = params.fetch(:workflow_id, nil)
51
+ @initial_delay_sec = params.fetch(:initial_delay_sec, 2)
52
+ @delay_sec = params.fetch(:delay_sec, 1.5)
53
+ @max_retries = params.fetch(:max_retries, 80)
54
+ end
55
+ end
56
+
57
+ # Class for configuration options in workflow executions.
58
+ #
59
+ # @!attribute document_alias [String, nil] Alias to give to the document.
60
+ # @!attribute priority [Symbol, nil] Priority to give to the document.
61
+ # @!attribute full_text [bool] Whether to include the full OCR text response in compatible APIs.
62
+ # This performs a full OCR operation on the server and may increase response time.
63
+ # @!attribute public_url [String, nil] A unique, encrypted URL for accessing the document validation interface
64
+ # without requiring authentication.
65
+ # @!attribute rag [bool, nil] Whether to enable Retrieval-Augmented Generation.
66
+ # @!attribute page_options [PageOptions, Hash, nil] Page cutting/merge options:
67
+ # * `:page_indexes` Zero-based list of page indexes.
68
+ # * `:operation` Operation to apply on the document, given the specified page indexes:
69
+ # * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
70
+ # * `:REMOVE` - remove the specified pages, and keep all others.
71
+ # * `:on_min_pages` Apply the operation only if the document has at least this many pages.
72
+ # @!attribute close_file [bool, nil] Whether to close the file after sending it. Defaults to true.
73
+ class WorkflowOptions
74
+ attr_accessor :document_alias, :priority, :full_text, :public_url, :page_options, :rag, :close_file
75
+
76
+ def initialize(params: {})
77
+ params = params.transform_keys(&:to_sym)
78
+ @document_alias = params.fetch(:document_alias, nil)
79
+ @priority = params.fetch(:priority, nil)
80
+ @full_text = params.fetch(:full_text, false)
81
+ @public_url = params.fetch(:public_url, nil)
82
+ @rag = params.fetch(:rag, nil)
83
+ raw_page_options = params.fetch(:page_options, nil)
84
+ raw_page_options = PageOptions.new(params: raw_page_options) unless raw_page_options.is_a?(PageOptions)
85
+ @page_options = raw_page_options
86
+ @close_file = params.fetch(:close_file, true)
87
+ end
88
+ end
89
+
90
+ # Mindee API Client.
91
+ # See: https://developers.mindee.com/docs
92
+ class Client
93
+ # @param api_key [String]
94
+ def initialize(api_key: '')
95
+ @api_key = api_key
96
+ end
97
+
98
+ # Enqueue a document for parsing and automatically try to retrieve it if needed.
99
+ #
100
+ # Accepts options either as a Hash or as a ParseOptions struct.
101
+ #
102
+ # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
103
+ # @param product_class [Mindee::Inference] The class of the product.
104
+ # @param endpoint [Mindee::V1::HTTP::Endpoint, nil] Endpoint of the API.
105
+ # @param options [Hash, ParseOptions] Options to configure the parsing behavior. Possible keys:
106
+ # * `:all_words` [bool] Whether to extract all the words on each page.
107
+ # This performs a full OCR operation on the server and will increase response time.
108
+ # * `:full_text` [bool] Whether to include the full OCR text response in compatible APIs.
109
+ # This performs a full OCR operation on the server and may increase response time.
110
+ # * `:close_file` [bool] Whether to `close()` the file after parsing it.
111
+ # Set to false if you need to access the file after this operation.
112
+ # * `:page_options` [Hash, nil] Page cutting/merge options:
113
+ # - `:page_indexes` [Array<Integer>] Zero-based list of page indexes.
114
+ # - `:operation` [Symbol] Operation to apply on the document, given the `page_indexes` specified:
115
+ # - `:KEEP_ONLY` - keep only the specified pages, and remove all others.
116
+ # - `:REMOVE` - remove the specified pages, and keep all others.
117
+ # - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages.
118
+ # * `:cropper` [bool, nil] Whether to include cropper results for each page.
119
+ # This performs a cropping operation on the server and will increase response time.
120
+ # * `:initial_delay_sec` [Numeric] Initial delay before polling. Defaults to 2.
121
+ # * `:delay_sec` [Numeric] Delay between polling attempts. Defaults to 1.5.
122
+ # * `:max_retries` [Integer] Maximum number of retries. Defaults to 80.
123
+ # @param enqueue [bool] Whether to enqueue the file.
124
+ # @return [Mindee::V1::Parsing::Common::ApiResponse]
125
+ def parse(input_source, product_class, endpoint: nil, options: {}, enqueue: true)
126
+ opts = normalize_parse_options(options)
127
+ process_pdf_if_required(input_source, opts) if input_source.is_a?(Input::Source::LocalInputSource)
128
+ endpoint ||= initialize_endpoint(product_class)
129
+
130
+ if enqueue && product_class.has_async
131
+ enqueue_and_parse(input_source, product_class, endpoint, opts)
132
+ else
133
+ parse_sync(input_source, product_class, endpoint, opts)
134
+ end
135
+ end
136
+
137
+ # Call prediction API on a document and parse the results.
138
+ #
139
+ # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
140
+ # @param product_class [Mindee::Inference] class of the product
141
+ # @param endpoint [Mindee::V1::HTTP::Endpoint, nil] Endpoint of the API.
142
+ # @param options [Hash] A hash of options to configure the parsing behavior. Possible keys:
143
+ # * `:all_words` [bool] Whether to extract all the words on each page.
144
+ # This performs a full OCR operation on the server and will increase response time.
145
+ # * `:full_text` [bool] Whether to include the full OCR text response in compatible APIs.
146
+ # This performs a full OCR operation on the server and may increase response time.
147
+ # * `:close_file` [bool] Whether to `close()` the file after parsing it.
148
+ # Set to false if you need to access the file after this operation.
149
+ # * `:page_options` [Hash, nil] Page cutting/merge options:
150
+ # - `:page_indexes` [Array<Integer>] Zero-based list of page indexes.
151
+ # - `:operation` [Symbol] Operation to apply on the document, given the `page_indexes` specified:
152
+ # - `:KEEP_ONLY` - keep only the specified pages, and remove all others.
153
+ # - `:REMOVE` - remove the specified pages, and keep all others.
154
+ # - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages.
155
+ # * `:cropper` [bool, nil] Whether to include cropper results for each page.
156
+ # This performs a cropping operation on the server and will increase response time.
157
+ # @return [Mindee::V1::Parsing::Common::ApiResponse]
158
+ def parse_sync(input_source, product_class, endpoint, options)
159
+ logger.debug("Parsing document as '#{endpoint.url_root}'")
160
+
161
+ prediction, raw_http = endpoint.predict(
162
+ input_source,
163
+ options
164
+ )
165
+
166
+ Mindee::V1::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
167
+ end
168
+
169
+ # Enqueue a document for async parsing
170
+ #
171
+ # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
172
+ # The source of the input document (local file or URL).
173
+ # @param product_class [Mindee::Inference] The class of the product.
174
+ # @param options [Hash] A hash of options to configure the enqueue behavior. Possible keys:
175
+ # * `:endpoint` is not read from this hash: pass the endpoint through the `endpoint:` keyword argument.
176
+ # It doesn't need to be set in the case of OTS APIs.
177
+ # * `:all_words` [bool] Whether to extract all the words on each page.
178
+ # This performs a full OCR operation on the server and will increase response time.
179
+ # * `:full_text` [bool] Whether to include the full OCR text response in compatible APIs.
180
+ # This performs a full OCR operation on the server and may increase response time.
181
+ # * `:close_file` [bool] Whether to `close()` the file after parsing it.
182
+ # Set to false if you need to access the file after this operation.
183
+ # * `:page_options` [Hash, nil] Page cutting/merge options:
184
+ # - `:page_indexes` [Array<Integer>] Zero-based list of page indexes.
185
+ # - `:operation` [Symbol] Operation to apply on the document, given the `page_indexes` specified:
186
+ # - `:KEEP_ONLY` - keep only the specified pages, and remove all others.
187
+ # - `:REMOVE` - remove the specified pages, and keep all others.
188
+ # - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages.
189
+ # * `:cropper` [bool] Whether to include cropper results for each page.
190
+ # This performs a cropping operation on the server and will increase response time.
191
+ # * `:rag` [bool] Whether to enable Retrieval-Augmented Generation. Only works if a Workflow ID is provided.
192
+ # * `:workflow_id` [String, nil] ID of the workflow to use.
193
+ # @param endpoint [Mindee::V1::HTTP::Endpoint] Endpoint of the API.
194
+ # @return [Mindee::V1::Parsing::Common::ApiResponse]
195
+ def enqueue(input_source, product_class, endpoint: nil, options: {})
196
+ opts = normalize_parse_options(options)
197
+ endpoint ||= initialize_endpoint(product_class)
198
+ logger.debug("Enqueueing document as '#{endpoint.url_root}'")
199
+
200
+ prediction, raw_http = endpoint.predict_async(
201
+ input_source,
202
+ opts
203
+ )
204
+ Mindee::V1::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
205
+ end
206
+
207
+ # Parses a queued document
208
+ #
209
+ # @param job_id [String] ID of the job (queue) to poll from
210
+ # @param product_class [Mindee::Inference] class of the product
211
+ # @param endpoint [V1::HTTP::Endpoint, nil] Endpoint of the API
212
+ # Doesn't need to be set in the case of OTS APIs.
213
+ #
214
+ # @return [Mindee::V1::Parsing::Common::ApiResponse]
215
+ def parse_queued(job_id, product_class, endpoint: nil)
216
+ endpoint = initialize_endpoint(product_class) if endpoint.nil?
217
+ logger.debug("Fetching queued document as '#{endpoint.url_root}'")
218
+ prediction, raw_http = endpoint.parse_async(job_id)
219
+ Mindee::V1::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
220
+ end
221
+
222
+ # Enqueue a document for async parsing and automatically try to retrieve it
223
+ #
224
+ # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
225
+ # The source of the input document (local file or URL).
226
+ # @param product_class [Mindee::Inference] The class of the product.
227
+ # @param options [Hash] A hash of options to configure the parsing behavior. Possible keys:
228
+ # * `:endpoint` is not read from this hash: pass the endpoint through the `endpoint` positional argument.
229
+ # For OTS APIs, `parse` builds it automatically from the product class.
230
+ # * `:all_words` [bool] Whether to extract all the words on each page.
231
+ # This performs a full OCR operation on the server and will increase response time.
232
+ # * `:full_text` [bool] Whether to include the full OCR text response in compatible APIs.
233
+ # This performs a full OCR operation on the server and may increase response time.
234
+ # * `:close_file` [bool] Whether to `close()` the file after parsing it.
235
+ # Set to false if you need to access the file after this operation.
236
+ # * `:page_options` [Hash, nil] Page cutting/merge options:
237
+ # - `:page_indexes` [Array<Integer>] Zero-based list of page indexes.
238
+ # - `:operation` [Symbol] Operation to apply on the document, given the `page_indexes` specified:
239
+ # - `:KEEP_ONLY` - keep only the specified pages, and remove all others.
240
+ # - `:REMOVE` - remove the specified pages, and keep all others.
241
+ # - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages.
242
+ # * `:cropper` [bool, nil] Whether to include cropper results for each page.
243
+ # This performs a cropping operation on the server and will increase response time.
244
+ # * `:rag` [bool] Whether to enable Retrieval-Augmented Generation. Only works if a Workflow ID is provided.
245
+ # * `:workflow_id` [String, nil] ID of the workflow to use.
246
+ # * `:initial_delay_sec` [Numeric] Initial delay before polling. Defaults to 2.
247
+ # * `:delay_sec` [Numeric] Delay between polling attempts. Defaults to 1.5.
248
+ # * `:max_retries` [Integer] Maximum number of retries. Defaults to 80.
249
+ # @param endpoint [Mindee::V1::HTTP::Endpoint] Endpoint of the API.
250
+ # @return [Mindee::V1::Parsing::Common::ApiResponse]
251
+ def enqueue_and_parse(input_source, product_class, endpoint, options)
252
+ opts = normalize_parse_options(options) # public method: also accept the documented plain Hash
253
+ validate_async_params(opts.initial_delay_sec, opts.delay_sec, opts.max_retries)
254
+ enqueue_res = enqueue(input_source, product_class, endpoint: endpoint, options: opts)
255
+ job = enqueue_res.job or raise Error::MindeeAPIError, 'Expected job to be present'
256
+ job_id = job.id
257
+ logger.debug("Successfully enqueued document with job id: '#{job_id}'")
258
+ sleep(opts.initial_delay_sec)
259
+ polling_attempts = 1
260
+ queue_res = parse_queued(job_id, product_class, endpoint: endpoint)
261
+ queue_res_job = queue_res.job or raise Error::MindeeAPIError, 'Expected job to be present'
262
+ valid_statuses = [
263
+ Mindee::V1::Parsing::Common::JobStatus::WAITING,
264
+ Mindee::V1::Parsing::Common::JobStatus::PROCESSING,
265
+ ]
266
+ # @type var valid_statuses: Array[(:waiting | :processing | :completed | :failed)]
267
+ while valid_statuses.include?(queue_res_job.status) && polling_attempts < opts.max_retries
268
+ logger.debug("Polling server for parsing result with job id: '#{job_id}'. Attempt #{polling_attempts}")
269
+ sleep(opts.delay_sec)
270
+ queue_res = parse_queued(job_id, product_class, endpoint: endpoint)
271
+ queue_res_job = queue_res.job or raise Error::MindeeAPIError, 'Expected job to be present'
272
+ polling_attempts += 1
273
+ end
274
+ # `delay_sec` is slept once per loop iteration, i.e. (polling_attempts - 1) times in total.
275
+ if queue_res_job.status != Mindee::V1::Parsing::Common::JobStatus::COMPLETED
276
+ elapsed = opts.initial_delay_sec + ((polling_attempts - 1) * opts.delay_sec.to_f)
277
+ raise Error::MindeeAPIError,
278
+ "Asynchronous parsing request timed out after #{elapsed} seconds (#{polling_attempts} tries)"
279
+ end
280
+
281
+ queue_res
282
+ end
283
+
284
+ # Sends a document to a workflow.
285
+ #
286
+ # Accepts options either as a Hash or as a WorkflowOptions struct.
287
+ #
288
+ # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
289
+ # @param workflow_id [String]
290
+ # @param options [Hash, WorkflowOptions] Options to configure workflow behavior. Possible keys:
291
+ # * `document_alias` [String, nil] Alias to give to the document.
292
+ # * `priority` [Symbol, nil] Priority to give to the document.
293
+ # * `full_text` [bool] Whether to include the full OCR text response in compatible APIs.
294
+ # * `rag` [bool, nil] Whether to enable Retrieval-Augmented Generation.
295
+ #
296
+ # * `public_url` [String, nil] A unique, encrypted URL for accessing the document validation interface without
297
+ # requiring authentication.
298
+ # * `page_options` [Hash, nil] Page cutting/merge options:
299
+ # * `:page_indexes` Zero-based list of page indexes.
300
+ # * `:operation` Operation to apply on the document, given the `page_indexes` specified:
301
+ # * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
302
+ # * `:REMOVE` - remove the specified pages, and keep all others.
303
+ # * `:on_min_pages` Apply the operation only if document has at least this many pages.
304
+ # @return [Mindee::V1::Parsing::Common::WorkflowResponse]
305
+ def execute_workflow(input_source, workflow_id, options: {})
306
+ opts = options.is_a?(WorkflowOptions) ? options : WorkflowOptions.new(params: options)
307
+ if opts.respond_to?(:page_options) && input_source.is_a?(Input::Source::LocalInputSource)
308
+ process_pdf_if_required(input_source, opts)
309
+ end
310
+
311
+ workflow_endpoint = V1::HTTP::WorkflowEndpoint.new(workflow_id, api_key: @api_key.to_s)
312
+ logger.debug("Sending document to workflow '#{workflow_id}'")
313
+
314
+ prediction, raw_http = workflow_endpoint.execute_workflow(
315
+ input_source,
316
+ opts
317
+ )
318
+
319
+ Mindee::V1::Parsing::Common::WorkflowResponse.new(V1::Product::Universal::Universal, prediction, raw_http)
320
+ end
321
+
322
+ # Load a prediction.
323
+ #
324
+ # @param product_class [Mindee::Inference] class of the product
325
+ # @param local_response [Mindee::Input::LocalResponse]
326
+ # @return [Mindee::V1::Parsing::Common::ApiResponse]
327
+ def load_prediction(product_class, local_response)
328
+ raise Error::MindeeAPIError, 'Expected LocalResponse to not be nil.' if local_response.nil?
329
+ # No `|| {}` fallback here: it would make the nil guard below unreachable.
330
+ response_hash = local_response.as_hash
331
+ raise Error::MindeeAPIError, 'Expected LocalResponse#as_hash to return a hash.' if response_hash.nil?
332
+
333
+ Mindee::V1::Parsing::Common::ApiResponse.new(product_class, response_hash, response_hash.to_json)
334
+ rescue KeyError, Error::MindeeAPIError
335
+ raise Error::MindeeInputError, 'No prediction found in local response.'
336
+ end
337
+
338
+ # Load a document from an absolute path, as a string.
339
+ # @param input_path [String] Path of file to open
340
+ # @param repair_pdf [bool] Attempts to fix broken pdf if true
341
+ # @return [Mindee::Input::Source::PathInputSource]
342
+ def source_from_path(input_path, repair_pdf: false)
343
+ Input::Source::PathInputSource.new(input_path, repair_pdf: repair_pdf)
344
+ end
345
+
346
+ # Load a document from raw bytes.
347
+ # @param input_bytes [String] Encoding::BINARY byte input
348
+ # @param filename [String] The name of the file (without the path)
349
+ # @param repair_pdf [bool] Attempts to fix broken pdf if true
350
+ # @return [Mindee::Input::Source::BytesInputSource]
351
+ def source_from_bytes(input_bytes, filename, repair_pdf: false)
352
+ Input::Source::BytesInputSource.new(input_bytes, filename, repair_pdf: repair_pdf)
353
+ end
354
+
355
+ # Load a document from a base64 encoded string.
356
+ # @param base64_string [String] Input to parse as base64 string
357
+ # @param filename [String] The name of the file (without the path)
358
+ # @param repair_pdf [bool] Attempts to fix broken pdf if true
359
+ # @return [Mindee::Input::Source::Base64InputSource]
360
+ def source_from_b64string(base64_string, filename, repair_pdf: false)
361
+ Input::Source::Base64InputSource.new(base64_string, filename, repair_pdf: repair_pdf)
362
+ end
363
+
364
+ # Load a document from a normal Ruby `File`.
365
+ # @param input_file [File] Input file handle
366
+ # @param filename [String] The name of the file (without the path)
367
+ # @param repair_pdf [bool] Attempts to fix broken pdf if true
368
+ # @return [Mindee::Input::Source::FileInputSource]
369
+ def source_from_file(input_file, filename, repair_pdf: false)
370
+ Input::Source::FileInputSource.new(input_file, filename, repair_pdf: repair_pdf)
371
+ end
372
+
373
+ # Load a document from a secure remote source (HTTPS).
374
+ # @param url [String] URL of the file
375
+ # @return [Mindee::Input::Source::URLInputSource]
376
+ def source_from_url(url)
377
+ Input::Source::URLInputSource.new(url)
378
+ end
379
+
380
+ # Creates a custom endpoint with the given values.
381
+ # Do not set for standard (off the shelf) endpoints.
382
+ #
383
+ # @param endpoint_name [String] For custom endpoints, the "API name" field in the "Settings" page of the
384
+ # API Builder. Do not set for standard (off the shelf) endpoints.
385
+ #
386
+ # @param account_name [String] For custom endpoints, your account or organization username on the API Builder.
387
+ # This is normally not required unless you have a custom endpoint which has the same name as a
388
+ # standard (off the shelf) endpoint.
389
+ # @param version [String] For custom endpoints, version of the product
390
+ # @return [Mindee::V1::HTTP::Endpoint]
391
+ def create_endpoint(endpoint_name: '', account_name: '', version: '')
392
+ initialize_endpoint(
393
+ Mindee::V1::Product::Universal::Universal,
394
+ endpoint_name: endpoint_name,
395
+ account_name: account_name,
396
+ version: version
397
+ )
398
+ end
399
+
400
+ # Validates the parameters for async auto-polling
401
+ # @param initial_delay_sec [Numeric] initial delay before polling
402
+ # @param delay_sec [Numeric] delay between polling attempts
403
+ # @param max_retries [Integer] maximum number of retries (must not be nil, it is compared numerically).
404
+ def validate_async_params(initial_delay_sec, delay_sec, max_retries)
405
+ min_delay_sec = 1
406
+ min_initial_delay_sec = 1
407
+ min_retries = 2
408
+
409
+ if delay_sec < min_delay_sec
410
+ raise ArgumentError,
411
+ "Cannot set auto-poll delay to less than #{min_delay_sec} second(s)"
412
+ end
413
+ if initial_delay_sec < min_initial_delay_sec
414
+ raise ArgumentError,
415
+ "Cannot set initial parsing delay to less than #{min_initial_delay_sec} second(s)"
416
+ end
417
+ raise ArgumentError, "Cannot set auto-poll retries to less than #{min_retries}" if max_retries < min_retries
418
+ end
419
+
420
+ # Creates an endpoint with the given values. Raises an error if the endpoint is invalid.
421
+ # @param product_class [Mindee::V1::Parsing::Common::Inference] class of the product
422
+ #
423
+ # @param endpoint_name [String] For custom endpoints, the "API name" field in the "Settings" page of the
424
+ # API Builder. Do not set for standard (off the shelf) endpoints.
425
+ #
426
+ # @param account_name [String] For custom endpoints, your account or organization username on the API Builder.
427
+ # This is normally not required unless you have a custom endpoint which has the same name as a
428
+ # standard (off the shelf) endpoint.
429
+ # @param version [String] For custom endpoints, version of the product.
430
+ # @return [Mindee::V1::HTTP::Endpoint]
431
+ def initialize_endpoint(product_class, endpoint_name: '', account_name: '', version: '')
432
+ if (endpoint_name.nil? || endpoint_name.empty?) && product_class == Mindee::V1::Product::Universal::Universal
433
+ raise Mindee::Error::MindeeConfigurationError, 'Missing argument endpoint_name when using custom class'
434
+ end
435
+
436
+ endpoint_name = fix_endpoint_name(product_class, endpoint_name)
437
+ account_name = fix_account_name(account_name)
438
+ version = fix_version(product_class, version)
439
+
440
+ V1::HTTP::Endpoint.new(account_name, endpoint_name, version, api_key: @api_key.to_s)
441
+ end
442
+
443
+ def fix_endpoint_name(product_class, endpoint_name)
444
+ endpoint_name.nil? || endpoint_name.empty? ? product_class.endpoint_name.to_s : endpoint_name.to_s
445
+ end
446
+
447
+ def fix_account_name(account_name)
448
+ if account_name.nil? || account_name.empty?
449
+ logger.info("No account name provided, #{OTS_OWNER} will be used by default.")
450
+ return OTS_OWNER
451
+ end
452
+
453
+ account_name
454
+ end
455
+
456
+ def fix_version(product_class, version)
457
+ return version unless version.nil? || version.empty?
458
+
459
+ if product_class.endpoint_version.nil? || product_class.endpoint_version.to_s.empty?
460
+ logger.debug('No version provided for a custom build, will attempt to poll version 1 by default.')
461
+ return '1'
462
+ end
463
+ product_class.endpoint_version || ''
464
+ end
465
+
466
+ # If needed, converts the parsing options provided as a hash into a proper ParseOptions object.
467
+ # @param options [Hash, ParseOptions] Options.
468
+ # @return [ParseOptions]
469
+ def normalize_parse_options(options)
470
+ return options if options.is_a?(ParseOptions)
471
+
472
+ ParseOptions.new(params: options)
473
+ end
474
+
475
+ # Processes a PDF if parameters were provided.
476
+ # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
477
+ # @param opts [ParseOptions, WorkflowOptions] Options object exposing `page_options`.
478
+ def process_pdf_if_required(input_source, opts)
479
+ return unless input_source.is_a?(Mindee::Input::Source::LocalInputSource) &&
480
+ opts.page_options.on_min_pages &&
481
+ input_source.pdf?
482
+
483
+ input_source.process_pdf(opts.page_options)
484
+ end
485
+
486
+ private :parse_sync, :validate_async_params, :initialize_endpoint, :fix_endpoint_name, :fix_version,
487
+ :fix_account_name, :process_pdf_if_required, :normalize_parse_options
488
+ end
489
+ end
490
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module V1
5
+ # Custom extraction module
6
+ module Extraction
7
+ # Multi-receipts extraction
8
+ # Extracts individual receipts from multi-receipts documents.
9
+ #
10
+ # @param input_source [LocalInputSource] Local Input Source to extract sub-receipts from.
11
+ # @param inference [Inference] Results of the inference.
12
+ # @return [Array<ExtractedImage>] Individual extracted receipts, one ExtractedImage per detected receipt.
13
+ def self.extract_receipts(input_source, inference)
14
+ images = [] # @type var images: Array[Image::ExtractedImage]
15
+ unless inference.prediction.receipts
16
+ raise Error::MindeeInputError,
17
+ 'No possible receipts candidates found for Multi-Receipts extraction.'
18
+ end
19
+
20
+ (0...input_source.page_count).each do |page_id|
21
+ receipt_positions = inference.pages[page_id].prediction.receipts.map(&:bounding_box)
22
+ images.concat(
23
+ Mindee::Image::ImageExtractor.extract_multiple_images_from_source(input_source, page_id + 1,
24
+ receipt_positions)
25
+ )
26
+ end
27
+
28
+ images
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'extraction/multi_receipts_extractor'
@@ -0,0 +1,7 @@
1
+ inherit_from: ../../../../.rubocop.yml
2
+
3
+ Metrics/CyclomaticComplexity:
4
+ Max: 10
5
+
6
+ Metrics/PerceivedComplexity:
7
+ Max: 10