mindee-lite 5.0.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (510) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +24 -0
  3. data/.gitattributes +14 -0
  4. data/.gitignore +76 -0
  5. data/.gitmodules +3 -0
  6. data/.pre-commit-config.yaml +36 -0
  7. data/.rubocop.yml +49 -0
  8. data/.yardopts +4 -0
  9. data/CHANGELOG.md +515 -0
  10. data/CODE_OF_CONDUCT.md +129 -0
  11. data/CONTRIBUTING.md +107 -0
  12. data/Gemfile +14 -0
  13. data/LICENSE +21 -0
  14. data/README.md +42 -0
  15. data/Rakefile +40 -0
  16. data/Steepfile +30 -0
  17. data/bin/console +14 -0
  18. data/bin/mindee.rb +30 -0
  19. data/bin/v1/parser.rb +153 -0
  20. data/bin/v1/products.rb +88 -0
  21. data/bin/v2/parser.rb +235 -0
  22. data/bin/v2/products.rb +34 -0
  23. data/docs/code_samples/bank_account_details_v1.txt +24 -0
  24. data/docs/code_samples/bank_account_details_v2.txt +24 -0
  25. data/docs/code_samples/bank_statement_fr_v2_async.txt +24 -0
  26. data/docs/code_samples/barcode_reader_v1.txt +24 -0
  27. data/docs/code_samples/cropper_v1.txt +21 -0
  28. data/docs/code_samples/default.txt +30 -0
  29. data/docs/code_samples/default_async.txt +29 -0
  30. data/docs/code_samples/expense_receipts_v5.txt +25 -0
  31. data/docs/code_samples/expense_receipts_v5_async.txt +24 -0
  32. data/docs/code_samples/financial_document_v1.txt +25 -0
  33. data/docs/code_samples/financial_document_v1_async.txt +24 -0
  34. data/docs/code_samples/idcard_fr_v1.txt +24 -0
  35. data/docs/code_samples/idcard_fr_v2.txt +24 -0
  36. data/docs/code_samples/international_id_v2_async.txt +24 -0
  37. data/docs/code_samples/invoice_splitter_v1_async.txt +24 -0
  38. data/docs/code_samples/invoices_v4.txt +25 -0
  39. data/docs/code_samples/invoices_v4_async.txt +24 -0
  40. data/docs/code_samples/multi_receipts_detector_v1.txt +24 -0
  41. data/docs/code_samples/passport_v1.txt +24 -0
  42. data/docs/code_samples/resume_v1_async.txt +24 -0
  43. data/docs/code_samples/v2_classification.txt +30 -0
  44. data/docs/code_samples/v2_crop.txt +30 -0
  45. data/docs/code_samples/v2_extraction.txt +42 -0
  46. data/docs/code_samples/v2_extraction_webhook.txt +45 -0
  47. data/docs/code_samples/v2_ocr.txt +30 -0
  48. data/docs/code_samples/v2_split.txt +30 -0
  49. data/docs/code_samples/workflow_execution.txt +28 -0
  50. data/docs/code_samples/workflow_polling.txt +35 -0
  51. data/examples/auto_invoice_splitter_extraction.rb +48 -0
  52. data/examples/auto_multi_receipts_detector_extraction.rb +30 -0
  53. data/lib/mindee/dependency.rb +29 -0
  54. data/lib/mindee/error/mindee_error.rb +17 -0
  55. data/lib/mindee/error/mindee_http_error.rb +36 -0
  56. data/lib/mindee/error/mindee_http_error_v2.rb +45 -0
  57. data/lib/mindee/error/mindee_http_unknown_error_v2.rb +18 -0
  58. data/lib/mindee/error/mindee_input_error.rb +30 -0
  59. data/lib/mindee/error.rb +6 -0
  60. data/lib/mindee/geometry/min_max.rb +23 -0
  61. data/lib/mindee/geometry/point.rb +41 -0
  62. data/lib/mindee/geometry/polygon.rb +37 -0
  63. data/lib/mindee/geometry/quadrilateral.rb +50 -0
  64. data/lib/mindee/geometry/utils.rb +88 -0
  65. data/lib/mindee/geometry.rb +7 -0
  66. data/lib/mindee/http/.rubocop.yml +7 -0
  67. data/lib/mindee/http/http_error_handler.rb +106 -0
  68. data/lib/mindee/http/response_validation.rb +81 -0
  69. data/lib/mindee/http.rb +3 -0
  70. data/lib/mindee/image/extracted_image.rb +89 -0
  71. data/lib/mindee/image/image_compressor.rb +29 -0
  72. data/lib/mindee/image/image_extractor.rb +118 -0
  73. data/lib/mindee/image/image_utils.rb +165 -0
  74. data/lib/mindee/image.rb +6 -0
  75. data/lib/mindee/input/base_parameters.rb +149 -0
  76. data/lib/mindee/input/local_response.rb +80 -0
  77. data/lib/mindee/input/polling_options.rb +26 -0
  78. data/lib/mindee/input/sources/base64_input_source.rb +31 -0
  79. data/lib/mindee/input/sources/bytes_input_source.rb +21 -0
  80. data/lib/mindee/input/sources/file_input_source.rb +20 -0
  81. data/lib/mindee/input/sources/local_input_source.rb +216 -0
  82. data/lib/mindee/input/sources/path_input_source.rb +20 -0
  83. data/lib/mindee/input/sources/url_input_source.rb +130 -0
  84. data/lib/mindee/input/sources.rb +8 -0
  85. data/lib/mindee/input.rb +4 -0
  86. data/lib/mindee/logging/logger.rb +24 -0
  87. data/lib/mindee/logging.rb +3 -0
  88. data/lib/mindee/page_options.rb +24 -0
  89. data/lib/mindee/pdf/extracted_pdf.rb +70 -0
  90. data/lib/mindee/pdf/pdf_compressor.rb +121 -0
  91. data/lib/mindee/pdf/pdf_extractor.rb +121 -0
  92. data/lib/mindee/pdf/pdf_processor.rb +91 -0
  93. data/lib/mindee/pdf/pdf_tools.rb +201 -0
  94. data/lib/mindee/pdf.rb +7 -0
  95. data/lib/mindee/v1/client.rb +490 -0
  96. data/lib/mindee/v1/extraction/multi_receipts_extractor.rb +32 -0
  97. data/lib/mindee/v1/extraction.rb +3 -0
  98. data/lib/mindee/v1/http/.rubocop.yml +7 -0
  99. data/lib/mindee/v1/http/endpoint.rb +221 -0
  100. data/lib/mindee/v1/http/workflow_endpoint.rb +93 -0
  101. data/lib/mindee/v1/http.rb +4 -0
  102. data/lib/mindee/v1/parsing/common/api_request.rb +38 -0
  103. data/lib/mindee/v1/parsing/common/api_response.rb +63 -0
  104. data/lib/mindee/v1/parsing/common/document.rb +86 -0
  105. data/lib/mindee/v1/parsing/common/execution.rb +78 -0
  106. data/lib/mindee/v1/parsing/common/execution_file.rb +26 -0
  107. data/lib/mindee/v1/parsing/common/execution_priority.rb +38 -0
  108. data/lib/mindee/v1/parsing/common/extras/cropper_extra.rb +32 -0
  109. data/lib/mindee/v1/parsing/common/extras/extras.rb +62 -0
  110. data/lib/mindee/v1/parsing/common/extras/full_text_ocr_extra.rb +35 -0
  111. data/lib/mindee/v1/parsing/common/extras/rag_extra.rb +28 -0
  112. data/lib/mindee/v1/parsing/common/extras.rb +6 -0
  113. data/lib/mindee/v1/parsing/common/inference.rb +69 -0
  114. data/lib/mindee/v1/parsing/common/job.rb +48 -0
  115. data/lib/mindee/v1/parsing/common/ocr/mvision_v1.rb +52 -0
  116. data/lib/mindee/v1/parsing/common/ocr/ocr.rb +180 -0
  117. data/lib/mindee/v1/parsing/common/ocr.rb +3 -0
  118. data/lib/mindee/v1/parsing/common/orientation.rb +28 -0
  119. data/lib/mindee/v1/parsing/common/page.rb +49 -0
  120. data/lib/mindee/v1/parsing/common/prediction.rb +19 -0
  121. data/lib/mindee/v1/parsing/common/product.rb +26 -0
  122. data/lib/mindee/v1/parsing/common/workflow_response.rb +30 -0
  123. data/lib/mindee/v1/parsing/common.rb +15 -0
  124. data/lib/mindee/v1/parsing/standard/abstract_field.rb +74 -0
  125. data/lib/mindee/v1/parsing/standard/address_field.rb +51 -0
  126. data/lib/mindee/v1/parsing/standard/amount_field.rb +28 -0
  127. data/lib/mindee/v1/parsing/standard/base_field.rb +30 -0
  128. data/lib/mindee/v1/parsing/standard/boolean_field.rb +29 -0
  129. data/lib/mindee/v1/parsing/standard/classification_field.rb +18 -0
  130. data/lib/mindee/v1/parsing/standard/company_registration_field.rb +45 -0
  131. data/lib/mindee/v1/parsing/standard/date_field.rb +40 -0
  132. data/lib/mindee/v1/parsing/standard/feature_field.rb +26 -0
  133. data/lib/mindee/v1/parsing/standard/locale_field.rb +52 -0
  134. data/lib/mindee/v1/parsing/standard/payment_details_field.rb +44 -0
  135. data/lib/mindee/v1/parsing/standard/position_field.rb +61 -0
  136. data/lib/mindee/v1/parsing/standard/string_field.rb +26 -0
  137. data/lib/mindee/v1/parsing/standard/tax_field.rb +110 -0
  138. data/lib/mindee/v1/parsing/standard.rb +15 -0
  139. data/lib/mindee/v1/parsing/universal/universal_list_field.rb +60 -0
  140. data/lib/mindee/v1/parsing/universal/universal_object_field.rb +123 -0
  141. data/lib/mindee/v1/parsing/universal.rb +4 -0
  142. data/lib/mindee/v1/parsing.rb +5 -0
  143. data/lib/mindee/v1/product/.rubocop.yml +12 -0
  144. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1.rb +47 -0
  145. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb +47 -0
  146. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rb +38 -0
  147. data/lib/mindee/v1/product/cropper/cropper_v1.rb +47 -0
  148. data/lib/mindee/v1/product/cropper/cropper_v1_document.rb +15 -0
  149. data/lib/mindee/v1/product/cropper/cropper_v1_page.rb +55 -0
  150. data/lib/mindee/v1/product/financial_document/financial_document_v1.rb +47 -0
  151. data/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb +329 -0
  152. data/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb +124 -0
  153. data/lib/mindee/v1/product/financial_document/financial_document_v1_line_items.rb +64 -0
  154. data/lib/mindee/v1/product/financial_document/financial_document_v1_page.rb +38 -0
  155. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rb +49 -0
  156. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb +49 -0
  157. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rb +40 -0
  158. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rb +49 -0
  159. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb +63 -0
  160. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb +60 -0
  161. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rb +40 -0
  162. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2.rb +49 -0
  163. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb +169 -0
  164. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rb +40 -0
  165. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb +78 -0
  166. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rb +56 -0
  167. data/lib/mindee/v1/product/fr/id_card/id_card_v1.rb +49 -0
  168. data/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb +106 -0
  169. data/lib/mindee/v1/product/fr/id_card/id_card_v1_page.rb +57 -0
  170. data/lib/mindee/v1/product/fr/id_card/id_card_v2.rb +49 -0
  171. data/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb +143 -0
  172. data/lib/mindee/v1/product/fr/id_card/id_card_v2_page.rb +65 -0
  173. data/lib/mindee/v1/product/international_id/international_id_v2.rb +47 -0
  174. data/lib/mindee/v1/product/international_id/international_id_v2_document.rb +164 -0
  175. data/lib/mindee/v1/product/international_id/international_id_v2_page.rb +38 -0
  176. data/lib/mindee/v1/product/invoice/invoice_v4.rb +47 -0
  177. data/lib/mindee/v1/product/invoice/invoice_v4_document.rb +300 -0
  178. data/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb +124 -0
  179. data/lib/mindee/v1/product/invoice/invoice_v4_line_items.rb +64 -0
  180. data/lib/mindee/v1/product/invoice/invoice_v4_page.rb +38 -0
  181. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rb +47 -0
  182. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb +66 -0
  183. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +58 -0
  184. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +50 -0
  185. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rb +38 -0
  186. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rb +47 -0
  187. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +38 -0
  188. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +38 -0
  189. data/lib/mindee/v1/product/passport/passport_v1.rb +47 -0
  190. data/lib/mindee/v1/product/passport/passport_v1_document.rb +112 -0
  191. data/lib/mindee/v1/product/passport/passport_v1_page.rb +38 -0
  192. data/lib/mindee/v1/product/receipt/receipt_v5.rb +47 -0
  193. data/lib/mindee/v1/product/receipt/receipt_v5_document.rb +187 -0
  194. data/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb +88 -0
  195. data/lib/mindee/v1/product/receipt/receipt_v5_line_items.rb +56 -0
  196. data/lib/mindee/v1/product/receipt/receipt_v5_page.rb +38 -0
  197. data/lib/mindee/v1/product/resume/resume_v1.rb +47 -0
  198. data/lib/mindee/v1/product/resume/resume_v1_certificate.rb +82 -0
  199. data/lib/mindee/v1/product/resume/resume_v1_certificates.rb +60 -0
  200. data/lib/mindee/v1/product/resume/resume_v1_document.rb +340 -0
  201. data/lib/mindee/v1/product/resume/resume_v1_education.rb +106 -0
  202. data/lib/mindee/v1/product/resume/resume_v1_educations.rb +66 -0
  203. data/lib/mindee/v1/product/resume/resume_v1_language.rb +66 -0
  204. data/lib/mindee/v1/product/resume/resume_v1_languages.rb +56 -0
  205. data/lib/mindee/v1/product/resume/resume_v1_page.rb +38 -0
  206. data/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb +122 -0
  207. data/lib/mindee/v1/product/resume/resume_v1_professional_experiences.rb +70 -0
  208. data/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb +66 -0
  209. data/lib/mindee/v1/product/resume/resume_v1_social_networks_urls.rb +56 -0
  210. data/lib/mindee/v1/product/universal/universal.rb +48 -0
  211. data/lib/mindee/v1/product/universal/universal_document.rb +35 -0
  212. data/lib/mindee/v1/product/universal/universal_page.rb +54 -0
  213. data/lib/mindee/v1/product/universal/universal_prediction.rb +128 -0
  214. data/lib/mindee/v1/product.rb +18 -0
  215. data/lib/mindee/v1.rb +7 -0
  216. data/lib/mindee/v2/client.rb +132 -0
  217. data/lib/mindee/v2/file_operation/crop.rb +51 -0
  218. data/lib/mindee/v2/file_operation/crop_files.rb +25 -0
  219. data/lib/mindee/v2/file_operation/split.rb +37 -0
  220. data/lib/mindee/v2/file_operation/split_files.rb +25 -0
  221. data/lib/mindee/v2/file_operation.rb +6 -0
  222. data/lib/mindee/v2/http/.rubocop.yml +7 -0
  223. data/lib/mindee/v2/http/api_v2_settings.rb +65 -0
  224. data/lib/mindee/v2/http/mindee_api_v2.rb +230 -0
  225. data/lib/mindee/v2/http.rb +4 -0
  226. data/lib/mindee/v2/parsing/base_inference.rb +44 -0
  227. data/lib/mindee/v2/parsing/base_response.rb +15 -0
  228. data/lib/mindee/v2/parsing/common_response.rb +20 -0
  229. data/lib/mindee/v2/parsing/error_item.rb +21 -0
  230. data/lib/mindee/v2/parsing/error_response.rb +51 -0
  231. data/lib/mindee/v2/parsing/field/base_field.rb +63 -0
  232. data/lib/mindee/v2/parsing/field/field_confidence.rb +128 -0
  233. data/lib/mindee/v2/parsing/field/field_location.rb +33 -0
  234. data/lib/mindee/v2/parsing/field/inference_fields.rb +105 -0
  235. data/lib/mindee/v2/parsing/field/list_field.rb +79 -0
  236. data/lib/mindee/v2/parsing/field/object_field.rb +138 -0
  237. data/lib/mindee/v2/parsing/field/simple_field.rb +60 -0
  238. data/lib/mindee/v2/parsing/field.rb +9 -0
  239. data/lib/mindee/v2/parsing/inference_active_options.rb +67 -0
  240. data/lib/mindee/v2/parsing/inference_file.rb +38 -0
  241. data/lib/mindee/v2/parsing/inference_job.rb +25 -0
  242. data/lib/mindee/v2/parsing/inference_model.rb +30 -0
  243. data/lib/mindee/v2/parsing/job.rb +93 -0
  244. data/lib/mindee/v2/parsing/job_response.rb +30 -0
  245. data/lib/mindee/v2/parsing/job_webhook.rb +59 -0
  246. data/lib/mindee/v2/parsing/rag_metadata.rb +17 -0
  247. data/lib/mindee/v2/parsing/raw_text.rb +27 -0
  248. data/lib/mindee/v2/parsing/raw_text_page.rb +24 -0
  249. data/lib/mindee/v2/parsing/search/pagination_metadata.rb +44 -0
  250. data/lib/mindee/v2/parsing/search/search_model.rb +38 -0
  251. data/lib/mindee/v2/parsing/search/search_models.rb +34 -0
  252. data/lib/mindee/v2/parsing/search/search_response.rb +38 -0
  253. data/lib/mindee/v2/parsing/search.rb +6 -0
  254. data/lib/mindee/v2/parsing.rb +16 -0
  255. data/lib/mindee/v2/product/base_product.rb +28 -0
  256. data/lib/mindee/v2/product/classification/classification.rb +20 -0
  257. data/lib/mindee/v2/product/classification/classification_classifier.rb +25 -0
  258. data/lib/mindee/v2/product/classification/classification_inference.rb +35 -0
  259. data/lib/mindee/v2/product/classification/classification_response.rb +32 -0
  260. data/lib/mindee/v2/product/classification/classification_result.rb +27 -0
  261. data/lib/mindee/v2/product/classification/params/classification_parameters.rb +47 -0
  262. data/lib/mindee/v2/product/crop/crop.rb +20 -0
  263. data/lib/mindee/v2/product/crop/crop_inference.rb +34 -0
  264. data/lib/mindee/v2/product/crop/crop_item.rb +39 -0
  265. data/lib/mindee/v2/product/crop/crop_response.rb +40 -0
  266. data/lib/mindee/v2/product/crop/crop_result.rb +34 -0
  267. data/lib/mindee/v2/product/crop/params/crop_parameters.rb +47 -0
  268. data/lib/mindee/v2/product/extraction/extraction.rb +21 -0
  269. data/lib/mindee/v2/product/extraction/extraction_inference.rb +40 -0
  270. data/lib/mindee/v2/product/extraction/extraction_response.rb +32 -0
  271. data/lib/mindee/v2/product/extraction/extraction_result.rb +44 -0
  272. data/lib/mindee/v2/product/extraction/params/data_schema.rb +51 -0
  273. data/lib/mindee/v2/product/extraction/params/data_schema_field.rb +69 -0
  274. data/lib/mindee/v2/product/extraction/params/data_schema_replace.rb +39 -0
  275. data/lib/mindee/v2/product/extraction/params/extraction_parameters.rb +125 -0
  276. data/lib/mindee/v2/product/ocr/ocr.rb +20 -0
  277. data/lib/mindee/v2/product/ocr/ocr_inference.rb +34 -0
  278. data/lib/mindee/v2/product/ocr/ocr_page.rb +33 -0
  279. data/lib/mindee/v2/product/ocr/ocr_response.rb +32 -0
  280. data/lib/mindee/v2/product/ocr/ocr_result.rb +34 -0
  281. data/lib/mindee/v2/product/ocr/ocr_word.rb +29 -0
  282. data/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +47 -0
  283. data/lib/mindee/v2/product/split/params/split_parameters.rb +48 -0
  284. data/lib/mindee/v2/product/split/split.rb +19 -0
  285. data/lib/mindee/v2/product/split/split_inference.rb +34 -0
  286. data/lib/mindee/v2/product/split/split_range.rb +38 -0
  287. data/lib/mindee/v2/product/split/split_response.rb +40 -0
  288. data/lib/mindee/v2/product/split/split_result.rb +34 -0
  289. data/lib/mindee/v2/product.rb +7 -0
  290. data/lib/mindee/v2.rb +7 -0
  291. data/lib/mindee/version.rb +26 -0
  292. data/lib/mindee.rb +135 -0
  293. data/mindee-lite.gemspec +36 -0
  294. data/mindee.gemspec +44 -0
  295. data/sig/custom/marcel.rbs +3 -0
  296. data/sig/custom/mini_magick.rbs +31 -0
  297. data/sig/custom/net_http.rbs +43 -0
  298. data/sig/custom/origami.rbs +59 -0
  299. data/sig/mindee/dependency.rbs +13 -0
  300. data/sig/mindee/error/mindee_error.rbs +13 -0
  301. data/sig/mindee/error/mindee_http_error.rbs +17 -0
  302. data/sig/mindee/error/mindee_http_error_v2.rbs +15 -0
  303. data/sig/mindee/error/mindee_http_unknown_error_v2.rbs +9 -0
  304. data/sig/mindee/error/mindee_input_error.rbs +18 -0
  305. data/sig/mindee/geometry/min_max.rbs +11 -0
  306. data/sig/mindee/geometry/point.rbs +14 -0
  307. data/sig/mindee/geometry/polygon.rbs +12 -0
  308. data/sig/mindee/geometry/quadrilateral.rbs +15 -0
  309. data/sig/mindee/geometry/utils.rbs +13 -0
  310. data/sig/mindee/http/http_error_handler.rbs +15 -0
  311. data/sig/mindee/http/response_validation.rbs +11 -0
  312. data/sig/mindee/image/extracted_image.rbs +21 -0
  313. data/sig/mindee/image/image_compressor.rbs +8 -0
  314. data/sig/mindee/image/image_extractor.rbs +13 -0
  315. data/sig/mindee/image/image_utils.rbs +19 -0
  316. data/sig/mindee/input/base_parameters.rbs +35 -0
  317. data/sig/mindee/input/local_response.rbs +14 -0
  318. data/sig/mindee/input/polling_options.rbs +12 -0
  319. data/sig/mindee/input/sources/base64_input_source.rbs +11 -0
  320. data/sig/mindee/input/sources/bytes_input_source.rbs +10 -0
  321. data/sig/mindee/input/sources/file_input_source.rbs +10 -0
  322. data/sig/mindee/input/sources/local_input_source.rbs +30 -0
  323. data/sig/mindee/input/sources/path_input_source.rbs +10 -0
  324. data/sig/mindee/input/sources/url_input_source.rbs +20 -0
  325. data/sig/mindee/logging/logger.rbs +11 -0
  326. data/sig/mindee/page_options.rbs +11 -0
  327. data/sig/mindee/pdf/extracted_pdf.rbs +17 -0
  328. data/sig/mindee/pdf/pdf_compressor.rbs +15 -0
  329. data/sig/mindee/pdf/pdf_extractor.rbs +19 -0
  330. data/sig/mindee/pdf/pdf_processor.rbs +12 -0
  331. data/sig/mindee/pdf/pdf_tools.rbs +31 -0
  332. data/sig/mindee/v1/client.rbs +84 -0
  333. data/sig/mindee/v1/extraction/multi_receipts_extractor.rbs +8 -0
  334. data/sig/mindee/v1/http/endpoint.rbs +41 -0
  335. data/sig/mindee/v1/http/workflow_endpoint.rbs +22 -0
  336. data/sig/mindee/v1/parsing/common/api_request.rbs +22 -0
  337. data/sig/mindee/v1/parsing/common/api_response.rbs +31 -0
  338. data/sig/mindee/v1/parsing/common/document.rbs +32 -0
  339. data/sig/mindee/v1/parsing/common/execution.rbs +26 -0
  340. data/sig/mindee/v1/parsing/common/execution_file.rbs +16 -0
  341. data/sig/mindee/v1/parsing/common/execution_priority.rbs +16 -0
  342. data/sig/mindee/v1/parsing/common/extras/cropper_extra.rbs +18 -0
  343. data/sig/mindee/v1/parsing/common/extras/extras.rbs +24 -0
  344. data/sig/mindee/v1/parsing/common/extras/full_text_ocr_extra.rbs +22 -0
  345. data/sig/mindee/v1/parsing/common/extras/rag_extra.rbs +19 -0
  346. data/sig/mindee/v1/parsing/common/inference.rbs +31 -0
  347. data/sig/mindee/v1/parsing/common/job.rbs +24 -0
  348. data/sig/mindee/v1/parsing/common/ocr/mvision_v1.rbs +20 -0
  349. data/sig/mindee/v1/parsing/common/ocr/ocr.rbs +56 -0
  350. data/sig/mindee/v1/parsing/common/orientation.rbs +15 -0
  351. data/sig/mindee/v1/parsing/common/page.rbs +19 -0
  352. data/sig/mindee/v1/parsing/common/prediction.rbs +14 -0
  353. data/sig/mindee/v1/parsing/common/product.rbs +16 -0
  354. data/sig/mindee/v1/parsing/common/workflow_response.rbs +22 -0
  355. data/sig/mindee/v1/parsing/standard/abstract_field.rbs +30 -0
  356. data/sig/mindee/v1/parsing/standard/address_field.rbs +28 -0
  357. data/sig/mindee/v1/parsing/standard/amount_field.rbs +16 -0
  358. data/sig/mindee/v1/parsing/standard/base_field.rbs +16 -0
  359. data/sig/mindee/v1/parsing/standard/boolean_field.rbs +16 -0
  360. data/sig/mindee/v1/parsing/standard/classification_field.rbs +12 -0
  361. data/sig/mindee/v1/parsing/standard/company_registration_field.rbs +20 -0
  362. data/sig/mindee/v1/parsing/standard/date_field.rbs +20 -0
  363. data/sig/mindee/v1/parsing/standard/feature_field.rbs +12 -0
  364. data/sig/mindee/v1/parsing/standard/locale_field.rbs +24 -0
  365. data/sig/mindee/v1/parsing/standard/payment_details_field.rbs +19 -0
  366. data/sig/mindee/v1/parsing/standard/position_field.rbs +26 -0
  367. data/sig/mindee/v1/parsing/standard/string_field.rbs +16 -0
  368. data/sig/mindee/v1/parsing/standard/tax_field.rbs +33 -0
  369. data/sig/mindee/v1/parsing/universal/universal_list_field.rbs +21 -0
  370. data/sig/mindee/v1/parsing/universal/universal_object_field.rbs +38 -0
  371. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1.rbs +13 -0
  372. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rbs +16 -0
  373. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rbs +17 -0
  374. data/sig/mindee/v1/product/cropper/cropper_v1.rbs +13 -0
  375. data/sig/mindee/v1/product/cropper/cropper_v1_document.rbs +14 -0
  376. data/sig/mindee/v1/product/cropper/cropper_v1_page.rbs +19 -0
  377. data/sig/mindee/v1/product/financial_document/financial_document_v1.rbs +13 -0
  378. data/sig/mindee/v1/product/financial_document/financial_document_v1_document.rbs +49 -0
  379. data/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs +35 -0
  380. data/sig/mindee/v1/product/financial_document/financial_document_v1_line_items.rbs +15 -0
  381. data/sig/mindee/v1/product/financial_document/financial_document_v1_page.rbs +17 -0
  382. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rbs +15 -0
  383. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rbs +19 -0
  384. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rbs +19 -0
  385. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rbs +15 -0
  386. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs +25 -0
  387. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rbs +20 -0
  388. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rbs +19 -0
  389. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2.rbs +15 -0
  390. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rbs +31 -0
  391. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rbs +19 -0
  392. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs +27 -0
  393. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rbs +17 -0
  394. data/sig/mindee/v1/product/fr/id_card/id_card_v1.rbs +15 -0
  395. data/sig/mindee/v1/product/fr/id_card/id_card_v1_document.rbs +26 -0
  396. data/sig/mindee/v1/product/fr/id_card/id_card_v1_page.rbs +20 -0
  397. data/sig/mindee/v1/product/fr/id_card/id_card_v2.rbs +15 -0
  398. data/sig/mindee/v1/product/fr/id_card/id_card_v2_document.rbs +31 -0
  399. data/sig/mindee/v1/product/fr/id_card/id_card_v2_page.rbs +21 -0
  400. data/sig/mindee/v1/product/international_id/international_id_v2.rbs +13 -0
  401. data/sig/mindee/v1/product/international_id/international_id_v2_document.rbs +31 -0
  402. data/sig/mindee/v1/product/international_id/international_id_v2_page.rbs +17 -0
  403. data/sig/mindee/v1/product/invoice/invoice_v4.rbs +13 -0
  404. data/sig/mindee/v1/product/invoice/invoice_v4_document.rbs +45 -0
  405. data/sig/mindee/v1/product/invoice/invoice_v4_line_item.rbs +35 -0
  406. data/sig/mindee/v1/product/invoice/invoice_v4_line_items.rbs +15 -0
  407. data/sig/mindee/v1/product/invoice/invoice_v4_page.rbs +17 -0
  408. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rbs +13 -0
  409. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rbs +17 -0
  410. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs +21 -0
  411. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rbs +15 -0
  412. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rbs +17 -0
  413. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs +14 -0
  414. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs +15 -0
  415. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rbs +17 -0
  416. data/sig/mindee/v1/product/passport/passport_v1.rbs +13 -0
  417. data/sig/mindee/v1/product/passport/passport_v1_document.rbs +25 -0
  418. data/sig/mindee/v1/product/passport/passport_v1_page.rbs +17 -0
  419. data/sig/mindee/v1/product/receipt/receipt_v5.rbs +13 -0
  420. data/sig/mindee/v1/product/receipt/receipt_v5_document.rbs +33 -0
  421. data/sig/mindee/v1/product/receipt/receipt_v5_line_item.rbs +27 -0
  422. data/sig/mindee/v1/product/receipt/receipt_v5_line_items.rbs +15 -0
  423. data/sig/mindee/v1/product/receipt/receipt_v5_page.rbs +17 -0
  424. data/sig/mindee/v1/product/resume/resume_v1.rbs +13 -0
  425. data/sig/mindee/v1/product/resume/resume_v1_certificate.rbs +27 -0
  426. data/sig/mindee/v1/product/resume/resume_v1_certificates.rbs +17 -0
  427. data/sig/mindee/v1/product/resume/resume_v1_document.rbs +69 -0
  428. data/sig/mindee/v1/product/resume/resume_v1_education.rbs +33 -0
  429. data/sig/mindee/v1/product/resume/resume_v1_educations.rbs +17 -0
  430. data/sig/mindee/v1/product/resume/resume_v1_language.rbs +23 -0
  431. data/sig/mindee/v1/product/resume/resume_v1_languages.rbs +17 -0
  432. data/sig/mindee/v1/product/resume/resume_v1_page.rbs +19 -0
  433. data/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs +37 -0
  434. data/sig/mindee/v1/product/resume/resume_v1_professional_experiences.rbs +17 -0
  435. data/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs +23 -0
  436. data/sig/mindee/v1/product/resume/resume_v1_social_networks_urls.rbs +17 -0
  437. data/sig/mindee/v1/product/universal/universal.rbs +16 -0
  438. data/sig/mindee/v1/product/universal/universal_document.rbs +12 -0
  439. data/sig/mindee/v1/product/universal/universal_page.rbs +18 -0
  440. data/sig/mindee/v1/product/universal/universal_prediction.rbs +30 -0
  441. data/sig/mindee/v2/client.rbs +29 -0
  442. data/sig/mindee/v2/file_operation/crop.rbs +10 -0
  443. data/sig/mindee/v2/file_operation/crop_files.rbs +9 -0
  444. data/sig/mindee/v2/file_operation/split.rbs +11 -0
  445. data/sig/mindee/v2/file_operation/split_files.rbs +9 -0
  446. data/sig/mindee/v2/http/api_v2_settings.rbs +27 -0
  447. data/sig/mindee/v2/http/mindee_api_v2.rbs +52 -0
  448. data/sig/mindee/v2/parsing/base_inference.rbs +18 -0
  449. data/sig/mindee/v2/parsing/base_response.rbs +11 -0
  450. data/sig/mindee/v2/parsing/common_response.rbs +12 -0
  451. data/sig/mindee/v2/parsing/error_item.rbs +13 -0
  452. data/sig/mindee/v2/parsing/error_response.rbs +20 -0
  453. data/sig/mindee/v2/parsing/field/base_field.rbs +17 -0
  454. data/sig/mindee/v2/parsing/field/field_confidence.rbs +30 -0
  455. data/sig/mindee/v2/parsing/field/field_location.rbs +16 -0
  456. data/sig/mindee/v2/parsing/field/inference_fields.rbs +20 -0
  457. data/sig/mindee/v2/parsing/field/list_field.rbs +23 -0
  458. data/sig/mindee/v2/parsing/field/object_field.rbs +27 -0
  459. data/sig/mindee/v2/parsing/field/simple_field.rbs +16 -0
  460. data/sig/mindee/v2/parsing/inference_active_options.rbs +26 -0
  461. data/sig/mindee/v2/parsing/inference_file.rbs +17 -0
  462. data/sig/mindee/v2/parsing/inference_job.rbs +13 -0
  463. data/sig/mindee/v2/parsing/inference_model.rbs +12 -0
  464. data/sig/mindee/v2/parsing/job.rbs +24 -0
  465. data/sig/mindee/v2/parsing/job_response.rbs +14 -0
  466. data/sig/mindee/v2/parsing/job_webhook.rbs +19 -0
  467. data/sig/mindee/v2/parsing/rag_metadata.rbs +13 -0
  468. data/sig/mindee/v2/parsing/raw_text.rbs +12 -0
  469. data/sig/mindee/v2/parsing/raw_text_page.rbs +11 -0
  470. data/sig/mindee/v2/parsing/search/pagination_metadata.rbs +20 -0
  471. data/sig/mindee/v2/parsing/search/search_model.rbs +19 -0
  472. data/sig/mindee/v2/parsing/search/search_response.rbs +17 -0
  473. data/sig/mindee/v2/parsing/search_models.rbs +14 -0
  474. data/sig/mindee/v2/product/base_product.rbs +19 -0
  475. data/sig/mindee/v2/product/classification/classification.rbs +10 -0
  476. data/sig/mindee/v2/product/classification/classification_classifier.rbs +15 -0
  477. data/sig/mindee/v2/product/classification/classification_inference.rbs +15 -0
  478. data/sig/mindee/v2/product/classification/classification_response.rbs +23 -0
  479. data/sig/mindee/v2/product/classification/classification_result.rbs +15 -0
  480. data/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs +23 -0
  481. data/sig/mindee/v2/product/crop/crop.rbs +10 -0
  482. data/sig/mindee/v2/product/crop/crop_inference.rbs +14 -0
  483. data/sig/mindee/v2/product/crop/crop_item.rbs +18 -0
  484. data/sig/mindee/v2/product/crop/crop_response.rbs +25 -0
  485. data/sig/mindee/v2/product/crop/crop_result.rbs +14 -0
  486. data/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs +23 -0
  487. data/sig/mindee/v2/product/extraction/extraction.rbs +15 -0
  488. data/sig/mindee/v2/product/extraction/extraction_inference.rbs +19 -0
  489. data/sig/mindee/v2/product/extraction/extraction_response.rbs +24 -0
  490. data/sig/mindee/v2/product/extraction/extraction_result.rbs +18 -0
  491. data/sig/mindee/v2/product/extraction/params/data_schema.rbs +21 -0
  492. data/sig/mindee/v2/product/extraction/params/data_schema_field.rbs +29 -0
  493. data/sig/mindee/v2/product/extraction/params/data_schema_replace.rbs +21 -0
  494. data/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs +38 -0
  495. data/sig/mindee/v2/product/ocr/ocr.rbs +10 -0
  496. data/sig/mindee/v2/product/ocr/ocr_inference.rbs +14 -0
  497. data/sig/mindee/v2/product/ocr/ocr_page.rbs +15 -0
  498. data/sig/mindee/v2/product/ocr/ocr_response.rbs +23 -0
  499. data/sig/mindee/v2/product/ocr/ocr_result.rbs +14 -0
  500. data/sig/mindee/v2/product/ocr/ocr_word.rbs +15 -0
  501. data/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs +24 -0
  502. data/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs +23 -0
  503. data/sig/mindee/v2/product/split/split.rbs +10 -0
  504. data/sig/mindee/v2/product/split/split_inference.rbs +14 -0
  505. data/sig/mindee/v2/product/split/split_range.rbs +18 -0
  506. data/sig/mindee/v2/product/split/split_response.rbs +25 -0
  507. data/sig/mindee/v2/product/split/split_result.rbs +14 -0
  508. data/sig/mindee/version.rbs +6 -0
  509. data/sig/mindee.rbs +62 -0
  510. metadata +600 -0
@@ -0,0 +1,132 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../input'
4
+ require_relative '../http'
5
+ require_relative 'product'
6
+ require_relative 'parsing/job'
7
+ require_relative '../logging'
8
+
9
module Mindee
  module V2
    # Mindee V2 API Client.
    #
    # Public entry point that delegates every HTTP call to a
    # Mindee::V2::HTTP::MindeeApiV2 instance and adds the enqueue-then-poll
    # workflow on top of it.
    class Client
      # @return [V2::HTTP::MindeeApiV2]
      private attr_reader :mindee_api

      # @param api_key [String] API key handed to the underlying HTTP layer.
      def initialize(api_key: '')
        @mindee_api = Mindee::V2::HTTP::MindeeApiV2.new(api_key: api_key)
      end

      # Retrieves a result from a given queue or URL to the result.
      # @param product [Class<Mindee::V2::Product::BaseProduct>] The return class.
      # @param resource [String] ID of the inference or URL to the result.
      # @return [Mindee::V2::Parsing::BaseResponse]
      def get_result(product, resource)
        @mindee_api.req_get_result(product, resource)
      end

      # Retrieves the current state of a job.
      # @param job_id [String] ID of the job.
      # @return [Mindee::V2::Parsing::JobResponse]
      def get_job(job_id)
        @mindee_api.req_get_job(job_id)
      end

      # Enqueue a document for async parsing.
      # @param product [Class<Mindee::V2::Product::BaseProduct>] The return class.
      # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
      #   The source of the input document (local file or URL).
      # @param params [Hash, Input::BaseParameters] Parameters for the inference.
      # @return [Mindee::V2::Parsing::JobResponse]
      def enqueue(product, input_source, params)
        normalized_params = normalize_parameters(product.params_type, params)
        normalized_params.validate_async_params
        logger.debug("Enqueueing document to model '#{normalized_params.model_id}'.")

        @mindee_api.req_post_enqueue(input_source, normalized_params)
      end

      # Enqueues to an asynchronous endpoint and automatically polls for a response.
      #
      # The job is polled at most `polling_options.max_retries` times (and always at
      # least once). Every poll, including the last one, is checked for a result URL,
      # so a job that completes on the final attempt is returned instead of being
      # reported as a timeout.
      #
      # @param product [Class<Mindee::V2::Product::BaseProduct>] The return class.
      # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
      #   The source of the input document (local file or URL).
      # @param params [Hash, Input::BaseParameters] Parameters for the inference.
      # @return [Parsing::BaseResponse]
      # @raise [Mindee::Error::MindeeError] if enqueueing yields no job ID, or if polling times out.
      # @raise [Mindee::Error::MindeeHTTPErrorV2] if the last polled job carries an error.
      def enqueue_and_get_result(product, input_source, params)
        # Normalise once; `enqueue` validates the parameters and passes a
        # non-Hash parameter object through unchanged.
        normalized_params = normalize_parameters(product.params_type, params)
        enqueue_response = enqueue(product, input_source, normalized_params)

        job_id = enqueue_response.job.id
        if job_id.nil? || job_id.empty?
          logger.error("Failed enqueueing:\n#{enqueue_response.raw_http}")
          raise Mindee::Error::MindeeError, 'Enqueueing of the document failed.'
        end
        logger.debug("Successfully enqueued document with job id: #{job_id}.")

        polling = normalized_params.polling_options
        sleep(polling.initial_delay_sec)

        attempts = 0
        poll_results = nil
        loop do
          attempts += 1
          poll_results = get_job(job_id)
          job = poll_results.job

          break if job.status == 'Failed'
          return get_result(product, job.result_url) unless job.result_url.nil?
          # Stop only after the freshest poll has been inspected.
          break if attempts >= polling.max_retries

          logger.debug(
            "Successfully enqueued inference with job id: #{job_id}.\n" \
            "Attempt n°#{attempts}/#{polling.max_retries}.\n" \
            "Job status: #{job.status}."
          )
          sleep(polling.delay_sec)
        end

        error = poll_results.job.error
        unless error.nil?
          err_to_raise = Mindee::Error::MindeeHTTPErrorV2.new(error)
          # NOTE: purposefully decoupled from the line above, otherwise rubocop thinks `error` is a `message` param.
          raise err_to_raise
        end

        sec_count = polling.delay_sec * attempts
        raise Mindee::Error::MindeeError,
              "Asynchronous parsing request timed out after #{sec_count} seconds"
      end

      # Searches for a list of available models for the given API key.
      # @param model_name [String]
      # @param model_type [String]
      # @return [Mindee::V2::Parsing::Search::SearchResponse]
      def search_models(model_name, model_type)
        @mindee_api.search_models(model_name, model_type)
      end

      private

      # If needed, converts the parsing options provided as a hash into a proper BaseParameters subclass object.
      # @param param_class [Class<BaseParameters>] Class used to build the parameters when a Hash is given.
      # @param params [Hash, BaseParameters] Params; anything that is not a Hash is returned untouched.
      # @return [BaseParameters]
      def normalize_parameters(param_class, params)
        return param_class.from_hash(params: params) if params.is_a?(Hash)

        params
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
module Mindee
  module V2
    module FileOperation
      # Crop operations.
      module Crop
        # Extracts a single crop from the document.
        #
        # The crop's polygon is handed to the image extractor for the crop's page,
        # and the first extracted element is returned.
        #
        # @param input_source [LocalInputSource] Local Input Source to extract the crop from.
        # @param crop [FieldLocation] Crop to extract.
        # @return [ExtractedImage]
        def self.extract_single_crop(input_source, crop)
          extracted = Mindee::Image::ImageExtractor.extract_multiple_images_from_source(
            input_source, crop.page, [crop.polygon]
          )
          extracted.first
        end

        # Extracts every crop of a document, grouped page by page.
        #
        # @param input_source [LocalInputSource] Local Input Source to extract the crops from.
        # @param crops [Array<CropItem>] List of crops.
        # @return [CropFiles] Extracted crops, ordered by page index.
        # @raise [MindeeError] if the crops array is nil or empty.
        def self.extract_crops(input_source, crops)
          if crops.nil? || crops.empty?
            raise Mindee::Error::MindeeError,
                  'No possible candidates found for Crop extraction.'
          end

          # One bucket of polygons per page of the source document.
          polygons_by_page = Array.new(input_source.page_count) { [] }
          crops.each do |item|
            location = item.location
            polygons_by_page[location.page] << location.polygon
          end

          collected = [] # @type var collected: Array[Image::ExtractedImage]
          polygons_by_page.each.with_index do |page_polygons, page_index|
            collected.concat(
              Mindee::Image::ImageExtractor.extract_multiple_images_from_source(
                input_source, page_index, page_polygons
              )
            )
          end

          CropFiles.new(collected)
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
# FileUtils is used below; requiring it here keeps this file loadable on its own.
require 'fileutils'

module Mindee
  module V2
    module FileOperation
      # Collection of cropped files.
      class CropFiles < Array
        # Save all extracted crops to disk.
        #
        # Files are named "<prefix>_NNN.<ext>", numbered from 001. The extension
        # follows `file_format` (lower-cased, leading dot dropped) so that the file
        # name matches the format passed to each crop's `write_to_file`; it is
        # "jpg" when no format is given.
        #
        # @param path [String, Pathname] Directory to save the extracted crops to (created if missing).
        # @param prefix [String] Prefix to add to the filename, defaults to 'crop'.
        # @param file_format [String, nil] File format to save the crops as, defaults to jpg if nil.
        def save_all_to_disk(path, prefix: 'crop', file_format: nil)
          FileUtils.mkdir_p(path)

          extension = file_format.to_s.delete_prefix('.').downcase
          extension = 'jpg' if extension.empty?

          each.with_index(1) do |crop, idx|
            filename = "#{prefix}_#{format('%03d', idx)}.#{extension}"
            file_path = File.join(path.to_s, filename)

            crop.write_to_file(file_path, file_format)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module Mindee
  module V2
    module FileOperation
      # Split operations.
      module Split
        # Extracts a single split as a complete PDF from the document.
        #
        # @param input_source [LocalInputSource] Input source to split.
        # @param split [Array<Integer>] First and last page index of the range to keep.
        # @return [ExtractedPDF] Extracted PDF
        def self.extract_single_split(input_source, split)
          single = extract_splits(input_source, [split])
          single.first
        end

        # Extracts splits as complete PDFs from the document.
        #
        # Each split is read as a `[first, last]` pair and expanded into the full
        # inclusive list of page indexes before being handed to the PDF extractor.
        #
        # @param input_source [LocalInputSource] Input source to split.
        # @param splits [Array<Array<Integer>>] List of `[first, last]` page ranges to keep.
        # @return [SplitFiles] A list of extracted sub-documents.
        # @raise [MindeeError] if no indexes are provided.
        def self.extract_splits(input_source, splits)
          no_input = splits.nil? || splits.empty?
          raise Mindee::Error::MindeeError, 'No indexes provided.' if no_input

          extractor = Mindee::PDF::PDFExtractor.new(input_source)
          expanded_ranges = splits.map { |bounds| Range.new(bounds[0], bounds[1]).to_a }

          SplitFiles.new(extractor.extract_sub_documents(expanded_ranges))
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Mindee
  module V2
    module FileOperation
      # Collection of split files.
      class SplitFiles < Array
        # Save all extracted splits to disk.
        #
        # The target directory is created when missing; files are named
        # "<prefix>_NNN.pdf", numbered from 001 in collection order.
        #
        # @param path [String, Pathname] Path to save the extracted splits to.
        # @param prefix [String] Prefix to add to the filename, defaults to 'split'.
        def save_all_to_disk(path, prefix: 'split')
          FileUtils.mkdir_p(path)
          target_dir = path.to_s

          each_with_index do |split, position|
            sequence = format('%03d', position + 1)
            split.write_to_file(File.join(target_dir, "#{prefix}_#{sequence}.pdf"))
          end
        end
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'file_operation/crop'
4
+ require_relative 'file_operation/crop_files'
5
+ require_relative 'file_operation/split'
6
+ require_relative 'file_operation/split_files'
@@ -0,0 +1,7 @@
1
+ inherit_from: ../../../../.rubocop.yml
2
+
3
+ Metrics/CyclomaticComplexity:
4
+ Max: 10
5
+
6
+ Metrics/PerceivedComplexity:
7
+ Max: 10
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../version'
4
+
5
module Mindee
  module V2
    module HTTP
      # Connection settings for the V2 API: API key, base URL, request timeout and user agent.
      # The key, base URL and timeout can each be supplied through an environment variable.
      class ApiV2Settings
        # V2 API key's default environment key name.
        MINDEE_V2_API_KEY_ENV_NAME = 'MINDEE_V2_API_KEY'
        # V2 API key's default value.
        MINDEE_V2_API_KEY_DEFAULT = nil

        # V2 base URL default environment key name.
        MINDEE_V2_BASE_URL_ENV_NAME = 'MINDEE_V2_BASE_URL'
        # V2 base URL default value.
        MINDEE_V2_BASE_URL_DEFAULT = 'https://api-v2.mindee.net'

        # HTTP request timeout default environment key name.
        MINDEE_V2_REQUEST_TIMEOUT_ENV_NAME = 'MINDEE_V2_REQUEST_TIMEOUT'
        # HTTP request timeout default value.
        MINDEE_V2_TIMEOUT_DEFAULT = 120

        # Default value for the user agent (same as V1).
        USER_AGENT = "mindee-api-ruby@v#{Mindee::VERSION} ruby-v#{RUBY_VERSION} #{Mindee::PLATFORM}".freeze

        # @return [String]
        attr_reader :api_key
        # @return [Integer]
        attr_reader :request_timeout
        # @return [String]
        attr_reader :base_url
        # @return [String]
        attr_reader :user_agent

        # @param api_key [String, nil] Explicit API key. When nil or empty, the key is read
        #   from the environment variable named by MINDEE_V2_API_KEY_ENV_NAME.
        def initialize(api_key: nil)
          @request_timeout = ENV.fetch(MINDEE_V2_REQUEST_TIMEOUT_ENV_NAME, MINDEE_V2_TIMEOUT_DEFAULT).to_i
          @api_key = resolve_api_key(api_key)
          # A trailing slash is dropped so endpoint paths can be appended with a leading '/'.
          @base_url = ENV.fetch(MINDEE_V2_BASE_URL_ENV_NAME, MINDEE_V2_BASE_URL_DEFAULT).chomp('/')
          @user_agent = USER_AGENT
        end

        # Checks API key for a value.
        # @return [nil] when a key is present.
        # @raise [Error::MindeeAPIError] Raises if the api key is empty or nil.
        def check_api_key
          return unless @api_key.nil? || @api_key.to_s.empty?

          raise Error::MindeeAPIError,
                "Missing API key. check your Client Configuration.\nYou can set this using the " \
                "'#{MINDEE_V2_API_KEY_ENV_NAME}' environment variable."
        end

        private

        # Picks the explicit key when one is given, otherwise the environment value.
        # The debug message is emitted whenever the environment actually supplies the
        # key, i.e. for an empty explicit key as well as for a nil one.
        #
        # @param api_key [String, nil]
        # @return [String, nil]
        def resolve_api_key(api_key)
          return api_key unless api_key.nil? || api_key.empty?

          env_key = ENV.fetch(MINDEE_V2_API_KEY_ENV_NAME, MINDEE_V2_API_KEY_DEFAULT)
          logger.debug('API key set from environment') unless env_key.to_s.empty?
          env_key
        end
      end
    end
  end
end
@@ -0,0 +1,230 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../input'
4
+ require_relative '../../http'
5
+ require_relative '../../error'
6
+
7
module Mindee
  module V2
    module HTTP
      # API client for version 2.
      #
      # Builds and sends the HTTPS requests (enqueue, job polling, result retrieval,
      # model search) and converts the JSON answers into response objects.
      class MindeeApiV2
        # @return [ApiV2Settings]
        attr_reader :settings

        # @param api_key [String, nil]
        def initialize(api_key: nil)
          @settings = ApiV2Settings.new(api_key: api_key)
        end

        # Sends a file to the queue.
        #
        # @param input_source [Input::Source::LocalInputSource, Input::Source::URLInputSource]
        # @param params [Input::BaseParameters]
        # @return [Mindee::V2::Parsing::JobResponse]
        # @raise [Mindee::Error::MindeeHTTPErrorV2]
        def req_post_enqueue(input_source, params)
          @settings.check_api_key
          response = enqueue(
            input_source,
            params
          )
          Mindee::V2::Parsing::JobResponse.new(process_response(response))
        end

        # Retrieves a result from a given queue.
        # @param product [Class<Mindee::V2::Product::BaseProduct>] The return class.
        # @param resource [String] ID of the inference or URL to the result.
        # @return [Mindee::V2::Parsing::BaseResponse]
        # @raise [Mindee::Error::MindeeError] if `resource` is a plain-HTTP URL.
        def req_get_result(product, resource)
          # An HTTPS URL is fetched as-is; anything else is treated as an inference ID.
          return req_get_result_url(product.response_type, resource) if uri?(resource)

          @settings.check_api_key
          response = result_req_get(
            resource,
            product
          )
          product.response_type.new(process_response(response))
        end

        # Retrieves a queued job.
        #
        # @param job_id [String] ID of the job.
        # @return [Mindee::V2::Parsing::JobResponse]
        def req_get_job(job_id)
          @settings.check_api_key
          response = poll("#{@settings.base_url}/v2/jobs/#{job_id}")
          Mindee::V2::Parsing::JobResponse.new(process_response(response))
        end

        # Retrieves a list of models.
        # @param model_name [String, nil]
        # @param model_type [String, nil]
        # @return [Mindee::V2::Parsing::Search::SearchResponse]
        def search_models(model_name, model_type)
          Mindee::V2::Parsing::Search::SearchResponse.new(process_response(req_get_search_models(model_name,
                                                                                                 model_type)))
        end

        private

        # Retrieves a list of models.
        # @param model_name [String, nil] Added as the `name` query parameter when given.
        # @param model_type [String, nil] Added as the `model_type` query parameter when given.
        # @return [Net::HTTPResponse]
        def req_get_search_models(model_name, model_type)
          url = "#{@settings.base_url}/v2/search/models"
          uri = URI(url)

          query_params = {} # @type var query_params: Hash[Symbol, String | nil]
          query_params[:name] = model_name if model_name
          query_params[:model_type] = model_type if model_type
          uri.query = URI.encode_www_form(query_params) unless query_params.empty?

          headers = {
            'Authorization' => @settings.api_key,
            'User-Agent' => @settings.user_agent,
          }
          req = Net::HTTP::Get.new(uri, headers)
          req['Transfer-Encoding'] = 'chunked'

          Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @settings.request_timeout) do |http|
            return http.request(req)
          end
          raise Mindee::Error::MindeeError, 'Could not resolve server response.'
        end

        # Tells whether a resource is an HTTPS URL (as opposed to a bare ID).
        #
        # @param resource [String] Resource to check.
        # @return [Boolean] true for an `https` URL, false for anything else that is not `http`.
        # @raise [Mindee::Error::MindeeError] if the resource is a plain `http` URL.
        def uri?(resource)
          uri = URI.parse(resource)
          # `raise`, not `throw`: `throw` with no matching `catch` surfaces as an
          # UncaughtThrowError instead of the intended MindeeError.
          raise Mindee::Error::MindeeError, 'HTTP is not supported.' if uri.scheme == 'http'

          uri.scheme == 'https'
        rescue URI::BadURIError, URI::InvalidURIError
          false
        end

        # Retrieves a queued job from its full URL.
        #
        # @param url [String]
        # @return [Mindee::V2::Parsing::JobResponse]
        def req_get_job_url(url)
          @settings.check_api_key
          response = poll(url)
          Mindee::V2::Parsing::JobResponse.new(process_response(response))
        end

        # Retrieves a result from its full URL.
        #
        # @param result_class [Class<Mindee::V2::Parsing::BaseResponse>]
        # @param url [String]
        # @return [Mindee::V2::Parsing::BaseResponse]
        def req_get_result_url(result_class, url)
          @settings.check_api_key
          response = poll(url)
          result_class.new(process_response(response))
        end

        # Converts an HTTP response to a parsed response object.
        #
        # @param response [Net::HTTPResponse, nil]
        # @return [Hash]
        # @raise Throws if the server returned an error.
        def process_response(response)
          if !response.nil? && response.respond_to?(:body) &&
             Mindee::HTTP::ResponseValidation.valid_v2_response?(response)
            return JSON.parse(response.body, object_class: Hash)
          end

          # No usable body: substitute a synthetic error payload so that an error can still be built.
          response_body = if response.nil? || !response.respond_to?(:body)
                            '{ "status": -1,
 "detail": "Empty server response." }'
                          else
                            response.body
                          end
          raise Mindee::HTTP::ErrorHandler.generate_v2_error(JSON.parse(response_body).transform_keys(&:to_sym))
        end

        # Polls a queue for either a result or a job.
        # @param url [String] URL, passed as a string.
        # @return [Net::HTTPResponse]
        def poll(url)
          uri = URI(url)
          headers = {
            'Authorization' => @settings.api_key,
            'User-Agent' => @settings.user_agent,
          }
          req = Net::HTTP::Get.new(uri, headers)
          req['Transfer-Encoding'] = 'chunked'

          Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @settings.request_timeout) do |http|
            return http.request(req)
          end
          raise Mindee::Error::MindeeError, 'Could not resolve server response.'
        end

        # Polls the API for the result of an inference.
        #
        # @param queue_id [String] ID of the queue.
        # @param product [Class<Mindee::V2::Product::BaseProduct>] The return class.
        # @return [Net::HTTPResponse]
        def result_req_get(queue_id, product)
          poll("#{@settings.base_url}/v2/products/#{product.slug}/results/#{queue_id}")
        end

        # Handle parameters for the enqueue form
        # @param form_data [Array] Array of form fields
        # @param params [V2::Product::Extraction::Params::ExtractionParameters] Inference options.
        # @return [Array] The same `form_data` array, with the option fields appended.
        def enqueue_form_options(form_data, params)
          form_data.push(['rag', params.rag.to_s]) unless params.rag.nil?
          form_data.push(['raw_text', params.raw_text.to_s]) unless params.raw_text.nil?
          form_data.push(['polygon', params.polygon.to_s]) unless params.polygon.nil?
          form_data.push(['confidence', params.confidence.to_s]) unless params.confidence.nil?
          form_data.push ['text_context', params.text_context] if params.text_context
          form_data.push ['data_schema', params.data_schema.to_s] if params.data_schema
          unless params.webhook_ids.nil? || params.webhook_ids.empty?
            params.webhook_ids.each do |webhook_id|
              form_data.push ['webhook_ids[]', webhook_id]
            end
          end
          form_data
        end

        # Posts the document and its parameters to the product's enqueue endpoint
        # as a multipart form.
        #
        # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
        # @param params [Input::BaseParameters] Inference options.
        # @return [Net::HTTPResponse, nil]
        def enqueue(input_source, params)
          uri = URI("#{@settings.base_url}/v2/products/#{params.slug}/enqueue")

          form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource)
                        [['url', input_source.url]] # : Array[Array[untyped]]
                      else
                        file_data, file_metadata = input_source.read_contents(close: params.close_file)
                        [['file', file_data, file_metadata]] # : Array[Array[untyped]]
                      end
          form_data.push(['model_id', params.model_id])
          form_data.push ['file_alias', params.file_alias] if params.file_alias
          if params.is_a?(V2::Product::Extraction::Params::ExtractionParameters)
            form_data = enqueue_form_options(form_data, params)
          end

          form_data = params.append_form_data(form_data)

          headers = {
            'Authorization' => @settings.api_key,
            'User-Agent' => @settings.user_agent,
          }
          req = Net::HTTP::Post.new(uri, headers)

          req.set_form(form_data, 'multipart/form-data')
          req['Transfer-Encoding'] = 'chunked'

          Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @settings.request_timeout) do |http|
            return http.request(req)
          end
          raise Mindee::Error::MindeeError, 'Could not resolve server response.'
        end
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'http/api_v2_settings'
4
+ require_relative 'http/mindee_api_v2'
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../product/base_product'
4
+ require_relative '../parsing'
5
+
6
module Mindee
  module V2
    module Parsing
      # Base class for V2 inferences: holds the ID, model, file and (optional) job
      # sections read from the server payload.
      class BaseInference < Mindee::V2::Product::BaseProduct
        # @return [InferenceJob] Metadata about the job.
        attr_reader :job
        # @return [V2::Parsing::InferenceModel] Model info for the inference.
        attr_reader :model
        # @return [V2::Parsing::InferenceFile] File info for the inference.
        attr_reader :file
        # @return [String] ID of the inference.
        attr_reader :id

        # @param http_response [Hash] Inference payload, keyed by strings.
        # @raise [ArgumentError] if the payload is not a Hash.
        def initialize(http_response)
          raise ArgumentError, 'Server response must be a Hash' unless http_response.is_a?(Hash)

          super()
          @model = Mindee::V2::Parsing::InferenceModel.new(http_response['model'])
          @file = Mindee::V2::Parsing::InferenceFile.new(http_response['file'])
          @id = http_response['id']
          # The job section is only built when the payload actually carries one.
          return unless http_response.key?('job')

          @job = Mindee::V2::Parsing::InferenceJob.new(http_response['job'])
        end

        # String representation.
        # @return [String]
        def to_s
          sections = [@job, @model, @file].map(&:to_s)
          ['Inference', '#########', *sections].join("\n")
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'common_response'
4
+
5
+ module Mindee
6
+ module V2
7
+ module Parsing
8
+ # Base class for V2 inference responses; it only declares the `inference` reader.
9
+ class BaseResponse < Mindee::V2::Parsing::CommonResponse
10
+ # @return [BaseInference] The inference carried by the response (not assigned by this base class).
11
+ attr_reader :inference
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
module Mindee
  module V2
    module Parsing
      # Base class for inference and job responses on the V2 API.
      # Keeps a JSON string rendering of the payload it was built from.
      class CommonResponse
        # @return [String] JSON serialisation of the payload given to the constructor.
        attr_reader :raw_http

        # @param http_response [Hash] Parsed server payload.
        def initialize(http_response)
          serialized = JSON.generate(http_response)
          @raw_http = serialized
        end
      end
    end
  end
end