mindee-lite 5.0.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (510) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +24 -0
  3. data/.gitattributes +14 -0
  4. data/.gitignore +76 -0
  5. data/.gitmodules +3 -0
  6. data/.pre-commit-config.yaml +36 -0
  7. data/.rubocop.yml +49 -0
  8. data/.yardopts +4 -0
  9. data/CHANGELOG.md +515 -0
  10. data/CODE_OF_CONDUCT.md +129 -0
  11. data/CONTRIBUTING.md +107 -0
  12. data/Gemfile +14 -0
  13. data/LICENSE +21 -0
  14. data/README.md +42 -0
  15. data/Rakefile +40 -0
  16. data/Steepfile +30 -0
  17. data/bin/console +14 -0
  18. data/bin/mindee.rb +30 -0
  19. data/bin/v1/parser.rb +153 -0
  20. data/bin/v1/products.rb +88 -0
  21. data/bin/v2/parser.rb +235 -0
  22. data/bin/v2/products.rb +34 -0
  23. data/docs/code_samples/bank_account_details_v1.txt +24 -0
  24. data/docs/code_samples/bank_account_details_v2.txt +24 -0
  25. data/docs/code_samples/bank_statement_fr_v2_async.txt +24 -0
  26. data/docs/code_samples/barcode_reader_v1.txt +24 -0
  27. data/docs/code_samples/cropper_v1.txt +21 -0
  28. data/docs/code_samples/default.txt +30 -0
  29. data/docs/code_samples/default_async.txt +29 -0
  30. data/docs/code_samples/expense_receipts_v5.txt +25 -0
  31. data/docs/code_samples/expense_receipts_v5_async.txt +24 -0
  32. data/docs/code_samples/financial_document_v1.txt +25 -0
  33. data/docs/code_samples/financial_document_v1_async.txt +24 -0
  34. data/docs/code_samples/idcard_fr_v1.txt +24 -0
  35. data/docs/code_samples/idcard_fr_v2.txt +24 -0
  36. data/docs/code_samples/international_id_v2_async.txt +24 -0
  37. data/docs/code_samples/invoice_splitter_v1_async.txt +24 -0
  38. data/docs/code_samples/invoices_v4.txt +25 -0
  39. data/docs/code_samples/invoices_v4_async.txt +24 -0
  40. data/docs/code_samples/multi_receipts_detector_v1.txt +24 -0
  41. data/docs/code_samples/passport_v1.txt +24 -0
  42. data/docs/code_samples/resume_v1_async.txt +24 -0
  43. data/docs/code_samples/v2_classification.txt +30 -0
  44. data/docs/code_samples/v2_crop.txt +30 -0
  45. data/docs/code_samples/v2_extraction.txt +42 -0
  46. data/docs/code_samples/v2_extraction_webhook.txt +45 -0
  47. data/docs/code_samples/v2_ocr.txt +30 -0
  48. data/docs/code_samples/v2_split.txt +30 -0
  49. data/docs/code_samples/workflow_execution.txt +28 -0
  50. data/docs/code_samples/workflow_polling.txt +35 -0
  51. data/examples/auto_invoice_splitter_extraction.rb +48 -0
  52. data/examples/auto_multi_receipts_detector_extraction.rb +30 -0
  53. data/lib/mindee/dependency.rb +29 -0
  54. data/lib/mindee/error/mindee_error.rb +17 -0
  55. data/lib/mindee/error/mindee_http_error.rb +36 -0
  56. data/lib/mindee/error/mindee_http_error_v2.rb +45 -0
  57. data/lib/mindee/error/mindee_http_unknown_error_v2.rb +18 -0
  58. data/lib/mindee/error/mindee_input_error.rb +30 -0
  59. data/lib/mindee/error.rb +6 -0
  60. data/lib/mindee/geometry/min_max.rb +23 -0
  61. data/lib/mindee/geometry/point.rb +41 -0
  62. data/lib/mindee/geometry/polygon.rb +37 -0
  63. data/lib/mindee/geometry/quadrilateral.rb +50 -0
  64. data/lib/mindee/geometry/utils.rb +88 -0
  65. data/lib/mindee/geometry.rb +7 -0
  66. data/lib/mindee/http/.rubocop.yml +7 -0
  67. data/lib/mindee/http/http_error_handler.rb +106 -0
  68. data/lib/mindee/http/response_validation.rb +81 -0
  69. data/lib/mindee/http.rb +3 -0
  70. data/lib/mindee/image/extracted_image.rb +89 -0
  71. data/lib/mindee/image/image_compressor.rb +29 -0
  72. data/lib/mindee/image/image_extractor.rb +118 -0
  73. data/lib/mindee/image/image_utils.rb +165 -0
  74. data/lib/mindee/image.rb +6 -0
  75. data/lib/mindee/input/base_parameters.rb +149 -0
  76. data/lib/mindee/input/local_response.rb +80 -0
  77. data/lib/mindee/input/polling_options.rb +26 -0
  78. data/lib/mindee/input/sources/base64_input_source.rb +31 -0
  79. data/lib/mindee/input/sources/bytes_input_source.rb +21 -0
  80. data/lib/mindee/input/sources/file_input_source.rb +20 -0
  81. data/lib/mindee/input/sources/local_input_source.rb +216 -0
  82. data/lib/mindee/input/sources/path_input_source.rb +20 -0
  83. data/lib/mindee/input/sources/url_input_source.rb +130 -0
  84. data/lib/mindee/input/sources.rb +8 -0
  85. data/lib/mindee/input.rb +4 -0
  86. data/lib/mindee/logging/logger.rb +24 -0
  87. data/lib/mindee/logging.rb +3 -0
  88. data/lib/mindee/page_options.rb +24 -0
  89. data/lib/mindee/pdf/extracted_pdf.rb +70 -0
  90. data/lib/mindee/pdf/pdf_compressor.rb +121 -0
  91. data/lib/mindee/pdf/pdf_extractor.rb +121 -0
  92. data/lib/mindee/pdf/pdf_processor.rb +91 -0
  93. data/lib/mindee/pdf/pdf_tools.rb +201 -0
  94. data/lib/mindee/pdf.rb +7 -0
  95. data/lib/mindee/v1/client.rb +490 -0
  96. data/lib/mindee/v1/extraction/multi_receipts_extractor.rb +32 -0
  97. data/lib/mindee/v1/extraction.rb +3 -0
  98. data/lib/mindee/v1/http/.rubocop.yml +7 -0
  99. data/lib/mindee/v1/http/endpoint.rb +221 -0
  100. data/lib/mindee/v1/http/workflow_endpoint.rb +93 -0
  101. data/lib/mindee/v1/http.rb +4 -0
  102. data/lib/mindee/v1/parsing/common/api_request.rb +38 -0
  103. data/lib/mindee/v1/parsing/common/api_response.rb +63 -0
  104. data/lib/mindee/v1/parsing/common/document.rb +86 -0
  105. data/lib/mindee/v1/parsing/common/execution.rb +78 -0
  106. data/lib/mindee/v1/parsing/common/execution_file.rb +26 -0
  107. data/lib/mindee/v1/parsing/common/execution_priority.rb +38 -0
  108. data/lib/mindee/v1/parsing/common/extras/cropper_extra.rb +32 -0
  109. data/lib/mindee/v1/parsing/common/extras/extras.rb +62 -0
  110. data/lib/mindee/v1/parsing/common/extras/full_text_ocr_extra.rb +35 -0
  111. data/lib/mindee/v1/parsing/common/extras/rag_extra.rb +28 -0
  112. data/lib/mindee/v1/parsing/common/extras.rb +6 -0
  113. data/lib/mindee/v1/parsing/common/inference.rb +69 -0
  114. data/lib/mindee/v1/parsing/common/job.rb +48 -0
  115. data/lib/mindee/v1/parsing/common/ocr/mvision_v1.rb +52 -0
  116. data/lib/mindee/v1/parsing/common/ocr/ocr.rb +180 -0
  117. data/lib/mindee/v1/parsing/common/ocr.rb +3 -0
  118. data/lib/mindee/v1/parsing/common/orientation.rb +28 -0
  119. data/lib/mindee/v1/parsing/common/page.rb +49 -0
  120. data/lib/mindee/v1/parsing/common/prediction.rb +19 -0
  121. data/lib/mindee/v1/parsing/common/product.rb +26 -0
  122. data/lib/mindee/v1/parsing/common/workflow_response.rb +30 -0
  123. data/lib/mindee/v1/parsing/common.rb +15 -0
  124. data/lib/mindee/v1/parsing/standard/abstract_field.rb +74 -0
  125. data/lib/mindee/v1/parsing/standard/address_field.rb +51 -0
  126. data/lib/mindee/v1/parsing/standard/amount_field.rb +28 -0
  127. data/lib/mindee/v1/parsing/standard/base_field.rb +30 -0
  128. data/lib/mindee/v1/parsing/standard/boolean_field.rb +29 -0
  129. data/lib/mindee/v1/parsing/standard/classification_field.rb +18 -0
  130. data/lib/mindee/v1/parsing/standard/company_registration_field.rb +45 -0
  131. data/lib/mindee/v1/parsing/standard/date_field.rb +40 -0
  132. data/lib/mindee/v1/parsing/standard/feature_field.rb +26 -0
  133. data/lib/mindee/v1/parsing/standard/locale_field.rb +52 -0
  134. data/lib/mindee/v1/parsing/standard/payment_details_field.rb +44 -0
  135. data/lib/mindee/v1/parsing/standard/position_field.rb +61 -0
  136. data/lib/mindee/v1/parsing/standard/string_field.rb +26 -0
  137. data/lib/mindee/v1/parsing/standard/tax_field.rb +110 -0
  138. data/lib/mindee/v1/parsing/standard.rb +15 -0
  139. data/lib/mindee/v1/parsing/universal/universal_list_field.rb +60 -0
  140. data/lib/mindee/v1/parsing/universal/universal_object_field.rb +123 -0
  141. data/lib/mindee/v1/parsing/universal.rb +4 -0
  142. data/lib/mindee/v1/parsing.rb +5 -0
  143. data/lib/mindee/v1/product/.rubocop.yml +12 -0
  144. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1.rb +47 -0
  145. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb +47 -0
  146. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rb +38 -0
  147. data/lib/mindee/v1/product/cropper/cropper_v1.rb +47 -0
  148. data/lib/mindee/v1/product/cropper/cropper_v1_document.rb +15 -0
  149. data/lib/mindee/v1/product/cropper/cropper_v1_page.rb +55 -0
  150. data/lib/mindee/v1/product/financial_document/financial_document_v1.rb +47 -0
  151. data/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb +329 -0
  152. data/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb +124 -0
  153. data/lib/mindee/v1/product/financial_document/financial_document_v1_line_items.rb +64 -0
  154. data/lib/mindee/v1/product/financial_document/financial_document_v1_page.rb +38 -0
  155. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rb +49 -0
  156. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb +49 -0
  157. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rb +40 -0
  158. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rb +49 -0
  159. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb +63 -0
  160. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb +60 -0
  161. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rb +40 -0
  162. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2.rb +49 -0
  163. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb +169 -0
  164. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rb +40 -0
  165. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb +78 -0
  166. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rb +56 -0
  167. data/lib/mindee/v1/product/fr/id_card/id_card_v1.rb +49 -0
  168. data/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb +106 -0
  169. data/lib/mindee/v1/product/fr/id_card/id_card_v1_page.rb +57 -0
  170. data/lib/mindee/v1/product/fr/id_card/id_card_v2.rb +49 -0
  171. data/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb +143 -0
  172. data/lib/mindee/v1/product/fr/id_card/id_card_v2_page.rb +65 -0
  173. data/lib/mindee/v1/product/international_id/international_id_v2.rb +47 -0
  174. data/lib/mindee/v1/product/international_id/international_id_v2_document.rb +164 -0
  175. data/lib/mindee/v1/product/international_id/international_id_v2_page.rb +38 -0
  176. data/lib/mindee/v1/product/invoice/invoice_v4.rb +47 -0
  177. data/lib/mindee/v1/product/invoice/invoice_v4_document.rb +300 -0
  178. data/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb +124 -0
  179. data/lib/mindee/v1/product/invoice/invoice_v4_line_items.rb +64 -0
  180. data/lib/mindee/v1/product/invoice/invoice_v4_page.rb +38 -0
  181. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rb +47 -0
  182. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb +66 -0
  183. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +58 -0
  184. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +50 -0
  185. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rb +38 -0
  186. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rb +47 -0
  187. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +38 -0
  188. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +38 -0
  189. data/lib/mindee/v1/product/passport/passport_v1.rb +47 -0
  190. data/lib/mindee/v1/product/passport/passport_v1_document.rb +112 -0
  191. data/lib/mindee/v1/product/passport/passport_v1_page.rb +38 -0
  192. data/lib/mindee/v1/product/receipt/receipt_v5.rb +47 -0
  193. data/lib/mindee/v1/product/receipt/receipt_v5_document.rb +187 -0
  194. data/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb +88 -0
  195. data/lib/mindee/v1/product/receipt/receipt_v5_line_items.rb +56 -0
  196. data/lib/mindee/v1/product/receipt/receipt_v5_page.rb +38 -0
  197. data/lib/mindee/v1/product/resume/resume_v1.rb +47 -0
  198. data/lib/mindee/v1/product/resume/resume_v1_certificate.rb +82 -0
  199. data/lib/mindee/v1/product/resume/resume_v1_certificates.rb +60 -0
  200. data/lib/mindee/v1/product/resume/resume_v1_document.rb +340 -0
  201. data/lib/mindee/v1/product/resume/resume_v1_education.rb +106 -0
  202. data/lib/mindee/v1/product/resume/resume_v1_educations.rb +66 -0
  203. data/lib/mindee/v1/product/resume/resume_v1_language.rb +66 -0
  204. data/lib/mindee/v1/product/resume/resume_v1_languages.rb +56 -0
  205. data/lib/mindee/v1/product/resume/resume_v1_page.rb +38 -0
  206. data/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb +122 -0
  207. data/lib/mindee/v1/product/resume/resume_v1_professional_experiences.rb +70 -0
  208. data/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb +66 -0
  209. data/lib/mindee/v1/product/resume/resume_v1_social_networks_urls.rb +56 -0
  210. data/lib/mindee/v1/product/universal/universal.rb +48 -0
  211. data/lib/mindee/v1/product/universal/universal_document.rb +35 -0
  212. data/lib/mindee/v1/product/universal/universal_page.rb +54 -0
  213. data/lib/mindee/v1/product/universal/universal_prediction.rb +128 -0
  214. data/lib/mindee/v1/product.rb +18 -0
  215. data/lib/mindee/v1.rb +7 -0
  216. data/lib/mindee/v2/client.rb +132 -0
  217. data/lib/mindee/v2/file_operation/crop.rb +51 -0
  218. data/lib/mindee/v2/file_operation/crop_files.rb +25 -0
  219. data/lib/mindee/v2/file_operation/split.rb +37 -0
  220. data/lib/mindee/v2/file_operation/split_files.rb +25 -0
  221. data/lib/mindee/v2/file_operation.rb +6 -0
  222. data/lib/mindee/v2/http/.rubocop.yml +7 -0
  223. data/lib/mindee/v2/http/api_v2_settings.rb +65 -0
  224. data/lib/mindee/v2/http/mindee_api_v2.rb +230 -0
  225. data/lib/mindee/v2/http.rb +4 -0
  226. data/lib/mindee/v2/parsing/base_inference.rb +44 -0
  227. data/lib/mindee/v2/parsing/base_response.rb +15 -0
  228. data/lib/mindee/v2/parsing/common_response.rb +20 -0
  229. data/lib/mindee/v2/parsing/error_item.rb +21 -0
  230. data/lib/mindee/v2/parsing/error_response.rb +51 -0
  231. data/lib/mindee/v2/parsing/field/base_field.rb +63 -0
  232. data/lib/mindee/v2/parsing/field/field_confidence.rb +128 -0
  233. data/lib/mindee/v2/parsing/field/field_location.rb +33 -0
  234. data/lib/mindee/v2/parsing/field/inference_fields.rb +105 -0
  235. data/lib/mindee/v2/parsing/field/list_field.rb +79 -0
  236. data/lib/mindee/v2/parsing/field/object_field.rb +138 -0
  237. data/lib/mindee/v2/parsing/field/simple_field.rb +60 -0
  238. data/lib/mindee/v2/parsing/field.rb +9 -0
  239. data/lib/mindee/v2/parsing/inference_active_options.rb +67 -0
  240. data/lib/mindee/v2/parsing/inference_file.rb +38 -0
  241. data/lib/mindee/v2/parsing/inference_job.rb +25 -0
  242. data/lib/mindee/v2/parsing/inference_model.rb +30 -0
  243. data/lib/mindee/v2/parsing/job.rb +93 -0
  244. data/lib/mindee/v2/parsing/job_response.rb +30 -0
  245. data/lib/mindee/v2/parsing/job_webhook.rb +59 -0
  246. data/lib/mindee/v2/parsing/rag_metadata.rb +17 -0
  247. data/lib/mindee/v2/parsing/raw_text.rb +27 -0
  248. data/lib/mindee/v2/parsing/raw_text_page.rb +24 -0
  249. data/lib/mindee/v2/parsing/search/pagination_metadata.rb +44 -0
  250. data/lib/mindee/v2/parsing/search/search_model.rb +38 -0
  251. data/lib/mindee/v2/parsing/search/search_models.rb +34 -0
  252. data/lib/mindee/v2/parsing/search/search_response.rb +38 -0
  253. data/lib/mindee/v2/parsing/search.rb +6 -0
  254. data/lib/mindee/v2/parsing.rb +16 -0
  255. data/lib/mindee/v2/product/base_product.rb +28 -0
  256. data/lib/mindee/v2/product/classification/classification.rb +20 -0
  257. data/lib/mindee/v2/product/classification/classification_classifier.rb +25 -0
  258. data/lib/mindee/v2/product/classification/classification_inference.rb +35 -0
  259. data/lib/mindee/v2/product/classification/classification_response.rb +32 -0
  260. data/lib/mindee/v2/product/classification/classification_result.rb +27 -0
  261. data/lib/mindee/v2/product/classification/params/classification_parameters.rb +47 -0
  262. data/lib/mindee/v2/product/crop/crop.rb +20 -0
  263. data/lib/mindee/v2/product/crop/crop_inference.rb +34 -0
  264. data/lib/mindee/v2/product/crop/crop_item.rb +39 -0
  265. data/lib/mindee/v2/product/crop/crop_response.rb +40 -0
  266. data/lib/mindee/v2/product/crop/crop_result.rb +34 -0
  267. data/lib/mindee/v2/product/crop/params/crop_parameters.rb +47 -0
  268. data/lib/mindee/v2/product/extraction/extraction.rb +21 -0
  269. data/lib/mindee/v2/product/extraction/extraction_inference.rb +40 -0
  270. data/lib/mindee/v2/product/extraction/extraction_response.rb +32 -0
  271. data/lib/mindee/v2/product/extraction/extraction_result.rb +44 -0
  272. data/lib/mindee/v2/product/extraction/params/data_schema.rb +51 -0
  273. data/lib/mindee/v2/product/extraction/params/data_schema_field.rb +69 -0
  274. data/lib/mindee/v2/product/extraction/params/data_schema_replace.rb +39 -0
  275. data/lib/mindee/v2/product/extraction/params/extraction_parameters.rb +125 -0
  276. data/lib/mindee/v2/product/ocr/ocr.rb +20 -0
  277. data/lib/mindee/v2/product/ocr/ocr_inference.rb +34 -0
  278. data/lib/mindee/v2/product/ocr/ocr_page.rb +33 -0
  279. data/lib/mindee/v2/product/ocr/ocr_response.rb +32 -0
  280. data/lib/mindee/v2/product/ocr/ocr_result.rb +34 -0
  281. data/lib/mindee/v2/product/ocr/ocr_word.rb +29 -0
  282. data/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +47 -0
  283. data/lib/mindee/v2/product/split/params/split_parameters.rb +48 -0
  284. data/lib/mindee/v2/product/split/split.rb +19 -0
  285. data/lib/mindee/v2/product/split/split_inference.rb +34 -0
  286. data/lib/mindee/v2/product/split/split_range.rb +38 -0
  287. data/lib/mindee/v2/product/split/split_response.rb +40 -0
  288. data/lib/mindee/v2/product/split/split_result.rb +34 -0
  289. data/lib/mindee/v2/product.rb +7 -0
  290. data/lib/mindee/v2.rb +7 -0
  291. data/lib/mindee/version.rb +26 -0
  292. data/lib/mindee.rb +135 -0
  293. data/mindee-lite.gemspec +36 -0
  294. data/mindee.gemspec +44 -0
  295. data/sig/custom/marcel.rbs +3 -0
  296. data/sig/custom/mini_magick.rbs +31 -0
  297. data/sig/custom/net_http.rbs +43 -0
  298. data/sig/custom/origami.rbs +59 -0
  299. data/sig/mindee/dependency.rbs +13 -0
  300. data/sig/mindee/error/mindee_error.rbs +13 -0
  301. data/sig/mindee/error/mindee_http_error.rbs +17 -0
  302. data/sig/mindee/error/mindee_http_error_v2.rbs +15 -0
  303. data/sig/mindee/error/mindee_http_unknown_error_v2.rbs +9 -0
  304. data/sig/mindee/error/mindee_input_error.rbs +18 -0
  305. data/sig/mindee/geometry/min_max.rbs +11 -0
  306. data/sig/mindee/geometry/point.rbs +14 -0
  307. data/sig/mindee/geometry/polygon.rbs +12 -0
  308. data/sig/mindee/geometry/quadrilateral.rbs +15 -0
  309. data/sig/mindee/geometry/utils.rbs +13 -0
  310. data/sig/mindee/http/http_error_handler.rbs +15 -0
  311. data/sig/mindee/http/response_validation.rbs +11 -0
  312. data/sig/mindee/image/extracted_image.rbs +21 -0
  313. data/sig/mindee/image/image_compressor.rbs +8 -0
  314. data/sig/mindee/image/image_extractor.rbs +13 -0
  315. data/sig/mindee/image/image_utils.rbs +19 -0
  316. data/sig/mindee/input/base_parameters.rbs +35 -0
  317. data/sig/mindee/input/local_response.rbs +14 -0
  318. data/sig/mindee/input/polling_options.rbs +12 -0
  319. data/sig/mindee/input/sources/base64_input_source.rbs +11 -0
  320. data/sig/mindee/input/sources/bytes_input_source.rbs +10 -0
  321. data/sig/mindee/input/sources/file_input_source.rbs +10 -0
  322. data/sig/mindee/input/sources/local_input_source.rbs +30 -0
  323. data/sig/mindee/input/sources/path_input_source.rbs +10 -0
  324. data/sig/mindee/input/sources/url_input_source.rbs +20 -0
  325. data/sig/mindee/logging/logger.rbs +11 -0
  326. data/sig/mindee/page_options.rbs +11 -0
  327. data/sig/mindee/pdf/extracted_pdf.rbs +17 -0
  328. data/sig/mindee/pdf/pdf_compressor.rbs +15 -0
  329. data/sig/mindee/pdf/pdf_extractor.rbs +19 -0
  330. data/sig/mindee/pdf/pdf_processor.rbs +12 -0
  331. data/sig/mindee/pdf/pdf_tools.rbs +31 -0
  332. data/sig/mindee/v1/client.rbs +84 -0
  333. data/sig/mindee/v1/extraction/multi_receipts_extractor.rbs +8 -0
  334. data/sig/mindee/v1/http/endpoint.rbs +41 -0
  335. data/sig/mindee/v1/http/workflow_endpoint.rbs +22 -0
  336. data/sig/mindee/v1/parsing/common/api_request.rbs +22 -0
  337. data/sig/mindee/v1/parsing/common/api_response.rbs +31 -0
  338. data/sig/mindee/v1/parsing/common/document.rbs +32 -0
  339. data/sig/mindee/v1/parsing/common/execution.rbs +26 -0
  340. data/sig/mindee/v1/parsing/common/execution_file.rbs +16 -0
  341. data/sig/mindee/v1/parsing/common/execution_priority.rbs +16 -0
  342. data/sig/mindee/v1/parsing/common/extras/cropper_extra.rbs +18 -0
  343. data/sig/mindee/v1/parsing/common/extras/extras.rbs +24 -0
  344. data/sig/mindee/v1/parsing/common/extras/full_text_ocr_extra.rbs +22 -0
  345. data/sig/mindee/v1/parsing/common/extras/rag_extra.rbs +19 -0
  346. data/sig/mindee/v1/parsing/common/inference.rbs +31 -0
  347. data/sig/mindee/v1/parsing/common/job.rbs +24 -0
  348. data/sig/mindee/v1/parsing/common/ocr/mvision_v1.rbs +20 -0
  349. data/sig/mindee/v1/parsing/common/ocr/ocr.rbs +56 -0
  350. data/sig/mindee/v1/parsing/common/orientation.rbs +15 -0
  351. data/sig/mindee/v1/parsing/common/page.rbs +19 -0
  352. data/sig/mindee/v1/parsing/common/prediction.rbs +14 -0
  353. data/sig/mindee/v1/parsing/common/product.rbs +16 -0
  354. data/sig/mindee/v1/parsing/common/workflow_response.rbs +22 -0
  355. data/sig/mindee/v1/parsing/standard/abstract_field.rbs +30 -0
  356. data/sig/mindee/v1/parsing/standard/address_field.rbs +28 -0
  357. data/sig/mindee/v1/parsing/standard/amount_field.rbs +16 -0
  358. data/sig/mindee/v1/parsing/standard/base_field.rbs +16 -0
  359. data/sig/mindee/v1/parsing/standard/boolean_field.rbs +16 -0
  360. data/sig/mindee/v1/parsing/standard/classification_field.rbs +12 -0
  361. data/sig/mindee/v1/parsing/standard/company_registration_field.rbs +20 -0
  362. data/sig/mindee/v1/parsing/standard/date_field.rbs +20 -0
  363. data/sig/mindee/v1/parsing/standard/feature_field.rbs +12 -0
  364. data/sig/mindee/v1/parsing/standard/locale_field.rbs +24 -0
  365. data/sig/mindee/v1/parsing/standard/payment_details_field.rbs +19 -0
  366. data/sig/mindee/v1/parsing/standard/position_field.rbs +26 -0
  367. data/sig/mindee/v1/parsing/standard/string_field.rbs +16 -0
  368. data/sig/mindee/v1/parsing/standard/tax_field.rbs +33 -0
  369. data/sig/mindee/v1/parsing/universal/universal_list_field.rbs +21 -0
  370. data/sig/mindee/v1/parsing/universal/universal_object_field.rbs +38 -0
  371. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1.rbs +13 -0
  372. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rbs +16 -0
  373. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rbs +17 -0
  374. data/sig/mindee/v1/product/cropper/cropper_v1.rbs +13 -0
  375. data/sig/mindee/v1/product/cropper/cropper_v1_document.rbs +14 -0
  376. data/sig/mindee/v1/product/cropper/cropper_v1_page.rbs +19 -0
  377. data/sig/mindee/v1/product/financial_document/financial_document_v1.rbs +13 -0
  378. data/sig/mindee/v1/product/financial_document/financial_document_v1_document.rbs +49 -0
  379. data/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs +35 -0
  380. data/sig/mindee/v1/product/financial_document/financial_document_v1_line_items.rbs +15 -0
  381. data/sig/mindee/v1/product/financial_document/financial_document_v1_page.rbs +17 -0
  382. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rbs +15 -0
  383. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rbs +19 -0
  384. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rbs +19 -0
  385. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rbs +15 -0
  386. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs +25 -0
  387. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rbs +20 -0
  388. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rbs +19 -0
  389. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2.rbs +15 -0
  390. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rbs +31 -0
  391. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rbs +19 -0
  392. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs +27 -0
  393. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rbs +17 -0
  394. data/sig/mindee/v1/product/fr/id_card/id_card_v1.rbs +15 -0
  395. data/sig/mindee/v1/product/fr/id_card/id_card_v1_document.rbs +26 -0
  396. data/sig/mindee/v1/product/fr/id_card/id_card_v1_page.rbs +20 -0
  397. data/sig/mindee/v1/product/fr/id_card/id_card_v2.rbs +15 -0
  398. data/sig/mindee/v1/product/fr/id_card/id_card_v2_document.rbs +31 -0
  399. data/sig/mindee/v1/product/fr/id_card/id_card_v2_page.rbs +21 -0
  400. data/sig/mindee/v1/product/international_id/international_id_v2.rbs +13 -0
  401. data/sig/mindee/v1/product/international_id/international_id_v2_document.rbs +31 -0
  402. data/sig/mindee/v1/product/international_id/international_id_v2_page.rbs +17 -0
  403. data/sig/mindee/v1/product/invoice/invoice_v4.rbs +13 -0
  404. data/sig/mindee/v1/product/invoice/invoice_v4_document.rbs +45 -0
  405. data/sig/mindee/v1/product/invoice/invoice_v4_line_item.rbs +35 -0
  406. data/sig/mindee/v1/product/invoice/invoice_v4_line_items.rbs +15 -0
  407. data/sig/mindee/v1/product/invoice/invoice_v4_page.rbs +17 -0
  408. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rbs +13 -0
  409. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rbs +17 -0
  410. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs +21 -0
  411. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rbs +15 -0
  412. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rbs +17 -0
  413. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs +14 -0
  414. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs +15 -0
  415. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rbs +17 -0
  416. data/sig/mindee/v1/product/passport/passport_v1.rbs +13 -0
  417. data/sig/mindee/v1/product/passport/passport_v1_document.rbs +25 -0
  418. data/sig/mindee/v1/product/passport/passport_v1_page.rbs +17 -0
  419. data/sig/mindee/v1/product/receipt/receipt_v5.rbs +13 -0
  420. data/sig/mindee/v1/product/receipt/receipt_v5_document.rbs +33 -0
  421. data/sig/mindee/v1/product/receipt/receipt_v5_line_item.rbs +27 -0
  422. data/sig/mindee/v1/product/receipt/receipt_v5_line_items.rbs +15 -0
  423. data/sig/mindee/v1/product/receipt/receipt_v5_page.rbs +17 -0
  424. data/sig/mindee/v1/product/resume/resume_v1.rbs +13 -0
  425. data/sig/mindee/v1/product/resume/resume_v1_certificate.rbs +27 -0
  426. data/sig/mindee/v1/product/resume/resume_v1_certificates.rbs +17 -0
  427. data/sig/mindee/v1/product/resume/resume_v1_document.rbs +69 -0
  428. data/sig/mindee/v1/product/resume/resume_v1_education.rbs +33 -0
  429. data/sig/mindee/v1/product/resume/resume_v1_educations.rbs +17 -0
  430. data/sig/mindee/v1/product/resume/resume_v1_language.rbs +23 -0
  431. data/sig/mindee/v1/product/resume/resume_v1_languages.rbs +17 -0
  432. data/sig/mindee/v1/product/resume/resume_v1_page.rbs +19 -0
  433. data/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs +37 -0
  434. data/sig/mindee/v1/product/resume/resume_v1_professional_experiences.rbs +17 -0
  435. data/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs +23 -0
  436. data/sig/mindee/v1/product/resume/resume_v1_social_networks_urls.rbs +17 -0
  437. data/sig/mindee/v1/product/universal/universal.rbs +16 -0
  438. data/sig/mindee/v1/product/universal/universal_document.rbs +12 -0
  439. data/sig/mindee/v1/product/universal/universal_page.rbs +18 -0
  440. data/sig/mindee/v1/product/universal/universal_prediction.rbs +30 -0
  441. data/sig/mindee/v2/client.rbs +29 -0
  442. data/sig/mindee/v2/file_operation/crop.rbs +10 -0
  443. data/sig/mindee/v2/file_operation/crop_files.rbs +9 -0
  444. data/sig/mindee/v2/file_operation/split.rbs +11 -0
  445. data/sig/mindee/v2/file_operation/split_files.rbs +9 -0
  446. data/sig/mindee/v2/http/api_v2_settings.rbs +27 -0
  447. data/sig/mindee/v2/http/mindee_api_v2.rbs +52 -0
  448. data/sig/mindee/v2/parsing/base_inference.rbs +18 -0
  449. data/sig/mindee/v2/parsing/base_response.rbs +11 -0
  450. data/sig/mindee/v2/parsing/common_response.rbs +12 -0
  451. data/sig/mindee/v2/parsing/error_item.rbs +13 -0
  452. data/sig/mindee/v2/parsing/error_response.rbs +20 -0
  453. data/sig/mindee/v2/parsing/field/base_field.rbs +17 -0
  454. data/sig/mindee/v2/parsing/field/field_confidence.rbs +30 -0
  455. data/sig/mindee/v2/parsing/field/field_location.rbs +16 -0
  456. data/sig/mindee/v2/parsing/field/inference_fields.rbs +20 -0
  457. data/sig/mindee/v2/parsing/field/list_field.rbs +23 -0
  458. data/sig/mindee/v2/parsing/field/object_field.rbs +27 -0
  459. data/sig/mindee/v2/parsing/field/simple_field.rbs +16 -0
  460. data/sig/mindee/v2/parsing/inference_active_options.rbs +26 -0
  461. data/sig/mindee/v2/parsing/inference_file.rbs +17 -0
  462. data/sig/mindee/v2/parsing/inference_job.rbs +13 -0
  463. data/sig/mindee/v2/parsing/inference_model.rbs +12 -0
  464. data/sig/mindee/v2/parsing/job.rbs +24 -0
  465. data/sig/mindee/v2/parsing/job_response.rbs +14 -0
  466. data/sig/mindee/v2/parsing/job_webhook.rbs +19 -0
  467. data/sig/mindee/v2/parsing/rag_metadata.rbs +13 -0
  468. data/sig/mindee/v2/parsing/raw_text.rbs +12 -0
  469. data/sig/mindee/v2/parsing/raw_text_page.rbs +11 -0
  470. data/sig/mindee/v2/parsing/search/pagination_metadata.rbs +20 -0
  471. data/sig/mindee/v2/parsing/search/search_model.rbs +19 -0
  472. data/sig/mindee/v2/parsing/search/search_response.rbs +17 -0
  473. data/sig/mindee/v2/parsing/search_models.rbs +14 -0
  474. data/sig/mindee/v2/product/base_product.rbs +19 -0
  475. data/sig/mindee/v2/product/classification/classification.rbs +10 -0
  476. data/sig/mindee/v2/product/classification/classification_classifier.rbs +15 -0
  477. data/sig/mindee/v2/product/classification/classification_inference.rbs +15 -0
  478. data/sig/mindee/v2/product/classification/classification_response.rbs +23 -0
  479. data/sig/mindee/v2/product/classification/classification_result.rbs +15 -0
  480. data/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs +23 -0
  481. data/sig/mindee/v2/product/crop/crop.rbs +10 -0
  482. data/sig/mindee/v2/product/crop/crop_inference.rbs +14 -0
  483. data/sig/mindee/v2/product/crop/crop_item.rbs +18 -0
  484. data/sig/mindee/v2/product/crop/crop_response.rbs +25 -0
  485. data/sig/mindee/v2/product/crop/crop_result.rbs +14 -0
  486. data/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs +23 -0
  487. data/sig/mindee/v2/product/extraction/extraction.rbs +15 -0
  488. data/sig/mindee/v2/product/extraction/extraction_inference.rbs +19 -0
  489. data/sig/mindee/v2/product/extraction/extraction_response.rbs +24 -0
  490. data/sig/mindee/v2/product/extraction/extraction_result.rbs +18 -0
  491. data/sig/mindee/v2/product/extraction/params/data_schema.rbs +21 -0
  492. data/sig/mindee/v2/product/extraction/params/data_schema_field.rbs +29 -0
  493. data/sig/mindee/v2/product/extraction/params/data_schema_replace.rbs +21 -0
  494. data/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs +38 -0
  495. data/sig/mindee/v2/product/ocr/ocr.rbs +10 -0
  496. data/sig/mindee/v2/product/ocr/ocr_inference.rbs +14 -0
  497. data/sig/mindee/v2/product/ocr/ocr_page.rbs +15 -0
  498. data/sig/mindee/v2/product/ocr/ocr_response.rbs +23 -0
  499. data/sig/mindee/v2/product/ocr/ocr_result.rbs +14 -0
  500. data/sig/mindee/v2/product/ocr/ocr_word.rbs +15 -0
  501. data/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs +24 -0
  502. data/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs +23 -0
  503. data/sig/mindee/v2/product/split/split.rbs +10 -0
  504. data/sig/mindee/v2/product/split/split_inference.rbs +14 -0
  505. data/sig/mindee/v2/product/split/split_range.rbs +18 -0
  506. data/sig/mindee/v2/product/split/split_response.rbs +25 -0
  507. data/sig/mindee/v2/product/split/split_result.rbs +14 -0
  508. data/sig/mindee/version.rbs +6 -0
  509. data/sig/mindee.rbs +62 -0
  510. metadata +600 -0
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../../../input/base_parameters'
4
+ require_relative 'data_schema'
5
+
6
+ module Mindee
7
+ module V2
8
+ module Product
9
+ module Extraction
10
+ module Params
11
+ # Parameters accepted by the extraction v2 endpoint.
12
+ class ExtractionParameters < Mindee::Input::BaseParameters
13
+ # @return [Boolean, nil] Enhance extraction accuracy with Retrieval-Augmented Generation.
14
+ attr_reader :rag
15
+
16
+ # @return [Boolean, nil] Extract the full text content from the document as strings,
17
+ # and fill the raw_text` attribute.
18
+ attr_reader :raw_text
19
+
20
+ # @return [Boolean, nil] Calculate bounding box polygons for all fields,
21
+ # and fill their `locations` attribute.
22
+ attr_reader :polygon
23
+
24
+ # @return [Boolean, nil] Boost the precision and accuracy of all extractions.
25
+ # Calculate confidence scores for all fields, and fill their confidence attribute.
26
+ attr_reader :confidence
27
+
28
+ # @return [String, nil] Additional text context used by the model during inference.
29
+ # Not recommended, for specific use only.
30
+ attr_reader :text_context
31
+
32
+ # @return [DataSchemaField]
33
+ attr_reader :data_schema
34
+
35
+ # @return [String] Slug for the endpoint.
36
+ def self.slug
37
+ 'extraction'
38
+ end
39
+
40
+ # rubocop:disable Metrics/ParameterLists
41
+ # @param [String] model_id ID of the model
42
+ # @param [Boolean, nil] rag Whether to enable RAG.
43
+ # @param [Boolean, nil] raw_text Whether to enable rax text.
44
+ # @param [Boolean, nil] polygon Whether to enable polygons.
45
+ # @param [Boolean, nil] confidence Whether to enable confidence scores.
46
+ # @param [String, nil] file_alias File alias, if applicable.
47
+ # @param [Array<String>, nil] webhook_ids
48
+ # @param [String, nil] text_context
49
+ # @param [Hash, nil] polling_options
50
+ # @param [Boolean, nil] close_file
51
+ # @param [DataSchemaField, String, Hash nil] data_schema
52
+ def initialize(
53
+ model_id,
54
+ rag: nil,
55
+ raw_text: nil,
56
+ polygon: nil,
57
+ confidence: nil,
58
+ file_alias: nil,
59
+ webhook_ids: nil,
60
+ text_context: nil,
61
+ polling_options: nil,
62
+ close_file: true,
63
+ data_schema: nil
64
+ )
65
+ super(
66
+ model_id,
67
+ file_alias: file_alias,
68
+ webhook_ids: webhook_ids,
69
+ polling_options: polling_options,
70
+ close_file: close_file
71
+ )
72
+
73
+ @rag = rag
74
+ @raw_text = raw_text
75
+ @polygon = polygon
76
+ @confidence = confidence
77
+ @text_context = text_context
78
+ @data_schema = DataSchema.new(data_schema) unless data_schema.nil?
79
+ # rubocop:enable Metrics/ParameterLists
80
+ end
81
+
82
+ # Appends inference-specific form data to the provided array.
83
+ # @param [Array] form_data Array of form fields
84
+ # @return [Array]
85
+ def append_form_data(form_data)
86
+ new_form_data = super
87
+
88
+ new_form_data.push(['rag', @rag.to_s]) unless @rag.nil?
89
+ new_form_data.push(['raw_text', @raw_text.to_s]) unless @raw_text.nil?
90
+ new_form_data.push(['polygon', @polygon.to_s]) unless @polygon.nil?
91
+ new_form_data.push(['confidence', @confidence.to_s]) unless @confidence.nil?
92
+ new_form_data.push(['text_context', @text_context]) if @text_context
93
+ new_form_data.push(['data_schema', @data_schema.to_s]) if @data_schema
94
+
95
+ new_form_data
96
+ end
97
+
98
+ # Loads a prediction from a Hash.
99
+ # @param [Hash] params Parameters to provide as a hash.
100
+ # @return [ExtractionParameters]
101
+ def self.from_hash(params: {})
102
+ rag = params.fetch(:rag, nil)
103
+ raw_text = params.fetch(:raw_text, nil)
104
+ polygon = params.fetch(:polygon, nil)
105
+ confidence = params.fetch(:confidence, nil)
106
+ base_params = load_from_hash(params: params)
107
+ new_params = base_params.merge(rag: rag, raw_text: raw_text, polygon: polygon, confidence: confidence)
108
+ model_id = new_params.fetch(:model_id)
109
+
110
+ ExtractionParameters.new(
111
+ model_id, rag: rag,
112
+ raw_text: raw_text,
113
+ polygon: polygon,
114
+ confidence: confidence,
115
+ file_alias: params.fetch(:file_alias, nil),
116
+ webhook_ids: params.fetch(:webhook_ids, nil),
117
+ close_file: params.fetch(:close_file, true)
118
+ )
119
+ end
120
+ end
121
+ end
122
+ end
123
+ end
124
+ end
125
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ocr_response'
4
+ require_relative 'params/ocr_parameters'
5
+
6
+ module Mindee
7
+ module V2
8
+ module Product
9
+ # OCR module.
10
+ module OCR
11
+ # OCR product.
12
+ class OCR < BaseProduct
13
+ @slug = 'ocr'
14
+ @params_type = Mindee::V2::Product::OCR::Params::OCRParameters
15
+ @response_type = Mindee::V2::Product::OCR::OCRResponse
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ocr_result'
4
+
5
+ module Mindee
6
+ module V2
7
+ module Product
8
+ module OCR
9
+ # The inference result for an OCR utility request.
10
+ class OCRInference < Mindee::V2::Parsing::BaseInference
11
+ # @return [OCRResult] Parsed inference payload.
12
+ attr_reader :result
13
+
14
+ # @param server_response [Hash] Hash representation of the JSON returned by the service.
15
+ def initialize(server_response)
16
+ super
17
+
18
+ @result = OCRResult.new(server_response['result'])
19
+ end
20
+
21
+ # String representation.
22
+ # @return [String]
23
+ def to_s
24
+ [
25
+ super,
26
+ @result.to_s,
27
+ '',
28
+ ].join("\n")
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ocr_word'
4
+
5
+ module Mindee
6
+ module V2
7
+ module Product
8
+ module OCR
9
+ # OCR result for a single page.
10
+ class OCRPage
11
+ # @return [Array<OCRWord>] List of words extracted from the document page.
12
+ attr_reader :words
13
+ # @return [String] Full text content extracted from the document page.
14
+ attr_reader :content
15
+
16
+ # @param server_response [Hash] Hash representation of the JSON returned by the service.
17
+ def initialize(server_response)
18
+ @words = server_response['words'].map { |word| OCRWord.new(word) }
19
+ @content = server_response['content']
20
+ end
21
+
22
+ # String representation.
23
+ # @return [String]
24
+ def to_s
25
+ ocr_words = "\n"
26
+ ocr_words += @words.join("\n\n") if @words&.any?
27
+ "OCR Words\n======#{ocr_words}\n\n:Content: #{@content}"
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../parsing/base_response'
4
+ require_relative 'params/ocr_parameters'
5
+ require_relative 'ocr_inference'
6
+
7
+ module Mindee
8
+ module V2
9
+ module Product
10
+ module OCR
11
+ # HTTP response wrapper that embeds a V2 Inference.
12
+ class OCRResponse < Mindee::V2::Parsing::BaseResponse
13
+ # @return [OCRInference] Parsed inference payload.
14
+ attr_reader :inference
15
+
16
+ # @param server_response [Hash] Hash parsed from the API JSON response.
17
+ def initialize(server_response)
18
+ super
19
+
20
+ @inference = OCRInference.new(server_response['inference'])
21
+ end
22
+
23
+ # String representation.
24
+ # @return [String]
25
+ def to_s
26
+ @inference.to_s
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ocr_page'
4
+
5
+ module Mindee
6
+ module V2
7
+ module Product
8
+ module OCR
9
+ # Result of a ocr utility inference.
10
+ class OCRResult
11
+ # @return [Array<OCRPage>] List of OCR results for each page in the document.
12
+ attr_reader :pages
13
+
14
+ # @param server_response [Hash] Hash representation of the JSON returned by the service.
15
+ def initialize(server_response)
16
+ @pages = if server_response.key?('pages')
17
+ server_response['pages'].map do |pages|
18
+ OCRPage.new(pages)
19
+ end
20
+ end
21
+ end
22
+
23
+ # String representation.
24
+ # @return [String]
25
+ def to_s
26
+ pages_str = @pages.join("\n")
27
+
28
+ "Pages\n======\n#{pages_str}"
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module V2
5
+ module Product
6
+ module OCR
7
+ # OCR result for a single word extracted from the document page.
8
+ class OCRWord
9
+ # @return [String] Text content of the word.
10
+ attr_reader :content
11
+ # @return [Mindee::Geometry::Polygon] Position information as a list of points in clockwise order.
12
+ attr_reader :polygon
13
+
14
+ # @param server_response [Hash] Hash representation of the JSON returned by the service.
15
+ def initialize(server_response)
16
+ @content = server_response['content']
17
+ @polygon = Mindee::Geometry::Polygon.new(server_response['polygon'])
18
+ end
19
+
20
+ # String representation.
21
+ # @return [String]
22
+ def to_s
23
+ @content
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module V2
5
+ module Product
6
+ module OCR
7
+ # OCR Parameters.
8
+ module Params
9
+ # Parameters accepted by the ocr utility v2 endpoint.
10
+ class OCRParameters < Mindee::Input::BaseParameters
11
+ # @return [String] Slug for the endpoint.
12
+ def self.slug
13
+ 'ocr'
14
+ end
15
+
16
+ # @param [String] model_id ID of the model
17
+ # @param [String, nil] file_alias File alias, if applicable.
18
+ # @param [Array<String>, nil] webhook_ids List of webhook IDs to propagate the API response to.
19
+ # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues.
20
+ # @param [Boolean, nil] close_file Whether to close the file after parsing.
21
+ def initialize(
22
+ model_id,
23
+ file_alias: nil,
24
+ webhook_ids: nil,
25
+ polling_options: nil,
26
+ close_file: true
27
+ )
28
+ super
29
+ end
30
+
31
+ # Loads the parameters from a Hash.
32
+ # @param [Hash] params Parameters to provide as a hash.
33
+ # @return [OCRParameters]
34
+ def self.from_hash(params: {})
35
+ OCRParameters.new(
36
+ params.fetch(:model_id),
37
+ file_alias: params.fetch(:file_alias, nil),
38
+ webhook_ids: params.fetch(:webhook_ids, nil),
39
+ close_file: params.fetch(:close_file, true)
40
+ )
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module V2
5
+ module Product
6
+ # Split module.
7
+ module Split
8
+ # Split parameters.
9
+ module Params
10
+ # Parameters accepted by the split utility v2 endpoint.
11
+ class SplitParameters < Mindee::Input::BaseParameters
12
+ # @return [String] Slug for the endpoint.
13
+ def self.slug
14
+ 'split'
15
+ end
16
+
17
+ # @param [String] model_id ID of the model
18
+ # @param [String, nil] file_alias File alias, if applicable.
19
+ # @param [Array<String>, nil] webhook_ids List of webhook IDs to propagate the API response to.
20
+ # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues.
21
+ # @param [Boolean, nil] close_file Whether to close the file after parsing.
22
+ def initialize(
23
+ model_id,
24
+ file_alias: nil,
25
+ webhook_ids: nil,
26
+ polling_options: nil,
27
+ close_file: true
28
+ )
29
+ super
30
+ end
31
+
32
+ # Loads the parameters from a Hash.
33
+ # @param [Hash] params Parameters to provide as a hash.
34
+ # @return [SplitParameters]
35
+ def self.from_hash(params: {})
36
+ SplitParameters.new(
37
+ params.fetch(:model_id),
38
+ file_alias: params.fetch(:file_alias, nil),
39
+ webhook_ids: params.fetch(:webhook_ids, nil),
40
+ close_file: params.fetch(:close_file, true)
41
+ )
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'split_response'
4
+ require_relative 'params/split_parameters'
5
+
6
+ module Mindee
7
+ module V2
8
+ module Product
9
+ module Split
10
+ # Split product.
11
+ class Split < BaseProduct
12
+ @slug = 'split'
13
+ @params_type = Mindee::V2::Product::Split::Params::SplitParameters
14
+ @response_type = Mindee::V2::Product::Split::SplitResponse
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'split_result'
4
+
5
+ module Mindee
6
+ module V2
7
+ module Product
8
+ module Split
9
+ # Split inference result.
10
+ class SplitInference < Mindee::V2::Parsing::BaseInference
11
+ # @return [SplitResult] Result of a split inference.
12
+ attr_reader :result
13
+
14
+ # @param server_response [Hash] Hash representation of the JSON returned by the service.
15
+ def initialize(server_response)
16
+ super
17
+
18
+ @result = SplitResult.new(server_response['result'])
19
+ end
20
+
21
+ # String representation.
22
+ # @return [String]
23
+ def to_s
24
+ [
25
+ super,
26
+ @result.to_s,
27
+ '',
28
+ ].join("\n")
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module V2
5
+ module Product
6
+ module Split
7
+ # Split inference result.
8
+ class SplitRange
9
+ # @return [Array<Integer>] 0-based page indexes, where the first integer indicates the start page and the
10
+ # second integer indicates the end page.
11
+ attr_reader :page_range
12
+ # @return [String] The document type, as identified on given classification values.
13
+ attr_reader :document_type
14
+
15
+ # @param server_response [Hash] Hash representation of the JSON returned by the service.
16
+ def initialize(server_response)
17
+ @page_range = server_response['page_range']
18
+ @document_type = server_response['document_type']
19
+ end
20
+
21
+ # String representation.
22
+ # @return [String]
23
+ def to_s
24
+ "* :Page Range: #{@page_range}\n :Document Type: #{@document_type}"
25
+ end
26
+
27
+ # Apply the split range inference to a file and return a single extracted PDF.
28
+ #
29
+ # @param input_source [Mindee::Input::Source::LocalInputSource] Local file to extract from
30
+ # @return [PDF::ExtractedPDF]
31
+ def extract_from_file(input_source)
32
+ FileOperation::Split.extract_single_split(input_source, @page_range)
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../parsing/base_response'
4
+ require_relative 'params/split_parameters'
5
+ require_relative 'split_inference'
6
+
7
+ module Mindee
8
+ module V2
9
+ module Product
10
+ module Split
11
+ # HTTP response wrapper that embeds a V2 Inference.
12
+ class SplitResponse < Mindee::V2::Parsing::BaseResponse
13
+ # @return [SplitInference] Parsed inference payload.
14
+ attr_reader :inference
15
+
16
+ # @param server_response [Hash] Hash parsed from the API JSON response.
17
+ def initialize(server_response)
18
+ super
19
+
20
+ @inference = SplitInference.new(server_response['inference'])
21
+ end
22
+
23
+ # String representation.
24
+ # @return [String]
25
+ def to_s
26
+ @inference.to_s
27
+ end
28
+
29
+ # Extracts the crops from the input source.
30
+ # @param input_source [Mindee::Input::Source::LocalInputSource] Path to the file or a File object.
31
+ # @return [FileOperation::SplitFiles]
32
+ def extract_from_file(input_source)
33
+ splits = @inference.result.splits.map(&:page_range)
34
+ FileOperation::Split.extract_splits(input_source, splits)
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'split_range'
4
+
5
+ module Mindee
6
+ module V2
7
+ module Product
8
+ module Split
9
+ # Result of a split utility inference.
10
+ class SplitResult
11
+ # @return [Array<SplitRange>] List of results of splitped document regions.
12
+ attr_reader :splits
13
+
14
+ # @param server_response [Hash] Hash representation of the JSON returned by the service.
15
+ def initialize(server_response)
16
+ @splits = if server_response.key?('splits')
17
+ server_response['splits'].map do |split|
18
+ SplitRange.new(split)
19
+ end
20
+ end
21
+ end
22
+
23
+ # String representation.
24
+ # @return [String]
25
+ def to_s
26
+ splits_str = @splits.join("\n")
27
+
28
+ "Splits\n======\n#{splits_str}"
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'product/classification/classification'
4
+ require_relative 'product/crop/crop'
5
+ require_relative 'product/extraction/extraction'
6
+ require_relative 'product/ocr/ocr'
7
+ require_relative 'product/split/split'
data/lib/mindee/v2.rb ADDED
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'v2/client'
4
+ require_relative 'v2/http'
5
+ require_relative 'v2/file_operation' if Mindee::Dependency.all_deps_available?
6
+ require_relative 'v2/parsing'
7
+ require_relative 'v2/product'
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Mindee
4
+ module Mindee
5
+ # Current version.
6
+ VERSION = '5.0.0.beta1'
7
+
8
+ # Finds and return the current platform.
9
+ # @return [Symbol, Hash[String | Symbol, Regexp], Nil?]
10
+ def self.find_platform
11
+ host = RbConfig::CONFIG['host_os']
12
+ platforms = {
13
+ linux: %r{linux|cygwin},
14
+ windows: %r{mswin|mingw|bccwin|wince|emx|win32},
15
+ macos: %r{mac|darwin},
16
+ bsd: %r{bsd},
17
+ solaris: %r{solaris|sunos},
18
+ }
19
+ platforms.each do |os, regexp|
20
+ return os unless (regexp =~ host).nil?
21
+ end
22
+ end
23
+
24
+ # Current platform.
25
+ PLATFORM = find_platform.freeze
26
+ end