mindee-lite 5.0.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (510) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +24 -0
  3. data/.gitattributes +14 -0
  4. data/.gitignore +76 -0
  5. data/.gitmodules +3 -0
  6. data/.pre-commit-config.yaml +36 -0
  7. data/.rubocop.yml +49 -0
  8. data/.yardopts +4 -0
  9. data/CHANGELOG.md +515 -0
  10. data/CODE_OF_CONDUCT.md +129 -0
  11. data/CONTRIBUTING.md +107 -0
  12. data/Gemfile +14 -0
  13. data/LICENSE +21 -0
  14. data/README.md +42 -0
  15. data/Rakefile +40 -0
  16. data/Steepfile +30 -0
  17. data/bin/console +14 -0
  18. data/bin/mindee.rb +30 -0
  19. data/bin/v1/parser.rb +153 -0
  20. data/bin/v1/products.rb +88 -0
  21. data/bin/v2/parser.rb +235 -0
  22. data/bin/v2/products.rb +34 -0
  23. data/docs/code_samples/bank_account_details_v1.txt +24 -0
  24. data/docs/code_samples/bank_account_details_v2.txt +24 -0
  25. data/docs/code_samples/bank_statement_fr_v2_async.txt +24 -0
  26. data/docs/code_samples/barcode_reader_v1.txt +24 -0
  27. data/docs/code_samples/cropper_v1.txt +21 -0
  28. data/docs/code_samples/default.txt +30 -0
  29. data/docs/code_samples/default_async.txt +29 -0
  30. data/docs/code_samples/expense_receipts_v5.txt +25 -0
  31. data/docs/code_samples/expense_receipts_v5_async.txt +24 -0
  32. data/docs/code_samples/financial_document_v1.txt +25 -0
  33. data/docs/code_samples/financial_document_v1_async.txt +24 -0
  34. data/docs/code_samples/idcard_fr_v1.txt +24 -0
  35. data/docs/code_samples/idcard_fr_v2.txt +24 -0
  36. data/docs/code_samples/international_id_v2_async.txt +24 -0
  37. data/docs/code_samples/invoice_splitter_v1_async.txt +24 -0
  38. data/docs/code_samples/invoices_v4.txt +25 -0
  39. data/docs/code_samples/invoices_v4_async.txt +24 -0
  40. data/docs/code_samples/multi_receipts_detector_v1.txt +24 -0
  41. data/docs/code_samples/passport_v1.txt +24 -0
  42. data/docs/code_samples/resume_v1_async.txt +24 -0
  43. data/docs/code_samples/v2_classification.txt +30 -0
  44. data/docs/code_samples/v2_crop.txt +30 -0
  45. data/docs/code_samples/v2_extraction.txt +42 -0
  46. data/docs/code_samples/v2_extraction_webhook.txt +45 -0
  47. data/docs/code_samples/v2_ocr.txt +30 -0
  48. data/docs/code_samples/v2_split.txt +30 -0
  49. data/docs/code_samples/workflow_execution.txt +28 -0
  50. data/docs/code_samples/workflow_polling.txt +35 -0
  51. data/examples/auto_invoice_splitter_extraction.rb +48 -0
  52. data/examples/auto_multi_receipts_detector_extraction.rb +30 -0
  53. data/lib/mindee/dependency.rb +29 -0
  54. data/lib/mindee/error/mindee_error.rb +17 -0
  55. data/lib/mindee/error/mindee_http_error.rb +36 -0
  56. data/lib/mindee/error/mindee_http_error_v2.rb +45 -0
  57. data/lib/mindee/error/mindee_http_unknown_error_v2.rb +18 -0
  58. data/lib/mindee/error/mindee_input_error.rb +30 -0
  59. data/lib/mindee/error.rb +6 -0
  60. data/lib/mindee/geometry/min_max.rb +23 -0
  61. data/lib/mindee/geometry/point.rb +41 -0
  62. data/lib/mindee/geometry/polygon.rb +37 -0
  63. data/lib/mindee/geometry/quadrilateral.rb +50 -0
  64. data/lib/mindee/geometry/utils.rb +88 -0
  65. data/lib/mindee/geometry.rb +7 -0
  66. data/lib/mindee/http/.rubocop.yml +7 -0
  67. data/lib/mindee/http/http_error_handler.rb +106 -0
  68. data/lib/mindee/http/response_validation.rb +81 -0
  69. data/lib/mindee/http.rb +3 -0
  70. data/lib/mindee/image/extracted_image.rb +89 -0
  71. data/lib/mindee/image/image_compressor.rb +29 -0
  72. data/lib/mindee/image/image_extractor.rb +118 -0
  73. data/lib/mindee/image/image_utils.rb +165 -0
  74. data/lib/mindee/image.rb +6 -0
  75. data/lib/mindee/input/base_parameters.rb +149 -0
  76. data/lib/mindee/input/local_response.rb +80 -0
  77. data/lib/mindee/input/polling_options.rb +26 -0
  78. data/lib/mindee/input/sources/base64_input_source.rb +31 -0
  79. data/lib/mindee/input/sources/bytes_input_source.rb +21 -0
  80. data/lib/mindee/input/sources/file_input_source.rb +20 -0
  81. data/lib/mindee/input/sources/local_input_source.rb +216 -0
  82. data/lib/mindee/input/sources/path_input_source.rb +20 -0
  83. data/lib/mindee/input/sources/url_input_source.rb +130 -0
  84. data/lib/mindee/input/sources.rb +8 -0
  85. data/lib/mindee/input.rb +4 -0
  86. data/lib/mindee/logging/logger.rb +24 -0
  87. data/lib/mindee/logging.rb +3 -0
  88. data/lib/mindee/page_options.rb +24 -0
  89. data/lib/mindee/pdf/extracted_pdf.rb +70 -0
  90. data/lib/mindee/pdf/pdf_compressor.rb +121 -0
  91. data/lib/mindee/pdf/pdf_extractor.rb +121 -0
  92. data/lib/mindee/pdf/pdf_processor.rb +91 -0
  93. data/lib/mindee/pdf/pdf_tools.rb +201 -0
  94. data/lib/mindee/pdf.rb +7 -0
  95. data/lib/mindee/v1/client.rb +490 -0
  96. data/lib/mindee/v1/extraction/multi_receipts_extractor.rb +32 -0
  97. data/lib/mindee/v1/extraction.rb +3 -0
  98. data/lib/mindee/v1/http/.rubocop.yml +7 -0
  99. data/lib/mindee/v1/http/endpoint.rb +221 -0
  100. data/lib/mindee/v1/http/workflow_endpoint.rb +93 -0
  101. data/lib/mindee/v1/http.rb +4 -0
  102. data/lib/mindee/v1/parsing/common/api_request.rb +38 -0
  103. data/lib/mindee/v1/parsing/common/api_response.rb +63 -0
  104. data/lib/mindee/v1/parsing/common/document.rb +86 -0
  105. data/lib/mindee/v1/parsing/common/execution.rb +78 -0
  106. data/lib/mindee/v1/parsing/common/execution_file.rb +26 -0
  107. data/lib/mindee/v1/parsing/common/execution_priority.rb +38 -0
  108. data/lib/mindee/v1/parsing/common/extras/cropper_extra.rb +32 -0
  109. data/lib/mindee/v1/parsing/common/extras/extras.rb +62 -0
  110. data/lib/mindee/v1/parsing/common/extras/full_text_ocr_extra.rb +35 -0
  111. data/lib/mindee/v1/parsing/common/extras/rag_extra.rb +28 -0
  112. data/lib/mindee/v1/parsing/common/extras.rb +6 -0
  113. data/lib/mindee/v1/parsing/common/inference.rb +69 -0
  114. data/lib/mindee/v1/parsing/common/job.rb +48 -0
  115. data/lib/mindee/v1/parsing/common/ocr/mvision_v1.rb +52 -0
  116. data/lib/mindee/v1/parsing/common/ocr/ocr.rb +180 -0
  117. data/lib/mindee/v1/parsing/common/ocr.rb +3 -0
  118. data/lib/mindee/v1/parsing/common/orientation.rb +28 -0
  119. data/lib/mindee/v1/parsing/common/page.rb +49 -0
  120. data/lib/mindee/v1/parsing/common/prediction.rb +19 -0
  121. data/lib/mindee/v1/parsing/common/product.rb +26 -0
  122. data/lib/mindee/v1/parsing/common/workflow_response.rb +30 -0
  123. data/lib/mindee/v1/parsing/common.rb +15 -0
  124. data/lib/mindee/v1/parsing/standard/abstract_field.rb +74 -0
  125. data/lib/mindee/v1/parsing/standard/address_field.rb +51 -0
  126. data/lib/mindee/v1/parsing/standard/amount_field.rb +28 -0
  127. data/lib/mindee/v1/parsing/standard/base_field.rb +30 -0
  128. data/lib/mindee/v1/parsing/standard/boolean_field.rb +29 -0
  129. data/lib/mindee/v1/parsing/standard/classification_field.rb +18 -0
  130. data/lib/mindee/v1/parsing/standard/company_registration_field.rb +45 -0
  131. data/lib/mindee/v1/parsing/standard/date_field.rb +40 -0
  132. data/lib/mindee/v1/parsing/standard/feature_field.rb +26 -0
  133. data/lib/mindee/v1/parsing/standard/locale_field.rb +52 -0
  134. data/lib/mindee/v1/parsing/standard/payment_details_field.rb +44 -0
  135. data/lib/mindee/v1/parsing/standard/position_field.rb +61 -0
  136. data/lib/mindee/v1/parsing/standard/string_field.rb +26 -0
  137. data/lib/mindee/v1/parsing/standard/tax_field.rb +110 -0
  138. data/lib/mindee/v1/parsing/standard.rb +15 -0
  139. data/lib/mindee/v1/parsing/universal/universal_list_field.rb +60 -0
  140. data/lib/mindee/v1/parsing/universal/universal_object_field.rb +123 -0
  141. data/lib/mindee/v1/parsing/universal.rb +4 -0
  142. data/lib/mindee/v1/parsing.rb +5 -0
  143. data/lib/mindee/v1/product/.rubocop.yml +12 -0
  144. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1.rb +47 -0
  145. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rb +47 -0
  146. data/lib/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rb +38 -0
  147. data/lib/mindee/v1/product/cropper/cropper_v1.rb +47 -0
  148. data/lib/mindee/v1/product/cropper/cropper_v1_document.rb +15 -0
  149. data/lib/mindee/v1/product/cropper/cropper_v1_page.rb +55 -0
  150. data/lib/mindee/v1/product/financial_document/financial_document_v1.rb +47 -0
  151. data/lib/mindee/v1/product/financial_document/financial_document_v1_document.rb +329 -0
  152. data/lib/mindee/v1/product/financial_document/financial_document_v1_line_item.rb +124 -0
  153. data/lib/mindee/v1/product/financial_document/financial_document_v1_line_items.rb +64 -0
  154. data/lib/mindee/v1/product/financial_document/financial_document_v1_page.rb +38 -0
  155. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rb +49 -0
  156. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rb +49 -0
  157. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rb +40 -0
  158. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rb +49 -0
  159. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rb +63 -0
  160. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rb +60 -0
  161. data/lib/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rb +40 -0
  162. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2.rb +49 -0
  163. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rb +169 -0
  164. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rb +40 -0
  165. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rb +78 -0
  166. data/lib/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rb +56 -0
  167. data/lib/mindee/v1/product/fr/id_card/id_card_v1.rb +49 -0
  168. data/lib/mindee/v1/product/fr/id_card/id_card_v1_document.rb +106 -0
  169. data/lib/mindee/v1/product/fr/id_card/id_card_v1_page.rb +57 -0
  170. data/lib/mindee/v1/product/fr/id_card/id_card_v2.rb +49 -0
  171. data/lib/mindee/v1/product/fr/id_card/id_card_v2_document.rb +143 -0
  172. data/lib/mindee/v1/product/fr/id_card/id_card_v2_page.rb +65 -0
  173. data/lib/mindee/v1/product/international_id/international_id_v2.rb +47 -0
  174. data/lib/mindee/v1/product/international_id/international_id_v2_document.rb +164 -0
  175. data/lib/mindee/v1/product/international_id/international_id_v2_page.rb +38 -0
  176. data/lib/mindee/v1/product/invoice/invoice_v4.rb +47 -0
  177. data/lib/mindee/v1/product/invoice/invoice_v4_document.rb +300 -0
  178. data/lib/mindee/v1/product/invoice/invoice_v4_line_item.rb +124 -0
  179. data/lib/mindee/v1/product/invoice/invoice_v4_line_items.rb +64 -0
  180. data/lib/mindee/v1/product/invoice/invoice_v4_page.rb +38 -0
  181. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rb +47 -0
  182. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb +66 -0
  183. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +58 -0
  184. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +50 -0
  185. data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rb +38 -0
  186. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rb +47 -0
  187. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +38 -0
  188. data/lib/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +38 -0
  189. data/lib/mindee/v1/product/passport/passport_v1.rb +47 -0
  190. data/lib/mindee/v1/product/passport/passport_v1_document.rb +112 -0
  191. data/lib/mindee/v1/product/passport/passport_v1_page.rb +38 -0
  192. data/lib/mindee/v1/product/receipt/receipt_v5.rb +47 -0
  193. data/lib/mindee/v1/product/receipt/receipt_v5_document.rb +187 -0
  194. data/lib/mindee/v1/product/receipt/receipt_v5_line_item.rb +88 -0
  195. data/lib/mindee/v1/product/receipt/receipt_v5_line_items.rb +56 -0
  196. data/lib/mindee/v1/product/receipt/receipt_v5_page.rb +38 -0
  197. data/lib/mindee/v1/product/resume/resume_v1.rb +47 -0
  198. data/lib/mindee/v1/product/resume/resume_v1_certificate.rb +82 -0
  199. data/lib/mindee/v1/product/resume/resume_v1_certificates.rb +60 -0
  200. data/lib/mindee/v1/product/resume/resume_v1_document.rb +340 -0
  201. data/lib/mindee/v1/product/resume/resume_v1_education.rb +106 -0
  202. data/lib/mindee/v1/product/resume/resume_v1_educations.rb +66 -0
  203. data/lib/mindee/v1/product/resume/resume_v1_language.rb +66 -0
  204. data/lib/mindee/v1/product/resume/resume_v1_languages.rb +56 -0
  205. data/lib/mindee/v1/product/resume/resume_v1_page.rb +38 -0
  206. data/lib/mindee/v1/product/resume/resume_v1_professional_experience.rb +122 -0
  207. data/lib/mindee/v1/product/resume/resume_v1_professional_experiences.rb +70 -0
  208. data/lib/mindee/v1/product/resume/resume_v1_social_networks_url.rb +66 -0
  209. data/lib/mindee/v1/product/resume/resume_v1_social_networks_urls.rb +56 -0
  210. data/lib/mindee/v1/product/universal/universal.rb +48 -0
  211. data/lib/mindee/v1/product/universal/universal_document.rb +35 -0
  212. data/lib/mindee/v1/product/universal/universal_page.rb +54 -0
  213. data/lib/mindee/v1/product/universal/universal_prediction.rb +128 -0
  214. data/lib/mindee/v1/product.rb +18 -0
  215. data/lib/mindee/v1.rb +7 -0
  216. data/lib/mindee/v2/client.rb +132 -0
  217. data/lib/mindee/v2/file_operation/crop.rb +51 -0
  218. data/lib/mindee/v2/file_operation/crop_files.rb +25 -0
  219. data/lib/mindee/v2/file_operation/split.rb +37 -0
  220. data/lib/mindee/v2/file_operation/split_files.rb +25 -0
  221. data/lib/mindee/v2/file_operation.rb +6 -0
  222. data/lib/mindee/v2/http/.rubocop.yml +7 -0
  223. data/lib/mindee/v2/http/api_v2_settings.rb +65 -0
  224. data/lib/mindee/v2/http/mindee_api_v2.rb +230 -0
  225. data/lib/mindee/v2/http.rb +4 -0
  226. data/lib/mindee/v2/parsing/base_inference.rb +44 -0
  227. data/lib/mindee/v2/parsing/base_response.rb +15 -0
  228. data/lib/mindee/v2/parsing/common_response.rb +20 -0
  229. data/lib/mindee/v2/parsing/error_item.rb +21 -0
  230. data/lib/mindee/v2/parsing/error_response.rb +51 -0
  231. data/lib/mindee/v2/parsing/field/base_field.rb +63 -0
  232. data/lib/mindee/v2/parsing/field/field_confidence.rb +128 -0
  233. data/lib/mindee/v2/parsing/field/field_location.rb +33 -0
  234. data/lib/mindee/v2/parsing/field/inference_fields.rb +105 -0
  235. data/lib/mindee/v2/parsing/field/list_field.rb +79 -0
  236. data/lib/mindee/v2/parsing/field/object_field.rb +138 -0
  237. data/lib/mindee/v2/parsing/field/simple_field.rb +60 -0
  238. data/lib/mindee/v2/parsing/field.rb +9 -0
  239. data/lib/mindee/v2/parsing/inference_active_options.rb +67 -0
  240. data/lib/mindee/v2/parsing/inference_file.rb +38 -0
  241. data/lib/mindee/v2/parsing/inference_job.rb +25 -0
  242. data/lib/mindee/v2/parsing/inference_model.rb +30 -0
  243. data/lib/mindee/v2/parsing/job.rb +93 -0
  244. data/lib/mindee/v2/parsing/job_response.rb +30 -0
  245. data/lib/mindee/v2/parsing/job_webhook.rb +59 -0
  246. data/lib/mindee/v2/parsing/rag_metadata.rb +17 -0
  247. data/lib/mindee/v2/parsing/raw_text.rb +27 -0
  248. data/lib/mindee/v2/parsing/raw_text_page.rb +24 -0
  249. data/lib/mindee/v2/parsing/search/pagination_metadata.rb +44 -0
  250. data/lib/mindee/v2/parsing/search/search_model.rb +38 -0
  251. data/lib/mindee/v2/parsing/search/search_models.rb +34 -0
  252. data/lib/mindee/v2/parsing/search/search_response.rb +38 -0
  253. data/lib/mindee/v2/parsing/search.rb +6 -0
  254. data/lib/mindee/v2/parsing.rb +16 -0
  255. data/lib/mindee/v2/product/base_product.rb +28 -0
  256. data/lib/mindee/v2/product/classification/classification.rb +20 -0
  257. data/lib/mindee/v2/product/classification/classification_classifier.rb +25 -0
  258. data/lib/mindee/v2/product/classification/classification_inference.rb +35 -0
  259. data/lib/mindee/v2/product/classification/classification_response.rb +32 -0
  260. data/lib/mindee/v2/product/classification/classification_result.rb +27 -0
  261. data/lib/mindee/v2/product/classification/params/classification_parameters.rb +47 -0
  262. data/lib/mindee/v2/product/crop/crop.rb +20 -0
  263. data/lib/mindee/v2/product/crop/crop_inference.rb +34 -0
  264. data/lib/mindee/v2/product/crop/crop_item.rb +39 -0
  265. data/lib/mindee/v2/product/crop/crop_response.rb +40 -0
  266. data/lib/mindee/v2/product/crop/crop_result.rb +34 -0
  267. data/lib/mindee/v2/product/crop/params/crop_parameters.rb +47 -0
  268. data/lib/mindee/v2/product/extraction/extraction.rb +21 -0
  269. data/lib/mindee/v2/product/extraction/extraction_inference.rb +40 -0
  270. data/lib/mindee/v2/product/extraction/extraction_response.rb +32 -0
  271. data/lib/mindee/v2/product/extraction/extraction_result.rb +44 -0
  272. data/lib/mindee/v2/product/extraction/params/data_schema.rb +51 -0
  273. data/lib/mindee/v2/product/extraction/params/data_schema_field.rb +69 -0
  274. data/lib/mindee/v2/product/extraction/params/data_schema_replace.rb +39 -0
  275. data/lib/mindee/v2/product/extraction/params/extraction_parameters.rb +125 -0
  276. data/lib/mindee/v2/product/ocr/ocr.rb +20 -0
  277. data/lib/mindee/v2/product/ocr/ocr_inference.rb +34 -0
  278. data/lib/mindee/v2/product/ocr/ocr_page.rb +33 -0
  279. data/lib/mindee/v2/product/ocr/ocr_response.rb +32 -0
  280. data/lib/mindee/v2/product/ocr/ocr_result.rb +34 -0
  281. data/lib/mindee/v2/product/ocr/ocr_word.rb +29 -0
  282. data/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +47 -0
  283. data/lib/mindee/v2/product/split/params/split_parameters.rb +48 -0
  284. data/lib/mindee/v2/product/split/split.rb +19 -0
  285. data/lib/mindee/v2/product/split/split_inference.rb +34 -0
  286. data/lib/mindee/v2/product/split/split_range.rb +38 -0
  287. data/lib/mindee/v2/product/split/split_response.rb +40 -0
  288. data/lib/mindee/v2/product/split/split_result.rb +34 -0
  289. data/lib/mindee/v2/product.rb +7 -0
  290. data/lib/mindee/v2.rb +7 -0
  291. data/lib/mindee/version.rb +26 -0
  292. data/lib/mindee.rb +135 -0
  293. data/mindee-lite.gemspec +36 -0
  294. data/mindee.gemspec +44 -0
  295. data/sig/custom/marcel.rbs +3 -0
  296. data/sig/custom/mini_magick.rbs +31 -0
  297. data/sig/custom/net_http.rbs +43 -0
  298. data/sig/custom/origami.rbs +59 -0
  299. data/sig/mindee/dependency.rbs +13 -0
  300. data/sig/mindee/error/mindee_error.rbs +13 -0
  301. data/sig/mindee/error/mindee_http_error.rbs +17 -0
  302. data/sig/mindee/error/mindee_http_error_v2.rbs +15 -0
  303. data/sig/mindee/error/mindee_http_unknown_error_v2.rbs +9 -0
  304. data/sig/mindee/error/mindee_input_error.rbs +18 -0
  305. data/sig/mindee/geometry/min_max.rbs +11 -0
  306. data/sig/mindee/geometry/point.rbs +14 -0
  307. data/sig/mindee/geometry/polygon.rbs +12 -0
  308. data/sig/mindee/geometry/quadrilateral.rbs +15 -0
  309. data/sig/mindee/geometry/utils.rbs +13 -0
  310. data/sig/mindee/http/http_error_handler.rbs +15 -0
  311. data/sig/mindee/http/response_validation.rbs +11 -0
  312. data/sig/mindee/image/extracted_image.rbs +21 -0
  313. data/sig/mindee/image/image_compressor.rbs +8 -0
  314. data/sig/mindee/image/image_extractor.rbs +13 -0
  315. data/sig/mindee/image/image_utils.rbs +19 -0
  316. data/sig/mindee/input/base_parameters.rbs +35 -0
  317. data/sig/mindee/input/local_response.rbs +14 -0
  318. data/sig/mindee/input/polling_options.rbs +12 -0
  319. data/sig/mindee/input/sources/base64_input_source.rbs +11 -0
  320. data/sig/mindee/input/sources/bytes_input_source.rbs +10 -0
  321. data/sig/mindee/input/sources/file_input_source.rbs +10 -0
  322. data/sig/mindee/input/sources/local_input_source.rbs +30 -0
  323. data/sig/mindee/input/sources/path_input_source.rbs +10 -0
  324. data/sig/mindee/input/sources/url_input_source.rbs +20 -0
  325. data/sig/mindee/logging/logger.rbs +11 -0
  326. data/sig/mindee/page_options.rbs +11 -0
  327. data/sig/mindee/pdf/extracted_pdf.rbs +17 -0
  328. data/sig/mindee/pdf/pdf_compressor.rbs +15 -0
  329. data/sig/mindee/pdf/pdf_extractor.rbs +19 -0
  330. data/sig/mindee/pdf/pdf_processor.rbs +12 -0
  331. data/sig/mindee/pdf/pdf_tools.rbs +31 -0
  332. data/sig/mindee/v1/client.rbs +84 -0
  333. data/sig/mindee/v1/extraction/multi_receipts_extractor.rbs +8 -0
  334. data/sig/mindee/v1/http/endpoint.rbs +41 -0
  335. data/sig/mindee/v1/http/workflow_endpoint.rbs +22 -0
  336. data/sig/mindee/v1/parsing/common/api_request.rbs +22 -0
  337. data/sig/mindee/v1/parsing/common/api_response.rbs +31 -0
  338. data/sig/mindee/v1/parsing/common/document.rbs +32 -0
  339. data/sig/mindee/v1/parsing/common/execution.rbs +26 -0
  340. data/sig/mindee/v1/parsing/common/execution_file.rbs +16 -0
  341. data/sig/mindee/v1/parsing/common/execution_priority.rbs +16 -0
  342. data/sig/mindee/v1/parsing/common/extras/cropper_extra.rbs +18 -0
  343. data/sig/mindee/v1/parsing/common/extras/extras.rbs +24 -0
  344. data/sig/mindee/v1/parsing/common/extras/full_text_ocr_extra.rbs +22 -0
  345. data/sig/mindee/v1/parsing/common/extras/rag_extra.rbs +19 -0
  346. data/sig/mindee/v1/parsing/common/inference.rbs +31 -0
  347. data/sig/mindee/v1/parsing/common/job.rbs +24 -0
  348. data/sig/mindee/v1/parsing/common/ocr/mvision_v1.rbs +20 -0
  349. data/sig/mindee/v1/parsing/common/ocr/ocr.rbs +56 -0
  350. data/sig/mindee/v1/parsing/common/orientation.rbs +15 -0
  351. data/sig/mindee/v1/parsing/common/page.rbs +19 -0
  352. data/sig/mindee/v1/parsing/common/prediction.rbs +14 -0
  353. data/sig/mindee/v1/parsing/common/product.rbs +16 -0
  354. data/sig/mindee/v1/parsing/common/workflow_response.rbs +22 -0
  355. data/sig/mindee/v1/parsing/standard/abstract_field.rbs +30 -0
  356. data/sig/mindee/v1/parsing/standard/address_field.rbs +28 -0
  357. data/sig/mindee/v1/parsing/standard/amount_field.rbs +16 -0
  358. data/sig/mindee/v1/parsing/standard/base_field.rbs +16 -0
  359. data/sig/mindee/v1/parsing/standard/boolean_field.rbs +16 -0
  360. data/sig/mindee/v1/parsing/standard/classification_field.rbs +12 -0
  361. data/sig/mindee/v1/parsing/standard/company_registration_field.rbs +20 -0
  362. data/sig/mindee/v1/parsing/standard/date_field.rbs +20 -0
  363. data/sig/mindee/v1/parsing/standard/feature_field.rbs +12 -0
  364. data/sig/mindee/v1/parsing/standard/locale_field.rbs +24 -0
  365. data/sig/mindee/v1/parsing/standard/payment_details_field.rbs +19 -0
  366. data/sig/mindee/v1/parsing/standard/position_field.rbs +26 -0
  367. data/sig/mindee/v1/parsing/standard/string_field.rbs +16 -0
  368. data/sig/mindee/v1/parsing/standard/tax_field.rbs +33 -0
  369. data/sig/mindee/v1/parsing/universal/universal_list_field.rbs +21 -0
  370. data/sig/mindee/v1/parsing/universal/universal_object_field.rbs +38 -0
  371. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1.rbs +13 -0
  372. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_document.rbs +16 -0
  373. data/sig/mindee/v1/product/barcode_reader/barcode_reader_v1_page.rbs +17 -0
  374. data/sig/mindee/v1/product/cropper/cropper_v1.rbs +13 -0
  375. data/sig/mindee/v1/product/cropper/cropper_v1_document.rbs +14 -0
  376. data/sig/mindee/v1/product/cropper/cropper_v1_page.rbs +19 -0
  377. data/sig/mindee/v1/product/financial_document/financial_document_v1.rbs +13 -0
  378. data/sig/mindee/v1/product/financial_document/financial_document_v1_document.rbs +49 -0
  379. data/sig/mindee/v1/product/financial_document/financial_document_v1_line_item.rbs +35 -0
  380. data/sig/mindee/v1/product/financial_document/financial_document_v1_line_items.rbs +15 -0
  381. data/sig/mindee/v1/product/financial_document/financial_document_v1_page.rbs +17 -0
  382. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1.rbs +15 -0
  383. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_document.rbs +19 -0
  384. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v1_page.rbs +19 -0
  385. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2.rbs +15 -0
  386. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_bban.rbs +25 -0
  387. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_document.rbs +20 -0
  388. data/sig/mindee/v1/product/fr/bank_account_details/bank_account_details_v2_page.rbs +19 -0
  389. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2.rbs +15 -0
  390. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_document.rbs +31 -0
  391. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_page.rbs +19 -0
  392. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transaction.rbs +27 -0
  393. data/sig/mindee/v1/product/fr/bank_statement/bank_statement_v2_transactions.rbs +17 -0
  394. data/sig/mindee/v1/product/fr/id_card/id_card_v1.rbs +15 -0
  395. data/sig/mindee/v1/product/fr/id_card/id_card_v1_document.rbs +26 -0
  396. data/sig/mindee/v1/product/fr/id_card/id_card_v1_page.rbs +20 -0
  397. data/sig/mindee/v1/product/fr/id_card/id_card_v2.rbs +15 -0
  398. data/sig/mindee/v1/product/fr/id_card/id_card_v2_document.rbs +31 -0
  399. data/sig/mindee/v1/product/fr/id_card/id_card_v2_page.rbs +21 -0
  400. data/sig/mindee/v1/product/international_id/international_id_v2.rbs +13 -0
  401. data/sig/mindee/v1/product/international_id/international_id_v2_document.rbs +31 -0
  402. data/sig/mindee/v1/product/international_id/international_id_v2_page.rbs +17 -0
  403. data/sig/mindee/v1/product/invoice/invoice_v4.rbs +13 -0
  404. data/sig/mindee/v1/product/invoice/invoice_v4_document.rbs +45 -0
  405. data/sig/mindee/v1/product/invoice/invoice_v4_line_item.rbs +35 -0
  406. data/sig/mindee/v1/product/invoice/invoice_v4_line_items.rbs +15 -0
  407. data/sig/mindee/v1/product/invoice/invoice_v4_page.rbs +17 -0
  408. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1.rbs +13 -0
  409. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rbs +17 -0
  410. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs +21 -0
  411. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rbs +15 -0
  412. data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_page.rbs +17 -0
  413. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1.rbs +14 -0
  414. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_document.rbs +15 -0
  415. data/sig/mindee/v1/product/multi_receipts_detector/multi_receipts_detector_v1_page.rbs +17 -0
  416. data/sig/mindee/v1/product/passport/passport_v1.rbs +13 -0
  417. data/sig/mindee/v1/product/passport/passport_v1_document.rbs +25 -0
  418. data/sig/mindee/v1/product/passport/passport_v1_page.rbs +17 -0
  419. data/sig/mindee/v1/product/receipt/receipt_v5.rbs +13 -0
  420. data/sig/mindee/v1/product/receipt/receipt_v5_document.rbs +33 -0
  421. data/sig/mindee/v1/product/receipt/receipt_v5_line_item.rbs +27 -0
  422. data/sig/mindee/v1/product/receipt/receipt_v5_line_items.rbs +15 -0
  423. data/sig/mindee/v1/product/receipt/receipt_v5_page.rbs +17 -0
  424. data/sig/mindee/v1/product/resume/resume_v1.rbs +13 -0
  425. data/sig/mindee/v1/product/resume/resume_v1_certificate.rbs +27 -0
  426. data/sig/mindee/v1/product/resume/resume_v1_certificates.rbs +17 -0
  427. data/sig/mindee/v1/product/resume/resume_v1_document.rbs +69 -0
  428. data/sig/mindee/v1/product/resume/resume_v1_education.rbs +33 -0
  429. data/sig/mindee/v1/product/resume/resume_v1_educations.rbs +17 -0
  430. data/sig/mindee/v1/product/resume/resume_v1_language.rbs +23 -0
  431. data/sig/mindee/v1/product/resume/resume_v1_languages.rbs +17 -0
  432. data/sig/mindee/v1/product/resume/resume_v1_page.rbs +19 -0
  433. data/sig/mindee/v1/product/resume/resume_v1_professional_experience.rbs +37 -0
  434. data/sig/mindee/v1/product/resume/resume_v1_professional_experiences.rbs +17 -0
  435. data/sig/mindee/v1/product/resume/resume_v1_social_networks_url.rbs +23 -0
  436. data/sig/mindee/v1/product/resume/resume_v1_social_networks_urls.rbs +17 -0
  437. data/sig/mindee/v1/product/universal/universal.rbs +16 -0
  438. data/sig/mindee/v1/product/universal/universal_document.rbs +12 -0
  439. data/sig/mindee/v1/product/universal/universal_page.rbs +18 -0
  440. data/sig/mindee/v1/product/universal/universal_prediction.rbs +30 -0
  441. data/sig/mindee/v2/client.rbs +29 -0
  442. data/sig/mindee/v2/file_operation/crop.rbs +10 -0
  443. data/sig/mindee/v2/file_operation/crop_files.rbs +9 -0
  444. data/sig/mindee/v2/file_operation/split.rbs +11 -0
  445. data/sig/mindee/v2/file_operation/split_files.rbs +9 -0
  446. data/sig/mindee/v2/http/api_v2_settings.rbs +27 -0
  447. data/sig/mindee/v2/http/mindee_api_v2.rbs +52 -0
  448. data/sig/mindee/v2/parsing/base_inference.rbs +18 -0
  449. data/sig/mindee/v2/parsing/base_response.rbs +11 -0
  450. data/sig/mindee/v2/parsing/common_response.rbs +12 -0
  451. data/sig/mindee/v2/parsing/error_item.rbs +13 -0
  452. data/sig/mindee/v2/parsing/error_response.rbs +20 -0
  453. data/sig/mindee/v2/parsing/field/base_field.rbs +17 -0
  454. data/sig/mindee/v2/parsing/field/field_confidence.rbs +30 -0
  455. data/sig/mindee/v2/parsing/field/field_location.rbs +16 -0
  456. data/sig/mindee/v2/parsing/field/inference_fields.rbs +20 -0
  457. data/sig/mindee/v2/parsing/field/list_field.rbs +23 -0
  458. data/sig/mindee/v2/parsing/field/object_field.rbs +27 -0
  459. data/sig/mindee/v2/parsing/field/simple_field.rbs +16 -0
  460. data/sig/mindee/v2/parsing/inference_active_options.rbs +26 -0
  461. data/sig/mindee/v2/parsing/inference_file.rbs +17 -0
  462. data/sig/mindee/v2/parsing/inference_job.rbs +13 -0
  463. data/sig/mindee/v2/parsing/inference_model.rbs +12 -0
  464. data/sig/mindee/v2/parsing/job.rbs +24 -0
  465. data/sig/mindee/v2/parsing/job_response.rbs +14 -0
  466. data/sig/mindee/v2/parsing/job_webhook.rbs +19 -0
  467. data/sig/mindee/v2/parsing/rag_metadata.rbs +13 -0
  468. data/sig/mindee/v2/parsing/raw_text.rbs +12 -0
  469. data/sig/mindee/v2/parsing/raw_text_page.rbs +11 -0
  470. data/sig/mindee/v2/parsing/search/pagination_metadata.rbs +20 -0
  471. data/sig/mindee/v2/parsing/search/search_model.rbs +19 -0
  472. data/sig/mindee/v2/parsing/search/search_response.rbs +17 -0
  473. data/sig/mindee/v2/parsing/search_models.rbs +14 -0
  474. data/sig/mindee/v2/product/base_product.rbs +19 -0
  475. data/sig/mindee/v2/product/classification/classification.rbs +10 -0
  476. data/sig/mindee/v2/product/classification/classification_classifier.rbs +15 -0
  477. data/sig/mindee/v2/product/classification/classification_inference.rbs +15 -0
  478. data/sig/mindee/v2/product/classification/classification_response.rbs +23 -0
  479. data/sig/mindee/v2/product/classification/classification_result.rbs +15 -0
  480. data/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs +23 -0
  481. data/sig/mindee/v2/product/crop/crop.rbs +10 -0
  482. data/sig/mindee/v2/product/crop/crop_inference.rbs +14 -0
  483. data/sig/mindee/v2/product/crop/crop_item.rbs +18 -0
  484. data/sig/mindee/v2/product/crop/crop_response.rbs +25 -0
  485. data/sig/mindee/v2/product/crop/crop_result.rbs +14 -0
  486. data/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs +23 -0
  487. data/sig/mindee/v2/product/extraction/extraction.rbs +15 -0
  488. data/sig/mindee/v2/product/extraction/extraction_inference.rbs +19 -0
  489. data/sig/mindee/v2/product/extraction/extraction_response.rbs +24 -0
  490. data/sig/mindee/v2/product/extraction/extraction_result.rbs +18 -0
  491. data/sig/mindee/v2/product/extraction/params/data_schema.rbs +21 -0
  492. data/sig/mindee/v2/product/extraction/params/data_schema_field.rbs +29 -0
  493. data/sig/mindee/v2/product/extraction/params/data_schema_replace.rbs +21 -0
  494. data/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs +38 -0
  495. data/sig/mindee/v2/product/ocr/ocr.rbs +10 -0
  496. data/sig/mindee/v2/product/ocr/ocr_inference.rbs +14 -0
  497. data/sig/mindee/v2/product/ocr/ocr_page.rbs +15 -0
  498. data/sig/mindee/v2/product/ocr/ocr_response.rbs +23 -0
  499. data/sig/mindee/v2/product/ocr/ocr_result.rbs +14 -0
  500. data/sig/mindee/v2/product/ocr/ocr_word.rbs +15 -0
  501. data/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs +24 -0
  502. data/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs +23 -0
  503. data/sig/mindee/v2/product/split/split.rbs +10 -0
  504. data/sig/mindee/v2/product/split/split_inference.rbs +14 -0
  505. data/sig/mindee/v2/product/split/split_range.rbs +18 -0
  506. data/sig/mindee/v2/product/split/split_response.rbs +25 -0
  507. data/sig/mindee/v2/product/split/split_result.rbs +14 -0
  508. data/sig/mindee/version.rbs +6 -0
  509. data/sig/mindee.rbs +62 -0
  510. metadata +600 -0
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../standard/position_field'
4
+
5
+ module Mindee
6
+ module V1
7
+ module Parsing
8
+ module Common
9
+ # Extras namespace.
10
+ module Extras
11
+ # Extra information added to the prediction.
12
class Extras
  # Keys that are wrapped in dedicated extra classes instead of being stored as raw values.
  TYPED_EXTRA_KEYS = %w[cropper full_text_ocr rag].freeze

  # @return [CropperExtra, nil]
  attr_reader :cropper
  # @return [Mindee::V1::Parsing::Common::Extras::FullTextOCRExtra, nil]
  attr_reader :full_text_ocr
  # @return [RAGExtra, nil]
  attr_reader :rag

  # Wraps the known extras in their dedicated classes and stores every other entry
  # as an instance variable named after its key.
  #
  # @param raw_prediction [Hash] Raw 'extras' payload.
  def initialize(raw_prediction)
    if raw_prediction['cropper']
      @cropper = Mindee::V1::Parsing::Common::Extras::CropperExtra.new(raw_prediction['cropper'])
    end
    if raw_prediction['full_text_ocr']
      @full_text_ocr = Mindee::V1::Parsing::Common::Extras::FullTextOCRExtra.new(
        raw_prediction['full_text_ocr']
      )
    end
    @rag = Mindee::V1::Parsing::Common::Extras::RAGExtra.new(raw_prediction['rag']) if raw_prediction['rag']

    raw_prediction.each do |key, value|
      next if TYPED_EXTRA_KEYS.include?(key.to_s)

      instance_variable_set("@#{key}", value)
    end
  end

  # Concatenates every instance variable as "name: value", without any separator.
  #
  # @return [String]
  def to_s
    instance_variables.map { |var| "#{var}: #{instance_variable_get(var)}" }.join
  end

  # Adds artificial extra data for reconstructed extras. Currently only used for full_text_ocr.
  #
  # The reconstructed entry replaces any existing full text OCR extra: a FullTextOCRExtra
  # is not a collection, so there is nothing to append to.
  #
  # @param [Hash] raw_prediction Hash holding the reconstructed data under the 'full_text_ocr' key.
  def add_artificial_extra(raw_prediction)
    full_text = raw_prediction['full_text_ocr']
    return unless full_text

    @full_text_ocr = Mindee::V1::Parsing::Common::Extras::FullTextOCRExtra.new(full_text)
  end

  # Whether no extra holds a value, i.e. every instance variable is nil (or none is set).
  #
  # @return [bool]
  def empty?
    instance_variables.all? { |var| instance_variable_get(var).nil? }
  end
end
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../standard/position_field'
4
+
5
+ module Mindee
6
+ module V1
7
+ module Parsing
8
+ module Common
9
+ module Extras
10
+ # Full Text OCR result.
11
class FullTextOCRExtra
  # Contents of the full text OCR result.
  # @return [String, nil]
  attr_reader :contents
  # Language used on the page.
  # @return [String, nil]
  attr_reader :language

  # Reads the 'content' and 'language' entries; an attribute is only assigned
  # when its entry is truthy.
  #
  # @param raw_prediction [Hash]
  def initialize(raw_prediction)
    content = raw_prediction['content']
    @contents = content if content

    language = raw_prediction['language']
    @language = language if language
  end

  # @return [String] The OCR contents, or an empty string when there are none.
  def to_s
    return '' unless @contents

    @contents
  end
end
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module V1
5
+ module Parsing
6
+ module Common
7
+ module Extras
8
+ # Retrieval-Augmented Generation extra.
9
class RAGExtra
  # ID of the matching document
  # @return [String, nil]
  attr_reader :matching_document_id

  # Stores the 'matching_document_id' entry when it is truthy.
  #
  # @param raw_prediction [Hash]
  def initialize(raw_prediction)
    document_id = raw_prediction['matching_document_id']
    return unless document_id

    @matching_document_id = document_id
  end

  # String representation.
  # @return [String] The matching document ID, or an empty string when there is none.
  def to_s
    return '' unless @matching_document_id

    @matching_document_id
  end
end
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'extras/extras'
4
+ require_relative 'extras/cropper_extra'
5
+ require_relative 'extras/full_text_ocr_extra'
6
+ require_relative 'extras/rag_extra'
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'product'
4
+
5
+ module Mindee
6
+ module V1
7
+ module Parsing
8
+ # Common fields used for most documents.
9
+ module Common
10
+ # Abstract class for prediction Inferences
11
+ # Holds prediction for a page or entire document.
12
class Inference
  # @return [bool]
  attr_reader :is_rotation_applied
  # @return [Array<Mindee::V1::Parsing::Common::Page>]
  attr_reader :pages
  # @return [Mindee::V1::Parsing::Common::Prediction]
  attr_reader :prediction
  # @return [Mindee::V1::Parsing::Common::Product]
  attr_reader :product
  # Name of the endpoint for this product.
  # @return [String]
  attr_reader :endpoint_name
  # Version for this product.
  # @return [String]
  attr_reader :endpoint_version
  # Whether this product has access to an asynchronous endpoint.
  # @return [bool]
  attr_reader :has_async
  # Whether this product has access to synchronous endpoint.
  # @return [bool]
  attr_reader :has_sync
  # @return [Mindee::V1::Parsing::Common::Extras::Extras] Potential Extras fields sent back along the prediction
  attr_reader :extras

  # Class-level defaults. NOTE(review): these are class instance variables, so the
  # instance-level readers above do not return them - confirm how subclasses expose them.
  @endpoint_name = nil
  @endpoint_version = nil
  @has_async = false
  @has_sync = false

  # @param raw_prediction [Hash]
  def initialize(raw_prediction)
    @is_rotation_applied = raw_prediction['is_rotation_applied']
    @product = Product.new(raw_prediction['product'])
    @pages = [] # : Array[Page]

    # An 'extras' key holding nil is treated like a missing key instead of being passed on.
    raw_extras = raw_prediction['extras']
    @extras = Extras::Extras.new(raw_extras) unless raw_extras.nil?
  end

  # Renders the product header, the document-level prediction and, when at least one
  # page holds a prediction, the page predictions.
  #
  # @return [String]
  def to_s
    rotation = @is_rotation_applied ? 'Yes' : 'No'
    prediction_str = @prediction.to_s

    out_str = String.new
    out_str << "Inference\n#########"
    out_str << "\n:Product: #{@product.name} v#{@product.version}"
    out_str << "\n:Rotation applied: #{rotation}"
    out_str << "\n\nPrediction\n=========="
    out_str << "\n"
    out_str << "#{prediction_str}\n" unless prediction_str.empty?
    if @pages.any? { |page| !page.prediction.nil? }
      out_str << "\nPage Predictions\n================\n\n"
      out_str << @pages.join("\n\n")
    end
    out_str.rstrip!
    out_str
  end
end
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'time'
4
+
5
+ module Mindee
6
+ module V1
7
+ module Parsing
8
+ module Common
9
+ # Job (queue) information on async parsing.
10
class Job
  # @return [String] Mindee ID of the document
  attr_reader :id
  # @return [Time]
  attr_reader :issued_at
  # @return [Time, nil]
  attr_reader :available_at
  # @return [JobStatus, Symbol]
  attr_reader :status
  # @return [Integer, nil]
  attr_reader :millisecs_taken
  # @return [Hash, nil]
  attr_reader :error

  # Parses the ISO 8601 timestamps and maps the status string to a JobStatus value;
  # any other status is converted to a symbol.
  #
  # @param http_response [Hash]
  def initialize(http_response)
    @id = http_response['id']
    @error = http_response['error']
    @issued_at = Time.iso8601(http_response['issued_at'])

    raw_available_at = http_response['available_at']
    unless raw_available_at.nil?
      @available_at = Time.iso8601(raw_available_at)
      # Subtracting two Time objects gives the elapsed seconds as a Float.
      @millisecs_taken = (1000 * (@available_at - @issued_at)).to_i
    end

    raw_status = http_response['status']
    @status = case raw_status
              when 'waiting' then JobStatus::WAITING
              when 'processing' then JobStatus::PROCESSING
              when 'completed' then JobStatus::COMPLETED
              else raw_status&.to_sym
              end
  end
end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module V1
5
+ module Parsing
6
+ module Common
7
+ module OCR
8
+ # Mindee Vision V1.
9
class MVisionV1
  # List of pages.
  # @return [Array<OCRPage>]
  attr_reader :pages

  # Builds one OCRPage per entry of the 'pages' list.
  #
  # @param prediction [Hash]
  def initialize(prediction)
    @pages = prediction['pages'].map { |page_prediction| OCRPage.new(page_prediction) }
  end

  # Joins the text of every page with newlines and strips surrounding whitespace.
  #
  # @return [String]
  def to_s
    @pages.map(&:to_s).join("\n").strip
  end

  # Constructs a line from a column, located underneath given coordinates
  # @param coordinates [Array<Mindee::Geometry::Point>] Polygon or bounding box where the reconstruction
  #   should start.
  # @param page_id [Integer] ID of the page to start at
  # @param x_margin [Float] Margin of misalignment for the x coordinate. It is passed to
  #   Geometry.below? as two values: half the margin and twice the margin.
  # @return [Mindee::V1::Parsing::Common::OCR::OCRLine]
  def reconstruct_vertically(coordinates, page_id, x_margin)
    column = OCRLine.new([])
    @pages[page_id].all_lines.each do |line|
      line.each do |word|
        column.push(word) if Geometry.below?(word.polygon, coordinates, x_margin / 2, x_margin * 2)
      end
    end
    column
  end
end
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,180 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'mvision_v1'
4
+
5
+ module Mindee
6
+ module V1
7
+ module Parsing
8
+ module Common
9
+ # OCR-specific parsing fields and options
10
+ module OCR
11
+ # A single word.
12
class OCRWord
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_reader :confidence
  # @return [String]
  attr_reader :text
  # @return [Mindee::Geometry::Quadrilateral]
  attr_reader :bounding_box
  # @return [Mindee::Geometry::Polygon]
  attr_reader :polygon

  # Reads the word's text, confidence and polygon; the bounding box is only
  # computed when the polygon is not empty.
  #
  # @param prediction [Hash]
  def initialize(prediction)
    @text = prediction['text']
    @confidence = prediction['confidence']
    @polygon = Mindee::Geometry::Polygon.new(prediction['polygon'])
    return if @polygon.nil? || @polygon.empty?

    @bounding_box = Geometry.get_bounding_box(@polygon)
  end

  # @return [String] The word's text (empty string when there is none).
  def to_s
    text.to_s
  end
end
36
+
37
+ # A list of words which are on the same line.
38
class OCRLine < Array
  # Builds the line either from raw word predictions or from already-built words.
  # When both arguments are nil the line starts empty.
  #
  # @param prediction [Array<Hash>, nil] Raw word predictions, each wrapped in an OCRWord.
  # @param from_array [Array<OCRWord>, nil] Existing words, used only when prediction is nil.
  def initialize(prediction = nil, from_array = nil)
    if !prediction.nil?
      super(prediction.map { |word_prediction| OCRWord.new(word_prediction) })
    elsif !from_array.nil?
      super(from_array)
    else
      super()
    end
  end

  # Sort the words on the line from left to right.
  # @return [OCRLine] A new line; the receiver is left untouched.
  def sort_on_x
    ordered = sort_by { |word| Geometry.get_min_max_x(word.polygon).min }
    OCRLine.new(nil, ordered)
  end

  # @return [String] The words' text joined by single spaces.
  def to_s
    map(&:to_s).join(' ')
  end
end
61
+
62
+ # OCR extraction for a single page.
63
class OCRPage
  # All the words on the page, in semi-random order.
  # @return [Array<OCRWord>]
  attr_reader :all_words
  # @return [Array<OCRLine>]
  attr_reader :lines

  # @param prediction [Hash] Page prediction holding the raw words under 'all_words'.
  def initialize(prediction)
    @lines = [] # : Array[Mindee::V1::Parsing::Common::OCR::OCRLine]
    @all_words = prediction['all_words'].map { |word_prediction| OCRWord.new(word_prediction) }
  end

  # All the words on the page, ordered in lines.
  # The lines are computed on first use and reused while the result is not empty.
  # @return [Array<OCRLine>]
  def all_lines
    @lines = to_lines if @lines.empty?
    @lines
  end

  # Renders one line of text per OCR line, skipping lines that are blank.
  #
  # @return [String]
  def to_s
    all_lines.map(&:to_s).reject { |text| text.strip.empty? }.join("\n").strip
  end

  private

  # Helper function that iterates through all the words and compares them to a candidate.
  # The first word whose index is not in indexes becomes the candidate when current is nil;
  # every later unused word on the candidate's line joins it. The resulting line, sorted
  # left to right, is appended to lines.
  #
  # @param sorted_words [Array<OCRWord>]
  # @param current [OCRWord, nil]
  # @param indexes [Array<Integer>] Indexes of the words already placed on a line (mutated).
  # @param lines [Array<OCRLine>] Accumulator for the lines (mutated).
  def parse_one(sorted_words, current, indexes, lines)
    line = OCRLine.new(nil, [])
    sorted_words.each_with_index do |word, idx|
      next if indexes.include?(idx)

      if current.nil?
        current = word
        indexes.push(idx)
        line = OCRLine.new([])
        line.push(word)
      elsif words_on_same_line?(current, word)
        line.push(word)
        indexes.push(idx)
      end
    end
    lines.push(line.sort_on_x) if line.any?
  end

  # Order all the words on the page into lines.
  # @return [Array<OCRLine>]
  def to_lines
    indexes = [] # : Array[Integer]
    lines = [] # : Array[Mindee::V1::Parsing::Common::OCR::OCRLine]

    # make sure words are sorted from top to bottom
    sorted_words = @all_words.sort_by { |word| Geometry.get_min_max_y(word.polygon).min }

    # Every pass places at least one remaining word, so this stops once all are placed.
    parse_one(sorted_words, nil, indexes, lines) until indexes.size >= sorted_words.size
    lines
  end

  # Determine if two words are on the same line: either word's polygon must contain
  # the other's centroid on the y axis.
  # @param current_word [Mindee::V1::Parsing::Common::OCR::OCRWord, nil]
  # @param next_word [Mindee::V1::Parsing::Common::OCR::OCRWord]
  # @return [bool]
  def words_on_same_line?(current_word, next_word)
    # Guard first: a missing reference word cannot share a line with anything.
    return false if current_word.nil?

    current_word.polygon.point_in_y?(next_word.polygon.centroid) ||
      next_word.polygon.point_in_y?(current_word.polygon.centroid)
  end
end
149
+
150
+ # OCR extraction from the entire document.
151
class OCR
  # Mindee Vision v1 results.
  # @return [Mindee::V1::Parsing::Common::OCR::MVisionV1]
  attr_reader :mvision_v1

  # Wraps the 'mvision-v1' entry of the prediction in an MVisionV1 object.
  #
  # @param prediction [Hash]
  def initialize(prediction)
    raw_mvision = prediction['mvision-v1']
    @mvision_v1 = Mindee::V1::Parsing::Common::OCR::MVisionV1.new(raw_mvision)
  end

  # @return [String] String form of the Mindee Vision v1 results.
  def to_s
    mvision_v1.to_s
  end

  # Constructs a line from a column, located underneath given coordinates.
  # Delegates to the Mindee Vision v1 results.
  # @param coordinates [Array<Mindee::Geometry::Point>] Polygon or bounding box where the reconstruction
  #   should start
  # @param page_id [Integer] ID of the page to start at
  # @param x_margin [Float] Margin of misalignment for the x coordinate (defaults to 0.05)
  # @return [Mindee::V1::Parsing::Common::OCR::OCRLine]
  def reconstruct_vertically(coordinates, page_id, x_margin = 0.05)
    mvision_v1.reconstruct_vertically(coordinates, page_id, x_margin)
  end
end
176
+ end
177
+ end
178
+ end
179
+ end
180
+ end
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ocr/ocr'
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module V1
5
+ module Parsing
6
+ module Common
7
+ # Page orientation
8
class Orientation
  # @return [Integer, nil]
  attr_reader :page_id
  # A prediction among these 3 possible outputs:
  #   * 0 degrees: the page is already upright
  #   * 90 degrees: the page must be rotated clockwise to be upright
  #   * 270 degrees: the page must be rotated counterclockwise to be upright
  # @return [Integer, nil]
  attr_reader :value

  # Stores the 'value' entry of the prediction together with the page ID.
  #
  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  def initialize(prediction, page_id)
    @value, @page_id = prediction['value'], page_id
  end
end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'product'
4
+ require_relative 'extras'
5
+
6
+ module Mindee
7
+ module V1
8
+ module Parsing
9
+ # Common fields used for most documents.
10
+ module Common
11
+ # Abstract wrapper class for prediction Pages
12
+ # Holds prediction for a page as well as its orientation and id.
13
class Page
  # ID of the page (as given by the API).
  # @return [Integer]
  attr_reader :page_id
  # Orientation of the page.
  # @return [Mindee::V1::Parsing::Common::Orientation]
  attr_reader :orientation
  # Page prediction
  # @return [Mindee::V1::Parsing::Common::Prediction]
  attr_reader :prediction
  # Additional page-level information.
  # @return [Mindee::V1::Parsing::Common::Extras::Extras]
  attr_reader :extras

  # Reads the page ID and orientation; extras are only built when the 'extras' entry is not nil.
  #
  # @param raw_prediction [Hash]
  def initialize(raw_prediction)
    @page_id = raw_prediction['id']
    @orientation = Mindee::V1::Parsing::Common::Orientation.new(raw_prediction['orientation'], @page_id)

    raw_extras = raw_prediction['extras']
    @extras = Mindee::V1::Parsing::Common::Extras::Extras.new(raw_extras) unless raw_extras.nil?
  end

  # Renders a "Page <id>" title underlined with dashes, directly followed by the prediction.
  #
  # @return [String]
  def to_s
    title = "Page #{@page_id}"
    [title, "\n", '-' * title.size, @prediction.to_s].join
  end
end
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module V1
5
+ module Parsing
6
+ module Common
7
+ # Abstract class for all predictions
8
class Prediction
  # Accepts up to two optional arguments and ignores both.
  def initialize(_prediction = nil, _page_id = nil); end

  # @return [String] Always an empty string in this base class.
  def to_s
    ''
  end
end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module V1
5
+ module Parsing
6
+ module Common
7
+ # Product information
8
class Product
  # @return [String] Name of the product.
  attr_reader :name
  # @return [String?] Type of product.
  attr_reader :type
  # @return [String] Product version.
  attr_reader :version

  # Reads the 'name', 'type' and 'version' entries of the prediction.
  #
  # @param prediction [Hash]
  def initialize(prediction)
    @name, @type, @version = prediction.values_at('name', 'type', 'version')
  end
end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module V1
5
+ module Parsing
6
+ module Common
7
+ # Represents the server response after a document is sent to a workflow.
8
class WorkflowResponse
  # Execution built from the 'execution' entry of the response.
  # @return [Mindee::V1::Parsing::Common::Execution]
  attr_reader :execution
  # @return [Mindee::V1::Parsing::Common::ApiRequest]
  attr_reader :api_request
  # @return [String]
  attr_reader :raw_http

  # @param product_class [Class, nil] Product class handed to the execution; falls back to
  #   Mindee::V1::Product::Universal::Universal when nil or false.
  # @param http_response [Hash]
  # @param raw_http [#to_s] Raw response, stored in its string form.
  def initialize(product_class, http_response, raw_http)
    @raw_http = raw_http.to_s
    @api_request = Mindee::V1::Parsing::Common::ApiRequest.new(http_response['api_request'])

    resolved_class = product_class || Mindee::V1::Product::Universal::Universal
    @execution = Mindee::V1::Parsing::Common::Execution.new(resolved_class, http_response['execution'])
  end
end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'common/api_request'
4
+ require_relative 'common/api_response'
5
+ require_relative 'common/document'
6
+ require_relative 'common/execution'
7
+ require_relative 'common/execution_file'
8
+ require_relative 'common/execution_priority'
9
+ require_relative 'common/inference'
10
+ require_relative 'common/job'
11
+ require_relative 'common/ocr'
12
+ require_relative 'common/prediction'
13
+ require_relative 'common/orientation'
14
+ require_relative 'common/page'
15
+ require_relative 'common/workflow_response'