mindee 4.2.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +1 -1
  3. data/CHANGELOG.md +16 -0
  4. data/docs/code_samples/workflow_execution.txt +1 -1
  5. data/docs/code_samples/workflow_polling.txt +36 -0
  6. data/docs/global_products/financial_document_v1.md +22 -4
  7. data/docs/global_products/invoices_v4.md +22 -4
  8. data/docs/localized_products/us_healthcare_cards_v1.md +17 -1
  9. data/lib/mindee/client.rb +13 -13
  10. data/lib/mindee/http/endpoint.rb +37 -47
  11. data/lib/mindee/http/response_validation.rb +1 -1
  12. data/lib/mindee/parsing/common/api_response.rb +1 -1
  13. data/lib/mindee/parsing/common/document.rb +1 -1
  14. data/lib/mindee/parsing/common/extras/extras.rb +4 -1
  15. data/lib/mindee/parsing/common/extras/rag_extra.rb +24 -0
  16. data/lib/mindee/parsing/common/extras.rb +1 -0
  17. data/lib/mindee/parsing/common/inference.rb +3 -0
  18. data/lib/mindee/parsing/standard/tax_field.rb +9 -9
  19. data/lib/mindee/pdf/pdf_extractor.rb +1 -1
  20. data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_items.rb +2 -2
  21. data/lib/mindee/product/financial_document/financial_document_v1_document.rb +9 -1
  22. data/lib/mindee/product/financial_document/financial_document_v1_line_items.rb +2 -2
  23. data/lib/mindee/product/financial_document/financial_document_v1_page.rb +1 -1
  24. data/lib/mindee/product/fr/bank_statement/bank_statement_v2_transactions.rb +2 -2
  25. data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usages.rb +2 -2
  26. data/lib/mindee/product/fr/energy_bill/energy_bill_v1_subscriptions.rb +2 -2
  27. data/lib/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contributions.rb +2 -2
  28. data/lib/mindee/product/fr/payslip/payslip_v2_salary_details.rb +2 -2
  29. data/lib/mindee/product/fr/payslip/payslip_v3_paid_time_offs.rb +2 -2
  30. data/lib/mindee/product/fr/payslip/payslip_v3_salary_details.rb +2 -2
  31. data/lib/mindee/product/invoice/invoice_v4_document.rb +9 -1
  32. data/lib/mindee/product/invoice/invoice_v4_line_items.rb +2 -2
  33. data/lib/mindee/product/invoice/invoice_v4_page.rb +1 -1
  34. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +2 -2
  35. data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrients.rb +2 -2
  36. data/lib/mindee/product/receipt/receipt_v5_line_items.rb +2 -2
  37. data/lib/mindee/product/resume/resume_v1_certificates.rb +2 -2
  38. data/lib/mindee/product/resume/resume_v1_educations.rb +2 -2
  39. data/lib/mindee/product/resume/resume_v1_languages.rb +2 -2
  40. data/lib/mindee/product/resume/resume_v1_professional_experiences.rb +2 -2
  41. data/lib/mindee/product/resume/resume_v1_social_networks_urls.rb +2 -2
  42. data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copay.rb +2 -2
  43. data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copays.rb +4 -4
  44. data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_document.rb +8 -3
  45. data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_page.rb +1 -1
  46. data/lib/mindee/product/us/us_mail/us_mail_v2_recipient_addresses.rb +2 -2
  47. data/lib/mindee/product/us/us_mail/us_mail_v3_recipient_addresses.rb +2 -2
  48. data/lib/mindee/version.rb +1 -1
  49. data/mindee.gemspec +1 -1
  50. data/sig/custom/net_http.rbs +3 -0
  51. data/sig/mindee/client.rbs +4 -2
  52. data/sig/mindee/http/endpoint.rbs +9 -8
  53. data/sig/mindee/http/response_validation.rbs +3 -3
  54. data/sig/mindee/parsing/common/api_response.rbs +1 -1
  55. data/sig/mindee/parsing/common/extras/full_text_ocr_extra.rbs +2 -2
  56. data/sig/mindee/parsing/common/extras/rag_extra.rbs +15 -0
  57. data/sig/mindee/product/financial_document/financial_document_v1_document.rbs +1 -0
  58. data/sig/mindee/product/invoice/invoice_v4_document.rbs +1 -0
  59. data/sig/mindee/product/us/healthcare_card/healthcare_card_v1_document.rbs +1 -0
  60. metadata +7 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e6c09d5108c2470661075484f14849c7205cbef88d481427bcb612d36ede9835
4
- data.tar.gz: e7b6fa99499a9c3a3d82a9c87ab40dc07cbecd48a4f2d82200cfea2729f9d5cb
3
+ metadata.gz: f9a2e1c98c3971454ebfaefe6902121bfab6e577f693e36a525739eb2dfbeeb9
4
+ data.tar.gz: 7f35351f0f647a165c82f512395e8883647e96165968869bf3904f4c2bf6b4ab
5
5
  SHA512:
6
- metadata.gz: b97c068ddeefb0908e7d9753c0439c86393d1a04b13bf8f256c3dbc93399f213b0405375a036715906a8087eba3eb5126d8e69880c819ef4b0d5b7bf61459e88
7
- data.tar.gz: a15ff1b478675ec58dfd61d0f0483dcc7ab23df1fc02624e8133d5f636101e82f1eeb4f823ebe19a3a06ce70c4288270ebaca7029514e91f204298ad5843c924
6
+ metadata.gz: '072284cd8bb18b6190b870aa014fb278744a6db35d1210ca57dbddb1d78c71a2675f2675a7ce581b83cbb1fdcd88cdeb6dedd6892c2402b43cf3f711425f9e46'
7
+ data.tar.gz: 4bad8302b9bef1496fc47e5f04cb301dd3c81a99dff5800ffbc1aaf3ef4bf1f5a6493f4908178b337e7f25596b7f9ce2946d47d7526b38065ccfbd7ac045624a
data/.rubocop.yml CHANGED
@@ -10,7 +10,7 @@ AllCops:
10
10
  - local_test/*
11
11
  - Steepfile
12
12
 
13
- TargetRubyVersion: 3.0
13
+ TargetRubyVersion: 3.0.0
14
14
  SuggestExtensions: false
15
15
 
16
16
  Gemspec/DevelopmentDependencies:
data/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
1
1
  # Mindee Ruby API Library Changelog
2
2
 
3
+ ## v4.4.0 - 2025-04-23
4
+ ### Changes
5
+ * :sparkles: add support for workflow polling
6
+ * :sparkles: add extras accessor from inference
7
+ ### Fixes
8
+ * :recycle: fix misc typing issues
9
+ * :bug: fix improper return format for `raw_http`
10
+
11
+
12
+ ## v4.3.0 - 2025-04-08
13
+ ### Changes
14
+ * :sparkles: add support for Financial Document V1.12
15
+ * :sparkles: add support for Invoices V4.10
16
+ * :sparkles: add support for US Healthcare Cards V1.2
17
+
18
+
3
19
  ## v4.2.0 - 2025-03-28
4
20
  ### Changes
5
21
  * :coffin: remove support for US W9
data/docs/code_samples/workflow_execution.txt CHANGED
@@ -3,7 +3,7 @@
3
3
  # gem install mindee
4
4
  #
5
5
 
6
- require_relative 'mindee'
6
+ require 'mindee'
7
7
 
8
8
  workflow_id = 'workflow-id'
9
9
 
data/docs/code_samples/workflow_polling.txt ADDED
@@ -0,0 +1,36 @@
1
+ #
2
+ # Install the Ruby client library by running:
3
+ # gem install mindee
4
+ #
5
+
6
+ require 'mindee'
7
+
8
+ workflow_id = 'workflow-id'
9
+
10
+ # Init a new client
11
+ mindee_client = Mindee::Client.new
12
+
13
+ # Load a file from disk
14
+ input_source = mindee_client.source_from_path('path/to/my/file.ext')
15
+
16
+ # Initialize a custom endpoint for this product
17
+ custom_endpoint = mindee_client.create_endpoint(
18
+ account_name: 'my-account',
19
+ endpoint_name: 'my-endpoint',
20
+ version: 'my-version'
21
+ )
22
+
23
+ # Parse the file
24
+ result = mindee_client.parse(
25
+ input_source,
26
+ Mindee::Product::Universal::Universal,
27
+ endpoint: custom_endpoint,
28
+ options: {
29
+ rag: true,
30
+ workflow_id: workflow_id
31
+ }
32
+ )
33
+
34
+ # Print a full summary of the parsed data in RST format
35
+ puts result.document
36
+
data/docs/global_products/financial_document_v1.md CHANGED
@@ -12,7 +12,7 @@ The Ruby Client Library supports the [Financial Document API](https://platform.m
12
12
  > | Specification | Details |
13
13
  > | ------------------------------ | -------------------------------------------------- |
14
14
  > | Endpoint Name | `financial_document` |
15
- > | Recommended Version | `v1.11` |
15
+ > | Recommended Version | `v1.12` |
16
16
  > | Supports Polling/Webhooks | ✔️ Yes |
17
17
  > | Support Synchronous HTTP Calls | ✔️ Yes |
18
18
  > | Geography | 🌐 Global |
@@ -93,7 +93,7 @@ puts result.document
93
93
  ########
94
94
  Document
95
95
  ########
96
- :Mindee ID: f469a24d-3875-4a83-ad43-e0d5aa9da604
96
+ :Mindee ID: a80ac0ee-26f6-4e2e-988a-960b240d5ba7
97
97
  :Filename: default_sample.jpg
98
98
 
99
99
  Inference
@@ -118,7 +118,7 @@ Prediction
118
118
  +---------------+--------+----------+---------------+
119
119
  | Base | Code | Rate (%) | Amount |
120
120
  +===============+========+==========+===============+
121
- | | | 5.00 | 9.75 |
121
+ | 195.00 | | 5.00 | 9.75 |
122
122
  +---------------+--------+----------+---------------+
123
123
  :Supplier Payment Details:
124
124
  :Supplier Name: JOHN SMITH
@@ -170,7 +170,7 @@ Page 0
170
170
  +---------------+--------+----------+---------------+
171
171
  | Base | Code | Rate (%) | Amount |
172
172
  +===============+========+==========+===============+
173
- | | | 5.00 | 9.75 |
173
+ | 195.00 | | 5.00 | 9.75 |
174
174
  +---------------+--------+----------+---------------+
175
175
  :Supplier Payment Details:
176
176
  :Supplier Name: JOHN SMITH
@@ -382,6 +382,24 @@ puts result.document.inference.prediction.document_number.value
382
382
  puts result.document.inference.prediction.document_type.value
383
383
  ```
384
384
 
385
+ ## Document Type Extended
386
+ **document_type_extended** ([ClassificationField](#classification-field)): Document type extended.
387
+
388
+ #### Possible values include:
389
+ - 'CREDIT NOTE'
390
+ - 'INVOICE'
391
+ - 'OTHER'
392
+ - 'OTHER_FINANCIAL'
393
+ - 'PAYSLIP'
394
+ - 'PURCHASE ORDER'
395
+ - 'QUOTE'
396
+ - 'RECEIPT'
397
+ - 'STATEMENT'
398
+
399
+ ```rb
400
+ puts result.document.inference.prediction.document_type_extended.value
401
+ ```
402
+
385
403
  ## Due Date
386
404
  **due_date** ([DateField](#date-field)): The date on which the payment is due.
387
405
 
data/docs/global_products/invoices_v4.md CHANGED
@@ -12,7 +12,7 @@ The Ruby Client Library supports the [Invoice API](https://platform.mindee.com/m
12
12
  > | Specification | Details |
13
13
  > | ------------------------------ | -------------------------------------------------- |
14
14
  > | Endpoint Name | `invoices` |
15
- > | Recommended Version | `v4.9` |
15
+ > | Recommended Version | `v4.10` |
16
16
  > | Supports Polling/Webhooks | ✔️ Yes |
17
17
  > | Support Synchronous HTTP Calls | ✔️ Yes |
18
18
  > | Geography | 🌐 Global |
@@ -93,7 +93,7 @@ puts result.document
93
93
  ########
94
94
  Document
95
95
  ########
96
- :Mindee ID: 86b1833f-138b-4a01-8387-860204b0e631
96
+ :Mindee ID: b55db8f9-ae3b-4f05-b2f1-ec0ced5e5b70
97
97
  :Filename: default_sample.jpg
98
98
 
99
99
  Inference
@@ -117,7 +117,7 @@ Prediction
117
117
  +---------------+--------+----------+---------------+
118
118
  | Base | Code | Rate (%) | Amount |
119
119
  +===============+========+==========+===============+
120
- | | | 8.00 | 193.20 |
120
+ | 2145.00 | | 8.00 | 193.20 |
121
121
  +---------------+--------+----------+---------------+
122
122
  :Supplier Payment Details:
123
123
  :Supplier Name: TURNPIKE DESIGNS
@@ -163,7 +163,7 @@ Page 0
163
163
  +---------------+--------+----------+---------------+
164
164
  | Base | Code | Rate (%) | Amount |
165
165
  +===============+========+==========+===============+
166
- | | | 8.00 | 193.20 |
166
+ | 2145.00 | | 8.00 | 193.20 |
167
167
  +---------------+--------+----------+---------------+
168
168
  :Supplier Payment Details:
169
169
  :Supplier Name: TURNPIKE DESIGNS
@@ -344,6 +344,24 @@ puts result.document.inference.prediction.date.value
344
344
  puts result.document.inference.prediction.document_type.value
345
345
  ```
346
346
 
347
+ ## Document Type Extended
348
+ **document_type_extended** ([ClassificationField](#classification-field)): Document type extended.
349
+
350
+ #### Possible values include:
351
+ - 'CREDIT NOTE'
352
+ - 'INVOICE'
353
+ - 'OTHER'
354
+ - 'OTHER_FINANCIAL'
355
+ - 'PAYSLIP'
356
+ - 'PURCHASE ORDER'
357
+ - 'QUOTE'
358
+ - 'RECEIPT'
359
+ - 'STATEMENT'
360
+
361
+ ```rb
362
+ puts result.document.inference.prediction.document_type_extended.value
363
+ ```
364
+
347
365
  ## Due Date
348
366
  **due_date** ([DateField](#date-field)): The date on which the payment is due.
349
367
 
data/docs/localized_products/us_healthcare_cards_v1.md CHANGED
@@ -12,7 +12,7 @@ The Ruby Client Library supports the [Healthcare Card API](https://platform.mind
12
12
  > | Specification | Details |
13
13
  > | ------------------------------ | -------------------------------------------------- |
14
14
  > | Endpoint Name | `us_healthcare_cards` |
15
- > | Recommended Version | `v1.1` |
15
+ > | Recommended Version | `v1.2` |
16
16
  > | Supports Polling/Webhooks | ✔️ Yes |
17
17
  > | Support Synchronous HTTP Calls | ❌ No |
18
18
  > | Geography | 🇺🇸 United States |
@@ -138,6 +138,15 @@ A `HealthcareCardV1Copay` implements the following attributes:
138
138
  * `service_fees` (Float): The price of service.
139
139
  * `service_name` (String): The name of service of the copay.
140
140
 
141
+ #### Possible values include:
142
+ - primary_care
143
+ - emergency_room
144
+ - urgent_care
145
+ - specialist
146
+ - office_visit
147
+ - prescription
148
+
149
+
141
150
  # Attributes
142
151
  The following fields are extracted for Healthcare Card V1:
143
152
 
@@ -222,6 +231,13 @@ puts result.document.inference.prediction.rx_bin.value
222
231
  puts result.document.inference.prediction.rx_grp.value
223
232
  ```
224
233
 
234
+ ## RX ID
235
+ **rx_id** ([StringField](#string-field)): The ID number for prescription drug coverage.
236
+
237
+ ```rb
238
+ puts result.document.inference.prediction.rx_id.value
239
+ ```
240
+
225
241
  ## RX PCN
226
242
  **rx_pcn** ([StringField](#string-field)): The PCN number for prescription drug coverage.
227
243
 
data/lib/mindee/client.rb CHANGED
@@ -54,8 +54,8 @@ module Mindee
54
54
  # @!attribute delay_sec [Numeric] Delay between polling attempts. Defaults to 1.5.
55
55
  # @!attribute max_retries [Integer] Maximum number of retries. Defaults to 80.
56
56
  class ParseOptions
57
- attr_accessor :all_words, :full_text, :close_file, :page_options, :cropper,
58
- :initial_delay_sec, :delay_sec, :max_retries
57
+ attr_accessor :all_words, :full_text, :close_file, :page_options, :cropper, :rag,
58
+ :workflow_id, :initial_delay_sec, :delay_sec, :max_retries
59
59
 
60
60
  def initialize(params: {})
61
61
  params = params.transform_keys(&:to_sym)
@@ -66,6 +66,8 @@ module Mindee
66
66
  raw_page_options = PageOptions.new(params: raw_page_options) unless raw_page_options.is_a?(PageOptions)
67
67
  @page_options = raw_page_options
68
68
  @cropper = params.fetch(:cropper, false)
69
+ @rag = params.fetch(:rag, false)
70
+ @workflow_id = params.fetch(:workflow_id, nil)
69
71
  @initial_delay_sec = params.fetch(:initial_delay_sec, 2)
70
72
  @delay_sec = params.fetch(:delay_sec, 1.5)
71
73
  @max_retries = params.fetch(:max_retries, 80)
@@ -176,13 +178,10 @@ module Mindee
176
178
 
177
179
  prediction, raw_http = endpoint.predict(
178
180
  input_source,
179
- options.all_words,
180
- options.full_text,
181
- options.close_file,
182
- options.cropper
181
+ options
183
182
  )
184
183
 
185
- Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
184
+ Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_s)
186
185
  end
187
186
 
188
187
  # Enqueue a document for async parsing
@@ -207,6 +206,8 @@ module Mindee
207
206
  # - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages.
208
207
  # * `:cropper` [bool] Whether to include cropper results for each page.
209
208
  # This performs a cropping operation on the server and will increase response time.
209
+ # * `:rag` [bool] Whether to enable Retrieval-Augmented Generation. Only works if a Workflow ID is provided.
210
+ # * `:workflow_id` [String, nil] ID of the workflow to use.
210
211
  # @param endpoint [Mindee::HTTP::Endpoint] Endpoint of the API.
211
212
  # @return [Mindee::Parsing::Common::ApiResponse]
212
213
  def enqueue(input_source, product_class, endpoint: nil, options: {})
@@ -216,12 +217,9 @@ module Mindee
216
217
 
217
218
  prediction, raw_http = endpoint.predict_async(
218
219
  input_source,
219
- opts.all_words,
220
- opts.full_text,
221
- opts.close_file,
222
- opts.cropper
220
+ opts
223
221
  )
224
- Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
222
+ Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_json)
225
223
  end
226
224
 
227
225
  # Parses a queued document
@@ -236,7 +234,7 @@ module Mindee
236
234
  endpoint = initialize_endpoint(product_class) if endpoint.nil?
237
235
  logger.debug("Fetching queued document as '#{endpoint.url_root}'")
238
236
  prediction, raw_http = endpoint.parse_async(job_id)
239
- Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
237
+ Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_json)
240
238
  end
241
239
 
242
240
  # Enqueue a document for async parsing and automatically try to retrieve it
@@ -261,6 +259,8 @@ module Mindee
261
259
  # - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages.
262
260
  # * `:cropper` [bool, nil] Whether to include cropper results for each page.
263
261
  # This performs a cropping operation on the server and will increase response time.
262
+ # * `:rag` [bool] Whether to enable Retrieval-Augmented Generation. Only works if a Workflow ID is provided.
263
+ # * `:workflow_id` [String, nil] ID of the workflow to use.
264
264
  # * `:initial_delay_sec` [Numeric] Initial delay before polling. Defaults to 2.
265
265
  # * `:delay_sec` [Numeric] Delay between polling attempts. Defaults to 1.5.
266
266
  # * `:max_retries` [Integer] Maximum number of retries. Defaults to 80.
data/lib/mindee/http/endpoint.rb CHANGED
@@ -34,6 +34,8 @@ module Mindee
34
34
  attr_reader :request_timeout
35
35
  # @return [String]
36
36
  attr_reader :url_root
37
+ # @return [String]
38
+ attr_reader :base_url
37
39
 
38
40
  def initialize(owner, url_name, version, api_key: '')
39
41
  @owner = owner
@@ -44,25 +46,19 @@ module Mindee
44
46
  logger.debug('API key set from environment')
45
47
  end
46
48
  @api_key = api_key.nil? || api_key.empty? ? ENV.fetch(API_KEY_ENV_NAME, API_KEY_DEFAULT) : api_key
47
- base_url = ENV.fetch(BASE_URL_ENV_NAME, BASE_URL_DEFAULT)
48
- @url_root = "#{base_url.chomp('/')}/products/#{@owner}/#{@url_name}/v#{@version}"
49
+ @base_url = ENV.fetch(BASE_URL_ENV_NAME, BASE_URL_DEFAULT).chomp('/')
50
+ @url_root = "#{@base_url}/products/#{@owner}/#{@url_name}/v#{@version}"
49
51
  end
50
52
 
51
53
  # Call the prediction API.
52
54
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
53
- # @param all_words [bool] Whether the full word extraction needs to be performed
54
- # @param full_text [bool] Whether to include the full OCR text response in compatible APIs
55
- # @param close_file [bool] Whether the file will be closed after reading
56
- # @param cropper [bool] Whether a cropping operation will be applied
55
+ # @param opts [ParseOptions] Parse options.
57
56
  # @return [Array]
58
- def predict(input_source, all_words, full_text, close_file, cropper)
57
+ def predict(input_source, opts)
59
58
  check_api_key
60
59
  response = predict_req_post(
61
60
  input_source,
62
- all_words: all_words,
63
- full_text: full_text,
64
- close_file: close_file,
65
- cropper: cropper
61
+ opts
66
62
  )
67
63
  if !response.nil? && response.respond_to?(:body)
68
64
  hashed_response = JSON.parse(response.body, object_class: Hash)
@@ -76,14 +72,11 @@ module Mindee
76
72
 
77
73
  # Call the prediction API.
78
74
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
79
- # @param all_words [bool] Whether the full word extraction needs to be performed
80
- # @param full_text [bool] Whether to include the full OCR text response in compatible APIs.
81
- # @param close_file [bool] Whether the file will be closed after reading
82
- # @param cropper [bool] Whether a cropping operation will be applied
75
+ # @param opts [ParseOptions, Hash] Parse options.
83
76
  # @return [Array]
84
- def predict_async(input_source, all_words, full_text, close_file, cropper)
77
+ def predict_async(input_source, opts)
85
78
  check_api_key
86
- response = document_queue_req_get(input_source, all_words, full_text, close_file, cropper)
79
+ response = document_queue_req_post(input_source, opts)
87
80
  if !response.nil? && response.respond_to?(:body)
88
81
  hashed_response = JSON.parse(response.body, object_class: Hash)
89
82
  return [hashed_response, response.body] if ResponseValidation.valid_async_response?(response)
@@ -100,7 +93,7 @@ module Mindee
100
93
  # @return [Array]
101
94
  def parse_async(job_id)
102
95
  check_api_key
103
- response = document_queue_req(job_id)
96
+ response = document_queue_req_get(job_id)
104
97
  hashed_response = JSON.parse(response.body, object_class: Hash)
105
98
  return [hashed_response, response.body] if ResponseValidation.valid_async_response?(response)
106
99
 
@@ -112,17 +105,14 @@ module Mindee
112
105
  private
113
106
 
114
107
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
115
- # @param all_words [bool] Whether the full word extraction needs to be performed
116
- # @param full_text [bool] Whether to include the full OCR text response in compatible APIs.
117
- # @param close_file [bool] Whether the file will be closed after reading
118
- # @param cropper [bool] Whether a cropping operation will be applied
108
+ # @param opts [ParseOptions] Parse options.
119
109
  # @return [Net::HTTPResponse, nil]
120
- def predict_req_post(input_source, all_words: false, full_text: false, close_file: true, cropper: false)
110
+ def predict_req_post(input_source, opts)
121
111
  uri = URI("#{@url_root}/predict")
122
112
 
123
113
  params = {} # : Hash[Symbol | String, untyped]
124
- params[:cropper] = 'true' if cropper
125
- params[:full_text_ocr] = 'true' if full_text
114
+ params[:cropper] = 'true' if opts.cropper
115
+ params[:full_text_ocr] = 'true' if opts.full_text
126
116
  uri.query = URI.encode_www_form(params)
127
117
 
128
118
  headers = {
@@ -131,32 +121,33 @@ module Mindee
131
121
  }
132
122
  req = Net::HTTP::Post.new(uri, headers)
133
123
  form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource)
134
- [['document', input_source.url]]
124
+ [['document', input_source.url]] # : Array[untyped]
135
125
  else
136
- [input_source.read_contents(close: close_file)]
126
+ [input_source.read_contents(close: opts.close_file)] # : Array[untyped]
137
127
  end
138
- form_data.push ['include_mvision', 'true'] if all_words
128
+ form_data.push ['include_mvision', 'true'] if opts.all_words
139
129
 
140
130
  req.set_form(form_data, 'multipart/form-data')
141
- response = nil
142
131
  Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
143
- response = http.request(req)
132
+ return http.request(req)
144
133
  end
145
- response
134
+ raise Mindee::Errors::MindeeError, 'Could not resolve server response.'
146
135
  end
147
136
 
148
137
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
149
- # @param all_words [bool] Whether the full word extraction needs to be performed
150
- # @param full_text [bool] Whether to include the full OCR text response in compatible APIs.
151
- # @param close_file [bool] Whether the file will be closed after reading
152
- # @param cropper [bool] Whether a cropping operation will be applied
153
- # @return [Net::HTTPResponse, nil]
154
- def document_queue_req_get(input_source, all_words, full_text, close_file, cropper)
155
- uri = URI("#{@url_root}/predict_async")
138
+ # @param opts [ParseOptions] Parse options.
139
+ # @return [Net::HTTPResponse]
140
+ def document_queue_req_post(input_source, opts)
141
+ uri = if opts.workflow_id
142
+ URI("#{@base_url}/workflows/#{opts.workflow_id}/predict_async")
143
+ else
144
+ URI("#{@url_root}/predict_async")
145
+ end
156
146
 
157
147
  params = {} # : Hash[Symbol | String, untyped]
158
- params[:cropper] = 'true' if cropper
159
- params[:full_text_ocr] = 'true' if full_text
148
+ params[:cropper] = 'true' if opts.cropper
149
+ params[:full_text_ocr] = 'true' if opts.full_text
150
+ params[:rag] = 'true' if opts.rag
160
151
  uri.query = URI.encode_www_form(params)
161
152
 
162
153
  headers = {
@@ -165,24 +156,23 @@ module Mindee
165
156
  }
166
157
  req = Net::HTTP::Post.new(uri, headers)
167
158
  form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource)
168
- [['document', input_source.url]]
159
+ [['document', input_source.url]] # : Array[untyped]
169
160
  else
170
- [input_source.read_contents(close: close_file)]
161
+ [input_source.read_contents(close: opts.close_file)] # : Array[untyped]
171
162
  end
172
- form_data.push ['include_mvision', 'true'] if all_words
163
+ form_data.push ['include_mvision', 'true'] if opts.all_words
173
164
 
174
165
  req.set_form(form_data, 'multipart/form-data')
175
166
 
176
- response = nil
177
167
  Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
178
- response = http.request(req)
168
+ return http.request(req)
179
169
  end
180
- response
170
+ raise Mindee::Errors::MindeeError, 'Could not resolve server response.'
181
171
  end
182
172
 
183
173
  # @param job_id [String]
184
174
  # @return [Net::HTTPResponse, nil]
185
- def document_queue_req(job_id)
175
+ def document_queue_req_get(job_id)
186
176
  uri = URI("#{@url_root}/documents/queue/#{job_id}")
187
177
 
188
178
  headers = {
data/lib/mindee/http/response_validation.rb CHANGED
@@ -42,7 +42,7 @@ module Mindee
42
42
  # Checks and correct the response object depending on the possible kinds of returns.
43
43
  # @param response [Net::HTTPResponse]
44
44
  def self.clean_request!(response)
45
- return response if (response.code.to_i < 200) || (response.code.to_i > 302)
45
+ return response if (response.code.to_i < 200) || (response.code.to_i > 302) # : Net::HTTPResponse
46
46
 
47
47
  return response if response.body.empty?
48
48
 
data/lib/mindee/parsing/common/api_response.rb CHANGED
@@ -35,7 +35,7 @@ module Mindee
35
35
 
36
36
  # @param product_class [Mindee::Inference]
37
37
  # @param http_response [Hash]
38
- # @param raw_http [String]
38
+ # @param raw_http [Hash]
39
39
  def initialize(product_class, http_response, raw_http)
40
40
  logger.debug('Handling API response')
41
41
  @raw_http = raw_http.to_s
data/lib/mindee/parsing/common/document.rb CHANGED
@@ -69,7 +69,7 @@ module Mindee
69
69
 
70
70
  full_text_ocr = String.new
71
71
  raw_prediction.dig('inference', 'pages').each do |page|
72
- full_text_ocr << (page['extras']['full_text_ocr']['content'])
72
+ full_text_ocr << page['extras']['full_text_ocr']['content']
73
73
  end
74
74
  artificial_text_obj = { 'content' => full_text_ocr }
75
75
  if @extras.nil? || @extras.empty?
data/lib/mindee/parsing/common/extras/extras.rb CHANGED
@@ -13,6 +13,8 @@ module Mindee
13
13
  attr_reader :cropper
14
14
  # @return [Mindee::Parsing::Common::Extras::FullTextOCRExtra, nil]
15
15
  attr_reader :full_text_ocr
16
+ # @return [RAGExtra, nil]
17
+ attr_reader :rag
16
18
 
17
19
  def initialize(raw_prediction)
18
20
  if raw_prediction['cropper']
@@ -21,9 +23,10 @@ module Mindee
21
23
  if raw_prediction['full_text_ocr']
22
24
  @full_text_ocr = Mindee::Parsing::Common::Extras::FullTextOCRExtra.new(raw_prediction['full_text_ocr'])
23
25
  end
26
+ @rag = Mindee::Parsing::Common::Extras::RAGExtra.new(raw_prediction['rag']) if raw_prediction['rag']
24
27
 
25
28
  raw_prediction.each do |key, value|
26
- instance_variable_set("@#{key}", value) unless ['cropper', 'full_text_ocr'].include?(key)
29
+ instance_variable_set("@#{key}", value) unless ['cropper', 'full_text_ocr', 'rag'].include?(key)
27
30
  end
28
31
  end
29
32
 
data/lib/mindee/parsing/common/extras/rag_extra.rb ADDED
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Common
6
+ module Extras
7
+ # Retrieval-Augmented Generation extra.
8
+ class RAGExtra
9
+ # ID of the matching document
10
+ # @return [String, nil]
11
+ attr_reader :matching_document_id
12
+
13
+ def initialize(raw_prediction)
14
+ @matching_document_id = raw_prediction['matching_document_id'] if raw_prediction['matching_document_id']
15
+ end
16
+
17
+ def to_s
18
+ @matching_document_id || ''
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
data/lib/mindee/parsing/common/extras.rb CHANGED
@@ -3,3 +3,4 @@
3
3
  require_relative 'extras/extras'
4
4
  require_relative 'extras/cropper_extra'
5
5
  require_relative 'extras/full_text_ocr_extra'
6
+ require_relative 'extras/rag_extra'
data/lib/mindee/parsing/common/inference.rb CHANGED
@@ -29,6 +29,8 @@ module Mindee
29
29
  # Whether this product has access to synchronous endpoint.
30
30
  # @return [bool]
31
31
  attr_reader :has_sync
32
+ # @return [Mindee::Parsing::Common::Extras::Extras] Potential Extras fields sent back along the prediction.
33
+ attr_reader :extras
32
34
 
33
35
  @endpoint_name = nil
34
36
  @endpoint_version = nil
@@ -40,6 +42,7 @@ module Mindee
40
42
  @is_rotation_applied = raw_prediction['is_rotation_applied']
41
43
  @product = Product.new(raw_prediction['product'])
42
44
  @pages = [] # : Array[Page]
45
+ @extras = Extras::Extras.new(raw_prediction['extras'])
43
46
  end
44
47
 
45
48
  # @return [String]
data/lib/mindee/parsing/standard/tax_field.rb CHANGED
@@ -39,10 +39,10 @@ module Mindee
39
39
  def to_s
40
40
  printable = printable_values
41
41
  out_str = String.new
42
- out_str << ("Base: #{printable[:base]}")
43
- out_str << (", Code: #{printable[:code]}")
44
- out_str << (", Rate (%): #{printable[:rate]}")
45
- out_str << (", Amount: #{printable[:value]}")
42
+ out_str << "Base: #{printable[:base]}"
43
+ out_str << ", Code: #{printable[:code]}"
44
+ out_str << ", Rate (%): #{printable[:rate]}"
45
+ out_str << ", Amount: #{printable[:value]}"
46
46
  out_str.strip
47
47
  end
48
48
 
@@ -60,10 +60,10 @@ module Mindee
60
60
  def to_table_line
61
61
  printable = printable_values
62
62
  out_str = String.new
63
- out_str << ("| #{printable[:base].ljust(13, ' ')}")
64
- out_str << (" | #{printable[:code].ljust(6, ' ')}")
65
- out_str << (" | #{printable[:rate].ljust(8, ' ')}")
66
- out_str << (" | #{printable[:value].ljust(13, ' ')} |")
63
+ out_str << "| #{printable[:base].ljust(13, ' ')}"
64
+ out_str << " | #{printable[:code].ljust(6, ' ')}"
65
+ out_str << " | #{printable[:rate].ljust(8, ' ')}"
66
+ out_str << " | #{printable[:value].ljust(13, ' ')} |"
67
67
  out_str.strip
68
68
  end
69
69
  end
@@ -94,7 +94,7 @@ module Mindee
94
94
  return '' if nil? || empty?
95
95
 
96
96
  out_str = String.new
97
- out_str << ("\n#{line_separator('-')}")
97
+ out_str << "\n#{line_separator('-')}"
98
98
  out_str << "\n | Base | Code | Rate (%) | Amount |"
99
99
  out_str << "\n#{line_separator('=')}"
100
100
  each do |entry|
data/lib/mindee/pdf/pdf_extractor.rb CHANGED
@@ -58,7 +58,7 @@ module Mindee
58
58
  end
59
59
  formatted_max_index = format('%03d', page_index_list[page_index_list.length - 1] + 1).to_s
60
60
  field_filename = "#{basename}_#{format('%03d',
61
- (page_index_list[0] + 1))}-#{formatted_max_index}#{extension}"
61
+ page_index_list[0] + 1)}-#{formatted_max_index}#{extension}"
62
62
  extracted_pdf = Mindee::PDF::PDFExtractor::ExtractedPDF.new(cut_pages(page_index_list),
63
63
  field_filename)
64
64
  extracted_pdfs << extracted_pdf