mindee 4.0.0 → 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/bin/cli_products.rb +172 -0
  4. data/bin/mindee.rb +6 -121
  5. data/docs/advanced_file_operations.md +7 -9
  6. data/docs/code_samples/invoice_splitter_v1_async.txt +2 -1
  7. data/docs/code_samples/{license_plates_v1.txt → us_mail_v2_async.txt} +1 -1
  8. data/docs/code_samples/workflow_execution.txt +28 -0
  9. data/docs/getting_started.md +1 -1
  10. data/docs/global_products/barcode_reader_v1.md +1 -1
  11. data/docs/global_products/bill_of_lading_v1.md +1 -1
  12. data/docs/global_products/business_card_v1.md +1 -1
  13. data/docs/global_products/cropper_v1.md +1 -1
  14. data/docs/global_products/delivery_notes_v1.md +2 -2
  15. data/docs/global_products/driver_license_v1.md +1 -1
  16. data/docs/global_products/expense_receipts_v5.md +26 -25
  17. data/docs/global_products/financial_document_v1.md +34 -33
  18. data/docs/global_products/international_id_v2.md +7 -7
  19. data/docs/global_products/invoice_splitter_v1.md +53 -33
  20. data/docs/global_products/invoices_v4.md +19 -19
  21. data/docs/global_products/multi_receipts_detector_v1.md +1 -1
  22. data/docs/global_products/nutrition_facts_v1.md +1 -1
  23. data/docs/global_products/passport_v1.md +1 -1
  24. data/docs/global_products/resume_v1.md +4 -4
  25. data/docs/global_products/universal.md +1 -1
  26. data/docs/global_products.md +1 -1
  27. data/docs/loading_a_document.md +87 -73
  28. data/docs/localized_products/bank_account_details_v2.md +1 -1
  29. data/docs/localized_products/bank_check_v1.md +1 -1
  30. data/docs/localized_products/bank_statement_fr_v2.md +1 -1
  31. data/docs/localized_products/carte_grise_v1.md +1 -1
  32. data/docs/localized_products/energy_bill_fra_v1.md +11 -3
  33. data/docs/localized_products/french_healthcard_v1.md +1 -1
  34. data/docs/localized_products/idcard_fr_v2.md +6 -6
  35. data/docs/localized_products/ind_passport_v1.md +5 -5
  36. data/docs/localized_products/payslip_fra_v3.md +1 -1
  37. data/docs/localized_products/us_healthcare_cards_v1.md +2 -2
  38. data/docs/localized_products/us_mail_v3.md +1 -1
  39. data/docs/localized_products/us_w9_v1.md +1 -1
  40. data/docs/localized_products.md +1 -1
  41. data/lib/mindee/client.rb +6 -8
  42. data/lib/mindee/http/workflow_endpoint.rb +23 -23
  43. data/lib/mindee/pdf/pdf_extractor.rb +3 -3
  44. data/lib/mindee/product/delivery_note/delivery_note_v1_document.rb +1 -1
  45. data/lib/mindee/product/delivery_note/delivery_note_v1_page.rb +1 -1
  46. data/lib/mindee/product/financial_document/financial_document_v1_document.rb +15 -14
  47. data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +1 -1
  48. data/lib/mindee/product/financial_document/financial_document_v1_line_items.rb +1 -1
  49. data/lib/mindee/product/fr/energy_bill/energy_bill_v1_document.rb +5 -1
  50. data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usage.rb +18 -0
  51. data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usages.rb +4 -0
  52. data/lib/mindee/product/fr/energy_bill/energy_bill_v1_meter_detail.rb +2 -2
  53. data/lib/mindee/product/fr/energy_bill/energy_bill_v1_page.rb +1 -1
  54. data/lib/mindee/product/invoice/invoice_v4_document.rb +14 -14
  55. data/lib/mindee/product/invoice/invoice_v4_line_item.rb +2 -2
  56. data/lib/mindee/product/invoice/invoice_v4_line_items.rb +1 -1
  57. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +2 -2
  58. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +38 -42
  59. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +55 -0
  60. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +48 -0
  61. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +9 -5
  62. data/lib/mindee/product/receipt/receipt_v5_document.rb +8 -8
  63. data/lib/mindee/product/receipt/receipt_v5_line_item.rb +1 -1
  64. data/lib/mindee/product/receipt/receipt_v5_line_items.rb +1 -1
  65. data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_document.rb +1 -1
  66. data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_page.rb +1 -1
  67. data/lib/mindee/product/{eu/license_plate/license_plate_v1.rb → us/us_mail/us_mail_v2.rb} +13 -13
  68. data/lib/mindee/product/us/us_mail/us_mail_v2_document.rb +105 -0
  69. data/lib/mindee/product/{eu/license_plate/license_plate_v1_page.rb → us/us_mail/us_mail_v2_page.rb} +8 -8
  70. data/lib/mindee/product/us/us_mail/us_mail_v2_recipient_address.rb +105 -0
  71. data/lib/mindee/product/us/us_mail/us_mail_v2_recipient_addresses.rb +63 -0
  72. data/lib/mindee/product/us/us_mail/us_mail_v2_sender_address.rb +66 -0
  73. data/lib/mindee/product.rb +5 -5
  74. data/lib/mindee/version.rb +1 -1
  75. data/mindee.gemspec +3 -3
  76. data/sig/custom/net_http.rbs +5 -0
  77. data/sig/mindee/client.rbs +1 -0
  78. data/sig/mindee/http/workflow_endpoint.rbs +8 -2
  79. data/sig/mindee/pdf/pdf_extractor.rbs +1 -1
  80. data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_items.rbs +1 -1
  81. data/sig/mindee/product/financial_document/financial_document_v1_line_items.rbs +1 -1
  82. data/sig/mindee/product/fr/bank_statement/bank_statement_v2_transactions.rbs +1 -1
  83. data/sig/mindee/product/fr/energy_bill/energy_bill_v1_energy_usage.rbs +2 -0
  84. data/sig/mindee/product/fr/energy_bill/energy_bill_v1_energy_usages.rbs +1 -1
  85. data/sig/mindee/product/fr/energy_bill/energy_bill_v1_subscriptions.rbs +1 -1
  86. data/sig/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contributions.rbs +1 -1
  87. data/sig/mindee/product/fr/payslip/payslip_v2_salary_details.rbs +1 -1
  88. data/sig/mindee/product/fr/payslip/payslip_v3_paid_time_offs.rbs +1 -1
  89. data/sig/mindee/product/fr/payslip/payslip_v3_salary_details.rbs +1 -1
  90. data/sig/mindee/product/invoice/invoice_v4_line_items.rbs +1 -1
  91. data/sig/mindee/product/invoice_splitter/invoice_splitter_v1.rbs +1 -4
  92. data/sig/mindee/product/invoice_splitter/invoice_splitter_v1_document.rbs +5 -9
  93. data/sig/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs +14 -0
  94. data/sig/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rbs +13 -0
  95. data/sig/mindee/product/invoice_splitter/invoice_splitter_v1_page.rbs +2 -0
  96. data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrients.rbs +1 -1
  97. data/sig/mindee/product/receipt/receipt_v5_line_items.rbs +1 -1
  98. data/sig/mindee/product/resume/resume_v1_certificates.rbs +1 -1
  99. data/sig/mindee/product/resume/resume_v1_educations.rbs +1 -1
  100. data/sig/mindee/product/resume/resume_v1_languages.rbs +1 -1
  101. data/sig/mindee/product/resume/resume_v1_professional_experiences.rbs +1 -1
  102. data/sig/mindee/product/resume/resume_v1_social_networks_urls.rbs +1 -1
  103. data/sig/mindee/product/us/healthcare_card/healthcare_card_v1_copays.rbs +1 -1
  104. data/sig/mindee/product/us/us_mail/us_mail_v2.rbs +13 -0
  105. data/sig/mindee/product/us/us_mail/us_mail_v2_document.rbs +20 -0
  106. data/sig/mindee/product/{eu/license_plate/license_plate_v1_page.rbs → us/us_mail/us_mail_v2_page.rbs} +5 -5
  107. data/sig/mindee/product/us/us_mail/us_mail_v2_recipient_address.rbs +22 -0
  108. data/sig/mindee/product/us/us_mail/us_mail_v2_recipient_addresses.rbs +15 -0
  109. data/sig/mindee/product/us/us_mail/us_mail_v2_sender_address.rbs +18 -0
  110. data/sig/mindee/product/us/us_mail/us_mail_v3_recipient_addresses.rbs +1 -1
  111. metadata +23 -12
  112. data/docs/localized_products/license_plates_v1.md +0 -112
  113. data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +0 -37
  114. data/sig/mindee/product/eu/license_plate/license_plate_v1.rbs +0 -13
  115. data/sig/mindee/product/eu/license_plate/license_plate_v1_document.rbs +0 -15
@@ -7,10 +7,11 @@ parentDoc: 6294d97ee723f1008d2ab28e
7
7
 
8
8
  ## Calling the Mindee API using webhooks
9
9
 
10
- > 🚧 This feature is only available for compatible products, see the see `Supports Polling/Webhooks` on the product's
11
- > documentation.
10
+ > 🚧 This feature is only available for compatible products.
11
+ >
12
+ > See the `Supports Polling/Webhooks` section on the product's documentation.
12
13
 
13
- After [setting up a webhook for your account](https://developers.mindee.com/docs/webhooks), you can send a document,
14
+ After [setting up a webhook for your account](https://developers.mindee.com/docs/webhooks), you can send a document,
14
15
  and then retrieve the results from an API call in the following fashion:
15
16
 
16
17
  ```rb
@@ -24,7 +25,7 @@ enqueue_response = mindee_client.enqueue(
24
25
  )
25
26
  ```
26
27
 
27
- Once your prediction is ready, the server will send it to your webhook. You can then use the payload as a regular
28
+ Once your prediction is ready, the server will send it to your webhook. You can then use the payload as a regular
28
29
  prediction:
29
30
 
30
31
  ```rb
@@ -54,63 +55,25 @@ result = mindee_client.load_prediction(
54
55
  puts result.document
55
56
  ```
56
57
 
57
- ## Enqueueing and polling manually
58
-
59
- > ❗️ We _strongly_ recommend you rely on a webhook setup, or a simple `parse()` call for most operations. Only use
60
- > manual polling if you are certain that it is the best solution for you.
61
-
62
- > 🚧 This feature is only available for compatible products, see the see `Supports Polling/Webhooks` on the product's
63
- > documentation.
64
-
65
- Instead of relying on the `parse()` method, you can enqueue documents and poll
66
- the server manually:
67
-
68
- ```rb
69
- # Load a file from disk
70
- input_source = mindee_client.source_from_path('/path/to/the/file.ext')
71
-
72
- # Send the file to the server
73
- enqueue_response = mindee_client.enqueue(
74
- input_source,
75
- Mindee::Product::Invoice::InvoiceV4 # InvoiceV4 supports asynchronous polling
76
- )
77
-
78
- job_id = enqueue_response.job.id
79
-
80
- queue_res = parse_queued(job_id, Mindee::Product::Invoice::InvoiceV4, endpoint: endpoint)
81
- polling_attempts = 0
82
-
83
- while [Mindee::Parsing::Common::JobStatus::PROCESSING, Mindee::Parsing::Common::JobStatus::WAITING].include?(
84
- queue_res.job.status) && polling_attempts < 80 # Recommended amounts of total retries for asynchronous polling.
85
- sleep(1.5) # Recommended waiting time for re-attempts
86
- queue_res = parse_queued(job_id, Mindee::Product::Invoice::InvoiceV4)
87
- polling_attempts += 1
88
- end
89
-
90
- # If all went well, print a short summary of the result.
91
- if queue_res.job.status == Mindee::Parsing::Common::JobStatus::COMPLETED
92
- puts queue_res.document
93
- end
94
- ```
95
-
96
58
  ## Parsing operations
97
59
 
98
- Operations pertaining to the Client's `parse()` method. The parsing process supports both synchronous and asynchronous
99
- modes, and you can fine-tune its behavior using several options.
60
+ Operations pertaining to the Client's `parse()` method.
61
+ The parsing process supports both synchronous and asynchronous modes, and you can fine-tune its behavior using several options.
100
62
 
101
63
  ### Polling options
102
64
 
103
65
  When performing an asynchronous parse (i.e. when the document is enqueued), the client will poll the API for the result.
104
66
  The following options control the polling behavior:
105
67
 
106
- - `initial_delay_sec`: The initial delay (in seconds) before the first polling attempt.
107
- - `delay_sec`: The delay (in seconds) between subsequent polls.
108
- - `max_retries`: The maximum number of polling attempts before timing out.
68
+ * `initial_delay_sec`: The initial delay (in seconds) before the first polling attempt.
69
+ * `delay_sec`: The delay (in seconds) between subsequent polls.
70
+ * `max_retries`: The maximum number of polling attempts before timing out.
109
71
 
110
- These parameters ensure that the client does not overload the API with too-frequent requests and also avoid premature
72
+ These parameters ensure that the client does not overload the API with too-frequent requests and also avoid premature
111
73
  timeouts.
112
74
 
113
75
  Example:
76
+
114
77
  ```rb
115
78
  result = mindee_client.parse(
116
79
  input_source,
@@ -122,16 +85,19 @@ result = mindee_client.parse(
122
85
  }
123
86
  )
124
87
  ```
125
- > ⚠️ Warning: Setting `delay_sec` too low might lead to insufficient wait time between polls, causing the server to
126
- > block your API calls for a short time.
88
+
89
+ > ⚠️ Warning: Setting `delay_sec` too low might lead to insufficient wait time between polls.
90
+ >
91
+ > This will cause the server to block your API calls for a short time (HTTP 429 errors).
127
92
 
128
93
  ### Page operations
129
94
 
130
- When parsing PDFs, you can preprocess the document using page operations. Using the `page_options` parameter, you can
131
- specify which pages to keep or remove even before the file is sent to the server. This is especially useful if your
132
- document contains extraneous pages that you do not want to process.
95
+ When parsing PDFs, you can preprocess the document using page operations.
96
+ Using the `page_options` parameter, you can specify which pages to keep or remove even before the file is sent to the server.
97
+ This is especially useful if your document contains extraneous pages that you do not want to process.
133
98
 
134
99
  The available options are:
100
+
135
101
  * `page_indexes`: An array of zero-based page indexes.
136
102
  * `operation`: The operation to perform—either:
137
103
  * `:KEEP_ONLY` (keep only the specified pages)
@@ -139,6 +105,7 @@ The available options are:
139
105
  * `on_min_pages`: Apply the operation only if the document has at least the specified number of pages.
140
106
 
141
107
  Example:
108
+
142
109
  ```rb
143
110
  page_options = {
144
111
  page_indexes:[1, 3], # Only target pages 1 and 3.
@@ -154,14 +121,17 @@ result = mindee_client.parse(
154
121
  }
155
122
  )
156
123
  ```
157
- > ⚠️ Warning: Page operations alter the document's content. Ensure that this behavior is acceptable for your use case,
158
- > as there is no undo once the pages are modified.
124
+
125
+ > ⚠️ Warning: Page operations alter the document's content.
126
+ >
127
+ > Ensure that this behavior is acceptable for your use case, as there is no undo once the pages are modified.
159
128
 
160
129
  ## Workflow operations
161
130
 
162
131
  Workflow operations are similar to parsing operations, but they apply to calls made through the workflow feature.
163
132
 
164
133
  Example:
134
+
165
135
  ```rb
166
136
  workflow_options = {
167
137
  document_alias: "my_document",
@@ -179,28 +149,68 @@ result = mindee_client.execute_workflow(
179
149
  )
180
150
  ```
181
151
 
152
+ ## Enqueueing and polling manually
153
+
154
+ > ❗️ We _strongly_ recommend you use a webhook setup, or a simple `parse()` call for most operations.
155
+ >
156
+ > Only use manual polling if you are **certain** that it is the best solution for you.
157
+
158
+ > 🚧 This feature is only available for compatible products.
159
+ >
160
+ > See the `Supports Polling/Webhooks` section on the product's documentation.
161
+
162
+ Instead of relying on the `parse()` method, you can enqueue documents and poll
163
+ the server manually:
164
+
165
+ ```rb
166
+ # Load a file from disk
167
+ input_source = mindee_client.source_from_path('/path/to/the/file.ext')
168
+
169
+ # Send the file to the server
170
+ enqueue_response = mindee_client.enqueue(
171
+ input_source,
172
+ Mindee::Product::Invoice::InvoiceV4 # InvoiceV4 supports asynchronous polling
173
+ )
174
+
175
+ job_id = enqueue_response.job.id
176
+
177
+ queue_res = parse_queued(job_id, Mindee::Product::Invoice::InvoiceV4, endpoint: endpoint)
178
+ polling_attempts = 0
179
+
180
+ while [Mindee::Parsing::Common::JobStatus::PROCESSING, Mindee::Parsing::Common::JobStatus::WAITING].include?(
181
+ queue_res.job.status) && polling_attempts < 80 # Recommended amounts of total retries for asynchronous polling.
182
+ sleep(1.5) # Recommended waiting time for re-attempts
183
+ queue_res = parse_queued(job_id, Mindee::Product::Invoice::InvoiceV4)
184
+ polling_attempts += 1
185
+ end
186
+
187
+ # If all went well, print a short summary of the result.
188
+ if queue_res.job.status == Mindee::Parsing::Common::JobStatus::COMPLETED
189
+ puts queue_res.document
190
+ end
191
+ ```
192
+
182
193
  ## Loading a Document File
183
194
 
184
- Before sending a document to Mindee’s API, you first need to load the file into one of our input source wrappers.
185
- These wrappers not only validate the file type (using a trusted MIME type check) but also give you access the following
186
- helper methods:
195
+ Before sending a document to Mindee’s API, you first need to load the file into one of our input source wrappers.
196
+ These wrappers not only validate the file type (using a trusted MIME type check) but also give you access to the following helper methods:
197
+
187
198
  * [image compression](https://developers.mindee.com/docs/ruby-advanced-file-operations#image-compression)
188
199
  * [pdf compression](https://developers.mindee.com/docs/ruby-advanced-file-operations#pdf-compression)
189
200
  * [PDF fixing](https://developers.mindee.com/docs/ruby-advanced-file-operations#pdf-fixing)
190
201
 
191
202
  > 📘 Regardless of how a document is loaded, the subsequent parsing or workflow operations remain the same.
192
203
 
193
-
194
204
  Mindee’s Ruby client supports several methods for loading a document.
195
205
 
196
-
197
206
  These can either be done locally:
207
+
198
208
  * Loading from a [local path](#loading-from-a-local-path)
199
209
  * Loading from a [File object](#loading-from-a-file-object)
200
210
  * Loading from a [Base64-encoded string](#loading-from-a-base64-encoded-string)
201
211
  * Loading from a [raw sequence of bytes](#loading-from-raw-bytes)
202
212
 
203
- These four methods inherit from the `LocalInputSource` class, which provides a few common utility features described
213
+ These four methods inherit from the `LocalInputSource` class, which provides a few common utility features described
204
214
  [here](#under-the-hood---local-input-source-details).
205
215
 
206
216
  Or loading from a [URL](#loading-by-url).
@@ -210,6 +220,7 @@ Or loading from a [URL](#loading-by-url).
210
220
  The most straightforward way of loading a document: load a file directly from disk by providing its path.
211
221
 
212
222
  Example:
223
+
213
224
  ```rb
214
225
  # Initialize the client.
215
226
  mindee_client = Mindee::Client.new(api_key: 'my-api-key')
@@ -232,10 +243,9 @@ File.open('invoice.jpg', 'rb') do |file_obj|
232
243
  end
233
244
  ```
234
245
 
235
-
236
246
  ### Loading from a Base64-Encoded String
237
247
 
238
- For cases where you have file data encoded in Base64, load the document by providing the encoded string along with the
248
+ For cases where you have file data encoded in Base64, load the document by providing the encoded string along with the
239
249
  original filename. This converts the Base64 string into a local input source for further processing.
240
250
 
241
251
  Example:
@@ -246,10 +256,9 @@ b64_string = "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGB..." # Example dummy b64_
246
256
  input_source = mindee_client.source_from_b64string(b64_string, "receipt.jpg")
247
257
  ```
248
258
 
249
-
250
259
  ### Loading from Raw Bytes
251
260
 
252
- If you have the file’s raw binary data (as bytes), create an input source by passing the bytes and the original
261
+ If you have the file’s raw binary data (as bytes), create an input source by passing the bytes and the original
253
262
  filename.
254
263
 
255
264
  Example:
@@ -263,32 +272,36 @@ input_source = mindee_client.source_from_bytes(raw_bytes, "invoice.pdf")
263
272
  ### Loading by URL
264
273
 
265
274
  For remote documents, you can load a file through its URL. The server will accept direct urls if:
266
- * They begin with "https://".
275
+
276
+ * They begin with "https\://".
267
277
  * They point to a valid file.
268
278
  * They do not redirect the request (e.g. Google Drive documents or proxies).
269
279
 
270
- Under the hood, the
271
- [Mindee::Input::Source::URLInputSource](https://mindee.github.io/mindee-api-ruby/Mindee/Input/Source/URLInputSource.html)
280
+ Under the hood, the
281
+ [Mindee::Input::Source::URLInputSource](https://mindee.github.io/mindee-api-ruby/Mindee/Input/Source/URLInputSource.html)
272
282
  class validates the URL, but won't perform an HTTP GET request unless specifically requested (using Ruby’s Net::HTTP).
273
283
 
274
284
  Example:
285
+
275
286
  ```rb
276
287
  input_source = mindee_client.source_from_url("https://www.example.com/invoice.pdf")
277
288
  result = mindee_client.parse(input_source, Mindee::Product::Invoice::InvoiceV4)
278
289
  ```
279
290
 
280
- To download the files before sending them, you can use the `as_local_input_source` method. It allows to follow
281
- redirects, and supports optional authentication (via basic auth or JWT tokens). You can optionally download and save the file locally or convert it into a local input source for further processing—thus benefiting from the same processing methods as local files.
291
+ To download the files before sending them, you can use the `as_local_input_source` method.
292
+ It allows following redirects, and supports optional authentication (via basic auth or JWT tokens).
293
+ You can optionally download and save the file locally or convert it into a local input source for further processing—thus benefiting from the same processing methods as local files.
282
294
 
283
295
  Additional URL features include:
284
296
 
285
- * Validation: The URLInputSource throws an error if the URL does not start with “https://”.
297
+ * Validation: The URLInputSource throws an error if the URL does not start with “https\://”.
286
298
  * Authentication: You can supply basic authentication (username/password) or a bearer token.
287
299
  * Local Conversion: Methods such as `write_to_file` let you download and inspect the file locally. Alternatively,
288
300
    `as_local_input_source` converts the downloaded content into a LocalInputSource so you can apply operations like
289
301
    compression.
290
302
 
291
303
  Example:
304
+
292
305
  ```rb
293
306
  # Load the URL input normally:
294
307
  remote_input_source = mindee_client.source_from_url("https://www.example.com/invoice.pdf")
@@ -302,7 +315,7 @@ local_downloaded_file_path = remote_input_source.write_to_file("path/to/my/downl
302
315
 
303
316
  ### Under the Hood - Local Input Source Details
304
317
 
305
- When loading using from either a path, file, raw byte sequence or base64 string, the created object inherits from
318
+ When loading from either a path, file, raw byte sequence or base64 string, the created object inherits from
306
319
  [Mindee::Input::Source::LocalInputSource](https://mindee.github.io/mindee-api-ruby/Mindee/Input/Source/LocalInputSource.html). Key features include:
307
320
 
308
321
  * Automatic MIME Type Validation using Marcel to check for server file format compliance.
@@ -313,4 +326,5 @@ When loading using from either a path, file, raw byte sequence or base64 string,
313
326
  * [count_pages](https://mindee.github.io/mindee-api-ruby/Mindee/Input/Source/LocalInputSource.html#count_pages-instance_method) – For PDF files, returns the total page count; by default, non-PDF files are assumed to be single-page documents.
314
327
 
315
328
  ## Questions?
329
+
316
330
  [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
@@ -1,5 +1,5 @@
1
1
  ---
2
- title: Ruby Client Library - FR Bank Account Details
2
+ title: FR Bank Account Details
3
3
  category: 622b805aaec68102ea7fcbc2
4
4
  slug: ruby-fr-bank-account-details-ocr
5
5
  parentDoc: 67b49e29a2cd6f08d69a40d8
@@ -1,5 +1,5 @@
1
1
  ---
2
- title: Ruby Client Library - US Bank Check
2
+ title: US Bank Check
3
3
  category: 622b805aaec68102ea7fcbc2
4
4
  slug: ruby-us-bank-check-ocr
5
5
  parentDoc: 67b49e29a2cd6f08d69a40d8
@@ -1,5 +1,5 @@
1
1
  ---
2
- title: Ruby Client Library - FR Bank Statement
2
+ title: FR Bank Statement
3
3
  category: 622b805aaec68102ea7fcbc2
4
4
  slug: ruby-fr-bank-statement-ocr
5
5
  parentDoc: 67b49e29a2cd6f08d69a40d8
@@ -1,5 +1,5 @@
1
1
  ---
2
- title: Ruby Client Library - FR Carte Grise
2
+ title: FR Carte Grise
3
3
  category: 622b805aaec68102ea7fcbc2
4
4
  slug: ruby-fr-carte-grise-ocr
5
5
  parentDoc: 67b49e29a2cd6f08d69a40d8
@@ -1,5 +1,5 @@
1
1
  ---
2
- title: Ruby Client Library - FR Energy Bill
2
+ title: FR Energy Bill
3
3
  category: 622b805aaec68102ea7fcbc2
4
4
  slug: ruby-fr-energy-bill-ocr
5
5
  parentDoc: 67b49e29a2cd6f08d69a40d8
@@ -12,7 +12,7 @@ The Ruby Client Library supports the [Energy Bill API](https://platform.mindee.c
12
12
  > | Specification | Details |
13
13
  > | ------------------------------ | -------------------------------------------------- |
14
14
  > | Endpoint Name | `energy_bill_fra` |
15
- > | Recommended Version | `v1.0` |
15
+ > | Recommended Version | `v1.2` |
16
16
  > | Supports Polling/Webhooks | ✔️ Yes |
17
17
  > | Support Synchronous HTTP Calls | ❌ No |
18
18
  > | Geography | 🇫🇷 France |
@@ -186,11 +186,19 @@ Details of energy consumption.
186
186
 
187
187
  An `EnergyBillV1EnergyUsage` implements the following attributes:
188
188
 
189
+ * `consumption` (Float): The amount of energy consumed.
189
190
  * `description` (String): Description or details of the energy usage.
190
191
  * `end_date` (String): The end date of the energy usage.
191
192
  * `start_date` (String): The start date of the energy usage.
192
193
  * `tax_rate` (Float): The rate of tax applied to the total cost.
193
194
  * `total` (Float): The total cost of energy consumed.
195
+ * `unit` (String): The unit of measurement for energy consumption.
196
+
197
+ #### Possible values include:
198
+ - kWh
199
+ - m3
200
+ - L
201
+
194
202
  * `unit_price` (Float): The price per unit of energy consumed.
195
203
  Fields which are specific to this product; they are not used in any other product.
196
204
 
@@ -221,7 +229,7 @@ A `EnergyBillV1MeterDetail` implements the following attributes:
221
229
  - water
222
230
  - None
223
231
 
224
- * `unit` (String): The unit of measurement for energy consumption, which can be kW, m³, or L.
232
+ * `unit` (String): The unit of power for energy consumption.
225
233
 
226
234
  # Attributes
227
235
  The following fields are extracted for Energy Bill V1:
@@ -1,5 +1,5 @@
1
1
  ---
2
- title: Ruby Client Library - FR Health Card
2
+ title: FR Health Card
3
3
  category: 622b805aaec68102ea7fcbc2
4
4
  slug: ruby-fr-health-card-ocr
5
5
  parentDoc: 67b49e29a2cd6f08d69a40d8
@@ -1,5 +1,5 @@
1
1
  ---
2
- title: Ruby Client Library - FR Carte Nationale d'Identité
2
+ title: FR Carte Nationale d'Identité
3
3
  category: 622b805aaec68102ea7fcbc2
4
4
  slug: ruby-fr-carte-nationale-didentite-ocr
5
5
  parentDoc: 67b49e29a2cd6f08d69a40d8
@@ -192,9 +192,9 @@ puts result.document.inference.prediction.document_number.value
192
192
  [📄](#page-level-fields "This field is only present on individual pages.")**document_side** ([ClassificationField](#classification-field)): The sides of the document which are visible.
193
193
 
194
194
  #### Possible values include:
195
- - RECTO
196
- - VERSO
197
- - RECTO & VERSO
195
+ - 'RECTO'
196
+ - 'VERSO'
197
+ - 'RECTO & VERSO'
198
198
 
199
199
  ```rb
200
200
  result.document.document_side.each do |document_side_elem|
@@ -206,8 +206,8 @@ puts result.document.inference.prediction.document_number.value
206
206
  [📄](#page-level-fields "This field is only present on individual pages.")**document_type** ([ClassificationField](#classification-field)): The document type or format.
207
207
 
208
208
  #### Possible values include:
209
- - NEW
210
- - OLD
209
+ - 'NEW'
210
+ - 'OLD'
211
211
 
212
212
  ```rb
213
213
  result.document.document_type.each do |document_type_elem|
@@ -1,5 +1,5 @@
1
1
  ---
2
- title: Ruby Client Library - IND Passport - India
2
+ title: IND Passport - India
3
3
  category: 622b805aaec68102ea7fcbc2
4
4
  slug: ruby-ind-passport---india-ocr
5
5
  parentDoc: 67b49e29a2cd6f08d69a40d8
@@ -194,8 +194,8 @@ puts result.document.inference.prediction.file_number.value
194
194
  **gender** ([ClassificationField](#classification-field)): The gender of the passport holder.
195
195
 
196
196
  #### Possible values include:
197
- - M
198
- - F
197
+ - 'M'
198
+ - 'F'
199
199
 
200
200
  ```rb
201
201
  puts result.document.inference.prediction.gender.value
@@ -289,8 +289,8 @@ puts result.document.inference.prediction.old_passport_place_of_issue.value
289
289
  **page_number** ([ClassificationField](#classification-field)): The page number of the passport document.
290
290
 
291
291
  #### Possible values include:
292
- - 1
293
- - 2
292
+ - '1'
293
+ - '2'
294
294
 
295
295
  ```rb
296
296
  puts result.document.inference.prediction.page_number.value
@@ -1,5 +1,5 @@
1
1
  ---
2
- title: Ruby Client Library - FR Payslip
2
+ title: FR Payslip
3
3
  category: 622b805aaec68102ea7fcbc2
4
4
  slug: ruby-fr-payslip-ocr
5
5
  parentDoc: 67b49e29a2cd6f08d69a40d8
@@ -1,5 +1,5 @@
1
1
  ---
2
- title: Ruby Client Library - US Healthcare Card
2
+ title: US Healthcare Card
3
3
  category: 622b805aaec68102ea7fcbc2
4
4
  slug: ruby-us-healthcare-card-ocr
5
5
  parentDoc: 67b49e29a2cd6f08d69a40d8
@@ -12,7 +12,7 @@ The Ruby Client Library supports the [Healthcare Card API](https://platform.mind
12
12
  > | Specification | Details |
13
13
  > | ------------------------------ | -------------------------------------------------- |
14
14
  > | Endpoint Name | `us_healthcare_cards` |
15
- > | Recommended Version | `v1.0` |
15
+ > | Recommended Version | `v1.1` |
16
16
  > | Supports Polling/Webhooks | ✔️ Yes |
17
17
  > | Support Synchronous HTTP Calls | ❌ No |
18
18
  > | Geography | 🇺🇸 United States |
@@ -1,5 +1,5 @@
1
1
  ---
2
- title: Ruby Client Library - US US Mail
2
+ title: US US Mail
3
3
  category: 622b805aaec68102ea7fcbc2
4
4
  slug: ruby-us-us-mail-ocr
5
5
  parentDoc: 67b49e29a2cd6f08d69a40d8
@@ -1,5 +1,5 @@
1
1
  ---
2
- title: Ruby Client Library - US W9
2
+ title: US W9
3
3
  category: 622b805aaec68102ea7fcbc2
4
4
  slug: ruby-us-w9-ocr
5
5
  parentDoc: 67b49e29a2cd6f08d69a40d8
@@ -3,4 +3,4 @@ title: Localized Products
3
3
  category: 622b805aaec68102ea7fcbc2
4
4
  slug: ruby-localized-products
5
5
  parentDoc: 6294d97ee723f1008d2ab28e
6
- ---
6
+ ---
data/lib/mindee/client.rb CHANGED
@@ -80,6 +80,7 @@ module Mindee
80
80
  # This performs a full OCR operation on the server and may increase response time.
81
81
  # @!attribute public_url [String, nil] A unique, encrypted URL for accessing the document validation interface without
82
82
  # requiring authentication.
83
+ # @!attribute rag [bool, nil] Whether to enable Retrieval-Augmented Generation.
83
84
  # @!attribute page_options [PageOptions, Hash, nil] Page cutting/merge options:
84
85
  # * `:page_indexes` Zero-based list of page indexes.
85
86
  # * `:operation` Operation to apply on the document, given the specified page indexes:
@@ -87,7 +88,7 @@ module Mindee
87
88
  # * `:REMOVE` - remove the specified pages, and keep all others.
88
89
  # * `:on_min_pages` Apply the operation only if the document has at least this many pages.
89
90
  class WorkflowOptions
90
- attr_accessor :document_alias, :priority, :full_text, :public_url, :page_options
91
+ attr_accessor :document_alias, :priority, :full_text, :public_url, :page_options, :rag
91
92
 
92
93
  def initialize(params: {})
93
94
  params = params.transform_keys(&:to_sym)
@@ -95,6 +96,7 @@ module Mindee
95
96
  @priority = params.fetch(:priority, nil)
96
97
  @full_text = params.fetch(:full_text, false)
97
98
  @public_url = params.fetch(:public_url, nil)
99
+ @rag = params.fetch(:rag, nil)
98
100
  raw_page_options = params.fetch(:page_options, nil)
99
101
  raw_page_options = PageOptions.new(params: raw_page_options) unless raw_page_options.is_a?(PageOptions)
100
102
  @page_options = raw_page_options
@@ -297,8 +299,6 @@ module Mindee
297
299
  queue_res
298
300
  end
299
301
 
300
- # Same idea applies to execute_workflow:
301
- #
302
302
  # Sends a document to a workflow.
303
303
  #
304
304
  # Accepts options either as a Hash or as a WorkflowOptions struct.
@@ -309,6 +309,7 @@ module Mindee
309
309
  # * `document_alias` [String, nil] Alias to give to the document.
310
310
  # * `priority` [Symbol, nil] Priority to give to the document.
311
311
  # * `full_text` [bool] Whether to include the full OCR text response in compatible APIs.
312
+ # * `rag` [bool, nil] Whether to enable Retrieval-Augmented Generation.
312
313
  #
313
314
  # * `public_url` [String, nil] A unique, encrypted URL for accessing the document validation interface without
314
315
  # requiring authentication.
@@ -317,7 +318,7 @@ module Mindee
317
318
  # * `:operation` Operation to apply on the document, given the `page_indexes` specified:
318
319
  # * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
319
320
  # * `:REMOVE` - remove the specified pages, and keep all others.
320
- # * `:on_min_pages` Apply the operation only if document has at least this many pa
321
+ # * `:on_min_pages` Apply the operation only if document has at least this many pages.
321
322
  # @return [Mindee::Parsing::Common::WorkflowResponse]
322
323
  def execute_workflow(input_source, workflow_id, options: {})
323
324
  opts = options.is_a?(WorkflowOptions) ? options : WorkflowOptions.new(params: options)
@@ -331,10 +332,7 @@ module Mindee
331
332
 
332
333
  prediction, raw_http = workflow_endpoint.execute_workflow(
333
334
  input_source,
334
- opts.full_text,
335
- opts.document_alias,
336
- opts.priority,
337
- opts.public_url
335
+ opts
338
336
  )
339
337
 
340
338
  Mindee::Parsing::Common::WorkflowResponse.new(Product::Universal::Universal, prediction, raw_http)
@@ -24,15 +24,17 @@ module Mindee
24
24
 
25
25
  # Sends a document to the workflow.
26
26
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
27
- # @param document_alias [String, nil] Alias to give to the document.
28
- # @param priority [Symbol, nil] Priority to give to the document.
29
- # @param full_text [bool] Whether to include the full OCR text response in compatible APIs.
30
- # @param public_url [String, nil] A unique, encrypted URL for accessing the document validation interface without
31
- # requiring authentication.
27
+ # @param opts [WorkflowOptions] Options to configure workflow execution behavior.
32
28
  # @return [Array]
33
- def execute_workflow(input_source, full_text, document_alias, priority, public_url)
29
+ def execute_workflow(input_source, opts)
34
30
  check_api_key
35
- response = workflow_execution_req_post(input_source, document_alias, priority, full_text, public_url)
31
+ response = workflow_execution_req_post(input_source, opts)
32
+ if response.nil?
33
+ raise Mindee::Errors::MindeeHTTPError.new(
34
+ { code: 0, details: 'Server response was nil.', message: 'Unknown error.' }, @url, 0
35
+ )
36
+ end
37
+
36
38
  hashed_response = JSON.parse(response.body, object_class: Hash)
37
39
  return [hashed_response, response.body] if ResponseValidation.valid_async_response?(response)
38
40
 
@@ -42,31 +44,29 @@ module Mindee
42
44
  end
43
45
 
44
46
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
45
- # @param document_alias [String, nil] Alias to give to the document.
46
- # @param priority [Symbol, nil] Priority to give to the document.
47
- # @param full_text [bool] Whether to include the full OCR text response in compatible APIs.
48
- # @param public_url [String, nil] A unique, encrypted URL for accessing the document validation interface without
49
- # requiring authentication.
47
+ # @param opts [WorkflowOptions] Options to configure workflow execution behavior.
50
48
  # @return [Net::HTTPResponse, nil]
51
- def workflow_execution_req_post(input_source, document_alias, priority, full_text, public_url)
49
+ def workflow_execution_req_post(input_source, opts)
52
50
  uri = URI(@url)
53
51
  params = {} # : Hash[Symbol | String, untyped]
54
- params[:full_text_ocr] = 'true' if full_text
55
- uri.query = URI.encode_www_form(params)
52
+ params[:full_text_ocr] = 'true' if opts.full_text
53
+ params[:rag] = 'true' if opts.rag
54
+ uri.query = URI.encode_www_form(params) if params.any?
56
55
 
57
56
  headers = {
58
57
  'Authorization' => "Token #{@api_key}",
59
58
  'User-Agent' => USER_AGENT,
60
59
  }
61
60
  req = Net::HTTP::Post.new(uri, headers)
62
- form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource)
63
- [['document', input_source.url]]
64
- else
65
- [input_source.read_contents]
66
- end
67
- form_data.push ['alias', document_alias] if document_alias
68
- form_data.push ['public_url', public_url] if public_url
69
- form_data.push ['priority', priority.to_s] if priority
61
+ form_data = [] # : Array[untyped]
62
+ if input_source.is_a?(Mindee::Input::Source::URLInputSource)
63
+ form_data.push ['document', input_source.url]
64
+ else
65
+ form_data.push input_source.read_contents
66
+ end
67
+ form_data.push ['alias', opts.document_alias] if opts.document_alias
68
+ form_data.push ['public_url', opts.public_url] if opts.public_url
69
+ form_data.push ['priority', opts.priority.to_s] if opts.priority
70
70
 
71
71
  req.set_form(form_data, 'multipart/form-data')
72
72
 
@@ -46,7 +46,7 @@ module Mindee
46
46
  extension = File.extname(@filename)
47
47
  basename = File.basename(@filename, extension)
48
48
  page_indexes.each do |page_index_list|
49
- if page_index_list.empty? || page_index_list.nil?
49
+ if page_index_list.nil? || page_index_list.empty?
50
50
  raise Errors::MindeePDFError, "Empty indexes aren't allowed for extraction #{page_index_list}"
51
51
  end
52
52
 
@@ -70,12 +70,12 @@ module Mindee
70
70
  # rubocop:disable Metrics/PerceivedComplexity
71
71
 
72
72
  # Extracts invoices as complete PDFs from the document.
73
- # @param page_indexes [Array<Array<Integer>, InvoiceSplitterV1PageGroup>]
73
+ # @param page_indexes [Array<Array<Integer>, InvoiceSplitterV1InvoicePageGroup>]
74
74
  # @param strict [bool]
75
75
  # @return [Array<Mindee::PDF::PDFExtractor::ExtractedPDF>]
76
76
  def extract_invoices(page_indexes, strict: false)
77
77
  raise Errors::MindeePDFError, 'No indexes provided.' if page_indexes.empty?
78
- unless page_indexes[0].is_a?(Mindee::Product::InvoiceSplitter::InvoiceSplitterV1PageGroup)
78
+ if page_indexes[0].is_a?(Array) && page_indexes[0].all? { |i| i.is_a?(Integer) }
79
79
  return extract_sub_documents(page_indexes)
80
80
  end
81
81
  return extract_sub_documents(page_indexes.map(&:page_indexes)) unless strict