mindee 4.0.0 → 4.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/bin/cli_products.rb +172 -0
- data/bin/mindee.rb +6 -121
- data/docs/advanced_file_operations.md +7 -9
- data/docs/code_samples/invoice_splitter_v1_async.txt +2 -1
- data/docs/code_samples/{license_plates_v1.txt → us_mail_v2_async.txt} +1 -1
- data/docs/code_samples/workflow_execution.txt +28 -0
- data/docs/getting_started.md +1 -1
- data/docs/global_products/barcode_reader_v1.md +1 -1
- data/docs/global_products/bill_of_lading_v1.md +1 -1
- data/docs/global_products/business_card_v1.md +1 -1
- data/docs/global_products/cropper_v1.md +1 -1
- data/docs/global_products/delivery_notes_v1.md +2 -2
- data/docs/global_products/driver_license_v1.md +1 -1
- data/docs/global_products/expense_receipts_v5.md +26 -25
- data/docs/global_products/financial_document_v1.md +34 -33
- data/docs/global_products/international_id_v2.md +7 -7
- data/docs/global_products/invoice_splitter_v1.md +53 -33
- data/docs/global_products/invoices_v4.md +19 -19
- data/docs/global_products/multi_receipts_detector_v1.md +1 -1
- data/docs/global_products/nutrition_facts_v1.md +1 -1
- data/docs/global_products/passport_v1.md +1 -1
- data/docs/global_products/resume_v1.md +4 -4
- data/docs/global_products/universal.md +1 -1
- data/docs/global_products.md +1 -1
- data/docs/loading_a_document.md +87 -73
- data/docs/localized_products/bank_account_details_v2.md +1 -1
- data/docs/localized_products/bank_check_v1.md +1 -1
- data/docs/localized_products/bank_statement_fr_v2.md +1 -1
- data/docs/localized_products/carte_grise_v1.md +1 -1
- data/docs/localized_products/energy_bill_fra_v1.md +11 -3
- data/docs/localized_products/french_healthcard_v1.md +1 -1
- data/docs/localized_products/idcard_fr_v2.md +6 -6
- data/docs/localized_products/ind_passport_v1.md +5 -5
- data/docs/localized_products/payslip_fra_v3.md +1 -1
- data/docs/localized_products/us_healthcare_cards_v1.md +2 -2
- data/docs/localized_products/us_mail_v3.md +1 -1
- data/docs/localized_products/us_w9_v1.md +1 -1
- data/docs/localized_products.md +1 -1
- data/lib/mindee/client.rb +6 -8
- data/lib/mindee/http/workflow_endpoint.rb +23 -23
- data/lib/mindee/pdf/pdf_extractor.rb +3 -3
- data/lib/mindee/product/delivery_note/delivery_note_v1_document.rb +1 -1
- data/lib/mindee/product/delivery_note/delivery_note_v1_page.rb +1 -1
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +15 -14
- data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +1 -1
- data/lib/mindee/product/financial_document/financial_document_v1_line_items.rb +1 -1
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_document.rb +5 -1
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usage.rb +18 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usages.rb +4 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_meter_detail.rb +2 -2
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_page.rb +1 -1
- data/lib/mindee/product/invoice/invoice_v4_document.rb +14 -14
- data/lib/mindee/product/invoice/invoice_v4_line_item.rb +2 -2
- data/lib/mindee/product/invoice/invoice_v4_line_items.rb +1 -1
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +2 -2
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +38 -42
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rb +55 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +48 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +9 -5
- data/lib/mindee/product/receipt/receipt_v5_document.rb +8 -8
- data/lib/mindee/product/receipt/receipt_v5_line_item.rb +1 -1
- data/lib/mindee/product/receipt/receipt_v5_line_items.rb +1 -1
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_document.rb +1 -1
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_page.rb +1 -1
- data/lib/mindee/product/{eu/license_plate/license_plate_v1.rb → us/us_mail/us_mail_v2.rb} +13 -13
- data/lib/mindee/product/us/us_mail/us_mail_v2_document.rb +105 -0
- data/lib/mindee/product/{eu/license_plate/license_plate_v1_page.rb → us/us_mail/us_mail_v2_page.rb} +8 -8
- data/lib/mindee/product/us/us_mail/us_mail_v2_recipient_address.rb +105 -0
- data/lib/mindee/product/us/us_mail/us_mail_v2_recipient_addresses.rb +63 -0
- data/lib/mindee/product/us/us_mail/us_mail_v2_sender_address.rb +66 -0
- data/lib/mindee/product.rb +5 -5
- data/lib/mindee/version.rb +1 -1
- data/mindee.gemspec +3 -3
- data/sig/custom/net_http.rbs +5 -0
- data/sig/mindee/client.rbs +1 -0
- data/sig/mindee/http/workflow_endpoint.rbs +8 -2
- data/sig/mindee/pdf/pdf_extractor.rbs +1 -1
- data/sig/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_items.rbs +1 -1
- data/sig/mindee/product/financial_document/financial_document_v1_line_items.rbs +1 -1
- data/sig/mindee/product/fr/bank_statement/bank_statement_v2_transactions.rbs +1 -1
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_energy_usage.rbs +2 -0
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_energy_usages.rbs +1 -1
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_subscriptions.rbs +1 -1
- data/sig/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contributions.rbs +1 -1
- data/sig/mindee/product/fr/payslip/payslip_v2_salary_details.rbs +1 -1
- data/sig/mindee/product/fr/payslip/payslip_v3_paid_time_offs.rbs +1 -1
- data/sig/mindee/product/fr/payslip/payslip_v3_salary_details.rbs +1 -1
- data/sig/mindee/product/invoice/invoice_v4_line_items.rbs +1 -1
- data/sig/mindee/product/invoice_splitter/invoice_splitter_v1.rbs +1 -4
- data/sig/mindee/product/invoice_splitter/invoice_splitter_v1_document.rbs +5 -9
- data/sig/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_group.rbs +14 -0
- data/sig/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rbs +13 -0
- data/sig/mindee/product/invoice_splitter/invoice_splitter_v1_page.rbs +2 -0
- data/sig/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrients.rbs +1 -1
- data/sig/mindee/product/receipt/receipt_v5_line_items.rbs +1 -1
- data/sig/mindee/product/resume/resume_v1_certificates.rbs +1 -1
- data/sig/mindee/product/resume/resume_v1_educations.rbs +1 -1
- data/sig/mindee/product/resume/resume_v1_languages.rbs +1 -1
- data/sig/mindee/product/resume/resume_v1_professional_experiences.rbs +1 -1
- data/sig/mindee/product/resume/resume_v1_social_networks_urls.rbs +1 -1
- data/sig/mindee/product/us/healthcare_card/healthcare_card_v1_copays.rbs +1 -1
- data/sig/mindee/product/us/us_mail/us_mail_v2.rbs +13 -0
- data/sig/mindee/product/us/us_mail/us_mail_v2_document.rbs +20 -0
- data/sig/mindee/product/{eu/license_plate/license_plate_v1_page.rbs → us/us_mail/us_mail_v2_page.rbs} +5 -5
- data/sig/mindee/product/us/us_mail/us_mail_v2_recipient_address.rbs +22 -0
- data/sig/mindee/product/us/us_mail/us_mail_v2_recipient_addresses.rbs +15 -0
- data/sig/mindee/product/us/us_mail/us_mail_v2_sender_address.rbs +18 -0
- data/sig/mindee/product/us/us_mail/us_mail_v3_recipient_addresses.rbs +1 -1
- metadata +23 -12
- data/docs/localized_products/license_plates_v1.md +0 -112
- data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +0 -37
- data/sig/mindee/product/eu/license_plate/license_plate_v1.rbs +0 -13
- data/sig/mindee/product/eu/license_plate/license_plate_v1_document.rbs +0 -15
data/docs/loading_a_document.md
CHANGED
@@ -7,10 +7,11 @@ parentDoc: 6294d97ee723f1008d2ab28e
|
|
7
7
|
|
8
8
|
## Calling the Mindee API using webhooks
|
9
9
|
|
10
|
-
> 🚧 This feature is only available for compatible products
|
11
|
-
>
|
10
|
+
> 🚧 This feature is only available for compatible products.
|
11
|
+
>
|
12
|
+
> See the `Supports Polling/Webhooks` section on the product's documentation.
|
12
13
|
|
13
|
-
After [setting up a webhook for your account](https://developers.mindee.com/docs/webhooks), you can send a document,
|
14
|
+
After [setting up a webhook for your account](https://developers.mindee.com/docs/webhooks), you can send a document,
|
14
15
|
and then retrieve the results from an API call in the following fashion:
|
15
16
|
|
16
17
|
```rb
|
@@ -24,7 +25,7 @@ enqueue_response = mindee_client.enqueue(
|
|
24
25
|
)
|
25
26
|
```
|
26
27
|
|
27
|
-
Once your prediction is ready, the server will send it to your webhook. You can then use the payload as a regular
|
28
|
+
Once your prediction is ready, the server will send it to your webhook. You can then use the payload as a regular
|
28
29
|
prediction:
|
29
30
|
|
30
31
|
```rb
|
@@ -54,63 +55,25 @@ result = mindee_client.load_prediction(
|
|
54
55
|
puts result.document
|
55
56
|
```
|
56
57
|
|
57
|
-
## Enqueueing and polling manually
|
58
|
-
|
59
|
-
> ❗️ We _strongly_ recommend you rely on a webhook setup, or a simple `parse()` call for most operations. Only use
|
60
|
-
> manual polling if you are certain that it is the best solution for you.
|
61
|
-
|
62
|
-
> 🚧 This feature is only available for compatible products, see the see `Supports Polling/Webhooks` on the product's
|
63
|
-
> documentation.
|
64
|
-
|
65
|
-
Instead of relying on the `parse()` method, you can enqueue documents and poll
|
66
|
-
the server manually:
|
67
|
-
|
68
|
-
```rb
|
69
|
-
# Load a file from disk
|
70
|
-
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
71
|
-
|
72
|
-
# Send the file to the server
|
73
|
-
enqueue_response = mindee_client.enqueue(
|
74
|
-
input_source,
|
75
|
-
Mindee::Product::Invoice::InvoiceV4 # InvoiceV4 supports asynchronous polling
|
76
|
-
)
|
77
|
-
|
78
|
-
job_id = enqueue_response.job.id
|
79
|
-
|
80
|
-
queue_res = parse_queued(job_id, Mindee::Product::Invoice::InvoiceV4, endpoint: endpoint)
|
81
|
-
polling_attempts = 0
|
82
|
-
|
83
|
-
while [Mindee::Parsing::Common::JobStatus::PROCESSING, Mindee::Parsing::Common::JobStatus::WAITING].include?(
|
84
|
-
queue_res.job.status) && polling_attempts < 80 # Recommended amounts of total retries for asynchronous polling.
|
85
|
-
sleep(1.5) # Recommended waiting time for re-attempts
|
86
|
-
queue_res = parse_queued(job_id, Mindee::Product::Invoice::InvoiceV4)
|
87
|
-
polling_attempts += 1
|
88
|
-
end
|
89
|
-
|
90
|
-
# If all went well, print a short summary of the result.
|
91
|
-
if queue_res.job.status == Mindee::Parsing::Common::JobStatus::COMPLETED
|
92
|
-
puts queue_res.document
|
93
|
-
end
|
94
|
-
```
|
95
|
-
|
96
58
|
## Parsing operations
|
97
59
|
|
98
|
-
Operations pertaining to the Client's `parse()` method.
|
99
|
-
modes, and you can fine-tune its behavior using several options.
|
60
|
+
Operations pertaining to the Client's `parse()` method.
|
61
|
+
The parsing process supports both synchronous and asynchronous modes, and you can fine-tune its behavior using several options.
|
100
62
|
|
101
63
|
### Polling options
|
102
64
|
|
103
65
|
When performing an asynchronous parse (i.e. when the document is enqueued), the client will poll the API for the result.
|
104
66
|
The following options control the polling behavior:
|
105
67
|
|
106
|
-
|
107
|
-
|
108
|
-
|
68
|
+
* `initial_delay_sec`: The initial delay (in seconds) before the first polling attempt.
|
69
|
+
* `delay_sec`: The delay (in seconds) between subsequent polls.
|
70
|
+
* `max_retries`: The maximum number of polling attempts before timing out.
|
109
71
|
|
110
|
-
These parameters ensure that the client does not overload the API with too-frequent requests and also avoid premature
|
72
|
+
These parameters ensure that the client does not overload the API with too-frequent requests and also avoid premature
|
111
73
|
timeouts.
|
112
74
|
|
113
75
|
Example:
|
76
|
+
|
114
77
|
```rb
|
115
78
|
result = mindee_client.parse(
|
116
79
|
input_source,
|
@@ -122,16 +85,19 @@ result = mindee_client.parse(
|
|
122
85
|
}
|
123
86
|
)
|
124
87
|
```
|
125
|
-
|
126
|
-
>
|
88
|
+
|
89
|
+
> ⚠️ Warning: Setting `delay_sec` too low might lead to insufficient wait time between polls.
|
90
|
+
>
|
91
|
+
> This will cause the server to block your API calls for a short time (HTTP 429 errors).
|
127
92
|
|
128
93
|
### Page operations
|
129
94
|
|
130
|
-
When parsing PDFs, you can preprocess the document using page operations.
|
131
|
-
specify which pages to keep or remove even before the file is sent to the server.
|
132
|
-
document contains extraneous pages that you do not want to process.
|
95
|
+
When parsing PDFs, you can preprocess the document using page operations.
|
96
|
+
Using the `page_options` parameter, you can specify which pages to keep or remove even before the file is sent to the server.
|
97
|
+
This is especially useful if your document contains extraneous pages that you do not want to process.
|
133
98
|
|
134
99
|
The available options are:
|
100
|
+
|
135
101
|
* `page_indexes`: An array of zero-based page indexes.
|
136
102
|
* `operation`: The operation to perform—either:
|
137
103
|
* `:KEEP_ONLY` (keep only the specified pages)
|
@@ -139,6 +105,7 @@ The available options are:
|
|
139
105
|
* `on_min_pages`: Apply the operation only if the document has at least the specified number of pages.
|
140
106
|
|
141
107
|
Example:
|
108
|
+
|
142
109
|
```rb
|
143
110
|
page_options = {
|
144
111
|
page_indexes:[1, 3], # Only target pages 1 and 3.
|
@@ -154,14 +121,17 @@ result = mindee_client.parse(
|
|
154
121
|
}
|
155
122
|
)
|
156
123
|
```
|
157
|
-
|
158
|
-
>
|
124
|
+
|
125
|
+
> ⚠️ Warning: Page operations alter the document's content.
|
126
|
+
>
|
127
|
+
> Ensure that this behavior is acceptable for your use case, as there is no undo once the pages are modified.
|
159
128
|
|
160
129
|
## Workflow operations
|
161
130
|
|
162
131
|
Workflow operations are similar to parsing operations, but they apply to calls made through the workflow feature.
|
163
132
|
|
164
133
|
Example:
|
134
|
+
|
165
135
|
```rb
|
166
136
|
workflow_options = {
|
167
137
|
document_alias: "my_document",
|
@@ -179,28 +149,68 @@ result = mindee_client.execute_workflow(
|
|
179
149
|
)
|
180
150
|
```
|
181
151
|
|
152
|
+
## Enqueueing and polling manually
|
153
|
+
|
154
|
+
> ❗️ We _strongly_ recommend you use a webhook setup, or a simple `parse()` call for most operations.
|
155
|
+
>
|
156
|
+
> Only use manual polling if you are **certain** that it is the best solution for you.
|
157
|
+
|
158
|
+
> 🚧 This feature is only available for compatible products.
|
159
|
+
>
|
160
|
+
> See the `Supports Polling/Webhooks` section on the product's documentation.
|
161
|
+
|
162
|
+
Instead of relying on the `parse()` method, you can enqueue documents and poll
|
163
|
+
the server manually:
|
164
|
+
|
165
|
+
```rb
|
166
|
+
# Load a file from disk
|
167
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
168
|
+
|
169
|
+
# Send the file to the server
|
170
|
+
enqueue_response = mindee_client.enqueue(
|
171
|
+
input_source,
|
172
|
+
Mindee::Product::Invoice::InvoiceV4 # InvoiceV4 supports asynchronous polling
|
173
|
+
)
|
174
|
+
|
175
|
+
job_id = enqueue_response.job.id
|
176
|
+
|
177
|
+
queue_res = parse_queued(job_id, Mindee::Product::Invoice::InvoiceV4, endpoint: endpoint)
|
178
|
+
polling_attempts = 0
|
179
|
+
|
180
|
+
while [Mindee::Parsing::Common::JobStatus::PROCESSING, Mindee::Parsing::Common::JobStatus::WAITING].include?(
|
181
|
+
queue_res.job.status) && polling_attempts < 80 # Recommended amounts of total retries for asynchronous polling.
|
182
|
+
sleep(1.5) # Recommended waiting time for re-attempts
|
183
|
+
queue_res = parse_queued(job_id, Mindee::Product::Invoice::InvoiceV4)
|
184
|
+
polling_attempts += 1
|
185
|
+
end
|
186
|
+
|
187
|
+
# If all went well, print a short summary of the result.
|
188
|
+
if queue_res.job.status == Mindee::Parsing::Common::JobStatus::COMPLETED
|
189
|
+
puts queue_res.document
|
190
|
+
end
|
191
|
+
```
|
192
|
+
|
182
193
|
## Loading a Document File
|
183
194
|
|
184
|
-
Before sending a document to Mindee’s API, you first need to load the file into one of our input source wrappers.
|
185
|
-
These wrappers not only validate the file type (using a trusted MIME type check) but also give you access the following
|
186
|
-
|
195
|
+
Before sending a document to Mindee’s API, you first need to load the file into one of our input source wrappers.
|
196
|
+
These wrappers not only validate the file type (using a trusted MIME type check) but also give you access to the following helper methods:
|
197
|
+
|
187
198
|
* [image compression](https://developers.mindee.com/docs/ruby-advanced-file-operations#image-compression)
|
188
199
|
* [pdf compression](https://developers.mindee.com/docs/ruby-advanced-file-operations#pdf-compression)
|
189
200
|
* [PDF fixing](https://developers.mindee.com/docs/ruby-advanced-file-operations#pdf-fixing)
|
190
201
|
|
191
202
|
> 📘 Regardless of how a document is loaded, the subsequent parsing or workflow operations remain the same.
|
192
203
|
|
193
|
-
|
194
204
|
Mindee’s Ruby client supports several methods for loading a document.
|
195
205
|
|
196
|
-
|
197
206
|
These can either be done locally:
|
207
|
+
|
198
208
|
* Loading from a [local path](#loading-from-a-local-path)
|
199
209
|
* Loading from a [File object](#loading-from-a-file-object)
|
200
210
|
* Loading from a [Base64-encoded string](#loading-from-a-base64-encoded-string)
|
201
211
|
* Loading from a [raw sequence of bytes](#loading-from-raw-bytes)
|
202
212
|
|
203
|
-
These four methods inherit from the `LocalInputSource` class, which provides a few common utility features described
|
213
|
+
These four methods inherit from the `LocalInputSource` class, which provides a few common utility features described
|
204
214
|
[here](#under-the-hood---local-input-source-details).
|
205
215
|
|
206
216
|
Or loading from a [URL](#loading-by-url).
|
@@ -210,6 +220,7 @@ Or loading from a [URL](#loading-by-url).
|
|
210
220
|
The most straightforward way of loading a document: load a file directly from disk by providing its path.
|
211
221
|
|
212
222
|
Example:
|
223
|
+
|
213
224
|
```rb
|
214
225
|
# Initialize the client.
|
215
226
|
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
@@ -232,10 +243,9 @@ File.open('invoice.jpg', 'rb') do |file_obj|
|
|
232
243
|
end
|
233
244
|
```
|
234
245
|
|
235
|
-
|
236
246
|
### Loading from a Base64-Encoded String
|
237
247
|
|
238
|
-
For cases where you have file data encoded in Base64, load the document by providing the encoded string along with the
|
248
|
+
For cases where you have file data encoded in Base64, load the document by providing the encoded string along with the
|
239
249
|
original filename. This converts the Base64 string into a local input source for further processing.
|
240
250
|
|
241
251
|
Example:
|
@@ -246,10 +256,9 @@ b64_string = "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGB..." # Example dummy b64_
|
|
246
256
|
input_source = mindee_client.source_from_b64string(b64_string, "receipt.jpg")
|
247
257
|
```
|
248
258
|
|
249
|
-
|
250
259
|
### Loading from Raw Bytes
|
251
260
|
|
252
|
-
If you have the file’s raw binary data (as bytes), create an input source by passing the bytes and the original
|
261
|
+
If you have the file’s raw binary data (as bytes), create an input source by passing the bytes and the original
|
253
262
|
filename.
|
254
263
|
|
255
264
|
Example:
|
@@ -263,32 +272,36 @@ input_source = mindee_client.source_from_bytes(raw_bytes, "invoice.pdf")
|
|
263
272
|
### Loading by URL
|
264
273
|
|
265
274
|
For remote documents, you can load a file through its URL. The server will accept direct URLs if:
|
266
|
-
|
275
|
+
|
276
|
+
* They begin with "https\://".
|
267
277
|
* They point to a valid file.
|
268
278
|
* They do not redirect the request (e.g. Google Drive documents or proxies).
|
269
279
|
|
270
|
-
Under the hood, the
|
271
|
-
[Mindee::Input::Source::URLInputSource](https://mindee.github.io/mindee-api-ruby/Mindee/Input/Source/URLInputSource.html)
|
280
|
+
Under the hood, the
|
281
|
+
[Mindee::Input::Source::URLInputSource](https://mindee.github.io/mindee-api-ruby/Mindee/Input/Source/URLInputSource.html)
|
272
282
|
class validates the URL, but won't perform an HTTP GET request unless specifically requested (using Ruby’s Net::HTTP).
|
273
283
|
|
274
284
|
Example:
|
285
|
+
|
275
286
|
```rb
|
276
287
|
input_source = mindee_client.source_from_url("https://www.example.com/invoice.pdf")
|
277
288
|
result = mindee_client.parse(input_source, Mindee::Product::Invoice::InvoiceV4)
|
278
289
|
```
|
279
290
|
|
280
|
-
To download the files before sending them, you can use the `as_local_input_source` method.
|
281
|
-
redirects, and supports optional authentication (via basic auth or JWT tokens).
|
291
|
+
To download the files before sending them, you can use the `as_local_input_source` method.
|
292
|
+
It can follow redirects, and supports optional authentication (via basic auth or JWT tokens).
|
293
|
+
You can optionally download and save the file locally or convert it into a local input source for further processing—thus benefiting from the same processing methods as local files.
|
282
294
|
|
283
295
|
Additional URL features include:
|
284
296
|
|
285
|
-
* Validation: The URLInputSource throws an error if the URL does not start with “https
|
297
|
+
* Validation: The URLInputSource throws an error if the URL does not start with “https\://”.
|
286
298
|
* Authentication: You can supply basic authentication (username/password) or a bearer token.
|
287
299
|
* Local Conversion: Methods such as `write_to_file` let you download and inspect the file locally. Alternatively,
|
288
300
|
* `as_local_input_source` converts the downloaded content into a LocalInputSource so you can apply operations like
|
289
301
|
* compression.
|
290
302
|
|
291
303
|
Example:
|
304
|
+
|
292
305
|
```rb
|
293
306
|
# Load the URL input normally:
|
294
307
|
remote_input_source = mindee_client.source_from_url("https://www.example.com/invoice.pdf")
|
@@ -302,7 +315,7 @@ local_downloaded_file_path = remote_input_source.write_to_file("path/to/my/downl
|
|
302
315
|
|
303
316
|
### Under the Hood - Local Input Source Details
|
304
317
|
|
305
|
-
When loading using from either a path, file, raw byte sequence or base64 string, the created object inherits from
|
318
|
+
When loading from either a path, file, raw byte sequence or base64 string, the created object inherits from
|
306
319
|
[Mindee::Input::Source::LocalInputSource](https://mindee.github.io/mindee-api-ruby/Mindee/Input/Source/LocalInputSource.html). Key features include:
|
307
320
|
|
308
321
|
* Automatic MIME Type Validation using Marcel to check for server file format compliance.
|
@@ -313,4 +326,5 @@ When loading using from either a path, file, raw byte sequence or base64 string,
|
|
313
326
|
* [count_pages](https://mindee.github.io/mindee-api-ruby/Mindee/Input/Source/LocalInputSource.html#count_pages-instance_method) – For PDF files, returns the total page count; by default, non-PDF files are assumed to be single-page documents.
|
314
327
|
|
315
328
|
## Questions?
|
329
|
+
|
316
330
|
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
|
@@ -1,5 +1,5 @@
|
|
1
1
|
---
|
2
|
-
title:
|
2
|
+
title: FR Energy Bill
|
3
3
|
category: 622b805aaec68102ea7fcbc2
|
4
4
|
slug: ruby-fr-energy-bill-ocr
|
5
5
|
parentDoc: 67b49e29a2cd6f08d69a40d8
|
@@ -12,7 +12,7 @@ The Ruby Client Library supports the [Energy Bill API](https://platform.mindee.c
|
|
12
12
|
> | Specification | Details |
|
13
13
|
> | ------------------------------ | -------------------------------------------------- |
|
14
14
|
> | Endpoint Name | `energy_bill_fra` |
|
15
|
-
> | Recommended Version | `v1.
|
15
|
+
> | Recommended Version | `v1.2` |
|
16
16
|
> | Supports Polling/Webhooks | ✔️ Yes |
|
17
17
|
> | Support Synchronous HTTP Calls | ❌ No |
|
18
18
|
> | Geography | 🇫🇷 France |
|
@@ -186,11 +186,19 @@ Details of energy consumption.
|
|
186
186
|
|
187
187
|
A `EnergyBillV1EnergyUsage` implements the following attributes:
|
188
188
|
|
189
|
+
* `consumption` (Float): The quantity of energy consumed.
|
189
190
|
* `description` (String): Description or details of the energy usage.
|
190
191
|
* `end_date` (String): The end date of the energy usage.
|
191
192
|
* `start_date` (String): The start date of the energy usage.
|
192
193
|
* `tax_rate` (Float): The rate of tax applied to the total cost.
|
193
194
|
* `total` (Float): The total cost of energy consumed.
|
195
|
+
* `unit` (String): The unit of measurement for energy consumption.
|
196
|
+
|
197
|
+
#### Possible values include:
|
198
|
+
- kWh
|
199
|
+
- m3
|
200
|
+
- L
|
201
|
+
|
194
202
|
* `unit_price` (Float): The price per unit of energy consumed.
|
195
203
|
Fields which are specific to this product; they are not used in any other product.
|
196
204
|
|
@@ -221,7 +229,7 @@ A `EnergyBillV1MeterDetail` implements the following attributes:
|
|
221
229
|
- water
|
222
230
|
- None
|
223
231
|
|
224
|
-
* `unit` (String): The unit of
|
232
|
+
* `unit` (String): The unit of power for energy consumption.
|
225
233
|
|
226
234
|
# Attributes
|
227
235
|
The following fields are extracted for Energy Bill V1:
|
@@ -1,5 +1,5 @@
|
|
1
1
|
---
|
2
|
-
title:
|
2
|
+
title: FR Carte Nationale d'Identité
|
3
3
|
category: 622b805aaec68102ea7fcbc2
|
4
4
|
slug: ruby-fr-carte-nationale-didentite-ocr
|
5
5
|
parentDoc: 67b49e29a2cd6f08d69a40d8
|
@@ -192,9 +192,9 @@ puts result.document.inference.prediction.document_number.value
|
|
192
192
|
[📄](#page-level-fields "This field is only present on individual pages.")**document_side** ([ClassificationField](#classification-field)): The sides of the document which are visible.
|
193
193
|
|
194
194
|
#### Possible values include:
|
195
|
-
- RECTO
|
196
|
-
- VERSO
|
197
|
-
- RECTO & VERSO
|
195
|
+
- 'RECTO'
|
196
|
+
- 'VERSO'
|
197
|
+
- 'RECTO & VERSO'
|
198
198
|
|
199
199
|
```rb
|
200
200
|
result.document.document_side.each do |document_side_elem|
|
@@ -206,8 +206,8 @@ puts result.document.inference.prediction.document_number.value
|
|
206
206
|
[📄](#page-level-fields "This field is only present on individual pages.")**document_type** ([ClassificationField](#classification-field)): The document type or format.
|
207
207
|
|
208
208
|
#### Possible values include:
|
209
|
-
- NEW
|
210
|
-
- OLD
|
209
|
+
- 'NEW'
|
210
|
+
- 'OLD'
|
211
211
|
|
212
212
|
```rb
|
213
213
|
result.document.document_type.each do |document_type_elem|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
---
|
2
|
-
title:
|
2
|
+
title: IND Passport - India
|
3
3
|
category: 622b805aaec68102ea7fcbc2
|
4
4
|
slug: ruby-ind-passport---india-ocr
|
5
5
|
parentDoc: 67b49e29a2cd6f08d69a40d8
|
@@ -194,8 +194,8 @@ puts result.document.inference.prediction.file_number.value
|
|
194
194
|
**gender** ([ClassificationField](#classification-field)): The gender of the passport holder.
|
195
195
|
|
196
196
|
#### Possible values include:
|
197
|
-
- M
|
198
|
-
- F
|
197
|
+
- 'M'
|
198
|
+
- 'F'
|
199
199
|
|
200
200
|
```rb
|
201
201
|
puts result.document.inference.prediction.gender.value
|
@@ -289,8 +289,8 @@ puts result.document.inference.prediction.old_passport_place_of_issue.value
|
|
289
289
|
**page_number** ([ClassificationField](#classification-field)): The page number of the passport document.
|
290
290
|
|
291
291
|
#### Possible values include:
|
292
|
-
- 1
|
293
|
-
- 2
|
292
|
+
- '1'
|
293
|
+
- '2'
|
294
294
|
|
295
295
|
```rb
|
296
296
|
puts result.document.inference.prediction.page_number.value
|
@@ -1,5 +1,5 @@
|
|
1
1
|
---
|
2
|
-
title:
|
2
|
+
title: US Healthcare Card
|
3
3
|
category: 622b805aaec68102ea7fcbc2
|
4
4
|
slug: ruby-us-healthcare-card-ocr
|
5
5
|
parentDoc: 67b49e29a2cd6f08d69a40d8
|
@@ -12,7 +12,7 @@ The Ruby Client Library supports the [Healthcare Card API](https://platform.mind
|
|
12
12
|
> | Specification | Details |
|
13
13
|
> | ------------------------------ | -------------------------------------------------- |
|
14
14
|
> | Endpoint Name | `us_healthcare_cards` |
|
15
|
-
> | Recommended Version | `v1.
|
15
|
+
> | Recommended Version | `v1.1` |
|
16
16
|
> | Supports Polling/Webhooks | ✔️ Yes |
|
17
17
|
> | Support Synchronous HTTP Calls | ❌ No |
|
18
18
|
> | Geography | 🇺🇸 United States |
|
data/docs/localized_products.md
CHANGED
data/lib/mindee/client.rb
CHANGED
@@ -80,6 +80,7 @@ module Mindee
|
|
80
80
|
# This performs a full OCR operation on the server and may increase response time.
|
81
81
|
# @!attribute public_url [String, nil] A unique, encrypted URL for accessing the document validation interface without
|
82
82
|
# requiring authentication.
|
83
|
+
# @!attribute rag [bool, nil] Whether to enable Retrieval-Augmented Generation.
|
83
84
|
# @!attribute page_options [PageOptions, Hash, nil] Page cutting/merge options:
|
84
85
|
# * `:page_indexes` Zero-based list of page indexes.
|
85
86
|
# * `:operation` Operation to apply on the document, given the specified page indexes:
|
@@ -87,7 +88,7 @@ module Mindee
|
|
87
88
|
# * `:REMOVE` - remove the specified pages, and keep all others.
|
88
89
|
# * `:on_min_pages` Apply the operation only if the document has at least this many pages.
|
89
90
|
class WorkflowOptions
|
90
|
-
attr_accessor :document_alias, :priority, :full_text, :public_url, :page_options
|
91
|
+
attr_accessor :document_alias, :priority, :full_text, :public_url, :page_options, :rag
|
91
92
|
|
92
93
|
def initialize(params: {})
|
93
94
|
params = params.transform_keys(&:to_sym)
|
@@ -95,6 +96,7 @@ module Mindee
|
|
95
96
|
@priority = params.fetch(:priority, nil)
|
96
97
|
@full_text = params.fetch(:full_text, false)
|
97
98
|
@public_url = params.fetch(:public_url, nil)
|
99
|
+
@rag = params.fetch(:rag, nil)
|
98
100
|
raw_page_options = params.fetch(:page_options, nil)
|
99
101
|
raw_page_options = PageOptions.new(params: raw_page_options) unless raw_page_options.is_a?(PageOptions)
|
100
102
|
@page_options = raw_page_options
|
@@ -297,8 +299,6 @@ module Mindee
|
|
297
299
|
queue_res
|
298
300
|
end
|
299
301
|
|
300
|
-
# Same idea applies to execute_workflow:
|
301
|
-
#
|
302
302
|
# Sends a document to a workflow.
|
303
303
|
#
|
304
304
|
# Accepts options either as a Hash or as a WorkflowOptions struct.
|
@@ -309,6 +309,7 @@ module Mindee
|
|
309
309
|
# * `document_alias` [String, nil] Alias to give to the document.
|
310
310
|
# * `priority` [Symbol, nil] Priority to give to the document.
|
311
311
|
# * `full_text` [bool] Whether to include the full OCR text response in compatible APIs.
|
312
|
+
# * `rag` [bool, nil] Whether to enable Retrieval-Augmented Generation.
|
312
313
|
#
|
313
314
|
# * `public_url` [String, nil] A unique, encrypted URL for accessing the document validation interface without
|
314
315
|
# requiring authentication.
|
@@ -317,7 +318,7 @@ module Mindee
|
|
317
318
|
# * `:operation` Operation to apply on the document, given the `page_indexes` specified:
|
318
319
|
# * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
|
319
320
|
# * `:REMOVE` - remove the specified pages, and keep all others.
|
320
|
-
# * `:on_min_pages` Apply the operation only if document has at least this many
|
321
|
+
# * `:on_min_pages` Apply the operation only if the document has at least this many pages.
|
321
322
|
# @return [Mindee::Parsing::Common::WorkflowResponse]
|
322
323
|
def execute_workflow(input_source, workflow_id, options: {})
|
323
324
|
opts = options.is_a?(WorkflowOptions) ? options : WorkflowOptions.new(params: options)
|
@@ -331,10 +332,7 @@ module Mindee
|
|
331
332
|
|
332
333
|
prediction, raw_http = workflow_endpoint.execute_workflow(
|
333
334
|
input_source,
|
334
|
-
opts
|
335
|
-
opts.document_alias,
|
336
|
-
opts.priority,
|
337
|
-
opts.public_url
|
335
|
+
opts
|
338
336
|
)
|
339
337
|
|
340
338
|
Mindee::Parsing::Common::WorkflowResponse.new(Product::Universal::Universal, prediction, raw_http)
|
@@ -24,15 +24,17 @@ module Mindee
|
|
24
24
|
|
25
25
|
# Sends a document to the workflow.
|
26
26
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
|
27
|
-
# @param
|
28
|
-
# @param priority [Symbol, nil] Priority to give to the document.
|
29
|
-
# @param full_text [bool] Whether to include the full OCR text response in compatible APIs.
|
30
|
-
# @param public_url [String, nil] A unique, encrypted URL for accessing the document validation interface without
|
31
|
-
# requiring authentication.
|
27
|
+
# @param opts [WorkflowOptions] Options to configure workflow execution behavior.
|
32
28
|
# @return [Array]
|
33
|
-
def execute_workflow(input_source,
|
29
|
+
def execute_workflow(input_source, opts)
|
34
30
|
check_api_key
|
35
|
-
response = workflow_execution_req_post(input_source,
|
31
|
+
response = workflow_execution_req_post(input_source, opts)
|
32
|
+
if response.nil?
|
33
|
+
raise Mindee::Errors::MindeeHTTPError.new(
|
34
|
+
{ code: 0, details: 'Server response was nil.', message: 'Unknown error.' }, @url, 0
|
35
|
+
)
|
36
|
+
end
|
37
|
+
|
36
38
|
hashed_response = JSON.parse(response.body, object_class: Hash)
|
37
39
|
return [hashed_response, response.body] if ResponseValidation.valid_async_response?(response)
|
38
40
|
|
@@ -42,31 +44,29 @@ module Mindee
|
|
42
44
|
end
|
43
45
|
|
44
46
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
|
45
|
-
# @param
|
46
|
-
# @param priority [Symbol, nil] Priority to give to the document.
|
47
|
-
# @param full_text [bool] Whether to include the full OCR text response in compatible APIs.
|
48
|
-
# @param public_url [String, nil] A unique, encrypted URL for accessing the document validation interface without
|
49
|
-
# requiring authentication.
|
47
|
+
# @param opts [WorkflowOptions] Options to configure workflow execution behavior.
|
50
48
|
# @return [Net::HTTPResponse, nil]
|
51
|
-
def workflow_execution_req_post(input_source,
|
49
|
+
def workflow_execution_req_post(input_source, opts)
|
52
50
|
uri = URI(@url)
|
53
51
|
params = {} # : Hash[Symbol | String, untyped]
|
54
|
-
params[:full_text_ocr] = 'true' if full_text
|
55
|
-
|
52
|
+
params[:full_text_ocr] = 'true' if opts.full_text
|
53
|
+
params[:rag] = 'true' if opts.rag
|
54
|
+
uri.query = URI.encode_www_form(params) if params.any?
|
56
55
|
|
57
56
|
headers = {
|
58
57
|
'Authorization' => "Token #{@api_key}",
|
59
58
|
'User-Agent' => USER_AGENT,
|
60
59
|
}
|
61
60
|
req = Net::HTTP::Post.new(uri, headers)
|
62
|
-
form_data =
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
form_data.push ['
|
69
|
-
form_data.push ['
|
61
|
+
form_data = [] # : Array[untyped]
|
62
|
+
if input_source.is_a?(Mindee::Input::Source::URLInputSource)
|
63
|
+
form_data.push ['document', input_source.url]
|
64
|
+
else
|
65
|
+
form_data.push input_source.read_contents
|
66
|
+
end
|
67
|
+
form_data.push ['alias', opts.document_alias] if opts.document_alias
|
68
|
+
form_data.push ['public_url', opts.public_url] if opts.public_url
|
69
|
+
form_data.push ['priority', opts.priority.to_s] if opts.priority
|
70
70
|
|
71
71
|
req.set_form(form_data, 'multipart/form-data')
|
72
72
|
|
@@ -46,7 +46,7 @@ module Mindee
|
|
46
46
|
extension = File.extname(@filename)
|
47
47
|
basename = File.basename(@filename, extension)
|
48
48
|
page_indexes.each do |page_index_list|
|
49
|
-
if page_index_list.
|
49
|
+
if page_index_list.nil? || page_index_list.empty?
|
50
50
|
raise Errors::MindeePDFError, "Empty indexes aren't allowed for extraction #{page_index_list}"
|
51
51
|
end
|
52
52
|
|
@@ -70,12 +70,12 @@ module Mindee
|
|
70
70
|
# rubocop:disable Metrics/PerceivedComplexity
|
71
71
|
|
72
72
|
# Extracts invoices as complete PDFs from the document.
|
73
|
-
# @param page_indexes [Array<Array<Integer>,
|
73
|
+
# @param page_indexes [Array<Array<Integer>, InvoiceSplitterV1InvoicePageGroup>]
|
74
74
|
# @param strict [bool]
|
75
75
|
# @return [Array<Mindee::PDF::PDFExtractor::ExtractedPDF>]
|
76
76
|
def extract_invoices(page_indexes, strict: false)
|
77
77
|
raise Errors::MindeePDFError, 'No indexes provided.' if page_indexes.empty?
|
78
|
-
|
78
|
+
if page_indexes[0].is_a?(Array) && page_indexes[0].all? { |i| i.is_a?(Integer) }
|
79
79
|
return extract_sub_documents(page_indexes)
|
80
80
|
end
|
81
81
|
return extract_sub_documents(page_indexes.map(&:page_indexes)) unless strict
|