mindee 4.2.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +16 -0
- data/docs/code_samples/workflow_execution.txt +1 -1
- data/docs/code_samples/workflow_polling.txt +36 -0
- data/docs/global_products/financial_document_v1.md +22 -4
- data/docs/global_products/invoices_v4.md +22 -4
- data/docs/localized_products/us_healthcare_cards_v1.md +17 -1
- data/lib/mindee/client.rb +13 -13
- data/lib/mindee/http/endpoint.rb +37 -47
- data/lib/mindee/http/response_validation.rb +1 -1
- data/lib/mindee/parsing/common/api_response.rb +1 -1
- data/lib/mindee/parsing/common/document.rb +1 -1
- data/lib/mindee/parsing/common/extras/extras.rb +4 -1
- data/lib/mindee/parsing/common/extras/rag_extra.rb +24 -0
- data/lib/mindee/parsing/common/extras.rb +1 -0
- data/lib/mindee/parsing/common/inference.rb +3 -0
- data/lib/mindee/parsing/standard/tax_field.rb +9 -9
- data/lib/mindee/pdf/pdf_extractor.rb +1 -1
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_items.rb +2 -2
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +9 -1
- data/lib/mindee/product/financial_document/financial_document_v1_line_items.rb +2 -2
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +1 -1
- data/lib/mindee/product/fr/bank_statement/bank_statement_v2_transactions.rb +2 -2
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usages.rb +2 -2
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_subscriptions.rb +2 -2
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contributions.rb +2 -2
- data/lib/mindee/product/fr/payslip/payslip_v2_salary_details.rb +2 -2
- data/lib/mindee/product/fr/payslip/payslip_v3_paid_time_offs.rb +2 -2
- data/lib/mindee/product/fr/payslip/payslip_v3_salary_details.rb +2 -2
- data/lib/mindee/product/invoice/invoice_v4_document.rb +9 -1
- data/lib/mindee/product/invoice/invoice_v4_line_items.rb +2 -2
- data/lib/mindee/product/invoice/invoice_v4_page.rb +1 -1
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +2 -2
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrients.rb +2 -2
- data/lib/mindee/product/receipt/receipt_v5_line_items.rb +2 -2
- data/lib/mindee/product/resume/resume_v1_certificates.rb +2 -2
- data/lib/mindee/product/resume/resume_v1_educations.rb +2 -2
- data/lib/mindee/product/resume/resume_v1_languages.rb +2 -2
- data/lib/mindee/product/resume/resume_v1_professional_experiences.rb +2 -2
- data/lib/mindee/product/resume/resume_v1_social_networks_urls.rb +2 -2
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copay.rb +2 -2
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copays.rb +4 -4
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_document.rb +8 -3
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_page.rb +1 -1
- data/lib/mindee/product/us/us_mail/us_mail_v2_recipient_addresses.rb +2 -2
- data/lib/mindee/product/us/us_mail/us_mail_v3_recipient_addresses.rb +2 -2
- data/lib/mindee/version.rb +1 -1
- data/mindee.gemspec +1 -1
- data/sig/custom/net_http.rbs +3 -0
- data/sig/mindee/client.rbs +4 -2
- data/sig/mindee/http/endpoint.rbs +9 -8
- data/sig/mindee/http/response_validation.rbs +3 -3
- data/sig/mindee/parsing/common/api_response.rbs +1 -1
- data/sig/mindee/parsing/common/extras/full_text_ocr_extra.rbs +2 -2
- data/sig/mindee/parsing/common/extras/rag_extra.rbs +15 -0
- data/sig/mindee/product/financial_document/financial_document_v1_document.rbs +1 -0
- data/sig/mindee/product/invoice/invoice_v4_document.rbs +1 -0
- data/sig/mindee/product/us/healthcare_card/healthcare_card_v1_document.rbs +1 -0
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f9a2e1c98c3971454ebfaefe6902121bfab6e577f693e36a525739eb2dfbeeb9
|
4
|
+
data.tar.gz: 7f35351f0f647a165c82f512395e8883647e96165968869bf3904f4c2bf6b4ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '072284cd8bb18b6190b870aa014fb278744a6db35d1210ca57dbddb1d78c71a2675f2675a7ce581b83cbb1fdcd88cdeb6dedd6892c2402b43cf3f711425f9e46'
|
7
|
+
data.tar.gz: 4bad8302b9bef1496fc47e5f04cb301dd3c81a99dff5800ffbc1aaf3ef4bf1f5a6493f4908178b337e7f25596b7f9ce2946d47d7526b38065ccfbd7ac045624a
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,21 @@
|
|
1
1
|
# Mindee Ruby API Library Changelog
|
2
2
|
|
3
|
+
## v4.4.0 - 2025-04-23
|
4
|
+
### Changes
|
5
|
+
* :sparkles: add support for workflow polling
|
6
|
+
* :sparkles: add extras accessor from inference
|
7
|
+
### Fixes
|
8
|
+
* :recycle: fix misc typing issues
|
9
|
+
* :bug: fix improper return format for `raw_http`
|
10
|
+
|
11
|
+
|
12
|
+
## v4.3.0 - 2025-04-08
|
13
|
+
### Changes
|
14
|
+
* :sparkles: add support for Financial Document V1.12
|
15
|
+
* :sparkles: add support for Invoices V4.10
|
16
|
+
* :sparkles: add support for US Healthcare Cards V1.2
|
17
|
+
|
18
|
+
|
3
19
|
## v4.2.0 - 2025-03-28
|
4
20
|
### Changes
|
5
21
|
* :coffin: remove support for US W9
|
@@ -0,0 +1,36 @@
|
|
1
|
+
#
|
2
|
+
# Install the Ruby client library by running:
|
3
|
+
# gem install mindee
|
4
|
+
#
|
5
|
+
|
6
|
+
require 'mindee'
|
7
|
+
|
8
|
+
workflow_id = 'workflow-id'
|
9
|
+
|
10
|
+
# Init a new client
|
11
|
+
mindee_client = Mindee::Client.new
|
12
|
+
|
13
|
+
# Load a file from disk
|
14
|
+
input_source = mindee_client.source_from_path('path/to/my/file.ext')
|
15
|
+
|
16
|
+
# Initialize a custom endpoint for this product
|
17
|
+
custom_endpoint = mindee_client.create_endpoint(
|
18
|
+
account_name: 'my-account',
|
19
|
+
endpoint_name: 'my-endpoint',
|
20
|
+
version: 'my-version'
|
21
|
+
)
|
22
|
+
|
23
|
+
# Parse the file
|
24
|
+
result = mindee_client.parse(
|
25
|
+
input_source,
|
26
|
+
Mindee::Product::Universal::Universal,
|
27
|
+
endpoint: custom_endpoint,
|
28
|
+
options: {
|
29
|
+
rag: true,
|
30
|
+
workflow_id: workflow_id
|
31
|
+
}
|
32
|
+
)
|
33
|
+
|
34
|
+
# Print a full summary of the parsed data in RST format
|
35
|
+
puts result.document
|
36
|
+
|
@@ -12,7 +12,7 @@ The Ruby Client Library supports the [Financial Document API](https://platform.m
|
|
12
12
|
> | Specification | Details |
|
13
13
|
> | ------------------------------ | -------------------------------------------------- |
|
14
14
|
> | Endpoint Name | `financial_document` |
|
15
|
-
> | Recommended Version | `v1.
|
15
|
+
> | Recommended Version | `v1.12` |
|
16
16
|
> | Supports Polling/Webhooks | ✔️ Yes |
|
17
17
|
> | Support Synchronous HTTP Calls | ✔️ Yes |
|
18
18
|
> | Geography | 🌐 Global |
|
@@ -93,7 +93,7 @@ puts result.document
|
|
93
93
|
########
|
94
94
|
Document
|
95
95
|
########
|
96
|
-
:Mindee ID:
|
96
|
+
:Mindee ID: a80ac0ee-26f6-4e2e-988a-960b240d5ba7
|
97
97
|
:Filename: default_sample.jpg
|
98
98
|
|
99
99
|
Inference
|
@@ -118,7 +118,7 @@ Prediction
|
|
118
118
|
+---------------+--------+----------+---------------+
|
119
119
|
| Base | Code | Rate (%) | Amount |
|
120
120
|
+===============+========+==========+===============+
|
121
|
-
|
|
121
|
+
| 195.00 | | 5.00 | 9.75 |
|
122
122
|
+---------------+--------+----------+---------------+
|
123
123
|
:Supplier Payment Details:
|
124
124
|
:Supplier Name: JOHN SMITH
|
@@ -170,7 +170,7 @@ Page 0
|
|
170
170
|
+---------------+--------+----------+---------------+
|
171
171
|
| Base | Code | Rate (%) | Amount |
|
172
172
|
+===============+========+==========+===============+
|
173
|
-
|
|
173
|
+
| 195.00 | | 5.00 | 9.75 |
|
174
174
|
+---------------+--------+----------+---------------+
|
175
175
|
:Supplier Payment Details:
|
176
176
|
:Supplier Name: JOHN SMITH
|
@@ -382,6 +382,24 @@ puts result.document.inference.prediction.document_number.value
|
|
382
382
|
puts result.document.inference.prediction.document_type.value
|
383
383
|
```
|
384
384
|
|
385
|
+
## Document Type Extended
|
386
|
+
**document_type_extended** ([ClassificationField](#classification-field)): Document type extended.
|
387
|
+
|
388
|
+
#### Possible values include:
|
389
|
+
- 'CREDIT NOTE'
|
390
|
+
- 'INVOICE'
|
391
|
+
- 'OTHER'
|
392
|
+
- 'OTHER_FINANCIAL'
|
393
|
+
- 'PAYSLIP'
|
394
|
+
- 'PURCHASE ORDER'
|
395
|
+
- 'QUOTE'
|
396
|
+
- 'RECEIPT'
|
397
|
+
- 'STATEMENT'
|
398
|
+
|
399
|
+
```rb
|
400
|
+
puts result.document.inference.prediction.document_type_extended.value
|
401
|
+
```
|
402
|
+
|
385
403
|
## Due Date
|
386
404
|
**due_date** ([DateField](#date-field)): The date on which the payment is due.
|
387
405
|
|
@@ -12,7 +12,7 @@ The Ruby Client Library supports the [Invoice API](https://platform.mindee.com/m
|
|
12
12
|
> | Specification | Details |
|
13
13
|
> | ------------------------------ | -------------------------------------------------- |
|
14
14
|
> | Endpoint Name | `invoices` |
|
15
|
-
> | Recommended Version | `v4.
|
15
|
+
> | Recommended Version | `v4.10` |
|
16
16
|
> | Supports Polling/Webhooks | ✔️ Yes |
|
17
17
|
> | Support Synchronous HTTP Calls | ✔️ Yes |
|
18
18
|
> | Geography | 🌐 Global |
|
@@ -93,7 +93,7 @@ puts result.document
|
|
93
93
|
########
|
94
94
|
Document
|
95
95
|
########
|
96
|
-
:Mindee ID:
|
96
|
+
:Mindee ID: b55db8f9-ae3b-4f05-b2f1-ec0ced5e5b70
|
97
97
|
:Filename: default_sample.jpg
|
98
98
|
|
99
99
|
Inference
|
@@ -117,7 +117,7 @@ Prediction
|
|
117
117
|
+---------------+--------+----------+---------------+
|
118
118
|
| Base | Code | Rate (%) | Amount |
|
119
119
|
+===============+========+==========+===============+
|
120
|
-
|
|
120
|
+
| 2145.00 | | 8.00 | 193.20 |
|
121
121
|
+---------------+--------+----------+---------------+
|
122
122
|
:Supplier Payment Details:
|
123
123
|
:Supplier Name: TURNPIKE DESIGNS
|
@@ -163,7 +163,7 @@ Page 0
|
|
163
163
|
+---------------+--------+----------+---------------+
|
164
164
|
| Base | Code | Rate (%) | Amount |
|
165
165
|
+===============+========+==========+===============+
|
166
|
-
|
|
166
|
+
| 2145.00 | | 8.00 | 193.20 |
|
167
167
|
+---------------+--------+----------+---------------+
|
168
168
|
:Supplier Payment Details:
|
169
169
|
:Supplier Name: TURNPIKE DESIGNS
|
@@ -344,6 +344,24 @@ puts result.document.inference.prediction.date.value
|
|
344
344
|
puts result.document.inference.prediction.document_type.value
|
345
345
|
```
|
346
346
|
|
347
|
+
## Document Type Extended
|
348
|
+
**document_type_extended** ([ClassificationField](#classification-field)): Document type extended.
|
349
|
+
|
350
|
+
#### Possible values include:
|
351
|
+
- 'CREDIT NOTE'
|
352
|
+
- 'INVOICE'
|
353
|
+
- 'OTHER'
|
354
|
+
- 'OTHER_FINANCIAL'
|
355
|
+
- 'PAYSLIP'
|
356
|
+
- 'PURCHASE ORDER'
|
357
|
+
- 'QUOTE'
|
358
|
+
- 'RECEIPT'
|
359
|
+
- 'STATEMENT'
|
360
|
+
|
361
|
+
```rb
|
362
|
+
puts result.document.inference.prediction.document_type_extended.value
|
363
|
+
```
|
364
|
+
|
347
365
|
## Due Date
|
348
366
|
**due_date** ([DateField](#date-field)): The date on which the payment is due.
|
349
367
|
|
@@ -12,7 +12,7 @@ The Ruby Client Library supports the [Healthcare Card API](https://platform.mind
|
|
12
12
|
> | Specification | Details |
|
13
13
|
> | ------------------------------ | -------------------------------------------------- |
|
14
14
|
> | Endpoint Name | `us_healthcare_cards` |
|
15
|
-
> | Recommended Version | `v1.
|
15
|
+
> | Recommended Version | `v1.2` |
|
16
16
|
> | Supports Polling/Webhooks | ✔️ Yes |
|
17
17
|
> | Support Synchronous HTTP Calls | ❌ No |
|
18
18
|
> | Geography | 🇺🇸 United States |
|
@@ -138,6 +138,15 @@ A `HealthcareCardV1Copay` implements the following attributes:
|
|
138
138
|
* `service_fees` (Float): The price of the service.
|
139
139
|
* `service_name` (String): The name of the service covered by the copay.
|
140
140
|
|
141
|
+
#### Possible values include:
|
142
|
+
- primary_care
|
143
|
+
- emergency_room
|
144
|
+
- urgent_care
|
145
|
+
- specialist
|
146
|
+
- office_visit
|
147
|
+
- prescription
|
148
|
+
|
149
|
+
|
141
150
|
# Attributes
|
142
151
|
The following fields are extracted for Healthcare Card V1:
|
143
152
|
|
@@ -222,6 +231,13 @@ puts result.document.inference.prediction.rx_bin.value
|
|
222
231
|
puts result.document.inference.prediction.rx_grp.value
|
223
232
|
```
|
224
233
|
|
234
|
+
## RX ID
|
235
|
+
**rx_id** ([StringField](#string-field)): The ID number for prescription drug coverage.
|
236
|
+
|
237
|
+
```rb
|
238
|
+
puts result.document.inference.prediction.rx_id.value
|
239
|
+
```
|
240
|
+
|
225
241
|
## RX PCN
|
226
242
|
**rx_pcn** ([StringField](#string-field)): The PCN number for prescription drug coverage.
|
227
243
|
|
data/lib/mindee/client.rb
CHANGED
@@ -54,8 +54,8 @@ module Mindee
|
|
54
54
|
# @!attribute delay_sec [Numeric] Delay between polling attempts. Defaults to 1.5.
|
55
55
|
# @!attribute max_retries [Integer] Maximum number of retries. Defaults to 80.
|
56
56
|
class ParseOptions
|
57
|
-
attr_accessor :all_words, :full_text, :close_file, :page_options, :cropper,
|
58
|
-
:initial_delay_sec, :delay_sec, :max_retries
|
57
|
+
attr_accessor :all_words, :full_text, :close_file, :page_options, :cropper, :rag,
|
58
|
+
:workflow_id, :initial_delay_sec, :delay_sec, :max_retries
|
59
59
|
|
60
60
|
def initialize(params: {})
|
61
61
|
params = params.transform_keys(&:to_sym)
|
@@ -66,6 +66,8 @@ module Mindee
|
|
66
66
|
raw_page_options = PageOptions.new(params: raw_page_options) unless raw_page_options.is_a?(PageOptions)
|
67
67
|
@page_options = raw_page_options
|
68
68
|
@cropper = params.fetch(:cropper, false)
|
69
|
+
@rag = params.fetch(:rag, false)
|
70
|
+
@workflow_id = params.fetch(:workflow_id, nil)
|
69
71
|
@initial_delay_sec = params.fetch(:initial_delay_sec, 2)
|
70
72
|
@delay_sec = params.fetch(:delay_sec, 1.5)
|
71
73
|
@max_retries = params.fetch(:max_retries, 80)
|
@@ -176,13 +178,10 @@ module Mindee
|
|
176
178
|
|
177
179
|
prediction, raw_http = endpoint.predict(
|
178
180
|
input_source,
|
179
|
-
options
|
180
|
-
options.full_text,
|
181
|
-
options.close_file,
|
182
|
-
options.cropper
|
181
|
+
options
|
183
182
|
)
|
184
183
|
|
185
|
-
Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
|
184
|
+
Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_s)
|
186
185
|
end
|
187
186
|
|
188
187
|
# Enqueue a document for async parsing
|
@@ -207,6 +206,8 @@ module Mindee
|
|
207
206
|
# - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages.
|
208
207
|
# * `:cropper` [bool] Whether to include cropper results for each page.
|
209
208
|
# This performs a cropping operation on the server and will increase response time.
|
209
|
+
# * `:rag` [bool] Whether to enable Retrieval-Augmented Generation. Only works if a Workflow ID is provided.
|
210
|
+
# * `:workflow_id` [String, nil] ID of the workflow to use.
|
210
211
|
# @param endpoint [Mindee::HTTP::Endpoint] Endpoint of the API.
|
211
212
|
# @return [Mindee::Parsing::Common::ApiResponse]
|
212
213
|
def enqueue(input_source, product_class, endpoint: nil, options: {})
|
@@ -216,12 +217,9 @@ module Mindee
|
|
216
217
|
|
217
218
|
prediction, raw_http = endpoint.predict_async(
|
218
219
|
input_source,
|
219
|
-
opts
|
220
|
-
opts.full_text,
|
221
|
-
opts.close_file,
|
222
|
-
opts.cropper
|
220
|
+
opts
|
223
221
|
)
|
224
|
-
Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
|
222
|
+
Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_json)
|
225
223
|
end
|
226
224
|
|
227
225
|
# Parses a queued document
|
@@ -236,7 +234,7 @@ module Mindee
|
|
236
234
|
endpoint = initialize_endpoint(product_class) if endpoint.nil?
|
237
235
|
logger.debug("Fetching queued document as '#{endpoint.url_root}'")
|
238
236
|
prediction, raw_http = endpoint.parse_async(job_id)
|
239
|
-
Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
|
237
|
+
Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_json)
|
240
238
|
end
|
241
239
|
|
242
240
|
# Enqueue a document for async parsing and automatically try to retrieve it
|
@@ -261,6 +259,8 @@ module Mindee
|
|
261
259
|
# - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages.
|
262
260
|
# * `:cropper` [bool, nil] Whether to include cropper results for each page.
|
263
261
|
# This performs a cropping operation on the server and will increase response time.
|
262
|
+
# * `:rag` [bool] Whether to enable Retrieval-Augmented Generation. Only works if a Workflow ID is provided.
|
263
|
+
# * `:workflow_id` [String, nil] ID of the workflow to use.
|
264
264
|
# * `:initial_delay_sec` [Numeric] Initial delay before polling. Defaults to 2.
|
265
265
|
# * `:delay_sec` [Numeric] Delay between polling attempts. Defaults to 1.5.
|
266
266
|
# * `:max_retries` [Integer] Maximum number of retries. Defaults to 80.
|
data/lib/mindee/http/endpoint.rb
CHANGED
@@ -34,6 +34,8 @@ module Mindee
|
|
34
34
|
attr_reader :request_timeout
|
35
35
|
# @return [String]
|
36
36
|
attr_reader :url_root
|
37
|
+
# @return [String]
|
38
|
+
attr_reader :base_url
|
37
39
|
|
38
40
|
def initialize(owner, url_name, version, api_key: '')
|
39
41
|
@owner = owner
|
@@ -44,25 +46,19 @@ module Mindee
|
|
44
46
|
logger.debug('API key set from environment')
|
45
47
|
end
|
46
48
|
@api_key = api_key.nil? || api_key.empty? ? ENV.fetch(API_KEY_ENV_NAME, API_KEY_DEFAULT) : api_key
|
47
|
-
base_url = ENV.fetch(BASE_URL_ENV_NAME, BASE_URL_DEFAULT)
|
48
|
-
@url_root = "#{base_url
|
49
|
+
@base_url = ENV.fetch(BASE_URL_ENV_NAME, BASE_URL_DEFAULT).chomp('/')
|
50
|
+
@url_root = "#{@base_url}/products/#{@owner}/#{@url_name}/v#{@version}"
|
49
51
|
end
|
50
52
|
|
51
53
|
# Call the prediction API.
|
52
54
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
|
53
|
-
# @param
|
54
|
-
# @param full_text [bool] Whether to include the full OCR text response in compatible APIs
|
55
|
-
# @param close_file [bool] Whether the file will be closed after reading
|
56
|
-
# @param cropper [bool] Whether a cropping operation will be applied
|
55
|
+
# @param opts [ParseOptions] Parse options.
|
57
56
|
# @return [Array]
|
58
|
-
def predict(input_source,
|
57
|
+
def predict(input_source, opts)
|
59
58
|
check_api_key
|
60
59
|
response = predict_req_post(
|
61
60
|
input_source,
|
62
|
-
|
63
|
-
full_text: full_text,
|
64
|
-
close_file: close_file,
|
65
|
-
cropper: cropper
|
61
|
+
opts
|
66
62
|
)
|
67
63
|
if !response.nil? && response.respond_to?(:body)
|
68
64
|
hashed_response = JSON.parse(response.body, object_class: Hash)
|
@@ -76,14 +72,11 @@ module Mindee
|
|
76
72
|
|
77
73
|
# Call the prediction API.
|
78
74
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
|
79
|
-
# @param
|
80
|
-
# @param full_text [bool] Whether to include the full OCR text response in compatible APIs.
|
81
|
-
# @param close_file [bool] Whether the file will be closed after reading
|
82
|
-
# @param cropper [bool] Whether a cropping operation will be applied
|
75
|
+
# @param opts [ParseOptions, Hash] Parse options.
|
83
76
|
# @return [Array]
|
84
|
-
def predict_async(input_source,
|
77
|
+
def predict_async(input_source, opts)
|
85
78
|
check_api_key
|
86
|
-
response =
|
79
|
+
response = document_queue_req_post(input_source, opts)
|
87
80
|
if !response.nil? && response.respond_to?(:body)
|
88
81
|
hashed_response = JSON.parse(response.body, object_class: Hash)
|
89
82
|
return [hashed_response, response.body] if ResponseValidation.valid_async_response?(response)
|
@@ -100,7 +93,7 @@ module Mindee
|
|
100
93
|
# @return [Array]
|
101
94
|
def parse_async(job_id)
|
102
95
|
check_api_key
|
103
|
-
response =
|
96
|
+
response = document_queue_req_get(job_id)
|
104
97
|
hashed_response = JSON.parse(response.body, object_class: Hash)
|
105
98
|
return [hashed_response, response.body] if ResponseValidation.valid_async_response?(response)
|
106
99
|
|
@@ -112,17 +105,14 @@ module Mindee
|
|
112
105
|
private
|
113
106
|
|
114
107
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
|
115
|
-
# @param
|
116
|
-
# @param full_text [bool] Whether to include the full OCR text response in compatible APIs.
|
117
|
-
# @param close_file [bool] Whether the file will be closed after reading
|
118
|
-
# @param cropper [bool] Whether a cropping operation will be applied
|
108
|
+
# @param opts [ParseOptions] Parse options.
|
119
109
|
# @return [Net::HTTPResponse]
|
120
|
-
def predict_req_post(input_source,
|
110
|
+
def predict_req_post(input_source, opts)
|
121
111
|
uri = URI("#{@url_root}/predict")
|
122
112
|
|
123
113
|
params = {} # : Hash[Symbol | String, untyped]
|
124
|
-
params[:cropper] = 'true' if cropper
|
125
|
-
params[:full_text_ocr] = 'true' if full_text
|
114
|
+
params[:cropper] = 'true' if opts.cropper
|
115
|
+
params[:full_text_ocr] = 'true' if opts.full_text
|
126
116
|
uri.query = URI.encode_www_form(params)
|
127
117
|
|
128
118
|
headers = {
|
@@ -131,32 +121,33 @@ module Mindee
|
|
131
121
|
}
|
132
122
|
req = Net::HTTP::Post.new(uri, headers)
|
133
123
|
form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource)
|
134
|
-
[['document', input_source.url]]
|
124
|
+
[['document', input_source.url]] # : Array[untyped]
|
135
125
|
else
|
136
|
-
[input_source.read_contents(close: close_file)]
|
126
|
+
[input_source.read_contents(close: opts.close_file)] # : Array[untyped]
|
137
127
|
end
|
138
|
-
form_data.push ['include_mvision', 'true'] if all_words
|
128
|
+
form_data.push ['include_mvision', 'true'] if opts.all_words
|
139
129
|
|
140
130
|
req.set_form(form_data, 'multipart/form-data')
|
141
|
-
response = nil
|
142
131
|
Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
|
143
|
-
|
132
|
+
return http.request(req)
|
144
133
|
end
|
145
|
-
response
|
134
|
+
raise Mindee::Errors::MindeeError, 'Could not resolve server response.'
|
146
135
|
end
|
147
136
|
|
148
137
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource]
|
149
|
-
# @param
|
150
|
-
# @
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
138
|
+
# @param opts [ParseOptions] Parse options.
|
139
|
+
# @return [Net::HTTPResponse]
|
140
|
+
def document_queue_req_post(input_source, opts)
|
141
|
+
uri = if opts.workflow_id
|
142
|
+
URI("#{@base_url}/workflows/#{opts.workflow_id}/predict_async")
|
143
|
+
else
|
144
|
+
URI("#{@url_root}/predict_async")
|
145
|
+
end
|
156
146
|
|
157
147
|
params = {} # : Hash[Symbol | String, untyped]
|
158
|
-
params[:cropper] = 'true' if cropper
|
159
|
-
params[:full_text_ocr] = 'true' if full_text
|
148
|
+
params[:cropper] = 'true' if opts.cropper
|
149
|
+
params[:full_text_ocr] = 'true' if opts.full_text
|
150
|
+
params[:rag] = 'true' if opts.rag
|
160
151
|
uri.query = URI.encode_www_form(params)
|
161
152
|
|
162
153
|
headers = {
|
@@ -165,24 +156,23 @@ module Mindee
|
|
165
156
|
}
|
166
157
|
req = Net::HTTP::Post.new(uri, headers)
|
167
158
|
form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource)
|
168
|
-
[['document', input_source.url]]
|
159
|
+
[['document', input_source.url]] # : Array[untyped]
|
169
160
|
else
|
170
|
-
[input_source.read_contents(close: close_file)]
|
161
|
+
[input_source.read_contents(close: opts.close_file)] # : Array[untyped]
|
171
162
|
end
|
172
|
-
form_data.push ['include_mvision', 'true'] if all_words
|
163
|
+
form_data.push ['include_mvision', 'true'] if opts.all_words
|
173
164
|
|
174
165
|
req.set_form(form_data, 'multipart/form-data')
|
175
166
|
|
176
|
-
response = nil
|
177
167
|
Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
|
178
|
-
|
168
|
+
return http.request(req)
|
179
169
|
end
|
180
|
-
response
|
170
|
+
raise Mindee::Errors::MindeeError, 'Could not resolve server response.'
|
181
171
|
end
|
182
172
|
|
183
173
|
# @param job_id [String]
|
184
174
|
# @return [Net::HTTPResponse, nil]
|
185
|
-
def
|
175
|
+
def document_queue_req_get(job_id)
|
186
176
|
uri = URI("#{@url_root}/documents/queue/#{job_id}")
|
187
177
|
|
188
178
|
headers = {
|
@@ -42,7 +42,7 @@ module Mindee
|
|
42
42
|
# Checks and corrects the response object depending on the possible kinds of returns.
|
43
43
|
# @param response [Net::HTTPResponse]
|
44
44
|
def self.clean_request!(response)
|
45
|
-
return response if (response.code.to_i < 200) || (response.code.to_i > 302)
|
45
|
+
return response if (response.code.to_i < 200) || (response.code.to_i > 302) # : Net::HTTPResponse
|
46
46
|
|
47
47
|
return response if response.body.empty?
|
48
48
|
|
@@ -35,7 +35,7 @@ module Mindee
|
|
35
35
|
|
36
36
|
# @param product_class [Mindee::Inference]
|
37
37
|
# @param http_response [Hash]
|
38
|
-
# @param raw_http [
|
38
|
+
# @param raw_http [String]
|
39
39
|
def initialize(product_class, http_response, raw_http)
|
40
40
|
logger.debug('Handling API response')
|
41
41
|
@raw_http = raw_http.to_s
|
@@ -69,7 +69,7 @@ module Mindee
|
|
69
69
|
|
70
70
|
full_text_ocr = String.new
|
71
71
|
raw_prediction.dig('inference', 'pages').each do |page|
|
72
|
-
full_text_ocr <<
|
72
|
+
full_text_ocr << page['extras']['full_text_ocr']['content']
|
73
73
|
end
|
74
74
|
artificial_text_obj = { 'content' => full_text_ocr }
|
75
75
|
if @extras.nil? || @extras.empty?
|
@@ -13,6 +13,8 @@ module Mindee
|
|
13
13
|
attr_reader :cropper
|
14
14
|
# @return [Mindee::Parsing::Common::Extras::FullTextOCRExtra, nil]
|
15
15
|
attr_reader :full_text_ocr
|
16
|
+
# @return [RAGExtra, nil]
|
17
|
+
attr_reader :rag
|
16
18
|
|
17
19
|
def initialize(raw_prediction)
|
18
20
|
if raw_prediction['cropper']
|
@@ -21,9 +23,10 @@ module Mindee
|
|
21
23
|
if raw_prediction['full_text_ocr']
|
22
24
|
@full_text_ocr = Mindee::Parsing::Common::Extras::FullTextOCRExtra.new(raw_prediction['full_text_ocr'])
|
23
25
|
end
|
26
|
+
@rag = Mindee::Parsing::Common::Extras::RAGExtra.new(raw_prediction['rag']) if raw_prediction['rag']
|
24
27
|
|
25
28
|
raw_prediction.each do |key, value|
|
26
|
-
instance_variable_set("@#{key}", value) unless ['cropper', 'full_text_ocr'].include?(key)
|
29
|
+
instance_variable_set("@#{key}", value) unless ['cropper', 'full_text_ocr', 'rag'].include?(key)
|
27
30
|
end
|
28
31
|
end
|
29
32
|
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mindee
|
4
|
+
module Parsing
|
5
|
+
module Common
|
6
|
+
module Extras
|
7
|
+
# Retrieval-Augmented Generation extra.
|
8
|
+
class RAGExtra
|
9
|
+
# ID of the matching document
|
10
|
+
# @return [String, nil]
|
11
|
+
attr_reader :matching_document_id
|
12
|
+
|
13
|
+
def initialize(raw_prediction)
|
14
|
+
@matching_document_id = raw_prediction['matching_document_id'] if raw_prediction['matching_document_id']
|
15
|
+
end
|
16
|
+
|
17
|
+
def to_s
|
18
|
+
@matching_document_id || ''
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -29,6 +29,8 @@ module Mindee
|
|
29
29
|
# Whether this product has access to synchronous endpoint.
|
30
30
|
# @return [bool]
|
31
31
|
attr_reader :has_sync
|
32
|
+
# @return [Mindee::Parsing::Common::Extras::Extras] Potential Extras fields sent back along the prediction.
|
33
|
+
attr_reader :extras
|
32
34
|
|
33
35
|
@endpoint_name = nil
|
34
36
|
@endpoint_version = nil
|
@@ -40,6 +42,7 @@ module Mindee
|
|
40
42
|
@is_rotation_applied = raw_prediction['is_rotation_applied']
|
41
43
|
@product = Product.new(raw_prediction['product'])
|
42
44
|
@pages = [] # : Array[Page]
|
45
|
+
@extras = Extras::Extras.new(raw_prediction['extras'])
|
43
46
|
end
|
44
47
|
|
45
48
|
# @return [String]
|
@@ -39,10 +39,10 @@ module Mindee
|
|
39
39
|
def to_s
|
40
40
|
printable = printable_values
|
41
41
|
out_str = String.new
|
42
|
-
out_str <<
|
43
|
-
out_str <<
|
44
|
-
out_str <<
|
45
|
-
out_str <<
|
42
|
+
out_str << "Base: #{printable[:base]}"
|
43
|
+
out_str << ", Code: #{printable[:code]}"
|
44
|
+
out_str << ", Rate (%): #{printable[:rate]}"
|
45
|
+
out_str << ", Amount: #{printable[:value]}"
|
46
46
|
out_str.strip
|
47
47
|
end
|
48
48
|
|
@@ -60,10 +60,10 @@ module Mindee
|
|
60
60
|
def to_table_line
|
61
61
|
printable = printable_values
|
62
62
|
out_str = String.new
|
63
|
-
out_str <<
|
64
|
-
out_str <<
|
65
|
-
out_str <<
|
66
|
-
out_str <<
|
63
|
+
out_str << "| #{printable[:base].ljust(13, ' ')}"
|
64
|
+
out_str << " | #{printable[:code].ljust(6, ' ')}"
|
65
|
+
out_str << " | #{printable[:rate].ljust(8, ' ')}"
|
66
|
+
out_str << " | #{printable[:value].ljust(13, ' ')} |"
|
67
67
|
out_str.strip
|
68
68
|
end
|
69
69
|
end
|
@@ -94,7 +94,7 @@ module Mindee
|
|
94
94
|
return '' if nil? || empty?
|
95
95
|
|
96
96
|
out_str = String.new
|
97
|
-
out_str <<
|
97
|
+
out_str << "\n#{line_separator('-')}"
|
98
98
|
out_str << "\n | Base | Code | Rate (%) | Amount |"
|
99
99
|
out_str << "\n#{line_separator('=')}"
|
100
100
|
each do |entry|
|
@@ -58,7 +58,7 @@ module Mindee
|
|
58
58
|
end
|
59
59
|
formatted_max_index = format('%03d', page_index_list[page_index_list.length - 1] + 1).to_s
|
60
60
|
field_filename = "#{basename}_#{format('%03d',
|
61
|
-
|
61
|
+
page_index_list[0] + 1)}-#{formatted_max_index}#{extension}"
|
62
62
|
extracted_pdf = Mindee::PDF::PDFExtractor::ExtractedPDF.new(cut_pages(page_index_list),
|
63
63
|
field_filename)
|
64
64
|
extracted_pdfs << extracted_pdf
|