mindee 3.4.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -0
- data/README.md +4 -0
- data/bin/mindee.rb +39 -15
- data/docs/bank_statement_fr_v1.md +175 -0
- data/docs/carte_grise_v1.md +4 -4
- data/docs/code_samples/bank_statement_fr_v1_async.txt +19 -0
- data/docs/code_samples/default.txt +19 -19
- data/docs/code_samples/default_async.txt +25 -0
- data/docs/code_samples/eu_driver_license_v1.txt +19 -0
- data/docs/code_samples/international_id_v1_async.txt +19 -0
- data/docs/code_samples/international_id_v2_async.txt +19 -0
- data/docs/eu_driver_license_v1.md +223 -0
- data/docs/financial_document_v1.md +48 -40
- data/docs/generated_v1.md +90 -0
- data/docs/invoices_v4.md +4 -1
- data/docs/proof_of_address_v1.md +4 -4
- data/docs/us_driver_license_v1.md +2 -2
- data/lib/mindee/client.rb +5 -3
- data/lib/mindee/http/endpoint.rb +13 -12
- data/lib/mindee/input/sources.rb +28 -5
- data/lib/mindee/parsing/common/inference.rb +3 -1
- data/lib/mindee/parsing/generated/generated_list_field.rb +58 -0
- data/lib/mindee/parsing/generated/generated_object_field.rb +109 -0
- data/lib/mindee/parsing/generated.rb +4 -0
- data/lib/mindee/parsing.rb +1 -0
- data/lib/mindee/product/barcode_reader/barcode_reader_v1.rb +3 -1
- data/lib/mindee/product/cropper/cropper_v1.rb +3 -1
- data/lib/mindee/product/eu/driver_license/driver_license_v1.rb +41 -0
- data/lib/mindee/product/eu/driver_license/driver_license_v1_document.rb +88 -0
- data/lib/mindee/product/eu/driver_license/driver_license_v1_page.rb +53 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +3 -1
- data/lib/mindee/product/financial_document/financial_document_v1.rb +3 -1
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +3 -1
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +3 -1
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1.rb +41 -0
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1_document.rb +130 -0
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1_page.rb +34 -0
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1_transaction.rb +64 -0
- data/lib/mindee/product/fr/carte_grise/carte_grise_v1.rb +3 -1
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +3 -1
- data/lib/mindee/product/fr/id_card/id_card_v1.rb +3 -1
- data/lib/mindee/product/fr/id_card/id_card_v2.rb +3 -1
- data/lib/mindee/product/generated/generated_v1.rb +38 -0
- data/lib/mindee/product/generated/generated_v1_document.rb +35 -0
- data/lib/mindee/product/generated/generated_v1_page.rb +51 -0
- data/lib/mindee/product/generated/generated_v1_prediction.rb +114 -0
- data/lib/mindee/product/international_id/international_id_v1.rb +39 -0
- data/lib/mindee/product/international_id/international_id_v1_document.rb +109 -0
- data/lib/mindee/product/international_id/international_id_v1_page.rb +32 -0
- data/lib/mindee/product/international_id/international_id_v2.rb +39 -0
- data/lib/mindee/product/international_id/international_id_v2_document.rb +119 -0
- data/lib/mindee/product/international_id/international_id_v2_page.rb +32 -0
- data/lib/mindee/product/invoice/invoice_v4.rb +3 -1
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1.rb +3 -1
- data/lib/mindee/product/passport/passport_v1.rb +3 -1
- data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +3 -1
- data/lib/mindee/product/receipt/receipt_v5.rb +3 -1
- data/lib/mindee/product/us/bank_check/bank_check_v1.rb +3 -1
- data/lib/mindee/product/us/driver_license/driver_license_v1.rb +3 -1
- data/lib/mindee/product/us/w9/w9_v1.rb +3 -1
- data/lib/mindee/product.rb +5 -0
- data/lib/mindee/version.rb +1 -1
- data/lib/mindee.rb +4 -0
- metadata +30 -2
@@ -0,0 +1,223 @@
|
|
1
|
+
---
|
2
|
+
title: EU Driver License OCR Ruby
|
3
|
+
---
|
4
|
+
The Ruby OCR SDK supports the [EU Driver License API](https://platform.mindee.com/mindee/eu_driver_license).
|
5
|
+
|
6
|
+
Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/eu_driver_license/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
|
7
|
+

|
8
|
+
|
9
|
+
# Quick-Start
|
10
|
+
```rb
|
11
|
+
require 'mindee'
|
12
|
+
|
13
|
+
# Init a new client
|
14
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
15
|
+
|
16
|
+
# Load a file from disk
|
17
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
18
|
+
|
19
|
+
# Parse the file
|
20
|
+
result = mindee_client.parse(
|
21
|
+
input_source,
|
22
|
+
Mindee::Product::EU::DriverLicense::DriverLicenseV1
|
23
|
+
)
|
24
|
+
|
25
|
+
# Print a full summary of the parsed data in RST format
|
26
|
+
puts result.document
|
27
|
+
|
28
|
+
# Print the document-level parsed data
|
29
|
+
# puts result.document.inference.prediction
|
30
|
+
```
|
31
|
+
|
32
|
+
**Output (RST):**
|
33
|
+
```rst
|
34
|
+
########
|
35
|
+
Document
|
36
|
+
########
|
37
|
+
:Mindee ID: b19cc32e-b3e6-4ff9-bdc7-619199355d54
|
38
|
+
:Filename: default_sample.jpg
|
39
|
+
|
40
|
+
Inference
|
41
|
+
#########
|
42
|
+
:Product: mindee/eu_driver_license v1.0
|
43
|
+
:Rotation applied: Yes
|
44
|
+
|
45
|
+
Prediction
|
46
|
+
==========
|
47
|
+
:Country Code: FR
|
48
|
+
:Document ID: 13AA00002
|
49
|
+
:Driver License Category: AM A1 B1 B D BE DE
|
50
|
+
:Last Name: MARTIN
|
51
|
+
:First Name: PAUL
|
52
|
+
:Date Of Birth: 1981-07-14
|
53
|
+
:Place Of Birth: Utopiacity
|
54
|
+
:Expiry Date: 2018-12-31
|
55
|
+
:Issue Date: 2013-01-01
|
56
|
+
:Issue Authority: 99999UpiaCity
|
57
|
+
:MRZ: D1FRA13AA000026181231MARTIN<<9
|
58
|
+
:Address:
|
59
|
+
|
60
|
+
Page Predictions
|
61
|
+
================
|
62
|
+
|
63
|
+
Page 0
|
64
|
+
------
|
65
|
+
:Photo: Polygon with 4 points.
|
66
|
+
:Signature: Polygon with 4 points.
|
67
|
+
:Country Code: FR
|
68
|
+
:Document ID: 13AA00002
|
69
|
+
:Driver License Category: AM A1 B1 B D BE DE
|
70
|
+
:Last Name: MARTIN
|
71
|
+
:First Name: PAUL
|
72
|
+
:Date Of Birth: 1981-07-14
|
73
|
+
:Place Of Birth: Utopiacity
|
74
|
+
:Expiry Date: 2018-12-31
|
75
|
+
:Issue Date: 2013-01-01
|
76
|
+
:Issue Authority: 99999UpiaCity
|
77
|
+
:MRZ: D1FRA13AA000026181231MARTIN<<9
|
78
|
+
:Address:
|
79
|
+
```
|
80
|
+
|
81
|
+
# Field Types
|
82
|
+
## Standard Fields
|
83
|
+
These fields are generic and used in several products.
|
84
|
+
|
85
|
+
### Basic Field
|
86
|
+
Each prediction object contains a set of fields that inherit from the generic `Field` class.
|
87
|
+
A typical `Field` object will have the following attributes:
|
88
|
+
|
89
|
+
* **value** (`String`, `Float`, `Integer`, `Boolean`): corresponds to the field value. Can be `nil` if no value was extracted.
|
90
|
+
* **confidence** (`Float`, `nil`): the confidence score of the field prediction.
|
91
|
+
* **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
|
92
|
+
* **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
|
93
|
+
* **page_id** (`Integer`, `nil`): the ID of the page, is `nil` when at document-level.
|
94
|
+
* **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
|
95
|
+
|
96
|
+
|
97
|
+
Aside from the previous attributes, all basic fields have access to a `to_s` method that can be used to print their value as a string.
|
98
|
+
|
99
|
+
### Date Field
|
100
|
+
Aside from the basic `Field` attributes, the date field `DateField` also implements the following:
|
101
|
+
|
102
|
+
* **date_object** (`Date`): an accessible representation of the value as a Ruby `Date` object.
|
103
|
+
|
104
|
+
|
105
|
+
### Position Field
|
106
|
+
The position field `PositionField` does not implement all the basic `Field` attributes, only **bounding_box**, **polygon** and **page_id**. On top of these, it has access to:
|
107
|
+
|
108
|
+
* **rectangle** (`Mindee::Geometry::Quadrilateral`): a Polygon with four points that may be oriented (even beyond canvas).
|
109
|
+
* **quadrangle** (`Mindee::Geometry::Quadrilateral`): a free polygon made up of four points.
|
110
|
+
|
111
|
+
### String Field
|
112
|
+
The text field `StringField` only has one constraint: its **value** is a `String` (or `nil`).
|
113
|
+
|
114
|
+
## Page-Level Fields
|
115
|
+
Some fields are constrained to the page level, and so will not be retrievable through the document.
|
116
|
+
|
117
|
+
# Attributes
|
118
|
+
The following fields are extracted for EU Driver License V1:
|
119
|
+
|
120
|
+
## Address
|
121
|
+
**address** ([StringField](#string-field)): EU driver license holder's address
|
122
|
+
|
123
|
+
```rb
|
124
|
+
puts result.document.inference.prediction.address.value
|
125
|
+
```
|
126
|
+
|
127
|
+
## Driver License Category
|
128
|
+
**category** ([StringField](#string-field)): EU driver license holder's categories
|
129
|
+
|
130
|
+
```rb
|
131
|
+
puts result.document.inference.prediction.category.value
|
132
|
+
```
|
133
|
+
|
134
|
+
## Country Code
|
135
|
+
**country_code** ([StringField](#string-field)): Country code extracted as a string.
|
136
|
+
|
137
|
+
```rb
|
138
|
+
puts result.document.inference.prediction.country_code.value
|
139
|
+
```
|
140
|
+
|
141
|
+
## Date Of Birth
|
142
|
+
**date_of_birth** ([DateField](#date-field)): The date of birth of the document holder
|
143
|
+
|
144
|
+
```rb
|
145
|
+
puts result.document.inference.prediction.date_of_birth.value
|
146
|
+
```
|
147
|
+
|
148
|
+
## Document ID
|
149
|
+
**document_id** ([StringField](#string-field)): ID number of the Document.
|
150
|
+
|
151
|
+
```rb
|
152
|
+
puts result.document.inference.prediction.document_id.value
|
153
|
+
```
|
154
|
+
|
155
|
+
## Expiry Date
|
156
|
+
**expiry_date** ([DateField](#date-field)): Date the document expires
|
157
|
+
|
158
|
+
```rb
|
159
|
+
puts result.document.inference.prediction.expiry_date.value
|
160
|
+
```
|
161
|
+
|
162
|
+
## First Name
|
163
|
+
**first_name** ([StringField](#string-field)): First name(s) of the driver license holder
|
164
|
+
|
165
|
+
```rb
|
166
|
+
puts result.document.inference.prediction.first_name.value
|
167
|
+
```
|
168
|
+
|
169
|
+
## Issue Authority
|
170
|
+
**issue_authority** ([StringField](#string-field)): Authority that issued the document
|
171
|
+
|
172
|
+
```rb
|
173
|
+
puts result.document.inference.prediction.issue_authority.value
|
174
|
+
```
|
175
|
+
|
176
|
+
## Issue Date
|
177
|
+
**issue_date** ([DateField](#date-field)): Date the document was issued
|
178
|
+
|
179
|
+
```rb
|
180
|
+
puts result.document.inference.prediction.issue_date.value
|
181
|
+
```
|
182
|
+
|
183
|
+
## Last Name
|
184
|
+
**last_name** ([StringField](#string-field)): Last name of the driver license holder.
|
185
|
+
|
186
|
+
```rb
|
187
|
+
puts result.document.inference.prediction.last_name.value
|
188
|
+
```
|
189
|
+
|
190
|
+
## MRZ
|
191
|
+
**mrz** ([StringField](#string-field)): Machine-readable license number
|
192
|
+
|
193
|
+
```rb
|
194
|
+
puts result.document.inference.prediction.mrz.value
|
195
|
+
```
|
196
|
+
|
197
|
+
## Photo
|
198
|
+
[📄](#page-level-fields "This field is only present on individual pages.")**photo** ([PositionField](#position-field)): Has a photo of the EU driver license holder
|
199
|
+
|
200
|
+
```rb
|
201
|
+
for photo_elem in result.document.photo do
|
202
|
+
puts photo_elem.polygon
|
203
|
+
end
|
204
|
+
```
|
205
|
+
|
206
|
+
## Place Of Birth
|
207
|
+
**place_of_birth** ([StringField](#string-field)): Place where the driver license holder was born
|
208
|
+
|
209
|
+
```rb
|
210
|
+
puts result.document.inference.prediction.place_of_birth.value
|
211
|
+
```
|
212
|
+
|
213
|
+
## Signature
|
214
|
+
[📄](#page-level-fields "This field is only present on individual pages.")**signature** ([PositionField](#position-field)): Has a signature of the EU driver license holder
|
215
|
+
|
216
|
+
```rb
|
217
|
+
for signature_elem in result.document.signature do
|
218
|
+
puts signature_elem.polygon
|
219
|
+
end
|
220
|
+
```
|
221
|
+
|
222
|
+
# Questions?
|
223
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
|
@@ -34,48 +34,52 @@ puts result.document
|
|
34
34
|
########
|
35
35
|
Document
|
36
36
|
########
|
37
|
-
:Mindee ID:
|
37
|
+
:Mindee ID: a6b54e2d-a7fa-4e08-8de6-6cd296f50f3d
|
38
38
|
:Filename: default_sample.jpg
|
39
39
|
|
40
40
|
Inference
|
41
41
|
#########
|
42
|
-
:Product: mindee/financial_document v1.
|
42
|
+
:Product: mindee/financial_document v1.2
|
43
43
|
:Rotation applied: Yes
|
44
44
|
|
45
45
|
Prediction
|
46
46
|
==========
|
47
47
|
:Locale: en; en; USD;
|
48
|
-
:Invoice Number:
|
49
|
-
:Reference Numbers:
|
50
|
-
:Purchase Date:
|
51
|
-
:Due Date:
|
52
|
-
:Total Net:
|
53
|
-
:Total Amount:
|
48
|
+
:Invoice Number: INT-001
|
49
|
+
:Reference Numbers: 2412/2019
|
50
|
+
:Purchase Date: 2019-02-11
|
51
|
+
:Due Date: 2019-02-26
|
52
|
+
:Total Net: 195.00
|
53
|
+
:Total Amount: 204.75
|
54
54
|
:Taxes:
|
55
55
|
+---------------+--------+----------+---------------+
|
56
56
|
| Base | Code | Rate (%) | Amount |
|
57
57
|
+===============+========+==========+===============+
|
58
|
-
| |
|
58
|
+
| | | 5.00 | 9.75 |
|
59
59
|
+---------------+--------+----------+---------------+
|
60
60
|
:Supplier Payment Details:
|
61
|
-
:Supplier Name:
|
61
|
+
:Supplier Name: JOHN SMITH
|
62
62
|
:Supplier Company Registrations:
|
63
|
-
:Supplier Address:
|
64
|
-
:Supplier Phone Number:
|
65
|
-
:Customer Name:
|
63
|
+
:Supplier Address: 4490 Oak Drive Albany, NY 12210
|
64
|
+
:Supplier Phone Number:
|
65
|
+
:Customer Name: JESSIE M HORNE
|
66
66
|
:Customer Company Registrations:
|
67
|
-
:Customer Address:
|
68
|
-
:Document Type:
|
69
|
-
:Purchase Subcategory:
|
70
|
-
:Purchase Category:
|
71
|
-
:Total Tax:
|
72
|
-
:Tip and Gratuity:
|
73
|
-
:Purchase Time:
|
67
|
+
:Customer Address: 2019 Redbud Drive New York, NY 10011
|
68
|
+
:Document Type: INVOICE
|
69
|
+
:Purchase Subcategory:
|
70
|
+
:Purchase Category: miscellaneous
|
71
|
+
:Total Tax: 9.75
|
72
|
+
:Tip and Gratuity:
|
73
|
+
:Purchase Time:
|
74
74
|
:Line Items:
|
75
75
|
+--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
|
76
76
|
| Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit Price |
|
77
77
|
+======================================+==============+==========+============+==============+==============+============+
|
78
|
-
|
|
78
|
+
| Front and rear brake cables | | 1.00 | | | 100.00 | 100.00 |
|
79
|
+
+--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
|
80
|
+
| New set of pedal arms | | 2.00 | | | 50.00 | 25.00 |
|
81
|
+
+--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
|
82
|
+
| Labon 3hrs | | 3.00 | | | 45.00 | 15.00 |
|
79
83
|
+--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
|
80
84
|
|
81
85
|
Page Predictions
|
@@ -84,37 +88,41 @@ Page Predictions
|
|
84
88
|
Page 0
|
85
89
|
------
|
86
90
|
:Locale: en; en; USD;
|
87
|
-
:Invoice Number:
|
88
|
-
:Reference Numbers:
|
89
|
-
:Purchase Date:
|
90
|
-
:Due Date:
|
91
|
-
:Total Net:
|
92
|
-
:Total Amount:
|
91
|
+
:Invoice Number: INT-001
|
92
|
+
:Reference Numbers: 2412/2019
|
93
|
+
:Purchase Date: 2019-02-11
|
94
|
+
:Due Date: 2019-02-26
|
95
|
+
:Total Net: 195.00
|
96
|
+
:Total Amount: 204.75
|
93
97
|
:Taxes:
|
94
98
|
+---------------+--------+----------+---------------+
|
95
99
|
| Base | Code | Rate (%) | Amount |
|
96
100
|
+===============+========+==========+===============+
|
97
|
-
| |
|
101
|
+
| | | 5.00 | 9.75 |
|
98
102
|
+---------------+--------+----------+---------------+
|
99
103
|
:Supplier Payment Details:
|
100
|
-
:Supplier Name:
|
104
|
+
:Supplier Name: JOHN SMITH
|
101
105
|
:Supplier Company Registrations:
|
102
|
-
:Supplier Address:
|
103
|
-
:Supplier Phone Number:
|
104
|
-
:Customer Name:
|
106
|
+
:Supplier Address: 4490 Oak Drive Albany, NY 12210
|
107
|
+
:Supplier Phone Number:
|
108
|
+
:Customer Name: JESSIE M HORNE
|
105
109
|
:Customer Company Registrations:
|
106
|
-
:Customer Address:
|
107
|
-
:Document Type:
|
108
|
-
:Purchase Subcategory:
|
109
|
-
:Purchase Category:
|
110
|
-
:Total Tax:
|
111
|
-
:Tip and Gratuity:
|
112
|
-
:Purchase Time:
|
110
|
+
:Customer Address: 2019 Redbud Drive New York, NY 10011
|
111
|
+
:Document Type: INVOICE
|
112
|
+
:Purchase Subcategory:
|
113
|
+
:Purchase Category: miscellaneous
|
114
|
+
:Total Tax: 9.75
|
115
|
+
:Tip and Gratuity:
|
116
|
+
:Purchase Time:
|
113
117
|
:Line Items:
|
114
118
|
+--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
|
115
119
|
| Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit Price |
|
116
120
|
+======================================+==============+==========+============+==============+==============+============+
|
117
|
-
|
|
121
|
+
| Front and rear brake cables | | 1.00 | | | 100.00 | 100.00 |
|
122
|
+
+--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
|
123
|
+
| New set of pedal arms | | 2.00 | | | 50.00 | 25.00 |
|
124
|
+
+--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
|
125
|
+
| Labon 3hrs | | 3.00 | | | 45.00 | 15.00 |
|
118
126
|
+--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
|
119
127
|
```
|
120
128
|
|
@@ -0,0 +1,90 @@
|
|
1
|
+
---
|
2
|
+
title: Generated API Ruby
|
3
|
+
---
|
4
|
+
The Ruby OCR SDK supports generated APIs.
|
5
|
+
Generated APIs can theoretically support all APIs in a catch-all generic format.
|
6
|
+
|
7
|
+
# Quick-Start
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
require 'mindee'
|
11
|
+
|
12
|
+
# Init a new client
|
13
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
14
|
+
|
15
|
+
# Load a file from disk
|
16
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
17
|
+
|
18
|
+
# Initialize a custom endpoint for this product
|
19
|
+
custom_endpoint = mindee_client.create_endpoint(
|
20
|
+
account_name: 'my-account',
|
21
|
+
endpoint_name: 'my-endpoint',
|
22
|
+
version: 'my-version'
|
23
|
+
)
|
24
|
+
|
25
|
+
# Parse the file
|
26
|
+
result = mindee_client.enqueue_and_parse(
|
27
|
+
input_source,
|
28
|
+
Mindee::Product::Generated::GeneratedV1,
|
29
|
+
endpoint: custom_endpoint
|
30
|
+
)
|
31
|
+
|
32
|
+
# Print a full summary of the parsed data in RST format
|
33
|
+
puts result.document
|
34
|
+
```
|
35
|
+
|
36
|
+
# Generated Endpoints
|
37
|
+
|
38
|
+
You may have noticed in the previous step that in order to access a custom build, you will need to provide an account and an endpoint name at the very least.
|
39
|
+
|
40
|
+
Although it is optional, the version number should match the latest version of your build in most use-cases.
|
41
|
+
If it is not set, it will default to "1".
|
42
|
+
|
43
|
+
# Field Types
|
44
|
+
|
45
|
+
## Generated Fields
|
46
|
+
|
47
|
+
### Generated List Field
|
48
|
+
|
49
|
+
A `GeneratedListField` is a special type of custom list that implements the following:
|
50
|
+
|
51
|
+
- **values** (`Array<StringField, `[GeneratedObjectField](#generated-object-field)`>`): the values of the list, each one either a `StringField` or a `GeneratedObjectField`.
|
52
|
+
- **page_id** (`Integer`): only available for some documents ATM.
|
53
|
+
|
54
|
+
Since the inner contents can vary, the value isn't accessed through a property, but rather through the following functions:
|
55
|
+
|
56
|
+
- **contents_list()** (`-> Array<String, Float>`): returns a list of values for each element.
|
57
|
+
- **contents_string(separator=" ")** (`-> String`): returns a single string of the concatenated values, with an optional **separator** `String` between them.
|
58
|
+
> **Note:** the `to_s` method returns a string representation of all values of this object, with an empty space between each of them.
|
59
|
+
|
60
|
+
### Generated Object Field
|
61
|
+
|
62
|
+
Unrecognized structures and sometimes values of `ListField`s are stored in a `GeneratedObjectField` structure, which is implemented dynamically depending on the object's structure.
|
63
|
+
|
64
|
+
- **page_id** (`[Integer, nil]`): the ID of the page, is `nil` when at document-level.
|
65
|
+
- **raw_value** (`[String, nil]`): an optional field for when some post-processing has been done on fields (e.g. amounts). `nil` in most instances.
|
66
|
+
- **confidence** (`[Float, nil]`): the confidence score of the field prediction. Warning: support isn't guaranteed on all APIs.
|
67
|
+
|
68
|
+
|
69
|
+
> **Other fields**: No matter what, other fields will be stored in a dictionary-like structure with a `key: value` pair where `key` is a string and `value` is a nullable string. They can be accessed like any other regular value, but won't be suggested by your IDE.
|
70
|
+
|
71
|
+
|
72
|
+
### StringField
|
73
|
+
The text field `StringField` only has one constraint: its **value** is a `String` (or `nil`).
|
74
|
+
|
75
|
+
|
76
|
+
# Attributes
|
77
|
+
|
78
|
+
Generated builds always have access to at least two attributes:
|
79
|
+
|
80
|
+
## Fields
|
81
|
+
|
82
|
+
**fields** (`Hash<String, Array<`[GeneratedListField](#generated-list-field), [GeneratedObjectField](#generated-object-field), [StringField](#stringfield)`>>`):
|
83
|
+
|
84
|
+
```ruby
|
85
|
+
puts result.document.inference.prediction.fields["my-field"].to_s
|
86
|
+
```
|
87
|
+
|
88
|
+
# Questions?
|
89
|
+
|
90
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
|
data/docs/invoices_v4.md
CHANGED
@@ -310,6 +310,9 @@ puts result.document.inference.prediction.supplier_name.value
|
|
310
310
|
```rb
|
311
311
|
for supplier_payment_details_elem in result.document.inference.prediction.supplier_payment_details do
|
312
312
|
puts supplier_payment_details_elem.value
|
313
|
+
puts supplier_payment_details_elem.rate
|
314
|
+
puts supplier_payment_details_elem.code
|
315
|
+
puts supplier_payment_details_elem.basis
|
313
316
|
end
|
314
317
|
```
|
315
318
|
|
@@ -318,7 +321,7 @@ end
|
|
318
321
|
|
319
322
|
```rb
|
320
323
|
for taxes_elem in result.document.inference.prediction.taxes do
|
321
|
-
puts taxes_elem.
|
324
|
+
puts taxes_elem.value
|
322
325
|
end
|
323
326
|
```
|
324
327
|
|
data/docs/proof_of_address_v1.md
CHANGED
@@ -34,12 +34,12 @@ puts result.document
|
|
34
34
|
########
|
35
35
|
Document
|
36
36
|
########
|
37
|
-
:Mindee ID:
|
37
|
+
:Mindee ID: 5d2361e9-405e-4fc1-8531-f92a3aef0c38
|
38
38
|
:Filename: default_sample.jpg
|
39
39
|
|
40
40
|
Inference
|
41
41
|
#########
|
42
|
-
:Product: mindee/proof_of_address v1.
|
42
|
+
:Product: mindee/proof_of_address v1.1
|
43
43
|
:Rotation applied: Yes
|
44
44
|
|
45
45
|
Prediction
|
@@ -47,7 +47,7 @@ Prediction
|
|
47
47
|
:Locale: en; en; USD;
|
48
48
|
:Issuer Name: PPL ELECTRIC UTILITIES
|
49
49
|
:Issuer Company Registrations:
|
50
|
-
:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN
|
50
|
+
:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175
|
51
51
|
:Recipient Name:
|
52
52
|
:Recipient Company Registrations:
|
53
53
|
:Recipient Address: 123 MAIN ST ANYTOWN,PA 18062
|
@@ -73,7 +73,7 @@ Page 0
|
|
73
73
|
:Locale: en; en; USD;
|
74
74
|
:Issuer Name: PPL ELECTRIC UTILITIES
|
75
75
|
:Issuer Company Registrations:
|
76
|
-
:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN
|
76
|
+
:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175
|
77
77
|
:Recipient Name:
|
78
78
|
:Recipient Company Registrations:
|
79
79
|
:Recipient Address: 123 MAIN ST ANYTOWN,PA 18062
|
@@ -54,7 +54,7 @@ Prediction
|
|
54
54
|
:Date Of Birth: 1957-02-01
|
55
55
|
:Restrictions: NONE
|
56
56
|
:Endorsements: NONE
|
57
|
-
:Class:
|
57
|
+
:Driver License Class: D
|
58
58
|
:Sex: M
|
59
59
|
:Height: 5-08
|
60
60
|
:Weight: 185
|
@@ -79,7 +79,7 @@ Page 0
|
|
79
79
|
:Date Of Birth: 1957-02-01
|
80
80
|
:Restrictions: NONE
|
81
81
|
:Endorsements: NONE
|
82
|
-
:Class:
|
82
|
+
:Driver License Class: D
|
83
83
|
:Sex: M
|
84
84
|
:Height: 5-08
|
85
85
|
:Weight: 185
|
data/lib/mindee/client.rb
CHANGED
@@ -38,6 +38,7 @@ module Mindee
|
|
38
38
|
# @param cropper [Boolean] Whether to include cropper results for each page.
|
39
39
|
# This performs a cropping operation on the server and will increase response time.
|
40
40
|
#
|
41
|
+
#
|
41
42
|
# @return [Mindee::Parsing::Common::ApiResponse]
|
42
43
|
def parse(
|
43
44
|
input_source,
|
@@ -80,6 +81,7 @@ module Mindee
|
|
80
81
|
# @param cropper [Boolean] Whether to include cropper results for each page.
|
81
82
|
# This performs a cropping operation on the server and will increase response time.
|
82
83
|
#
|
84
|
+
#
|
83
85
|
# @return [Mindee::Parsing::Common::ApiResponse]
|
84
86
|
def enqueue(
|
85
87
|
input_source,
|
@@ -118,7 +120,6 @@ module Mindee
|
|
118
120
|
end
|
119
121
|
|
120
122
|
# rubocop:disable Metrics/ParameterLists
|
121
|
-
|
122
123
|
# Enqueue a document for async parsing and automatically try to retrieve it
|
123
124
|
#
|
124
125
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
@@ -139,7 +140,7 @@ module Mindee
|
|
139
140
|
# This performs a cropping operation on the server and will increase response time.
|
140
141
|
# @param initial_delay_sec [Integer, Float, nil] initial delay before polling. Defaults to 4.
|
141
142
|
# @param delay_sec [Integer, Float, nil] delay between polling attempts. Defaults to 2.
|
142
|
-
# @param max_retries [Integer, nil] maximum amount of retries. Defaults to
|
143
|
+
# @param max_retries [Integer, nil] maximum amount of retries. Defaults to 60.
|
143
144
|
# @return [Mindee::Parsing::Common::ApiResponse]
|
144
145
|
def enqueue_and_parse(
|
145
146
|
input_source,
|
@@ -151,7 +152,7 @@ module Mindee
|
|
151
152
|
cropper: false,
|
152
153
|
initial_delay_sec: 4,
|
153
154
|
delay_sec: 2,
|
154
|
-
max_retries:
|
155
|
+
max_retries: 60
|
155
156
|
)
|
156
157
|
enqueue_res = enqueue(
|
157
158
|
input_source,
|
@@ -178,6 +179,7 @@ module Mindee
|
|
178
179
|
|
179
180
|
queue_res
|
180
181
|
end
|
182
|
+
|
181
183
|
# rubocop:enable Metrics/ParameterLists
|
182
184
|
|
183
185
|
# Load a document from an absolute path, as a string.
|
data/lib/mindee/http/endpoint.rb
CHANGED
@@ -46,9 +46,9 @@ module Mindee
|
|
46
46
|
|
47
47
|
# Call the prediction API.
|
48
48
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
49
|
-
# @param all_words [Boolean]
|
50
|
-
# @param close_file [Boolean]
|
51
|
-
# @param cropper [Boolean]
|
49
|
+
# @param all_words [Boolean] Whether the full word extraction needs to be performed
|
50
|
+
# @param close_file [Boolean] Whether the file will be closed after reading
|
51
|
+
# @param cropper [Boolean] Whether a cropping operation will be applied
|
52
52
|
# @return [Hash]
|
53
53
|
def predict(input_source, all_words, close_file, cropper)
|
54
54
|
check_api_key
|
@@ -62,8 +62,9 @@ module Mindee
|
|
62
62
|
|
63
63
|
# Call the prediction API.
|
64
64
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
65
|
-
# @param
|
66
|
-
# @param
|
65
|
+
# @param all_words [Boolean] Whether the full word extraction needs to be performed
|
66
|
+
# @param close_file [Boolean] Whether the file will be closed after reading
|
67
|
+
# @param cropper [Boolean] Whether a cropping operation will be applied
|
67
68
|
# @return [Hash]
|
68
69
|
def predict_async(input_source, all_words, close_file, cropper)
|
69
70
|
check_api_key
|
@@ -91,10 +92,10 @@ module Mindee
|
|
91
92
|
private
|
92
93
|
|
93
94
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
94
|
-
# @param all_words [Boolean]
|
95
|
-
# @param close_file [Boolean]
|
96
|
-
# @param cropper [Boolean]
|
97
|
-
# @return [Net::
|
95
|
+
# @param all_words [Boolean] Whether the full word extraction needs to be performed
|
96
|
+
# @param close_file [Boolean] Whether the file will be closed after reading
|
97
|
+
# @param cropper [Boolean] Whether a cropping operation will be applied
|
98
|
+
# @return [Net::HTTP, nil]
|
98
99
|
def predict_req_post(input_source, all_words: false, close_file: true, cropper: false)
|
99
100
|
uri = URI("#{@url_root}/predict")
|
100
101
|
|
@@ -122,9 +123,9 @@ module Mindee
|
|
122
123
|
end
|
123
124
|
|
124
125
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
125
|
-
# @param all_words [Boolean]
|
126
|
-
# @param close_file [Boolean]
|
127
|
-
# @param cropper [Boolean]
|
126
|
+
# @param all_words [Boolean] Whether the full word extraction needs to be performed
|
127
|
+
# @param close_file [Boolean] Whether the file will be closed after reading
|
128
|
+
# @param cropper [Boolean] Whether a cropping operation will be applied
|
128
129
|
# @return [Net::HTTPResponse]
|
129
130
|
def document_queue_req_get(input_source, all_words, close_file, cropper)
|
130
131
|
uri = URI("#{@url_root}/predict_async")
|
data/lib/mindee/input/sources.rb
CHANGED
@@ -7,6 +7,7 @@ require_relative '../pdf'
|
|
7
7
|
|
8
8
|
module Mindee
|
9
9
|
module Input
|
10
|
+
# Document source handling.
|
10
11
|
module Source
|
11
12
|
# Mime types accepted by the server.
|
12
13
|
ALLOWED_MIME_TYPES = [
|
@@ -107,8 +108,7 @@ module Mindee
|
|
107
108
|
@io_stream = PdfProcessor.parse(@io_stream, options)
|
108
109
|
end
|
109
110
|
|
110
|
-
# Reads a document.
|
111
|
-
# Note: only needs filename in case of some pdf files.
|
111
|
+
# Reads a document.
|
112
112
|
# @param close [Boolean]
|
113
113
|
# @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
|
114
114
|
def read_document(close: true)
|
@@ -116,9 +116,7 @@ module Mindee
|
|
116
116
|
# Avoids needlessly re-packing some files
|
117
117
|
data = @io_stream.read
|
118
118
|
@io_stream.close if close
|
119
|
-
|
120
|
-
|
121
|
-
['document', [data].pack('m')]
|
119
|
+
['document', data, { filename: Mindee::Input::Source.convert_to_unicode_escape(@filename) }]
|
122
120
|
end
|
123
121
|
end
|
124
122
|
|
@@ -142,6 +140,16 @@ module Mindee
|
|
142
140
|
io_stream.set_encoding Encoding::BINARY
|
143
141
|
super(io_stream, filename, fix_pdf: fix_pdf)
|
144
142
|
end
|
143
|
+
|
144
|
+
# Overload of the same function to prevent a base64 from being re-encoded.
|
145
|
+
# @param close [Boolean]
|
146
|
+
# @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
|
147
|
+
def read_document(close: true)
|
148
|
+
@io_stream.seek(0)
|
149
|
+
data = @io_stream.read
|
150
|
+
@io_stream.close if close
|
151
|
+
['document', [data].pack('m'), { filename: Source.convert_to_unicode_escape(@filename) }]
|
152
|
+
end
|
145
153
|
end
|
146
154
|
|
147
155
|
# Load a document from raw bytes.
|
@@ -178,6 +186,21 @@ module Mindee
|
|
178
186
|
@url = url
|
179
187
|
end
|
180
188
|
end
|
189
|
+
|
190
|
+
# Replaces non-ASCII characters by their unicode escape sequence.
|
191
|
+
# Keeps other characters as is.
|
192
|
+
# @return A clean String.
|
193
|
+
def self.convert_to_unicode_escape(string)
|
194
|
+
unicode_escape_string = ''.dup
|
195
|
+
string.each_char do |char|
|
196
|
+
unicode_escape_string << if char.bytesize > 1
|
197
|
+
"\\u#{char.unpack1('U').to_s(16).rjust(4, '0')}"
|
198
|
+
else
|
199
|
+
char
|
200
|
+
end
|
201
|
+
end
|
202
|
+
unicode_escape_string
|
203
|
+
end
|
181
204
|
end
|
182
205
|
end
|
183
206
|
end
|