mindee 3.1.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/README.md +57 -7
- data/bin/mindee.rb +160 -83
- data/docs/bank_account_details_v2.md +137 -0
- data/docs/bank_check_v1.md +179 -0
- data/docs/barcode_reader_v1.md +104 -0
- data/docs/carte_vitale_v1.md +123 -0
- data/docs/code_samples/barcode_reader_v1.txt +19 -0
- data/docs/code_samples/cropper_v1.txt +16 -0
- data/docs/code_samples/idcard_fr_v2.txt +19 -0
- data/docs/code_samples/invoice_splitter_v1_async.txt +6 -54
- data/docs/code_samples/multi_receipts_detector_v1.txt +19 -0
- data/docs/code_samples/us_w9_v1.txt +16 -0
- data/docs/cropper_v1.md +97 -0
- data/docs/custom_v1.md +101 -0
- data/docs/expense_receipts_v5.md +306 -0
- data/docs/financial_document_v1.md +384 -0
- data/docs/{ruby-getting-started.md → getting_started.md} +22 -6
- data/docs/idcard_fr_v2.md +253 -0
- data/docs/invoice_splitter_v1.md +85 -0
- data/docs/invoices_v4.md +369 -0
- data/docs/license_plates_v1.md +91 -0
- data/docs/multi_receipts_detector_v1.md +105 -0
- data/docs/passport_v1.md +186 -0
- data/docs/proof_of_address_v1.md +207 -0
- data/docs/us_driver_license_v1.md +268 -0
- data/docs/us_w9_v1.md +207 -0
- data/lib/mindee/client.rb +95 -16
- data/lib/mindee/geometry/quadrilateral.rb +5 -0
- data/lib/mindee/http/.rubocop.yml +8 -0
- data/lib/mindee/http/endpoint.rb +14 -6
- data/lib/mindee/http/error.rb +104 -0
- data/lib/mindee/http.rb +1 -0
- data/lib/mindee/input/sources.rb +84 -15
- data/lib/mindee/parsing/common/api_response.rb +11 -1
- data/lib/mindee/parsing/common/inference.rb +2 -2
- data/lib/mindee/parsing/common/ocr/ocr.rb +1 -0
- data/lib/mindee/parsing/common.rb +0 -1
- data/lib/mindee/parsing/standard/company_registration_field.rb +1 -1
- data/lib/mindee/parsing/standard/locale_field.rb +1 -1
- data/lib/mindee/parsing/standard/payment_details_field.rb +1 -1
- data/lib/mindee/parsing/standard/position_field.rb +10 -3
- data/lib/mindee/parsing/standard/{text_field.rb → string_field.rb} +1 -1
- data/lib/mindee/parsing/standard.rb +1 -1
- data/lib/mindee/pdf/pdf_processing.rb +2 -1
- data/lib/mindee/product/barcode_reader/barcode_reader_v1.rb +37 -0
- data/lib/mindee/product/barcode_reader/barcode_reader_v1_document.rb +44 -0
- data/lib/mindee/product/barcode_reader/barcode_reader_v1_page.rb +32 -0
- data/lib/mindee/product/cropper/cropper_v1.rb +37 -0
- data/lib/mindee/product/cropper/cropper_v1_document.rb +13 -0
- data/lib/mindee/product/cropper/cropper_v1_page.rb +49 -0
- data/lib/mindee/product/custom/custom_v1.rb +1 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +1 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +2 -2
- data/lib/mindee/product/financial_document/financial_document_v1.rb +1 -0
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +24 -24
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +1 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +6 -6
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +1 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +6 -6
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +1 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +6 -6
- data/lib/mindee/product/fr/id_card/id_card_v1.rb +1 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +16 -16
- data/lib/mindee/product/fr/id_card/id_card_v2.rb +39 -0
- data/lib/mindee/product/fr/id_card/id_card_v2_document.rb +107 -0
- data/lib/mindee/product/fr/id_card/id_card_v2_page.rb +53 -0
- data/lib/mindee/product/invoice/invoice_v4.rb +1 -0
- data/lib/mindee/product/invoice/invoice_v4_document.rb +24 -24
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +1 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +5 -3
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1.rb +37 -0
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +35 -0
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +32 -0
- data/lib/mindee/product/passport/passport_v1.rb +1 -0
- data/lib/mindee/product/passport/passport_v1_document.rb +16 -16
- data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +1 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +14 -14
- data/lib/mindee/product/receipt/receipt_v4_document.rb +6 -6
- data/lib/mindee/product/receipt/receipt_v5.rb +1 -0
- data/lib/mindee/product/receipt/receipt_v5_document.rb +12 -12
- data/lib/mindee/product/us/bank_check/bank_check_v1.rb +1 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +8 -8
- data/lib/mindee/product/us/driver_license/driver_license_v1.rb +1 -0
- data/lib/mindee/product/us/driver_license/driver_license_v1_document.rb +28 -28
- data/lib/mindee/product/us/w9/w9_v1.rb +39 -0
- data/lib/mindee/product/us/w9/w9_v1_document.rb +15 -0
- data/lib/mindee/product/us/w9/w9_v1_page.rb +102 -0
- data/lib/mindee/product.rb +5 -0
- data/lib/mindee/version.rb +5 -1
- data/lib/mindee.rb +47 -0
- metadata +43 -9
- data/docs/ruby-api-builder.md +0 -123
- data/docs/ruby-invoice-ocr.md +0 -271
- data/docs/ruby-passport-ocr.md +0 -165
- data/docs/ruby-receipt-ocr.md +0 -196
- data/lib/mindee/parsing/common/error.rb +0 -24
data/docs/ruby-invoice-ocr.md
DELETED
|
@@ -1,271 +0,0 @@
|
|
|
1
|
-
The Ruby OCR SDK supports the [invoice API](https://developers.mindee.com/docs/invoice-ocr) for extracting data from invoices.
|
|
2
|
-
|
|
3
|
-
Using the sample below, we are going to illustrate how to extract the data that we want using the OCR SDK.
|
|
4
|
-
|
|
5
|
-

|
|
6
|
-
|
|
7
|
-
## Quick Start
|
|
8
|
-
```ruby
|
|
9
|
-
# Init a new client
|
|
10
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
|
11
|
-
|
|
12
|
-
# Load a file from disk
|
|
13
|
-
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
|
14
|
-
|
|
15
|
-
# Parse the file
|
|
16
|
-
result = mindee_client.parse(
|
|
17
|
-
input_source,
|
|
18
|
-
Mindee::Product::Invoice::InvoiceV4
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
# Print a full summary of the parsed data in RST format
|
|
22
|
-
puts result.document
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
Output:
|
|
26
|
-
```
|
|
27
|
-
Invoice V4 Prediction
|
|
28
|
-
=====================
|
|
29
|
-
:Filename:
|
|
30
|
-
:Locale: fr; fr; EUR;
|
|
31
|
-
:Invoice number: 0042004801351
|
|
32
|
-
:Reference numbers: AD29094
|
|
33
|
-
:Invoice date: 2020-02-17
|
|
34
|
-
:Invoice due date: 2020-02-17
|
|
35
|
-
:Supplier name: TURNPIKE DESIGNS CO.
|
|
36
|
-
:Supplier address: 156 University Ave, Toronto ON, Canada M5H 2H7
|
|
37
|
-
:Supplier company registrations: 501124705; FR33501124705
|
|
38
|
-
:Supplier payment details: FR7640254025476501124705368;
|
|
39
|
-
:Customer name: JIRO DOI
|
|
40
|
-
:Customer company registrations: FR00000000000; 111222333
|
|
41
|
-
:Customer address: 1954 Bloon Street West Toronto, ON, M6P 3K9 Canada
|
|
42
|
-
:Line Items:
|
|
43
|
-
Code | QTY | Price | Amount | Tax (Rate) | Description
|
|
44
|
-
| | | 4.31 | (2.10%) | PQ20 ETIQ ULTRA RESIS METAXXDC
|
|
45
|
-
| 1.00 | 65.00 | 75.00 | 10.00 | Platinum web hosting package Dow...
|
|
46
|
-
XXX81125600010 | 1.00 | 250.01 | 275.51 | 25.50 (10.20%) | a long string describing the ite...
|
|
47
|
-
ABC456 | 200.30 | 8.101 | 1622.63 | 121.70 (7.50%) | Liquid perfection
|
|
48
|
-
| | | | | CARTOUCHE L NR BROTHER TN247BK
|
|
49
|
-
:Taxes:
|
|
50
|
-
+---------------+--------+----------+---------------+
|
|
51
|
-
| Base | Code | Rate (%) | Amount |
|
|
52
|
-
+===============+========+==========+===============+
|
|
53
|
-
| | | 20.00 | 97.98 |
|
|
54
|
-
+---------------+--------+----------+---------------+
|
|
55
|
-
:Total tax: 97.98
|
|
56
|
-
:Total net: 489.97
|
|
57
|
-
:Total amount: 587.95
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
> 📘 **Info**
|
|
61
|
-
>
|
|
62
|
-
> Line item descriptions are truncated here only for display purposes.
|
|
63
|
-
> The full text is available in the [details](#line-items).
|
|
64
|
-
|
|
65
|
-
## Fields
|
|
66
|
-
Each prediction object contains a set of different fields.
|
|
67
|
-
Each `Field` object contains at a minimum the following attributes:
|
|
68
|
-
|
|
69
|
-
* `value` (String or Float depending on the field type): corresponds to the field value. Can be `nil` if no value was extracted.
|
|
70
|
-
* `confidence` (Float): the confidence score of the field prediction.
|
|
71
|
-
* `bounding_box` (Array< Array< Float > >): contains exactly 4 relative vertices coordinates (points) of a right rectangle containing the field in the document.
|
|
72
|
-
* `polygon` (Array< Array< Float > >): contains the relative vertices coordinates (points) of a polygon containing the field in the image.
|
|
73
|
-
* `reconstructed` (Boolean): True if the field was reconstructed or computed using other fields.
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
## Attributes
|
|
77
|
-
Depending on the field type, there might be additional attributes that will be extracted in the `Invoice` object.
|
|
78
|
-
|
|
79
|
-
Using the above sample, the following are the basic fields that can be extracted:
|
|
80
|
-
|
|
81
|
-
- [Quick Start](#quick-start)
|
|
82
|
-
- [Fields](#fields)
|
|
83
|
-
- [Attributes](#attributes)
|
|
84
|
-
- [Customer Information](#customer-information)
|
|
85
|
-
- [Dates](#dates)
|
|
86
|
-
- [Locale](#locale)
|
|
87
|
-
- [Supplier Information](#supplier-information)
|
|
88
|
-
- [Taxes](#taxes)
|
|
89
|
-
- [Totals](#totals)
|
|
90
|
-
- [Line items](#line-items)
|
|
91
|
-
- [Questions?](#questions)
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
### Customer Information
|
|
95
|
-
**`customer_name`** (Field): Customer's name
|
|
96
|
-
|
|
97
|
-
```ruby
|
|
98
|
-
puts result.document.inference.prediction.customer_name.value
|
|
99
|
-
```
|
|
100
|
-
|
|
101
|
-
**`customer_address`** (Field): Customer's postal address
|
|
102
|
-
|
|
103
|
-
```ruby
|
|
104
|
-
puts result.document.inference.prediction.customer_address.value
|
|
105
|
-
```
|
|
106
|
-
|
|
107
|
-
**`customer_company_registrations`** (Array<CompanyRegistration>): Customer's company registration
|
|
108
|
-
|
|
109
|
-
```ruby
|
|
110
|
-
result.document.inference.prediction.customer_company_registrations.each do |registration|
|
|
111
|
-
puts registration.value
|
|
112
|
-
puts registration.type
|
|
113
|
-
end
|
|
114
|
-
```
|
|
115
|
-
|
|
116
|
-
### Dates
|
|
117
|
-
Date fields:
|
|
118
|
-
|
|
119
|
-
* contain the `date_object` attribute, which is a standard Ruby [date object](https://ruby-doc.org/stdlib-2.7.1/libdoc/date/rdoc/Date.html)
|
|
120
|
-
* have a `value` attribute which is the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) representation of the date.
|
|
121
|
-
|
|
122
|
-
The following date fields are available:
|
|
123
|
-
|
|
124
|
-
**`date`**: Date the invoice was issued
|
|
125
|
-
|
|
126
|
-
```ruby
|
|
127
|
-
puts result.document.inference.prediction.date.value
|
|
128
|
-
```
|
|
129
|
-
|
|
130
|
-
**`due_date`**: Payment due date of the invoice.
|
|
131
|
-
|
|
132
|
-
```ruby
|
|
133
|
-
puts result.document.inference.prediction.due_date.value
|
|
134
|
-
```
|
|
135
|
-
|
|
136
|
-
### Locale
|
|
137
|
-
**`locale`** (Locale): Locale information.
|
|
138
|
-
|
|
139
|
-
* `locale.language` (String): Language code in [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format as seen on the document.
|
|
140
|
-
```ruby
|
|
141
|
-
puts result.document.inference.prediction.locale.language
|
|
142
|
-
```
|
|
143
|
-
|
|
144
|
-
* `locale.currency` (String): Currency code in [ISO 4217](https://en.wikipedia.org/wiki/ISO_4217) format as seen on the document.
|
|
145
|
-
```ruby
|
|
146
|
-
puts result.document.inference.prediction.locale.currency
|
|
147
|
-
```
|
|
148
|
-
|
|
149
|
-
* `locale.country` (String): Country code in [ISO 3166-1](https://en.wikipedia.org/wiki/ISO_3166-1) alpha-2 format as seen on the document.
|
|
150
|
-
```ruby
|
|
151
|
-
puts result.document.inference.prediction.locale.country
|
|
152
|
-
```
|
|
153
|
-
|
|
154
|
-
### Supplier Information
|
|
155
|
-
|
|
156
|
-
**`supplier_name`**: Supplier name as written in the invoice (logo or supplier Info).
|
|
157
|
-
|
|
158
|
-
```ruby
|
|
159
|
-
puts result.document.inference.prediction.supplier_name.value
|
|
160
|
-
```
|
|
161
|
-
|
|
162
|
-
**`supplier_address`**: Supplier address as written in the invoice.
|
|
163
|
-
|
|
164
|
-
```ruby
|
|
165
|
-
puts result.document.inference.prediction.supplier_address.value
|
|
166
|
-
```
|
|
167
|
-
|
|
168
|
-
**`supplier_payment_details`** (Array< PaymentDetails >): List of invoice's supplier payment details.
|
|
169
|
-
Each object in the list contains extra attributes:
|
|
170
|
-
|
|
171
|
-
* `iban` (String)
|
|
172
|
-
```ruby
|
|
173
|
-
# Show the IBAN of the first payment
|
|
174
|
-
puts result.document.inference.prediction.supplier_payment_details[0].iban
|
|
175
|
-
```
|
|
176
|
-
|
|
177
|
-
* `swift` (String)
|
|
178
|
-
```ruby
|
|
179
|
-
# Show the SWIFT of the first payment
|
|
180
|
-
puts result.document.inference.prediction.supplier_payment_details[0].swift
|
|
181
|
-
```
|
|
182
|
-
|
|
183
|
-
* `routing_number` (String)
|
|
184
|
-
```ruby
|
|
185
|
-
# Show the routing number of the first payment
|
|
186
|
-
puts result.document.inference.prediction.supplier_payment_details[0].routing_number
|
|
187
|
-
```
|
|
188
|
-
|
|
189
|
-
* `account_number` (String)
|
|
190
|
-
```ruby
|
|
191
|
-
# Show the account number of the first payment
|
|
192
|
-
puts result.document.inference.prediction.supplier_payment_details[0].account_number
|
|
193
|
-
```
|
|
194
|
-
|
|
195
|
-
**`supplier_company_registrations`** (Array< CompanyRegistration >):
|
|
196
|
-
List of detected supplier's company registration numbers.
|
|
197
|
-
Each object in the list contains an extra attribute:
|
|
198
|
-
|
|
199
|
-
* `type` (String): Type of company registration number among predefined categories.
|
|
200
|
-
```ruby
|
|
201
|
-
result.document.inference.prediction.supplier_company_registrations.each do |registration|
|
|
202
|
-
puts registration.value
|
|
203
|
-
puts registration.type
|
|
204
|
-
end
|
|
205
|
-
```
|
|
206
|
-
|
|
207
|
-
### Taxes
|
|
208
|
-
**`taxes`** (Array< TaxField >): Contains tax fields as seen on the invoice.
|
|
209
|
-
|
|
210
|
-
* `value` (Float): The tax amount.
|
|
211
|
-
```ruby
|
|
212
|
-
# Show the amount of the first tax
|
|
213
|
-
puts result.document.inference.prediction.taxes[0].value
|
|
214
|
-
```
|
|
215
|
-
|
|
216
|
-
* `code` (String): The tax code (HST, GST... for Canada; City Tax, State Tax for the US, etc.).
|
|
217
|
-
```ruby
|
|
218
|
-
# Show the code of the first tax
|
|
219
|
-
puts result.document.inference.prediction.taxes[0].code
|
|
220
|
-
```
|
|
221
|
-
|
|
222
|
-
* `rate` (Float): The tax rate.
|
|
223
|
-
```ruby
|
|
224
|
-
# Show the rate of the first tax
|
|
225
|
-
puts result.document.inference.prediction.taxes[0].rate
|
|
226
|
-
```
|
|
227
|
-
|
|
228
|
-
### Totals
|
|
229
|
-
|
|
230
|
-
**`total_amount`** (Field): Total amount including taxes.
|
|
231
|
-
|
|
232
|
-
```ruby
|
|
233
|
-
puts result.document.inference.prediction.total_amount.value
|
|
234
|
-
```
|
|
235
|
-
|
|
236
|
-
**`total_net`** (Field): Total amount excluding taxes.
|
|
237
|
-
|
|
238
|
-
```ruby
|
|
239
|
-
puts result.document.inference.prediction.total_net.value
|
|
240
|
-
```
|
|
241
|
-
|
|
242
|
-
**`total_tax`** (Field): Total tax value from tax lines.
|
|
243
|
-
|
|
244
|
-
```ruby
|
|
245
|
-
puts result.document.inference.prediction.total_tax.value
|
|
246
|
-
```
|
|
247
|
-
|
|
248
|
-
### Line items
|
|
249
|
-
|
|
250
|
-
**`line_items`** (Array<InvoiceLineItem>): Line items details.
|
|
251
|
-
Each object in the list contains:
|
|
252
|
-
|
|
253
|
-
* `product_code` (String)
|
|
254
|
-
* `description` (String)
|
|
255
|
-
* `quantity` (Float)
|
|
256
|
-
* `unit_price` (Float)
|
|
257
|
-
* `total_amount` (Float)
|
|
258
|
-
* `tax_rate` (Float)
|
|
259
|
-
* `tax_amount` (Float)
|
|
260
|
-
* `confidence` (Float)
|
|
261
|
-
* `page_id` (Integer)
|
|
262
|
-
* `polygon` (Polygon)
|
|
263
|
-
|
|
264
|
-
```ruby
|
|
265
|
-
result.document.inference.prediction.line_items.each do |line_item|
|
|
266
|
-
pp line_item
|
|
267
|
-
end
|
|
268
|
-
```
|
|
269
|
-
|
|
270
|
-
## Questions?
|
|
271
|
-
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
|
data/docs/ruby-passport-ocr.md
DELETED
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
The Ruby OCR SDK supports the [passport API](https://developers.mindee.com/docs/passport-ocr) for extracting data from passports.
|
|
2
|
-
|
|
3
|
-
Using the sample below, we are going to illustrate how to extract the data that we want using the OCR SDK.
|
|
4
|
-
|
|
5
|
-

|
|
6
|
-
|
|
7
|
-
## Quick Start
|
|
8
|
-
```ruby
|
|
9
|
-
# Init a new client
|
|
10
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
|
11
|
-
|
|
12
|
-
# Load a file from disk
|
|
13
|
-
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
|
14
|
-
|
|
15
|
-
# Parse the file
|
|
16
|
-
result = mindee_client.parse(
|
|
17
|
-
input_source,
|
|
18
|
-
Mindee::Product::Passport::PassportV1
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
# Print a full summary of the parsed data in RST format
|
|
22
|
-
puts result.document
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
Output:
|
|
26
|
-
```
|
|
27
|
-
----- Passport V1 -----
|
|
28
|
-
Filename:
|
|
29
|
-
Full name: HENERT PUDARSAN
|
|
30
|
-
Given names: HENERT
|
|
31
|
-
Surname: PUDARSAN
|
|
32
|
-
Country: GBR
|
|
33
|
-
ID Number: 707797979
|
|
34
|
-
Issuance date: 2012-04-22
|
|
35
|
-
Birth date: 1995-05-20
|
|
36
|
-
Expiry date: 2057-04-22
|
|
37
|
-
MRZ 1: P<GBRPUDARSAN<<HENERT<<<<<<<<<<<<<<<<<<<<<<<
|
|
38
|
-
MRZ 2: 7077979792GBR9505209M1704224<<<<<<<<<<<<<<00
|
|
39
|
-
MRZ: P<GBRPUDARSAN<<HENERT<<<<<<<<<<<<<<<<<<<<<<<7077979792GBR9505209M1704224<<<<<<<<<<<<<<00
|
|
40
|
-
----------------------
|
|
41
|
-
```
|
|
42
|
-
|
|
43
|
-
## Fields
|
|
44
|
-
Each prediction object contains a set of different fields.
|
|
45
|
-
Each `Field` object contains at a minimum the following attributes:
|
|
46
|
-
|
|
47
|
-
* `value` (String or Float depending on the field type): corresponds to the field value. Can be `nil` if no value was extracted.
|
|
48
|
-
* `confidence` (Float): the confidence score of the field prediction.
|
|
49
|
-
* `bounding_box` (Array< Array< Float > >): contains exactly 4 relative vertices coordinates (points) of a right rectangle containing the field in the document.
|
|
50
|
-
* `polygon` (Array< Array< Float > >): contains the relative vertices coordinates (points) of a polygon containing the field in the image.
|
|
51
|
-
* `reconstructed` (Boolean): True if the field was reconstructed or computed using other fields.
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
## Attributes
|
|
55
|
-
Depending on the field type specified, additional attributes can be extracted from the `Passport` object.
|
|
56
|
-
|
|
57
|
-
Using the above sample, the following are the basic fields that can be extracted:
|
|
58
|
-
|
|
59
|
-
- [Quick Start](#quick-start)
|
|
60
|
-
- [Fields](#fields)
|
|
61
|
-
- [Attributes](#attributes)
|
|
62
|
-
- [Birth Place](#birth-place)
|
|
63
|
-
- [Country](#country)
|
|
64
|
-
- [Dates](#dates)
|
|
65
|
-
- [Gender](#gender)
|
|
66
|
-
- [Names](#names)
|
|
67
|
-
- [ID](#id)
|
|
68
|
-
- [Machine-Readable Zone](#machine-readable-zone)
|
|
69
|
-
- [Questions?](#questions)
|
|
70
|
-
|
|
71
|
-
### Birth Place
|
|
72
|
-
|
|
73
|
-
**`birth_place`** (Field): Passport owner birthplace.
|
|
74
|
-
|
|
75
|
-
```ruby
|
|
76
|
-
puts result.document.inference.prediction.birth_place.value
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
### Country
|
|
80
|
-
**`country`** (Field): Passport country in [ISO 3166-1 alpha-3 code format](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) (3-letter code).
|
|
81
|
-
|
|
82
|
-
```ruby
|
|
83
|
-
puts result.document.inference.prediction.country.value
|
|
84
|
-
```
|
|
85
|
-
|
|
86
|
-
### Dates
|
|
87
|
-
Date fields:
|
|
88
|
-
|
|
89
|
-
* contain the `date_object` attribute, which is a standard Ruby [date object](https://ruby-doc.org/stdlib-2.7.1/libdoc/date/rdoc/Date.html)
|
|
90
|
-
* have a `value` attribute which is the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) representation of the date.
|
|
91
|
-
|
|
92
|
-
The following date fields are available:
|
|
93
|
-
|
|
94
|
-
**`expiry_date`**: Passport expiry date.
|
|
95
|
-
|
|
96
|
-
```ruby
|
|
97
|
-
puts result.document.inference.prediction.expiry_date.value
|
|
98
|
-
```
|
|
99
|
-
|
|
100
|
-
**`issuance_date`**: Passport date of issuance.
|
|
101
|
-
|
|
102
|
-
```ruby
|
|
103
|
-
puts result.document.inference.prediction.issuance_date.value
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
**`birth_date`**: Passport owner's date of birth.
|
|
107
|
-
|
|
108
|
-
```ruby
|
|
109
|
-
puts result.document.inference.prediction.birth_date.value
|
|
110
|
-
```
|
|
111
|
-
|
|
112
|
-
### Gender
|
|
113
|
-
|
|
114
|
-
**`gender`** (Field): Passport owner's gender (M / F).
|
|
115
|
-
|
|
116
|
-
```ruby
|
|
117
|
-
puts result.document.inference.prediction.gender.value
|
|
118
|
-
```
|
|
119
|
-
|
|
120
|
-
### Names
|
|
121
|
-
|
|
122
|
-
**`given_names`** (Array< Field >): List of passport owner's given names.
|
|
123
|
-
|
|
124
|
-
```ruby
|
|
125
|
-
result.document.inference.prediction.given_names.each do |name|
|
|
126
|
-
puts name
|
|
127
|
-
end
|
|
128
|
-
```
|
|
129
|
-
|
|
130
|
-
**`surname`** (Field): Passport owner's surname.
|
|
131
|
-
|
|
132
|
-
```ruby
|
|
133
|
-
puts result.document.inference.prediction.surname.value
|
|
134
|
-
```
|
|
135
|
-
|
|
136
|
-
### ID
|
|
137
|
-
|
|
138
|
-
**`id_number`** (Field): Passport identification number.
|
|
139
|
-
|
|
140
|
-
```ruby
|
|
141
|
-
puts result.document.inference.prediction.id_number.value
|
|
142
|
-
```
|
|
143
|
-
|
|
144
|
-
### Machine-Readable Zone
|
|
145
|
-
|
|
146
|
-
**`mrz1`** (Field): Passport first line of machine-readable zone.
|
|
147
|
-
|
|
148
|
-
```ruby
|
|
149
|
-
puts result.document.inference.prediction.mrz1.value
|
|
150
|
-
```
|
|
151
|
-
|
|
152
|
-
**`mrz2`** (Field): Passport second line of machine-readable zone.
|
|
153
|
-
|
|
154
|
-
```ruby
|
|
155
|
-
puts result.document.inference.prediction.mrz2.value
|
|
156
|
-
```
|
|
157
|
-
|
|
158
|
-
**`mrz`** (Field): Reconstructed passport full machine-readable zone from mrz1 and mrz2.
|
|
159
|
-
|
|
160
|
-
```ruby
|
|
161
|
-
puts result.document.inference.prediction.mrz.value
|
|
162
|
-
```
|
|
163
|
-
|
|
164
|
-
## Questions?
|
|
165
|
-
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
|
data/docs/ruby-receipt-ocr.md
DELETED
|
@@ -1,196 +0,0 @@
|
|
|
1
|
-
The Ruby OCR SDK supports the [receipt API](https://developers.mindee.com/docs/receipt-ocr) for extracting data from receipts.
|
|
2
|
-
|
|
3
|
-
Using the sample below, we are going to illustrate how to extract the data that we want using the OCR SDK.
|
|
4
|
-
|
|
5
|
-

|
|
6
|
-
|
|
7
|
-
## Quick Start
|
|
8
|
-
```ruby
|
|
9
|
-
require 'mindee'
|
|
10
|
-
|
|
11
|
-
# Init a new client
|
|
12
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
|
13
|
-
|
|
14
|
-
# Load a file from disk
|
|
15
|
-
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
|
16
|
-
|
|
17
|
-
# Parse the file
|
|
18
|
-
result = mindee_client.parse(
|
|
19
|
-
input_source,
|
|
20
|
-
Mindee::Product::Receipt::ReceiptV5
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
# Print a full summary of the parsed data in RST format
|
|
24
|
-
puts result.document
|
|
25
|
-
```
|
|
26
|
-
|
|
27
|
-
Output:
|
|
28
|
-
```
|
|
29
|
-
Receipt V5 Prediction
|
|
30
|
-
=====================
|
|
31
|
-
:Filename:
|
|
32
|
-
:Expense Locale: en-GB; en; GB; GBP;
|
|
33
|
-
:Expense Category: food
|
|
34
|
-
:Expense Sub Category: restaurant
|
|
35
|
-
:Document Type: EXPENSE RECEIPT
|
|
36
|
-
:Purchase Date: 2016-02-26
|
|
37
|
-
:Purchase Time: 15:20
|
|
38
|
-
:Total Amount: 10.20
|
|
39
|
-
:Total Excluding Taxes: 8.50
|
|
40
|
-
:Total Tax: 1.70
|
|
41
|
-
:Tip and Gratuity:
|
|
42
|
-
:Taxes:
|
|
43
|
-
+---------------+--------+----------+---------------+
|
|
44
|
-
| Base | Code | Rate (%) | Amount |
|
|
45
|
-
+===============+========+==========+===============+
|
|
46
|
-
| 8.50 | VAT | 20.00 | 1.70 |
|
|
47
|
-
+---------------+--------+----------+---------------+
|
|
48
|
-
:Supplier Name: CLACHAN
|
|
49
|
-
:Supplier Company Registrations: 232153895
|
|
50
|
-
:Supplier Address: 34 kingley street w1b 5qh
|
|
51
|
-
:Supplier Phone Number: 02074940834
|
|
52
|
-
:Line Items:
|
|
53
|
-
+--------------------------------------+----------+--------------+------------+
|
|
54
|
-
| Description | Quantity | Total Amount | Unit Price |
|
|
55
|
-
+======================================+==========+==============+============+
|
|
56
|
-
| meantime pale | 2.00 | 10.20 | |
|
|
57
|
-
+--------------------------------------+----------+--------------+------------+
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
## Fields
|
|
61
|
-
Each prediction object contains a set of different fields.
|
|
62
|
-
Each `Field` object contains at a minimum the following attributes:
|
|
63
|
-
|
|
64
|
-
* `value` (String or Float depending on the field type): corresponds to the field value. Can be `nil` if no value was extracted.
|
|
65
|
-
* `confidence` (Float): the confidence score of the field prediction.
|
|
66
|
-
* `bounding_box` (Array< Array< Float > >): contains exactly 4 relative vertices coordinates (points) of a right rectangle containing the field in the document.
|
|
67
|
-
* `polygon` (Array< Array< Float > >): contains the relative vertices coordinates (points) of a polygon containing the field in the image.
|
|
68
|
-
* `reconstructed` (Boolean): True if the field was reconstructed or computed using other fields.
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
## Attributes
|
|
72
|
-
Depending on the field type specified, additional attributes can be extracted in the `Receipt` object.
|
|
73
|
-
|
|
74
|
-
Using the above sample, the following are the basic fields that can be extracted:
|
|
75
|
-
|
|
76
|
-
- [Quick Start](#quick-start)
|
|
77
|
-
- [Fields](#fields)
|
|
78
|
-
- [Attributes](#attributes)
|
|
79
|
-
- [Category](#category)
|
|
80
|
-
- [Date](#date)
|
|
81
|
-
- [Locale](#locale)
|
|
82
|
-
- [Supplier Information](#supplier-information)
|
|
83
|
-
- [Taxes](#taxes)
|
|
84
|
-
- [Time](#time)
|
|
85
|
-
- [Totals](#totals)
|
|
86
|
-
- [Questions?](#questions)
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
### Category
|
|
90
|
-
* **`category`** (Field): Receipt category as seen on the receipt.
|
|
91
|
-
The following categories are supported: toll, food, parking, transport, accommodation, gasoline, miscellaneous.
|
|
92
|
-
|
|
93
|
-
```ruby
|
|
94
|
-
puts result.document.inference.prediction.category.value
|
|
95
|
-
```
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
### Date
|
|
99
|
-
Date fields:
|
|
100
|
-
|
|
101
|
-
* contain the `date_object` attribute, which is a standard Ruby [date object](https://ruby-doc.org/stdlib-2.7.1/libdoc/date/rdoc/Date.html)
|
|
102
|
-
* have a `value` attribute which is the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) representation of the date.
|
|
103
|
-
|
|
104
|
-
The following date fields are available:
|
|
105
|
-
|
|
106
|
-
* **`date`**: Date the receipt was issued
|
|
107
|
-
|
|
108
|
-
```ruby
|
|
109
|
-
puts result.document.inference.prediction.date.value
|
|
110
|
-
```
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
### Locale
|
|
114
|
-
**`locale`** (Locale): Locale information.
|
|
115
|
-
|
|
116
|
-
* `locale.value` (String): Locale with country and language codes.
|
|
117
|
-
```ruby
|
|
118
|
-
puts result.document.inference.prediction.locale
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
* `locale.language` (String): Language code in [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format as seen on the document.
|
|
122
|
-
*
|
|
123
|
-
```ruby
|
|
124
|
-
puts result.document.inference.prediction.locale.language
|
|
125
|
-
```
|
|
126
|
-
|
|
127
|
-
* `locale.currency` (String): Currency code in [ISO 4217](https://en.wikipedia.org/wiki/ISO_4217) format as seen on the document.
|
|
128
|
-
|
|
129
|
-
```ruby
|
|
130
|
-
puts result.document.inference.prediction.locale.currency
|
|
131
|
-
```
|
|
132
|
-
|
|
133
|
-
* `locale.country` (String): Country code in [ISO 3166-1](https://en.wikipedia.org/wiki/ISO_3166-1) alpha-2 format as seen on the document.
|
|
134
|
-
|
|
135
|
-
```ruby
|
|
136
|
-
puts result.document.inference.prediction.locale.country
|
|
137
|
-
```
|
|
138
|
-
|
|
139
|
-
### Supplier Information
|
|
140
|
-
* **`supplier_name`** (Field): Supplier name as written in the receipt.
|
|
141
|
-
|
|
142
|
-
```ruby
|
|
143
|
-
puts result.document.inference.prediction.supplier_name.value
|
|
144
|
-
```
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
### Taxes
|
|
148
|
-
**`taxes`** (Array< TaxField >): Contains tax fields as seen on the receipt.
|
|
149
|
-
|
|
150
|
-
* `value` (Float): The tax amount.
|
|
151
|
-
```ruby
|
|
152
|
-
# Show the amount of the first tax
|
|
153
|
-
puts result.document.inference.prediction.taxes[0].value
|
|
154
|
-
```
|
|
155
|
-
|
|
156
|
-
* `code` (String): The tax code (HST, GST... for Canada; City Tax, State Tax for the US, etc.).
|
|
157
|
-
```ruby
|
|
158
|
-
# Show the code of the first tax
|
|
159
|
-
puts result.document.inference.prediction.taxes[0].code
|
|
160
|
-
```
|
|
161
|
-
|
|
162
|
-
* `rate` (Float): The tax rate.
|
|
163
|
-
```ruby
|
|
164
|
-
# Show the rate of the first tax
|
|
165
|
-
puts result.document.inference.prediction.taxes[0].rate
|
|
166
|
-
```
|
|
167
|
-
|
|
168
|
-
### Time
|
|
169
|
-
* **`time`**: Time of purchase as seen on the receipt
|
|
170
|
-
* `value` (String): Time of purchase with 24 hours formatting (hh:mm).
|
|
171
|
-
|
|
172
|
-
```ruby
|
|
173
|
-
puts result.document.inference.prediction.time.value
|
|
174
|
-
```
|
|
175
|
-
|
|
176
|
-
### Totals
|
|
177
|
-
* **`total_amount`** (Field): Total amount including taxes
|
|
178
|
-
|
|
179
|
-
```ruby
|
|
180
|
-
puts result.document.inference.prediction.total_amount.value
|
|
181
|
-
```
|
|
182
|
-
|
|
183
|
-
* **`total_net`** (Field): Total amount paid excluding taxes
|
|
184
|
-
|
|
185
|
-
```ruby
|
|
186
|
-
puts result.document.inference.prediction.total_net.value
|
|
187
|
-
```
|
|
188
|
-
|
|
189
|
-
* **`total_tax`** (Field): Total tax value from tax lines
|
|
190
|
-
|
|
191
|
-
```ruby
|
|
192
|
-
puts result.document.inference.prediction.total_tax.value
|
|
193
|
-
```
|
|
194
|
-
|
|
195
|
-
## Questions?
|
|
196
|
-
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
|
data/lib/mindee/parsing/common/error.rb
DELETED
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Mindee
|
|
4
|
-
module Parsing
|
|
5
|
-
module Common
|
|
6
|
-
# Error built from an API error payload; exposes its code, details and message.
|
|
7
|
-
class HttpError < StandardError
|
|
8
|
-
# @return [String]
|
|
9
|
-
attr_reader :api_code
|
|
10
|
-
# @return [String]
|
|
11
|
-
attr_reader :api_details
|
|
12
|
-
# @return [String]
|
|
13
|
-
attr_reader :api_message
|
|
14
|
-
|
|
15
|
-
def initialize(error)
|
|
16
|
-
@api_code = error['code']
|
|
17
|
-
@api_details = error['details']
|
|
18
|
-
@api_message = error['message']
|
|
19
|
-
super("#{@api_code}: #{@api_details} - #{@api_message}")
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
end
|