mindee 2.2.1 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +2 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +40 -0
- data/README.md +29 -16
- data/bin/mindee.rb +23 -26
- data/docs/code_samples/bank_account_details_v1.txt +10 -5
- data/docs/code_samples/bank_account_details_v2.txt +19 -0
- data/docs/code_samples/bank_check_v1.txt +10 -5
- data/docs/code_samples/carte_vitale_v1.txt +10 -5
- data/docs/code_samples/custom_v1.txt +19 -10
- data/docs/code_samples/default.txt +10 -2
- data/docs/code_samples/expense_receipts_v4.txt +10 -5
- data/docs/code_samples/expense_receipts_v5.txt +11 -6
- data/docs/code_samples/financial_document_v1.txt +10 -5
- data/docs/code_samples/idcard_fr_v1.txt +10 -5
- data/docs/code_samples/invoice_splitter_v1_async.txt +66 -0
- data/docs/code_samples/invoices_v4.txt +10 -5
- data/docs/code_samples/license_plates_v1.txt +10 -5
- data/docs/code_samples/passport_v1.txt +10 -5
- data/docs/code_samples/proof_of_address_v1.txt +10 -5
- data/docs/code_samples/us_driver_license_v1.txt +19 -0
- data/docs/ruby-api-builder.md +30 -31
- data/docs/ruby-getting-started.md +64 -23
- data/docs/ruby-invoice-ocr.md +70 -59
- data/docs/ruby-passport-ocr.md +49 -40
- data/docs/ruby-receipt-ocr.md +45 -32
- data/lib/mindee/client.rb +150 -148
- data/lib/mindee/geometry/min_max.rb +23 -0
- data/lib/mindee/geometry/point.rb +35 -0
- data/lib/mindee/geometry/polygon.rb +23 -0
- data/lib/mindee/geometry/quadrilateral.rb +45 -0
- data/lib/mindee/geometry/utils.rb +81 -0
- data/lib/mindee/geometry.rb +5 -116
- data/lib/mindee/http/endpoint.rb +115 -16
- data/lib/mindee/http.rb +3 -0
- data/lib/mindee/input/sources.rb +90 -73
- data/lib/mindee/parsing/common/api_response.rb +109 -0
- data/lib/mindee/parsing/common/document.rb +48 -0
- data/lib/mindee/parsing/common/error.rb +24 -0
- data/lib/mindee/parsing/common/inference.rb +43 -0
- data/lib/mindee/parsing/common/ocr/mvision_v1.rb +34 -0
- data/lib/mindee/parsing/common/ocr/ocr.rb +169 -0
- data/lib/mindee/parsing/common/ocr.rb +3 -0
- data/lib/mindee/parsing/common/orientation.rb +26 -0
- data/lib/mindee/parsing/common/page.rb +40 -0
- data/lib/mindee/parsing/common/prediction.rb +15 -0
- data/lib/mindee/parsing/common/product.rb +19 -0
- data/lib/mindee/parsing/common.rb +10 -0
- data/lib/mindee/parsing/custom/classification_field.rb +28 -0
- data/lib/mindee/parsing/custom/list_field.rb +76 -0
- data/lib/mindee/parsing/custom.rb +4 -0
- data/lib/mindee/parsing/standard/amount_field.rb +26 -0
- data/lib/mindee/parsing/standard/base_field.rb +104 -0
- data/lib/mindee/parsing/standard/classification_field.rb +16 -0
- data/lib/mindee/parsing/standard/company_registration_field.rb +21 -0
- data/lib/mindee/parsing/standard/date_field.rb +34 -0
- data/lib/mindee/parsing/standard/locale_field.rb +50 -0
- data/lib/mindee/parsing/standard/payment_details_field.rb +42 -0
- data/lib/mindee/parsing/standard/position_field.rb +47 -0
- data/lib/mindee/parsing/standard/tax_field.rb +108 -0
- data/lib/mindee/parsing/standard/text_field.rb +16 -0
- data/lib/mindee/parsing/standard.rb +12 -0
- data/lib/mindee/parsing.rb +3 -2
- data/lib/mindee/{input → pdf}/pdf_processing.rb +4 -32
- data/lib/mindee/pdf/pdf_tools.rb +34 -0
- data/lib/mindee/pdf.rb +3 -0
- data/lib/mindee/product/.rubocop.yml +5 -0
- data/lib/mindee/product/custom/custom_v1.rb +35 -0
- data/lib/mindee/product/custom/custom_v1_document.rb +60 -0
- data/lib/mindee/product/custom/custom_v1_page.rb +32 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +38 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +37 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_page.rb +34 -0
- data/lib/mindee/product/financial_document/financial_document_v1.rb +36 -0
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +202 -0
- data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +90 -0
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +32 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +43 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_page.rb +34 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +71 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +51 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rb +34 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +38 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +52 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb +34 -0
- data/lib/mindee/product/fr/id_card/id_card_v1.rb +38 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +82 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_page.rb +48 -0
- data/lib/mindee/product/invoice/invoice_v4.rb +37 -0
- data/lib/mindee/product/invoice/invoice_v4_document.rb +212 -0
- data/lib/mindee/product/invoice/invoice_v4_line_item.rb +66 -0
- data/lib/mindee/product/invoice/invoice_v4_page.rb +32 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +36 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +65 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +32 -0
- data/lib/mindee/product/passport/passport_v1.rb +36 -0
- data/lib/mindee/{parsing/prediction/fr/id_card/id_card_v1.rb → product/passport/passport_v1_document.rb} +45 -45
- data/lib/mindee/product/passport/passport_v1_page.rb +32 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +36 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +83 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v4.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v4_document.rb +86 -0
- data/lib/mindee/product/receipt/receipt_v4_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v5.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v5_document.rb +149 -0
- data/lib/mindee/product/receipt/receipt_v5_line_item.rb +69 -0
- data/lib/mindee/product/receipt/receipt_v5_page.rb +32 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1.rb +38 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +62 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_page.rb +57 -0
- data/lib/mindee/product/us/driver_license/driver_license_v1.rb +38 -0
- data/lib/mindee/product/us/driver_license/driver_license_v1_document.rb +113 -0
- data/lib/mindee/product/us/driver_license/driver_license_v1_page.rb +53 -0
- data/lib/mindee/product.rb +17 -0
- data/lib/mindee/version.rb +2 -1
- data/lib/mindee.rb +3 -1
- metadata +91 -38
- data/docs/code_samples/shipping_containers_v1.txt +0 -14
- data/lib/mindee/document_config.rb +0 -60
- data/lib/mindee/parsing/document.rb +0 -31
- data/lib/mindee/parsing/error.rb +0 -22
- data/lib/mindee/parsing/inference.rb +0 -53
- data/lib/mindee/parsing/page.rb +0 -46
- data/lib/mindee/parsing/prediction/base.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/amount.rb +0 -21
- data/lib/mindee/parsing/prediction/common_fields/base.rb +0 -72
- data/lib/mindee/parsing/prediction/common_fields/company_registration.rb +0 -17
- data/lib/mindee/parsing/prediction/common_fields/date.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/locale.rb +0 -45
- data/lib/mindee/parsing/prediction/common_fields/payment_details.rb +0 -33
- data/lib/mindee/parsing/prediction/common_fields/position.rb +0 -39
- data/lib/mindee/parsing/prediction/common_fields/tax.rb +0 -44
- data/lib/mindee/parsing/prediction/common_fields/text.rb +0 -12
- data/lib/mindee/parsing/prediction/common_fields.rb +0 -11
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +0 -58
- data/lib/mindee/parsing/prediction/custom/fields.rb +0 -91
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +0 -34
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +0 -237
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +0 -40
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +0 -49
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +0 -212
- data/lib/mindee/parsing/prediction/invoice/invoice_v4_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +0 -121
- data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +0 -80
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +0 -87
- data/lib/mindee/parsing/prediction/receipt/receipt_v5.rb +0 -136
- data/lib/mindee/parsing/prediction/receipt/receipt_v5_line_item.rb +0 -37
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +0 -38
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +0 -70
- data/lib/mindee/parsing/prediction.rb +0 -15
@@ -3,12 +3,17 @@ require 'mindee'
|
|
3
3
|
# Init a new client
|
4
4
|
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
5
5
|
|
6
|
-
# Load a file from disk
|
7
|
-
|
8
|
-
|
6
|
+
# Load a file from disk
|
7
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
8
|
+
|
9
|
+
# Parse the file
|
10
|
+
result = mindee_client.parse(
|
11
|
+
input_source,
|
12
|
+
Mindee::Product::Passport::PassportV1
|
13
|
+
)
|
9
14
|
|
10
15
|
# Print a full summary of the parsed data in RST format
|
11
|
-
puts result
|
16
|
+
puts result.document
|
12
17
|
|
13
18
|
# Print the document-level parsed data
|
14
|
-
# puts result.inference.prediction
|
19
|
+
# puts result.document.inference.prediction
|
@@ -3,12 +3,17 @@ require 'mindee'
|
|
3
3
|
# Init a new client
|
4
4
|
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
5
5
|
|
6
|
-
# Load a file from disk
|
7
|
-
|
8
|
-
|
6
|
+
# Load a file from disk
|
7
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
8
|
+
|
9
|
+
# Parse the file
|
10
|
+
result = mindee_client.parse(
|
11
|
+
input_source,
|
12
|
+
Mindee::Product::ProofOfAddress::ProofOfAddressV1
|
13
|
+
)
|
9
14
|
|
10
15
|
# Print a full summary of the parsed data in RST format
|
11
|
-
puts result
|
16
|
+
puts result.document
|
12
17
|
|
13
18
|
# Print the document-level parsed data
|
14
|
-
# puts result.inference.prediction
|
19
|
+
# puts result.document.inference.prediction
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'mindee'
|
2
|
+
|
3
|
+
# Init a new client
|
4
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
5
|
+
|
6
|
+
# Load a file from disk
|
7
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
8
|
+
|
9
|
+
# Parse the file
|
10
|
+
result = mindee_client.parse(
|
11
|
+
input_source,
|
12
|
+
Mindee::Product::US::DriverLicense::DriverLicenseV1
|
13
|
+
)
|
14
|
+
|
15
|
+
# Print a full summary of the parsed data in RST format
|
16
|
+
puts result.document
|
17
|
+
|
18
|
+
# Print the document-level parsed data
|
19
|
+
# puts result.document.inference.prediction
|
data/docs/ruby-api-builder.md
CHANGED
@@ -9,52 +9,51 @@ created with the [API Builder](https://developers.mindee.com/docs/overview).
|
|
9
9
|
> 📘 **Info**
|
10
10
|
>
|
11
11
|
> We used a data model that will be different from yours.
|
12
|
-
> To modify this to your own custom API, change the `
|
12
|
+
> To modify this to your own custom API, change the `mindee_client.create_endpoint` call with your own parameters.
|
13
13
|
|
14
14
|
```ruby
|
15
15
|
require 'mindee'
|
16
16
|
|
17
17
|
# Init a new client and configure your custom document
|
18
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
19
|
-
'john',
|
20
|
-
'wnine',
|
21
|
-
version: '1.1' # optional, if not set, use the latest version of the model
|
22
|
-
)
|
23
|
-
|
24
|
-
# Load a file from disk and parse it
|
25
|
-
result = mindee_client.doc_from_path('/path/to/file.ext')
|
26
|
-
.parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
|
18
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
27
19
|
|
28
|
-
#
|
29
|
-
|
20
|
+
# Create an endpoint for your custom product
|
21
|
+
custom_endpoint = mindee_client.create_endpoint(
|
22
|
+
account_name: 'john',
|
23
|
+
endpoint_name: 'wnine',
|
24
|
+
version: '1.1' # optional, if not set, uses the latest version of the model
|
25
|
+
)
|
30
26
|
```
|
31
27
|
|
32
|
-
If the `version` argument is set, you'll be required to update it every time a new model is trained.
|
33
|
-
This is probably not needed for development but essential for production use.
|
28
|
+
> **Note:** If the `version` argument is set, you'll be required to update it every time a new model is trained.
|
29
|
+
> This is probably not needed for development but essential for production use.
|
34
30
|
|
35
31
|
## Parsing Documents
|
36
32
|
The client calls the `parse` method when parsing your custom document, which will return an object that you can send to the API.
|
37
|
-
|
33
|
+
If your document does not use an off-the-shelf (OTS) API, the document's endpoint must be specified when calling the `parse` method.
|
38
34
|
|
39
35
|
```ruby
|
40
|
-
|
41
|
-
|
42
|
-
|
36
|
+
result = mindee_client.parse(
|
37
|
+
input_source,
|
38
|
+
Mindee::Product::Custom::CustomV1,
|
39
|
+
endpoint: custom_endpoint
|
40
|
+
)
|
41
|
+
|
42
|
+
|
43
|
+
# Print a summary of the document prediction in RST format
|
44
|
+
puts result.document
|
43
45
|
```
|
44
46
|
|
45
47
|
> 📘 **Info**
|
46
48
|
>
|
47
|
-
> If your custom document has the same name as
|
48
|
-
> you **must** specify your account name when
|
49
|
+
> If your custom document has the same name as one of the [off-the-shelf API](https://developers.mindee.com/docs/what-is-off-the-shelf-api) documents,
|
50
|
+
> you **must** specify your account name when calling the `create_endpoint` method:
|
49
51
|
|
50
52
|
```ruby
|
51
|
-
|
52
|
-
'receipt',
|
53
|
-
'john'
|
53
|
+
custom_endpoint = mindee_client.create_endpoint(
|
54
|
+
endpoint_name: 'receipt',
|
55
|
+
account_name: 'john'
|
54
56
|
)
|
55
|
-
|
56
|
-
result = mindee_client.doc_from_path('/path/to/receipt.jpg')
|
57
|
-
.parse(Mindee::Prediction::CustomV1, account_name: 'john')
|
58
57
|
```
|
59
58
|
|
60
59
|
## Document Fields
|
@@ -81,21 +80,21 @@ Individual field values can be accessed by using the field's API name, in the ex
|
|
81
80
|
|
82
81
|
```ruby
|
83
82
|
# raw data, list of each word object
|
84
|
-
pp result.inference.prediction.fields[:address].values
|
83
|
+
pp result.document.inference.prediction.fields[:address].values
|
85
84
|
|
86
85
|
# list of all values
|
87
|
-
puts result.inference.prediction.fields[:address].contents_list
|
86
|
+
puts result.document.inference.prediction.fields[:address].contents_list
|
88
87
|
|
89
88
|
# default string representation
|
90
|
-
puts result.inference.prediction.fields[:address].to_s
|
89
|
+
puts result.document.inference.prediction.fields[:address].to_s
|
91
90
|
|
92
91
|
# custom string representation
|
93
|
-
puts result.inference.prediction.fields[:address].contents_str(separator: '_')
|
92
|
+
puts result.document.inference.prediction.fields[:address].contents_str(separator: '_')
|
94
93
|
```
|
95
94
|
|
96
95
|
To iterate over all the fields:
|
97
96
|
```ruby
|
98
|
-
result.inference.prediction.fields.each do |name, info|
|
97
|
+
result.document.inference.prediction.fields.each do |name, info|
|
99
98
|
puts name
|
100
99
|
puts info.values
|
101
100
|
end
|
@@ -72,7 +72,7 @@ Using Mindee's APIs can be broken down into the following steps:
|
|
72
72
|
Let's take a deep dive into how this works.
|
73
73
|
|
74
74
|
## Initializing the Client
|
75
|
-
The `Client`
|
75
|
+
The `Client` automatically connects to the default endpoints for each product (or creates one with given parameters for Custom APIs).
|
76
76
|
|
77
77
|
The `Client` requires your [API key](https://developers.mindee.com/docs/make-your-first-request#create-an-api-key).
|
78
78
|
|
@@ -121,15 +121,25 @@ There are a few different ways of loading a document file, depending on your use
|
|
121
121
|
* [File Object](#file-object)
|
122
122
|
* [Base64](#base64)
|
123
123
|
* [Bytes](#bytes)
|
124
|
+
* [URL](#url)
|
124
125
|
|
125
126
|
### Path
|
126
127
|
Load a file directly from disk. Requires an absolute path, as a string.
|
127
128
|
|
128
129
|
```ruby
|
129
|
-
|
130
|
+
# Init a new client
|
131
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
132
|
+
|
133
|
+
# Load a file from disk
|
134
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
130
135
|
|
136
|
+
# Parse the file
|
137
|
+
result = mindee_client.parse(
|
138
|
+
input_source,
|
139
|
+
Mindee::Product::Invoice::InvoiceV4
|
140
|
+
)
|
131
141
|
# Print a full summary of the parsed data in RST format
|
132
|
-
puts result
|
142
|
+
puts result.document
|
133
143
|
```
|
134
144
|
|
135
145
|
### File Object
|
@@ -138,13 +148,14 @@ A normal Ruby file object with a path. Must be in binary mode.
|
|
138
148
|
**Note**: The original filename is required when calling the method.
|
139
149
|
|
140
150
|
```ruby
|
141
|
-
result = nil
|
142
151
|
File.open(INVOICE_FILE, 'rb') do |fo|
|
143
|
-
|
152
|
+
input_source = mindee_client.source_from_file(fo, "invoice.jpg")
|
144
153
|
end
|
145
154
|
|
146
|
-
|
147
|
-
|
155
|
+
result = mindee_client.parse(
|
156
|
+
input_source,
|
157
|
+
Mindee::Product::Invoice::InvoiceV4
|
158
|
+
)
|
148
159
|
```
|
149
160
|
|
150
161
|
### Base64
|
@@ -154,10 +165,13 @@ Load file contents from a base64-encoded string.
|
|
154
165
|
|
155
166
|
```ruby
|
156
167
|
b64_string = "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLD...."
|
157
|
-
result = mindee_client.doc_from_b64string(b64_string, "receipt.jpg").parse(Mindee::Prediction::ReceiptV4)
|
158
168
|
|
159
|
-
|
160
|
-
|
169
|
+
input_source = mindee_client.source_from_b64string(b64_string, "receipt.jpg")
|
170
|
+
|
171
|
+
result = mindee_client.parse(
|
172
|
+
input_source,
|
173
|
+
Mindee::Product::Receipt::ReceiptV5
|
174
|
+
)
|
161
175
|
```
|
162
176
|
|
163
177
|
### Bytes
|
@@ -167,33 +181,60 @@ Requires raw bytes.
|
|
167
181
|
|
168
182
|
```ruby
|
169
183
|
raw_bytes = b"%PDF-1.3\n%\xbf\xf7\xa2\xfe\n1 0 ob..."
|
170
|
-
|
184
|
+
input_source = mindee_client.source_from_bytes(raw_bytes, "invoice.pdf")
|
171
185
|
|
172
|
-
|
173
|
-
|
186
|
+
result = mindee_client.parse(
|
187
|
+
input_source,
|
188
|
+
Mindee::Product::Invoice::InvoiceV4
|
189
|
+
)
|
174
190
|
```
|
175
191
|
|
192
|
+
### URL
|
193
|
+
Requires a URL as a String.
|
194
|
+
|
195
|
+
**Note**: the URL must start with `https://`.
|
196
|
+
```ruby
|
197
|
+
input_source = mindee_client.source_from_url("https://www.example.com/invoice.pdf")
|
198
|
+
|
199
|
+
result = mindee_client.parse(
|
200
|
+
input_source,
|
201
|
+
Mindee::Product::Invoice::InvoiceV4
|
202
|
+
)
|
203
|
+
```
|
204
|
+
|
205
|
+
|
176
206
|
## Sending a File
|
177
207
|
To send a file to the API, we need to specify how to process the document.
|
178
208
|
This will determine which API endpoint is used and how the API return will be handled internally by the library.
|
179
209
|
|
180
|
-
More specifically, we need to set a `Mindee::
|
210
|
+
More specifically, we need to pass a `Mindee::Product` class as the second parameter of the `parse` method.
|
181
211
|
|
182
|
-
This is because the `
|
212
|
+
This is because the `Endpoint`'s URLs will be set according to it.
|
183
213
|
|
184
|
-
Each document type available in the library has its corresponding class, which inherit from the base `Mindee::
|
214
|
+
Each document type available in the library has its corresponding class, which inherits from the base `Mindee::Parsing::Common::Predict` class.
|
185
215
|
This is detailed in each document-specific guide.
|
186
216
|
|
187
217
|
### Off-the-Shelf Documents
|
188
218
|
Simply setting the correct class is enough:
|
189
219
|
```ruby
|
190
|
-
|
220
|
+
|
221
|
+
result = mindee_client.parse(
|
222
|
+
input_source,
|
223
|
+
Mindee::Product::Invoice::InvoiceV4
|
224
|
+
)
|
191
225
|
```
|
192
226
|
|
193
227
|
### Custom Documents
|
194
|
-
|
228
|
+
For custom documents, the endpoint to use must also be set, and it must take in an `endpoint_name`:
|
229
|
+
|
195
230
|
```ruby
|
196
|
-
|
231
|
+
endpoint = mindee_client.create_endpoint(endpoint_name: 'wnine')
|
232
|
+
|
233
|
+
result = mindee_client.parse(
|
234
|
+
input_source,
|
235
|
+
Mindee::Product::Custom::CustomV1,
|
236
|
+
endpoint: endpoint
|
237
|
+
)
|
197
238
|
```
|
198
239
|
|
199
240
|
This is because the `CustomV1` class is enough to handle the return processing, but the actual endpoint needs to be specified.
|
@@ -220,10 +261,10 @@ only the highest confidence field data will be shown (this is all done automatic
|
|
220
261
|
|
221
262
|
```ruby
|
222
263
|
# as an object, complete
|
223
|
-
pp result.inference.prediction
|
264
|
+
pp result.document.inference.prediction
|
224
265
|
|
225
266
|
# as a string, summary in RST format
|
226
|
-
puts result.inference.prediction
|
267
|
+
puts result.document.inference.prediction
|
227
268
|
```
|
228
269
|
|
229
270
|
#### Page level prediction
|
@@ -237,7 +278,7 @@ Single page documents will have a single entry.
|
|
237
278
|
|
238
279
|
Iteration is done like any Ruby array:
|
239
280
|
```ruby
|
240
|
-
response.inference.pages.each do |page|
|
281
|
+
response.document.inference.pages.each do |page|
|
241
282
|
# as an object, complete
|
242
283
|
pp page.prediction
|
243
284
|
|
@@ -256,7 +297,7 @@ If the page requires rotation for correct display, the orientation field gives a
|
|
256
297
|
* 270 degrees: the page must be rotated counterclockwise to be upright
|
257
298
|
|
258
299
|
```ruby
|
259
|
-
response.inference.pages.each do |page|
|
300
|
+
response.document.inference.pages.each do |page|
|
260
301
|
puts page.orientation.value
|
261
302
|
end
|
262
303
|
```
|
data/docs/ruby-invoice-ocr.md
CHANGED
@@ -6,46 +6,55 @@ Using this sample below, we are going to illustrate how to extract the data that
|
|
6
6
|
|
7
7
|
## Quick Start
|
8
8
|
```ruby
|
9
|
-
|
10
|
-
|
11
|
-
# Init a new client, specifying an API key
|
9
|
+
# Init a new client
|
12
10
|
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
13
11
|
|
14
|
-
#
|
15
|
-
|
12
|
+
# Load a file from disk
|
13
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
14
|
+
|
15
|
+
# Parse the file
|
16
|
+
result = mindee_client.parse(
|
17
|
+
input_source,
|
18
|
+
Mindee::Product::Invoice::InvoiceV4
|
19
|
+
)
|
16
20
|
|
17
|
-
# Print a summary of the
|
18
|
-
puts result.
|
21
|
+
# Print a full summary of the parsed data in RST format
|
22
|
+
puts result.document
|
19
23
|
```
|
20
24
|
|
21
25
|
Output:
|
22
26
|
```
|
23
|
-
|
24
|
-
|
25
|
-
:
|
27
|
+
Invoice V4 Prediction
|
28
|
+
=====================
|
29
|
+
:Filename:
|
30
|
+
:Locale: fr; fr; EUR;
|
31
|
+
:Invoice number: 0042004801351
|
26
32
|
:Reference numbers: AD29094
|
27
|
-
:Invoice date:
|
28
|
-
:Invoice due date:
|
33
|
+
:Invoice date: 2020-02-17
|
34
|
+
:Invoice due date: 2020-02-17
|
29
35
|
:Supplier name: TURNPIKE DESIGNS CO.
|
30
36
|
:Supplier address: 156 University Ave, Toronto ON, Canada M5H 2H7
|
31
|
-
:Supplier company registrations:
|
32
|
-
:Supplier payment details:
|
37
|
+
:Supplier company registrations: 501124705; FR33501124705
|
38
|
+
:Supplier payment details: FR7640254025476501124705368;
|
33
39
|
:Customer name: JIRO DOI
|
34
|
-
:Customer
|
35
|
-
:Customer
|
36
|
-
:Taxes: 193.20 8.00%
|
37
|
-
:Total net: 2415.00
|
38
|
-
:Total taxes: 193.20
|
39
|
-
:Total amount: 2608.20
|
40
|
-
|
40
|
+
:Customer company registrations: FR00000000000; 111222333
|
41
|
+
:Customer address: 1954 Bloon Street West Toronto, ON, M6P 3K9 Canada
|
41
42
|
:Line Items:
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
43
|
+
Code | QTY | Price | Amount | Tax (Rate) | Description
|
44
|
+
| | | 4.31 | (2.10%) | PQ20 ETIQ ULTRA RESIS METAXXDC
|
45
|
+
| 1.00 | 65.00 | 75.00 | 10.00 | Platinum web hosting package Dow...
|
46
|
+
XXX81125600010 | 1.00 | 250.01 | 275.51 | 25.50 (10.20%) | a long string describing the ite...
|
47
|
+
ABC456 | 200.30 | 8.101 | 1622.63 | 121.70 (7.50%) | Liquid perfection
|
48
|
+
| | | | | CARTOUCHE L NR BROTHER TN247BK
|
49
|
+
:Taxes:
|
50
|
+
+---------------+--------+----------+---------------+
|
51
|
+
| Base | Code | Rate (%) | Amount |
|
52
|
+
+===============+========+==========+===============+
|
53
|
+
| | | 20.00 | 97.98 |
|
54
|
+
+---------------+--------+----------+---------------+
|
55
|
+
:Total tax: 97.98
|
56
|
+
:Total net: 489.97
|
57
|
+
:Total amount: 587.95
|
49
58
|
```
|
50
59
|
|
51
60
|
> 📘 **Info**
|
@@ -69,34 +78,36 @@ Depending on the field type, there might be additional attributes that will be e
|
|
69
78
|
|
70
79
|
Using the above sample, the following are the basic fields that can be extracted:
|
71
80
|
|
72
|
-
- [
|
73
|
-
- [
|
74
|
-
- [
|
75
|
-
- [
|
76
|
-
- [
|
77
|
-
- [
|
78
|
-
- [
|
79
|
-
- [
|
80
|
-
- [
|
81
|
+
- [Quick Start](#quick-start)
|
82
|
+
- [Fields](#fields)
|
83
|
+
- [Attributes](#attributes)
|
84
|
+
- [Customer Information](#customer-information)
|
85
|
+
- [Dates](#dates)
|
86
|
+
- [Locale](#locale)
|
87
|
+
- [Supplier Information](#supplier-information)
|
88
|
+
- [Taxes](#taxes)
|
89
|
+
- [Totals](#totals)
|
90
|
+
- [Line items](#line-items)
|
91
|
+
- [Questions?](#questions)
|
81
92
|
|
82
93
|
|
83
94
|
### Customer Information
|
84
95
|
**`customer_name`** (Field): Customer's name
|
85
96
|
|
86
97
|
```ruby
|
87
|
-
puts result.inference.prediction.customer_name.value
|
98
|
+
puts result.document.inference.prediction.customer_name.value
|
88
99
|
```
|
89
100
|
|
90
101
|
**`customer_address`** (Field): Customer's postal address
|
91
102
|
|
92
103
|
```ruby
|
93
|
-
puts result.inference.prediction.customer_address.value
|
104
|
+
puts result.document.inference.prediction.customer_address.value
|
94
105
|
```
|
95
106
|
|
96
107
|
**`customer_company_registrations`** (Array<CompanyRegistration>): Customer's company registration
|
97
108
|
|
98
109
|
```ruby
|
99
|
-
result.inference.prediction.customer_company_registrations.each do |registration|
|
110
|
+
result.document.inference.prediction.customer_company_registrations.each do |registration|
|
100
111
|
puts registration.value
|
101
112
|
puts registration.type
|
102
113
|
end
|
@@ -113,13 +124,13 @@ The following date fields are available:
|
|
113
124
|
**`date`**: Date the invoice was issued
|
114
125
|
|
115
126
|
```ruby
|
116
|
-
puts result.inference.prediction.date.value
|
127
|
+
puts result.document.inference.prediction.date.value
|
117
128
|
```
|
118
129
|
|
119
130
|
**`due_date`**: Payment due date of the invoice.
|
120
131
|
|
121
132
|
```ruby
|
122
|
-
puts result.inference.prediction.due_date.value
|
133
|
+
puts result.document.inference.prediction.due_date.value
|
123
134
|
```
|
124
135
|
|
125
136
|
### Locale
|
@@ -127,17 +138,17 @@ puts result.inference.prediction.due_date.value
|
|
127
138
|
|
128
139
|
* `locale.language` (String): Language code in [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format as seen on the document.
|
129
140
|
```ruby
|
130
|
-
puts result.inference.prediction.locale.language
|
141
|
+
puts result.document.inference.prediction.locale.language
|
131
142
|
```
|
132
143
|
|
133
144
|
* `locale.currency` (String): Currency code in [ISO 4217](https://en.wikipedia.org/wiki/ISO_4217) format as seen on the document.
|
134
145
|
```ruby
|
135
|
-
puts result.inference.prediction.locale.currency
|
146
|
+
puts result.document.inference.prediction.locale.currency
|
136
147
|
```
|
137
148
|
|
138
149
|
* `locale.country` (String): Country code in [ISO 3166-1](https://en.wikipedia.org/wiki/ISO_3166-1) alpha-2 format as seen on the document.
|
139
150
|
```ruby
|
140
|
-
puts result.inference.prediction.locale.country
|
151
|
+
puts result.document.inference.prediction.locale.country
|
141
152
|
```
|
142
153
|
|
143
154
|
### Supplier Information
|
@@ -145,13 +156,13 @@ puts result.inference.prediction.locale.country
|
|
145
156
|
**`supplier_name`**: Supplier name as written in the invoice (logo or supplier Info).
|
146
157
|
|
147
158
|
```ruby
|
148
|
-
puts result.inference.prediction.supplier_name.value
|
159
|
+
puts result.document.inference.prediction.supplier_name.value
|
149
160
|
```
|
150
161
|
|
151
162
|
**`supplier_address`**: Supplier address as written in the invoice.
|
152
163
|
|
153
164
|
```ruby
|
154
|
-
puts result.inference.prediction.supplier_address.value
|
165
|
+
puts result.document.inference.prediction.supplier_address.value
|
155
166
|
```
|
156
167
|
|
157
168
|
**`supplier_payment_details`** (Array< PaymentDetails >): List of the invoice's supplier payment details.
|
@@ -160,25 +171,25 @@ Each object in the list contains extra attributes:
|
|
160
171
|
* `iban` (String)
|
161
172
|
```ruby
|
162
173
|
# Show the IBAN of the first payment
|
163
|
-
puts result.inference.prediction.supplier_payment_details[0].iban
|
174
|
+
puts result.document.inference.prediction.supplier_payment_details[0].iban
|
164
175
|
```
|
165
176
|
|
166
177
|
* `swift` (String)
|
167
178
|
```ruby
|
168
179
|
# Show the SWIFT of the first payment
|
169
|
-
puts result.inference.prediction.supplier_payment_details[0].swift
|
180
|
+
puts result.document.inference.prediction.supplier_payment_details[0].swift
|
170
181
|
```
|
171
182
|
|
172
183
|
* `routing_number` (String)
|
173
184
|
```ruby
|
174
185
|
# Show the routing number of the first payment
|
175
|
-
puts result.inference.prediction.supplier_payment_details[0].routing_number
|
186
|
+
puts result.document.inference.prediction.supplier_payment_details[0].routing_number
|
176
187
|
```
|
177
188
|
|
178
189
|
* `account_number` (String)
|
179
190
|
```ruby
|
180
191
|
# Show the account number of the first payment
|
181
|
-
puts result.inference.prediction.supplier_payment_details[0].account_number
|
192
|
+
puts result.document.inference.prediction.supplier_payment_details[0].account_number
|
182
193
|
```
|
183
194
|
|
184
195
|
**`supplier_company_registrations`** (Array< CompanyRegistration >):
|
@@ -187,7 +198,7 @@ Each object in the list contains an extra attribute:
|
|
187
198
|
|
188
199
|
* `type` (String): Type of company registration number among predefined categories.
|
189
200
|
```ruby
|
190
|
-
result.inference.prediction.supplier_company_registrations.each do |registration|
|
201
|
+
result.document.inference.prediction.supplier_company_registrations.each do |registration|
|
191
202
|
puts registration.value
|
192
203
|
puts registration.type
|
193
204
|
end
|
@@ -199,19 +210,19 @@ end
|
|
199
210
|
* `value` (Float): The tax amount.
|
200
211
|
```ruby
|
201
212
|
# Show the amount of the first tax
|
202
|
-
puts result.inference.prediction.taxes[0].value
|
213
|
+
puts result.document.inference.prediction.taxes[0].value
|
203
214
|
```
|
204
215
|
|
205
216
|
* `code` (String): The tax code (HST, GST, etc. for Canada; City Tax, State Tax, etc. for the US).
|
206
217
|
```ruby
|
207
218
|
# Show the code of the first tax
|
208
|
-
puts result.inference.prediction.taxes[0].code
|
219
|
+
puts result.document.inference.prediction.taxes[0].code
|
209
220
|
```
|
210
221
|
|
211
222
|
* `rate` (Float): The tax rate.
|
212
223
|
```ruby
|
213
224
|
# Show the rate of the first tax
|
214
|
-
puts result.inference.prediction.taxes[0].rate
|
225
|
+
puts result.document.inference.prediction.taxes[0].rate
|
215
226
|
```
|
216
227
|
|
217
228
|
### Totals
|
@@ -219,19 +230,19 @@ puts result.inference.prediction.taxes[0].rate
|
|
219
230
|
**`total_amount`** (Field): Total amount including taxes.
|
220
231
|
|
221
232
|
```ruby
|
222
|
-
puts result.inference.prediction.total_amount.value
|
233
|
+
puts result.document.inference.prediction.total_amount.value
|
223
234
|
```
|
224
235
|
|
225
236
|
**`total_net`** (Field): Total amount excluding taxes.
|
226
237
|
|
227
238
|
```ruby
|
228
|
-
puts result.inference.prediction.total_net.value
|
239
|
+
puts result.document.inference.prediction.total_net.value
|
229
240
|
```
|
230
241
|
|
231
242
|
**`total_tax`** (Field): Total tax value from tax lines.
|
232
243
|
|
233
244
|
```ruby
|
234
|
-
puts result.inference.prediction.total_tax.value
|
245
|
+
puts result.document.inference.prediction.total_tax.value
|
235
246
|
```
|
236
247
|
|
237
248
|
### Line items
|
@@ -251,7 +262,7 @@ Each object in the list contains:
|
|
251
262
|
* `polygon` (Polygon)
|
252
263
|
|
253
264
|
```ruby
|
254
|
-
result.inference.prediction.line_items.each do |line_item|
|
265
|
+
result.document.inference.prediction.line_items.each do |line_item|
|
255
266
|
pp line_item
|
256
267
|
end
|
257
268
|
```
|