mindee 2.2.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +2 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +36 -0
- data/README.md +29 -16
- data/bin/mindee.rb +23 -26
- data/docs/code_samples/bank_account_details_v1.txt +10 -5
- data/docs/code_samples/bank_account_details_v2.txt +19 -0
- data/docs/code_samples/bank_check_v1.txt +10 -5
- data/docs/code_samples/carte_vitale_v1.txt +10 -5
- data/docs/code_samples/custom_v1.txt +19 -10
- data/docs/code_samples/default.txt +10 -2
- data/docs/code_samples/expense_receipts_v4.txt +10 -5
- data/docs/code_samples/expense_receipts_v5.txt +11 -6
- data/docs/code_samples/financial_document_v1.txt +10 -5
- data/docs/code_samples/idcard_fr_v1.txt +10 -5
- data/docs/code_samples/invoice_splitter_v1_async.txt +66 -0
- data/docs/code_samples/invoices_v4.txt +10 -5
- data/docs/code_samples/license_plates_v1.txt +10 -5
- data/docs/code_samples/passport_v1.txt +10 -5
- data/docs/code_samples/proof_of_address_v1.txt +10 -5
- data/docs/ruby-api-builder.md +30 -31
- data/docs/ruby-getting-started.md +64 -23
- data/docs/ruby-invoice-ocr.md +70 -59
- data/docs/ruby-passport-ocr.md +49 -40
- data/docs/ruby-receipt-ocr.md +45 -32
- data/lib/mindee/client.rb +150 -148
- data/lib/mindee/geometry/min_max.rb +23 -0
- data/lib/mindee/geometry/point.rb +35 -0
- data/lib/mindee/geometry/polygon.rb +23 -0
- data/lib/mindee/geometry/quadrilateral.rb +45 -0
- data/lib/mindee/geometry/utils.rb +81 -0
- data/lib/mindee/geometry.rb +5 -116
- data/lib/mindee/http/endpoint.rb +123 -16
- data/lib/mindee/http.rb +3 -0
- data/lib/mindee/input/sources.rb +87 -73
- data/lib/mindee/parsing/common/api_response.rb +109 -0
- data/lib/mindee/parsing/common/document.rb +48 -0
- data/lib/mindee/parsing/common/error.rb +24 -0
- data/lib/mindee/parsing/common/inference.rb +43 -0
- data/lib/mindee/parsing/common/ocr/mvision_v1.rb +34 -0
- data/lib/mindee/parsing/common/ocr/ocr.rb +169 -0
- data/lib/mindee/parsing/common/ocr.rb +3 -0
- data/lib/mindee/parsing/common/orientation.rb +26 -0
- data/lib/mindee/parsing/common/page.rb +40 -0
- data/lib/mindee/parsing/common/prediction.rb +15 -0
- data/lib/mindee/parsing/common/product.rb +19 -0
- data/lib/mindee/parsing/common.rb +10 -0
- data/lib/mindee/parsing/custom/classification_field.rb +28 -0
- data/lib/mindee/parsing/custom/list_field.rb +76 -0
- data/lib/mindee/parsing/custom.rb +4 -0
- data/lib/mindee/parsing/standard/amount_field.rb +26 -0
- data/lib/mindee/parsing/standard/base_field.rb +104 -0
- data/lib/mindee/parsing/standard/classification_field.rb +16 -0
- data/lib/mindee/parsing/standard/company_registration_field.rb +21 -0
- data/lib/mindee/parsing/standard/date_field.rb +34 -0
- data/lib/mindee/parsing/standard/locale_field.rb +50 -0
- data/lib/mindee/parsing/standard/payment_details_field.rb +42 -0
- data/lib/mindee/parsing/standard/position_field.rb +44 -0
- data/lib/mindee/parsing/standard/tax_field.rb +108 -0
- data/lib/mindee/parsing/standard/text_field.rb +16 -0
- data/lib/mindee/parsing/standard.rb +12 -0
- data/lib/mindee/parsing.rb +3 -2
- data/lib/mindee/{input → pdf}/pdf_processing.rb +4 -32
- data/lib/mindee/pdf/pdf_tools.rb +34 -0
- data/lib/mindee/pdf.rb +3 -0
- data/lib/mindee/product/.rubocop.yml +5 -0
- data/lib/mindee/product/custom/custom_v1.rb +35 -0
- data/lib/mindee/product/custom/custom_v1_document.rb +60 -0
- data/lib/mindee/product/custom/custom_v1_page.rb +32 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +38 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +37 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_page.rb +34 -0
- data/lib/mindee/product/financial_document/financial_document_v1.rb +36 -0
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +188 -0
- data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +90 -0
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +32 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +43 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_page.rb +34 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +71 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +58 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rb +34 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +38 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +52 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb +34 -0
- data/lib/mindee/product/fr/id_card/id_card_v1.rb +38 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +82 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_page.rb +48 -0
- data/lib/mindee/product/invoice/invoice_v4.rb +37 -0
- data/lib/mindee/product/invoice/invoice_v4_document.rb +212 -0
- data/lib/mindee/product/invoice/invoice_v4_line_item.rb +66 -0
- data/lib/mindee/product/invoice/invoice_v4_page.rb +32 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +36 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +65 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +32 -0
- data/lib/mindee/product/passport/passport_v1.rb +36 -0
- data/lib/mindee/{parsing/prediction/fr/id_card/id_card_v1.rb → product/passport/passport_v1_document.rb} +45 -45
- data/lib/mindee/product/passport/passport_v1_page.rb +32 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +36 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +83 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v4.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v4_document.rb +86 -0
- data/lib/mindee/product/receipt/receipt_v4_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v5.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v5_document.rb +138 -0
- data/lib/mindee/product/receipt/receipt_v5_line_item.rb +69 -0
- data/lib/mindee/product/receipt/receipt_v5_page.rb +32 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1.rb +38 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +73 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_page.rb +34 -0
- data/lib/mindee/product.rb +16 -0
- data/lib/mindee/version.rb +2 -1
- data/lib/mindee.rb +3 -1
- metadata +87 -38
- data/docs/code_samples/shipping_containers_v1.txt +0 -14
- data/lib/mindee/document_config.rb +0 -60
- data/lib/mindee/parsing/document.rb +0 -31
- data/lib/mindee/parsing/error.rb +0 -22
- data/lib/mindee/parsing/inference.rb +0 -53
- data/lib/mindee/parsing/page.rb +0 -46
- data/lib/mindee/parsing/prediction/base.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/amount.rb +0 -21
- data/lib/mindee/parsing/prediction/common_fields/base.rb +0 -72
- data/lib/mindee/parsing/prediction/common_fields/company_registration.rb +0 -17
- data/lib/mindee/parsing/prediction/common_fields/date.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/locale.rb +0 -45
- data/lib/mindee/parsing/prediction/common_fields/payment_details.rb +0 -33
- data/lib/mindee/parsing/prediction/common_fields/position.rb +0 -39
- data/lib/mindee/parsing/prediction/common_fields/tax.rb +0 -40
- data/lib/mindee/parsing/prediction/common_fields/text.rb +0 -12
- data/lib/mindee/parsing/prediction/common_fields.rb +0 -11
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +0 -58
- data/lib/mindee/parsing/prediction/custom/fields.rb +0 -91
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +0 -34
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +0 -237
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +0 -40
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +0 -49
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +0 -212
- data/lib/mindee/parsing/prediction/invoice/invoice_v4_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +0 -121
- data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +0 -80
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +0 -87
- data/lib/mindee/parsing/prediction/receipt/receipt_v5.rb +0 -136
- data/lib/mindee/parsing/prediction/receipt/receipt_v5_line_item.rb +0 -37
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +0 -38
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +0 -70
- data/lib/mindee/parsing/prediction.rb +0 -15
|
@@ -3,12 +3,17 @@ require 'mindee'
|
|
|
3
3
|
# Init a new client
|
|
4
4
|
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
|
5
5
|
|
|
6
|
-
# Load a file from disk
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
# Load a file from disk
|
|
7
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
|
8
|
+
|
|
9
|
+
# Parse the file
|
|
10
|
+
result = mindee_client.parse(
|
|
11
|
+
input_source,
|
|
12
|
+
Mindee::Product::ProofOfAddress::ProofOfAddressV1
|
|
13
|
+
)
|
|
9
14
|
|
|
10
15
|
# Print a full summary of the parsed data in RST format
|
|
11
|
-
puts result
|
|
16
|
+
puts result.document
|
|
12
17
|
|
|
13
18
|
# Print the document-level parsed data
|
|
14
|
-
# puts result.inference.prediction
|
|
19
|
+
# puts result.document.inference.prediction
|
data/docs/ruby-api-builder.md
CHANGED
|
@@ -9,52 +9,51 @@ created with the [API Builder](https://developers.mindee.com/docs/overview).
|
|
|
9
9
|
> 📘 **Info**
|
|
10
10
|
>
|
|
11
11
|
> We used a data model that will be different from yours.
|
|
12
|
-
> To modify this to your own custom API, change the `
|
|
12
|
+
> To modify this to your own custom API, change the `mindee_client.create_endpoint` call with your own parameters.
|
|
13
13
|
|
|
14
14
|
```ruby
|
|
15
15
|
require 'mindee'
|
|
16
16
|
|
|
17
17
|
# Init a new client and configure your custom document
|
|
18
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
|
19
|
-
'john',
|
|
20
|
-
'wnine',
|
|
21
|
-
version: '1.1' # optional, if not set, use the latest version of the model
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
# Load a file from disk and parse it
|
|
25
|
-
result = mindee_client.doc_from_path('/path/to/file.ext')
|
|
26
|
-
.parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
|
|
18
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
|
27
19
|
|
|
28
|
-
#
|
|
29
|
-
|
|
20
|
+
# Create an endpoint for your custom product
|
|
21
|
+
custom_endpoint = mindee_client.create_endpoint(
|
|
22
|
+
account_name: 'john',
|
|
23
|
+
endpoint_name: 'wnine',
|
|
24
|
+
version: '1.1' # optional, if not set, uses the latest version of the model
|
|
25
|
+
)
|
|
30
26
|
```
|
|
31
27
|
|
|
32
|
-
If the `version` argument is set, you'll be required to update it every time a new model is trained.
|
|
33
|
-
This is probably not needed for development but essential for production use.
|
|
28
|
+
> **Note:** If the `version` argument is set, you'll be required to update it every time a new model is trained.
|
|
29
|
+
> This is probably not needed for development but essential for production use.
|
|
34
30
|
|
|
35
31
|
## Parsing Documents
|
|
36
32
|
The client calls the `parse` method when parsing your custom document, which will return an object that you can send to the API.
|
|
37
|
-
|
|
33
|
+
If your document does not use an off-the-shelf (OTS) API, its endpoint must be specified when calling the `parse` method.
|
|
38
34
|
|
|
39
35
|
```ruby
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
36
|
+
mindee_client.parse(
|
|
37
|
+
input_source,
|
|
38
|
+
Mindee::Product::Custom::CustomV1,
|
|
39
|
+
endpoint: custom_endpoint
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Print a summary of the document prediction in RST format
|
|
44
|
+
puts result.document
|
|
43
45
|
```
|
|
44
46
|
|
|
45
47
|
> 📘 **Info**
|
|
46
48
|
>
|
|
47
|
-
> If your custom document has the same name as
|
|
48
|
-
> you **must** specify your account name when
|
|
49
|
+
> If your custom document has the same name as one of the [off-the-shelf APIs](https://developers.mindee.com/docs/what-is-off-the-shelf-api),
|
|
50
|
+
> you **must** specify your account name when calling the `create_endpoint` method:
|
|
49
51
|
|
|
50
52
|
```ruby
|
|
51
|
-
|
|
52
|
-
'receipt',
|
|
53
|
-
'john'
|
|
53
|
+
custom_endpoint = mindee_client.create_endpoint(
|
|
54
|
+
endpoint_name: 'receipt',
|
|
55
|
+
account_name: 'john'
|
|
54
56
|
)
|
|
55
|
-
|
|
56
|
-
result = mindee_client.doc_from_path('/path/to/receipt.jpg')
|
|
57
|
-
.parse(Mindee::Prediction::CustomV1, account_name: 'john')
|
|
58
57
|
```
|
|
59
58
|
|
|
60
59
|
## Document Fields
|
|
@@ -81,21 +80,21 @@ Individual field values can be accessed by using the field's API name, in the ex
|
|
|
81
80
|
|
|
82
81
|
```ruby
|
|
83
82
|
# raw data, list of each word object
|
|
84
|
-
pp result.inference.prediction.fields[:address].values
|
|
83
|
+
pp result.document.inference.prediction.fields[:address].values
|
|
85
84
|
|
|
86
85
|
# list of all values
|
|
87
|
-
puts result.inference.prediction.fields[:address].contents_list
|
|
86
|
+
puts result.document.inference.prediction.fields[:address].contents_list
|
|
88
87
|
|
|
89
88
|
# default string representation
|
|
90
|
-
puts result.inference.prediction.fields[:address].to_s
|
|
89
|
+
puts result.document.inference.prediction.fields[:address].to_s
|
|
91
90
|
|
|
92
91
|
# custom string representation
|
|
93
|
-
puts result.inference.prediction.fields[:address].contents_str(separator: '_')
|
|
92
|
+
puts result.document.inference.prediction.fields[:address].contents_str(separator: '_')
|
|
94
93
|
```
|
|
95
94
|
|
|
96
95
|
To iterate over all the fields:
|
|
97
96
|
```ruby
|
|
98
|
-
result.inference.prediction.fields.each do |name, info|
|
|
97
|
+
result.document.inference.prediction.fields.each do |name, info|
|
|
99
98
|
puts name
|
|
100
99
|
puts info.values
|
|
101
100
|
end
|
|
@@ -72,7 +72,7 @@ Using Mindee's APIs can be broken down into the following steps:
|
|
|
72
72
|
Let's take a deep dive into how this works.
|
|
73
73
|
|
|
74
74
|
## Initializing the Client
|
|
75
|
-
The `Client`
|
|
75
|
+
The `Client` automatically connects to the default endpoints for each product (or creates one with given parameters for Custom APIs).
|
|
76
76
|
|
|
77
77
|
The `Client` requires your [API key](https://developers.mindee.com/docs/make-your-first-request#create-an-api-key).
|
|
78
78
|
|
|
@@ -121,15 +121,25 @@ There are a few different ways of loading a document file, depending on your use
|
|
|
121
121
|
* [File Object](#file-object)
|
|
122
122
|
* [Base64](#base64)
|
|
123
123
|
* [Bytes](#bytes)
|
|
124
|
+
* [URL](#url)
|
|
124
125
|
|
|
125
126
|
### Path
|
|
126
127
|
Load from a file directly from disk. Requires an absolute path, as a string.
|
|
127
128
|
|
|
128
129
|
```ruby
|
|
129
|
-
|
|
130
|
+
# Init a new client
|
|
131
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
|
132
|
+
|
|
133
|
+
# Load a file from disk
|
|
134
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
|
130
135
|
|
|
136
|
+
# Parse the file
|
|
137
|
+
result = mindee_client.parse(
|
|
138
|
+
input_source,
|
|
139
|
+
Mindee::Product::Invoice::InvoiceV4
|
|
140
|
+
)
|
|
131
141
|
# Print a full summary of the parsed data in RST format
|
|
132
|
-
puts result
|
|
142
|
+
puts result.document
|
|
133
143
|
```
|
|
134
144
|
|
|
135
145
|
### File Object
|
|
@@ -138,13 +148,14 @@ A normal Ruby file object with a path. Must be in binary mode.
|
|
|
138
148
|
**Note**: The original filename is required when calling the method.
|
|
139
149
|
|
|
140
150
|
```ruby
|
|
141
|
-
result = nil
|
|
142
151
|
File.open(INVOICE_FILE, 'rb') do |fo|
|
|
143
|
-
|
|
152
|
+
input_source = mindee_client.source_from_file(fo, "invoice.jpg")
|
|
144
153
|
end
|
|
145
154
|
|
|
146
|
-
|
|
147
|
-
|
|
155
|
+
result = mindee_client.parse(
|
|
156
|
+
input_source,
|
|
157
|
+
Mindee::Product::Invoice::InvoiceV4
|
|
158
|
+
)
|
|
148
159
|
```
|
|
149
160
|
|
|
150
161
|
### Base64
|
|
@@ -154,10 +165,13 @@ Load file contents from a base64-encoded string.
|
|
|
154
165
|
|
|
155
166
|
```ruby
|
|
156
167
|
b64_string = "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLD...."
|
|
157
|
-
result = mindee_client.doc_from_b64string(b64_string, "receipt.jpg").parse(Mindee::Prediction::ReceiptV4)
|
|
158
168
|
|
|
159
|
-
|
|
160
|
-
|
|
169
|
+
input_source = mindee_client.source_from_b64string(b64_string, "receipt.jpg")
|
|
170
|
+
|
|
171
|
+
result = mindee_client.parse(
|
|
172
|
+
input_source,
|
|
173
|
+
Mindee::Product::Receipt::ReceiptV5
|
|
174
|
+
)
|
|
161
175
|
```
|
|
162
176
|
|
|
163
177
|
### Bytes
|
|
@@ -167,33 +181,60 @@ Requires raw bytes.
|
|
|
167
181
|
|
|
168
182
|
```ruby
|
|
169
183
|
raw_bytes = b"%PDF-1.3\n%\xbf\xf7\xa2\xfe\n1 0 ob..."
|
|
170
|
-
|
|
184
|
+
input_source = mindee_client.source_from_bytes(raw_bytes, "invoice.pdf")
|
|
171
185
|
|
|
172
|
-
|
|
173
|
-
|
|
186
|
+
result = mindee_client.parse(
|
|
187
|
+
input_source,
|
|
188
|
+
Mindee::Product::Invoice::InvoiceV4
|
|
189
|
+
)
|
|
174
190
|
```
|
|
175
191
|
|
|
192
|
+
### URL
|
|
193
|
+
Requires a URL as a String.
|
|
194
|
+
|
|
195
|
+
**Note**: The URL must start with `https://`.
|
|
196
|
+
```ruby
|
|
197
|
+
input_source = mindee_client.source_from_url("https://www.example.com/invoice.pdf")
|
|
198
|
+
|
|
199
|
+
result = mindee_client.parse(
|
|
200
|
+
input_source,
|
|
201
|
+
Mindee::Product::Invoice::InvoiceV4
|
|
202
|
+
)
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
|
|
176
206
|
## Sending a File
|
|
177
207
|
To send a file to the API, we need to specify how to process the document.
|
|
178
208
|
This will determine which API endpoint is used and how the API return will be handled internally by the library.
|
|
179
209
|
|
|
180
|
-
More specifically, we need to set a `Mindee::
|
|
210
|
+
More specifically, we need to pass a `Mindee::Product` class as the second parameter of the `parse` method.
|
|
181
211
|
|
|
182
|
-
This is because the `
|
|
212
|
+
This is because the `Endpoint`'s URLs will be set according to it.
|
|
183
213
|
|
|
184
|
-
Each document type available in the library has its corresponding class, which inherit from the base `Mindee::
|
|
214
|
+
Each document type available in the library has its corresponding class, which inherits from the base `Mindee::Parsing::Common::Predict` class.
|
|
185
215
|
This is detailed in each document-specific guide.
|
|
186
216
|
|
|
187
217
|
### Off-the-Shelf Documents
|
|
188
218
|
Simply setting the correct class is enough:
|
|
189
219
|
```ruby
|
|
190
|
-
|
|
220
|
+
|
|
221
|
+
result = mindee_client.parse(
|
|
222
|
+
input_source,
|
|
223
|
+
Mindee::Product::Invoice::InvoiceV4
|
|
224
|
+
)
|
|
191
225
|
```
|
|
192
226
|
|
|
193
227
|
### Custom Documents
|
|
194
|
-
|
|
228
|
+
For custom documents, the endpoint to use must also be set, and it must take in an `endpoint_name`:
|
|
229
|
+
|
|
195
230
|
```ruby
|
|
196
|
-
|
|
231
|
+
endpoint = mindee_client.create_endpoint(endpoint_name: 'wnine')
|
|
232
|
+
|
|
233
|
+
result = mindee_client.parse(
|
|
234
|
+
input_source,
|
|
235
|
+
Mindee::Product::Custom::CustomV1,
|
|
236
|
+
endpoint: endpoint
|
|
237
|
+
)
|
|
197
238
|
```
|
|
198
239
|
|
|
199
240
|
This is because the `CustomV1` class is enough to handle the return processing, but the actual endpoint needs to be specified.
|
|
@@ -220,10 +261,10 @@ only the highest confidence field data will be shown (this is all done automatic
|
|
|
220
261
|
|
|
221
262
|
```ruby
|
|
222
263
|
# as an object, complete
|
|
223
|
-
pp result.inference.prediction
|
|
264
|
+
pp result.document.inference.prediction
|
|
224
265
|
|
|
225
266
|
# as a string, summary in RST format
|
|
226
|
-
puts result.inference.prediction
|
|
267
|
+
puts result.document.inference.prediction
|
|
227
268
|
```
|
|
228
269
|
|
|
229
270
|
#### Page level prediction
|
|
@@ -237,7 +278,7 @@ Single page documents will have a single entry.
|
|
|
237
278
|
|
|
238
279
|
Iteration is done like any Ruby array:
|
|
239
280
|
```ruby
|
|
240
|
-
response.inference.pages.each do |page|
|
|
281
|
+
response.document.inference.pages.each do |page|
|
|
241
282
|
# as an object, complete
|
|
242
283
|
pp page.prediction
|
|
243
284
|
|
|
@@ -256,7 +297,7 @@ If the page requires rotation for correct display, the orientation field gives a
|
|
|
256
297
|
* 270 degrees: the page must be rotated counterclockwise to be upright
|
|
257
298
|
|
|
258
299
|
```ruby
|
|
259
|
-
response.inference.pages.each do |page|
|
|
300
|
+
response.document.inference.pages.each do |page|
|
|
260
301
|
puts page.orientation.value
|
|
261
302
|
end
|
|
262
303
|
```
|
data/docs/ruby-invoice-ocr.md
CHANGED
|
@@ -6,46 +6,55 @@ Using this sample below, we are going to illustrate how to extract the data that
|
|
|
6
6
|
|
|
7
7
|
## Quick Start
|
|
8
8
|
```ruby
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
# Init a new client, specifying an API key
|
|
9
|
+
# Init a new client
|
|
12
10
|
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
|
13
11
|
|
|
14
|
-
#
|
|
15
|
-
|
|
12
|
+
# Load a file from disk
|
|
13
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
|
14
|
+
|
|
15
|
+
# Parse the file
|
|
16
|
+
result = mindee_client.parse(
|
|
17
|
+
input_source,
|
|
18
|
+
Mindee::Product::Invoice::InvoiceV4
|
|
19
|
+
)
|
|
16
20
|
|
|
17
|
-
# Print a summary of the
|
|
18
|
-
puts result.
|
|
21
|
+
# Print a full summary of the parsed data in RST format
|
|
22
|
+
puts result.document
|
|
19
23
|
```
|
|
20
24
|
|
|
21
25
|
Output:
|
|
22
26
|
```
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
:
|
|
27
|
+
Invoice V4 Prediction
|
|
28
|
+
=====================
|
|
29
|
+
:Filename:
|
|
30
|
+
:Locale: fr; fr; EUR;
|
|
31
|
+
:Invoice number: 0042004801351
|
|
26
32
|
:Reference numbers: AD29094
|
|
27
|
-
:Invoice date:
|
|
28
|
-
:Invoice due date:
|
|
33
|
+
:Invoice date: 2020-02-17
|
|
34
|
+
:Invoice due date: 2020-02-17
|
|
29
35
|
:Supplier name: TURNPIKE DESIGNS CO.
|
|
30
36
|
:Supplier address: 156 University Ave, Toronto ON, Canada M5H 2H7
|
|
31
|
-
:Supplier company registrations:
|
|
32
|
-
:Supplier payment details:
|
|
37
|
+
:Supplier company registrations: 501124705; FR33501124705
|
|
38
|
+
:Supplier payment details: FR7640254025476501124705368;
|
|
33
39
|
:Customer name: JIRO DOI
|
|
34
|
-
:Customer
|
|
35
|
-
:Customer
|
|
36
|
-
:Taxes: 193.20 8.00%
|
|
37
|
-
:Total net: 2415.00
|
|
38
|
-
:Total taxes: 193.20
|
|
39
|
-
:Total amount: 2608.20
|
|
40
|
-
|
|
40
|
+
:Customer company registrations: FR00000000000; 111222333
|
|
41
|
+
:Customer address: 1954 Bloon Street West Toronto, ON, M6P 3K9 Canada
|
|
41
42
|
:Line Items:
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
43
|
+
Code | QTY | Price | Amount | Tax (Rate) | Description
|
|
44
|
+
| | | 4.31 | (2.10%) | PQ20 ETIQ ULTRA RESIS METAXXDC
|
|
45
|
+
| 1.00 | 65.00 | 75.00 | 10.00 | Platinum web hosting package Dow...
|
|
46
|
+
XXX81125600010 | 1.00 | 250.01 | 275.51 | 25.50 (10.20%) | a long string describing the ite...
|
|
47
|
+
ABC456 | 200.30 | 8.101 | 1622.63 | 121.70 (7.50%) | Liquid perfection
|
|
48
|
+
| | | | | CARTOUCHE L NR BROTHER TN247BK
|
|
49
|
+
:Taxes:
|
|
50
|
+
+---------------+--------+----------+---------------+
|
|
51
|
+
| Base | Code | Rate (%) | Amount |
|
|
52
|
+
+===============+========+==========+===============+
|
|
53
|
+
| | | 20.00 | 97.98 |
|
|
54
|
+
+---------------+--------+----------+---------------+
|
|
55
|
+
:Total tax: 97.98
|
|
56
|
+
:Total net: 489.97
|
|
57
|
+
:Total amount: 587.95
|
|
49
58
|
```
|
|
50
59
|
|
|
51
60
|
> 📘 **Info**
|
|
@@ -69,34 +78,36 @@ Depending on the field type, there might be additional attributes that will be e
|
|
|
69
78
|
|
|
70
79
|
Using the above sample, the following are the basic fields that can be extracted:
|
|
71
80
|
|
|
72
|
-
- [
|
|
73
|
-
- [
|
|
74
|
-
- [
|
|
75
|
-
- [
|
|
76
|
-
- [
|
|
77
|
-
- [
|
|
78
|
-
- [
|
|
79
|
-
- [
|
|
80
|
-
- [
|
|
81
|
+
- [Quick Start](#quick-start)
|
|
82
|
+
- [Fields](#fields)
|
|
83
|
+
- [Attributes](#attributes)
|
|
84
|
+
- [Customer Information](#customer-information)
|
|
85
|
+
- [Dates](#dates)
|
|
86
|
+
- [Locale](#locale)
|
|
87
|
+
- [Supplier Information](#supplier-information)
|
|
88
|
+
- [Taxes](#taxes)
|
|
89
|
+
- [Totals](#totals)
|
|
90
|
+
- [Line items](#line-items)
|
|
91
|
+
- [Questions?](#questions)
|
|
81
92
|
|
|
82
93
|
|
|
83
94
|
### Customer Information
|
|
84
95
|
**`customer_name`** (Field): Customer's name
|
|
85
96
|
|
|
86
97
|
```ruby
|
|
87
|
-
puts result.inference.prediction.customer_name.value
|
|
98
|
+
puts result.document.inference.prediction.customer_name.value
|
|
88
99
|
```
|
|
89
100
|
|
|
90
101
|
**`customer_address`** (Field): Customer's postal address
|
|
91
102
|
|
|
92
103
|
```ruby
|
|
93
|
-
puts result.inference.prediction.customer_address.value
|
|
104
|
+
puts result.document.inference.prediction.customer_address.value
|
|
94
105
|
```
|
|
95
106
|
|
|
96
107
|
**`customer_company_registrations`** (Array<CompanyRegistration>): Customer's company registration
|
|
97
108
|
|
|
98
109
|
```ruby
|
|
99
|
-
result.inference.prediction.customer_company_registrations.each do |registration|
|
|
110
|
+
result.document.inference.prediction.customer_company_registrations.each do |registration|
|
|
100
111
|
puts registration.value
|
|
101
112
|
puts registration.type
|
|
102
113
|
end
|
|
@@ -113,13 +124,13 @@ The following date fields are available:
|
|
|
113
124
|
**`date`**: Date the invoice was issued
|
|
114
125
|
|
|
115
126
|
```ruby
|
|
116
|
-
puts result.inference.prediction.date.value
|
|
127
|
+
puts result.document.inference.prediction.date.value
|
|
117
128
|
```
|
|
118
129
|
|
|
119
130
|
**`due_date`**: Payment due date of the invoice.
|
|
120
131
|
|
|
121
132
|
```ruby
|
|
122
|
-
puts result.inference.prediction.due_date.value
|
|
133
|
+
puts result.document.inference.prediction.due_date.value
|
|
123
134
|
```
|
|
124
135
|
|
|
125
136
|
### Locale
|
|
@@ -127,17 +138,17 @@ puts result.inference.prediction.due_date.value
|
|
|
127
138
|
|
|
128
139
|
* `locale.language` (String): Language code in [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format as seen on the document.
|
|
129
140
|
```ruby
|
|
130
|
-
puts result.inference.prediction.locale.language
|
|
141
|
+
puts result.document.inference.prediction.locale.language
|
|
131
142
|
```
|
|
132
143
|
|
|
133
144
|
* `locale.currency` (String): Currency code in [ISO 4217](https://en.wikipedia.org/wiki/ISO_4217) format as seen on the document.
|
|
134
145
|
```ruby
|
|
135
|
-
puts result.inference.prediction.locale.currency
|
|
146
|
+
puts result.document.inference.prediction.locale.currency
|
|
136
147
|
```
|
|
137
148
|
|
|
138
149
|
* `locale.country` (String): Country code in [ISO 3166-1](https://en.wikipedia.org/wiki/ISO_3166-1) alpha-2 format as seen on the document.
|
|
139
150
|
```ruby
|
|
140
|
-
puts result.inference.prediction.locale.country
|
|
151
|
+
puts result.document.inference.prediction.locale.country
|
|
141
152
|
```
|
|
142
153
|
|
|
143
154
|
### Supplier Information
|
|
@@ -145,13 +156,13 @@ puts result.inference.prediction.locale.country
|
|
|
145
156
|
**`supplier_name`**: Supplier name as written in the invoice (logo or supplier Info).
|
|
146
157
|
|
|
147
158
|
```ruby
|
|
148
|
-
puts result.inference.prediction.supplier_name.value
|
|
159
|
+
puts result.document.inference.prediction.supplier_name.value
|
|
149
160
|
```
|
|
150
161
|
|
|
151
162
|
**`supplier_address`**: Supplier address as written in the invoice.
|
|
152
163
|
|
|
153
164
|
```ruby
|
|
154
|
-
puts result.inference.prediction.supplier_address.value
|
|
165
|
+
puts result.document.inference.prediction.supplier_address.value
|
|
155
166
|
```
|
|
156
167
|
|
|
157
168
|
**`supplier_payment_details`** (Array< PaymentDetails >): List of invoice's supplier payment details.
|
|
@@ -160,25 +171,25 @@ Each object in the list contains extra attributes:
|
|
|
160
171
|
* `iban` (String)
|
|
161
172
|
```ruby
|
|
162
173
|
# Show the IBAN of the first payment
|
|
163
|
-
puts result.inference.prediction.supplier_payment_details[0].iban
|
|
174
|
+
puts result.document.inference.prediction.supplier_payment_details[0].iban
|
|
164
175
|
```
|
|
165
176
|
|
|
166
177
|
* `swift` (String)
|
|
167
178
|
```ruby
|
|
168
179
|
# Show the SWIFT of the first payment
|
|
169
|
-
puts result.inference.prediction.supplier_payment_details[0].swift
|
|
180
|
+
puts result.document.inference.prediction.supplier_payment_details[0].swift
|
|
170
181
|
```
|
|
171
182
|
|
|
172
183
|
* `routing_number` (String)
|
|
173
184
|
```ruby
|
|
174
185
|
# Show the routing number of the first payment
|
|
175
|
-
puts result.inference.prediction.supplier_payment_details[0].routing_number
|
|
186
|
+
puts result.document.inference.prediction.supplier_payment_details[0].routing_number
|
|
176
187
|
```
|
|
177
188
|
|
|
178
189
|
* `account_number` (String)
|
|
179
190
|
```ruby
|
|
180
191
|
# Show the account number of the first payment
|
|
181
|
-
puts result.inference.prediction.supplier_payment_details[0].account_number
|
|
192
|
+
puts result.document.inference.prediction.supplier_payment_details[0].account_number
|
|
182
193
|
```
|
|
183
194
|
|
|
184
195
|
**`supplier_company_registrations`** (Array< CompanyRegistration >):
|
|
@@ -187,7 +198,7 @@ Each object in the list contains an extra attribute:
|
|
|
187
198
|
|
|
188
199
|
* `type` (String): Type of company registration number among predefined categories.
|
|
189
200
|
```ruby
|
|
190
|
-
result.inference.prediction.supplier_company_registrations.each do |registration|
|
|
201
|
+
result.document.inference.prediction.supplier_company_registrations.each do |registration|
|
|
191
202
|
puts registration.value
|
|
192
203
|
puts registration.type
|
|
193
204
|
end
|
|
@@ -199,19 +210,19 @@ end
|
|
|
199
210
|
* `value` (Float): The tax amount.
|
|
200
211
|
```ruby
|
|
201
212
|
# Show the amount of the first tax
|
|
202
|
-
puts result.inference.prediction.taxes[0].value
|
|
213
|
+
puts result.document.inference.prediction.taxes[0].value
|
|
203
214
|
```
|
|
204
215
|
|
|
205
216
|
* `code` (String): The tax code (HST, GST... for Canadian; City Tax, State tax for US, etc..).
|
|
206
217
|
```ruby
|
|
207
218
|
# Show the code of the first tax
|
|
208
|
-
puts result.inference.prediction.taxes[0].code
|
|
219
|
+
puts result.document.inference.prediction.taxes[0].code
|
|
209
220
|
```
|
|
210
221
|
|
|
211
222
|
* `rate` (Float): The tax rate.
|
|
212
223
|
```ruby
|
|
213
224
|
# Show the rate of the first tax
|
|
214
|
-
puts result.inference.prediction.taxes[0].rate
|
|
225
|
+
puts result.document.inference.prediction.taxes[0].rate
|
|
215
226
|
```
|
|
216
227
|
|
|
217
228
|
### Totals
|
|
@@ -219,19 +230,19 @@ puts result.inference.prediction.taxes[0].rate
|
|
|
219
230
|
**`total_amount`** (Field): Total amount including taxes.
|
|
220
231
|
|
|
221
232
|
```ruby
|
|
222
|
-
puts result.inference.prediction.total_amount.value
|
|
233
|
+
puts result.document.inference.prediction.total_amount.value
|
|
223
234
|
```
|
|
224
235
|
|
|
225
236
|
**`total_net`** (Field): Total amount excluding taxes.
|
|
226
237
|
|
|
227
238
|
```ruby
|
|
228
|
-
puts result.inference.prediction.total_net.value
|
|
239
|
+
puts result.document.inference.prediction.total_net.value
|
|
229
240
|
```
|
|
230
241
|
|
|
231
242
|
**`total_tax`** (Field): Total tax value from tax lines.
|
|
232
243
|
|
|
233
244
|
```ruby
|
|
234
|
-
puts result.inference.prediction.total_tax.value
|
|
245
|
+
puts result.document.inference.prediction.total_tax.value
|
|
235
246
|
```
|
|
236
247
|
|
|
237
248
|
### Line items
|
|
@@ -251,7 +262,7 @@ Each object in the list contains:
|
|
|
251
262
|
* `polygon` (Polygon)
|
|
252
263
|
|
|
253
264
|
```ruby
|
|
254
|
-
result.inference.prediction.line_items.each do |line_item|
|
|
265
|
+
result.document.inference.prediction.line_items.each do |line_item|
|
|
255
266
|
pp line_item
|
|
256
267
|
end
|
|
257
268
|
```
|