mindee 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/bin/mindee.rb +8 -0
- data/docs/ruby-api-builder.md +35 -42
- data/docs/ruby-getting-started.md +1 -1
- data/docs/ruby-invoice-ocr.md +12 -13
- data/docs/ruby-passport-ocr.md +1 -1
- data/docs/ruby-receipt-ocr.md +1 -1
- data/lib/mindee/client.rb +34 -30
- data/lib/mindee/document_config.rb +15 -23
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +245 -0
- data/lib/mindee/parsing/prediction/financial_document/invoice_line_item.rb +58 -0
- data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +82 -0
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +3 -0
- data/lib/mindee/parsing/prediction.rb +2 -0
- data/lib/mindee/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a4e7590c321e473df6da717d6854caf198ee8dc3d818502d6e588d14497c0da
|
4
|
+
data.tar.gz: b9922d76cbc0115dff59489b9c77757decd69fcfd8a6da445e0a38229cd85023
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 01e9dce9c2ba44dea757f1061c4a368b9a675f75d8ab54dd74c06c7c70589aaa80fd320a241c51f618efec9d6ebae8aa9fa033233731e5c214e607464821deb1
|
7
|
+
data.tar.gz: 0467da78b85085e0df6cdae33e6135be4dd1be6c8d28f2c2888127c2340c114b404314f80741f0304aca4d34b19c6462ae8798fcb3936d5f2130860610b4ded3
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
# Mindee Ruby API Library Changelog
|
2
2
|
|
3
|
+
## v2.1.0 - 2023-01-30
|
4
|
+
### Changes
|
5
|
+
* :sparkles: Add financial document v1 support (Co-authored-by: Oriol Gual)
|
6
|
+
* :sparkles: Add Proof of Address v1 support
|
7
|
+
|
3
8
|
## v2.0.0 - 2023-01-13
|
4
9
|
### ¡Breaking Changes!
|
5
10
|
* :sparkles: add improved PDF merge system
|
data/bin/mindee.rb
CHANGED
@@ -10,6 +10,14 @@ DOCUMENTS = {
|
|
10
10
|
help: "Custom document type from API builder",
|
11
11
|
prediction: Mindee::Prediction::CustomV1,
|
12
12
|
},
|
13
|
+
"proof-of-address" => {
|
14
|
+
help: 'Proof of Address',
|
15
|
+
prediction: Mindee::Prediction::ProofOfAddressV1,
|
16
|
+
},
|
17
|
+
"financial-document" => {
|
18
|
+
help: 'Financial Document',
|
19
|
+
prediction: Mindee::Prediction::FinancialDocumentV1,
|
20
|
+
},
|
13
21
|
"invoice" => {
|
14
22
|
help: 'Invoice',
|
15
23
|
prediction: Mindee::Prediction::InvoiceV4,
|
data/docs/ruby-api-builder.md
CHANGED
@@ -1,34 +1,32 @@
|
|
1
|
-
The Ruby OCR SDK supports [custom-built API](https://developers.mindee.com/docs/build-your-first-document-parsing-api)
|
2
|
-
from the API Builder.
|
1
|
+
The Ruby OCR SDK supports [custom-built API](https://developers.mindee.com/docs/build-your-first-document-parsing-api) from the API Builder.
|
3
2
|
|
4
3
|
If your document isn't covered by one of Mindee's Off-the-Shelf APIs, you can create your own API using the
|
5
4
|
[API Builder](https://developers.mindee.com/docs/overview).
|
6
5
|
|
7
|
-
For the following examples, we are using our own [W9s custom API](https://developers.mindee.com/docs/w9-forms-ocr)
|
6
|
+
For the following examples, we are using our own [W9s custom API](https://developers.mindee.com/docs/w9-forms-ocr),
|
8
7
|
created with the [API Builder](https://developers.mindee.com/docs/overview).
|
9
8
|
|
10
9
|
> 📘 **Info**
|
11
10
|
>
|
12
|
-
> We used a data model that
|
13
|
-
> change the `
|
11
|
+
> We used a data model that will be different from yours.
|
12
|
+
> To modify this to your own custom API, change the `add_endpoint` call with your own parameters.
|
14
13
|
|
15
14
|
```ruby
|
16
15
|
require 'mindee'
|
17
16
|
|
18
17
|
# Init a new client and configure your custom document
|
19
|
-
mindee_client = Mindee::Client.new(
|
20
|
-
api_key: 'my-api-key', # optional, can be set in environment
|
21
|
-
).config_custom_doc(
|
22
|
-
'wsnine',
|
18
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key').add_endpoint(
|
23
19
|
'john',
|
20
|
+
'wnine',
|
24
21
|
version: '1.1' # optional, if not set, use the latest version of the model
|
25
22
|
)
|
26
23
|
|
27
24
|
# Load a file from disk and parse it
|
28
|
-
|
25
|
+
result = mindee_client.doc_from_path('/path/to/file.ext')
|
26
|
+
.parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
|
29
27
|
|
30
|
-
# Print a
|
31
|
-
puts
|
28
|
+
# Print a summary of the document prediction in RST format
|
29
|
+
puts result
|
32
30
|
```
|
33
31
|
|
34
32
|
If the `version` argument is set, you'll be required to update it every time a new model is trained.
|
@@ -39,7 +37,8 @@ The client calls the `parse` method when parsing your custom document, which wil
|
|
39
37
|
The document type must be specified when calling the parse method.
|
40
38
|
|
41
39
|
```ruby
|
42
|
-
result = mindee_client.doc_from_path('/path/to/custom_file')
|
40
|
+
result = mindee_client.doc_from_path('/path/to/custom_file')
|
41
|
+
.parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
|
43
42
|
puts result
|
44
43
|
```
|
45
44
|
|
@@ -49,13 +48,13 @@ puts result
|
|
49
48
|
> you **must** specify your account name when calling the `parse` method:
|
50
49
|
|
51
50
|
```ruby
|
52
|
-
mindee_client = Mindee::Client.new.
|
51
|
+
mindee_client = Mindee::Client.new.add_endpoint(
|
53
52
|
'receipt',
|
54
53
|
'john'
|
55
54
|
)
|
56
55
|
|
57
56
|
result = mindee_client.doc_from_path('/path/to/receipt.jpg')
|
58
|
-
.parse(
|
57
|
+
.parse(Mindee::Prediction::CustomV1, account_name: 'john')
|
59
58
|
```
|
60
59
|
|
61
60
|
## Document Fields
|
@@ -64,64 +63,58 @@ All the fields defined in the API builder when creating your custom document are
|
|
64
63
|
In custom documents, each field will hold an array of all the words in the document which are related to that field.
|
65
64
|
Each word is an object that has the text content, geometry information, and confidence score.
|
66
65
|
|
67
|
-
Value fields can be accessed
|
66
|
+
Value fields can be accessed via the `fields` attribute.
|
68
67
|
|
69
|
-
Classification fields can be accessed
|
68
|
+
Classification fields can be accessed via the `classifications` attribute.
|
70
69
|
|
71
70
|
> 📘 **Info**
|
72
71
|
>
|
73
72
|
> Both document level and page level objects work in the same way.
|
74
73
|
|
75
|
-
###
|
76
|
-
|
74
|
+
### Fields Attribute
|
75
|
+
The `fields` attribute is a hashmap with the following structure:
|
76
|
+
|
77
|
+
* key: the API name of the field, as a `symbol`
|
78
|
+
* value: a `ListField` object which has a `values` attribute, containing a list of all values found for the field.
|
79
|
+
|
80
|
+
Individual field values can be accessed by using the field's API name. In the examples below, we'll use the `address` field.
|
77
81
|
|
78
82
|
```ruby
|
79
83
|
# raw data, list of each word object
|
80
|
-
|
84
|
+
pp result.inference.prediction.fields[:address].values
|
81
85
|
|
82
86
|
# list of all values
|
83
|
-
puts
|
87
|
+
puts result.inference.prediction.fields[:address].contents_list
|
84
88
|
|
85
89
|
# default string representation
|
86
|
-
puts
|
90
|
+
puts result.inference.prediction.fields[:address].to_s
|
87
91
|
|
88
92
|
# custom string representation
|
89
|
-
puts
|
93
|
+
puts result.inference.prediction.fields[:address].contents_str(separator: '_')
|
90
94
|
```
|
91
95
|
|
92
|
-
|
93
|
-
In addition to accessing a value field directly, it's possible to access it through the `fields` attribute.
|
94
|
-
It's a hashmap with the following structure:
|
95
|
-
* key: the API name of the field, as a `symbol`
|
96
|
-
* value: a `ListField` object which has a `values` attribute, containing a list of all values found for the field.
|
97
|
-
|
98
|
-
```ruby
|
99
|
-
# raw data, list of each word object
|
100
|
-
puts w9_data.document.fields[:address].values
|
101
|
-
```
|
102
|
-
|
103
|
-
This makes it simple to iterate over all the fields:
|
96
|
+
To iterate over all the fields:
|
104
97
|
```ruby
|
105
|
-
|
98
|
+
result.inference.prediction.fields.each do |name, info|
|
106
99
|
puts name
|
107
100
|
puts info.values
|
108
101
|
end
|
109
102
|
```
|
110
103
|
|
111
|
-
### Classifications
|
112
|
-
|
113
|
-
|
104
|
+
### Classifications Attribute
|
105
|
+
The `classifications` attribute is a hashmap with the following structure:
|
106
|
+
|
114
107
|
* key: the API name of the field, as a `symbol`
|
115
108
|
* value: a `ClassificationField` object which has a `value` attribute, containing a string representation of the detected classification.
|
116
109
|
|
117
110
|
```ruby
|
118
111
|
# string representation of the detected classification
|
119
|
-
puts
|
112
|
+
puts result.document.classifications[:doc_type].value
|
120
113
|
```
|
121
114
|
|
122
|
-
|
115
|
+
To iterate over all the classifications:
|
123
116
|
```ruby
|
124
|
-
|
117
|
+
result.document.classifications.each do |name, info|
|
125
118
|
puts name
|
126
119
|
puts info.value
|
127
120
|
end
|
data/docs/ruby-invoice-ocr.md
CHANGED
@@ -19,7 +19,7 @@ puts result.inference.prediction
|
|
19
19
|
```
|
20
20
|
|
21
21
|
Output:
|
22
|
-
```
|
22
|
+
```
|
23
23
|
:Locale: en; en; CAD;
|
24
24
|
:Document type: INVOICE
|
25
25
|
:Invoice number: 14
|
@@ -48,8 +48,10 @@ Code QTY Price Amount Tax (Rate) Descript
|
|
48
48
|
====================== ======== ========= ========== ================== ====================================
|
49
49
|
```
|
50
50
|
|
51
|
-
|
52
|
-
|
51
|
+
> 📘 **Info**
|
52
|
+
>
|
53
|
+
> Line item descriptions are truncated here only for display purposes.
|
54
|
+
> The full text is available in the [details](#line-items).
|
53
55
|
|
54
56
|
## Fields
|
55
57
|
Each prediction object contains a set of different fields.
|
@@ -91,11 +93,12 @@ puts result.inference.prediction.customer_name.value
|
|
91
93
|
puts result.inference.prediction.customer_address.value
|
92
94
|
```
|
93
95
|
|
94
|
-
**`
|
96
|
+
**`customer_company_registrations`** (Array<CompanyRegistration>): Customer's company registration
|
95
97
|
|
96
98
|
```ruby
|
97
99
|
result.inference.prediction.customer_company_registrations.each do |registration|
|
98
|
-
puts registration
|
100
|
+
puts registration.value
|
101
|
+
puts registration.type
|
99
102
|
end
|
100
103
|
```
|
101
104
|
|
@@ -184,14 +187,10 @@ Each object in the list contains an extra attribute:
|
|
184
187
|
|
185
188
|
* `type` (String): Type of company registration number among predefined categories.
|
186
189
|
```ruby
|
187
|
-
|
188
|
-
puts
|
189
|
-
|
190
|
-
|
191
|
-
* `value` (String): Value of the company identifier
|
192
|
-
```ruby
|
193
|
-
# Show the value of the first registration
|
194
|
-
puts result.inference.prediction.supplier_company_registrations[0].value
|
190
|
+
result.inference.prediction.supplier_company_registrations.each do |registration|
|
191
|
+
puts registration.value
|
192
|
+
puts registration.type
|
193
|
+
end
|
195
194
|
```
|
196
195
|
|
197
196
|
### Taxes
|
data/docs/ruby-passport-ocr.md
CHANGED
data/docs/ruby-receipt-ocr.md
CHANGED
data/lib/mindee/client.rb
CHANGED
@@ -23,8 +23,8 @@ module Mindee
|
|
23
23
|
# API Builder. Do not set for standard (off the shelf) endpoints.
|
24
24
|
#
|
25
25
|
# @param account_name [String] For custom endpoints, your account or organization username on the API Builder.
|
26
|
-
# This is normally not required unless you have a custom endpoint which has the
|
27
|
-
#
|
26
|
+
# This is normally not required unless you have a custom endpoint which has the same name as a
|
27
|
+
# standard (off the shelf) endpoint.
|
28
28
|
# Do not set for standard (off the shelf) endpoints.
|
29
29
|
#
|
30
30
|
# @param include_words [Boolean] Whether to include the full text for each page.
|
@@ -121,7 +121,7 @@ module Mindee
|
|
121
121
|
)
|
122
122
|
@doc_configs[[account_name, endpoint_name]] = DocumentConfig.new(
|
123
123
|
Prediction::CustomV1,
|
124
|
-
|
124
|
+
HTTP::CustomEndpoint.new(account_name, endpoint_name, version, @api_key)
|
125
125
|
)
|
126
126
|
self
|
127
127
|
end
|
@@ -163,42 +163,46 @@ module Mindee
|
|
163
163
|
|
164
164
|
private
|
165
165
|
|
166
|
+
def standard_document_config(prediction_class, endpoint_name, version)
|
167
|
+
DocumentConfig.new(
|
168
|
+
prediction_class,
|
169
|
+
HTTP::StandardEndpoint.new(endpoint_name, version, @api_key)
|
170
|
+
)
|
171
|
+
end
|
172
|
+
|
166
173
|
def init_default_endpoints
|
167
|
-
@doc_configs[['mindee', Prediction::
|
168
|
-
Prediction::
|
169
|
-
|
174
|
+
@doc_configs[['mindee', Prediction::ProofOfAddressV1.name]] = standard_document_config(
|
175
|
+
Prediction::ProofOfAddressV1, 'proof_of_address', '1'
|
176
|
+
)
|
177
|
+
@doc_configs[['mindee', Prediction::FinancialDocumentV1.name]] = standard_document_config(
|
178
|
+
Prediction::FinancialDocumentV1, 'financial_document', '1'
|
179
|
+
)
|
180
|
+
@doc_configs[['mindee', Prediction::InvoiceV4.name]] = standard_document_config(
|
181
|
+
Prediction::InvoiceV4, 'invoices', '4'
|
170
182
|
)
|
171
|
-
@doc_configs[['mindee', Prediction::ReceiptV4.name]] =
|
172
|
-
Prediction::ReceiptV4,
|
173
|
-
[HTTP::StandardEndpoint.new('expense_receipts', '4', @api_key)]
|
183
|
+
@doc_configs[['mindee', Prediction::ReceiptV4.name]] = standard_document_config(
|
184
|
+
Prediction::ReceiptV4, 'expense_receipts', '4'
|
174
185
|
)
|
175
|
-
@doc_configs[['mindee', Prediction::PassportV1.name]] =
|
176
|
-
Prediction::PassportV1,
|
177
|
-
[HTTP::StandardEndpoint.new('passport', '1', @api_key)]
|
186
|
+
@doc_configs[['mindee', Prediction::PassportV1.name]] = standard_document_config(
|
187
|
+
Prediction::PassportV1, 'passport', '1'
|
178
188
|
)
|
179
|
-
@doc_configs[['mindee', Prediction::EU::LicensePlateV1.name]] =
|
180
|
-
Prediction::EU::LicensePlateV1,
|
181
|
-
[HTTP::StandardEndpoint.new('license_plates', '1', @api_key)]
|
189
|
+
@doc_configs[['mindee', Prediction::EU::LicensePlateV1.name]] = standard_document_config(
|
190
|
+
Prediction::EU::LicensePlateV1, 'license_plates', '1'
|
182
191
|
)
|
183
|
-
@doc_configs[['mindee', Prediction::ShippingContainerV1.name]] =
|
184
|
-
Prediction::ShippingContainerV1,
|
185
|
-
[HTTP::StandardEndpoint.new('shipping_containers', '1', @api_key)]
|
192
|
+
@doc_configs[['mindee', Prediction::ShippingContainerV1.name]] = standard_document_config(
|
193
|
+
Prediction::ShippingContainerV1, 'shipping_containers', '1'
|
186
194
|
)
|
187
|
-
@doc_configs[['mindee', Prediction::US::BankCheckV1.name]] =
|
188
|
-
Prediction::US::BankCheckV1,
|
189
|
-
[HTTP::StandardEndpoint.new('bank_check', '1', @api_key)]
|
195
|
+
@doc_configs[['mindee', Prediction::US::BankCheckV1.name]] = standard_document_config(
|
196
|
+
Prediction::US::BankCheckV1, 'bank_check', '1'
|
190
197
|
)
|
191
|
-
@doc_configs[['mindee', Prediction::FR::BankAccountDetailsV1.name]] =
|
192
|
-
Prediction::FR::BankAccountDetailsV1,
|
193
|
-
[HTTP::StandardEndpoint.new('bank_account_details', '1', @api_key)]
|
198
|
+
@doc_configs[['mindee', Prediction::FR::BankAccountDetailsV1.name]] = standard_document_config(
|
199
|
+
Prediction::FR::BankAccountDetailsV1, 'bank_account_details', '1'
|
194
200
|
)
|
195
|
-
@doc_configs[['mindee', Prediction::FR::CarteVitaleV1.name]] =
|
196
|
-
Prediction::FR::CarteVitaleV1,
|
197
|
-
[HTTP::StandardEndpoint.new('carte_vitale', '1', @api_key)]
|
201
|
+
@doc_configs[['mindee', Prediction::FR::CarteVitaleV1.name]] = standard_document_config(
|
202
|
+
Prediction::FR::CarteVitaleV1, 'carte_vitale', '1'
|
198
203
|
)
|
199
|
-
@doc_configs[['mindee', Prediction::FR::IdCardV1.name]] =
|
200
|
-
Prediction::FR::IdCardV1,
|
201
|
-
[HTTP::StandardEndpoint.new('idcard_fr', '1', @api_key)]
|
204
|
+
@doc_configs[['mindee', Prediction::FR::IdCardV1.name]] = standard_document_config(
|
205
|
+
Prediction::FR::IdCardV1, 'idcard_fr', '1'
|
202
206
|
)
|
203
207
|
self
|
204
208
|
end
|
@@ -11,14 +11,14 @@ module Mindee
|
|
11
11
|
# Specific client for sending a document to the API.
|
12
12
|
class DocumentConfig
|
13
13
|
# The Mindee::HTTP::Endpoint to be used.
|
14
|
-
# @return [
|
15
|
-
attr_reader :
|
14
|
+
# @return [Mindee::HTTP::Endpoint]
|
15
|
+
attr_reader :endpoint
|
16
16
|
|
17
17
|
# @param prediction_class [Class<Mindee::Prediction::Prediction>]
|
18
|
-
# @param
|
19
|
-
def initialize(prediction_class,
|
18
|
+
# @param endpoint [Mindee::HTTP::Endpoint]
|
19
|
+
def initialize(prediction_class, endpoint)
|
20
20
|
@prediction_class = prediction_class
|
21
|
-
@
|
21
|
+
@endpoint = endpoint
|
22
22
|
end
|
23
23
|
|
24
24
|
# Call the prediction API.
|
@@ -28,16 +28,8 @@ module Mindee
|
|
28
28
|
# @param cropper [Boolean]
|
29
29
|
# @return [Mindee::DocumentResponse]
|
30
30
|
def predict(input_doc, include_words, close_file, cropper)
|
31
|
-
|
31
|
+
check_api_key
|
32
32
|
response = predict_request(input_doc, include_words, close_file, cropper)
|
33
|
-
parse_response(response)
|
34
|
-
end
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
# @param response [Net::HTTPResponse]
|
39
|
-
# @return [Mindee::DocumentResponse]
|
40
|
-
def parse_response(response)
|
41
33
|
hashed_response = JSON.parse(response.body, object_class: Hash)
|
42
34
|
return Document.new(@prediction_class, hashed_response['document']) if (200..299).include?(response.code.to_i)
|
43
35
|
|
@@ -45,24 +37,24 @@ module Mindee
|
|
45
37
|
raise error
|
46
38
|
end
|
47
39
|
|
40
|
+
private
|
41
|
+
|
48
42
|
# @param input_doc [Mindee::InputDocument]
|
49
43
|
# @param include_words [Boolean]
|
50
44
|
# @param close_file [Boolean]
|
51
45
|
# @param cropper [Boolean]
|
52
46
|
# @return [Net::HTTPResponse]
|
53
47
|
def predict_request(input_doc, include_words, close_file, cropper)
|
54
|
-
@
|
48
|
+
@endpoint.predict_req_post(input_doc, include_words: include_words, close_file: close_file, cropper: cropper)
|
55
49
|
end
|
56
50
|
|
57
|
-
def
|
58
|
-
@
|
59
|
-
next unless endpoint.api_key.nil? || endpoint.api_key.empty?
|
51
|
+
def check_api_key
|
52
|
+
return unless @endpoint.api_key.nil? || @endpoint.api_key.empty?
|
60
53
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
end
|
54
|
+
raise "Missing API key for '#{@document_type}', " \
|
55
|
+
"check your Client Configuration.\n" \
|
56
|
+
'You can set this using the ' \
|
57
|
+
"'#{HTTP::API_KEY_ENV_NAME}' environment variable."
|
66
58
|
end
|
67
59
|
end
|
68
60
|
end
|
@@ -0,0 +1,245 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../common_fields'
|
4
|
+
require_relative '../base'
|
5
|
+
require_relative 'invoice_line_item'
|
6
|
+
|
7
|
+
module Mindee
|
8
|
+
module Prediction
|
9
|
+
# Financial document (invoice or receipt).
|
10
|
+
class FinancialDocumentV1 < Prediction
|
11
|
+
# Locale information.
|
12
|
+
# @return [Mindee::Locale]
|
13
|
+
attr_reader :locale
|
14
|
+
# The nature of the invoice.
|
15
|
+
# @return [Mindee::TextField]
|
16
|
+
attr_reader :document_type
|
17
|
+
# The total amount with tax included.
|
18
|
+
# @return [Mindee::AmountField]
|
19
|
+
attr_reader :total_amount
|
20
|
+
# The total amount without the tax value.
|
21
|
+
# @return [Mindee::AmountField]
|
22
|
+
attr_reader :total_net
|
23
|
+
# The total tax.
|
24
|
+
# @return [Mindee::AmountField]
|
25
|
+
attr_reader :total_tax
|
26
|
+
# The creation date of the invoice.
|
27
|
+
# @return [Mindee::DateField]
|
28
|
+
attr_reader :date
|
29
|
+
# The invoice number.
|
30
|
+
# @return [Mindee::TextField]
|
31
|
+
attr_reader :invoice_number
|
32
|
+
# List of Reference numbers including PO number.
|
33
|
+
# @return [Array<Mindee::TextField>]
|
34
|
+
attr_reader :reference_numbers
|
35
|
+
# The due date of the invoice.
|
36
|
+
# @return [Mindee::DateField]
|
37
|
+
attr_reader :due_date
|
38
|
+
# The list of taxes.
|
39
|
+
# @return [Array<Mindee::TaxField>]
|
40
|
+
attr_reader :taxes
|
41
|
+
# The name of the customer.
|
42
|
+
# @return [Mindee::TextField]
|
43
|
+
attr_reader :customer_name
|
44
|
+
# The address of the customer.
|
45
|
+
# @return [Mindee::TextField]
|
46
|
+
attr_reader :customer_address
|
47
|
+
# The company registration information for the customer.
|
48
|
+
# @return [Array<Mindee::CompanyRegistration>]
|
49
|
+
attr_reader :customer_company_registrations
|
50
|
+
# The supplier's name.
|
51
|
+
# @return [Mindee::TextField]
|
52
|
+
attr_reader :supplier_name
|
53
|
+
# The supplier's address.
|
54
|
+
# @return [Mindee::TextField]
|
55
|
+
attr_reader :supplier_address
|
56
|
+
# The payment information.
|
57
|
+
# @return [Array<Mindee::PaymentDetails>]
|
58
|
+
attr_reader :supplier_payment_details
|
59
|
+
# The supplier's company registration information.
|
60
|
+
# @return [Array<Mindee::CompanyRegistration>]
|
61
|
+
attr_reader :supplier_company_registrations
|
62
|
+
# Line items details.
|
63
|
+
# @return [Array<Mindee::InvoiceLineItem>]
|
64
|
+
attr_reader :line_items
|
65
|
+
# Time as seen on the receipt in HH:MM format.
|
66
|
+
# @return [Mindee::TextField]
|
67
|
+
attr_reader :time
|
68
|
+
# The receipt category among predefined classes.
|
69
|
+
# @return [Mindee::TextField]
|
70
|
+
attr_reader :category
|
71
|
+
# The receipt sub-category among predefined classes.
|
72
|
+
# @return [Mindee::TextField]
|
73
|
+
attr_reader :subcategory
|
74
|
+
# A classification field that can return 4 values: 'EXPENSE RECEIPT',
|
75
|
+
# 'CREDIT CARD RECEIPT', 'INVOICE', 'CREDIT NOTE'
|
76
|
+
# @return [Mindee::TextField]
|
77
|
+
attr_reader :document_type # rubocop:todo Lint/DuplicateMethods
|
78
|
+
# Total amount of tip and gratuity. Both typed and handwritten characters are supported.
|
79
|
+
# @return [Mindee::AmountField]
|
80
|
+
attr_reader :tip
|
81
|
+
|
82
|
+
# @param prediction [Hash]
|
83
|
+
# @param page_id [Integer, nil]
|
84
|
+
def initialize(prediction, page_id) # rubocop:todo Metrics/AbcSize
|
85
|
+
super
|
86
|
+
|
87
|
+
@time = TextField.new(prediction['time'], page_id)
|
88
|
+
@category = TextField.new(prediction['category'], page_id)
|
89
|
+
@subcategory = TextField.new(prediction['subcategory'], page_id)
|
90
|
+
@document_type = TextField.new(prediction['document_type'], page_id)
|
91
|
+
@tip = AmountField.new(prediction['tip'], page_id)
|
92
|
+
@locale = Locale.new(prediction['locale'])
|
93
|
+
@document_type = TextField.new(prediction['document_type'], page_id)
|
94
|
+
@total_amount = AmountField.new(prediction['total_amount'], page_id)
|
95
|
+
@total_net = AmountField.new(prediction['total_net'], page_id)
|
96
|
+
@customer_address = TextField.new(prediction['customer_address'], page_id)
|
97
|
+
@customer_name = TextField.new(prediction['customer_name'], page_id)
|
98
|
+
@date = DateField.new(prediction['date'], page_id)
|
99
|
+
@due_date = DateField.new(prediction['due_date'], page_id)
|
100
|
+
@invoice_number = TextField.new(prediction['invoice_number'], page_id)
|
101
|
+
@supplier_name = TextField.new(prediction['supplier_name'], page_id)
|
102
|
+
@supplier_address = TextField.new(prediction['supplier_address'], page_id)
|
103
|
+
|
104
|
+
@reference_numbers = []
|
105
|
+
prediction['reference_numbers'].each do |item|
|
106
|
+
@reference_numbers.push(TextField.new(item, page_id))
|
107
|
+
end
|
108
|
+
@customer_company_registrations = []
|
109
|
+
prediction['customer_company_registrations'].each do |item|
|
110
|
+
@customer_company_registrations.push(CompanyRegistration.new(item, page_id))
|
111
|
+
end
|
112
|
+
@taxes = []
|
113
|
+
prediction['taxes'].each do |item|
|
114
|
+
@taxes.push(TaxField.new(item, page_id))
|
115
|
+
end
|
116
|
+
@supplier_payment_details = []
|
117
|
+
prediction['supplier_payment_details'].each do |item|
|
118
|
+
@supplier_payment_details.push(PaymentDetails.new(item, page_id))
|
119
|
+
end
|
120
|
+
@supplier_company_registrations = []
|
121
|
+
prediction['supplier_company_registrations'].each do |item|
|
122
|
+
@supplier_company_registrations.push(CompanyRegistration.new(item, page_id))
|
123
|
+
end
|
124
|
+
|
125
|
+
@total_tax = AmountField.new(
|
126
|
+
{ value: nil, confidence: 0.0 }, page_id
|
127
|
+
)
|
128
|
+
|
129
|
+
@line_items = []
|
130
|
+
prediction['line_items'].each do |item|
|
131
|
+
@line_items.push(InvoiceLineItem.new(item, page_id))
|
132
|
+
end
|
133
|
+
reconstruct(page_id)
|
134
|
+
end
|
135
|
+
|
136
|
+
def to_s
|
137
|
+
customer_company_registrations = @customer_company_registrations.map(&:value).join('; ')
|
138
|
+
supplier_payment_details = @supplier_payment_details.map(&:to_s).join("\n ")
|
139
|
+
supplier_company_registrations = @supplier_company_registrations.map(&:to_s).join('; ')
|
140
|
+
reference_numbers = @reference_numbers.map(&:to_s).join(', ')
|
141
|
+
taxes = @taxes.join("\n ")
|
142
|
+
out_str = String.new
|
143
|
+
out_str << "\n:Document type: #{@document_type}".rstrip
|
144
|
+
out_str << "\n:Category: #{@category}".rstrip
|
145
|
+
out_str << "\n:Subcategory: #{@subcategory}".rstrip
|
146
|
+
out_str << "\n:Locale: #{@locale}".rstrip
|
147
|
+
out_str << "\n:Date: #{@date}".rstrip
|
148
|
+
out_str << "\n:Due date: #{@due_date}".rstrip
|
149
|
+
out_str << "\n:Time: #{@time}".rstrip
|
150
|
+
out_str << "\n:Number: #{@invoice_number}".rstrip
|
151
|
+
out_str << "\n:Reference numbers: #{reference_numbers}".rstrip
|
152
|
+
out_str << "\n:Supplier name: #{@supplier_name}".rstrip
|
153
|
+
out_str << "\n:Supplier address: #{@supplier_address}".rstrip
|
154
|
+
out_str << "\n:Supplier company registrations: #{supplier_company_registrations}".rstrip
|
155
|
+
out_str << "\n:Supplier payment details: #{supplier_payment_details}".rstrip
|
156
|
+
|
157
|
+
out_str << "\n:Customer name: #{@customer_name}".rstrip
|
158
|
+
out_str << "\n:Customer address: #{@customer_address}".rstrip
|
159
|
+
out_str << "\n:Customer company registrations: #{customer_company_registrations}".rstrip
|
160
|
+
|
161
|
+
out_str << "\n:Tip: #{@tip}".rstrip
|
162
|
+
|
163
|
+
out_str << "\n:Taxes: #{taxes}".rstrip
|
164
|
+
out_str << "\n:Total taxes: #{@total_tax}".rstrip
|
165
|
+
out_str << "\n:Total net: #{@total_net}".rstrip
|
166
|
+
out_str << "\n:Total amount: #{@total_amount}".rstrip
|
167
|
+
|
168
|
+
out_str << line_items_to_s
|
169
|
+
|
170
|
+
out_str[1..].to_s
|
171
|
+
end
|
172
|
+
|
173
|
+
private
|
174
|
+
|
175
|
+
def line_items_to_s
|
176
|
+
line_item_separator = "#{'=' * 22} #{'=' * 8} #{'=' * 9} #{'=' * 10} #{'=' * 18} #{'=' * 36}"
|
177
|
+
line_items = @line_items.map(&:to_s).join("\n")
|
178
|
+
|
179
|
+
out_str = String.new
|
180
|
+
out_str << "\n\n:Line Items:"
|
181
|
+
|
182
|
+
return out_str if line_items.empty?
|
183
|
+
|
184
|
+
out_str << "\n#{line_item_separator}"
|
185
|
+
out_str << "\nCode QTY Price Amount Tax (Rate) Description"
|
186
|
+
out_str << "\n#{line_item_separator}"
|
187
|
+
out_str << "\n#{line_items}"
|
188
|
+
out_str << "\n#{line_item_separator}"
|
189
|
+
end
|
190
|
+
|
191
|
+
def reconstruct(page_id)
|
192
|
+
construct_total_tax_from_taxes(page_id)
|
193
|
+
return unless page_id.nil?
|
194
|
+
|
195
|
+
construct_total_excl_from_tcc_and_taxes(page_id)
|
196
|
+
construct_total_incl_from_taxes_plus_excl(page_id)
|
197
|
+
construct_total_tax_from_totals(page_id)
|
198
|
+
end
|
199
|
+
|
200
|
+
def construct_total_excl_from_tcc_and_taxes(page_id)
|
201
|
+
return if @total_amount.value.nil? || taxes.empty? || !@total_net.value.nil?
|
202
|
+
|
203
|
+
total_excl = {
|
204
|
+
'value' => @total_amount.value - @taxes.map(&:value).sum,
|
205
|
+
'confidence' => TextField.array_confidence(@taxes) * @total_amount.confidence,
|
206
|
+
}
|
207
|
+
@total_net = AmountField.new(total_excl, page_id, reconstructed: true)
|
208
|
+
end
|
209
|
+
|
210
|
+
def construct_total_incl_from_taxes_plus_excl(page_id)
|
211
|
+
return if @total_net.value.nil? || @taxes.empty? || !@total_amount.value.nil?
|
212
|
+
|
213
|
+
total_incl = {
|
214
|
+
'value' => @taxes.map(&:value).sum + @total_net.value,
|
215
|
+
'confidence' => TextField.array_confidence(@taxes) * @total_net.confidence,
|
216
|
+
}
|
217
|
+
@total_amount = AmountField.new(total_incl, page_id, reconstructed: true)
|
218
|
+
end
|
219
|
+
|
220
|
+
def construct_total_tax_from_taxes(page_id)
|
221
|
+
return if @taxes.empty?
|
222
|
+
|
223
|
+
total_tax = {
|
224
|
+
'value' => @taxes.map(&:value).sum,
|
225
|
+
'confidence' => TextField.array_confidence(@taxes),
|
226
|
+
}
|
227
|
+
return unless total_tax['value'].positive?
|
228
|
+
|
229
|
+
@total_tax = AmountField.new(total_tax, page_id, reconstructed: true)
|
230
|
+
end
|
231
|
+
|
232
|
+
def construct_total_tax_from_totals(page_id)
|
233
|
+
return if !@total_tax.value.nil? || @total_amount.value.nil? || @total_net.value.nil?
|
234
|
+
|
235
|
+
total_tax = {
|
236
|
+
'value' => @total_amount.value - @total_net.value,
|
237
|
+
'confidence' => TextField.array_confidence(@taxes),
|
238
|
+
}
|
239
|
+
return unless total_tax['value'] >= 0
|
240
|
+
|
241
|
+
@total_tax = AmountField.new(total_tax, page_id, reconstructed: true)
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../common_fields/base'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
# Line items for invoices
|
7
|
+
class InvoiceLineItem
|
8
|
+
# @return [String] The product code referring to the item.
|
9
|
+
attr_reader :product_code
|
10
|
+
# @return [String]
|
11
|
+
attr_reader :description
|
12
|
+
# @return [Float]
|
13
|
+
attr_reader :quantity
|
14
|
+
# @return [Float]
|
15
|
+
attr_reader :unit_price
|
16
|
+
# @return [Float]
|
17
|
+
attr_reader :total_amount
|
18
|
+
# @return [Float] The item tax rate percentage.
|
19
|
+
attr_reader :tax_rate
|
20
|
+
# @return [Float]
|
21
|
+
attr_reader :tax_amount
|
22
|
+
# @return [Float]
|
23
|
+
attr_reader :confidence
|
24
|
+
# @return [Integer]
|
25
|
+
attr_reader :page_id
|
26
|
+
# @return [Mindee::Geometry::Quadrilateral]
|
27
|
+
attr_reader :bounding_box
|
28
|
+
# @return [Array<Mindee::Geometry::Polygon>]
|
29
|
+
attr_reader :polygon
|
30
|
+
|
31
|
+
def initialize(prediction, page_id)
|
32
|
+
@product_code = prediction['product_code']
|
33
|
+
@quantity = prediction['quantity']
|
34
|
+
@unit_price = prediction['unit_price']
|
35
|
+
@total_amount = prediction['total_amount']
|
36
|
+
@tax_amount = prediction['tax_amount']
|
37
|
+
@tax_rate = prediction['tax_rate']
|
38
|
+
@description = prediction['description']
|
39
|
+
@page_id = page_id
|
40
|
+
end
|
41
|
+
|
42
|
+
def to_s
|
43
|
+
tax = Field.float_to_string(@tax_amount)
|
44
|
+
tax << " (#{Field.float_to_string(@tax_rate)}%)" unless @tax_rate.nil?
|
45
|
+
|
46
|
+
description = @description.nil? ? '' : @description
|
47
|
+
description = "#{description[0..32]}..." if description.size > 35
|
48
|
+
|
49
|
+
out_str = String.new
|
50
|
+
out_str << format('%- 22s', @product_code)
|
51
|
+
out_str << " #{format('%- 8s', Field.float_to_string(@quantity))}"
|
52
|
+
out_str << " #{format('%- 9s', Field.float_to_string(@unit_price))}"
|
53
|
+
out_str << " #{format('%- 10s', Field.float_to_string(@total_amount))}"
|
54
|
+
out_str << " #{format('%- 18s', tax)}"
|
55
|
+
out_str << " #{description}"
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'mrz'
|
4
|
+
|
5
|
+
require_relative '../common_fields'
|
6
|
+
require_relative '../base'
|
7
|
+
|
8
|
+
module Mindee
|
9
|
+
module Prediction
|
10
|
+
# Proof of Address document.
|
11
|
+
class ProofOfAddressV1 < Prediction
|
12
|
+
# ISO 639-1 code, works best with ca, de, en, es, fr, it, nl and pt.
|
13
|
+
# @return [Mindee::Locale]
|
14
|
+
attr_reader :locale
|
15
|
+
# ISO date yyyy-mm-dd. Works both for European and US dates.
|
16
|
+
# @return [Mindee::DateField]
|
17
|
+
attr_reader :date
|
18
|
+
# All extracted ISO date yyyy-mm-dd. Works both for European and US dates.
|
19
|
+
# @return [Array<Mindee::DateField>]
|
20
|
+
attr_reader :dates
|
21
|
+
# Address of the document's issuer.
|
22
|
+
# @return [Mindee::TextField]
|
23
|
+
attr_reader :issuer_address
|
24
|
+
# Generic: VAT NUMBER, TAX ID, COMPANY REGISTRATION NUMBER or country specific.
|
25
|
+
# @return [Array<Mindee::CompanyRegistration>]
|
26
|
+
attr_reader :issuer_company_registration
|
27
|
+
# Name of the person or company issuing the document.
|
28
|
+
# @return [Mindee::TextField]
|
29
|
+
attr_reader :issuer_name
|
30
|
+
# Address of the recipient.
|
31
|
+
# @return [Mindee::TextField]
|
32
|
+
attr_reader :recipient_address
|
33
|
+
# Generic: VAT NUMBER, TAX ID, COMPANY REGISTRATION NUMBER or country specific.
|
34
|
+
# @return [Array<Mindee::CompanyRegistration>]
|
35
|
+
attr_reader :recipient_company_registration
|
36
|
+
# Name of the document's recipient.
|
37
|
+
# @return [Mindee::TextField]
|
38
|
+
attr_reader :recipient_name
|
39
|
+
|
40
|
+
# @param prediction [Hash]
|
41
|
+
# @param page_id [Integer, nil]
|
42
|
+
def initialize(prediction, page_id)
|
43
|
+
super
|
44
|
+
@locale = Locale.new(prediction['locale'])
|
45
|
+
@date = DateField.new(prediction['date'], page_id)
|
46
|
+
@dates = []
|
47
|
+
prediction['dates'].each do |item|
|
48
|
+
@dates.push(DateField.new(item, page_id))
|
49
|
+
end
|
50
|
+
@issuer_name = TextField.new(prediction['issuer_name'], page_id)
|
51
|
+
@issuer_address = TextField.new(prediction['issuer_address'], page_id)
|
52
|
+
@issuer_company_registration = []
|
53
|
+
prediction['issuer_company_registration'].each do |item|
|
54
|
+
@issuer_company_registration.push(CompanyRegistration.new(item, page_id))
|
55
|
+
end
|
56
|
+
@recipient_name = TextField.new(prediction['recipient_name'], page_id)
|
57
|
+
@recipient_address = TextField.new(prediction['recipient_address'], page_id)
|
58
|
+
@recipient_company_registration = []
|
59
|
+
prediction['recipient_company_registration'].each do |item|
|
60
|
+
@recipient_company_registration.push(CompanyRegistration.new(item, page_id))
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def to_s
|
65
|
+
recipient_company_registrations = @recipient_company_registration.join(' ')
|
66
|
+
issuer_company_registrations = @issuer_company_registration.join(' ')
|
67
|
+
dates = @dates.join("\n ")
|
68
|
+
out_str = String.new
|
69
|
+
out_str << "\n:Locale: #{@locale}".rstrip
|
70
|
+
out_str << "\n:Issuer name: #{@issuer_name}".rstrip
|
71
|
+
out_str << "\n:Issuer Address: #{@issuer_address}".rstrip
|
72
|
+
out_str << "\n:Issuer Company Registrations: #{issuer_company_registrations}".rstrip
|
73
|
+
out_str << "\n:Recipient name: #{@recipient_name}".rstrip
|
74
|
+
out_str << "\n:Recipient Address: #{@recipient_address}".rstrip
|
75
|
+
out_str << "\n:Recipient Company Registrations: #{recipient_company_registrations}".rstrip
|
76
|
+
out_str << "\n:Issuance date: #{@date}".rstrip
|
77
|
+
out_str << "\n:Dates: #{dates}".rstrip
|
78
|
+
out_str[1..].to_s
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -40,6 +40,9 @@ module Mindee
|
|
40
40
|
# Whether the document is an expense receipt or a credit card receipt.
|
41
41
|
# @return [Mindee::TextField]
|
42
42
|
attr_reader :document_type
|
43
|
+
# Total amount of tip and gratuity. Both typed and handwritten characters are supported.
|
44
|
+
# @return [Mindee::AmountField]
|
45
|
+
attr_reader :tip
|
43
46
|
|
44
47
|
# @param prediction [Hash]
|
45
48
|
# @param page_id [Integer, nil]
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'prediction/custom/custom_v1'
|
4
|
+
require_relative 'prediction/proof_of_address/proof_of_address_v1'
|
5
|
+
require_relative 'prediction/financial_document/financial_document_v1'
|
4
6
|
require_relative 'prediction/invoice/invoice_v4'
|
5
7
|
require_relative 'prediction/passport/passport_v1'
|
6
8
|
require_relative 'prediction/receipt/receipt_v4'
|
data/lib/mindee/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mindee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mindee, SA
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: marcel
|
@@ -164,12 +164,15 @@ files:
|
|
164
164
|
- lib/mindee/parsing/prediction/custom/custom_v1.rb
|
165
165
|
- lib/mindee/parsing/prediction/custom/fields.rb
|
166
166
|
- lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb
|
167
|
+
- lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb
|
168
|
+
- lib/mindee/parsing/prediction/financial_document/invoice_line_item.rb
|
167
169
|
- lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb
|
168
170
|
- lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb
|
169
171
|
- lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb
|
170
172
|
- lib/mindee/parsing/prediction/invoice/invoice_line_item.rb
|
171
173
|
- lib/mindee/parsing/prediction/invoice/invoice_v4.rb
|
172
174
|
- lib/mindee/parsing/prediction/passport/passport_v1.rb
|
175
|
+
- lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb
|
173
176
|
- lib/mindee/parsing/prediction/receipt/receipt_v4.rb
|
174
177
|
- lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb
|
175
178
|
- lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb
|