mindee 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/bin/mindee.rb +8 -0
- data/docs/ruby-api-builder.md +35 -42
- data/docs/ruby-getting-started.md +1 -1
- data/docs/ruby-invoice-ocr.md +12 -13
- data/docs/ruby-passport-ocr.md +1 -1
- data/docs/ruby-receipt-ocr.md +1 -1
- data/lib/mindee/client.rb +34 -30
- data/lib/mindee/document_config.rb +15 -23
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +245 -0
- data/lib/mindee/parsing/prediction/financial_document/invoice_line_item.rb +58 -0
- data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +82 -0
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +3 -0
- data/lib/mindee/parsing/prediction.rb +2 -0
- data/lib/mindee/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a4e7590c321e473df6da717d6854caf198ee8dc3d818502d6e588d14497c0da
|
4
|
+
data.tar.gz: b9922d76cbc0115dff59489b9c77757decd69fcfd8a6da445e0a38229cd85023
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 01e9dce9c2ba44dea757f1061c4a368b9a675f75d8ab54dd74c06c7c70589aaa80fd320a241c51f618efec9d6ebae8aa9fa033233731e5c214e607464821deb1
|
7
|
+
data.tar.gz: 0467da78b85085e0df6cdae33e6135be4dd1be6c8d28f2c2888127c2340c114b404314f80741f0304aca4d34b19c6462ae8798fcb3936d5f2130860610b4ded3
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
# Mindee Ruby API Library Changelog
|
2
2
|
|
3
|
+
## v2.1.0 - 2023-01-30
|
4
|
+
### Changes
|
5
|
+
* :sparkles: Add financial document v1 support (Co-authored-by: Oriol Gual)
|
6
|
+
* :sparkles: Add Proof of Address v1 support
|
7
|
+
|
3
8
|
## v2.0.0 - 2023-01-13
|
4
9
|
### ¡Breaking Changes!
|
5
10
|
* :sparkles: add improved PDF merge system
|
data/bin/mindee.rb
CHANGED
@@ -10,6 +10,14 @@ DOCUMENTS = {
|
|
10
10
|
help: "Custom document type from API builder",
|
11
11
|
prediction: Mindee::Prediction::CustomV1,
|
12
12
|
},
|
13
|
+
"proof-of-address" => {
|
14
|
+
help: 'Proof of Address',
|
15
|
+
prediction: Mindee::Prediction::ProofOfAddressV1,
|
16
|
+
},
|
17
|
+
"financial-document" => {
|
18
|
+
help: 'Financial Document',
|
19
|
+
prediction: Mindee::Prediction::FinancialDocumentV1,
|
20
|
+
},
|
13
21
|
"invoice" => {
|
14
22
|
help: 'Invoice',
|
15
23
|
prediction: Mindee::Prediction::InvoiceV4,
|
data/docs/ruby-api-builder.md
CHANGED
@@ -1,34 +1,32 @@
|
|
1
|
-
The Ruby OCR SDK supports [custom-built API](https://developers.mindee.com/docs/build-your-first-document-parsing-api)
|
2
|
-
from the API Builder.
|
1
|
+
The Ruby OCR SDK supports [custom-built API](https://developers.mindee.com/docs/build-your-first-document-parsing-api) from the API Builder.
|
3
2
|
|
4
3
|
If your document isn't covered by one of Mindee's Off-the-Shelf APIs, you can create your own API using the
|
5
4
|
[API Builder](https://developers.mindee.com/docs/overview).
|
6
5
|
|
7
|
-
For the following examples, we are using our own [W9s custom API](https://developers.mindee.com/docs/w9-forms-ocr)
|
6
|
+
For the following examples, we are using our own [W9s custom API](https://developers.mindee.com/docs/w9-forms-ocr),
|
8
7
|
created with the [API Builder](https://developers.mindee.com/docs/overview).
|
9
8
|
|
10
9
|
> 📘 **Info**
|
11
10
|
>
|
12
|
-
> We used a data model that
|
13
|
-
> change the `
|
11
|
+
> We used a data model that will be different from yours.
|
12
|
+
> To modify this to your own custom API, change the `add_endpoint` call with your own parameters.
|
14
13
|
|
15
14
|
```ruby
|
16
15
|
require 'mindee'
|
17
16
|
|
18
17
|
# Init a new client and configure your custom document
|
19
|
-
mindee_client = Mindee::Client.new(
|
20
|
-
api_key: 'my-api-key', # optional, can be set in environment
|
21
|
-
).config_custom_doc(
|
22
|
-
'wsnine',
|
18
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key').add_endpoint(
|
23
19
|
'john',
|
20
|
+
'wnine',
|
24
21
|
version: '1.1' # optional, if not set, use the latest version of the model
|
25
22
|
)
|
26
23
|
|
27
24
|
# Load a file from disk and parse it
|
28
|
-
|
25
|
+
result = mindee_client.doc_from_path('/path/to/file.ext')
|
26
|
+
.parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
|
29
27
|
|
30
|
-
# Print a
|
31
|
-
puts
|
28
|
+
# Print a summary of the document prediction in RST format
|
29
|
+
puts result
|
32
30
|
```
|
33
31
|
|
34
32
|
If the `version` argument is set, you'll be required to update it every time a new model is trained.
|
@@ -39,7 +37,8 @@ The client calls the `parse` method when parsing your custom document, which wil
|
|
39
37
|
The document type must be specified when calling the parse method.
|
40
38
|
|
41
39
|
```ruby
|
42
|
-
result = mindee_client.doc_from_path('/path/to/custom_file')
|
40
|
+
result = mindee_client.doc_from_path('/path/to/custom_file')
|
41
|
+
.parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
|
43
42
|
puts result
|
44
43
|
```
|
45
44
|
|
@@ -49,13 +48,13 @@ puts result
|
|
49
48
|
> you **must** specify your account name when calling the `parse` method:
|
50
49
|
|
51
50
|
```ruby
|
52
|
-
mindee_client = Mindee::Client.new.
|
51
|
+
mindee_client = Mindee::Client.new.add_endpoint(
|
53
52
|
'receipt',
|
54
53
|
'john'
|
55
54
|
)
|
56
55
|
|
57
56
|
result = mindee_client.doc_from_path('/path/to/receipt.jpg')
|
58
|
-
.parse(
|
57
|
+
.parse(Mindee::Prediction::CustomV1, account_name: 'john')
|
59
58
|
```
|
60
59
|
|
61
60
|
## Document Fields
|
@@ -64,64 +63,58 @@ All the fields defined in the API builder when creating your custom document are
|
|
64
63
|
In custom documents, each field will hold an array of all the words in the document which are related to that field.
|
65
64
|
Each word is an object that has the text content, geometry information, and confidence score.
|
66
65
|
|
67
|
-
Value fields can be accessed
|
66
|
+
Value fields can be accessed via the `fields` attribute.
|
68
67
|
|
69
|
-
Classification fields can be accessed
|
68
|
+
Classification fields can be accessed via the `classifications` attribute.
|
70
69
|
|
71
70
|
> 📘 **Info**
|
72
71
|
>
|
73
72
|
> Both document level and page level objects work in the same way.
|
74
73
|
|
75
|
-
###
|
76
|
-
|
74
|
+
### Fields Attribute
|
75
|
+
The `fields` attribute is a hashmap with the following structure:
|
76
|
+
|
77
|
+
* key: the API name of the field, as a `symbol`
|
78
|
+
* value: a `ListField` object which has a `values` attribute, containing a list of all values found for the field.
|
79
|
+
|
80
|
+
Individual field values can be accessed by using the field's API name. In the examples below, we'll use the `address` field.
|
77
81
|
|
78
82
|
```ruby
|
79
83
|
# raw data, list of each word object
|
80
|
-
|
84
|
+
pp result.inference.prediction.fields[:address].values
|
81
85
|
|
82
86
|
# list of all values
|
83
|
-
puts
|
87
|
+
puts result.inference.prediction.fields[:address].contents_list
|
84
88
|
|
85
89
|
# default string representation
|
86
|
-
puts
|
90
|
+
puts result.inference.prediction.fields[:address].to_s
|
87
91
|
|
88
92
|
# custom string representation
|
89
|
-
puts
|
93
|
+
puts result.inference.prediction.fields[:address].contents_str(separator: '_')
|
90
94
|
```
|
91
95
|
|
92
|
-
|
93
|
-
In addition to accessing a value field directly, it's possible to access it through the `fields` attribute.
|
94
|
-
It's a hashmap with the following structure:
|
95
|
-
* key: the API name of the field, as a `symbol`
|
96
|
-
* value: a `ListField` object which has a `values` attribute, containing a list of all values found for the field.
|
97
|
-
|
98
|
-
```ruby
|
99
|
-
# raw data, list of each word object
|
100
|
-
puts w9_data.document.fields[:address].values
|
101
|
-
```
|
102
|
-
|
103
|
-
This makes it simple to iterate over all the fields:
|
96
|
+
To iterate over all the fields:
|
104
97
|
```ruby
|
105
|
-
|
98
|
+
result.inference.prediction.fields.each do |name, info|
|
106
99
|
puts name
|
107
100
|
puts info.values
|
108
101
|
end
|
109
102
|
```
|
110
103
|
|
111
|
-
### Classifications
|
112
|
-
|
113
|
-
|
104
|
+
### Classifications Attribute
|
105
|
+
The `classifications` attribute is a hashmap with the following structure:
|
106
|
+
|
114
107
|
* key: the API name of the field, as a `symbol`
|
115
108
|
* value: a `ClassificationField` object which has a `value` attribute, containing a string representation of the detected classification.
|
116
109
|
|
117
110
|
```ruby
|
118
111
|
# string representation of the detected classification
|
119
|
-
puts
|
112
|
+
puts result.document.classifications[:doc_type].value
|
120
113
|
```
|
121
114
|
|
122
|
-
|
115
|
+
To iterate over all the classifications:
|
123
116
|
```ruby
|
124
|
-
|
117
|
+
result.document.classifications.each do |name, info|
|
125
118
|
puts name
|
126
119
|
puts info.value
|
127
120
|
end
|
data/docs/ruby-invoice-ocr.md
CHANGED
@@ -19,7 +19,7 @@ puts result.inference.prediction
|
|
19
19
|
```
|
20
20
|
|
21
21
|
Output:
|
22
|
-
```
|
22
|
+
```
|
23
23
|
:Locale: en; en; CAD;
|
24
24
|
:Document type: INVOICE
|
25
25
|
:Invoice number: 14
|
@@ -48,8 +48,10 @@ Code QTY Price Amount Tax (Rate) Descript
|
|
48
48
|
====================== ======== ========= ========== ================== ====================================
|
49
49
|
```
|
50
50
|
|
51
|
-
|
52
|
-
|
51
|
+
> 📘 **Info**
|
52
|
+
>
|
53
|
+
> Line item descriptions are truncated here only for display purposes.
|
54
|
+
> The full text is available in the [details](#line-items).
|
53
55
|
|
54
56
|
## Fields
|
55
57
|
Each prediction object contains a set of different fields.
|
@@ -91,11 +93,12 @@ puts result.inference.prediction.customer_name.value
|
|
91
93
|
puts result.inference.prediction.customer_address.value
|
92
94
|
```
|
93
95
|
|
94
|
-
**`
|
96
|
+
**`customer_company_registrations`** (Array<CompanyRegistration>): Customer's company registration
|
95
97
|
|
96
98
|
```ruby
|
97
99
|
result.inference.prediction.customer_company_registrations.each do |registration|
|
98
|
-
puts registration
|
100
|
+
puts registration.value
|
101
|
+
puts registration.type
|
99
102
|
end
|
100
103
|
```
|
101
104
|
|
@@ -184,14 +187,10 @@ Each object in the list contains an extra attribute:
|
|
184
187
|
|
185
188
|
* `type` (String): Type of company registration number among predefined categories.
|
186
189
|
```ruby
|
187
|
-
|
188
|
-
puts
|
189
|
-
|
190
|
-
|
191
|
-
* `value` (String): Value of the company identifier
|
192
|
-
```ruby
|
193
|
-
# Show the value of the first registration
|
194
|
-
puts result.inference.prediction.supplier_company_registrations[0].value
|
190
|
+
result.inference.prediction.supplier_company_registrations.each do |registration|
|
191
|
+
puts registration.value
|
192
|
+
puts registration.type
|
193
|
+
end
|
195
194
|
```
|
196
195
|
|
197
196
|
### Taxes
|
data/docs/ruby-passport-ocr.md
CHANGED
data/docs/ruby-receipt-ocr.md
CHANGED
data/lib/mindee/client.rb
CHANGED
@@ -23,8 +23,8 @@ module Mindee
|
|
23
23
|
# API Builder. Do not set for standard (off the shelf) endpoints.
|
24
24
|
#
|
25
25
|
# @param account_name [String] For custom endpoints, your account or organization username on the API Builder.
|
26
|
-
# This is normally not required unless you have a custom endpoint which has the
|
27
|
-
#
|
26
|
+
# This is normally not required unless you have a custom endpoint which has the same name as a
|
27
|
+
# standard (off the shelf) endpoint.
|
28
28
|
# Do not set for standard (off the shelf) endpoints.
|
29
29
|
#
|
30
30
|
# @param include_words [Boolean] Whether to include the full text for each page.
|
@@ -121,7 +121,7 @@ module Mindee
|
|
121
121
|
)
|
122
122
|
@doc_configs[[account_name, endpoint_name]] = DocumentConfig.new(
|
123
123
|
Prediction::CustomV1,
|
124
|
-
|
124
|
+
HTTP::CustomEndpoint.new(account_name, endpoint_name, version, @api_key)
|
125
125
|
)
|
126
126
|
self
|
127
127
|
end
|
@@ -163,42 +163,46 @@ module Mindee
|
|
163
163
|
|
164
164
|
private
|
165
165
|
|
166
|
+
def standard_document_config(prediction_class, endpoint_name, version)
|
167
|
+
DocumentConfig.new(
|
168
|
+
prediction_class,
|
169
|
+
HTTP::StandardEndpoint.new(endpoint_name, version, @api_key)
|
170
|
+
)
|
171
|
+
end
|
172
|
+
|
166
173
|
def init_default_endpoints
|
167
|
-
@doc_configs[['mindee', Prediction::
|
168
|
-
Prediction::
|
169
|
-
|
174
|
+
@doc_configs[['mindee', Prediction::ProofOfAddressV1.name]] = standard_document_config(
|
175
|
+
Prediction::ProofOfAddressV1, 'proof_of_address', '1'
|
176
|
+
)
|
177
|
+
@doc_configs[['mindee', Prediction::FinancialDocumentV1.name]] = standard_document_config(
|
178
|
+
Prediction::FinancialDocumentV1, 'financial_document', '1'
|
179
|
+
)
|
180
|
+
@doc_configs[['mindee', Prediction::InvoiceV4.name]] = standard_document_config(
|
181
|
+
Prediction::InvoiceV4, 'invoices', '4'
|
170
182
|
)
|
171
|
-
@doc_configs[['mindee', Prediction::ReceiptV4.name]] =
|
172
|
-
Prediction::ReceiptV4,
|
173
|
-
[HTTP::StandardEndpoint.new('expense_receipts', '4', @api_key)]
|
183
|
+
@doc_configs[['mindee', Prediction::ReceiptV4.name]] = standard_document_config(
|
184
|
+
Prediction::ReceiptV4, 'expense_receipts', '4'
|
174
185
|
)
|
175
|
-
@doc_configs[['mindee', Prediction::PassportV1.name]] =
|
176
|
-
Prediction::PassportV1,
|
177
|
-
[HTTP::StandardEndpoint.new('passport', '1', @api_key)]
|
186
|
+
@doc_configs[['mindee', Prediction::PassportV1.name]] = standard_document_config(
|
187
|
+
Prediction::PassportV1, 'passport', '1'
|
178
188
|
)
|
179
|
-
@doc_configs[['mindee', Prediction::EU::LicensePlateV1.name]] =
|
180
|
-
Prediction::EU::LicensePlateV1,
|
181
|
-
[HTTP::StandardEndpoint.new('license_plates', '1', @api_key)]
|
189
|
+
@doc_configs[['mindee', Prediction::EU::LicensePlateV1.name]] = standard_document_config(
|
190
|
+
Prediction::EU::LicensePlateV1, 'license_plates', '1'
|
182
191
|
)
|
183
|
-
@doc_configs[['mindee', Prediction::ShippingContainerV1.name]] =
|
184
|
-
Prediction::ShippingContainerV1,
|
185
|
-
[HTTP::StandardEndpoint.new('shipping_containers', '1', @api_key)]
|
192
|
+
@doc_configs[['mindee', Prediction::ShippingContainerV1.name]] = standard_document_config(
|
193
|
+
Prediction::ShippingContainerV1, 'shipping_containers', '1'
|
186
194
|
)
|
187
|
-
@doc_configs[['mindee', Prediction::US::BankCheckV1.name]] =
|
188
|
-
Prediction::US::BankCheckV1,
|
189
|
-
[HTTP::StandardEndpoint.new('bank_check', '1', @api_key)]
|
195
|
+
@doc_configs[['mindee', Prediction::US::BankCheckV1.name]] = standard_document_config(
|
196
|
+
Prediction::US::BankCheckV1, 'bank_check', '1'
|
190
197
|
)
|
191
|
-
@doc_configs[['mindee', Prediction::FR::BankAccountDetailsV1.name]] =
|
192
|
-
Prediction::FR::BankAccountDetailsV1,
|
193
|
-
[HTTP::StandardEndpoint.new('bank_account_details', '1', @api_key)]
|
198
|
+
@doc_configs[['mindee', Prediction::FR::BankAccountDetailsV1.name]] = standard_document_config(
|
199
|
+
Prediction::FR::BankAccountDetailsV1, 'bank_account_details', '1'
|
194
200
|
)
|
195
|
-
@doc_configs[['mindee', Prediction::FR::CarteVitaleV1.name]] =
|
196
|
-
Prediction::FR::CarteVitaleV1,
|
197
|
-
[HTTP::StandardEndpoint.new('carte_vitale', '1', @api_key)]
|
201
|
+
@doc_configs[['mindee', Prediction::FR::CarteVitaleV1.name]] = standard_document_config(
|
202
|
+
Prediction::FR::CarteVitaleV1, 'carte_vitale', '1'
|
198
203
|
)
|
199
|
-
@doc_configs[['mindee', Prediction::FR::IdCardV1.name]] =
|
200
|
-
Prediction::FR::IdCardV1,
|
201
|
-
[HTTP::StandardEndpoint.new('idcard_fr', '1', @api_key)]
|
204
|
+
@doc_configs[['mindee', Prediction::FR::IdCardV1.name]] = standard_document_config(
|
205
|
+
Prediction::FR::IdCardV1, 'idcard_fr', '1'
|
202
206
|
)
|
203
207
|
self
|
204
208
|
end
|
@@ -11,14 +11,14 @@ module Mindee
|
|
11
11
|
# Specific client for sending a document to the API.
|
12
12
|
class DocumentConfig
|
13
13
|
# The Mindee::HTTP::Endpoint to be used.
|
14
|
-
# @return [
|
15
|
-
attr_reader :
|
14
|
+
# @return [Mindee::HTTP::Endpoint]
|
15
|
+
attr_reader :endpoint
|
16
16
|
|
17
17
|
# @param prediction_class [Class<Mindee::Prediction::Prediction>]
|
18
|
-
# @param
|
19
|
-
def initialize(prediction_class,
|
18
|
+
# @param endpoint [Mindee::HTTP::Endpoint]
|
19
|
+
def initialize(prediction_class, endpoint)
|
20
20
|
@prediction_class = prediction_class
|
21
|
-
@
|
21
|
+
@endpoint = endpoint
|
22
22
|
end
|
23
23
|
|
24
24
|
# Call the prediction API.
|
@@ -28,16 +28,8 @@ module Mindee
|
|
28
28
|
# @param cropper [Boolean]
|
29
29
|
# @return [Mindee::DocumentResponse]
|
30
30
|
def predict(input_doc, include_words, close_file, cropper)
|
31
|
-
|
31
|
+
check_api_key
|
32
32
|
response = predict_request(input_doc, include_words, close_file, cropper)
|
33
|
-
parse_response(response)
|
34
|
-
end
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
# @param response [Net::HTTPResponse]
|
39
|
-
# @return [Mindee::DocumentResponse]
|
40
|
-
def parse_response(response)
|
41
33
|
hashed_response = JSON.parse(response.body, object_class: Hash)
|
42
34
|
return Document.new(@prediction_class, hashed_response['document']) if (200..299).include?(response.code.to_i)
|
43
35
|
|
@@ -45,24 +37,24 @@ module Mindee
|
|
45
37
|
raise error
|
46
38
|
end
|
47
39
|
|
40
|
+
private
|
41
|
+
|
48
42
|
# @param input_doc [Mindee::InputDocument]
|
49
43
|
# @param include_words [Boolean]
|
50
44
|
# @param close_file [Boolean]
|
51
45
|
# @param cropper [Boolean]
|
52
46
|
# @return [Net::HTTPResponse]
|
53
47
|
def predict_request(input_doc, include_words, close_file, cropper)
|
54
|
-
@
|
48
|
+
@endpoint.predict_req_post(input_doc, include_words: include_words, close_file: close_file, cropper: cropper)
|
55
49
|
end
|
56
50
|
|
57
|
-
def
|
58
|
-
@
|
59
|
-
next unless endpoint.api_key.nil? || endpoint.api_key.empty?
|
51
|
+
def check_api_key
|
52
|
+
return unless @endpoint.api_key.nil? || @endpoint.api_key.empty?
|
60
53
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
end
|
54
|
+
raise "Missing API key for '#{@document_type}', " \
|
55
|
+
"check your Client Configuration.\n" \
|
56
|
+
'You can set this using the ' \
|
57
|
+
"'#{HTTP::API_KEY_ENV_NAME}' environment variable."
|
66
58
|
end
|
67
59
|
end
|
68
60
|
end
|
@@ -0,0 +1,245 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../common_fields'
|
4
|
+
require_relative '../base'
|
5
|
+
require_relative 'invoice_line_item'
|
6
|
+
|
7
|
+
module Mindee
|
8
|
+
module Prediction
|
9
|
+
# Financial document (invoice or receipt).
|
10
|
+
class FinancialDocumentV1 < Prediction
|
11
|
+
# Locale information.
|
12
|
+
# @return [Mindee::Locale]
|
13
|
+
attr_reader :locale
|
14
|
+
# The nature of the invoice.
|
15
|
+
# @return [Mindee::TextField]
|
16
|
+
attr_reader :document_type
|
17
|
+
# The total amount with tax included.
|
18
|
+
# @return [Mindee::AmountField]
|
19
|
+
attr_reader :total_amount
|
20
|
+
# The total amount without the tax value.
|
21
|
+
# @return [Mindee::AmountField]
|
22
|
+
attr_reader :total_net
|
23
|
+
# The total tax.
|
24
|
+
# @return [Mindee::AmountField]
|
25
|
+
attr_reader :total_tax
|
26
|
+
# The creation date of the invoice.
|
27
|
+
# @return [Mindee::DateField]
|
28
|
+
attr_reader :date
|
29
|
+
# The invoice number.
|
30
|
+
# @return [Mindee::TextField]
|
31
|
+
attr_reader :invoice_number
|
32
|
+
# List of Reference numbers including PO number.
|
33
|
+
# @return [Array<Mindee::TextField>]
|
34
|
+
attr_reader :reference_numbers
|
35
|
+
# The due date of the invoice.
|
36
|
+
# @return [Mindee::DateField]
|
37
|
+
attr_reader :due_date
|
38
|
+
# The list of taxes.
|
39
|
+
# @return [Array<Mindee::TaxField>]
|
40
|
+
attr_reader :taxes
|
41
|
+
# The name of the customer.
|
42
|
+
# @return [Mindee::TextField]
|
43
|
+
attr_reader :customer_name
|
44
|
+
# The address of the customer.
|
45
|
+
# @return [Mindee::TextField]
|
46
|
+
attr_reader :customer_address
|
47
|
+
# The company registration information for the customer.
|
48
|
+
# @return [Array<Mindee::CompanyRegistration>]
|
49
|
+
attr_reader :customer_company_registrations
|
50
|
+
# The supplier's name.
|
51
|
+
# @return [Mindee::TextField]
|
52
|
+
attr_reader :supplier_name
|
53
|
+
# The supplier's address.
|
54
|
+
# @return [Mindee::TextField]
|
55
|
+
attr_reader :supplier_address
|
56
|
+
# The payment information.
|
57
|
+
# @return [Array<Mindee::PaymentDetails>]
|
58
|
+
attr_reader :supplier_payment_details
|
59
|
+
# The supplier's company registration information.
|
60
|
+
# @return [Array<Mindee::CompanyRegistration>]
|
61
|
+
attr_reader :supplier_company_registrations
|
62
|
+
# Line items details.
|
63
|
+
# @return [Array<Mindee::InvoiceLineItem>]
|
64
|
+
attr_reader :line_items
|
65
|
+
# Time as seen on the receipt in HH:MM format.
|
66
|
+
# @return [Mindee::TextField]
|
67
|
+
attr_reader :time
|
68
|
+
# The receipt category among predefined classes.
|
69
|
+
# @return [Mindee::TextField]
|
70
|
+
attr_reader :category
|
71
|
+
# The receipt sub-category among predefined classes.
|
72
|
+
# @return [Mindee::TextField]
|
73
|
+
attr_reader :subcategory
|
74
|
+
# A classification field that can return 4 values: 'EXPENSE RECEIPT',
|
75
|
+
# 'CREDIT CARD RECEIPT', 'INVOICE', 'CREDIT NOTE'
|
76
|
+
# @return [Mindee::TextField]
|
77
|
+
attr_reader :document_type # rubocop:todo Lint/DuplicateMethods
|
78
|
+
# Total amount of tip and gratuity. Both typed and handwritten characters are supported.
|
79
|
+
# @return [Mindee::AmountField]
|
80
|
+
attr_reader :tip
|
81
|
+
|
82
|
+
# @param prediction [Hash]
|
83
|
+
# @param page_id [Integer, nil]
|
84
|
+
def initialize(prediction, page_id) # rubocop:todo Metrics/AbcSize
|
85
|
+
super
|
86
|
+
|
87
|
+
@time = TextField.new(prediction['time'], page_id)
|
88
|
+
@category = TextField.new(prediction['category'], page_id)
|
89
|
+
@subcategory = TextField.new(prediction['subcategory'], page_id)
|
90
|
+
@document_type = TextField.new(prediction['document_type'], page_id)
|
91
|
+
@tip = AmountField.new(prediction['tip'], page_id)
|
92
|
+
@locale = Locale.new(prediction['locale'])
|
93
|
+
@document_type = TextField.new(prediction['document_type'], page_id)
|
94
|
+
@total_amount = AmountField.new(prediction['total_amount'], page_id)
|
95
|
+
@total_net = AmountField.new(prediction['total_net'], page_id)
|
96
|
+
@customer_address = TextField.new(prediction['customer_address'], page_id)
|
97
|
+
@customer_name = TextField.new(prediction['customer_name'], page_id)
|
98
|
+
@date = DateField.new(prediction['date'], page_id)
|
99
|
+
@due_date = DateField.new(prediction['due_date'], page_id)
|
100
|
+
@invoice_number = TextField.new(prediction['invoice_number'], page_id)
|
101
|
+
@supplier_name = TextField.new(prediction['supplier_name'], page_id)
|
102
|
+
@supplier_address = TextField.new(prediction['supplier_address'], page_id)
|
103
|
+
|
104
|
+
@reference_numbers = []
|
105
|
+
prediction['reference_numbers'].each do |item|
|
106
|
+
@reference_numbers.push(TextField.new(item, page_id))
|
107
|
+
end
|
108
|
+
@customer_company_registrations = []
|
109
|
+
prediction['customer_company_registrations'].each do |item|
|
110
|
+
@customer_company_registrations.push(CompanyRegistration.new(item, page_id))
|
111
|
+
end
|
112
|
+
@taxes = []
|
113
|
+
prediction['taxes'].each do |item|
|
114
|
+
@taxes.push(TaxField.new(item, page_id))
|
115
|
+
end
|
116
|
+
@supplier_payment_details = []
|
117
|
+
prediction['supplier_payment_details'].each do |item|
|
118
|
+
@supplier_payment_details.push(PaymentDetails.new(item, page_id))
|
119
|
+
end
|
120
|
+
@supplier_company_registrations = []
|
121
|
+
prediction['supplier_company_registrations'].each do |item|
|
122
|
+
@supplier_company_registrations.push(CompanyRegistration.new(item, page_id))
|
123
|
+
end
|
124
|
+
|
125
|
+
@total_tax = AmountField.new(
|
126
|
+
{ value: nil, confidence: 0.0 }, page_id
|
127
|
+
)
|
128
|
+
|
129
|
+
@line_items = []
|
130
|
+
prediction['line_items'].each do |item|
|
131
|
+
@line_items.push(InvoiceLineItem.new(item, page_id))
|
132
|
+
end
|
133
|
+
reconstruct(page_id)
|
134
|
+
end
|
135
|
+
|
136
|
+
def to_s
|
137
|
+
customer_company_registrations = @customer_company_registrations.map(&:value).join('; ')
|
138
|
+
supplier_payment_details = @supplier_payment_details.map(&:to_s).join("\n ")
|
139
|
+
supplier_company_registrations = @supplier_company_registrations.map(&:to_s).join('; ')
|
140
|
+
reference_numbers = @reference_numbers.map(&:to_s).join(', ')
|
141
|
+
taxes = @taxes.join("\n ")
|
142
|
+
out_str = String.new
|
143
|
+
out_str << "\n:Document type: #{@document_type}".rstrip
|
144
|
+
out_str << "\n:Category: #{@category}".rstrip
|
145
|
+
out_str << "\n:Subcategory: #{@subcategory}".rstrip
|
146
|
+
out_str << "\n:Locale: #{@locale}".rstrip
|
147
|
+
out_str << "\n:Date: #{@date}".rstrip
|
148
|
+
out_str << "\n:Due date: #{@due_date}".rstrip
|
149
|
+
out_str << "\n:Time: #{@time}".rstrip
|
150
|
+
out_str << "\n:Number: #{@invoice_number}".rstrip
|
151
|
+
out_str << "\n:Reference numbers: #{reference_numbers}".rstrip
|
152
|
+
out_str << "\n:Supplier name: #{@supplier_name}".rstrip
|
153
|
+
out_str << "\n:Supplier address: #{@supplier_address}".rstrip
|
154
|
+
out_str << "\n:Supplier company registrations: #{supplier_company_registrations}".rstrip
|
155
|
+
out_str << "\n:Supplier payment details: #{supplier_payment_details}".rstrip
|
156
|
+
|
157
|
+
out_str << "\n:Customer name: #{@customer_name}".rstrip
|
158
|
+
out_str << "\n:Customer address: #{@customer_address}".rstrip
|
159
|
+
out_str << "\n:Customer company registrations: #{customer_company_registrations}".rstrip
|
160
|
+
|
161
|
+
out_str << "\n:Tip: #{@tip}".rstrip
|
162
|
+
|
163
|
+
out_str << "\n:Taxes: #{taxes}".rstrip
|
164
|
+
out_str << "\n:Total taxes: #{@total_tax}".rstrip
|
165
|
+
out_str << "\n:Total net: #{@total_net}".rstrip
|
166
|
+
out_str << "\n:Total amount: #{@total_amount}".rstrip
|
167
|
+
|
168
|
+
out_str << line_items_to_s
|
169
|
+
|
170
|
+
out_str[1..].to_s
|
171
|
+
end
|
172
|
+
|
173
|
+
private
|
174
|
+
|
175
|
+
def line_items_to_s
|
176
|
+
line_item_separator = "#{'=' * 22} #{'=' * 8} #{'=' * 9} #{'=' * 10} #{'=' * 18} #{'=' * 36}"
|
177
|
+
line_items = @line_items.map(&:to_s).join("\n")
|
178
|
+
|
179
|
+
out_str = String.new
|
180
|
+
out_str << "\n\n:Line Items:"
|
181
|
+
|
182
|
+
return out_str if line_items.empty?
|
183
|
+
|
184
|
+
out_str << "\n#{line_item_separator}"
|
185
|
+
out_str << "\nCode QTY Price Amount Tax (Rate) Description"
|
186
|
+
out_str << "\n#{line_item_separator}"
|
187
|
+
out_str << "\n#{line_items}"
|
188
|
+
out_str << "\n#{line_item_separator}"
|
189
|
+
end
|
190
|
+
|
191
|
+
def reconstruct(page_id)
|
192
|
+
construct_total_tax_from_taxes(page_id)
|
193
|
+
return unless page_id.nil?
|
194
|
+
|
195
|
+
construct_total_excl_from_tcc_and_taxes(page_id)
|
196
|
+
construct_total_incl_from_taxes_plus_excl(page_id)
|
197
|
+
construct_total_tax_from_totals(page_id)
|
198
|
+
end
|
199
|
+
|
200
|
+
def construct_total_excl_from_tcc_and_taxes(page_id)
|
201
|
+
return if @total_amount.value.nil? || taxes.empty? || !@total_net.value.nil?
|
202
|
+
|
203
|
+
total_excl = {
|
204
|
+
'value' => @total_amount.value - @taxes.map(&:value).sum,
|
205
|
+
'confidence' => TextField.array_confidence(@taxes) * @total_amount.confidence,
|
206
|
+
}
|
207
|
+
@total_net = AmountField.new(total_excl, page_id, reconstructed: true)
|
208
|
+
end
|
209
|
+
|
210
|
+
def construct_total_incl_from_taxes_plus_excl(page_id)
|
211
|
+
return if @total_net.value.nil? || @taxes.empty? || !@total_amount.value.nil?
|
212
|
+
|
213
|
+
total_incl = {
|
214
|
+
'value' => @taxes.map(&:value).sum + @total_net.value,
|
215
|
+
'confidence' => TextField.array_confidence(@taxes) * @total_net.confidence,
|
216
|
+
}
|
217
|
+
@total_amount = AmountField.new(total_incl, page_id, reconstructed: true)
|
218
|
+
end
|
219
|
+
|
220
|
+
def construct_total_tax_from_taxes(page_id)
|
221
|
+
return if @taxes.empty?
|
222
|
+
|
223
|
+
total_tax = {
|
224
|
+
'value' => @taxes.map(&:value).sum,
|
225
|
+
'confidence' => TextField.array_confidence(@taxes),
|
226
|
+
}
|
227
|
+
return unless total_tax['value'].positive?
|
228
|
+
|
229
|
+
@total_tax = AmountField.new(total_tax, page_id, reconstructed: true)
|
230
|
+
end
|
231
|
+
|
232
|
+
def construct_total_tax_from_totals(page_id)
|
233
|
+
return if !@total_tax.value.nil? || @total_amount.value.nil? || @total_net.value.nil?
|
234
|
+
|
235
|
+
total_tax = {
|
236
|
+
'value' => @total_amount.value - @total_net.value,
|
237
|
+
'confidence' => TextField.array_confidence(@taxes),
|
238
|
+
}
|
239
|
+
return unless total_tax['value'] >= 0
|
240
|
+
|
241
|
+
@total_tax = AmountField.new(total_tax, page_id, reconstructed: true)
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../common_fields/base'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
# Line items for invoices
|
7
|
+
class InvoiceLineItem
|
8
|
+
# @return [String] The product code referring to the item.
|
9
|
+
attr_reader :product_code
|
10
|
+
# @return [String]
|
11
|
+
attr_reader :description
|
12
|
+
# @return [Float]
|
13
|
+
attr_reader :quantity
|
14
|
+
# @return [Float]
|
15
|
+
attr_reader :unit_price
|
16
|
+
# @return [Float]
|
17
|
+
attr_reader :total_amount
|
18
|
+
# @return [Float] The item tax rate percentage.
|
19
|
+
attr_reader :tax_rate
|
20
|
+
# @return [Float]
|
21
|
+
attr_reader :tax_amount
|
22
|
+
# @return [Float]
|
23
|
+
attr_reader :confidence
|
24
|
+
# @return [Integer]
|
25
|
+
attr_reader :page_id
|
26
|
+
# @return [Mindee::Geometry::Quadrilateral]
|
27
|
+
attr_reader :bounding_box
|
28
|
+
# @return [Array<Mindee::Geometry::Polygon>]
|
29
|
+
attr_reader :polygon
|
30
|
+
|
31
|
+
def initialize(prediction, page_id)
|
32
|
+
@product_code = prediction['product_code']
|
33
|
+
@quantity = prediction['quantity']
|
34
|
+
@unit_price = prediction['unit_price']
|
35
|
+
@total_amount = prediction['total_amount']
|
36
|
+
@tax_amount = prediction['tax_amount']
|
37
|
+
@tax_rate = prediction['tax_rate']
|
38
|
+
@description = prediction['description']
|
39
|
+
@page_id = page_id
|
40
|
+
end
|
41
|
+
|
42
|
+
def to_s
|
43
|
+
tax = Field.float_to_string(@tax_amount)
|
44
|
+
tax << " (#{Field.float_to_string(@tax_rate)}%)" unless @tax_rate.nil?
|
45
|
+
|
46
|
+
description = @description.nil? ? '' : @description
|
47
|
+
description = "#{description[0..32]}..." if description.size > 35
|
48
|
+
|
49
|
+
out_str = String.new
|
50
|
+
out_str << format('%- 22s', @product_code)
|
51
|
+
out_str << " #{format('%- 8s', Field.float_to_string(@quantity))}"
|
52
|
+
out_str << " #{format('%- 9s', Field.float_to_string(@unit_price))}"
|
53
|
+
out_str << " #{format('%- 10s', Field.float_to_string(@total_amount))}"
|
54
|
+
out_str << " #{format('%- 18s', tax)}"
|
55
|
+
out_str << " #{description}"
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'mrz'
|
4
|
+
|
5
|
+
require_relative '../common_fields'
|
6
|
+
require_relative '../base'
|
7
|
+
|
8
|
+
module Mindee
|
9
|
+
module Prediction
|
10
|
+
# Proof of Address document.
|
11
|
+
class ProofOfAddressV1 < Prediction
|
12
|
+
# ISO 639-1 code, works best with ca, de, en, es, fr, it, nl and pt.
|
13
|
+
# @return [Mindee::Locale]
|
14
|
+
attr_reader :locale
|
15
|
+
# ISO date yyyy-mm-dd. Works both for European and US dates.
|
16
|
+
# @return [Mindee::DateField]
|
17
|
+
attr_reader :date
|
18
|
+
# All extracted ISO date yyyy-mm-dd. Works both for European and US dates.
|
19
|
+
# @return [Array<Mindee::DateField>]
|
20
|
+
attr_reader :dates
|
21
|
+
# Address of the document's issuer.
|
22
|
+
# @return [Mindee::TextField]
|
23
|
+
attr_reader :issuer_address
|
24
|
+
# Generic: VAT NUMBER, TAX ID, COMPANY REGISTRATION NUMBER or country specific.
|
25
|
+
# @return [Array<Mindee::CompanyRegistration>]
|
26
|
+
attr_reader :issuer_company_registration
|
27
|
+
# Name of the person or company issuing the document.
|
28
|
+
# @return [Mindee::TextField]
|
29
|
+
attr_reader :issuer_name
|
30
|
+
# Address of the recipient.
|
31
|
+
# @return [Mindee::TextField]
|
32
|
+
attr_reader :recipient_address
|
33
|
+
# Generic: VAT NUMBER, TAX ID, COMPANY REGISTRATION NUMBER or country specific.
|
34
|
+
# @return [Array<Mindee::CompanyRegistration>]
|
35
|
+
attr_reader :recipient_company_registration
|
36
|
+
# Name of the document's recipient.
|
37
|
+
# @return [Mindee::TextField]
|
38
|
+
attr_reader :recipient_name
|
39
|
+
|
40
|
+
# @param prediction [Hash]
|
41
|
+
# @param page_id [Integer, nil]
|
42
|
+
def initialize(prediction, page_id)
|
43
|
+
super
|
44
|
+
@locale = Locale.new(prediction['locale'])
|
45
|
+
@date = DateField.new(prediction['date'], page_id)
|
46
|
+
@dates = []
|
47
|
+
prediction['dates'].each do |item|
|
48
|
+
@dates.push(DateField.new(item, page_id))
|
49
|
+
end
|
50
|
+
@issuer_name = TextField.new(prediction['issuer_name'], page_id)
|
51
|
+
@issuer_address = TextField.new(prediction['issuer_address'], page_id)
|
52
|
+
@issuer_company_registration = []
|
53
|
+
prediction['issuer_company_registration'].each do |item|
|
54
|
+
@issuer_company_registration.push(CompanyRegistration.new(item, page_id))
|
55
|
+
end
|
56
|
+
@recipient_name = TextField.new(prediction['recipient_name'], page_id)
|
57
|
+
@recipient_address = TextField.new(prediction['recipient_address'], page_id)
|
58
|
+
@recipient_company_registration = []
|
59
|
+
prediction['recipient_company_registration'].each do |item|
|
60
|
+
@recipient_company_registration.push(CompanyRegistration.new(item, page_id))
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def to_s
|
65
|
+
recipient_company_registrations = @recipient_company_registration.join(' ')
|
66
|
+
issuer_company_registrations = @issuer_company_registration.join(' ')
|
67
|
+
dates = @dates.join("\n ")
|
68
|
+
out_str = String.new
|
69
|
+
out_str << "\n:Locale: #{@locale}".rstrip
|
70
|
+
out_str << "\n:Issuer name: #{@issuer_name}".rstrip
|
71
|
+
out_str << "\n:Issuer Address: #{@issuer_address}".rstrip
|
72
|
+
out_str << "\n:Issuer Company Registrations: #{issuer_company_registrations}".rstrip
|
73
|
+
out_str << "\n:Recipient name: #{@recipient_name}".rstrip
|
74
|
+
out_str << "\n:Recipient Address: #{@recipient_address}".rstrip
|
75
|
+
out_str << "\n:Recipient Company Registrations: #{recipient_company_registrations}".rstrip
|
76
|
+
out_str << "\n:Issuance date: #{@date}".rstrip
|
77
|
+
out_str << "\n:Dates: #{dates}".rstrip
|
78
|
+
out_str[1..].to_s
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -40,6 +40,9 @@ module Mindee
|
|
40
40
|
# Whether the document is an expense receipt or a credit card receipt.
|
41
41
|
# @return [Mindee::TextField]
|
42
42
|
attr_reader :document_type
|
43
|
+
# Total amount of tip and gratuity. Both typed and handwritten characters are supported.
|
44
|
+
# @return [Mindee::AmountField]
|
45
|
+
attr_reader :tip
|
43
46
|
|
44
47
|
# @param prediction [Hash]
|
45
48
|
# @param page_id [Integer, nil]
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'prediction/custom/custom_v1'
|
4
|
+
require_relative 'prediction/proof_of_address/proof_of_address_v1'
|
5
|
+
require_relative 'prediction/financial_document/financial_document_v1'
|
4
6
|
require_relative 'prediction/invoice/invoice_v4'
|
5
7
|
require_relative 'prediction/passport/passport_v1'
|
6
8
|
require_relative 'prediction/receipt/receipt_v4'
|
data/lib/mindee/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mindee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mindee, SA
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: marcel
|
@@ -164,12 +164,15 @@ files:
|
|
164
164
|
- lib/mindee/parsing/prediction/custom/custom_v1.rb
|
165
165
|
- lib/mindee/parsing/prediction/custom/fields.rb
|
166
166
|
- lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb
|
167
|
+
- lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb
|
168
|
+
- lib/mindee/parsing/prediction/financial_document/invoice_line_item.rb
|
167
169
|
- lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb
|
168
170
|
- lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb
|
169
171
|
- lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb
|
170
172
|
- lib/mindee/parsing/prediction/invoice/invoice_line_item.rb
|
171
173
|
- lib/mindee/parsing/prediction/invoice/invoice_v4.rb
|
172
174
|
- lib/mindee/parsing/prediction/passport/passport_v1.rb
|
175
|
+
- lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb
|
173
176
|
- lib/mindee/parsing/prediction/receipt/receipt_v4.rb
|
174
177
|
- lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb
|
175
178
|
- lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb
|