mindee 1.2.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/.rubocop.yml +2 -2
- data/.yardopts +4 -0
- data/CHANGELOG.md +21 -0
- data/README.md +46 -23
- data/Rakefile +6 -1
- data/bin/mindee.rb +70 -61
- data/docs/ruby-api-builder.md +131 -0
- data/docs/ruby-getting-started.md +265 -0
- data/docs/ruby-invoice-ocr.md +261 -0
- data/docs/ruby-passport-ocr.md +156 -0
- data/docs/ruby-receipt-ocr.md +170 -0
- data/lib/mindee/client.rb +128 -93
- data/lib/mindee/document_config.rb +22 -154
- data/lib/mindee/geometry.rb +105 -8
- data/lib/mindee/http/endpoint.rb +80 -0
- data/lib/mindee/input/pdf_processing.rb +106 -0
- data/lib/mindee/input/sources.rb +97 -0
- data/lib/mindee/input.rb +3 -0
- data/lib/mindee/parsing/document.rb +31 -0
- data/lib/mindee/parsing/error.rb +22 -0
- data/lib/mindee/parsing/inference.rb +53 -0
- data/lib/mindee/parsing/page.rb +46 -0
- data/lib/mindee/parsing/prediction/base.rb +30 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
- data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
- data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
- data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
- data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
- data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
- data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
- data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
- data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +84 -0
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
- data/lib/mindee/parsing/prediction.rb +12 -0
- data/lib/mindee/parsing.rb +4 -0
- data/lib/mindee/version.rb +1 -1
- data/mindee.gemspec +2 -1
- metadata +57 -24
- data/lib/mindee/documents/base.rb +0 -35
- data/lib/mindee/documents/custom.rb +0 -65
- data/lib/mindee/documents/financial_doc.rb +0 -135
- data/lib/mindee/documents/invoice.rb +0 -162
- data/lib/mindee/documents/passport.rb +0 -163
- data/lib/mindee/documents/receipt.rb +0 -109
- data/lib/mindee/documents.rb +0 -7
- data/lib/mindee/endpoint.rb +0 -105
- data/lib/mindee/fields/orientation.rb +0 -26
- data/lib/mindee/fields.rb +0 -11
- data/lib/mindee/inputs.rb +0 -153
- data/lib/mindee/response.rb +0 -27
@@ -0,0 +1,170 @@
|
|
1
|
+
The Ruby OCR SDK supports the [receipt API](https://developers.mindee.com/docs/receipt-ocr) for extracting data from receipts.
|
2
|
+
|
3
|
+
Using the sample below, we are going to illustrate how to extract the data that we want using the OCR SDK.
|
4
|
+
|
5
|
+
![sample receipt](https://raw.githubusercontent.com/mindee/client-lib-test-data/main/receipt/receipt-with-tip.jpg)
|
6
|
+
|
7
|
+
## Quick Start
|
8
|
+
```ruby
|
9
|
+
require 'mindee'
|
10
|
+
|
11
|
+
# Init a new client, specifying an API key
|
12
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
13
|
+
|
14
|
+
# Send the file
|
15
|
+
result = mindee_client.doc_from_path('/path/to/the/file.ext').parse(Mindee::Prediction::ReceiptV4)
|
16
|
+
|
17
|
+
# Print a summary of the document prediction in RST format
|
18
|
+
puts result.inference.prediction
|
19
|
+
```
|
20
|
+
|
21
|
+
Output:
|
22
|
+
```shell
|
23
|
+
:Locale: en-US; en; US; USD;
|
24
|
+
:Date: 2014-07-07
|
25
|
+
:Category: food
|
26
|
+
:Subcategory: restaurant
|
27
|
+
:Document type: EXPENSE RECEIPT
|
28
|
+
:Time: 20:20
|
29
|
+
:Supplier name: LOGANS
|
30
|
+
:Taxes: 3.34 TAX
|
31
|
+
:Total net: 40.48
|
32
|
+
:Total taxes: 3.34
|
33
|
+
:Tip: 10.00
|
34
|
+
:Total amount: 53.8
|
35
|
+
```
|
36
|
+
|
37
|
+
## Fields
|
38
|
+
Each prediction object contains a set of different fields.
|
39
|
+
Each `Field` object contains at a minimum the following attributes:
|
40
|
+
|
41
|
+
* `value` (String or Float depending on the field type): corresponds to the field value. Can be `nil` if no value was extracted.
|
42
|
+
* `confidence` (Float): the confidence score of the field prediction.
|
43
|
+
* `bounding_box` (Array< Array< Float > >): contains exactly 4 relative vertex coordinates (points) of a right rectangle containing the field in the document.
|
44
|
+
* `polygon` (Array< Array< Float > >): contains the relative vertex coordinates (points) of a polygon containing the field in the image.
|
45
|
+
* `reconstructed` (Boolean): True if the field was reconstructed or computed using other fields.
|
46
|
+
|
47
|
+
|
48
|
+
## Attributes
|
49
|
+
Depending on the field type specified, additional attributes can be extracted in the `Receipt` object.
|
50
|
+
|
51
|
+
Using the above sample, the following are the basic fields that can be extracted:
|
52
|
+
|
53
|
+
- [Orientation](#orientation)
|
54
|
+
- [Category](#category)
|
55
|
+
- [Date](#date)
|
56
|
+
- [Locale](#locale)
|
57
|
+
- [Supplier Information](#supplier-information)
|
58
|
+
- [Taxes](#taxes)
|
59
|
+
- [Time](#time)
|
60
|
+
- [Totals](#totals)
|
61
|
+
|
62
|
+
|
63
|
+
### Category
|
64
|
+
* **`category`** (Field): Receipt category as seen on the receipt.
|
65
|
+
The following categories are supported: toll, food, parking, transport, accommodation, gasoline, miscellaneous.
|
66
|
+
|
67
|
+
```ruby
|
68
|
+
puts result.inference.prediction.category.value
|
69
|
+
```
|
70
|
+
|
71
|
+
|
72
|
+
### Date
|
73
|
+
Date fields:
|
74
|
+
|
75
|
+
* contain the `date_object` attribute, which is a standard Ruby [date object](https://ruby-doc.org/stdlib-2.7.1/libdoc/date/rdoc/Date.html)
|
76
|
+
* have a `value` attribute which is the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) representation of the date.
|
77
|
+
|
78
|
+
The following date fields are available:
|
79
|
+
|
80
|
+
* **`date`**: Date the receipt was issued
|
81
|
+
|
82
|
+
```ruby
|
83
|
+
puts result.inference.prediction.date.value
|
84
|
+
```
|
85
|
+
|
86
|
+
|
87
|
+
### Locale
|
88
|
+
**`locale`** (Locale): Locale information.
|
89
|
+
|
90
|
+
* `locale.value` (String): Locale with country and language codes.
|
91
|
+
```ruby
|
92
|
+
puts result.inference.prediction.locale
|
93
|
+
```
|
94
|
+
|
95
|
+
* `locale.language` (String): Language code in [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format as seen on the document.
|
96
|
+
*
|
97
|
+
```ruby
|
98
|
+
puts result.inference.prediction.locale.language
|
99
|
+
```
|
100
|
+
|
101
|
+
* `locale.currency` (String): Currency code in [ISO 4217](https://en.wikipedia.org/wiki/ISO_4217) format as seen on the document.
|
102
|
+
|
103
|
+
```ruby
|
104
|
+
puts result.inference.prediction.locale.currency
|
105
|
+
```
|
106
|
+
|
107
|
+
* `locale.country` (String): Country code in [ISO 3166-1](https://en.wikipedia.org/wiki/ISO_3166-1) alpha-2 format as seen on the document.
|
108
|
+
|
109
|
+
```ruby
|
110
|
+
puts result.inference.prediction.locale.country
|
111
|
+
```
|
112
|
+
|
113
|
+
### Supplier Information
|
114
|
+
* **`supplier_name`** (Field): Supplier name as written in the receipt.
|
115
|
+
|
116
|
+
```ruby
|
117
|
+
puts result.inference.prediction.supplier_name.value
|
118
|
+
```
|
119
|
+
|
120
|
+
|
121
|
+
### Taxes
|
122
|
+
**`taxes`** (Array< TaxField >): Contains tax fields as seen on the receipt.
|
123
|
+
|
124
|
+
* `value` (Float): The tax amount.
|
125
|
+
```ruby
|
126
|
+
# Show the amount of the first tax
|
127
|
+
puts result.inference.prediction.taxes[0].value
|
128
|
+
```
|
129
|
+
|
130
|
+
* `code` (String): The tax code (e.g. HST or GST for Canada; City Tax or State Tax for the US, etc.).
|
131
|
+
```ruby
|
132
|
+
# Show the code of the first tax
|
133
|
+
puts result.inference.prediction.taxes[0].code
|
134
|
+
```
|
135
|
+
|
136
|
+
* `rate` (Float): The tax rate.
|
137
|
+
```ruby
|
138
|
+
# Show the rate of the first tax
|
139
|
+
puts result.inference.prediction.taxes[0].rate
|
140
|
+
```
|
141
|
+
|
142
|
+
### Time
|
143
|
+
* **`time`**: Time of purchase as seen on the receipt
|
144
|
+
* `value` (String): Time of purchase in 24-hour format (hh:mm).
|
145
|
+
|
146
|
+
```ruby
|
147
|
+
puts result.inference.prediction.time.value
|
148
|
+
```
|
149
|
+
|
150
|
+
### Totals
|
151
|
+
* **`total_amount`** (Field): Total amount including taxes
|
152
|
+
|
153
|
+
```ruby
|
154
|
+
puts result.inference.prediction.total_amount.value
|
155
|
+
```
|
156
|
+
|
157
|
+
* **`total_net`** (Field): Total amount paid excluding taxes
|
158
|
+
|
159
|
+
```ruby
|
160
|
+
puts result.inference.prediction.total_net.value
|
161
|
+
```
|
162
|
+
|
163
|
+
* **`total_tax`** (Field): Total tax value from tax lines
|
164
|
+
|
165
|
+
```ruby
|
166
|
+
puts result.inference.prediction.total_tax.value
|
167
|
+
```
|
168
|
+
|
169
|
+
## Questions?
|
170
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
|
data/lib/mindee/client.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative '
|
3
|
+
require_relative 'input'
|
4
4
|
require_relative 'document_config'
|
5
|
-
require_relative 'endpoint'
|
5
|
+
require_relative 'http/endpoint'
|
6
|
+
require_relative 'parsing/prediction'
|
6
7
|
|
7
8
|
module Mindee
|
8
9
|
# General client for sending a document to the API.
|
@@ -15,157 +16,191 @@ module Mindee
|
|
15
16
|
end
|
16
17
|
|
17
18
|
# Call prediction API on the document and parse the results.
|
18
|
-
#
|
19
|
-
# @param
|
20
|
-
#
|
21
|
-
# @param
|
19
|
+
#
|
20
|
+
# @param prediction_class [Mindee::Prediction::Prediction]
|
21
|
+
#
|
22
|
+
# @param endpoint_name [String] For custom endpoints, the "API name" field in the "Settings" page of the
|
23
|
+
# API Builder. Do not set for standard (off the shelf) endpoints.
|
24
|
+
#
|
25
|
+
# @param account_name [String] For custom endpoints, your account or organization username on the API Builder.
|
26
|
+
# This is normally not required unless you have a custom endpoint which has the
|
27
|
+
# same name as a standard (off the shelf) endpoint.
|
28
|
+
# Do not set for standard (off the shelf) endpoints.
|
29
|
+
#
|
30
|
+
# @param include_words [Boolean] Whether to include the full text for each page.
|
31
|
+
# This performs a full OCR operation on the server and will increase response time.
|
32
|
+
#
|
33
|
+
# @param close_file [Boolean] Whether to `close()` the file after parsing it.
|
34
|
+
# Set to false if you need to access the file after this operation.
|
35
|
+
#
|
36
|
+
# @param page_options [Hash, nil] Page cutting/merge options:
|
37
|
+
#
|
38
|
+
# * `:page_indexes` Zero-based list of page indexes.
|
39
|
+
# * `:operation` Operation to apply on the document, given the `page_indexes` specified:
|
40
|
+
# * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
|
41
|
+
# * `:REMOVE` - remove the specified pages, and keep all others.
|
42
|
+
# * `:on_min_pages` Apply the operation only if document has at least this many pages.
|
43
|
+
#
|
44
|
+
# @param cropper [Boolean] Whether to include cropper results for each page.
|
45
|
+
# This performs a cropping operation on the server and will increase response time.
|
46
|
+
#
|
22
47
|
# @return [Mindee::DocumentResponse]
|
23
|
-
def parse(
|
48
|
+
def parse(
|
49
|
+
prediction_class,
|
50
|
+
endpoint_name: '',
|
51
|
+
account_name: '',
|
52
|
+
include_words: false,
|
53
|
+
close_file: true,
|
54
|
+
page_options: nil,
|
55
|
+
cropper: false
|
56
|
+
)
|
57
|
+
doc_config = find_doc_config(prediction_class, endpoint_name, account_name)
|
58
|
+
@input_doc.process_pdf(page_options) if !page_options.nil? && @input_doc.pdf?
|
59
|
+
doc_config.predict(@input_doc, include_words, close_file, cropper)
|
60
|
+
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
# @param document_class [Mindee::Prediction::Prediction]
|
65
|
+
# @param endpoint_name [String]
|
66
|
+
def determine_endpoint_name(document_class, endpoint_name)
|
67
|
+
return document_class.name if document_class.name != Prediction::CustomV1.name
|
68
|
+
|
69
|
+
raise "endpoint_name is required when using #{document_class.name} class" if endpoint_name.empty?
|
70
|
+
|
71
|
+
endpoint_name
|
72
|
+
end
|
73
|
+
|
74
|
+
# @param document_class [Mindee::Prediction::Prediction]
|
75
|
+
# @param endpoint_name [String]
|
76
|
+
# @param account_name [String]
|
77
|
+
def find_doc_config(document_class, endpoint_name, account_name)
|
78
|
+
endpoint_name = determine_endpoint_name(document_class, endpoint_name)
|
79
|
+
|
24
80
|
found = []
|
25
81
|
@doc_configs.each_key do |conf|
|
26
|
-
found.push(conf) if conf[1] ==
|
82
|
+
found.push(conf) if conf[1] == endpoint_name
|
27
83
|
end
|
28
|
-
raise "
|
84
|
+
raise "Endpoint not configured: #{endpoint_name}" if found.empty?
|
29
85
|
|
30
|
-
if !
|
31
|
-
config_key = [
|
86
|
+
if !account_name.empty?
|
87
|
+
config_key = [account_name, endpoint_name]
|
32
88
|
elsif found.length == 1
|
33
89
|
config_key = found[0]
|
34
90
|
else
|
35
91
|
usernames = found.map { |conf| conf[0] }
|
36
92
|
raise "Duplicate configuration detected.\n" \
|
37
|
-
"You specified the document '#{
|
93
|
+
"You specified the document '#{endpoint_name}' in your custom config.\n" \
|
38
94
|
"To avoid confusion, please add the 'account_name' attribute to " \
|
39
95
|
"the parse method, one of #{usernames}."
|
40
96
|
end
|
41
97
|
|
42
|
-
|
43
|
-
doc_config.predict(@input_doc, include_words, close_file)
|
98
|
+
@doc_configs[config_key]
|
44
99
|
end
|
45
100
|
end
|
46
101
|
|
47
102
|
# Mindee API Client.
|
48
103
|
# See: https://developers.mindee.com/docs/
|
49
104
|
class Client
|
50
|
-
|
51
|
-
|
52
|
-
DOC_TYPE_PASSPORT = 'passport'
|
53
|
-
DOC_TYPE_FINANCIAL = 'financial_doc'
|
54
|
-
|
55
|
-
# @param raise_on_error [Boolean]
|
56
|
-
def initialize(api_key: nil, raise_on_error: true)
|
57
|
-
@raise_on_error = raise_on_error
|
105
|
+
# @param api_key [String]
|
106
|
+
def initialize(api_key: '')
|
58
107
|
@doc_configs = {}
|
59
108
|
@api_key = api_key
|
60
|
-
|
61
|
-
|
62
|
-
# Configure a 'Mindee Invoice' document.
|
63
|
-
# @param api_key [String] Override the client API key for this endpoint
|
64
|
-
# @return [Mindee::Client]
|
65
|
-
def config_invoice(api_key: nil)
|
66
|
-
@doc_configs[['mindee', DOC_TYPE_INVOICE]] = InvoiceConfig.new(
|
67
|
-
api_key || @api_key,
|
68
|
-
@raise_on_error
|
69
|
-
)
|
70
|
-
self
|
71
|
-
end
|
72
|
-
|
73
|
-
# Configure a 'Mindee Expense Receipts' document.
|
74
|
-
# @param api_key [String] Override the client API key for this endpoint
|
75
|
-
# @return [Mindee::Client]
|
76
|
-
def config_receipt(api_key: nil)
|
77
|
-
@doc_configs[['mindee', DOC_TYPE_RECEIPT]] = ReceiptConfig.new(
|
78
|
-
api_key || @api_key,
|
79
|
-
@raise_on_error
|
80
|
-
)
|
81
|
-
self
|
82
|
-
end
|
83
|
-
|
84
|
-
# Configure a 'Mindee Passport' document.
|
85
|
-
# @param api_key [String] Override the client API key for this endpoint
|
86
|
-
# @return [Mindee::Client]
|
87
|
-
def config_passport(api_key: nil)
|
88
|
-
@doc_configs[['mindee', DOC_TYPE_PASSPORT]] = PassportConfig.new(
|
89
|
-
api_key || @api_key,
|
90
|
-
@raise_on_error
|
91
|
-
)
|
92
|
-
self
|
93
|
-
end
|
94
|
-
|
95
|
-
# Configure a 'Mindee Financial document'. Uses 'Invoice' and 'Expense Receipt' internally.
|
96
|
-
# @param api_key [String] Override the client API key for this endpoint
|
97
|
-
# @return [Mindee::Client]
|
98
|
-
def config_financial_doc(api_key: nil)
|
99
|
-
@doc_configs[['mindee', DOC_TYPE_FINANCIAL]] = FinancialDocConfig.new(
|
100
|
-
api_key || @api_key,
|
101
|
-
@raise_on_error
|
102
|
-
)
|
103
|
-
self
|
109
|
+
init_default_endpoints
|
104
110
|
end
|
105
111
|
|
106
112
|
# Configure a custom document using the 'Mindee API Builder'.
|
107
113
|
# @param account_name [String] Your organization's username on the API Builder
|
108
|
-
# @param
|
109
|
-
# @param api_key [String] Override the client API key for this endpoint
|
114
|
+
# @param endpoint_name [String] The "API name" field in the "Settings" page of the API Builder
|
110
115
|
# @param version [String] Specify the version of the model to use. If not set, use the latest version of the model.
|
111
116
|
# @return [Mindee::Client]
|
112
|
-
def
|
113
|
-
document_name,
|
117
|
+
def add_endpoint(
|
114
118
|
account_name,
|
115
|
-
|
119
|
+
endpoint_name,
|
116
120
|
version: '1'
|
117
121
|
)
|
118
|
-
@doc_configs[[account_name,
|
119
|
-
|
120
|
-
account_name,
|
121
|
-
version,
|
122
|
-
api_key || @api_key,
|
123
|
-
@raise_on_error
|
122
|
+
@doc_configs[[account_name, endpoint_name]] = DocumentConfig.new(
|
123
|
+
Prediction::CustomV1,
|
124
|
+
[HTTP::CustomEndpoint.new(account_name, endpoint_name, version, @api_key)]
|
124
125
|
)
|
125
126
|
self
|
126
127
|
end
|
127
128
|
|
128
129
|
# Load a document from an absolute path, as a string.
|
129
130
|
# @param input_path [String] Path of file to open
|
130
|
-
# @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
|
131
|
-
# @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
|
132
131
|
# @return [Mindee::DocumentClient]
|
133
|
-
def doc_from_path(input_path
|
134
|
-
doc = PathDocument.new(input_path
|
132
|
+
def doc_from_path(input_path)
|
133
|
+
doc = Input::PathDocument.new(input_path)
|
135
134
|
DocumentClient.new(doc, @doc_configs)
|
136
135
|
end
|
137
136
|
|
138
137
|
# Load a document from raw bytes.
|
139
138
|
# @param input_bytes [String] Encoding::BINARY byte input
|
140
139
|
# @param filename [String] The name of the file (without the path)
|
141
|
-
# @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
|
142
|
-
# @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
|
143
140
|
# @return [Mindee::DocumentClient]
|
144
|
-
def doc_from_bytes(input_bytes, filename
|
145
|
-
doc = BytesDocument.new(input_bytes, filename
|
141
|
+
def doc_from_bytes(input_bytes, filename)
|
142
|
+
doc = Input::BytesDocument.new(input_bytes, filename)
|
146
143
|
DocumentClient.new(doc, @doc_configs)
|
147
144
|
end
|
148
145
|
|
149
146
|
# Load a document from a base64 encoded string.
|
150
147
|
# @param base64_string [String] Input to parse as base64 string
|
151
148
|
# @param filename [String] The name of the file (without the path)
|
152
|
-
# @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
|
153
|
-
# @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
|
154
149
|
# @return [Mindee::DocumentClient]
|
155
|
-
def doc_from_b64string(base64_string, filename
|
156
|
-
doc = Base64Document.new(base64_string, filename
|
150
|
+
def doc_from_b64string(base64_string, filename)
|
151
|
+
doc = Input::Base64Document.new(base64_string, filename)
|
157
152
|
DocumentClient.new(doc, @doc_configs)
|
158
153
|
end
|
159
154
|
|
160
155
|
# Load a document from a normal Ruby `File`.
|
161
156
|
# @param input_file [File] Input file handle
|
162
157
|
# @param filename [String] The name of the file (without the path)
|
163
|
-
# @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
|
164
|
-
# @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
|
165
158
|
# @return [Mindee::DocumentClient]
|
166
|
-
def doc_from_file(input_file, filename
|
167
|
-
doc = FileDocument.new(input_file, filename
|
159
|
+
def doc_from_file(input_file, filename)
|
160
|
+
doc = Input::FileDocument.new(input_file, filename)
|
168
161
|
DocumentClient.new(doc, @doc_configs)
|
169
162
|
end
|
163
|
+
|
164
|
+
private
|
165
|
+
|
166
|
+
def init_default_endpoints
|
167
|
+
@doc_configs[['mindee', Prediction::InvoiceV4.name]] = DocumentConfig.new(
|
168
|
+
Prediction::InvoiceV4,
|
169
|
+
[HTTP::StandardEndpoint.new('invoices', '4', @api_key)]
|
170
|
+
)
|
171
|
+
@doc_configs[['mindee', Prediction::ReceiptV4.name]] = DocumentConfig.new(
|
172
|
+
Prediction::ReceiptV4,
|
173
|
+
[HTTP::StandardEndpoint.new('expense_receipts', '4', @api_key)]
|
174
|
+
)
|
175
|
+
@doc_configs[['mindee', Prediction::PassportV1.name]] = DocumentConfig.new(
|
176
|
+
Prediction::PassportV1,
|
177
|
+
[HTTP::StandardEndpoint.new('passport', '1', @api_key)]
|
178
|
+
)
|
179
|
+
@doc_configs[['mindee', Prediction::EU::LicensePlateV1.name]] = DocumentConfig.new(
|
180
|
+
Prediction::EU::LicensePlateV1,
|
181
|
+
[HTTP::StandardEndpoint.new('license_plates', '1', @api_key)]
|
182
|
+
)
|
183
|
+
@doc_configs[['mindee', Prediction::ShippingContainerV1.name]] = DocumentConfig.new(
|
184
|
+
Prediction::ShippingContainerV1,
|
185
|
+
[HTTP::StandardEndpoint.new('shipping_containers', '1', @api_key)]
|
186
|
+
)
|
187
|
+
@doc_configs[['mindee', Prediction::US::BankCheckV1.name]] = DocumentConfig.new(
|
188
|
+
Prediction::US::BankCheckV1,
|
189
|
+
[HTTP::StandardEndpoint.new('bank_check', '1', @api_key)]
|
190
|
+
)
|
191
|
+
@doc_configs[['mindee', Prediction::FR::BankAccountDetailsV1.name]] = DocumentConfig.new(
|
192
|
+
Prediction::FR::BankAccountDetailsV1,
|
193
|
+
[HTTP::StandardEndpoint.new('bank_account_details', '1', @api_key)]
|
194
|
+
)
|
195
|
+
@doc_configs[['mindee', Prediction::FR::CarteVitaleV1.name]] = DocumentConfig.new(
|
196
|
+
Prediction::FR::CarteVitaleV1,
|
197
|
+
[HTTP::StandardEndpoint.new('carte_vitale', '1', @api_key)]
|
198
|
+
)
|
199
|
+
@doc_configs[['mindee', Prediction::FR::IdCardV1.name]] = DocumentConfig.new(
|
200
|
+
Prediction::FR::IdCardV1,
|
201
|
+
[HTTP::StandardEndpoint.new('idcard_fr', '1', @api_key)]
|
202
|
+
)
|
203
|
+
self
|
204
|
+
end
|
170
205
|
end
|
171
206
|
end
|
@@ -2,88 +2,56 @@
|
|
2
2
|
|
3
3
|
require 'json'
|
4
4
|
|
5
|
-
require_relative 'endpoint'
|
6
|
-
require_relative '
|
7
|
-
require_relative '
|
5
|
+
require_relative 'http/endpoint'
|
6
|
+
require_relative 'parsing/document'
|
7
|
+
require_relative 'parsing/error'
|
8
|
+
require_relative 'parsing/prediction'
|
8
9
|
|
9
10
|
module Mindee
|
10
11
|
# Specific client for sending a document to the API.
|
11
12
|
class DocumentConfig
|
12
13
|
# Array of possible Mindee::HTTP::Endpoint to be used.
|
13
|
-
# @return [Array<Mindee::Endpoint>]
|
14
|
+
# @return [Array<Mindee::HTTP::Endpoint>]
|
14
15
|
attr_reader :endpoints
|
15
16
|
|
16
|
-
# @param
|
17
|
-
# @param
|
18
|
-
|
19
|
-
|
20
|
-
def initialize(doc_class, document_type, endpoints, raise_on_error)
|
21
|
-
@doc_class = doc_class
|
22
|
-
@document_type = document_type
|
17
|
+
# @param prediction_class [Class<Mindee::Prediction::Prediction>]
|
18
|
+
# @param endpoints [Array<Mindee::HTTP::Endpoint>]
|
19
|
+
def initialize(prediction_class, endpoints)
|
20
|
+
@prediction_class = prediction_class
|
23
21
|
@endpoints = endpoints
|
24
|
-
@raise_on_error = raise_on_error
|
25
|
-
end
|
26
|
-
|
27
|
-
# Parse a prediction API result.
|
28
|
-
# @param input_doc [Mindee::InputDocument]
|
29
|
-
# @param response [Hash]
|
30
|
-
# @return [Mindee::DocumentResponse]
|
31
|
-
def build_predict_result(input_doc, response)
|
32
|
-
document = @doc_class.new(
|
33
|
-
response['document']['inference']['prediction'],
|
34
|
-
input_file: input_doc,
|
35
|
-
page_id: nil
|
36
|
-
)
|
37
|
-
pages = []
|
38
|
-
response['document']['inference']['pages'].each do |page|
|
39
|
-
pages.push(
|
40
|
-
@doc_class.new(
|
41
|
-
page['prediction'],
|
42
|
-
input_file: input_doc,
|
43
|
-
page_id: page['id']
|
44
|
-
)
|
45
|
-
)
|
46
|
-
end
|
47
|
-
DocumentResponse.new(response, @document_type, document, pages)
|
48
22
|
end
|
49
23
|
|
50
24
|
# Call the prediction API.
|
51
25
|
# @param input_doc [Mindee::InputDocument]
|
52
26
|
# @param include_words [Boolean]
|
53
27
|
# @param close_file [Boolean]
|
28
|
+
# @param cropper [Boolean]
|
54
29
|
# @return [Mindee::DocumentResponse]
|
55
|
-
def predict(input_doc, include_words, close_file)
|
30
|
+
def predict(input_doc, include_words, close_file, cropper)
|
56
31
|
check_api_keys
|
57
|
-
response = predict_request(input_doc, include_words, close_file)
|
58
|
-
parse_response(
|
32
|
+
response = predict_request(input_doc, include_words, close_file, cropper)
|
33
|
+
parse_response(response)
|
59
34
|
end
|
60
35
|
|
61
36
|
private
|
62
37
|
|
63
|
-
# @param input_doc [Mindee::InputDocument]
|
64
38
|
# @param response [Net::HTTPResponse]
|
65
39
|
# @return [Mindee::DocumentResponse]
|
66
|
-
def parse_response(
|
40
|
+
def parse_response(response)
|
67
41
|
hashed_response = JSON.parse(response.body, object_class: Hash)
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
)
|
73
|
-
end
|
74
|
-
return DocumentResponse.new(
|
75
|
-
hashed_response, @document_type, {}, []
|
76
|
-
)
|
77
|
-
end
|
78
|
-
build_predict_result(input_doc, hashed_response)
|
42
|
+
return Document.new(@prediction_class, hashed_response['document']) if (200..299).include?(response.code.to_i)
|
43
|
+
|
44
|
+
error = Parsing::Error.new(hashed_response['api_request']['error'])
|
45
|
+
raise error
|
79
46
|
end
|
80
47
|
|
81
48
|
# @param input_doc [Mindee::InputDocument]
|
82
49
|
# @param include_words [Boolean]
|
83
50
|
# @param close_file [Boolean]
|
51
|
+
# @param cropper [Boolean]
|
84
52
|
# @return [Net::HTTPResponse]
|
85
|
-
def predict_request(input_doc, include_words, close_file)
|
86
|
-
@endpoints[0].
|
53
|
+
def predict_request(input_doc, include_words, close_file, cropper)
|
54
|
+
@endpoints[0].predict_req_post(input_doc, include_words: include_words, close_file: close_file, cropper: cropper)
|
87
55
|
end
|
88
56
|
|
89
57
|
def check_api_keys
|
@@ -93,108 +61,8 @@ module Mindee
|
|
93
61
|
raise "Missing API key for '#{@document_type}', " \
|
94
62
|
"check your Client Configuration.\n" \
|
95
63
|
'You can set this using the ' \
|
96
|
-
"'#{
|
97
|
-
end
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
# Client for Invoice documents
|
102
|
-
class InvoiceConfig < DocumentConfig
|
103
|
-
def initialize(api_key, raise_on_error)
|
104
|
-
endpoints = [InvoiceEndpoint.new(api_key)]
|
105
|
-
super(
|
106
|
-
Invoice,
|
107
|
-
'invoice',
|
108
|
-
endpoints,
|
109
|
-
raise_on_error
|
110
|
-
)
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
# Client for Receipt documents
|
115
|
-
class ReceiptConfig < DocumentConfig
|
116
|
-
def initialize(api_key, raise_on_error)
|
117
|
-
endpoints = [ReceiptEndpoint.new(api_key)]
|
118
|
-
super(
|
119
|
-
Receipt,
|
120
|
-
'receipt',
|
121
|
-
endpoints,
|
122
|
-
raise_on_error
|
123
|
-
)
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
# Client for Passport documents
|
128
|
-
class PassportConfig < DocumentConfig
|
129
|
-
def initialize(api_key, raise_on_error)
|
130
|
-
endpoints = [PassportEndpoint.new(api_key)]
|
131
|
-
super(
|
132
|
-
Passport,
|
133
|
-
'passport',
|
134
|
-
endpoints,
|
135
|
-
raise_on_error
|
136
|
-
)
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
# Client for Financial documents
|
141
|
-
class FinancialDocConfig < DocumentConfig
|
142
|
-
def initialize(api_key, raise_on_error)
|
143
|
-
endpoints = [
|
144
|
-
InvoiceEndpoint.new(api_key),
|
145
|
-
ReceiptEndpoint.new(api_key),
|
146
|
-
]
|
147
|
-
super(
|
148
|
-
FinancialDocument,
|
149
|
-
'financial_doc',
|
150
|
-
endpoints,
|
151
|
-
raise_on_error
|
152
|
-
)
|
153
|
-
end
|
154
|
-
|
155
|
-
private
|
156
|
-
|
157
|
-
def predict_request(input_doc, include_words, close_file)
|
158
|
-
endpoint = input_doc.pdf? ? @endpoints[0] : @endpoints[1]
|
159
|
-
endpoint.predict_request(input_doc, include_words: include_words, close_file: close_file)
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
# Client for Custom (constructed) documents
|
164
|
-
class CustomDocConfig < DocumentConfig
|
165
|
-
def initialize(document_type, account_name, version, api_key, raise_on_error)
|
166
|
-
endpoints = [CustomEndpoint.new(document_type, account_name, version, api_key)]
|
167
|
-
super(
|
168
|
-
CustomDocument,
|
169
|
-
document_type,
|
170
|
-
endpoints,
|
171
|
-
raise_on_error
|
172
|
-
)
|
173
|
-
end
|
174
|
-
|
175
|
-
# Parse a prediction API result.
|
176
|
-
# @param input_doc [Mindee::InputDocument]
|
177
|
-
# @param response [Hash]
|
178
|
-
# @return [Mindee::DocumentResponse]
|
179
|
-
def build_predict_result(input_doc, response)
|
180
|
-
document = CustomDocument.new(
|
181
|
-
@document_type,
|
182
|
-
response['document']['inference']['prediction'],
|
183
|
-
input_file: input_doc,
|
184
|
-
page_id: nil
|
185
|
-
)
|
186
|
-
pages = []
|
187
|
-
response['document']['inference']['pages'].each do |page|
|
188
|
-
pages.push(
|
189
|
-
CustomDocument.new(
|
190
|
-
@document_type,
|
191
|
-
page['prediction'],
|
192
|
-
input_file: input_doc,
|
193
|
-
page_id: page['id']
|
194
|
-
)
|
195
|
-
)
|
64
|
+
"'#{HTTP::API_KEY_ENV_NAME}' environment variable."
|
196
65
|
end
|
197
|
-
DocumentResponse.new(response, @document_type, document, pages)
|
198
66
|
end
|
199
67
|
end
|
200
68
|
end
|