mindee 1.1.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/.rubocop.yml +2 -2
- data/.yardopts +4 -0
- data/CHANGELOG.md +25 -0
- data/Gemfile +0 -7
- data/README.md +52 -21
- data/Rakefile +6 -1
- data/bin/mindee.rb +70 -61
- data/docs/ruby-api-builder.md +131 -0
- data/docs/ruby-getting-started.md +265 -0
- data/docs/ruby-invoice-ocr.md +261 -0
- data/docs/ruby-passport-ocr.md +156 -0
- data/docs/ruby-receipt-ocr.md +170 -0
- data/lib/mindee/client.rb +128 -93
- data/lib/mindee/document_config.rb +22 -154
- data/lib/mindee/geometry.rb +105 -8
- data/lib/mindee/http/endpoint.rb +80 -0
- data/lib/mindee/input/pdf_processing.rb +106 -0
- data/lib/mindee/input/sources.rb +97 -0
- data/lib/mindee/input.rb +3 -0
- data/lib/mindee/parsing/document.rb +31 -0
- data/lib/mindee/parsing/error.rb +22 -0
- data/lib/mindee/parsing/inference.rb +53 -0
- data/lib/mindee/parsing/page.rb +46 -0
- data/lib/mindee/parsing/prediction/base.rb +30 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
- data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
- data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
- data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
- data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
- data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
- data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
- data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
- data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +84 -0
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
- data/lib/mindee/parsing/prediction.rb +12 -0
- data/lib/mindee/parsing.rb +4 -0
- data/lib/mindee/version.rb +1 -1
- data/mindee.gemspec +11 -5
- metadata +105 -30
- data/lib/mindee/documents/base.rb +0 -35
- data/lib/mindee/documents/custom.rb +0 -65
- data/lib/mindee/documents/financial_doc.rb +0 -135
- data/lib/mindee/documents/invoice.rb +0 -162
- data/lib/mindee/documents/passport.rb +0 -163
- data/lib/mindee/documents/receipt.rb +0 -109
- data/lib/mindee/documents.rb +0 -7
- data/lib/mindee/endpoint.rb +0 -105
- data/lib/mindee/fields/orientation.rb +0 -26
- data/lib/mindee/fields.rb +0 -11
- data/lib/mindee/inputs.rb +0 -153
- data/lib/mindee/response.rb +0 -27
@@ -0,0 +1,170 @@
|
|
1
|
+
The Ruby OCR SDK supports the [receipt API](https://developers.mindee.com/docs/receipt-ocr) for extracting data from receipts.
|
2
|
+
|
3
|
+
Using the sample below, we are going to illustrate how to extract the data that we want using the OCR SDK.
|
4
|
+
|
5
|
+

|
6
|
+
|
7
|
+
## Quick Start
|
8
|
+
```ruby
|
9
|
+
require 'mindee'
|
10
|
+
|
11
|
+
# Init a new client, specifying an API key
|
12
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
13
|
+
|
14
|
+
# Send the file
|
15
|
+
result = mindee_client.doc_from_path('/path/to/the/file.ext').parse(Mindee::Prediction::ReceiptV4)
|
16
|
+
|
17
|
+
# Print a summary of the document prediction in RST format
|
18
|
+
puts result.inference.prediction
|
19
|
+
```
|
20
|
+
|
21
|
+
Output:
|
22
|
+
```shell
|
23
|
+
:Locale: en-US; en; US; USD;
|
24
|
+
:Date: 2014-07-07
|
25
|
+
:Category: food
|
26
|
+
:Subcategory: restaurant
|
27
|
+
:Document type: EXPENSE RECEIPT
|
28
|
+
:Time: 20:20
|
29
|
+
:Supplier name: LOGANS
|
30
|
+
:Taxes: 3.34 TAX
|
31
|
+
:Total net: 40.48
|
32
|
+
:Total taxes: 3.34
|
33
|
+
:Tip: 10.00
|
34
|
+
:Total amount: 53.8
|
35
|
+
```
|
36
|
+
|
37
|
+
## Fields
|
38
|
+
Each prediction object contains a set of different fields.
|
39
|
+
Each `Field` object contains at a minimum the following attributes:
|
40
|
+
|
41
|
+
* `value` (String or Float depending on the field type): corresponds to the field value. Can be `nil` if no value was extracted.
|
42
|
+
* `confidence` (Float): the confidence score of the field prediction.
|
43
|
+
* `bounding_box` (Array< Array< Float > >): contains exactly 4 relative vertex coordinates (points) of a right rectangle containing the field in the document.
|
44
|
+
* `polygon` (Array< Array< Float > >): contains the relative vertex coordinates (points) of a polygon containing the field in the image.
|
45
|
+
* `reconstructed` (Boolean): True if the field was reconstructed or computed using other fields.
|
46
|
+
|
47
|
+
|
48
|
+
## Attributes
|
49
|
+
Depending on the field type specified, additional attributes can be extracted in the `Receipt` object.
|
50
|
+
|
51
|
+
Using the above sample, the following are the basic fields that can be extracted:
|
52
|
+
|
53
|
+
- [Orientation](#orientation)
|
54
|
+
- [Category](#category)
|
55
|
+
- [Date](#date)
|
56
|
+
- [Locale](#locale)
|
57
|
+
- [Supplier Information](#supplier-information)
|
58
|
+
- [Taxes](#taxes)
|
59
|
+
- [Time](#time)
|
60
|
+
- [Totals](#totals)
|
61
|
+
|
62
|
+
|
63
|
+
### Category
|
64
|
+
* **`category`** (Field): Receipt category as seen on the receipt.
|
65
|
+
The following categories are supported: toll, food, parking, transport, accommodation, gasoline, miscellaneous.
|
66
|
+
|
67
|
+
```ruby
|
68
|
+
puts result.inference.prediction.category.value
|
69
|
+
```
|
70
|
+
|
71
|
+
|
72
|
+
### Date
|
73
|
+
Date fields:
|
74
|
+
|
75
|
+
* contain the `date_object` attribute, which is a standard Ruby [date object](https://ruby-doc.org/stdlib-2.7.1/libdoc/date/rdoc/Date.html)
|
76
|
+
* have a `value` attribute which is the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) representation of the date.
|
77
|
+
|
78
|
+
The following date fields are available:
|
79
|
+
|
80
|
+
* **`date`**: Date the receipt was issued
|
81
|
+
|
82
|
+
```ruby
|
83
|
+
puts result.inference.prediction.date.value
|
84
|
+
```
|
85
|
+
|
86
|
+
|
87
|
+
### Locale
|
88
|
+
**`locale`** (Locale): Locale information.
|
89
|
+
|
90
|
+
* `locale.value` (String): Locale with country and language codes.
|
91
|
+
```ruby
|
92
|
+
puts result.inference.prediction.locale
|
93
|
+
```
|
94
|
+
|
95
|
+
* `locale.language` (String): Language code in [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format as seen on the document.
|
96
|
+
*
|
97
|
+
```ruby
|
98
|
+
puts result.inference.prediction.locale.language
|
99
|
+
```
|
100
|
+
|
101
|
+
* `locale.currency` (String): Currency code in [ISO 4217](https://en.wikipedia.org/wiki/ISO_4217) format as seen on the document.
|
102
|
+
|
103
|
+
```ruby
|
104
|
+
puts result.inference.prediction.locale.currency
|
105
|
+
```
|
106
|
+
|
107
|
+
* `locale.country` (String): Country code in [ISO 3166-1](https://en.wikipedia.org/wiki/ISO_3166-1) alpha-2 format as seen on the document.
|
108
|
+
|
109
|
+
```ruby
|
110
|
+
puts result.inference.prediction.locale.country
|
111
|
+
```
|
112
|
+
|
113
|
+
### Supplier Information
|
114
|
+
* **`supplier_name`** (Field): Supplier name as written in the receipt.
|
115
|
+
|
116
|
+
```ruby
|
117
|
+
puts result.inference.prediction.supplier_name.value
|
118
|
+
```
|
119
|
+
|
120
|
+
|
121
|
+
### Taxes
|
122
|
+
**`taxes`** (Array< TaxField >): Contains tax fields as seen on the receipt.
|
123
|
+
|
124
|
+
* `value` (Float): The tax amount.
|
125
|
+
```ruby
|
126
|
+
# Show the amount of the first tax
|
127
|
+
puts result.inference.prediction.taxes[0].value
|
128
|
+
```
|
129
|
+
|
130
|
+
* `code` (String): The tax code (HST, GST, etc. for Canada; City Tax, State Tax, etc. for the US).
|
131
|
+
```ruby
|
132
|
+
# Show the code of the first tax
|
133
|
+
puts result.inference.prediction.taxes[0].code
|
134
|
+
```
|
135
|
+
|
136
|
+
* `rate` (Float): The tax rate.
|
137
|
+
```ruby
|
138
|
+
# Show the rate of the first tax
|
139
|
+
puts result.inference.prediction.taxes[0].rate
|
140
|
+
```
|
141
|
+
|
142
|
+
### Time
|
143
|
+
* **`time`**: Time of purchase as seen on the receipt
|
144
|
+
* `value` (String): Time of purchase in 24-hour format (hh:mm).
|
145
|
+
|
146
|
+
```ruby
|
147
|
+
puts result.inference.prediction.time.value
|
148
|
+
```
|
149
|
+
|
150
|
+
### Totals
|
151
|
+
* **`total_amount`** (Field): Total amount including taxes
|
152
|
+
|
153
|
+
```ruby
|
154
|
+
puts result.inference.prediction.total_amount.value
|
155
|
+
```
|
156
|
+
|
157
|
+
* **`total_net`** (Field): Total amount paid excluding taxes
|
158
|
+
|
159
|
+
```ruby
|
160
|
+
puts result.inference.prediction.total_net.value
|
161
|
+
```
|
162
|
+
|
163
|
+
* **`total_tax`** (Field): Total tax value from tax lines
|
164
|
+
|
165
|
+
```ruby
|
166
|
+
puts result.inference.prediction.total_tax.value
|
167
|
+
```
|
168
|
+
|
169
|
+
## Questions?
|
170
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
|
data/lib/mindee/client.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative '
|
3
|
+
require_relative 'input'
|
4
4
|
require_relative 'document_config'
|
5
|
-
require_relative 'endpoint'
|
5
|
+
require_relative 'http/endpoint'
|
6
|
+
require_relative 'parsing/prediction'
|
6
7
|
|
7
8
|
module Mindee
|
8
9
|
# General client for sending a document to the API.
|
@@ -15,157 +16,191 @@ module Mindee
|
|
15
16
|
end
|
16
17
|
|
17
18
|
# Call prediction API on the document and parse the results.
|
18
|
-
#
|
19
|
-
# @param
|
20
|
-
#
|
21
|
-
# @param
|
19
|
+
#
|
20
|
+
# @param prediction_class [Mindee::Prediction::Prediction]
|
21
|
+
#
|
22
|
+
# @param endpoint_name [String] For custom endpoints, the "API name" field in the "Settings" page of the
|
23
|
+
# API Builder. Do not set for standard (off the shelf) endpoints.
|
24
|
+
#
|
25
|
+
# @param account_name [String] For custom endpoints, your account or organization username on the API Builder.
|
26
|
+
# This is normally not required unless you have a custom endpoint which has the
|
27
|
+
# same name as a standard (off the shelf) endpoint.
|
28
|
+
# Do not set for standard (off the shelf) endpoints.
|
29
|
+
#
|
30
|
+
# @param include_words [Boolean] Whether to include the full text for each page.
|
31
|
+
# This performs a full OCR operation on the server and will increase response time.
|
32
|
+
#
|
33
|
+
# @param close_file [Boolean] Whether to `close()` the file after parsing it.
|
34
|
+
# Set to false if you need to access the file after this operation.
|
35
|
+
#
|
36
|
+
# @param page_options [Hash, nil] Page cutting/merge options:
|
37
|
+
#
|
38
|
+
# * `:page_indexes` Zero-based list of page indexes.
|
39
|
+
# * `:operation` Operation to apply on the document, given the `page_indexes` specified:
|
40
|
+
# * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
|
41
|
+
# * `:REMOVE` - remove the specified pages, and keep all others.
|
42
|
+
# * `:on_min_pages` Apply the operation only if document has at least this many pages.
|
43
|
+
#
|
44
|
+
# @param cropper [Boolean] Whether to include cropper results for each page.
|
45
|
+
# This performs a cropping operation on the server and will increase response time.
|
46
|
+
#
|
22
47
|
# @return [Mindee::DocumentResponse]
|
23
|
-
def parse(
|
48
|
+
def parse(
|
49
|
+
prediction_class,
|
50
|
+
endpoint_name: '',
|
51
|
+
account_name: '',
|
52
|
+
include_words: false,
|
53
|
+
close_file: true,
|
54
|
+
page_options: nil,
|
55
|
+
cropper: false
|
56
|
+
)
|
57
|
+
doc_config = find_doc_config(prediction_class, endpoint_name, account_name)
|
58
|
+
@input_doc.process_pdf(page_options) if !page_options.nil? && @input_doc.pdf?
|
59
|
+
doc_config.predict(@input_doc, include_words, close_file, cropper)
|
60
|
+
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
# @param document_class [Mindee::Prediction::Prediction]
|
65
|
+
# @param endpoint_name [String]
|
66
|
+
def determine_endpoint_name(document_class, endpoint_name)
|
67
|
+
return document_class.name if document_class.name != Prediction::CustomV1.name
|
68
|
+
|
69
|
+
raise "endpoint_name is required when using #{document_class.name} class" if endpoint_name.empty?
|
70
|
+
|
71
|
+
endpoint_name
|
72
|
+
end
|
73
|
+
|
74
|
+
# @param document_class [Mindee::Prediction::Prediction]
|
75
|
+
# @param endpoint_name [String]
|
76
|
+
# @param account_name [String]
|
77
|
+
def find_doc_config(document_class, endpoint_name, account_name)
|
78
|
+
endpoint_name = determine_endpoint_name(document_class, endpoint_name)
|
79
|
+
|
24
80
|
found = []
|
25
81
|
@doc_configs.each_key do |conf|
|
26
|
-
found.push(conf) if conf[1] ==
|
82
|
+
found.push(conf) if conf[1] == endpoint_name
|
27
83
|
end
|
28
|
-
raise "
|
84
|
+
raise "Endpoint not configured: #{endpoint_name}" if found.empty?
|
29
85
|
|
30
|
-
if !
|
31
|
-
config_key = [
|
86
|
+
if !account_name.empty?
|
87
|
+
config_key = [account_name, endpoint_name]
|
32
88
|
elsif found.length == 1
|
33
89
|
config_key = found[0]
|
34
90
|
else
|
35
91
|
usernames = found.map { |conf| conf[0] }
|
36
92
|
raise "Duplicate configuration detected.\n" \
|
37
|
-
"You specified the document '#{
|
93
|
+
"You specified the document '#{endpoint_name}' in your custom config.\n" \
|
38
94
|
"To avoid confusion, please add the 'account_name' attribute to " \
|
39
95
|
"the parse method, one of #{usernames}."
|
40
96
|
end
|
41
97
|
|
42
|
-
|
43
|
-
doc_config.predict(@input_doc, include_words, close_file)
|
98
|
+
@doc_configs[config_key]
|
44
99
|
end
|
45
100
|
end
|
46
101
|
|
47
102
|
# Mindee API Client.
|
48
103
|
# See: https://developers.mindee.com/docs/
|
49
104
|
class Client
|
50
|
-
|
51
|
-
|
52
|
-
DOC_TYPE_PASSPORT = 'passport'
|
53
|
-
DOC_TYPE_FINANCIAL = 'financial_doc'
|
54
|
-
|
55
|
-
# @param raise_on_error [Boolean]
|
56
|
-
def initialize(api_key: nil, raise_on_error: true)
|
57
|
-
@raise_on_error = raise_on_error
|
105
|
+
# @param api_key [String]
|
106
|
+
def initialize(api_key: '')
|
58
107
|
@doc_configs = {}
|
59
108
|
@api_key = api_key
|
60
|
-
|
61
|
-
|
62
|
-
# Configure a 'Mindee Invoice' document.
|
63
|
-
# @param api_key [String] Override the client API key for this endpoint
|
64
|
-
# @return [Mindee::Client]
|
65
|
-
def config_invoice(api_key: nil)
|
66
|
-
@doc_configs[['mindee', DOC_TYPE_INVOICE]] = InvoiceConfig.new(
|
67
|
-
api_key || @api_key,
|
68
|
-
@raise_on_error
|
69
|
-
)
|
70
|
-
self
|
71
|
-
end
|
72
|
-
|
73
|
-
# Configure a 'Mindee Expense Receipts' document.
|
74
|
-
# @param api_key [String] Override the client API key for this endpoint
|
75
|
-
# @return [Mindee::Client]
|
76
|
-
def config_receipt(api_key: nil)
|
77
|
-
@doc_configs[['mindee', DOC_TYPE_RECEIPT]] = ReceiptConfig.new(
|
78
|
-
api_key || @api_key,
|
79
|
-
@raise_on_error
|
80
|
-
)
|
81
|
-
self
|
82
|
-
end
|
83
|
-
|
84
|
-
# Configure a 'Mindee Passport' document.
|
85
|
-
# @param api_key [String] Override the client API key for this endpoint
|
86
|
-
# @return [Mindee::Client]
|
87
|
-
def config_passport(api_key: nil)
|
88
|
-
@doc_configs[['mindee', DOC_TYPE_PASSPORT]] = PassportConfig.new(
|
89
|
-
api_key || @api_key,
|
90
|
-
@raise_on_error
|
91
|
-
)
|
92
|
-
self
|
93
|
-
end
|
94
|
-
|
95
|
-
# Configure a 'Mindee Financial document'. Uses 'Invoice' and 'Expense Receipt' internally.
|
96
|
-
# @param api_key [String] Override the client API key for this endpoint
|
97
|
-
# @return [Mindee::Client]
|
98
|
-
def config_financial_doc(api_key: nil)
|
99
|
-
@doc_configs[['mindee', DOC_TYPE_FINANCIAL]] = FinancialDocConfig.new(
|
100
|
-
api_key || @api_key,
|
101
|
-
@raise_on_error
|
102
|
-
)
|
103
|
-
self
|
109
|
+
init_default_endpoints
|
104
110
|
end
|
105
111
|
|
106
112
|
# Configure a custom document using the 'Mindee API Builder'.
|
107
113
|
# @param account_name [String] Your organization's username on the API Builder
|
108
|
-
# @param
|
109
|
-
# @param api_key [String] Override the client API key for this endpoint
|
114
|
+
# @param endpoint_name [String] The "API name" field in the "Settings" page of the API Builder
|
110
115
|
# @param version [String] Specify the version of the model to use. If not set, use the latest version of the model.
|
111
116
|
# @return [Mindee::Client]
|
112
|
-
def
|
113
|
-
document_name,
|
117
|
+
def add_endpoint(
|
114
118
|
account_name,
|
115
|
-
|
119
|
+
endpoint_name,
|
116
120
|
version: '1'
|
117
121
|
)
|
118
|
-
@doc_configs[[account_name,
|
119
|
-
|
120
|
-
account_name,
|
121
|
-
version,
|
122
|
-
api_key || @api_key,
|
123
|
-
@raise_on_error
|
122
|
+
@doc_configs[[account_name, endpoint_name]] = DocumentConfig.new(
|
123
|
+
Prediction::CustomV1,
|
124
|
+
[HTTP::CustomEndpoint.new(account_name, endpoint_name, version, @api_key)]
|
124
125
|
)
|
125
126
|
self
|
126
127
|
end
|
127
128
|
|
128
129
|
# Load a document from an absolute path, as a string.
|
129
130
|
# @param input_path [String] Path of file to open
|
130
|
-
# @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
|
131
|
-
# @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
|
132
131
|
# @return [Mindee::DocumentClient]
|
133
|
-
def doc_from_path(input_path
|
134
|
-
doc = PathDocument.new(input_path
|
132
|
+
def doc_from_path(input_path)
|
133
|
+
doc = Input::PathDocument.new(input_path)
|
135
134
|
DocumentClient.new(doc, @doc_configs)
|
136
135
|
end
|
137
136
|
|
138
137
|
# Load a document from raw bytes.
|
139
138
|
# @param input_bytes [String] Encoding::BINARY byte input
|
140
139
|
# @param filename [String] The name of the file (without the path)
|
141
|
-
# @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
|
142
|
-
# @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
|
143
140
|
# @return [Mindee::DocumentClient]
|
144
|
-
def doc_from_bytes(input_bytes, filename
|
145
|
-
doc = BytesDocument.new(input_bytes, filename
|
141
|
+
def doc_from_bytes(input_bytes, filename)
|
142
|
+
doc = Input::BytesDocument.new(input_bytes, filename)
|
146
143
|
DocumentClient.new(doc, @doc_configs)
|
147
144
|
end
|
148
145
|
|
149
146
|
# Load a document from a base64 encoded string.
|
150
147
|
# @param base64_string [String] Input to parse as base64 string
|
151
148
|
# @param filename [String] The name of the file (without the path)
|
152
|
-
# @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
|
153
|
-
# @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
|
154
149
|
# @return [Mindee::DocumentClient]
|
155
|
-
def doc_from_b64string(base64_string, filename
|
156
|
-
doc = Base64Document.new(base64_string, filename
|
150
|
+
def doc_from_b64string(base64_string, filename)
|
151
|
+
doc = Input::Base64Document.new(base64_string, filename)
|
157
152
|
DocumentClient.new(doc, @doc_configs)
|
158
153
|
end
|
159
154
|
|
160
155
|
# Load a document from a normal Ruby `File`.
|
161
156
|
# @param input_file [File] Input file handle
|
162
157
|
# @param filename [String] The name of the file (without the path)
|
163
|
-
# @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
|
164
|
-
# @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
|
165
158
|
# @return [Mindee::DocumentClient]
|
166
|
-
def doc_from_file(input_file, filename
|
167
|
-
doc = FileDocument.new(input_file, filename
|
159
|
+
def doc_from_file(input_file, filename)
|
160
|
+
doc = Input::FileDocument.new(input_file, filename)
|
168
161
|
DocumentClient.new(doc, @doc_configs)
|
169
162
|
end
|
163
|
+
|
164
|
+
private
|
165
|
+
|
166
|
+
def init_default_endpoints
|
167
|
+
@doc_configs[['mindee', Prediction::InvoiceV4.name]] = DocumentConfig.new(
|
168
|
+
Prediction::InvoiceV4,
|
169
|
+
[HTTP::StandardEndpoint.new('invoices', '4', @api_key)]
|
170
|
+
)
|
171
|
+
@doc_configs[['mindee', Prediction::ReceiptV4.name]] = DocumentConfig.new(
|
172
|
+
Prediction::ReceiptV4,
|
173
|
+
[HTTP::StandardEndpoint.new('expense_receipts', '4', @api_key)]
|
174
|
+
)
|
175
|
+
@doc_configs[['mindee', Prediction::PassportV1.name]] = DocumentConfig.new(
|
176
|
+
Prediction::PassportV1,
|
177
|
+
[HTTP::StandardEndpoint.new('passport', '1', @api_key)]
|
178
|
+
)
|
179
|
+
@doc_configs[['mindee', Prediction::EU::LicensePlateV1.name]] = DocumentConfig.new(
|
180
|
+
Prediction::EU::LicensePlateV1,
|
181
|
+
[HTTP::StandardEndpoint.new('license_plates', '1', @api_key)]
|
182
|
+
)
|
183
|
+
@doc_configs[['mindee', Prediction::ShippingContainerV1.name]] = DocumentConfig.new(
|
184
|
+
Prediction::ShippingContainerV1,
|
185
|
+
[HTTP::StandardEndpoint.new('shipping_containers', '1', @api_key)]
|
186
|
+
)
|
187
|
+
@doc_configs[['mindee', Prediction::US::BankCheckV1.name]] = DocumentConfig.new(
|
188
|
+
Prediction::US::BankCheckV1,
|
189
|
+
[HTTP::StandardEndpoint.new('bank_check', '1', @api_key)]
|
190
|
+
)
|
191
|
+
@doc_configs[['mindee', Prediction::FR::BankAccountDetailsV1.name]] = DocumentConfig.new(
|
192
|
+
Prediction::FR::BankAccountDetailsV1,
|
193
|
+
[HTTP::StandardEndpoint.new('bank_account_details', '1', @api_key)]
|
194
|
+
)
|
195
|
+
@doc_configs[['mindee', Prediction::FR::CarteVitaleV1.name]] = DocumentConfig.new(
|
196
|
+
Prediction::FR::CarteVitaleV1,
|
197
|
+
[HTTP::StandardEndpoint.new('carte_vitale', '1', @api_key)]
|
198
|
+
)
|
199
|
+
@doc_configs[['mindee', Prediction::FR::IdCardV1.name]] = DocumentConfig.new(
|
200
|
+
Prediction::FR::IdCardV1,
|
201
|
+
[HTTP::StandardEndpoint.new('idcard_fr', '1', @api_key)]
|
202
|
+
)
|
203
|
+
self
|
204
|
+
end
|
170
205
|
end
|
171
206
|
end
|
@@ -2,88 +2,56 @@
|
|
2
2
|
|
3
3
|
require 'json'
|
4
4
|
|
5
|
-
require_relative 'endpoint'
|
6
|
-
require_relative '
|
7
|
-
require_relative '
|
5
|
+
require_relative 'http/endpoint'
|
6
|
+
require_relative 'parsing/document'
|
7
|
+
require_relative 'parsing/error'
|
8
|
+
require_relative 'parsing/prediction'
|
8
9
|
|
9
10
|
module Mindee
|
10
11
|
# Specific client for sending a document to the API.
|
11
12
|
class DocumentConfig
|
12
13
|
# Array of possible Mindee::HTTP::Endpoint to be used.
|
13
|
-
# @return [Array<Mindee::Endpoint>]
|
14
|
+
# @return [Array<Mindee::HTTP::Endpoint>]
|
14
15
|
attr_reader :endpoints
|
15
16
|
|
16
|
-
# @param
|
17
|
-
# @param
|
18
|
-
|
19
|
-
|
20
|
-
def initialize(doc_class, document_type, endpoints, raise_on_error)
|
21
|
-
@doc_class = doc_class
|
22
|
-
@document_type = document_type
|
17
|
+
# @param prediction_class [Class<Mindee::Prediction::Prediction>]
|
18
|
+
# @param endpoints [Array<Mindee::HTTP::Endpoint>]
|
19
|
+
def initialize(prediction_class, endpoints)
|
20
|
+
@prediction_class = prediction_class
|
23
21
|
@endpoints = endpoints
|
24
|
-
@raise_on_error = raise_on_error
|
25
|
-
end
|
26
|
-
|
27
|
-
# Parse a prediction API result.
|
28
|
-
# @param input_doc [Mindee::InputDocument]
|
29
|
-
# @param response [Hash]
|
30
|
-
# @return [Mindee::DocumentResponse]
|
31
|
-
def build_predict_result(input_doc, response)
|
32
|
-
document = @doc_class.new(
|
33
|
-
response['document']['inference']['prediction'],
|
34
|
-
input_file: input_doc,
|
35
|
-
page_id: nil
|
36
|
-
)
|
37
|
-
pages = []
|
38
|
-
response['document']['inference']['pages'].each do |page|
|
39
|
-
pages.push(
|
40
|
-
@doc_class.new(
|
41
|
-
page['prediction'],
|
42
|
-
input_file: input_doc,
|
43
|
-
page_id: page['id']
|
44
|
-
)
|
45
|
-
)
|
46
|
-
end
|
47
|
-
DocumentResponse.new(response, @document_type, document, pages)
|
48
22
|
end
|
49
23
|
|
50
24
|
# Call the prediction API.
|
51
25
|
# @param input_doc [Mindee::InputDocument]
|
52
26
|
# @param include_words [Boolean]
|
53
27
|
# @param close_file [Boolean]
|
28
|
+
# @param cropper [Boolean]
|
54
29
|
# @return [Mindee::DocumentResponse]
|
55
|
-
def predict(input_doc, include_words, close_file)
|
30
|
+
def predict(input_doc, include_words, close_file, cropper)
|
56
31
|
check_api_keys
|
57
|
-
response = predict_request(input_doc, include_words, close_file)
|
58
|
-
parse_response(
|
32
|
+
response = predict_request(input_doc, include_words, close_file, cropper)
|
33
|
+
parse_response(response)
|
59
34
|
end
|
60
35
|
|
61
36
|
private
|
62
37
|
|
63
|
-
# @param input_doc [Mindee::InputDocument]
|
64
38
|
# @param response [Net::HTTPResponse]
|
65
39
|
# @return [Mindee::DocumentResponse]
|
66
|
-
def parse_response(
|
40
|
+
def parse_response(response)
|
67
41
|
hashed_response = JSON.parse(response.body, object_class: Hash)
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
)
|
73
|
-
end
|
74
|
-
return DocumentResponse.new(
|
75
|
-
hashed_response, @document_type, {}, []
|
76
|
-
)
|
77
|
-
end
|
78
|
-
build_predict_result(input_doc, hashed_response)
|
42
|
+
return Document.new(@prediction_class, hashed_response['document']) if (200..299).include?(response.code.to_i)
|
43
|
+
|
44
|
+
error = Parsing::Error.new(hashed_response['api_request']['error'])
|
45
|
+
raise error
|
79
46
|
end
|
80
47
|
|
81
48
|
# @param input_doc [Mindee::InputDocument]
|
82
49
|
# @param include_words [Boolean]
|
83
50
|
# @param close_file [Boolean]
|
51
|
+
# @param cropper [Boolean]
|
84
52
|
# @return [Net::HTTPResponse]
|
85
|
-
def predict_request(input_doc, include_words, close_file)
|
86
|
-
@endpoints[0].
|
53
|
+
def predict_request(input_doc, include_words, close_file, cropper)
|
54
|
+
@endpoints[0].predict_req_post(input_doc, include_words: include_words, close_file: close_file, cropper: cropper)
|
87
55
|
end
|
88
56
|
|
89
57
|
def check_api_keys
|
@@ -93,108 +61,8 @@ module Mindee
|
|
93
61
|
raise "Missing API key for '#{@document_type}', " \
|
94
62
|
"check your Client Configuration.\n" \
|
95
63
|
'You can set this using the ' \
|
96
|
-
"'#{
|
97
|
-
end
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
# Client for Invoice documents
|
102
|
-
class InvoiceConfig < DocumentConfig
|
103
|
-
def initialize(api_key, raise_on_error)
|
104
|
-
endpoints = [InvoiceEndpoint.new(api_key)]
|
105
|
-
super(
|
106
|
-
Invoice,
|
107
|
-
'invoice',
|
108
|
-
endpoints,
|
109
|
-
raise_on_error
|
110
|
-
)
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
# Client for Receipt documents
|
115
|
-
class ReceiptConfig < DocumentConfig
|
116
|
-
def initialize(api_key, raise_on_error)
|
117
|
-
endpoints = [ReceiptEndpoint.new(api_key)]
|
118
|
-
super(
|
119
|
-
Receipt,
|
120
|
-
'receipt',
|
121
|
-
endpoints,
|
122
|
-
raise_on_error
|
123
|
-
)
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
# Client for Passport documents
|
128
|
-
class PassportConfig < DocumentConfig
|
129
|
-
def initialize(api_key, raise_on_error)
|
130
|
-
endpoints = [PassportEndpoint.new(api_key)]
|
131
|
-
super(
|
132
|
-
Passport,
|
133
|
-
'passport',
|
134
|
-
endpoints,
|
135
|
-
raise_on_error
|
136
|
-
)
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
# Client for Financial documents
|
141
|
-
class FinancialDocConfig < DocumentConfig
|
142
|
-
def initialize(api_key, raise_on_error)
|
143
|
-
endpoints = [
|
144
|
-
InvoiceEndpoint.new(api_key),
|
145
|
-
ReceiptEndpoint.new(api_key),
|
146
|
-
]
|
147
|
-
super(
|
148
|
-
FinancialDocument,
|
149
|
-
'financial_doc',
|
150
|
-
endpoints,
|
151
|
-
raise_on_error
|
152
|
-
)
|
153
|
-
end
|
154
|
-
|
155
|
-
private
|
156
|
-
|
157
|
-
def predict_request(input_doc, include_words, close_file)
|
158
|
-
endpoint = input_doc.pdf? ? @endpoints[0] : @endpoints[1]
|
159
|
-
endpoint.predict_request(input_doc, include_words: include_words, close_file: close_file)
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
# Client for Custom (constructed) documents
|
164
|
-
class CustomDocConfig < DocumentConfig
|
165
|
-
def initialize(document_type, account_name, version, api_key, raise_on_error)
|
166
|
-
endpoints = [CustomEndpoint.new(document_type, account_name, version, api_key)]
|
167
|
-
super(
|
168
|
-
CustomDocument,
|
169
|
-
document_type,
|
170
|
-
endpoints,
|
171
|
-
raise_on_error
|
172
|
-
)
|
173
|
-
end
|
174
|
-
|
175
|
-
# Parse a prediction API result.
|
176
|
-
# @param input_doc [Mindee::InputDocument]
|
177
|
-
# @param response [Hash]
|
178
|
-
# @return [Mindee::DocumentResponse]
|
179
|
-
def build_predict_result(input_doc, response)
|
180
|
-
document = CustomDocument.new(
|
181
|
-
@document_type,
|
182
|
-
response['document']['inference']['prediction'],
|
183
|
-
input_file: input_doc,
|
184
|
-
page_id: nil
|
185
|
-
)
|
186
|
-
pages = []
|
187
|
-
response['document']['inference']['pages'].each do |page|
|
188
|
-
pages.push(
|
189
|
-
CustomDocument.new(
|
190
|
-
@document_type,
|
191
|
-
page['prediction'],
|
192
|
-
input_file: input_doc,
|
193
|
-
page_id: page['id']
|
194
|
-
)
|
195
|
-
)
|
64
|
+
"'#{HTTP::API_KEY_ENV_NAME}' environment variable."
|
196
65
|
end
|
197
|
-
DocumentResponse.new(response, @document_type, document, pages)
|
198
66
|
end
|
199
67
|
end
|
200
68
|
end
|