mindee 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'inputs'
4
+ require_relative 'document_config'
5
+ require_relative 'endpoint'
6
+
7
+ module Mindee
8
+ # General client for sending a document to the API.
9
class DocumentClient
  # @param input_doc [Mindee::InputDocument]
  # @param doc_configs [Hash] configurations keyed by `[username, document_name]`
  def initialize(input_doc, doc_configs)
    @input_doc = input_doc
    @doc_configs = doc_configs
  end

  # Call prediction API on the document and parse the results.
  # @param document_name [String] Document name (type) to parse
  # @param username [String] API username, the endpoint owner
  # @param include_words [Boolean] Include all the words of the document in the response
  # @param close_file [Boolean] Whether to close the file after parsing it.
  # @return [Mindee::DocumentResponse]
  # @raise [RuntimeError] when the document type is not configured, when it is
  #   not configured for the given username, or when several usernames provide
  #   it and none was specified.
  def parse(document_name, username: '', include_words: false, close_file: true)
    config_key = resolve_config_key(document_name, username)
    # An explicit username may point at a key that was never configured;
    # fail with a clear message rather than calling `predict` on nil.
    doc_config = @doc_configs.fetch(config_key) do
      raise "Document type '#{document_name}' is not configured for username '#{username}'."
    end
    doc_config.predict(@input_doc, include_words, close_file)
  end

  private

  # Work out which `[username, document_name]` key the caller means.
  # @param document_name [String]
  # @param username [String]
  # @return [Array<String>]
  def resolve_config_key(document_name, username)
    candidates = @doc_configs.keys.select { |_owner, name| name == document_name }
    raise "Document type not configured: #{document_name}" if candidates.empty?
    return [username, document_name] unless username.empty?
    return candidates.first if candidates.length == 1

    usernames = candidates.map(&:first)
    raise "Duplicate configuration detected.\n" \
          "You specified the document '#{document_name}' in your custom config.\n" \
          "To avoid confusion, please add the 'username' attribute to " \
          "the parse method, one of #{usernames}."
  end
end
46
+
47
+ # Mindee API Client.
48
+ # See: https://developers.mindee.com/docs/
49
class Client
  DOC_TYPE_INVOICE = 'invoice'
  DOC_TYPE_RECEIPT = 'receipt'
  DOC_TYPE_PASSPORT = 'passport'
  DOC_TYPE_FINANCIAL = 'financial_doc'

  # @param api_key [String, nil] Default API key, used by every endpoint that is
  #   configured without its own key
  # @param raise_on_error [Boolean] Forwarded to each document configuration
  def initialize(api_key: nil, raise_on_error: true)
    @raise_on_error = raise_on_error
    @doc_configs = {}
    @api_key = api_key
  end

  # Configure a 'Mindee Invoice' document.
  # @param api_key [String] Override the client API key for this endpoint
  # @return [Mindee::Client]
  def config_invoice(api_key: nil)
    register_mindee_config(DOC_TYPE_INVOICE, InvoiceConfig, api_key)
  end

  # Configure a 'Mindee Expense Receipts' document.
  # @param api_key [String] Override the client API key for this endpoint
  # @return [Mindee::Client]
  def config_receipt(api_key: nil)
    register_mindee_config(DOC_TYPE_RECEIPT, ReceiptConfig, api_key)
  end

  # Configure a 'Mindee Passport' document.
  # @param api_key [String] Override the client API key for this endpoint
  # @return [Mindee::Client]
  def config_passport(api_key: nil)
    register_mindee_config(DOC_TYPE_PASSPORT, PassportConfig, api_key)
  end

  # Configure a 'Mindee Financial document'. Uses 'Invoice' and 'Expense Receipt' internally.
  # @param api_key [String] Override the client API key for this endpoint
  # @return [Mindee::Client]
  def config_financial_doc(api_key: nil)
    register_mindee_config(DOC_TYPE_FINANCIAL, FinancialDocConfig, api_key)
  end

  # Configure a custom document using the 'Mindee API Builder'.
  # @param document_name [String] The "API name" field in the "Settings" page of the API Builder
  # @param account_name [String] Your organization's username on the API Builder
  # @param api_key [String] Override the client API key for this endpoint
  # @param version [String] Version of the model to use; defaults to '1'.
  # @return [Mindee::Client]
  def config_custom_doc(document_name, account_name, api_key: nil, version: '1')
    custom_config = CustomDocConfig.new(
      document_name, account_name, version, api_key || @api_key, @raise_on_error
    )
    @doc_configs[[account_name, document_name]] = custom_config
    self
  end

  # Load a document from an absolute path, as a string.
  # @param input_path [String] Path of file to open
  # @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
  # @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
  # @return [Mindee::DocumentClient]
  def doc_from_path(input_path, cut_pages: true, max_pages: MAX_DOC_PAGES)
    client_for(PathDocument.new(input_path, cut_pages, max_pages: max_pages))
  end

  # Load a document from raw bytes.
  # @param input_bytes [String] Encoding::BINARY byte input
  # @param filename [String] The name of the file (without the path)
  # @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
  # @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
  # @return [Mindee::DocumentClient]
  def doc_from_bytes(input_bytes, filename, cut_pages: true, max_pages: MAX_DOC_PAGES)
    client_for(BytesDocument.new(input_bytes, filename, cut_pages, max_pages: max_pages))
  end

  # Load a document from a base64 encoded string.
  # @param base64_string [String] Input to parse as base64 string
  # @param filename [String] The name of the file (without the path)
  # @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
  # @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
  # @return [Mindee::DocumentClient]
  def doc_from_b64string(base64_string, filename, cut_pages: true, max_pages: MAX_DOC_PAGES)
    client_for(Base64Document.new(base64_string, filename, cut_pages, max_pages: max_pages))
  end

  # Load a document from a normal Ruby `File`.
  # @param input_file [File] Input file handle
  # @param filename [String] The name of the file (without the path)
  # @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
  # @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
  # @return [Mindee::DocumentClient]
  def doc_from_file(input_file, filename, cut_pages: true, max_pages: MAX_DOC_PAGES)
    client_for(FileDocument.new(input_file, filename, cut_pages, max_pages: max_pages))
  end

  private

  # Store a configuration for one of the off-the-shelf documents, which are
  # all registered under the 'mindee' username.
  # @param doc_type [String]
  # @param config_class [Class]
  # @param api_key [String, nil] falls back to the client-wide key when nil
  # @return [Mindee::Client] self, so `config_*` calls can be chained
  def register_mindee_config(doc_type, config_class, api_key)
    @doc_configs[['mindee', doc_type]] = config_class.new(api_key || @api_key, @raise_on_error)
    self
  end

  # Pair a loaded input document with the configurations registered so far.
  # @return [Mindee::DocumentClient]
  def client_for(input_doc)
    DocumentClient.new(input_doc, @doc_configs)
  end
end
171
+ end
@@ -0,0 +1,200 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ require_relative 'endpoint'
6
+ require_relative 'documents'
7
+ require_relative 'response'
8
+
9
+ module Mindee
10
+ # Specific client for sending a document to the API.
11
class DocumentConfig
  # Array of possible Mindee::Endpoint to be used.
  # @return [Array<Mindee::Endpoint>]
  attr_reader :endpoints

  # @param doc_class [Class<Mindee::Document>] class instantiated for each prediction
  # @param document_type [String]
  # @param endpoints [Array<Mindee::Endpoint>]
  # @param raise_on_error [Boolean] raise on a non-2xx API response instead of
  #   returning an empty document response
  def initialize(doc_class, document_type, endpoints, raise_on_error)
    @doc_class = doc_class
    @document_type = document_type
    @endpoints = endpoints
    @raise_on_error = raise_on_error
  end

  # Parse a prediction API result.
  # @param input_doc [Mindee::InputDocument]
  # @param response [Hash]
  # @return [Mindee::DocumentResponse]
  def build_predict_result(input_doc, response)
    inference = response['document']['inference']
    # Document-level prediction carries no page id.
    document = @doc_class.new(inference['prediction'], input_file: input_doc, page_id: nil)
    pages = inference['pages'].map do |page|
      @doc_class.new(page['prediction'], input_file: input_doc, page_id: page['id'])
    end
    DocumentResponse.new(response, @document_type, document, pages)
  end

  # Call the prediction API.
  # @param input_doc [Mindee::InputDocument]
  # @param include_words [Boolean]
  # @param close_file [Boolean]
  # @return [Mindee::DocumentResponse]
  def predict(input_doc, include_words, close_file)
    check_api_keys
    parse_response(input_doc, predict_request(input_doc, include_words, close_file))
  end

  private

  # Decode the HTTP response body and turn it into a document response.
  # @param input_doc [Mindee::InputDocument]
  # @param response [Net::HTTPResponse]
  # @return [Mindee::DocumentResponse]
  # @raise [Net::HTTPError] on a non-2xx status when `raise_on_error` is set
  def parse_response(input_doc, response)
    payload = JSON.parse(response.body, object_class: Hash)
    return build_predict_result(input_doc, payload) if response.code.to_i.between?(200, 299)

    raise Net::HTTPError.new("API #{response.code} HTTP error: #{payload}", response) if @raise_on_error

    # Errors are tolerated: hand back the raw payload with no parsed content.
    DocumentResponse.new(payload, @document_type, {}, [])
  end

  # Send the document to the first configured endpoint.
  # @param input_doc [Mindee::InputDocument]
  # @param include_words [Boolean]
  # @param close_file [Boolean]
  # @return [Net::HTTPResponse]
  def predict_request(input_doc, include_words, close_file)
    @endpoints[0].predict_request(input_doc, include_words: include_words, close_file: close_file)
  end

  # Ensure every endpoint has a non-empty API key.
  # @raise [RuntimeError] naming the environment variable of the first endpoint without a key
  def check_api_keys
    keyless = @endpoints.find { |endpoint| endpoint.api_key.nil? || endpoint.api_key.empty? }
    return if keyless.nil?

    raise "Missing API key for '#{@document_type}', check your Client Configuration.\n" \
          "You can set this using the '#{keyless.envvar_key_name}' environment variable."
  end
end
100
+
101
+ # Client for Invoice documents
102
class InvoiceConfig < DocumentConfig
  # Builds an 'invoice' configuration backed by a single invoice endpoint.
  # @param api_key [String, nil] API key handed to the endpoint
  # @param raise_on_error [Boolean]
  def initialize(api_key, raise_on_error)
    super(Invoice, 'invoice', [InvoiceEndpoint.new(api_key)], raise_on_error)
  end
end
113
+
114
+ # Client for Receipt documents
115
class ReceiptConfig < DocumentConfig
  # Builds a 'receipt' configuration backed by a single receipt endpoint.
  # @param api_key [String, nil] API key handed to the endpoint
  # @param raise_on_error [Boolean]
  def initialize(api_key, raise_on_error)
    super(Receipt, 'receipt', [ReceiptEndpoint.new(api_key)], raise_on_error)
  end
end
126
+
127
+ # Client for Passport documents
128
class PassportConfig < DocumentConfig
  # Builds a 'passport' configuration backed by a single passport endpoint.
  # @param api_key [String, nil] API key handed to the endpoint
  # @param raise_on_error [Boolean]
  def initialize(api_key, raise_on_error)
    super(Passport, 'passport', [PassportEndpoint.new(api_key)], raise_on_error)
  end
end
139
+
140
+ # Client for Financial documents
141
class FinancialDocConfig < DocumentConfig
  # Builds a 'financial_doc' configuration with two endpoints, in this order:
  # the invoice endpoint, then the receipt endpoint.
  # @param api_key [String, nil] API key handed to both endpoints
  # @param raise_on_error [Boolean]
  def initialize(api_key, raise_on_error)
    super(
      FinancialDocument,
      'financial_doc',
      [InvoiceEndpoint.new(api_key), ReceiptEndpoint.new(api_key)],
      raise_on_error
    )
  end

  private

  # Route PDF inputs to the invoice endpoint and everything else to the
  # receipt endpoint.
  # @param input_doc [Mindee::InputDocument]
  # @param include_words [Boolean]
  # @param close_file [Boolean]
  def predict_request(input_doc, include_words, close_file)
    invoice_endpoint, receipt_endpoint = @endpoints
    target = input_doc.pdf? ? invoice_endpoint : receipt_endpoint
    target.predict_request(input_doc, include_words: include_words, close_file: close_file)
  end
end
162
+
163
+ # Client for Custom (constructed) documents
164
class CustomDocConfig < DocumentConfig
  # @param document_type [String] name of the custom document
  # @param account_name [String]
  # @param version [String]
  # @param api_key [String, nil]
  # @param raise_on_error [Boolean]
  def initialize(document_type, account_name, version, api_key, raise_on_error)
    custom_endpoint = CustomEndpoint.new(document_type, account_name, version, api_key)
    super(CustomDocument, document_type, [custom_endpoint], raise_on_error)
  end

  # Parse a prediction API result.
  # Unlike the base implementation, each document is built with the
  # configured document type as its first argument.
  # @param input_doc [Mindee::InputDocument]
  # @param response [Hash]
  # @return [Mindee::DocumentResponse]
  def build_predict_result(input_doc, response)
    inference = response['document']['inference']
    document = CustomDocument.new(
      @document_type, inference['prediction'], input_file: input_doc, page_id: nil
    )
    pages = inference['pages'].map do |page|
      CustomDocument.new(
        @document_type, page['prediction'], input_file: input_doc, page_id: page['id']
      )
    end
    DocumentResponse.new(response, @document_type, document, pages)
  end
end
200
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ # Base document object.
5
class Document
  # document type
  # @return [String]
  attr_reader :document_type
  # Validation checks for the document
  # @return [Hash<Symbol, Boolean>]
  attr_reader :checklist
  # Original filename of the document
  # @return [String, nil]
  attr_reader :filename
  # Detected MIME type of the document
  # @return [String, nil]
  attr_reader :file_mimetype

  # Starts with an empty checklist; file details are only recorded when an
  # input file is given.
  # @param document_type [String]
  # @param input_file [Mindee::InputDocument, nil]
  def initialize(document_type, input_file: nil)
    @document_type = document_type
    remember_input_file(input_file) unless input_file.nil?
    @checklist = {}
  end

  # Whether every entry of the checklist is exactly `true`
  # (also true when the checklist is empty).
  # @return [Boolean]
  def all_checks
    @checklist.each_value.all? { |passed| passed == true }
  end

  private

  # Copy the filename and MIME type from the input file.
  # @param input_file [Mindee::InputDocument]
  def remember_input_file(input_file)
    @filename = input_file.filename
    @file_mimetype = input_file.file_mimetype
  end
end
35
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../fields'
4
+ require_relative 'base'
5
+
6
+ module Mindee
7
+ # Custom document object.
8
class CustomDocument < Document
  # All fields in the document
  # @return [Hash<Symbol, Mindee::ListField>]
  attr_reader :fields

  # Builds one `ListField` per prediction entry and exposes each both through
  # `fields` and through an accessor named after the field.
  # @param document_type [String]
  # @param prediction [Hash]
  # @param input_file [Mindee::InputDocument, nil]
  # @param page_id [Integer, nil]
  def initialize(document_type, prediction, input_file: nil, page_id: nil)
    super(document_type, input_file: input_file)
    @fields = {}
    prediction.each do |raw_name, raw_prediction|
      accessor_name = raw_name.to_sym
      list_field = ListField.new(raw_prediction, page_id)

      # Keep the field in the `fields` hash
      @fields[accessor_name] = list_field

      # Accessors live on the singleton class, so they exist only on this
      # instance and not on every CustomDocument.
      singleton_class.module_eval { attr_accessor accessor_name }
      send("#{accessor_name}=", list_field)
    end
  end

  # One line per field, framed by a header naming the document type and a
  # closing rule. Trailing whitespace is stripped from each line.
  # @return [String]
  def to_s
    lines = ["----- #{@document_type} -----", "Filename: #{@filename}".rstrip]
    @fields.each { |name, info| lines << "#{name}: #{info}".rstrip }
    lines << '----------------------'
    lines.join("\n")
  end
end
44
+ end
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../fields'
4
+ require_relative 'base'
5
+ require_relative 'invoice'
6
+ require_relative 'receipt'
7
+
8
+ module Mindee
9
+ # Union of `Invoice` and `Receipt`.
10
class FinancialDocument < Document
  # @return [Mindee::Locale]
  attr_reader :locale
  # @return [Mindee::Amount]
  attr_reader :total_incl
  # @return [Mindee::Amount]
  attr_reader :total_excl
  # @return [Mindee::Amount]
  attr_reader :total_tax
  # @return [Mindee::DateField]
  attr_reader :date
  # @return [Mindee::Field]
  attr_reader :category
  # @return [Mindee::Field]
  attr_reader :invoice_number
  # @return [Mindee::Field]
  attr_reader :time
  # @return [Mindee::DateField]
  attr_reader :due_date
  # @return [Array<Mindee::TaxField>]
  attr_reader :taxes
  # @return [Mindee::Field]
  attr_reader :supplier
  # @return [Mindee::Field]
  attr_reader :supplier_address
  # @return [Mindee::Field]
  attr_reader :customer_name
  # @return [Mindee::Field]
  attr_reader :customer_address
  # @return [Array<Mindee::CompanyRegistration>]
  attr_reader :company_registration
  # @return [Array<Mindee::CompanyRegistration>]
  attr_reader :customer_company_registration
  # @return [Array<Mindee::PaymentDetails>]
  attr_reader :payment_details

  # The prediction is treated as an invoice when it has an 'invoice_number'
  # key, and as a receipt otherwise.
  # @param prediction [Hash]
  # @param input_file [Mindee::InputDocument, nil]
  # @param page_id [Integer, nil]
  def initialize(prediction, input_file: nil, page_id: nil)
    super('financial_doc', input_file: input_file)
    @locale = Locale.new(prediction['locale'])
    if prediction.include? 'invoice_number'
      build_from_invoice(
        Invoice.new(prediction, input_file: input_file, page_id: page_id)
      )
    else
      build_from_receipt(
        Receipt.new(prediction, input_file: input_file, page_id: page_id)
      )
    end
  end

  # Multi-line summary of every field; trailing whitespace is stripped from
  # each line so empty values leave just the label.
  # @return [String]
  def to_s
    customer_company_registration = @customer_company_registration.map(&:value).join('; ')
    payment_details = @payment_details.map(&:to_s).join("\n                 ")
    company_registration = @company_registration.map(&:to_s).join('; ')
    taxes = @taxes.join("\n       ")
    out_str = String.new
    out_str << '-----Financial Document data-----'
    out_str << "\nFilename: #{@filename}".rstrip
    out_str << "\nCategory: #{@category}".rstrip
    out_str << "\nInvoice number: #{@invoice_number}".rstrip
    out_str << "\nTotal amount including taxes: #{@total_incl}".rstrip
    out_str << "\nTotal amount excluding taxes: #{@total_excl}".rstrip
    out_str << "\nInvoice date: #{@date}".rstrip
    out_str << "\nInvoice due date: #{@due_date}".rstrip
    out_str << "\nSupplier name: #{@supplier}".rstrip
    out_str << "\nSupplier address: #{@supplier_address}".rstrip
    out_str << "\nCustomer name: #{@customer_name}".rstrip
    out_str << "\nCustomer company registration: #{customer_company_registration}".rstrip
    out_str << "\nCustomer address: #{@customer_address}".rstrip
    out_str << "\nPayment details: #{payment_details}".rstrip
    out_str << "\nCompany numbers: #{company_registration}".rstrip
    out_str << "\nTaxes: #{taxes}".rstrip
    out_str << "\nTotal taxes: #{@total_tax}".rstrip
    out_str << "\nTime: #{@time}".rstrip
    out_str << "\nLocale: #{@locale}".rstrip
    out_str << "\n----------------------"
    out_str
  end

  private

  # Copy every invoice attribute; receipt-only attributes get an empty field.
  # @param invoice [Mindee::Invoice]
  def build_from_invoice(invoice)
    @orientation = invoice.orientation
    @category = empty_field
    @total_incl = invoice.total_incl
    @total_excl = invoice.total_excl
    @total_tax = invoice.total_tax
    @date = invoice.date
    @time = empty_field
    @due_date = invoice.due_date
    @invoice_number = invoice.invoice_number
    @taxes = invoice.taxes
    @supplier = invoice.supplier
    @supplier_address = invoice.supplier_address
    # These two were previously never assigned, so their readers returned nil.
    @customer_name = optional_field(invoice, :customer_name)
    @customer_address = optional_field(invoice, :customer_address)
    @company_registration = invoice.company_registration
    @customer_company_registration = invoice.customer_company_registration
    @payment_details = invoice.payment_details
  end

  # Copy every receipt attribute; invoice-only attributes get an empty field,
  # or an empty array for the list-valued ones.
  # @param receipt [Mindee::Receipt]
  def build_from_receipt(receipt)
    @orientation = receipt.orientation
    @category = receipt.category
    @total_incl = receipt.total_incl
    @total_excl = receipt.total_excl
    @total_tax = receipt.total_tax
    @date = receipt.date
    @time = receipt.time
    @due_date = empty_field
    @invoice_number = empty_field
    @taxes = receipt.taxes
    @supplier = receipt.supplier
    @supplier_address = empty_field
    @customer_name = empty_field
    @customer_address = empty_field
    # List-valued attributes must stay arrays: `to_s` calls `map` on them and
    # their readers are documented as returning arrays.
    @company_registration = []
    @customer_company_registration = []
    @payment_details = []
  end

  # Read an attribute from the source document when it exposes it.
  # NOTE(review): Invoice's readers are not visible from this file; if it
  # always defines them, the `respond_to?` guard can be dropped.
  # @param source [Object]
  # @param reader [Symbol]
  # @return [Mindee::Field]
  def optional_field(source, reader)
    source.respond_to?(reader) ? source.public_send(reader) : empty_field
  end

  # Placeholder for attributes the underlying document type does not provide.
  # @return [Mindee::Field]
  def empty_field
    Mindee::Field.new({}, nil)
  end
end
135
+ end