mindee 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'inputs'
4
+ require_relative 'document_config'
5
+ require_relative 'endpoint'
6
+
7
+ module Mindee
8
# General client for sending a document to the API.
class DocumentClient
  # @param input_doc [Mindee::InputDocument] Document that will be sent for prediction
  # @param doc_configs [Hash] Document configurations, keyed by `[username, document_name]`
  def initialize(input_doc, doc_configs)
    @input_doc = input_doc
    @doc_configs = doc_configs
  end

  # Call prediction API on the document and parse the results.
  # @param document_name [String] Document name (type) to parse
  # @param username [String] API username, the endpoint owner
  # @param include_words [Boolean] Include all the words of the document in the response
  # @param close_file [Boolean] Whether to close the file after parsing it.
  # @return [Mindee::DocumentResponse]
  # @raise [RuntimeError] when no configuration matches, or when several match
  #   and no `username` was given to pick one
  def parse(document_name, username: '', include_words: false, close_file: true)
    config_key = resolve_config_key(document_name, username)
    @doc_configs.fetch(config_key).predict(@input_doc, include_words, close_file)
  end

  private

  # Find the `[username, document_name]` key of the configuration to use.
  # @param document_name [String]
  # @param username [String, nil]
  # @return [Array<String>]
  def resolve_config_key(document_name, username)
    candidates = @doc_configs.keys.select { |key| key[1] == document_name }
    raise "Document type not configured: #{document_name}" if candidates.empty?

    # `to_s` lets a nil username behave like the default empty string.
    unless username.to_s.empty?
      requested = [username, document_name]
      return requested if @doc_configs.key?(requested)

      # Without this check the lookup yields nil and fails later with NoMethodError.
      raise "Document type '#{document_name}' is not configured for username '#{username}'."
    end
    return candidates.first if candidates.length == 1

    usernames = candidates.map(&:first)
    raise "Duplicate configuration detected.\n" \
          "You specified the document '#{document_name}' in your custom config.\n" \
          "To avoid confusion, please add the 'username' attribute to " \
          "the parse method, one of #{usernames}."
  end
end
46
+
47
# Mindee API Client.
# See: https://developers.mindee.com/docs/
class Client
  DOC_TYPE_INVOICE = 'invoice'
  DOC_TYPE_RECEIPT = 'receipt'
  DOC_TYPE_PASSPORT = 'passport'
  DOC_TYPE_FINANCIAL = 'financial_doc'

  # @param api_key [String, nil] Default API key, used by every `config_*` call
  #   that does not pass its own
  # @param raise_on_error [Boolean] Handed to every document configuration created
  def initialize(api_key: nil, raise_on_error: true)
    @api_key = api_key
    @raise_on_error = raise_on_error
    @doc_configs = {}
  end

  # Register the 'Mindee Invoice' document.
  # @param api_key [String] Override the client API key for this endpoint
  # @return [Mindee::Client] self, so calls can be chained
  def config_invoice(api_key: nil)
    config = InvoiceConfig.new(api_key || @api_key, @raise_on_error)
    @doc_configs[['mindee', DOC_TYPE_INVOICE]] = config
    self
  end

  # Register the 'Mindee Expense Receipts' document.
  # @param api_key [String] Override the client API key for this endpoint
  # @return [Mindee::Client] self, so calls can be chained
  def config_receipt(api_key: nil)
    config = ReceiptConfig.new(api_key || @api_key, @raise_on_error)
    @doc_configs[['mindee', DOC_TYPE_RECEIPT]] = config
    self
  end

  # Register the 'Mindee Passport' document.
  # @param api_key [String] Override the client API key for this endpoint
  # @return [Mindee::Client] self, so calls can be chained
  def config_passport(api_key: nil)
    config = PassportConfig.new(api_key || @api_key, @raise_on_error)
    @doc_configs[['mindee', DOC_TYPE_PASSPORT]] = config
    self
  end

  # Register the 'Mindee Financial document'. Uses 'Invoice' and 'Expense Receipt' internally.
  # @param api_key [String] Override the client API key for this endpoint
  # @return [Mindee::Client] self, so calls can be chained
  def config_financial_doc(api_key: nil)
    config = FinancialDocConfig.new(api_key || @api_key, @raise_on_error)
    @doc_configs[['mindee', DOC_TYPE_FINANCIAL]] = config
    self
  end

  # Register a custom document built with the 'Mindee API Builder'.
  # @param document_name [String] The "API name" field in the "Settings" page of the API Builder
  # @param account_name [String] Your organization's username on the API Builder
  # @param api_key [String] Override the client API key for this endpoint
  # @param version [String] Version of the model to use, '1' unless specified
  # @return [Mindee::Client] self, so calls can be chained
  def config_custom_doc(document_name, account_name, api_key: nil, version: '1')
    config = CustomDocConfig.new(
      document_name, account_name, version, api_key || @api_key, @raise_on_error
    )
    @doc_configs[[account_name, document_name]] = config
    self
  end

  # Load a document from an absolute path, as a string.
  # @param input_path [String] Path of file to open
  # @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
  # @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
  # @return [Mindee::DocumentClient]
  def doc_from_path(input_path, cut_pages: true, max_pages: MAX_DOC_PAGES)
    DocumentClient.new(
      PathDocument.new(input_path, cut_pages, max_pages: max_pages),
      @doc_configs
    )
  end

  # Load a document from raw bytes.
  # @param input_bytes [String] Encoding::BINARY byte input
  # @param filename [String] The name of the file (without the path)
  # @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
  # @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
  # @return [Mindee::DocumentClient]
  def doc_from_bytes(input_bytes, filename, cut_pages: true, max_pages: MAX_DOC_PAGES)
    DocumentClient.new(
      BytesDocument.new(input_bytes, filename, cut_pages, max_pages: max_pages),
      @doc_configs
    )
  end

  # Load a document from a base64 encoded string.
  # @param base64_string [String] Input to parse as base64 string
  # @param filename [String] The name of the file (without the path)
  # @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
  # @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
  # @return [Mindee::DocumentClient]
  def doc_from_b64string(base64_string, filename, cut_pages: true, max_pages: MAX_DOC_PAGES)
    DocumentClient.new(
      Base64Document.new(base64_string, filename, cut_pages, max_pages: max_pages),
      @doc_configs
    )
  end

  # Load a document from a normal Ruby `File`.
  # @param input_file [File] Input file handle
  # @param filename [String] The name of the file (without the path)
  # @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
  # @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
  # @return [Mindee::DocumentClient]
  def doc_from_file(input_file, filename, cut_pages: true, max_pages: MAX_DOC_PAGES)
    DocumentClient.new(
      FileDocument.new(input_file, filename, cut_pages, max_pages: max_pages),
      @doc_configs
    )
  end
end
171
+ end
@@ -0,0 +1,200 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ require_relative 'endpoint'
6
+ require_relative 'documents'
7
+ require_relative 'response'
8
+
9
+ module Mindee
10
# Specific client for sending a document to the API.
class DocumentConfig
  # Array of possible Mindee::Endpoint to be used.
  # @return [Array<Mindee::Endpoint>]
  attr_reader :endpoints

  # @param doc_class [Class<Mindee::Document>] Class instantiated for the document and each page
  # @param document_type [String]
  # @param endpoints [Array<Mindee::Endpoint>]
  # @param raise_on_error [Boolean] Raise on a non-2xx response instead of returning it
  def initialize(doc_class, document_type, endpoints, raise_on_error)
    @doc_class = doc_class
    @document_type = document_type
    @endpoints = endpoints
    @raise_on_error = raise_on_error
  end

  # Parse a prediction API result.
  # @param input_doc [Mindee::InputDocument]
  # @param response [Hash] Decoded JSON body of a successful prediction call
  # @return [Mindee::DocumentResponse]
  def build_predict_result(input_doc, response)
    inference = response['document']['inference']
    document = @doc_class.new(inference['prediction'], input_file: input_doc, page_id: nil)
    pages = inference['pages'].map do |page|
      @doc_class.new(page['prediction'], input_file: input_doc, page_id: page['id'])
    end
    DocumentResponse.new(response, @document_type, document, pages)
  end

  # Call the prediction API.
  # @param input_doc [Mindee::InputDocument]
  # @param include_words [Boolean]
  # @param close_file [Boolean]
  # @return [Mindee::DocumentResponse]
  # @raise [RuntimeError] when an endpoint has no API key
  # @raise [Net::HTTPError] on a non-2xx response when `raise_on_error` is set
  def predict(input_doc, include_words, close_file)
    check_api_keys
    response = predict_request(input_doc, include_words, close_file)
    parse_response(input_doc, response)
  end

  private

  # Turn the HTTP response into a DocumentResponse, or raise on HTTP errors.
  # @param input_doc [Mindee::InputDocument]
  # @param response [Net::HTTPResponse]
  # @return [Mindee::DocumentResponse]
  def parse_response(input_doc, response)
    if (200..299).cover?(response.code.to_i)
      return build_predict_result(input_doc, JSON.parse(response.body, object_class: Hash))
    end

    # The status is checked first so that an error body which is not JSON
    # is reported as an HTTP error rather than as a JSON::ParserError.
    hashed_response = decode_error_body(response.body)
    if @raise_on_error
      raise Net::HTTPError.new(
        "API #{response.code} HTTP error: #{hashed_response}", response
      )
    end
    DocumentResponse.new(hashed_response, @document_type, {}, [])
  end

  # Decode the body of an error response.
  # @param body [String, nil]
  # @return [Hash] the decoded JSON, or `{ 'raw_body' => body }` when the body
  #   cannot be parsed as JSON
  def decode_error_body(body)
    JSON.parse(body.to_s, object_class: Hash)
  rescue JSON::ParserError
    { 'raw_body' => body.to_s }
  end

  # Send the document to the first configured endpoint.
  # @param input_doc [Mindee::InputDocument]
  # @param include_words [Boolean]
  # @param close_file [Boolean]
  # @return [Net::HTTPResponse]
  def predict_request(input_doc, include_words, close_file)
    @endpoints.first.predict_request(input_doc, include_words: include_words, close_file: close_file)
  end

  # Make sure every endpoint has a non-empty API key before any request is sent.
  # @raise [RuntimeError] naming the environment variable of the first endpoint without a key
  def check_api_keys
    @endpoints.each do |endpoint|
      next unless endpoint.api_key.nil? || endpoint.api_key.empty?

      raise "Missing API key for '#{@document_type}', " \
            "check your Client Configuration.\n" \
            'You can set this using the ' \
            "'#{endpoint.envvar_key_name}' environment variable."
    end
  end
end
100
+
101
# Document configuration for invoices: a single InvoiceEndpoint, results built as Invoice.
class InvoiceConfig < DocumentConfig
  # @param api_key [String, nil] API key given to the endpoint
  # @param raise_on_error [Boolean]
  def initialize(api_key, raise_on_error)
    invoice_endpoint = InvoiceEndpoint.new(api_key)
    super(Invoice, 'invoice', [invoice_endpoint], raise_on_error)
  end
end
113
+
114
# Document configuration for receipts: a single ReceiptEndpoint, results built as Receipt.
class ReceiptConfig < DocumentConfig
  # @param api_key [String, nil] API key given to the endpoint
  # @param raise_on_error [Boolean]
  def initialize(api_key, raise_on_error)
    receipt_endpoint = ReceiptEndpoint.new(api_key)
    super(Receipt, 'receipt', [receipt_endpoint], raise_on_error)
  end
end
126
+
127
# Document configuration for passports: a single PassportEndpoint, results built as Passport.
class PassportConfig < DocumentConfig
  # @param api_key [String, nil] API key given to the endpoint
  # @param raise_on_error [Boolean]
  def initialize(api_key, raise_on_error)
    passport_endpoint = PassportEndpoint.new(api_key)
    super(Passport, 'passport', [passport_endpoint], raise_on_error)
  end
end
139
+
140
# Document configuration for financial documents.
# Holds an invoice endpoint and a receipt endpoint, in that order.
class FinancialDocConfig < DocumentConfig
  # @param api_key [String, nil] API key given to both endpoints
  # @param raise_on_error [Boolean]
  def initialize(api_key, raise_on_error)
    invoice_endpoint = InvoiceEndpoint.new(api_key)
    receipt_endpoint = ReceiptEndpoint.new(api_key)
    super(FinancialDocument, 'financial_doc', [invoice_endpoint, receipt_endpoint], raise_on_error)
  end

  private

  # PDF inputs are sent to the invoice endpoint, everything else to the receipt endpoint.
  # @param input_doc [Mindee::InputDocument]
  # @param include_words [Boolean]
  # @param close_file [Boolean]
  def predict_request(input_doc, include_words, close_file)
    invoice_endpoint, receipt_endpoint = @endpoints
    target = input_doc.pdf? ? invoice_endpoint : receipt_endpoint
    target.predict_request(input_doc, include_words: include_words, close_file: close_file)
  end
end
162
+
163
# Document configuration for custom (API Builder) documents.
class CustomDocConfig < DocumentConfig
  # @param document_type [String] Name of the custom document, also used as its type
  # @param account_name [String]
  # @param version [String]
  # @param api_key [String, nil] API key given to the endpoint
  # @param raise_on_error [Boolean]
  def initialize(document_type, account_name, version, api_key, raise_on_error)
    custom_endpoint = CustomEndpoint.new(document_type, account_name, version, api_key)
    super(CustomDocument, document_type, [custom_endpoint], raise_on_error)
  end

  # Parse a prediction API result.
  # Unlike the parent implementation, the document type is handed to every
  # CustomDocument that gets built.
  # @param input_doc [Mindee::InputDocument]
  # @param response [Hash]
  # @return [Mindee::DocumentResponse]
  def build_predict_result(input_doc, response)
    document = new_custom_document(response['document']['inference']['prediction'], input_doc, nil)
    pages = response['document']['inference']['pages'].map do |page|
      new_custom_document(page['prediction'], input_doc, page['id'])
    end
    DocumentResponse.new(response, @document_type, document, pages)
  end

  private

  # Build one CustomDocument for the whole document (nil page_id) or for one page.
  def new_custom_document(prediction, input_doc, page_id)
    CustomDocument.new(@document_type, prediction, input_file: input_doc, page_id: page_id)
  end
end
200
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
# Base document object.
class Document
  # document type
  # @return [String]
  attr_reader :document_type
  # Validation checks for the document
  # @return [Hash<Symbol, Boolean>]
  attr_reader :checklist
  # Original filename of the document
  # @return [String, nil]
  attr_reader :filename
  # Detected MIME type of the document
  # @return [String, nil]
  attr_reader :file_mimetype

  # Starts with an empty checklist; filename and MIME type are copied from
  # the input file when one is given.
  # @param document_type [String]
  # @param input_file [Mindee::InputDocument, nil]
  def initialize(document_type, input_file: nil)
    @document_type = document_type
    @checklist = {}
    return if input_file.nil?

    @filename = input_file.filename
    @file_mimetype = input_file.file_mimetype
  end

  # Whether every entry of the checklist is exactly `true`.
  # An empty checklist counts as passing.
  # @return [Boolean]
  def all_checks
    @checklist.each_value.all? { |passed| passed == true }
  end
end
35
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../fields'
4
+ require_relative 'base'
5
+
6
+ module Mindee
7
# Custom document object.
class CustomDocument < Document
  # All fields in the document
  # @return [Hash<Symbol, Mindee::ListField>]
  attr_reader :fields

  # Builds one ListField per prediction entry. Each is stored in `fields`
  # and also exposed through an accessor named after the field.
  # @param document_type [String]
  # @param prediction [Hash]
  # @param input_file [Mindee::InputDocument, nil]
  # @param page_id [Integer, nil]
  def initialize(document_type, prediction, input_file: nil, page_id: nil)
    super(document_type, input_file: input_file)
    @fields = {}
    prediction.each do |field_name, field_prediction|
      key = field_name.to_sym
      list_field = ListField.new(field_prediction, page_id)

      # Record the field in the `fields` hash
      @fields[key] = list_field

      # Define the accessor on this instance only, then store the value through it
      singleton_class.send(:attr_accessor, key)
      send("#{key}=", list_field)
    end
  end

  # One line for the filename, then one line per field.
  # @return [String]
  def to_s
    lines = ["----- #{@document_type} -----", "\nFilename: #{@filename}".rstrip]
    @fields.each do |name, info|
      lines << "\n#{name}: #{info}".rstrip
    end
    lines << "\n----------------------"
    lines.join
  end
end
44
+ end
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../fields'
4
+ require_relative 'base'
5
+ require_relative 'invoice'
6
+ require_relative 'receipt'
7
+
8
+ module Mindee
9
# Union of `Invoice` and `Receipt`.
#
# The prediction is parsed as an Invoice when it has an 'invoice_number' key
# and as a Receipt otherwise. Attributes the parsed document does not provide
# are filled with an empty Field (single values) or an empty Array (lists).
class FinancialDocument < Document
  # @return [Mindee::Locale]
  attr_reader :locale
  # @return [Mindee::Amount]
  attr_reader :total_incl
  # @return [Mindee::Amount]
  attr_reader :total_excl
  # @return [Mindee::Amount]
  attr_reader :total_tax
  # @return [Mindee::DateField]
  attr_reader :date
  # @return [Mindee::Field]
  attr_reader :category
  # @return [Mindee::Field]
  attr_reader :invoice_number
  # @return [Mindee::Field]
  attr_reader :time
  # @return [Mindee::DateField]
  attr_reader :due_date
  # @return [Array<Mindee::TaxField>]
  attr_reader :taxes
  # @return [Mindee::Field]
  attr_reader :supplier
  # @return [Mindee::Field]
  attr_reader :supplier_address
  # @return [Mindee::Field]
  attr_reader :customer_name
  # @return [Mindee::Field]
  attr_reader :customer_address
  # @return [Array<Mindee::CompanyRegistration>]
  attr_reader :company_registration
  # @return [Array<Mindee::CompanyRegistration>]
  attr_reader :customer_company_registration
  # @return [Array<Mindee::PaymentDetails>]
  attr_reader :payment_details

  # @param prediction [Hash]
  # @param input_file [Mindee::InputDocument, nil]
  # @param page_id [Integer, nil]
  def initialize(prediction, input_file: nil, page_id: nil)
    super('financial_doc', input_file: input_file)
    @locale = Locale.new(prediction['locale'])
    if prediction.key?('invoice_number')
      build_from_invoice(
        Invoice.new(prediction, input_file: input_file, page_id: page_id)
      )
    else
      build_from_receipt(
        Receipt.new(prediction, input_file: input_file, page_id: page_id)
      )
    end
  end

  # @return [String]
  def to_s
    customer_registrations = @customer_company_registration.map(&:value).join('; ')
    payments = @payment_details.map(&:to_s).join("\n ")
    registrations = @company_registration.map(&:to_s).join('; ')
    tax_lines = @taxes.join("\n ")
    out_str = String.new
    out_str << '-----Financial Document data-----'
    out_str << "\nFilename: #{@filename}".rstrip
    out_str << "\nCategory: #{@category}".rstrip
    out_str << "\nInvoice number: #{@invoice_number}".rstrip
    out_str << "\nTotal amount including taxes: #{@total_incl}".rstrip
    out_str << "\nTotal amount excluding taxes: #{@total_excl}".rstrip
    out_str << "\nInvoice date: #{@date}".rstrip
    out_str << "\nInvoice due date: #{@due_date}".rstrip
    out_str << "\nSupplier name: #{@supplier}".rstrip
    out_str << "\nSupplier address: #{@supplier_address}".rstrip
    out_str << "\nCustomer name: #{@customer_name}".rstrip
    out_str << "\nCustomer company registration: #{customer_registrations}".rstrip
    out_str << "\nCustomer address: #{@customer_address}".rstrip
    out_str << "\nPayment details: #{payments}".rstrip
    out_str << "\nCompany numbers: #{registrations}".rstrip
    out_str << "\nTaxes: #{tax_lines}".rstrip
    out_str << "\nTotal taxes: #{@total_tax}".rstrip
    out_str << "\nTime: #{@time}".rstrip
    out_str << "\nLocale: #{@locale}".rstrip
    out_str << "\n----------------------"
    out_str
  end

  private

  # Copy the invoice's values; `category` and `time` are left empty.
  # @param invoice [Mindee::Invoice]
  def build_from_invoice(invoice)
    @orientation = invoice.orientation
    @category = empty_field
    @total_incl = invoice.total_incl
    @total_excl = invoice.total_excl
    @total_tax = invoice.total_tax
    @date = invoice.date
    @time = empty_field
    @due_date = invoice.due_date
    @invoice_number = invoice.invoice_number
    @taxes = invoice.taxes
    @supplier = invoice.supplier
    @supplier_address = invoice.supplier_address
    # These two were never assigned, so the readers returned nil instead of a Field.
    # NOTE(review): if Invoice exposes customer_name / customer_address, copy them here instead.
    @customer_name = empty_field
    @customer_address = empty_field
    @company_registration = invoice.company_registration
    @customer_company_registration = invoice.customer_company_registration
    @payment_details = invoice.payment_details
  end

  # Copy the receipt's values; invoice-only attributes are left empty.
  # @param receipt [Mindee::Receipt]
  def build_from_receipt(receipt)
    @orientation = receipt.orientation
    @category = receipt.category
    @total_incl = receipt.total_incl
    @total_excl = receipt.total_excl
    @total_tax = receipt.total_tax
    @date = receipt.date
    @time = receipt.time
    @due_date = empty_field
    @invoice_number = empty_field
    @taxes = receipt.taxes
    @supplier = receipt.supplier
    @supplier_address = empty_field
    @customer_name = empty_field
    @customer_address = empty_field
    # List attributes must be arrays: `to_s` calls `map` on each of them.
    @company_registration = []
    @customer_company_registration = []
    @payment_details = []
  end

  # @return [Mindee::Field] a field built from an empty prediction
  def empty_field
    Mindee::Field.new({}, nil)
  end
end
135
+ end