mindee 1.1.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.rubocop.yml +2 -2
  4. data/.yardopts +4 -0
  5. data/CHANGELOG.md +25 -0
  6. data/Gemfile +0 -7
  7. data/README.md +52 -21
  8. data/Rakefile +6 -1
  9. data/bin/mindee.rb +70 -61
  10. data/docs/ruby-api-builder.md +131 -0
  11. data/docs/ruby-getting-started.md +265 -0
  12. data/docs/ruby-invoice-ocr.md +261 -0
  13. data/docs/ruby-passport-ocr.md +156 -0
  14. data/docs/ruby-receipt-ocr.md +170 -0
  15. data/lib/mindee/client.rb +128 -93
  16. data/lib/mindee/document_config.rb +22 -154
  17. data/lib/mindee/geometry.rb +105 -8
  18. data/lib/mindee/http/endpoint.rb +80 -0
  19. data/lib/mindee/input/pdf_processing.rb +106 -0
  20. data/lib/mindee/input/sources.rb +97 -0
  21. data/lib/mindee/input.rb +3 -0
  22. data/lib/mindee/parsing/document.rb +31 -0
  23. data/lib/mindee/parsing/error.rb +22 -0
  24. data/lib/mindee/parsing/inference.rb +53 -0
  25. data/lib/mindee/parsing/page.rb +46 -0
  26. data/lib/mindee/parsing/prediction/base.rb +30 -0
  27. data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
  28. data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
  29. data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
  30. data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
  31. data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
  32. data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
  33. data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
  34. data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
  35. data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
  36. data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
  37. data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
  38. data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
  39. data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
  40. data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
  41. data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
  42. data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
  43. data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
  44. data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
  45. data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
  46. data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +84 -0
  47. data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
  48. data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
  49. data/lib/mindee/parsing/prediction.rb +12 -0
  50. data/lib/mindee/parsing.rb +4 -0
  51. data/lib/mindee/version.rb +1 -1
  52. data/mindee.gemspec +11 -5
  53. metadata +105 -30
  54. data/lib/mindee/documents/base.rb +0 -35
  55. data/lib/mindee/documents/custom.rb +0 -65
  56. data/lib/mindee/documents/financial_doc.rb +0 -135
  57. data/lib/mindee/documents/invoice.rb +0 -162
  58. data/lib/mindee/documents/passport.rb +0 -163
  59. data/lib/mindee/documents/receipt.rb +0 -109
  60. data/lib/mindee/documents.rb +0 -7
  61. data/lib/mindee/endpoint.rb +0 -105
  62. data/lib/mindee/fields/orientation.rb +0 -26
  63. data/lib/mindee/fields.rb +0 -11
  64. data/lib/mindee/inputs.rb +0 -153
  65. data/lib/mindee/response.rb +0 -27
@@ -0,0 +1,170 @@
1
+ The Ruby OCR SDK supports the [receipt API](https://developers.mindee.com/docs/receipt-ocr) for extracting data from receipts.
2
+
3
+ Using the sample below, we are going to illustrate how to extract the data that we want using the OCR SDK.
4
+
5
+ ![sample receipt](https://raw.githubusercontent.com/mindee/client-lib-test-data/main/receipt/receipt-with-tip.jpg)
6
+
7
+ ## Quick Start
8
+ ```ruby
9
+ require 'mindee'
10
+
11
+ # Init a new client, specifying an API key
12
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
13
+
14
+ # Send the file
15
+ result = mindee_client.doc_from_path('/path/to/the/file.ext').parse(Mindee::Prediction::ReceiptV4)
16
+
17
+ # Print a summary of the document prediction in RST format
18
+ puts result.inference.prediction
19
+ ```
20
+
21
+ Output:
22
+ ```shell
23
+ :Locale: en-US; en; US; USD;
24
+ :Date: 2014-07-07
25
+ :Category: food
26
+ :Subcategory: restaurant
27
+ :Document type: EXPENSE RECEIPT
28
+ :Time: 20:20
29
+ :Supplier name: LOGANS
30
+ :Taxes: 3.34 TAX
31
+ :Total net: 40.48
32
+ :Total taxes: 3.34
33
+ :Tip: 10.00
34
+ :Total amount: 53.8
35
+ ```
36
+
37
+ ## Fields
38
+ Each prediction object contains a set of different fields.
39
+ Each `Field` object contains at a minimum the following attributes:
40
+
41
+ * `value` (String or Float depending on the field type): corresponds to the field value. Can be `nil` if no value was extracted.
42
+ * `confidence` (Float): the confidence score of the field prediction.
43
+ * `bounding_box` (Array< Array< Float > >): contains exactly 4 relative vertex coordinates (points) of a right rectangle containing the field in the document.
44
+ * `polygon` (Array< Array< Float > >): contains the relative vertex coordinates (points) of a polygon containing the field in the image.
45
+ * `reconstructed` (Boolean): True if the field was reconstructed or computed using other fields.
46
+
47
+
48
+ ## Attributes
49
+ Depending on the field type specified, additional attributes can be extracted in the `Receipt` object.
50
+
51
+ Using the above sample, the following are the basic fields that can be extracted:
52
+
53
+ - [Orientation](#orientation)
54
+ - [Category](#category)
55
+ - [Date](#date)
56
+ - [Locale](#locale)
57
+ - [Supplier Information](#supplier-information)
58
+ - [Taxes](#taxes)
59
+ - [Time](#time)
60
+ - [Totals](#totals)
61
+
62
+
63
+ ### Category
64
+ * **`category`** (Field): Receipt category as seen on the receipt.
65
+ The following categories are supported: toll, food, parking, transport, accommodation, gasoline, miscellaneous.
66
+
67
+ ```ruby
68
+ puts result.inference.prediction.category.value
69
+ ```
70
+
71
+
72
+ ### Date
73
+ Date fields:
74
+
75
+ * contain the `date_object` attribute, which is a standard Ruby [date object](https://ruby-doc.org/stdlib-2.7.1/libdoc/date/rdoc/Date.html)
76
+ * have a `value` attribute which is the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) representation of the date.
77
+
78
+ The following date fields are available:
79
+
80
+ * **`date`**: Date the receipt was issued
81
+
82
+ ```ruby
83
+ puts result.inference.prediction.date.value
84
+ ```
85
+
86
+
87
+ ### Locale
88
+ **`locale`** (Locale): Locale information.
89
+
90
+ * `locale.value` (String): Locale with country and language codes.
91
+ ```ruby
92
+ puts result.inference.prediction.locale
93
+ ```
94
+
95
+ * `locale.language` (String): Language code in [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format as seen on the document.
96
+
97
+ ```ruby
98
+ puts result.inference.prediction.locale.language
99
+ ```
100
+
101
+ * `locale.currency` (String): Currency code in [ISO 4217](https://en.wikipedia.org/wiki/ISO_4217) format as seen on the document.
102
+
103
+ ```ruby
104
+ puts result.inference.prediction.locale.currency
105
+ ```
106
+
107
+ * `locale.country` (String): Country code in [ISO 3166-1](https://en.wikipedia.org/wiki/ISO_3166-1) alpha-2 format as seen on the document.
108
+
109
+ ```ruby
110
+ puts result.inference.prediction.locale.country
111
+ ```
112
+
113
+ ### Supplier Information
114
+ * **`supplier_name`** (Field): Supplier name as written in the receipt.
115
+
116
+ ```ruby
117
+ puts result.inference.prediction.supplier_name.value
118
+ ```
119
+
120
+
121
+ ### Taxes
122
+ **`taxes`** (Array< TaxField >): Contains tax fields as seen on the receipt.
123
+
124
+ * `value` (Float): The tax amount.
125
+ ```ruby
126
+ # Show the amount of the first tax
127
+ puts result.inference.prediction.taxes[0].value
128
+ ```
129
+
130
+ * `code` (String): The tax code (HST, GST... for Canada; City Tax, State Tax for the US, etc.).
131
+ ```ruby
132
+ # Show the code of the first tax
133
+ puts result.inference.prediction.taxes[0].code
134
+ ```
135
+
136
+ * `rate` (Float): The tax rate.
137
+ ```ruby
138
+ # Show the rate of the first tax
139
+ puts result.inference.prediction.taxes[0].rate
140
+ ```
141
+
142
+ ### Time
143
+ * **`time`**: Time of purchase as seen on the receipt
144
+ * `value` (String): Time of purchase in 24-hour format (hh:mm).
145
+
146
+ ```ruby
147
+ puts result.inference.prediction.time.value
148
+ ```
149
+
150
+ ### Totals
151
+ * **`total_amount`** (Field): Total amount including taxes
152
+
153
+ ```ruby
154
+ puts result.inference.prediction.total_amount.value
155
+ ```
156
+
157
+ * **`total_net`** (Field): Total amount paid excluding taxes
158
+
159
+ ```ruby
160
+ puts result.inference.prediction.total_net.value
161
+ ```
162
+
163
+ * **`total_tax`** (Field): Total tax value from tax lines
164
+
165
+ ```ruby
166
+ puts result.inference.prediction.total_tax.value
167
+ ```
168
+
169
+ ## Questions?
170
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
data/lib/mindee/client.rb CHANGED
@@ -1,8 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'inputs'
3
+ require_relative 'input'
4
4
  require_relative 'document_config'
5
- require_relative 'endpoint'
5
+ require_relative 'http/endpoint'
6
+ require_relative 'parsing/prediction'
6
7
 
7
8
  module Mindee
8
9
  # General client for sending a document to the API.
@@ -15,157 +16,191 @@ module Mindee
15
16
  end
16
17
 
17
18
  # Call prediction API on the document and parse the results.
18
- # @param document_name [String] Document name (type) to parse
19
- # @param username [String] API username, the endpoint owner
20
- # @param include_words [Boolean] Include all the words of the document in the response
21
- # @param close_file [Boolean] Whether to close the file after parsing it.
19
+ #
20
+ # @param prediction_class [Mindee::Prediction::Prediction]
21
+ #
22
+ # @param endpoint_name [String] For custom endpoints, the "API name" field in the "Settings" page of the
23
+ # API Builder. Do not set for standard (off the shelf) endpoints.
24
+ #
25
+ # @param account_name [String] For custom endpoints, your account or organization username on the API Builder.
26
+ # This is normally not required unless you have a custom endpoint which has the
27
+ # same name as a standard (off the shelf) endpoint.
28
+ # Do not set for standard (off the shelf) endpoints.
29
+ #
30
+ # @param include_words [Boolean] Whether to include the full text for each page.
31
+ # This performs a full OCR operation on the server and will increase response time.
32
+ #
33
+ # @param close_file [Boolean] Whether to `close()` the file after parsing it.
34
+ # Set to false if you need to access the file after this operation.
35
+ #
36
+ # @param page_options [Hash, nil] Page cutting/merge options:
37
+ #
38
+ # * `:page_indexes` Zero-based list of page indexes.
39
+ # * `:operation` Operation to apply on the document, given the `page_indexes` specified:
40
+ # * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
41
+ # * `:REMOVE` - remove the specified pages, and keep all others.
42
+ # * `:on_min_pages` Apply the operation only if document has at least this many pages.
43
+ #
44
+ # @param cropper [Boolean] Whether to include cropper results for each page.
45
+ # This performs a cropping operation on the server and will increase response time.
46
+ #
22
47
  # @return [Mindee::DocumentResponse]
23
- def parse(document_name, username: '', include_words: false, close_file: true)
48
+ def parse(
49
+ prediction_class,
50
+ endpoint_name: '',
51
+ account_name: '',
52
+ include_words: false,
53
+ close_file: true,
54
+ page_options: nil,
55
+ cropper: false
56
+ )
57
+ doc_config = find_doc_config(prediction_class, endpoint_name, account_name)
58
+ @input_doc.process_pdf(page_options) if !page_options.nil? && @input_doc.pdf?
59
+ doc_config.predict(@input_doc, include_words, close_file, cropper)
60
+ end
61
+
62
+ private
63
+
64
+ # @param document_class [Mindee::Prediction::Prediction]
65
+ # @param endpoint_name [String]
66
+ def determine_endpoint_name(document_class, endpoint_name)
67
+ return document_class.name if document_class.name != Prediction::CustomV1.name
68
+
69
+ raise "endpoint_name is required when using #{document_class.name} class" if endpoint_name.empty?
70
+
71
+ endpoint_name
72
+ end
73
+
74
+ # @param document_class [Mindee::Prediction::Prediction]
75
+ # @param endpoint_name [String]
76
+ # @param account_name [String]
77
+ def find_doc_config(document_class, endpoint_name, account_name)
78
+ endpoint_name = determine_endpoint_name(document_class, endpoint_name)
79
+
24
80
  found = []
25
81
  @doc_configs.each_key do |conf|
26
- found.push(conf) if conf[1] == document_name
82
+ found.push(conf) if conf[1] == endpoint_name
27
83
  end
28
- raise "Document type not configured: #{document_name}" if found.empty?
84
+ raise "Endpoint not configured: #{endpoint_name}" if found.empty?
29
85
 
30
- if !username.empty?
31
- config_key = [username, document_name]
86
+ if !account_name.empty?
87
+ config_key = [account_name, endpoint_name]
32
88
  elsif found.length == 1
33
89
  config_key = found[0]
34
90
  else
35
91
  usernames = found.map { |conf| conf[0] }
36
92
  raise "Duplicate configuration detected.\n" \
37
- "You specified the document '#{document_name}' in your custom config.\n" \
93
+ "You specified the document '#{endpoint_name}' in your custom config.\n" \
38
94
  "To avoid confusion, please add the 'account_name' attribute to " \
39
95
  "the parse method, one of #{usernames}."
40
96
  end
41
97
 
42
- doc_config = @doc_configs[config_key]
43
- doc_config.predict(@input_doc, include_words, close_file)
98
+ @doc_configs[config_key]
44
99
  end
45
100
  end
46
101
 
47
102
  # Mindee API Client.
48
103
  # See: https://developers.mindee.com/docs/
49
104
  class Client
50
- DOC_TYPE_INVOICE = 'invoice'
51
- DOC_TYPE_RECEIPT = 'receipt'
52
- DOC_TYPE_PASSPORT = 'passport'
53
- DOC_TYPE_FINANCIAL = 'financial_doc'
54
-
55
- # @param raise_on_error [Boolean]
56
- def initialize(api_key: nil, raise_on_error: true)
57
- @raise_on_error = raise_on_error
105
+ # @param api_key [String]
106
+ def initialize(api_key: '')
58
107
  @doc_configs = {}
59
108
  @api_key = api_key
60
- end
61
-
62
- # Configure a 'Mindee Invoice' document.
63
- # @param api_key [String] Override the client API key for this endpoint
64
- # @return [Mindee::Client]
65
- def config_invoice(api_key: nil)
66
- @doc_configs[['mindee', DOC_TYPE_INVOICE]] = InvoiceConfig.new(
67
- api_key || @api_key,
68
- @raise_on_error
69
- )
70
- self
71
- end
72
-
73
- # Configure a 'Mindee Expense Receipts' document.
74
- # @param api_key [String] Override the client API key for this endpoint
75
- # @return [Mindee::Client]
76
- def config_receipt(api_key: nil)
77
- @doc_configs[['mindee', DOC_TYPE_RECEIPT]] = ReceiptConfig.new(
78
- api_key || @api_key,
79
- @raise_on_error
80
- )
81
- self
82
- end
83
-
84
- # Configure a 'Mindee Passport' document.
85
- # @param api_key [String] Override the client API key for this endpoint
86
- # @return [Mindee::Client]
87
- def config_passport(api_key: nil)
88
- @doc_configs[['mindee', DOC_TYPE_PASSPORT]] = PassportConfig.new(
89
- api_key || @api_key,
90
- @raise_on_error
91
- )
92
- self
93
- end
94
-
95
- # Configure a 'Mindee Financial document'. Uses 'Invoice' and 'Expense Receipt' internally.
96
- # @param api_key [String] Override the client API key for this endpoint
97
- # @return [Mindee::Client]
98
- def config_financial_doc(api_key: nil)
99
- @doc_configs[['mindee', DOC_TYPE_FINANCIAL]] = FinancialDocConfig.new(
100
- api_key || @api_key,
101
- @raise_on_error
102
- )
103
- self
109
+ init_default_endpoints
104
110
  end
105
111
 
106
112
  # Configure a custom document using the 'Mindee API Builder'.
107
113
  # @param account_name [String] Your organization's username on the API Builder
108
- # @param document_name [String] The "API name" field in the "Settings" page of the API Builder
109
- # @param api_key [String] Override the client API key for this endpoint
114
+ # @param endpoint_name [String] The "API name" field in the "Settings" page of the API Builder
110
115
  # @param version [String] Specify the version of the model to use. If not set, use the latest version of the model.
111
116
  # @return [Mindee::Client]
112
- def config_custom_doc(
113
- document_name,
117
+ def add_endpoint(
114
118
  account_name,
115
- api_key: nil,
119
+ endpoint_name,
116
120
  version: '1'
117
121
  )
118
- @doc_configs[[account_name, document_name]] = CustomDocConfig.new(
119
- document_name,
120
- account_name,
121
- version,
122
- api_key || @api_key,
123
- @raise_on_error
122
+ @doc_configs[[account_name, endpoint_name]] = DocumentConfig.new(
123
+ Prediction::CustomV1,
124
+ [HTTP::CustomEndpoint.new(account_name, endpoint_name, version, @api_key)]
124
125
  )
125
126
  self
126
127
  end
127
128
 
128
129
  # Load a document from an absolute path, as a string.
129
130
  # @param input_path [String] Path of file to open
130
- # @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
131
- # @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
132
131
  # @return [Mindee::DocumentClient]
133
- def doc_from_path(input_path, cut_pages: true, max_pages: MAX_DOC_PAGES)
134
- doc = PathDocument.new(input_path, cut_pages, max_pages: max_pages)
132
+ def doc_from_path(input_path)
133
+ doc = Input::PathDocument.new(input_path)
135
134
  DocumentClient.new(doc, @doc_configs)
136
135
  end
137
136
 
138
137
  # Load a document from raw bytes.
139
138
  # @param input_bytes [String] Encoding::BINARY byte input
140
139
  # @param filename [String] The name of the file (without the path)
141
- # @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
142
- # @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
143
140
  # @return [Mindee::DocumentClient]
144
- def doc_from_bytes(input_bytes, filename, cut_pages: true, max_pages: MAX_DOC_PAGES)
145
- doc = BytesDocument.new(input_bytes, filename, cut_pages, max_pages: max_pages)
141
+ def doc_from_bytes(input_bytes, filename)
142
+ doc = Input::BytesDocument.new(input_bytes, filename)
146
143
  DocumentClient.new(doc, @doc_configs)
147
144
  end
148
145
 
149
146
  # Load a document from a base64 encoded string.
150
147
  # @param base64_string [String] Input to parse as base64 string
151
148
  # @param filename [String] The name of the file (without the path)
152
- # @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
153
- # @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
154
149
  # @return [Mindee::DocumentClient]
155
- def doc_from_b64string(base64_string, filename, cut_pages: true, max_pages: MAX_DOC_PAGES)
156
- doc = Base64Document.new(base64_string, filename, cut_pages, max_pages: max_pages)
150
+ def doc_from_b64string(base64_string, filename)
151
+ doc = Input::Base64Document.new(base64_string, filename)
157
152
  DocumentClient.new(doc, @doc_configs)
158
153
  end
159
154
 
160
155
  # Load a document from a normal Ruby `File`.
161
156
  # @param input_file [File] Input file handle
162
157
  # @param filename [String] The name of the file (without the path)
163
- # @param cut_pages [Boolean] Automatically reconstruct a multi-page document.
164
- # @param max_pages [Integer] Number (between 1 and 3 incl.) of pages to reconstruct a document.
165
158
  # @return [Mindee::DocumentClient]
166
- def doc_from_file(input_file, filename, cut_pages: true, max_pages: MAX_DOC_PAGES)
167
- doc = FileDocument.new(input_file, filename, cut_pages, max_pages: max_pages)
159
+ def doc_from_file(input_file, filename)
160
+ doc = Input::FileDocument.new(input_file, filename)
168
161
  DocumentClient.new(doc, @doc_configs)
169
162
  end
163
+
164
+ private
165
+
166
+ def init_default_endpoints
167
+ @doc_configs[['mindee', Prediction::InvoiceV4.name]] = DocumentConfig.new(
168
+ Prediction::InvoiceV4,
169
+ [HTTP::StandardEndpoint.new('invoices', '4', @api_key)]
170
+ )
171
+ @doc_configs[['mindee', Prediction::ReceiptV4.name]] = DocumentConfig.new(
172
+ Prediction::ReceiptV4,
173
+ [HTTP::StandardEndpoint.new('expense_receipts', '4', @api_key)]
174
+ )
175
+ @doc_configs[['mindee', Prediction::PassportV1.name]] = DocumentConfig.new(
176
+ Prediction::PassportV1,
177
+ [HTTP::StandardEndpoint.new('passport', '1', @api_key)]
178
+ )
179
+ @doc_configs[['mindee', Prediction::EU::LicensePlateV1.name]] = DocumentConfig.new(
180
+ Prediction::EU::LicensePlateV1,
181
+ [HTTP::StandardEndpoint.new('license_plates', '1', @api_key)]
182
+ )
183
+ @doc_configs[['mindee', Prediction::ShippingContainerV1.name]] = DocumentConfig.new(
184
+ Prediction::ShippingContainerV1,
185
+ [HTTP::StandardEndpoint.new('shipping_containers', '1', @api_key)]
186
+ )
187
+ @doc_configs[['mindee', Prediction::US::BankCheckV1.name]] = DocumentConfig.new(
188
+ Prediction::US::BankCheckV1,
189
+ [HTTP::StandardEndpoint.new('bank_check', '1', @api_key)]
190
+ )
191
+ @doc_configs[['mindee', Prediction::FR::BankAccountDetailsV1.name]] = DocumentConfig.new(
192
+ Prediction::FR::BankAccountDetailsV1,
193
+ [HTTP::StandardEndpoint.new('bank_account_details', '1', @api_key)]
194
+ )
195
+ @doc_configs[['mindee', Prediction::FR::CarteVitaleV1.name]] = DocumentConfig.new(
196
+ Prediction::FR::CarteVitaleV1,
197
+ [HTTP::StandardEndpoint.new('carte_vitale', '1', @api_key)]
198
+ )
199
+ @doc_configs[['mindee', Prediction::FR::IdCardV1.name]] = DocumentConfig.new(
200
+ Prediction::FR::IdCardV1,
201
+ [HTTP::StandardEndpoint.new('idcard_fr', '1', @api_key)]
202
+ )
203
+ self
204
+ end
170
205
  end
171
206
  end
@@ -2,88 +2,56 @@
2
2
 
3
3
  require 'json'
4
4
 
5
- require_relative 'endpoint'
6
- require_relative 'documents'
7
- require_relative 'response'
5
+ require_relative 'http/endpoint'
6
+ require_relative 'parsing/document'
7
+ require_relative 'parsing/error'
8
+ require_relative 'parsing/prediction'
8
9
 
9
10
  module Mindee
10
11
  # Specific client for sending a document to the API.
11
12
  class DocumentConfig
12
13
  # Array of possible Mindee::Endpoint to be used.
13
- # @return [Array<Mindee::Endpoint>]
14
+ # @return [Array<Mindee::HTTP::Endpoint>]
14
15
  attr_reader :endpoints
15
16
 
16
- # @param doc_class [Class<Mindee::Document>]
17
- # @param document_type [String]
18
- # @param endpoints [Array<Mindee::Endpoint>]
19
- # @param raise_on_error [Boolean]
20
- def initialize(doc_class, document_type, endpoints, raise_on_error)
21
- @doc_class = doc_class
22
- @document_type = document_type
17
+ # @param prediction_class [Class<Mindee::Prediction::Prediction>]
18
+ # @param endpoints [Array<Mindee::HTTP::Endpoint>]
19
+ def initialize(prediction_class, endpoints)
20
+ @prediction_class = prediction_class
23
21
  @endpoints = endpoints
24
- @raise_on_error = raise_on_error
25
- end
26
-
27
- # Parse a prediction API result.
28
- # @param input_doc [Mindee::InputDocument]
29
- # @param response [Hash]
30
- # @return [Mindee::DocumentResponse]
31
- def build_predict_result(input_doc, response)
32
- document = @doc_class.new(
33
- response['document']['inference']['prediction'],
34
- input_file: input_doc,
35
- page_id: nil
36
- )
37
- pages = []
38
- response['document']['inference']['pages'].each do |page|
39
- pages.push(
40
- @doc_class.new(
41
- page['prediction'],
42
- input_file: input_doc,
43
- page_id: page['id']
44
- )
45
- )
46
- end
47
- DocumentResponse.new(response, @document_type, document, pages)
48
22
  end
49
23
 
50
24
  # Call the prediction API.
51
25
  # @param input_doc [Mindee::InputDocument]
52
26
  # @param include_words [Boolean]
53
27
  # @param close_file [Boolean]
28
+ # @param cropper [Boolean]
54
29
  # @return [Mindee::DocumentResponse]
55
- def predict(input_doc, include_words, close_file)
30
+ def predict(input_doc, include_words, close_file, cropper)
56
31
  check_api_keys
57
- response = predict_request(input_doc, include_words, close_file)
58
- parse_response(input_doc, response)
32
+ response = predict_request(input_doc, include_words, close_file, cropper)
33
+ parse_response(response)
59
34
  end
60
35
 
61
36
  private
62
37
 
63
- # @param input_doc [Mindee::InputDocument]
64
38
  # @param response [Net::HTTPResponse]
65
39
  # @return [Mindee::DocumentResponse]
66
- def parse_response(input_doc, response)
40
+ def parse_response(response)
67
41
  hashed_response = JSON.parse(response.body, object_class: Hash)
68
- unless (200..299).include?(response.code.to_i)
69
- if @raise_on_error
70
- raise Net::HTTPError.new(
71
- "API #{response.code} HTTP error: #{hashed_response}", response
72
- )
73
- end
74
- return DocumentResponse.new(
75
- hashed_response, @document_type, {}, []
76
- )
77
- end
78
- build_predict_result(input_doc, hashed_response)
42
+ return Document.new(@prediction_class, hashed_response['document']) if (200..299).include?(response.code.to_i)
43
+
44
+ error = Parsing::Error.new(hashed_response['api_request']['error'])
45
+ raise error
79
46
  end
80
47
 
81
48
  # @param input_doc [Mindee::InputDocument]
82
49
  # @param include_words [Boolean]
83
50
  # @param close_file [Boolean]
51
+ # @param cropper [Boolean]
84
52
  # @return [Net::HTTPResponse]
85
- def predict_request(input_doc, include_words, close_file)
86
- @endpoints[0].predict_request(input_doc, include_words: include_words, close_file: close_file)
53
+ def predict_request(input_doc, include_words, close_file, cropper)
54
+ @endpoints[0].predict_req_post(input_doc, include_words: include_words, close_file: close_file, cropper: cropper)
87
55
  end
88
56
 
89
57
  def check_api_keys
@@ -93,108 +61,8 @@ module Mindee
93
61
  raise "Missing API key for '#{@document_type}', " \
94
62
  "check your Client Configuration.\n" \
95
63
  'You can set this using the ' \
96
- "'#{endpoint.envvar_key_name}' environment variable."
97
- end
98
- end
99
- end
100
-
101
- # Client for Invoice documents
102
- class InvoiceConfig < DocumentConfig
103
- def initialize(api_key, raise_on_error)
104
- endpoints = [InvoiceEndpoint.new(api_key)]
105
- super(
106
- Invoice,
107
- 'invoice',
108
- endpoints,
109
- raise_on_error
110
- )
111
- end
112
- end
113
-
114
- # Client for Receipt documents
115
- class ReceiptConfig < DocumentConfig
116
- def initialize(api_key, raise_on_error)
117
- endpoints = [ReceiptEndpoint.new(api_key)]
118
- super(
119
- Receipt,
120
- 'receipt',
121
- endpoints,
122
- raise_on_error
123
- )
124
- end
125
- end
126
-
127
- # Client for Passport documents
128
- class PassportConfig < DocumentConfig
129
- def initialize(api_key, raise_on_error)
130
- endpoints = [PassportEndpoint.new(api_key)]
131
- super(
132
- Passport,
133
- 'passport',
134
- endpoints,
135
- raise_on_error
136
- )
137
- end
138
- end
139
-
140
- # Client for Financial documents
141
- class FinancialDocConfig < DocumentConfig
142
- def initialize(api_key, raise_on_error)
143
- endpoints = [
144
- InvoiceEndpoint.new(api_key),
145
- ReceiptEndpoint.new(api_key),
146
- ]
147
- super(
148
- FinancialDocument,
149
- 'financial_doc',
150
- endpoints,
151
- raise_on_error
152
- )
153
- end
154
-
155
- private
156
-
157
- def predict_request(input_doc, include_words, close_file)
158
- endpoint = input_doc.pdf? ? @endpoints[0] : @endpoints[1]
159
- endpoint.predict_request(input_doc, include_words: include_words, close_file: close_file)
160
- end
161
- end
162
-
163
- # Client for Custom (constructed) documents
164
- class CustomDocConfig < DocumentConfig
165
- def initialize(document_type, account_name, version, api_key, raise_on_error)
166
- endpoints = [CustomEndpoint.new(document_type, account_name, version, api_key)]
167
- super(
168
- CustomDocument,
169
- document_type,
170
- endpoints,
171
- raise_on_error
172
- )
173
- end
174
-
175
- # Parse a prediction API result.
176
- # @param input_doc [Mindee::InputDocument]
177
- # @param response [Hash]
178
- # @return [Mindee::DocumentResponse]
179
- def build_predict_result(input_doc, response)
180
- document = CustomDocument.new(
181
- @document_type,
182
- response['document']['inference']['prediction'],
183
- input_file: input_doc,
184
- page_id: nil
185
- )
186
- pages = []
187
- response['document']['inference']['pages'].each do |page|
188
- pages.push(
189
- CustomDocument.new(
190
- @document_type,
191
- page['prediction'],
192
- input_file: input_doc,
193
- page_id: page['id']
194
- )
195
- )
64
+ "'#{HTTP::API_KEY_ENV_NAME}' environment variable."
196
65
  end
197
- DocumentResponse.new(response, @document_type, document, pages)
198
66
  end
199
67
  end
200
68
  end