mindee 1.2.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.rubocop.yml +2 -2
  4. data/.yardopts +4 -0
  5. data/CHANGELOG.md +26 -0
  6. data/README.md +46 -23
  7. data/Rakefile +6 -1
  8. data/bin/mindee.rb +78 -61
  9. data/docs/ruby-api-builder.md +124 -0
  10. data/docs/ruby-getting-started.md +265 -0
  11. data/docs/ruby-invoice-ocr.md +260 -0
  12. data/docs/ruby-passport-ocr.md +156 -0
  13. data/docs/ruby-receipt-ocr.md +170 -0
  14. data/lib/mindee/client.rb +132 -93
  15. data/lib/mindee/document_config.rb +29 -169
  16. data/lib/mindee/geometry.rb +105 -8
  17. data/lib/mindee/http/endpoint.rb +80 -0
  18. data/lib/mindee/input/pdf_processing.rb +106 -0
  19. data/lib/mindee/input/sources.rb +97 -0
  20. data/lib/mindee/input.rb +3 -0
  21. data/lib/mindee/parsing/document.rb +31 -0
  22. data/lib/mindee/parsing/error.rb +22 -0
  23. data/lib/mindee/parsing/inference.rb +53 -0
  24. data/lib/mindee/parsing/page.rb +46 -0
  25. data/lib/mindee/parsing/prediction/base.rb +30 -0
  26. data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
  27. data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
  28. data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
  29. data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
  30. data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
  31. data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
  32. data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
  33. data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
  34. data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
  35. data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
  36. data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
  37. data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
  38. data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
  39. data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +245 -0
  40. data/lib/mindee/parsing/prediction/financial_document/invoice_line_item.rb +58 -0
  41. data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
  42. data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
  43. data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
  44. data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
  45. data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
  46. data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
  47. data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +82 -0
  48. data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +87 -0
  49. data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
  50. data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
  51. data/lib/mindee/parsing/prediction.rb +14 -0
  52. data/lib/mindee/parsing.rb +4 -0
  53. data/lib/mindee/version.rb +1 -1
  54. data/mindee.gemspec +2 -1
  55. metadata +60 -24
  56. data/lib/mindee/documents/base.rb +0 -35
  57. data/lib/mindee/documents/custom.rb +0 -65
  58. data/lib/mindee/documents/financial_doc.rb +0 -135
  59. data/lib/mindee/documents/invoice.rb +0 -162
  60. data/lib/mindee/documents/passport.rb +0 -163
  61. data/lib/mindee/documents/receipt.rb +0 -109
  62. data/lib/mindee/documents.rb +0 -7
  63. data/lib/mindee/endpoint.rb +0 -105
  64. data/lib/mindee/fields/orientation.rb +0 -26
  65. data/lib/mindee/fields.rb +0 -11
  66. data/lib/mindee/inputs.rb +0 -153
  67. data/lib/mindee/response.rb +0 -27
@@ -2,199 +2,59 @@
2
2
 
3
3
  require 'json'
4
4
 
5
- require_relative 'endpoint'
6
- require_relative 'documents'
7
- require_relative 'response'
5
+ require_relative 'http/endpoint'
6
+ require_relative 'parsing/document'
7
+ require_relative 'parsing/error'
8
+ require_relative 'parsing/prediction'
8
9
 
module Mindee
  # Specific client for sending a document to the API.
  #
  # Binds one prediction class to the single HTTP endpoint that serves it.
  class DocumentConfig
    # Endpoint used for every request made through this configuration.
    # @return [Mindee::HTTP::Endpoint]
    attr_reader :endpoint

    # @param prediction_class [Class<Mindee::Prediction::Prediction>]
    # @param endpoint [Mindee::HTTP::Endpoint]
    def initialize(prediction_class, endpoint)
      @prediction_class = prediction_class
      @endpoint = endpoint
    end

    # Call the prediction API.
    # @param input_doc [Mindee::InputDocument]
    # @param include_words [Boolean]
    # @param close_file [Boolean]
    # @param cropper [Boolean]
    # @return [Mindee::Document]
    # @raise [RuntimeError] when the endpoint has no API key
    # @raise [Mindee::Parsing::Error] when the API answers with a non-2xx status
    def predict(input_doc, include_words, close_file, cropper)
      check_api_key
      response = predict_request(input_doc, include_words, close_file, cropper)
      hashed_response = JSON.parse(response.body, object_class: Hash)
      return Document.new(@prediction_class, hashed_response['document']) if (200..299).include?(response.code.to_i)

      # Fall back to an empty payload so that an unexpected error body still
      # surfaces as a Parsing::Error rather than a NoMethodError on nil.
      raise Parsing::Error.new(hashed_response.dig('api_request', 'error') || {})
    end

    private

    # @param input_doc [Mindee::InputDocument]
    # @param include_words [Boolean]
    # @param close_file [Boolean]
    # @param cropper [Boolean]
    # @return [Net::HTTPResponse]
    def predict_request(input_doc, include_words, close_file, cropper)
      @endpoint.predict_req_post(input_doc, include_words: include_words, close_file: close_file, cropper: cropper)
    end

    # Fail early, before any network call, when no API key is configured.
    # @raise [RuntimeError]
    def check_api_key
      api_key = @endpoint.api_key
      return unless api_key.nil? || api_key.empty?

      # The prediction class identifies the product; this class keeps no
      # separate document type name.
      raise "Missing API key for '#{@prediction_class}', " \
            "check your Client Configuration.\n" \
            'You can set this using the ' \
            "'#{HTTP::API_KEY_ENV_NAME}' environment variable."
    end
  end
end
@@ -3,19 +3,116 @@
module Mindee
  # Various helper functions for geometry.
  module Geometry
    # A relative set of coordinates (X, Y) on the document.
    class Point
      # @return [Float]
      attr_accessor :x
      # @return [Float]
      attr_accessor :y

      # @param x [Float]
      # @param y [Float]
      # rubocop:disable Naming/MethodParameterName
      def initialize(x, y)
        @x = x
        @y = y
      end
      # rubocop:enable Naming/MethodParameterName

      # Array-style access: index 0 is X, index 1 is Y.
      # @param key [Integer]
      # @return [Float]
      # @raise [ArgumentError] for any other index
      def [](key)
        case key
        when 0 then @x
        when 1 then @y
        else
          # `raise`, not `throw`: `throw` is for catch/throw control flow and
          # would surface as an UncaughtThrowError with a mangled message.
          raise ArgumentError, '0 or 1 only'
        end
      end
    end

    # Contains exactly 4 relative vertices coordinates (Points).
    class Quadrilateral
      # @return [Mindee::Geometry::Point]
      attr_accessor :top_left
      # @return [Mindee::Geometry::Point]
      attr_accessor :top_right
      # @return [Mindee::Geometry::Point]
      attr_accessor :bottom_right
      # @return [Mindee::Geometry::Point]
      attr_accessor :bottom_left

      # @param top_left [Mindee::Geometry::Point]
      # @param top_right [Mindee::Geometry::Point]
      # @param bottom_right [Mindee::Geometry::Point]
      # @param bottom_left [Mindee::Geometry::Point]
      def initialize(top_left, top_right, bottom_right, bottom_left)
        @top_left = top_left
        @top_right = top_right
        @bottom_right = bottom_right
        @bottom_left = bottom_left
      end

      # Array-style access, in the order: top left, top right, bottom right,
      # bottom left.
      # @param key [Integer]
      # @return [Mindee::Geometry::Point]
      # @raise [ArgumentError] for an index outside 0..3
      def [](key)
        case key
        when 0 then @top_left
        when 1 then @top_right
        when 2 then @bottom_right
        when 3 then @bottom_left
        else
          raise ArgumentError, '0, 1, 2, 3 only'
        end
      end
    end

    # An Array of Points, as built by `polygon_from_prediction`.
    class Polygon < Array
    end

    # Transform a prediction into a Quadrilateral.
    # @param prediction [Array<Array<Float>>] exactly four [x, y] pairs
    # @return [Mindee::Geometry::Quadrilateral]
    # @raise [ArgumentError] when the prediction does not hold 4 points
    def self.quadrilateral_from_prediction(prediction)
      if prediction.size != 4
        raise ArgumentError, "Prediction must have exactly 4 points, found #{prediction.size}"
      end

      Quadrilateral.new(*prediction.map { |coords| Point.new(coords[0], coords[1]) })
    end

    # Transform a prediction into a Polygon.
    # @param prediction [Array<Array<Float>>, nil] [x, y] pairs; nil yields an empty Polygon
    # @return [Mindee::Geometry::Polygon]
    def self.polygon_from_prediction(prediction)
      polygon = Polygon.new
      return polygon if prediction.nil?

      prediction.each { |coords| polygon << Point.new(coords[0], coords[1]) }
      polygon
    end

    # Extreme coordinates of a set of points, as [x_min, y_min, x_max, y_max].
    # @param vertices [Array<Mindee::Geometry::Point>]
    # @return [Array<Float>]
    def self.get_bbox(vertices)
      x_coords = vertices.map(&:x)
      y_coords = vertices.map(&:y)
      [x_coords.min, y_coords.min, x_coords.max, y_coords.max]
    end

    # Axis-aligned bounding box of a set of points.
    # @param vertices [Array<Mindee::Geometry::Point>]
    # @return [Mindee::Geometry::Quadrilateral]
    def self.get_bounding_box(vertices)
      x_min, y_min, x_max, y_max = get_bbox(vertices)
      Quadrilateral.new(
        Point.new(x_min, y_min),
        Point.new(x_max, y_min),
        Point.new(x_max, y_max),
        Point.new(x_min, y_max)
      )
    end
  end
end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'net/http'
4
+ require_relative '../version'
5
+
module Mindee
  module HTTP
    API_KEY_ENV_NAME = 'MINDEE_API_KEY'
    API_KEY_DEFAULT = nil

    BASE_URL_ENV_NAME = 'MINDEE_BASE_URL'
    BASE_URL_DEFAULT = 'https://api.mindee.net/v1'

    REQUEST_TIMEOUT_ENV_NAME = 'MINDEE_REQUEST_TIMEOUT'
    TIMEOUT_DEFAULT = 120

    USER_AGENT = "mindee-api-ruby@v#{Mindee::VERSION} ruby-v#{RUBY_VERSION} #{Mindee::PLATFORM}".freeze

    # Generic API endpoint for a product.
    class Endpoint
      # @return [String]
      attr_reader :api_key
      # @return [Integer]
      attr_reader :request_timeout

      # @param owner [String] account that owns the product
      # @param url_name [String] product name as used in the URL
      # @param version [String] product version, without the leading "v"
      # @param api_key [String, nil] when nil or empty, read from the
      #   MINDEE_API_KEY environment variable
      def initialize(owner, url_name, version, api_key: '')
        @owner = owner
        @url_name = url_name
        @version = version
        @request_timeout = ENV.fetch(REQUEST_TIMEOUT_ENV_NAME, TIMEOUT_DEFAULT).to_i
        @api_key = api_key.nil? || api_key.empty? ? ENV.fetch(API_KEY_ENV_NAME, API_KEY_DEFAULT) : api_key
        # Honor MINDEE_BASE_URL, like the API key and timeout variables above.
        base_url = ENV.fetch(BASE_URL_ENV_NAME, BASE_URL_DEFAULT).chomp('/')
        @url_root = "#{base_url}/products/#{@owner}/#{@url_name}/v#{@version}"
      end

      # POST a document to the product's `predict` route.
      # @param input_doc [Mindee::InputDocument]
      # @param include_words [Boolean]
      # @param close_file [Boolean]
      # @param cropper [Boolean]
      # @return [Net::HTTPResponse]
      def predict_req_post(input_doc, include_words: false, close_file: true, cropper: false)
        uri = URI("#{@url_root}/predict")
        uri.query = URI.encode_www_form(cropper: 'true') if cropper

        headers = {
          'Authorization' => "Token #{@api_key}",
          'User-Agent' => USER_AGENT,
        }
        req = Net::HTTP::Post.new(uri, headers)

        form_data = {
          'document' => input_doc.read_document(close: close_file),
        }
        # form_data is a Hash: assign the key (Hash has no #push).
        form_data['include_mvision'] = 'true' if include_words

        req.set_form(form_data, 'multipart/form-data')

        # Only negotiate TLS when the (possibly overridden) base URL asks for it.
        use_ssl = uri.scheme == 'https'
        Net::HTTP.start(uri.hostname, uri.port, use_ssl: use_ssl, read_timeout: @request_timeout) do |http|
          http.request(req)
        end
      end
    end

    # Endpoint for a product published under the 'mindee' owner.
    class StandardEndpoint < Endpoint
      # @param endpoint_name [String]
      # @param version [String]
      # @param api_key [String, nil]
      def initialize(endpoint_name, version, api_key)
        super('mindee', endpoint_name, version, api_key: api_key)
      end
    end

    # Custom (constructed) API endpoint
    class CustomEndpoint < Endpoint
      # @param account_name [String]
      # @param endpoint_name [String]
      # @param version [String]
      # @param api_key [String, nil]
      def initialize(account_name, endpoint_name, version, api_key)
        super(account_name, endpoint_name, version, api_key: api_key)
      end
    end
  end
end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+ require 'origami'
5
+
# Monkey-patching for Origami
module PDFTools
  # Serialize the document into an in-memory binary stream.
  # Relies on methods supplied by the class this module is mixed into
  # (load_all_objects, linearized?, delinearize!, intents_as_pdfa1, compile,
  # output).
  # @param params [Hash] overrides for the default output options
  # @return [StringIO] binary-encoded stream holding the document
  def to_io_stream(params = {})
    options = {
      delinearize: true,
      recompile: true,
      decrypt: false,
    }
    options.update(params)

    if frozen? # incompatible flags with frozen doc (signed)
      options[:recompile] = nil
      options[:rebuild_xrefs] = nil
      options[:noindent] = nil
      options[:obfuscate] = false
    end
    load_all_objects unless @loaded

    # The case-insensitive flag belongs after the closing brace; inside the
    # pattern it would be matched as the literal text "/i".
    intents_as_pdfa1 if options[:intent] =~ %r{pdf[/-]?A1?}i
    delinearize! if options[:delinearize] && linearized?
    compile(options) if options[:recompile]

    io_stream = StringIO.new(output(options))
    io_stream.set_encoding Encoding::BINARY
    io_stream
  end
end
33
+
# Mix the stream export helper into every Origami PDF document.
Origami::PDF.include(PDFTools)
35
+
36
+ module Mindee
37
+ module Input
38
+ # Class for PDF documents
39
+ module PdfProcessor
40
+ DEFAULT_OPTIONS = {
41
+ page_indexes: [0],
42
+ operation: :KEEP_ONLY,
43
+ on_min_pages: 0,
44
+ }.freeze
45
+
46
+ # @param io_stream [StreamIO]
47
+ # @param options [Hash]
48
+ def self.parse(io_stream, options)
49
+ options = DEFAULT_OPTIONS.merge(options)
50
+
51
+ current_pdf = open_pdf(io_stream)
52
+ pages_count = current_pdf.pages.size
53
+ return if options[:on_min_pages] > pages_count
54
+
55
+ all_pages = (0..pages_count - 1).to_a
56
+
57
+ case options[:operation]
58
+ when :KEEP_ONLY
59
+ pages_to_remove = indexes_from_keep(options[:page_indexes], all_pages)
60
+ when :REMOVE
61
+ pages_to_remove = indexes_from_remove(options[:page_indexes], all_pages)
62
+ else
63
+ raise "operation must be one of :KEEP_ONLY or :REMOVE, sent '#{behavior}'"
64
+ end
65
+
66
+ current_pdf.delete_pages_at(pages_to_remove) if pages_to_remove.to_a != all_pages.to_a
67
+ current_pdf.to_io_stream
68
+ end
69
+
70
+ # @param page_indexes [Array]
71
+ # @param all_pages [Array]
72
+ def self.indexes_from_keep(page_indexes, all_pages)
73
+ pages_to_keep = Set.new
74
+ page_indexes.each do |idx|
75
+ idx = (all_pages.length - (idx + 2)) if idx.negative?
76
+ page = all_pages[idx]
77
+ next if page.nil?
78
+
79
+ pages_to_keep << page
80
+ end
81
+ all_pages.to_set - pages_to_keep
82
+ end
83
+
84
+ # @param page_indexes [Array]
85
+ # @param all_pages [Array]
86
+ def self.indexes_from_remove(page_indexes, all_pages)
87
+ pages_to_remove = Set.new
88
+ page_indexes.each do |idx|
89
+ idx = (all_pages.length - (idx + 2)) if idx.negative?
90
+ page = all_pages[idx]
91
+ next if page.nil?
92
+
93
+ pages_to_remove << page
94
+ end
95
+ end
96
+
97
+ # @param io_stream [StringIO]
98
+ # @return [Origami::PDF]
99
+ def self.open_pdf(io_stream)
100
+ pdf_parser = Origami::PDF::LinearParser.new({ verbosity: Origami::Parser::VERBOSE_QUIET })
101
+ io_stream.seek(0)
102
+ pdf_parser.parse(io_stream)
103
+ end
104
+ end
105
+ end
106
+ end
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'stringio'
4
+ require 'marcel'
5
+
6
+ require_relative 'pdf_processing'
7
+
module Mindee
  module Input
    ALLOWED_MIME_TYPES = [
      'application/pdf',
      'image/heic',
      'image/png',
      'image/jpeg',
      'image/tiff',
      'image/webp',
    ].freeze

    # Base class for loading documents.
    class InputDocument
      # @return [String]
      attr_reader :filename
      # @return [String]
      attr_reader :file_mimetype
      # @return [StringIO, File]
      attr_reader :io_stream

      # @param io_stream [StringIO, File]
      # @param filename [String]
      # @raise [RuntimeError] when the detected MIME type is not in ALLOWED_MIME_TYPES
      def initialize(io_stream, filename)
        @io_stream = io_stream
        @filename = filename
        @file_mimetype = Marcel::MimeType.for @io_stream, name: @filename

        return if ALLOWED_MIME_TYPES.include? @file_mimetype

        raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
      end

      # @return [Boolean]
      def pdf?
        @file_mimetype == 'application/pdf'
      end

      # Replace the stream with the result of the PDF page selection.
      # @param options [Hash] forwarded to PdfProcessor.parse
      def process_pdf(options)
        @io_stream.seek(0)
        processed = PdfProcessor.parse(@io_stream, options)
        # Keep the current stream when the processor hands nothing back;
        # storing nil would break the next read_document call.
        @io_stream = processed unless processed.nil?
      end

      # Read the whole document and encode it as base64.
      # @param close [Boolean] close the stream once it has been read
      # @return [String]
      def read_document(close: true)
        @io_stream.seek(0)
        data = @io_stream.read
        @io_stream.close if close
        [data].pack('m')
      end
    end

    # Load a document from a path.
    class PathDocument < InputDocument
      # @param filepath [String]
      def initialize(filepath)
        io_stream = File.open(filepath, 'rb')
        begin
          super(io_stream, File.basename(filepath))
        rescue StandardError
          # Do not leak the file handle when the file is rejected.
          io_stream.close
          raise
        end
      end
    end

    # Load a document from a base64 string.
    class Base64Document < InputDocument
      # @param base64_string [String]
      # @param filename [String]
      def initialize(base64_string, filename)
        io_stream = StringIO.new(base64_string.unpack1('m*'))
        io_stream.set_encoding Encoding::BINARY
        super(io_stream, filename)
      end
    end

    # Load a document from raw bytes.
    class BytesDocument < InputDocument
      # @param raw_bytes [String]
      # @param filename [String]
      def initialize(raw_bytes, filename)
        io_stream = StringIO.new(raw_bytes)
        io_stream.set_encoding Encoding::BINARY
        super(io_stream, filename)
      end
    end

    # Load a document from a file handle.
    class FileDocument < InputDocument
      # @param file_handle [File, StringIO] an already opened, readable handle
      # @param filename [String]
      def initialize(file_handle, filename)
        io_stream = file_handle
        super(io_stream, filename)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'input/sources'
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'inference'
4
+
module Mindee
  # Stores all response attributes.
  class Document
    # @return [Mindee::Inference]
    attr_reader :inference
    # @return [String] Filename sent to the API
    attr_reader :name
    # @return [String] Mindee ID of the document
    attr_reader :id

    # @param prediction_class [Class<Mindee::Prediction::Prediction>]
    # @param http_response [Hash] hash providing the 'id', 'name' and 'inference' keys
    def initialize(prediction_class, http_response)
      @id = http_response['id']
      @name = http_response['name']
      inference_data = http_response['inference']
      @inference = Mindee::Inference.new(prediction_class, inference_data)
    end

    # Header with the Mindee ID and filename, followed by the inference.
    # @return [String]
    def to_s
      sections = [
        "########\nDocument\n########",
        "\n:Mindee ID: #{@id}",
        "\n:Filename: #{@name}",
        "\n\n#{@inference}",
      ]
      sections.each_with_object(String.new) { |section, out_str| out_str << section }
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ # API Error
6
+ class Error < StandardError
7
+ # @return [String]
8
+ attr_reader :api_code
9
+ # @return [String]
10
+ attr_reader :api_details
11
+ # @return [String]
12
+ attr_reader :api_message
13
+
14
+ def initialize(error)
15
+ @api_code = error['code']
16
+ @api_details = error['details']
17
+ @api_message = error['message']
18
+ super("#{@api_code}: #{@api_details} - #{@api_message}")
19
+ end
20
+ end
21
+ end
22
+ end