mindee 2.2.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +2 -0
  4. data/.yardopts +1 -0
  5. data/CHANGELOG.md +36 -0
  6. data/README.md +29 -16
  7. data/bin/mindee.rb +23 -26
  8. data/docs/code_samples/bank_account_details_v1.txt +10 -5
  9. data/docs/code_samples/bank_account_details_v2.txt +19 -0
  10. data/docs/code_samples/bank_check_v1.txt +10 -5
  11. data/docs/code_samples/carte_vitale_v1.txt +10 -5
  12. data/docs/code_samples/custom_v1.txt +19 -10
  13. data/docs/code_samples/default.txt +10 -2
  14. data/docs/code_samples/expense_receipts_v4.txt +10 -5
  15. data/docs/code_samples/expense_receipts_v5.txt +11 -6
  16. data/docs/code_samples/financial_document_v1.txt +10 -5
  17. data/docs/code_samples/idcard_fr_v1.txt +10 -5
  18. data/docs/code_samples/invoice_splitter_v1_async.txt +66 -0
  19. data/docs/code_samples/invoices_v4.txt +10 -5
  20. data/docs/code_samples/license_plates_v1.txt +10 -5
  21. data/docs/code_samples/passport_v1.txt +10 -5
  22. data/docs/code_samples/proof_of_address_v1.txt +10 -5
  23. data/docs/ruby-api-builder.md +30 -31
  24. data/docs/ruby-getting-started.md +64 -23
  25. data/docs/ruby-invoice-ocr.md +70 -59
  26. data/docs/ruby-passport-ocr.md +49 -40
  27. data/docs/ruby-receipt-ocr.md +45 -32
  28. data/lib/mindee/client.rb +150 -148
  29. data/lib/mindee/geometry/min_max.rb +23 -0
  30. data/lib/mindee/geometry/point.rb +35 -0
  31. data/lib/mindee/geometry/polygon.rb +23 -0
  32. data/lib/mindee/geometry/quadrilateral.rb +45 -0
  33. data/lib/mindee/geometry/utils.rb +81 -0
  34. data/lib/mindee/geometry.rb +5 -116
  35. data/lib/mindee/http/endpoint.rb +123 -16
  36. data/lib/mindee/http.rb +3 -0
  37. data/lib/mindee/input/sources.rb +87 -73
  38. data/lib/mindee/parsing/common/api_response.rb +109 -0
  39. data/lib/mindee/parsing/common/document.rb +48 -0
  40. data/lib/mindee/parsing/common/error.rb +24 -0
  41. data/lib/mindee/parsing/common/inference.rb +43 -0
  42. data/lib/mindee/parsing/common/ocr/mvision_v1.rb +34 -0
  43. data/lib/mindee/parsing/common/ocr/ocr.rb +169 -0
  44. data/lib/mindee/parsing/common/ocr.rb +3 -0
  45. data/lib/mindee/parsing/common/orientation.rb +26 -0
  46. data/lib/mindee/parsing/common/page.rb +40 -0
  47. data/lib/mindee/parsing/common/prediction.rb +15 -0
  48. data/lib/mindee/parsing/common/product.rb +19 -0
  49. data/lib/mindee/parsing/common.rb +10 -0
  50. data/lib/mindee/parsing/custom/classification_field.rb +28 -0
  51. data/lib/mindee/parsing/custom/list_field.rb +76 -0
  52. data/lib/mindee/parsing/custom.rb +4 -0
  53. data/lib/mindee/parsing/standard/amount_field.rb +26 -0
  54. data/lib/mindee/parsing/standard/base_field.rb +104 -0
  55. data/lib/mindee/parsing/standard/classification_field.rb +16 -0
  56. data/lib/mindee/parsing/standard/company_registration_field.rb +21 -0
  57. data/lib/mindee/parsing/standard/date_field.rb +34 -0
  58. data/lib/mindee/parsing/standard/locale_field.rb +50 -0
  59. data/lib/mindee/parsing/standard/payment_details_field.rb +42 -0
  60. data/lib/mindee/parsing/standard/position_field.rb +44 -0
  61. data/lib/mindee/parsing/standard/tax_field.rb +108 -0
  62. data/lib/mindee/parsing/standard/text_field.rb +16 -0
  63. data/lib/mindee/parsing/standard.rb +12 -0
  64. data/lib/mindee/parsing.rb +3 -2
  65. data/lib/mindee/{input → pdf}/pdf_processing.rb +4 -32
  66. data/lib/mindee/pdf/pdf_tools.rb +34 -0
  67. data/lib/mindee/pdf.rb +3 -0
  68. data/lib/mindee/product/.rubocop.yml +5 -0
  69. data/lib/mindee/product/custom/custom_v1.rb +35 -0
  70. data/lib/mindee/product/custom/custom_v1_document.rb +60 -0
  71. data/lib/mindee/product/custom/custom_v1_page.rb +32 -0
  72. data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +38 -0
  73. data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +37 -0
  74. data/lib/mindee/product/eu/license_plate/license_plate_v1_page.rb +34 -0
  75. data/lib/mindee/product/financial_document/financial_document_v1.rb +36 -0
  76. data/lib/mindee/product/financial_document/financial_document_v1_document.rb +188 -0
  77. data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +90 -0
  78. data/lib/mindee/product/financial_document/financial_document_v1_page.rb +32 -0
  79. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +38 -0
  80. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +43 -0
  81. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_page.rb +34 -0
  82. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +38 -0
  83. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +71 -0
  84. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +58 -0
  85. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rb +34 -0
  86. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +38 -0
  87. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +52 -0
  88. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb +34 -0
  89. data/lib/mindee/product/fr/id_card/id_card_v1.rb +38 -0
  90. data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +82 -0
  91. data/lib/mindee/product/fr/id_card/id_card_v1_page.rb +48 -0
  92. data/lib/mindee/product/invoice/invoice_v4.rb +37 -0
  93. data/lib/mindee/product/invoice/invoice_v4_document.rb +212 -0
  94. data/lib/mindee/product/invoice/invoice_v4_line_item.rb +66 -0
  95. data/lib/mindee/product/invoice/invoice_v4_page.rb +32 -0
  96. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +36 -0
  97. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +65 -0
  98. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +32 -0
  99. data/lib/mindee/product/passport/passport_v1.rb +36 -0
  100. data/lib/mindee/{parsing/prediction/fr/id_card/id_card_v1.rb → product/passport/passport_v1_document.rb} +45 -45
  101. data/lib/mindee/product/passport/passport_v1_page.rb +32 -0
  102. data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +36 -0
  103. data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +83 -0
  104. data/lib/mindee/product/proof_of_address/proof_of_address_v1_page.rb +32 -0
  105. data/lib/mindee/product/receipt/receipt_v4.rb +36 -0
  106. data/lib/mindee/product/receipt/receipt_v4_document.rb +86 -0
  107. data/lib/mindee/product/receipt/receipt_v4_page.rb +32 -0
  108. data/lib/mindee/product/receipt/receipt_v5.rb +36 -0
  109. data/lib/mindee/product/receipt/receipt_v5_document.rb +138 -0
  110. data/lib/mindee/product/receipt/receipt_v5_line_item.rb +69 -0
  111. data/lib/mindee/product/receipt/receipt_v5_page.rb +32 -0
  112. data/lib/mindee/product/us/bank_check/bank_check_v1.rb +38 -0
  113. data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +73 -0
  114. data/lib/mindee/product/us/bank_check/bank_check_v1_page.rb +34 -0
  115. data/lib/mindee/product.rb +16 -0
  116. data/lib/mindee/version.rb +2 -1
  117. data/lib/mindee.rb +3 -1
  118. metadata +87 -38
  119. data/docs/code_samples/shipping_containers_v1.txt +0 -14
  120. data/lib/mindee/document_config.rb +0 -60
  121. data/lib/mindee/parsing/document.rb +0 -31
  122. data/lib/mindee/parsing/error.rb +0 -22
  123. data/lib/mindee/parsing/inference.rb +0 -53
  124. data/lib/mindee/parsing/page.rb +0 -46
  125. data/lib/mindee/parsing/prediction/base.rb +0 -30
  126. data/lib/mindee/parsing/prediction/common_fields/amount.rb +0 -21
  127. data/lib/mindee/parsing/prediction/common_fields/base.rb +0 -72
  128. data/lib/mindee/parsing/prediction/common_fields/company_registration.rb +0 -17
  129. data/lib/mindee/parsing/prediction/common_fields/date.rb +0 -30
  130. data/lib/mindee/parsing/prediction/common_fields/locale.rb +0 -45
  131. data/lib/mindee/parsing/prediction/common_fields/payment_details.rb +0 -33
  132. data/lib/mindee/parsing/prediction/common_fields/position.rb +0 -39
  133. data/lib/mindee/parsing/prediction/common_fields/tax.rb +0 -40
  134. data/lib/mindee/parsing/prediction/common_fields/text.rb +0 -12
  135. data/lib/mindee/parsing/prediction/common_fields.rb +0 -11
  136. data/lib/mindee/parsing/prediction/custom/custom_v1.rb +0 -58
  137. data/lib/mindee/parsing/prediction/custom/fields.rb +0 -91
  138. data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +0 -34
  139. data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +0 -237
  140. data/lib/mindee/parsing/prediction/financial_document/financial_document_v1_line_item.rb +0 -58
  141. data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +0 -40
  142. data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +0 -49
  143. data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +0 -212
  144. data/lib/mindee/parsing/prediction/invoice/invoice_v4_line_item.rb +0 -58
  145. data/lib/mindee/parsing/prediction/passport/passport_v1.rb +0 -121
  146. data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +0 -80
  147. data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +0 -87
  148. data/lib/mindee/parsing/prediction/receipt/receipt_v5.rb +0 -136
  149. data/lib/mindee/parsing/prediction/receipt/receipt_v5_line_item.rb +0 -37
  150. data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +0 -38
  151. data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +0 -70
  152. data/lib/mindee/parsing/prediction.rb +0 -15
@@ -1,118 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Mindee
4
- # Various helper functions for geometry.
5
- module Geometry
6
- # A relative set of coordinates (X, Y) on the document.
7
- class Point
8
- # @return [Float]
9
- attr_accessor :x
10
- # @return [Float]
11
- attr_accessor :y
12
-
13
- # @param x [Float]
14
- # @param y [Float]
15
- # rubocop:disable Naming/MethodParameterName
16
- def initialize(x, y)
17
- @x = x
18
- @y = y
19
- end
20
- # rubocop:enable Naming/MethodParameterName
21
-
22
- # @return [Float]
23
- def [](key)
24
- case key
25
- when 0
26
- @x
27
- when 1
28
- @y
29
- else
30
- throw '0 or 1 only'
31
- end
32
- end
33
- end
34
-
35
- # Contains exactly 4 relative vertices coordinates (Points).
36
- class Quadrilateral
37
- # @return [Mindee::Geometry::Point]
38
- attr_accessor :top_left
39
- # @return [Mindee::Geometry::Point]
40
- attr_accessor :top_right
41
- # @return [Mindee::Geometry::Point]
42
- attr_accessor :bottom_right
43
- # @return [Mindee::Geometry::Point]
44
- attr_accessor :bottom_left
45
-
46
- # @param top_left [Mindee::Geometry::Point]
47
- # @param top_right [Mindee::Geometry::Point]
48
- # @param bottom_right [Mindee::Geometry::Point]
49
- # @param bottom_left [Mindee::Geometry::Point]
50
- def initialize(top_left, top_right, bottom_right, bottom_left)
51
- @top_left = top_left
52
- @top_right = top_right
53
- @bottom_right = bottom_right
54
- @bottom_left = bottom_left
55
- end
56
-
57
- # @return [Mindee::Geometry::Point]
58
- def [](key)
59
- case key
60
- when 0
61
- @top_left
62
- when 1
63
- @top_right
64
- when 2
65
- @bottom_right
66
- when 3
67
- @bottom_left
68
- else
69
- throw '0, 1, 2, 3 only'
70
- end
71
- end
72
- end
73
-
74
- class Polygon < Array
75
- end
76
-
77
- # Transform a prediction into a Quadrilateral.
78
- def self.quadrilateral_from_prediction(prediction)
79
- throw "Prediction must have exactly 4 points, found #{prediction.size}" if prediction.size != 4
80
-
81
- Quadrilateral.new(
82
- Point.new(prediction[0][0], prediction[0][1]),
83
- Point.new(prediction[1][0], prediction[1][1]),
84
- Point.new(prediction[2][0], prediction[2][1]),
85
- Point.new(prediction[3][0], prediction[3][1])
86
- )
87
- end
88
-
89
- # Transform a prediction into a Polygon.
90
- def self.polygon_from_prediction(prediction)
91
- polygon = Polygon.new
92
- return polygon if prediction.nil?
93
-
94
- prediction.each do |point|
95
- polygon << Point.new(point[0], point[1])
96
- end
97
- polygon
98
- end
99
-
100
- # @return [Array<Float>]
101
- def self.get_bbox(vertices)
102
- x_coords = vertices.map(&:x)
103
- y_coords = vertices.map(&:y)
104
- [x_coords.min, y_coords.min, x_coords.max, y_coords.max]
105
- end
106
-
107
- # @return [Mindee::Geometry::Quadrilateral]
108
- def self.get_bounding_box(vertices)
109
- x_min, y_min, x_max, y_max = get_bbox(vertices)
110
- Quadrilateral.new(
111
- Point.new(x_min, y_min),
112
- Point.new(x_max, y_min),
113
- Point.new(x_max, y_max),
114
- Point.new(x_min, y_max)
115
- )
116
- end
117
- end
118
- end
3
+ require_relative 'geometry/min_max'
4
+ require_relative 'geometry/point'
5
+ require_relative 'geometry/quadrilateral'
6
+ require_relative 'geometry/polygon'
7
+ require_relative 'geometry/utils'
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'json'
3
4
  require 'net/http'
4
5
  require_relative '../version'
5
6
 
@@ -32,12 +33,58 @@ module Mindee
32
33
  @url_root = "#{BASE_URL_DEFAULT}/products/#{@owner}/#{@url_name}/v#{@version}"
33
34
  end
34
35
 
35
- # @param input_doc [Mindee::InputDocument]
36
- # @param include_words [Boolean]
36
# Sends a document to the synchronous prediction route and returns the decoded JSON body.
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
# @param all_words [Boolean] forwarded to the request helper as `all_words:`
# @param close_file [Boolean] forwarded to the request helper as `close_file:`
# @param cropper [Boolean] forwarded to the request helper as `cropper:`
# @return [Hash] the parsed response body, returned only for 2xx status codes
# @raise [Mindee::Parsing::Common::HttpError] for any status code outside 200..299
def predict(input_source, all_words, close_file, cropper)
  check_api_key
  http_reply = predict_req_post(input_source, all_words: all_words, close_file: close_file, cropper: cropper)
  payload = JSON.parse(http_reply.body, object_class: Hash)
  status = http_reply.code.to_i
  unless status.between?(200, 299)
    # The API describes the failure under api_request.error.
    failure = Parsing::Common::HttpError.new(payload['api_request']['error'])
    raise failure
  end

  payload
end
51
+
52
# Submits a document for asynchronous parsing through `document_queue_req_get`
# and returns the decoded JSON body.
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
# @param all_words [Boolean]
# @param close_file [Boolean]
# @param cropper [Boolean]
# @return [Hash] the parsed response body, returned only for 2xx status codes
# @raise [Mindee::Parsing::Common::HttpError] for any status code outside 200..299
def predict_async(input_source, all_words, close_file, cropper)
  check_api_key
  http_reply = document_queue_req_get(input_source, all_words, close_file, cropper)
  payload = JSON.parse(http_reply.body, object_class: Hash)
  status = http_reply.code.to_i
  unless status.between?(200, 299)
    # The API describes the failure under api_request.error.
    failure = Parsing::Common::HttpError.new(payload['api_request']['error'])
    raise failure
  end

  payload
end
66
+
67
# Queries the document queue for a previously submitted job and returns the decoded JSON body.
# @param job_id [String]
# @return [Hash] the parsed response body, returned only for 2xx status codes
# @raise [Mindee::Parsing::Common::HttpError] for any status code outside 200..299
def parse_async(job_id)
  check_api_key
  http_reply = document_queue_req(job_id)
  payload = JSON.parse(http_reply.body, object_class: Hash)
  status = http_reply.code.to_i
  unless status.between?(200, 299)
    # The API describes the failure under api_request.error.
    failure = Parsing::Common::HttpError.new(payload['api_request']['error'])
    raise failure
  end

  payload
end
79
+
80
+ private
81
+
82
+ # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
83
+ # @param all_words [Boolean]
37
84
  # @param close_file [Boolean]
38
85
  # @param cropper [Boolean]
39
86
  # @return [Net::HTTPResponse]
40
- def predict_req_post(input_doc, include_words: false, close_file: true, cropper: false)
87
+ def predict_req_post(input_source, all_words: false, close_file: true, cropper: false)
41
88
  uri = URI("#{@url_root}/predict")
42
89
 
43
90
  params = {}
@@ -49,11 +96,51 @@ module Mindee
49
96
  'User-Agent' => USER_AGENT,
50
97
  }
51
98
  req = Net::HTTP::Post.new(uri, headers)
99
+ form_data = if input_source.is_a?(Mindee::Input::Source::UrlInputSource)
100
+ {
101
+ 'document' => input_source.url,
102
+ }
103
+ else
104
+ {
105
+ 'document' => input_source.read_document(close: close_file),
106
+ }
107
+ end
108
+ form_data.push ['include_mvision', 'true'] if all_words
109
+
110
+ req.set_form(form_data, 'multipart/form-data')
111
+
112
+ Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
113
+ http.request(req)
114
+ end
115
+ end
116
+
117
+ # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
118
+ # @param all_words [Boolean]
119
+ # @param close_file [Boolean]
120
+ # @param cropper [Boolean]
121
+ # @return [Net::HTTPResponse]
122
+ def document_queue_req_get(input_source, all_words, close_file, cropper)
123
+ uri = URI("#{@url_root}/predict_async")
124
+
125
+ params = {}
126
+ params[:cropper] = 'true' if cropper
127
+ uri.query = URI.encode_www_form(params)
52
128
 
53
- form_data = {
54
- 'document' => input_doc.read_document(close: close_file),
129
+ headers = {
130
+ 'Authorization' => "Token #{@api_key}",
131
+ 'User-Agent' => USER_AGENT,
55
132
  }
56
- form_data.push ['include_mvision', 'true'] if include_words
133
+ req = Net::HTTP::Post.new(uri, headers)
134
+ form_data = if input_source.is_a?(Mindee::Input::Source::UrlInputSource)
135
+ {
136
+ 'document' => input_source.url,
137
+ }
138
+ else
139
+ {
140
+ 'document' => input_source.read_document(close: close_file),
141
+ }
142
+ end
143
+ form_data.push ['include_mvision', 'true'] if all_words
57
144
 
58
145
  req.set_form(form_data, 'multipart/form-data')
59
146
 
@@ -61,19 +148,39 @@ module Mindee
61
148
  http.request(req)
62
149
  end
63
150
  end
64
- end
65
151
 
66
- # Receipt API endpoint
67
- class StandardEndpoint < Endpoint
68
- def initialize(endpoint_name, version, api_key)
69
- super('mindee', endpoint_name, version, api_key: api_key)
152
# Requests the queue entry of a job and follows at most one redirect.
# @param job_id [String]
# @return [Net::HTTPResponse] the first response, or the response to the
#   redirected request when the first status code is in 300..399
def document_queue_req(job_id)
  queue_uri = URI("#{@url_root}/documents/queue/#{job_id}")
  auth_headers = {
    'Authorization' => "Token #{@api_key}",
    'User-Agent' => USER_AGENT,
  }

  # Each GET is sent over its own TLS connection to the queue host.
  send_get = lambda do |target|
    request = Net::HTTP::Get.new(target, auth_headers)
    Net::HTTP.start(queue_uri.hostname, queue_uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
      http.request(request)
    end
  end

  reply = send_get.call(queue_uri)
  return reply unless reply.code.to_i.between?(300, 399)

  # The redirect target comes from the Location header of the first response.
  send_get.call(reply['location'])
end
71
- end
72
176
 
73
- # Custom (constructed) API endpoint
74
- class CustomEndpoint < Endpoint
75
- def initialize(account_name, endpoint_name, version, api_key)
76
- super(account_name, endpoint_name, version, api_key: api_key)
177
# Verifies that an API key is configured before a request is made.
# @return [nil] when the key is present and non-empty
# @raise [RuntimeError] when the key is nil or empty
def check_api_key
  key_present = !(@api_key.nil? || @api_key.empty?)
  return if key_present

  raise "Missing API key for product \"'#{@url_name}' v#{@version}\" (belonging to \"#{@owner}\"), " \
        "check your Client Configuration.\n" \
        'You can set this using the ' \
        "'#{HTTP::API_KEY_ENV_NAME}' environment variable."
end
78
185
  end
79
186
  end
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'http/endpoint'
@@ -3,94 +3,108 @@
3
3
  require 'stringio'
4
4
  require 'marcel'
5
5
 
6
- require_relative 'pdf_processing'
6
+ require_relative '../pdf'
7
7
 
8
8
  module Mindee
9
9
  module Input
10
- ALLOWED_MIME_TYPES = [
11
- 'application/pdf',
12
- 'image/heic',
13
- 'image/png',
14
- 'image/jpeg',
15
- 'image/tiff',
16
- 'image/webp',
17
- ].freeze
18
-
19
- # Base class for loading documents.
20
- class InputDocument
21
- # @return [String]
22
- attr_reader :filename
23
- # @return [String]
24
- attr_reader :file_mimetype
25
- # @return [StreamIO]
26
- attr_reader :io_stream
27
-
28
- # @param io_stream [StreamIO]
29
- def initialize(io_stream, filename)
30
- @io_stream = io_stream
31
- @filename = filename
32
- @file_mimetype = Marcel::MimeType.for @io_stream, name: @filename
33
-
34
- return if ALLOWED_MIME_TYPES.include? @file_mimetype
35
-
36
- raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
37
- end
10
module Source
  # MIME types accepted for local documents.
  ALLOWED_MIME_TYPES = [
    'application/pdf',
    'image/heic',
    'image/png',
    'image/jpeg',
    'image/tiff',
    'image/webp',
  ].freeze

  # Base class for documents that are available locally as an IO-like stream.
  class LocalInputSource
    # @return [String]
    attr_reader :filename
    # @return [String] MIME type detected by Marcel from the stream and the file name
    attr_reader :file_mimetype
    # @return [StringIO, File] stream that is rewound, read and optionally closed by this class
    attr_reader :io_stream

    # @param io_stream [StringIO, File]
    # @param filename [String]
    # @raise [RuntimeError] when the detected MIME type is not in ALLOWED_MIME_TYPES
    def initialize(io_stream, filename)
      @io_stream = io_stream
      @filename = filename
      @file_mimetype = Marcel::MimeType.for @io_stream, name: @filename

      return if ALLOWED_MIME_TYPES.include? @file_mimetype

      raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
    end

    # @return [Boolean] true when the detected MIME type is 'application/pdf'
    def pdf?
      @file_mimetype == 'application/pdf'
    end

    # Rewinds the stream and replaces it with the output of PdfProcessor.parse.
    # @param options passed through unchanged to PdfProcessor.parse
    def process_pdf(options)
      @io_stream.seek(0)
      @io_stream = PdfProcessor.parse(@io_stream, options)
    end

    # Reads the whole stream from the beginning.
    # @param close [Boolean] close the stream once it has been read
    # @return [String] the content, Base64-encoded
    def read_document(close: true)
      @io_stream.seek(0)
      data = @io_stream.read
      @io_stream.close if close
      [data].pack('m')
    end
  end

  # Load a document from a path.
  class PathInputSource < LocalInputSource
    # @param filepath [String]
    # @raise [RuntimeError] when the file type is not allowed
    def initialize(filepath)
      io_stream = File.open(filepath, 'rb')
      begin
        super(io_stream, File.basename(filepath))
      rescue StandardError
        # This constructor opened the file, so nobody else can close it
        # when the base constructor rejects the document.
        io_stream.close
        raise
      end
    end
  end

  # Load a document from a base64 string.
  class Base64InputSource < LocalInputSource
    # @param base64_string [String]
    # @param filename [String]
    def initialize(base64_string, filename)
      io_stream = StringIO.new(base64_string.unpack1('m*'))
      io_stream.set_encoding Encoding::BINARY
      super(io_stream, filename)
    end
  end

  # Load a document from raw bytes.
  class BytesInputSource < LocalInputSource
    # @param raw_bytes [String]
    # @param filename [String]
    def initialize(raw_bytes, filename)
      io_stream = StringIO.new(raw_bytes)
      io_stream.set_encoding Encoding::BINARY
      super(io_stream, filename)
    end
  end

  # Load a document from a file handle.
  class FileInputSource < LocalInputSource
    # @param file_handle [File] used as the stream as-is; it stays owned by the
    #   caller until read_document closes it (presumably opened in binary mode - confirm with callers)
    # @param filename [String]
    def initialize(file_handle, filename)
      io_stream = file_handle
      super(io_stream, filename)
    end
  end

  # Load a remote document from a file url.
  class UrlInputSource
    # @return [String]
    attr_reader :url

    # @param url [String]
    # @raise [RuntimeError] when the URL does not start with 'https://'
    def initialize(url)
      raise 'URL must be HTTPS' unless url.start_with? 'https://'

      @url = url
    end
  end
end
96
110
  end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'document'
4
+ require 'time'
5
+
6
module Mindee
  module Parsing
    module Common
      # States an asynchronous job can be in.
      module JobStatus
        WAITING = :waiting
        PROCESSING = :processing
        COMPLETED = :completed
      end

      # Outcomes reported in the `api_request` section of a response.
      module RequestStatus
        FAILURE = :failure
        SUCCESS = :success
      end

      # Job (queue) information on async parsing.
      class Job
        # @return [String] Mindee ID of the document
        attr_reader :id
        # @return [Time] parsed from the ISO 8601 `issued_at` value
        attr_reader :issued_at
        # @return [Time, nil] parsed from `available_at`; nil while the value is missing
        attr_reader :available_at
        # @return [JobStatus, Symbol]
        attr_reader :status
        # @return [Integer, nil] milliseconds between `issued_at` and `available_at`
        attr_reader :millisecs_taken

        # @param http_response [Hash] the `job` section of the response
        def initialize(http_response)
          @id = http_response['id']
          @issued_at = Time.iso8601(http_response['issued_at'])
          unless http_response['available_at'].nil?
            @available_at = Time.iso8601(http_response['available_at'])
            @millisecs_taken = (1000 * (@available_at - @issued_at).to_f).to_i
          end
          @status = case http_response['status']
                    when 'waiting'
                      JobStatus::WAITING
                    when 'processing'
                      JobStatus::PROCESSING
                    when 'completed'
                      JobStatus::COMPLETED
                    else
                      # Unknown states are kept as symbols rather than rejected.
                      http_response['status']&.to_sym
                    end
        end
      end

      # HTTP request response.
      class ApiRequest
        # @return [Hash]
        attr_reader :error
        # @return [Array<String>]
        attr_reader :ressources
        # Correctly spelled reader for the same value.
        alias resources ressources
        # @return [RequestStatus, Symbol]
        attr_reader :status
        # @return [Integer]
        attr_reader :status_code
        # @return [String]
        attr_reader :url

        # @param server_response [Hash] the `api_request` section of the response
        def initialize(server_response)
          @error = server_response['error']
          # NOTE(review): the API payload is expected to use the key 'resources';
          # the misspelled 'ressources' key is still honoured as a fallback.
          @ressources = server_response.fetch('resources') { server_response['ressources'] }

          @status = if server_response['status'] == 'failure'
                      RequestStatus::FAILURE
                    elsif server_response['status'] == 'success'
                      RequestStatus::SUCCESS
                    else
                      server_response['status']&.to_sym
                    end
          @status_code = server_response['status_code']
          @url = server_response['url']
        end
      end

      # Wrapper class for all predictions (synchronous and asynchronous)
      class ApiResponse
        # @return [Mindee::Parsing::Common::Document, nil]
        attr_reader :document
        # @return [Mindee::Parsing::Common::Job, nil]
        attr_reader :job
        # @return [Mindee::Parsing::Common::ApiRequest, nil]
        attr_reader :api_request

        # @param product_class [Class<Mindee::Product>]
        # @param http_response [Hash]
        def initialize(product_class, http_response)
          request_payload = http_response['api_request']
          job_payload = http_response['job']

          @api_request = Mindee::Parsing::Common::ApiRequest.new(request_payload) unless request_payload.nil?

          # A document is only exposed once its job (if any) is completed and the
          # request itself succeeded; a missing api_request counts as "not successful".
          job_finished = job_payload.nil? || job_payload['status'] == 'completed'
          if http_response.key?('document') && job_finished && @api_request&.status == RequestStatus::SUCCESS
            @document = Mindee::Parsing::Common::Document.new(product_class, http_response['document'])
          end

          @job = Mindee::Parsing::Common::Job.new(job_payload) unless job_payload.nil?
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'inference'
4
+
5
module Mindee
  module Parsing
    module Common
      # Stores all response attributes.
      class Document
        # @return [Object] instance of the product class given to the constructor
        attr_reader :inference
        # @return [String] Filename sent to the API
        attr_reader :name
        # @return [String] Mindee ID of the document
        attr_reader :id
        # @return [Mindee::Parsing::Common::Ocr::Ocr, nil]
        attr_reader :ocr

        # Builds the OCR object when the response carries `mvision-v1` OCR data.
        # @param http_response [Hash]
        # @return [Mindee::Parsing::Common::Ocr::Ocr, nil] nil when there is no usable `ocr` section
        def self.load_ocr(http_response)
          ocr_prediction = http_response.fetch('ocr', nil)
          return nil if ocr_prediction.nil? || ocr_prediction.fetch('mvision-v1', nil).nil?

          # `Ocr` is a namespace, not a method: the class has to be instantiated explicitly.
          Ocr::Ocr.new(ocr_prediction)
        end

        # @param product_class [Class<Mindee::Product>]
        # @param http_response [Hash]
        def initialize(product_class, http_response)
          @id = http_response['id']
          @name = http_response['name']
          @inference = product_class.new(http_response['inference'])
          @ocr = self.class.load_ocr(http_response)
        end

        # @return [String] header, Mindee ID and file name followed by the inference's own string form
        def to_s
          out_str = String.new
          out_str << "########\nDocument\n########"
          out_str << "\n:Mindee ID: #{@id}"
          out_str << "\n:Filename: #{@name}"
          out_str << "\n\n#{@inference}"
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Common
6
+ # API HttpError
7
+ class HttpError < StandardError
8
+ # @return [String]
9
+ attr_reader :api_code
10
+ # @return [String]
11
+ attr_reader :api_details
12
+ # @return [String]
13
+ attr_reader :api_message
14
+
15
+ def initialize(error)
16
+ @api_code = error['code']
17
+ @api_details = error['details']
18
+ @api_message = error['message']
19
+ super("#{@api_code}: #{@api_details} - #{@api_message}")
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end