mindee 2.2.1 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (156) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +2 -0
  4. data/.yardopts +1 -0
  5. data/CHANGELOG.md +40 -0
  6. data/README.md +29 -16
  7. data/bin/mindee.rb +23 -26
  8. data/docs/code_samples/bank_account_details_v1.txt +10 -5
  9. data/docs/code_samples/bank_account_details_v2.txt +19 -0
  10. data/docs/code_samples/bank_check_v1.txt +10 -5
  11. data/docs/code_samples/carte_vitale_v1.txt +10 -5
  12. data/docs/code_samples/custom_v1.txt +19 -10
  13. data/docs/code_samples/default.txt +10 -2
  14. data/docs/code_samples/expense_receipts_v4.txt +10 -5
  15. data/docs/code_samples/expense_receipts_v5.txt +11 -6
  16. data/docs/code_samples/financial_document_v1.txt +10 -5
  17. data/docs/code_samples/idcard_fr_v1.txt +10 -5
  18. data/docs/code_samples/invoice_splitter_v1_async.txt +66 -0
  19. data/docs/code_samples/invoices_v4.txt +10 -5
  20. data/docs/code_samples/license_plates_v1.txt +10 -5
  21. data/docs/code_samples/passport_v1.txt +10 -5
  22. data/docs/code_samples/proof_of_address_v1.txt +10 -5
  23. data/docs/code_samples/us_driver_license_v1.txt +19 -0
  24. data/docs/ruby-api-builder.md +30 -31
  25. data/docs/ruby-getting-started.md +64 -23
  26. data/docs/ruby-invoice-ocr.md +70 -59
  27. data/docs/ruby-passport-ocr.md +49 -40
  28. data/docs/ruby-receipt-ocr.md +45 -32
  29. data/lib/mindee/client.rb +150 -148
  30. data/lib/mindee/geometry/min_max.rb +23 -0
  31. data/lib/mindee/geometry/point.rb +35 -0
  32. data/lib/mindee/geometry/polygon.rb +23 -0
  33. data/lib/mindee/geometry/quadrilateral.rb +45 -0
  34. data/lib/mindee/geometry/utils.rb +81 -0
  35. data/lib/mindee/geometry.rb +5 -116
  36. data/lib/mindee/http/endpoint.rb +115 -16
  37. data/lib/mindee/http.rb +3 -0
  38. data/lib/mindee/input/sources.rb +90 -73
  39. data/lib/mindee/parsing/common/api_response.rb +109 -0
  40. data/lib/mindee/parsing/common/document.rb +48 -0
  41. data/lib/mindee/parsing/common/error.rb +24 -0
  42. data/lib/mindee/parsing/common/inference.rb +43 -0
  43. data/lib/mindee/parsing/common/ocr/mvision_v1.rb +34 -0
  44. data/lib/mindee/parsing/common/ocr/ocr.rb +169 -0
  45. data/lib/mindee/parsing/common/ocr.rb +3 -0
  46. data/lib/mindee/parsing/common/orientation.rb +26 -0
  47. data/lib/mindee/parsing/common/page.rb +40 -0
  48. data/lib/mindee/parsing/common/prediction.rb +15 -0
  49. data/lib/mindee/parsing/common/product.rb +19 -0
  50. data/lib/mindee/parsing/common.rb +10 -0
  51. data/lib/mindee/parsing/custom/classification_field.rb +28 -0
  52. data/lib/mindee/parsing/custom/list_field.rb +76 -0
  53. data/lib/mindee/parsing/custom.rb +4 -0
  54. data/lib/mindee/parsing/standard/amount_field.rb +26 -0
  55. data/lib/mindee/parsing/standard/base_field.rb +104 -0
  56. data/lib/mindee/parsing/standard/classification_field.rb +16 -0
  57. data/lib/mindee/parsing/standard/company_registration_field.rb +21 -0
  58. data/lib/mindee/parsing/standard/date_field.rb +34 -0
  59. data/lib/mindee/parsing/standard/locale_field.rb +50 -0
  60. data/lib/mindee/parsing/standard/payment_details_field.rb +42 -0
  61. data/lib/mindee/parsing/standard/position_field.rb +47 -0
  62. data/lib/mindee/parsing/standard/tax_field.rb +108 -0
  63. data/lib/mindee/parsing/standard/text_field.rb +16 -0
  64. data/lib/mindee/parsing/standard.rb +12 -0
  65. data/lib/mindee/parsing.rb +3 -2
  66. data/lib/mindee/{input → pdf}/pdf_processing.rb +4 -32
  67. data/lib/mindee/pdf/pdf_tools.rb +34 -0
  68. data/lib/mindee/pdf.rb +3 -0
  69. data/lib/mindee/product/.rubocop.yml +5 -0
  70. data/lib/mindee/product/custom/custom_v1.rb +35 -0
  71. data/lib/mindee/product/custom/custom_v1_document.rb +60 -0
  72. data/lib/mindee/product/custom/custom_v1_page.rb +32 -0
  73. data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +38 -0
  74. data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +37 -0
  75. data/lib/mindee/product/eu/license_plate/license_plate_v1_page.rb +34 -0
  76. data/lib/mindee/product/financial_document/financial_document_v1.rb +36 -0
  77. data/lib/mindee/product/financial_document/financial_document_v1_document.rb +202 -0
  78. data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +90 -0
  79. data/lib/mindee/product/financial_document/financial_document_v1_page.rb +32 -0
  80. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +38 -0
  81. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +43 -0
  82. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_page.rb +34 -0
  83. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +38 -0
  84. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +71 -0
  85. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +51 -0
  86. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rb +34 -0
  87. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +38 -0
  88. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +52 -0
  89. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb +34 -0
  90. data/lib/mindee/product/fr/id_card/id_card_v1.rb +38 -0
  91. data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +82 -0
  92. data/lib/mindee/product/fr/id_card/id_card_v1_page.rb +48 -0
  93. data/lib/mindee/product/invoice/invoice_v4.rb +37 -0
  94. data/lib/mindee/product/invoice/invoice_v4_document.rb +212 -0
  95. data/lib/mindee/product/invoice/invoice_v4_line_item.rb +66 -0
  96. data/lib/mindee/product/invoice/invoice_v4_page.rb +32 -0
  97. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +36 -0
  98. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +65 -0
  99. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +32 -0
  100. data/lib/mindee/product/passport/passport_v1.rb +36 -0
  101. data/lib/mindee/{parsing/prediction/fr/id_card/id_card_v1.rb → product/passport/passport_v1_document.rb} +45 -45
  102. data/lib/mindee/product/passport/passport_v1_page.rb +32 -0
  103. data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +36 -0
  104. data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +83 -0
  105. data/lib/mindee/product/proof_of_address/proof_of_address_v1_page.rb +32 -0
  106. data/lib/mindee/product/receipt/receipt_v4.rb +36 -0
  107. data/lib/mindee/product/receipt/receipt_v4_document.rb +86 -0
  108. data/lib/mindee/product/receipt/receipt_v4_page.rb +32 -0
  109. data/lib/mindee/product/receipt/receipt_v5.rb +36 -0
  110. data/lib/mindee/product/receipt/receipt_v5_document.rb +149 -0
  111. data/lib/mindee/product/receipt/receipt_v5_line_item.rb +69 -0
  112. data/lib/mindee/product/receipt/receipt_v5_page.rb +32 -0
  113. data/lib/mindee/product/us/bank_check/bank_check_v1.rb +38 -0
  114. data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +62 -0
  115. data/lib/mindee/product/us/bank_check/bank_check_v1_page.rb +57 -0
  116. data/lib/mindee/product/us/driver_license/driver_license_v1.rb +38 -0
  117. data/lib/mindee/product/us/driver_license/driver_license_v1_document.rb +113 -0
  118. data/lib/mindee/product/us/driver_license/driver_license_v1_page.rb +53 -0
  119. data/lib/mindee/product.rb +17 -0
  120. data/lib/mindee/version.rb +2 -1
  121. data/lib/mindee.rb +3 -1
  122. metadata +91 -38
  123. data/docs/code_samples/shipping_containers_v1.txt +0 -14
  124. data/lib/mindee/document_config.rb +0 -60
  125. data/lib/mindee/parsing/document.rb +0 -31
  126. data/lib/mindee/parsing/error.rb +0 -22
  127. data/lib/mindee/parsing/inference.rb +0 -53
  128. data/lib/mindee/parsing/page.rb +0 -46
  129. data/lib/mindee/parsing/prediction/base.rb +0 -30
  130. data/lib/mindee/parsing/prediction/common_fields/amount.rb +0 -21
  131. data/lib/mindee/parsing/prediction/common_fields/base.rb +0 -72
  132. data/lib/mindee/parsing/prediction/common_fields/company_registration.rb +0 -17
  133. data/lib/mindee/parsing/prediction/common_fields/date.rb +0 -30
  134. data/lib/mindee/parsing/prediction/common_fields/locale.rb +0 -45
  135. data/lib/mindee/parsing/prediction/common_fields/payment_details.rb +0 -33
  136. data/lib/mindee/parsing/prediction/common_fields/position.rb +0 -39
  137. data/lib/mindee/parsing/prediction/common_fields/tax.rb +0 -44
  138. data/lib/mindee/parsing/prediction/common_fields/text.rb +0 -12
  139. data/lib/mindee/parsing/prediction/common_fields.rb +0 -11
  140. data/lib/mindee/parsing/prediction/custom/custom_v1.rb +0 -58
  141. data/lib/mindee/parsing/prediction/custom/fields.rb +0 -91
  142. data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +0 -34
  143. data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +0 -237
  144. data/lib/mindee/parsing/prediction/financial_document/financial_document_v1_line_item.rb +0 -58
  145. data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +0 -40
  146. data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +0 -49
  147. data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +0 -212
  148. data/lib/mindee/parsing/prediction/invoice/invoice_v4_line_item.rb +0 -58
  149. data/lib/mindee/parsing/prediction/passport/passport_v1.rb +0 -121
  150. data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +0 -80
  151. data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +0 -87
  152. data/lib/mindee/parsing/prediction/receipt/receipt_v5.rb +0 -136
  153. data/lib/mindee/parsing/prediction/receipt/receipt_v5_line_item.rb +0 -37
  154. data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +0 -38
  155. data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +0 -70
  156. data/lib/mindee/parsing/prediction.rb +0 -15
@@ -1,118 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Mindee
4
- # Various helper functions for geometry.
5
- module Geometry
6
- # A relative set of coordinates (X, Y) on the document.
7
- class Point
8
- # @return [Float]
9
- attr_accessor :x
10
- # @return [Float]
11
- attr_accessor :y
12
-
13
- # @param x [Float]
14
- # @param y [Float]
15
- # rubocop:disable Naming/MethodParameterName
16
- def initialize(x, y)
17
- @x = x
18
- @y = y
19
- end
20
- # rubocop:enable Naming/MethodParameterName
21
-
22
- # @return [Float]
23
- def [](key)
24
- case key
25
- when 0
26
- @x
27
- when 1
28
- @y
29
- else
30
- throw '0 or 1 only'
31
- end
32
- end
33
- end
34
-
35
- # Contains exactly 4 relative vertices coordinates (Points).
36
- class Quadrilateral
37
- # @return [Mindee::Geometry::Point]
38
- attr_accessor :top_left
39
- # @return [Mindee::Geometry::Point]
40
- attr_accessor :top_right
41
- # @return [Mindee::Geometry::Point]
42
- attr_accessor :bottom_right
43
- # @return [Mindee::Geometry::Point]
44
- attr_accessor :bottom_left
45
-
46
- # @param top_left [Mindee::Geometry::Point]
47
- # @param top_right [Mindee::Geometry::Point]
48
- # @param bottom_right [Mindee::Geometry::Point]
49
- # @param bottom_left [Mindee::Geometry::Point]
50
- def initialize(top_left, top_right, bottom_right, bottom_left)
51
- @top_left = top_left
52
- @top_right = top_right
53
- @bottom_right = bottom_right
54
- @bottom_left = bottom_left
55
- end
56
-
57
- # @return [Mindee::Geometry::Point]
58
- def [](key)
59
- case key
60
- when 0
61
- @top_left
62
- when 1
63
- @top_right
64
- when 2
65
- @bottom_right
66
- when 3
67
- @bottom_left
68
- else
69
- throw '0, 1, 2, 3 only'
70
- end
71
- end
72
- end
73
-
74
- class Polygon < Array
75
- end
76
-
77
- # Transform a prediction into a Quadrilateral.
78
- def self.quadrilateral_from_prediction(prediction)
79
- throw "Prediction must have exactly 4 points, found #{prediction.size}" if prediction.size != 4
80
-
81
- Quadrilateral.new(
82
- Point.new(prediction[0][0], prediction[0][1]),
83
- Point.new(prediction[1][0], prediction[1][1]),
84
- Point.new(prediction[2][0], prediction[2][1]),
85
- Point.new(prediction[3][0], prediction[3][1])
86
- )
87
- end
88
-
89
- # Transform a prediction into a Polygon.
90
- def self.polygon_from_prediction(prediction)
91
- polygon = Polygon.new
92
- return polygon if prediction.nil?
93
-
94
- prediction.each do |point|
95
- polygon << Point.new(point[0], point[1])
96
- end
97
- polygon
98
- end
99
-
100
- # @return [Array<Float>]
101
- def self.get_bbox(vertices)
102
- x_coords = vertices.map(&:x)
103
- y_coords = vertices.map(&:y)
104
- [x_coords.min, y_coords.min, x_coords.max, y_coords.max]
105
- end
106
-
107
- # @return [Mindee::Geometry::Quadrilateral]
108
- def self.get_bounding_box(vertices)
109
- x_min, y_min, x_max, y_max = get_bbox(vertices)
110
- Quadrilateral.new(
111
- Point.new(x_min, y_min),
112
- Point.new(x_max, y_min),
113
- Point.new(x_max, y_max),
114
- Point.new(x_min, y_max)
115
- )
116
- end
117
- end
118
- end
3
+ require_relative 'geometry/min_max'
4
+ require_relative 'geometry/point'
5
+ require_relative 'geometry/quadrilateral'
6
+ require_relative 'geometry/polygon'
7
+ require_relative 'geometry/utils'
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'json'
3
4
  require 'net/http'
4
5
  require_relative '../version'
5
6
 
@@ -32,12 +33,58 @@ module Mindee
32
33
  @url_root = "#{BASE_URL_DEFAULT}/products/#{@owner}/#{@url_name}/v#{@version}"
33
34
  end
34
35
 
35
- # @param input_doc [Mindee::InputDocument]
36
- # @param include_words [Boolean]
36
+ # Call the prediction API.
37
+ # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
38
+ # @param all_words [Boolean]
39
+ # @param close_file [Boolean]
40
+ # @param cropper [Boolean]
41
+ # @return [Hash]
42
+ def predict(input_source, all_words, close_file, cropper)
43
+ check_api_key
44
+ response = predict_req_post(input_source, all_words: all_words, close_file: close_file, cropper: cropper)
45
+ hashed_response = JSON.parse(response.body, object_class: Hash)
46
+ return hashed_response if (200..299).include?(response.code.to_i)
47
+
48
+ error = Parsing::Common::HttpError.new(hashed_response['api_request']['error'])
49
+ raise error
50
+ end
51
+
52
+ # Call the asynchronous prediction API.
53
+ # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
54
+ # @param close_file [Boolean]
55
+ # @param cropper [Boolean]
56
+ # @return [Hash]
57
+ def predict_async(input_source, all_words, close_file, cropper)
58
+ check_api_key
59
+ response = document_queue_req_get(input_source, all_words, close_file, cropper)
60
+ hashed_response = JSON.parse(response.body, object_class: Hash)
61
+ return hashed_response if (200..299).include?(response.code.to_i)
62
+
63
+ error = Parsing::Common::HttpError.new(hashed_response['api_request']['error'])
64
+ raise error
65
+ end
66
+
67
+ # Fetch the result of an asynchronous parsing job from the document queue.
68
+ # @param job_id [String]
69
+ # @return [Hash]
70
+ def parse_async(job_id)
71
+ check_api_key
72
+ response = document_queue_req(job_id)
73
+ hashed_response = JSON.parse(response.body, object_class: Hash)
74
+ return hashed_response if (200..299).include?(response.code.to_i)
75
+
76
+ error = Parsing::Common::HttpError.new(hashed_response['api_request']['error'])
77
+ raise error
78
+ end
79
+
80
+ private
81
+
82
+ # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
83
+ # @param all_words [Boolean]
37
84
  # @param close_file [Boolean]
38
85
  # @param cropper [Boolean]
39
86
  # @return [Net::HTTPResponse]
40
- def predict_req_post(input_doc, include_words: false, close_file: true, cropper: false)
87
+ def predict_req_post(input_source, all_words: false, close_file: true, cropper: false)
41
88
  uri = URI("#{@url_root}/predict")
42
89
 
43
90
  params = {}
@@ -49,11 +96,43 @@ module Mindee
49
96
  'User-Agent' => USER_AGENT,
50
97
  }
51
98
  req = Net::HTTP::Post.new(uri, headers)
99
+ form_data = if input_source.is_a?(Mindee::Input::Source::UrlInputSource)
100
+ [['document', input_source.url]]
101
+ else
102
+ [input_source.read_document(close: close_file)]
103
+ end
104
+ form_data.push ['include_mvision', 'true'] if all_words
105
+
106
+ req.set_form(form_data, 'multipart/form-data')
107
+
108
+ Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
109
+ http.request(req)
110
+ end
111
+ end
112
+
113
+ # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
114
+ # @param all_words [Boolean]
115
+ # @param close_file [Boolean]
116
+ # @param cropper [Boolean]
117
+ # @return [Net::HTTPResponse]
118
+ def document_queue_req_get(input_source, all_words, close_file, cropper)
119
+ uri = URI("#{@url_root}/predict_async")
120
+
121
+ params = {}
122
+ params[:cropper] = 'true' if cropper
123
+ uri.query = URI.encode_www_form(params)
52
124
 
53
- form_data = {
54
- 'document' => input_doc.read_document(close: close_file),
125
+ headers = {
126
+ 'Authorization' => "Token #{@api_key}",
127
+ 'User-Agent' => USER_AGENT,
55
128
  }
56
- form_data.push ['include_mvision', 'true'] if include_words
129
+ req = Net::HTTP::Post.new(uri, headers)
130
+ form_data = if input_source.is_a?(Mindee::Input::Source::UrlInputSource)
131
+ [['document', input_source.url]]
132
+ else
133
+ [input_source.read_document(close: close_file)]
134
+ end
135
+ form_data.push ['include_mvision', 'true'] if all_words
57
136
 
58
137
  req.set_form(form_data, 'multipart/form-data')
59
138
 
@@ -61,19 +140,39 @@ module Mindee
61
140
  http.request(req)
62
141
  end
63
142
  end
64
- end
65
143
 
66
- # Receipt API endpoint
67
- class StandardEndpoint < Endpoint
68
- def initialize(endpoint_name, version, api_key)
69
- super('mindee', endpoint_name, version, api_key: api_key)
144
+ # @param job_id [String]
145
+ # @return [Net::HTTPResponse]
146
+ def document_queue_req(job_id)
147
+ uri = URI("#{@url_root}/documents/queue/#{job_id}")
148
+
149
+ headers = {
150
+ 'Authorization' => "Token #{@api_key}",
151
+ 'User-Agent' => USER_AGENT,
152
+ }
153
+
154
+ req = Net::HTTP::Get.new(uri, headers)
155
+
156
+ response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
157
+ http.request(req)
158
+ end
159
+
160
+ if response.code.to_i > 299 && response.code.to_i < 400
161
+ req = Net::HTTP::Get.new(response['location'], headers)
162
+ response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
163
+ http.request(req)
164
+ end
165
+ end
166
+ response
70
167
  end
71
- end
72
168
 
73
- # Custom (constructed) API endpoint
74
- class CustomEndpoint < Endpoint
75
- def initialize(account_name, endpoint_name, version, api_key)
76
- super(account_name, endpoint_name, version, api_key: api_key)
169
+ def check_api_key
170
+ return unless @api_key.nil? || @api_key.empty?
171
+
172
+ raise "Missing API key for product \"'#{@url_name}' v#{@version}\" (belonging to \"#{@owner}\"), " \
173
+ "check your Client Configuration.\n" \
174
+ 'You can set this using the ' \
175
+ "'#{HTTP::API_KEY_ENV_NAME}' environment variable."
77
176
  end
78
177
  end
79
178
  end
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'http/endpoint'
@@ -3,94 +3,111 @@
3
3
  require 'stringio'
4
4
  require 'marcel'
5
5
 
6
- require_relative 'pdf_processing'
6
+ require_relative '../pdf'
7
7
 
8
8
  module Mindee
9
9
  module Input
10
- ALLOWED_MIME_TYPES = [
11
- 'application/pdf',
12
- 'image/heic',
13
- 'image/png',
14
- 'image/jpeg',
15
- 'image/tiff',
16
- 'image/webp',
17
- ].freeze
18
-
19
- # Base class for loading documents.
20
- class InputDocument
21
- # @return [String]
22
- attr_reader :filename
23
- # @return [String]
24
- attr_reader :file_mimetype
25
- # @return [StreamIO]
26
- attr_reader :io_stream
27
-
28
- # @param io_stream [StreamIO]
29
- def initialize(io_stream, filename)
30
- @io_stream = io_stream
31
- @filename = filename
32
- @file_mimetype = Marcel::MimeType.for @io_stream, name: @filename
33
-
34
- return if ALLOWED_MIME_TYPES.include? @file_mimetype
35
-
36
- raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
37
- end
10
+ module Source
11
+ ALLOWED_MIME_TYPES = [
12
+ 'application/pdf',
13
+ 'image/heic',
14
+ 'image/png',
15
+ 'image/jpeg',
16
+ 'image/tiff',
17
+ 'image/webp',
18
+ ].freeze
38
19
 
39
- def pdf?
40
- @file_mimetype == 'application/pdf'
41
- end
20
+ # Base class for loading documents.
21
+ class LocalInputSource
22
+ # @return [String]
23
+ attr_reader :filename
24
+ # @return [String]
25
+ attr_reader :file_mimetype
26
+ # @return [IO, StringIO]
27
+ attr_reader :io_stream
28
+
29
+ # @param io_stream [IO, StringIO]
30
+ def initialize(io_stream, filename)
31
+ @io_stream = io_stream
32
+ @filename = filename
33
+ @file_mimetype = Marcel::MimeType.for @io_stream, name: @filename
34
+
35
+ return if ALLOWED_MIME_TYPES.include? @file_mimetype
36
+
37
+ raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
38
+ end
39
+
40
+ def pdf?
41
+ @file_mimetype.to_s == 'application/pdf'
42
+ end
43
+
44
+ def process_pdf(options)
45
+ @io_stream.seek(0)
46
+ @io_stream = PdfProcessor.parse(@io_stream, options)
47
+ end
42
48
 
43
- def process_pdf(options)
44
- @io_stream.seek(0)
45
- @io_stream = PdfProcessor.parse(@io_stream, options)
49
+ # @param close [Boolean]
50
+ def read_document(close: true)
51
+ @io_stream.seek(0)
52
+ # Avoids needlessly re-packing some files
53
+ data = @io_stream.read
54
+ @io_stream.close if close
55
+ return ['document', data, { filename: @filename }] if pdf?
56
+
57
+ ['document', [data].pack('m'), { filename: @filename }]
58
+ end
46
59
  end
47
60
 
48
- # @param close [Boolean]
49
- def read_document(close: true)
50
- @io_stream.seek(0)
51
- data = @io_stream.read
52
- @io_stream.close if close
53
- [data].pack('m')
61
+ # Load a document from a path.
62
+ class PathInputSource < LocalInputSource
63
+ # @param filepath [String]
64
+ def initialize(filepath)
65
+ io_stream = File.open(filepath, 'rb')
66
+ super(io_stream, File.basename(filepath))
67
+ end
54
68
  end
55
- end
56
69
 
57
- # Load a document from a path.
58
- class PathDocument < InputDocument
59
- # @param filepath [String]
60
- def initialize(filepath)
61
- io_stream = File.open(filepath, 'rb')
62
- super(io_stream, File.basename(filepath))
70
+ # Load a document from a base64 string.
71
+ class Base64InputSource < LocalInputSource
72
+ # @param base64_string [String]
73
+ # @param filename [String]
74
+ def initialize(base64_string, filename)
75
+ io_stream = StringIO.new(base64_string.unpack1('m*'))
76
+ io_stream.set_encoding Encoding::BINARY
77
+ super(io_stream, filename)
78
+ end
63
79
  end
64
- end
65
80
 
66
- # Load a document from a base64 string.
67
- class Base64Document < InputDocument
68
- # @param base64_string [String]
69
- # @param filename [String]
70
- def initialize(base64_string, filename)
71
- io_stream = StringIO.new(base64_string.unpack1('m*'))
72
- io_stream.set_encoding Encoding::BINARY
73
- super(io_stream, filename)
81
+ # Load a document from raw bytes.
82
+ class BytesInputSource < LocalInputSource
83
+ # @param raw_bytes [String]
84
+ # @param filename [String]
85
+ def initialize(raw_bytes, filename)
86
+ io_stream = StringIO.new(raw_bytes)
87
+ io_stream.set_encoding Encoding::BINARY
88
+ super(io_stream, filename)
89
+ end
74
90
  end
75
- end
76
91
 
77
- # Load a document from raw bytes.
78
- class BytesDocument < InputDocument
79
- # @param raw_bytes [String]
80
- # @param filename [String]
81
- def initialize(raw_bytes, filename)
82
- io_stream = StringIO.new(raw_bytes)
83
- io_stream.set_encoding Encoding::BINARY
84
- super(io_stream, filename)
92
+ # Load a document from a file handle.
93
+ class FileInputSource < LocalInputSource
94
+ # @param filename [String]
95
+ def initialize(file_handle, filename)
96
+ io_stream = file_handle
97
+ super(io_stream, filename)
98
+ end
85
99
  end
86
- end
87
100
 
88
- # Load a document from a file handle.
89
- class FileDocument < InputDocument
90
- # @param filename [String]
91
- def initialize(file_handle, filename)
92
- io_stream = file_handle
93
- super(io_stream, filename)
101
+ # Load a remote document from a file url.
102
+ class UrlInputSource
103
+ # @return [String]
104
+ attr_reader :url
105
+
106
+ def initialize(url)
107
+ raise 'URL must be HTTPS' unless url.start_with? 'https://'
108
+
109
+ @url = url
110
+ end
94
111
  end
95
112
  end
96
113
  end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'document'
4
+ require 'time'
5
+
6
+ module Mindee
7
+ module Parsing
8
+ module Common
9
+ module JobStatus
10
+ WAITING = :waiting
11
+ PROCESSING = :processing
12
+ COMPLETED = :completed
13
+ end
14
+
15
+ module RequestStatus
16
+ FAILURE = :failure
17
+ SUCCESS = :success
18
+ end
19
+
20
+ # Job (queue) information on async parsing.
21
+ class Job
22
+ # @return [String] Mindee ID of the document
23
+ attr_reader :id
24
+ # @return [Time]
25
+ attr_reader :issued_at
26
+ # @return [Time, nil]
27
+ attr_reader :available_at
28
+ # @return [JobStatus, Symbol]
29
+ attr_reader :status
30
+ # @return [Integer, nil]
31
+ attr_reader :millisecs_taken
32
+
33
+ # @param http_response [Hash]
34
+ def initialize(http_response)
35
+ @id = http_response['id']
36
+ @issued_at = Time.iso8601(http_response['issued_at'])
37
+ if http_response.key?('available_at') && !http_response['available_at'].nil?
38
+ @available_at = Time.iso8601(http_response['available_at'])
39
+ @millisecs_taken = (1000 * (@available_at.to_time - @issued_at.to_time).to_f).to_i
40
+ end
41
+ @status = case http_response['status']
42
+ when 'waiting'
43
+ JobStatus::WAITING
44
+ when 'processing'
45
+ JobStatus::PROCESSING
46
+ when 'completed'
47
+ JobStatus::COMPLETED
48
+ else
49
+ http_response['status']&.to_sym
50
+ end
51
+ end
52
+ end
53
+
54
+ # HTTP request response.
55
+ class ApiRequest
56
+ # @return [Hash]
57
+ attr_reader :error
58
+ # @return [Array<String>]
59
+ attr_reader :ressources
60
+ # @return [RequestStatus, Symbol]
61
+ attr_reader :status
62
+ # @return [Integer]
63
+ attr_reader :status_code
64
+ # @return [String]
65
+ attr_reader :url
66
+
67
+ def initialize(server_response)
68
+ @error = server_response['error']
69
+ @ressources = server_response['ressources']
70
+
71
+ @status = if server_response['status'] == 'failure'
72
+ RequestStatus::FAILURE
73
+ elsif server_response['status'] == 'success'
74
+ RequestStatus::SUCCESS
75
+ else
76
+ server_response['status']&.to_sym
77
+ end
78
+ @status_code = server_response['status_code']
79
+ @url = server_response['url']
80
+ end
81
+ end
82
+
83
+ # Wrapper class for all predictions (synchronous and asynchronous)
84
+ class ApiResponse
85
+ # @return [Mindee::Parsing::Common::Document, nil]
86
+ attr_reader :document
87
+ # @return [Mindee::Parsing::Common::Job, nil]
88
+ attr_reader :job
89
+ # @return [Mindee::Parsing::Common::ApiRequest]
90
+ attr_reader :api_request
91
+
92
+ # @param product_class [Class<Mindee::Product>]
93
+ # @param http_response [Hash]
94
+ def initialize(product_class, http_response)
95
+ if http_response.key?('api_request')
96
+ @api_request = Mindee::Parsing::Common::ApiRequest.new(http_response['api_request'])
97
+ end
98
+ if http_response.key?('document') &&
99
+ (!http_response.key?('job') ||
100
+ http_response['job']['status'] == 'completed') &&
101
+ @api_request.status == RequestStatus::SUCCESS
102
+ @document = Mindee::Parsing::Common::Document.new(product_class, http_response['document'])
103
+ end
104
+ @job = Mindee::Parsing::Common::Job.new(http_response['job']) if http_response.key?('job')
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'inference'
4
+
5
+ module Mindee
6
+ module Parsing
7
+ module Common
8
+ # Stores all response attributes.
9
+ class Document
10
+ # @return [Mindee::Parsing::Common::Inference]
11
+ attr_reader :inference
12
+ # @return [String] Filename sent to the API
13
+ attr_reader :name
14
+ # @return [String] Mindee ID of the document
15
+ attr_reader :id
16
+ # @return [Mindee::Parsing::Common::Ocr::Ocr, nil]
17
+ attr_reader :ocr
18
+
19
+ # @param http_response [Hash]
20
+ # @return [Mindee::Parsing::Common::Ocr::Ocr, nil]
21
+ def self.load_ocr(http_response)
22
+ ocr_prediction = http_response.fetch('ocr', nil)
23
+ return nil if ocr_prediction.nil? || ocr_prediction.fetch('mvision-v1', nil).nil?
24
+
25
+ Ocr::Ocr.new(ocr_prediction)
26
+ end
27
+
28
+ # @param product_class [Class<Mindee::Product>]
29
+ # @param http_response [Hash]
30
+ def initialize(product_class, http_response)
31
+ @id = http_response['id']
32
+ @name = http_response['name']
33
+ @inference = product_class.new(http_response['inference'])
34
+ @ocr = self.class.load_ocr(http_response)
35
+ end
36
+
37
+ # @return [String]
38
+ def to_s
39
+ out_str = String.new
40
+ out_str << "########\nDocument\n########"
41
+ out_str << "\n:Mindee ID: #{@id}"
42
+ out_str << "\n:Filename: #{@name}"
43
+ out_str << "\n\n#{@inference}"
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Common
6
+ # API HttpError
7
+ class HttpError < StandardError
8
+ # @return [String]
9
+ attr_reader :api_code
10
+ # @return [String]
11
+ attr_reader :api_details
12
+ # @return [String]
13
+ attr_reader :api_message
14
+
15
+ def initialize(error)
16
+ @api_code = error['code']
17
+ @api_details = error['details']
18
+ @api_message = error['message']
19
+ super("#{@api_code}: #{@api_details} - #{@api_message}")
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end