mindee 2.2.1 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +2 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +40 -0
- data/README.md +29 -16
- data/bin/mindee.rb +23 -26
- data/docs/code_samples/bank_account_details_v1.txt +10 -5
- data/docs/code_samples/bank_account_details_v2.txt +19 -0
- data/docs/code_samples/bank_check_v1.txt +10 -5
- data/docs/code_samples/carte_vitale_v1.txt +10 -5
- data/docs/code_samples/custom_v1.txt +19 -10
- data/docs/code_samples/default.txt +10 -2
- data/docs/code_samples/expense_receipts_v4.txt +10 -5
- data/docs/code_samples/expense_receipts_v5.txt +11 -6
- data/docs/code_samples/financial_document_v1.txt +10 -5
- data/docs/code_samples/idcard_fr_v1.txt +10 -5
- data/docs/code_samples/invoice_splitter_v1_async.txt +66 -0
- data/docs/code_samples/invoices_v4.txt +10 -5
- data/docs/code_samples/license_plates_v1.txt +10 -5
- data/docs/code_samples/passport_v1.txt +10 -5
- data/docs/code_samples/proof_of_address_v1.txt +10 -5
- data/docs/code_samples/us_driver_license_v1.txt +19 -0
- data/docs/ruby-api-builder.md +30 -31
- data/docs/ruby-getting-started.md +64 -23
- data/docs/ruby-invoice-ocr.md +70 -59
- data/docs/ruby-passport-ocr.md +49 -40
- data/docs/ruby-receipt-ocr.md +45 -32
- data/lib/mindee/client.rb +150 -148
- data/lib/mindee/geometry/min_max.rb +23 -0
- data/lib/mindee/geometry/point.rb +35 -0
- data/lib/mindee/geometry/polygon.rb +23 -0
- data/lib/mindee/geometry/quadrilateral.rb +45 -0
- data/lib/mindee/geometry/utils.rb +81 -0
- data/lib/mindee/geometry.rb +5 -116
- data/lib/mindee/http/endpoint.rb +115 -16
- data/lib/mindee/http.rb +3 -0
- data/lib/mindee/input/sources.rb +90 -73
- data/lib/mindee/parsing/common/api_response.rb +109 -0
- data/lib/mindee/parsing/common/document.rb +48 -0
- data/lib/mindee/parsing/common/error.rb +24 -0
- data/lib/mindee/parsing/common/inference.rb +43 -0
- data/lib/mindee/parsing/common/ocr/mvision_v1.rb +34 -0
- data/lib/mindee/parsing/common/ocr/ocr.rb +169 -0
- data/lib/mindee/parsing/common/ocr.rb +3 -0
- data/lib/mindee/parsing/common/orientation.rb +26 -0
- data/lib/mindee/parsing/common/page.rb +40 -0
- data/lib/mindee/parsing/common/prediction.rb +15 -0
- data/lib/mindee/parsing/common/product.rb +19 -0
- data/lib/mindee/parsing/common.rb +10 -0
- data/lib/mindee/parsing/custom/classification_field.rb +28 -0
- data/lib/mindee/parsing/custom/list_field.rb +76 -0
- data/lib/mindee/parsing/custom.rb +4 -0
- data/lib/mindee/parsing/standard/amount_field.rb +26 -0
- data/lib/mindee/parsing/standard/base_field.rb +104 -0
- data/lib/mindee/parsing/standard/classification_field.rb +16 -0
- data/lib/mindee/parsing/standard/company_registration_field.rb +21 -0
- data/lib/mindee/parsing/standard/date_field.rb +34 -0
- data/lib/mindee/parsing/standard/locale_field.rb +50 -0
- data/lib/mindee/parsing/standard/payment_details_field.rb +42 -0
- data/lib/mindee/parsing/standard/position_field.rb +47 -0
- data/lib/mindee/parsing/standard/tax_field.rb +108 -0
- data/lib/mindee/parsing/standard/text_field.rb +16 -0
- data/lib/mindee/parsing/standard.rb +12 -0
- data/lib/mindee/parsing.rb +3 -2
- data/lib/mindee/{input → pdf}/pdf_processing.rb +4 -32
- data/lib/mindee/pdf/pdf_tools.rb +34 -0
- data/lib/mindee/pdf.rb +3 -0
- data/lib/mindee/product/.rubocop.yml +5 -0
- data/lib/mindee/product/custom/custom_v1.rb +35 -0
- data/lib/mindee/product/custom/custom_v1_document.rb +60 -0
- data/lib/mindee/product/custom/custom_v1_page.rb +32 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +38 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +37 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_page.rb +34 -0
- data/lib/mindee/product/financial_document/financial_document_v1.rb +36 -0
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +202 -0
- data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +90 -0
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +32 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +43 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_page.rb +34 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +71 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +51 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rb +34 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +38 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +52 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb +34 -0
- data/lib/mindee/product/fr/id_card/id_card_v1.rb +38 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +82 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_page.rb +48 -0
- data/lib/mindee/product/invoice/invoice_v4.rb +37 -0
- data/lib/mindee/product/invoice/invoice_v4_document.rb +212 -0
- data/lib/mindee/product/invoice/invoice_v4_line_item.rb +66 -0
- data/lib/mindee/product/invoice/invoice_v4_page.rb +32 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +36 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +65 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +32 -0
- data/lib/mindee/product/passport/passport_v1.rb +36 -0
- data/lib/mindee/{parsing/prediction/fr/id_card/id_card_v1.rb → product/passport/passport_v1_document.rb} +45 -45
- data/lib/mindee/product/passport/passport_v1_page.rb +32 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +36 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +83 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v4.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v4_document.rb +86 -0
- data/lib/mindee/product/receipt/receipt_v4_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v5.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v5_document.rb +149 -0
- data/lib/mindee/product/receipt/receipt_v5_line_item.rb +69 -0
- data/lib/mindee/product/receipt/receipt_v5_page.rb +32 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1.rb +38 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +62 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_page.rb +57 -0
- data/lib/mindee/product/us/driver_license/driver_license_v1.rb +38 -0
- data/lib/mindee/product/us/driver_license/driver_license_v1_document.rb +113 -0
- data/lib/mindee/product/us/driver_license/driver_license_v1_page.rb +53 -0
- data/lib/mindee/product.rb +17 -0
- data/lib/mindee/version.rb +2 -1
- data/lib/mindee.rb +3 -1
- metadata +91 -38
- data/docs/code_samples/shipping_containers_v1.txt +0 -14
- data/lib/mindee/document_config.rb +0 -60
- data/lib/mindee/parsing/document.rb +0 -31
- data/lib/mindee/parsing/error.rb +0 -22
- data/lib/mindee/parsing/inference.rb +0 -53
- data/lib/mindee/parsing/page.rb +0 -46
- data/lib/mindee/parsing/prediction/base.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/amount.rb +0 -21
- data/lib/mindee/parsing/prediction/common_fields/base.rb +0 -72
- data/lib/mindee/parsing/prediction/common_fields/company_registration.rb +0 -17
- data/lib/mindee/parsing/prediction/common_fields/date.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/locale.rb +0 -45
- data/lib/mindee/parsing/prediction/common_fields/payment_details.rb +0 -33
- data/lib/mindee/parsing/prediction/common_fields/position.rb +0 -39
- data/lib/mindee/parsing/prediction/common_fields/tax.rb +0 -44
- data/lib/mindee/parsing/prediction/common_fields/text.rb +0 -12
- data/lib/mindee/parsing/prediction/common_fields.rb +0 -11
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +0 -58
- data/lib/mindee/parsing/prediction/custom/fields.rb +0 -91
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +0 -34
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +0 -237
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +0 -40
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +0 -49
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +0 -212
- data/lib/mindee/parsing/prediction/invoice/invoice_v4_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +0 -121
- data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +0 -80
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +0 -87
- data/lib/mindee/parsing/prediction/receipt/receipt_v5.rb +0 -136
- data/lib/mindee/parsing/prediction/receipt/receipt_v5_line_item.rb +0 -37
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +0 -38
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +0 -70
- data/lib/mindee/parsing/prediction.rb +0 -15
data/lib/mindee/geometry.rb
CHANGED
@@ -1,118 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
# @return [Float]
|
9
|
-
attr_accessor :x
|
10
|
-
# @return [Float]
|
11
|
-
attr_accessor :y
|
12
|
-
|
13
|
-
# @param x [Float]
|
14
|
-
# @param y [Float]
|
15
|
-
# rubocop:disable Naming/MethodParameterName
|
16
|
-
def initialize(x, y)
|
17
|
-
@x = x
|
18
|
-
@y = y
|
19
|
-
end
|
20
|
-
# rubocop:enable Naming/MethodParameterName
|
21
|
-
|
22
|
-
# @return [Float]
|
23
|
-
def [](key)
|
24
|
-
case key
|
25
|
-
when 0
|
26
|
-
@x
|
27
|
-
when 1
|
28
|
-
@y
|
29
|
-
else
|
30
|
-
throw '0 or 1 only'
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
# Contains exactly 4 relative vertices coordinates (Points).
|
36
|
-
class Quadrilateral
|
37
|
-
# @return [Mindee::Geometry::Point]
|
38
|
-
attr_accessor :top_left
|
39
|
-
# @return [Mindee::Geometry::Point]
|
40
|
-
attr_accessor :top_right
|
41
|
-
# @return [Mindee::Geometry::Point]
|
42
|
-
attr_accessor :bottom_right
|
43
|
-
# @return [Mindee::Geometry::Point]
|
44
|
-
attr_accessor :bottom_left
|
45
|
-
|
46
|
-
# @param top_left [Mindee::Geometry::Point]
|
47
|
-
# @param top_right [Mindee::Geometry::Point]
|
48
|
-
# @param bottom_right [Mindee::Geometry::Point]
|
49
|
-
# @param bottom_left [Mindee::Geometry::Point]
|
50
|
-
def initialize(top_left, top_right, bottom_right, bottom_left)
|
51
|
-
@top_left = top_left
|
52
|
-
@top_right = top_right
|
53
|
-
@bottom_right = bottom_right
|
54
|
-
@bottom_left = bottom_left
|
55
|
-
end
|
56
|
-
|
57
|
-
# @return [Mindee::Geometry::Point]
|
58
|
-
def [](key)
|
59
|
-
case key
|
60
|
-
when 0
|
61
|
-
@top_left
|
62
|
-
when 1
|
63
|
-
@top_right
|
64
|
-
when 2
|
65
|
-
@bottom_right
|
66
|
-
when 3
|
67
|
-
@bottom_left
|
68
|
-
else
|
69
|
-
throw '0, 1, 2, 3 only'
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
class Polygon < Array
|
75
|
-
end
|
76
|
-
|
77
|
-
# Transform a prediction into a Quadrilateral.
|
78
|
-
def self.quadrilateral_from_prediction(prediction)
|
79
|
-
throw "Prediction must have exactly 4 points, found #{prediction.size}" if prediction.size != 4
|
80
|
-
|
81
|
-
Quadrilateral.new(
|
82
|
-
Point.new(prediction[0][0], prediction[0][1]),
|
83
|
-
Point.new(prediction[1][0], prediction[1][1]),
|
84
|
-
Point.new(prediction[2][0], prediction[2][1]),
|
85
|
-
Point.new(prediction[3][0], prediction[3][1])
|
86
|
-
)
|
87
|
-
end
|
88
|
-
|
89
|
-
# Transform a prediction into a Polygon.
|
90
|
-
def self.polygon_from_prediction(prediction)
|
91
|
-
polygon = Polygon.new
|
92
|
-
return polygon if prediction.nil?
|
93
|
-
|
94
|
-
prediction.each do |point|
|
95
|
-
polygon << Point.new(point[0], point[1])
|
96
|
-
end
|
97
|
-
polygon
|
98
|
-
end
|
99
|
-
|
100
|
-
# @return [Array<Float>]
|
101
|
-
def self.get_bbox(vertices)
|
102
|
-
x_coords = vertices.map(&:x)
|
103
|
-
y_coords = vertices.map(&:y)
|
104
|
-
[x_coords.min, y_coords.min, x_coords.max, y_coords.max]
|
105
|
-
end
|
106
|
-
|
107
|
-
# @return [Mindee::Geometry::Quadrilateral]
|
108
|
-
def self.get_bounding_box(vertices)
|
109
|
-
x_min, y_min, x_max, y_max = get_bbox(vertices)
|
110
|
-
Quadrilateral.new(
|
111
|
-
Point.new(x_min, y_min),
|
112
|
-
Point.new(x_max, y_min),
|
113
|
-
Point.new(x_max, y_max),
|
114
|
-
Point.new(x_min, y_max)
|
115
|
-
)
|
116
|
-
end
|
117
|
-
end
|
118
|
-
end
|
3
|
+
require_relative 'geometry/min_max'
|
4
|
+
require_relative 'geometry/point'
|
5
|
+
require_relative 'geometry/quadrilateral'
|
6
|
+
require_relative 'geometry/polygon'
|
7
|
+
require_relative 'geometry/utils'
|
data/lib/mindee/http/endpoint.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'json'
|
3
4
|
require 'net/http'
|
4
5
|
require_relative '../version'
|
5
6
|
|
@@ -32,12 +33,58 @@ module Mindee
|
|
32
33
|
@url_root = "#{BASE_URL_DEFAULT}/products/#{@owner}/#{@url_name}/v#{@version}"
|
33
34
|
end
|
34
35
|
|
35
|
-
#
|
36
|
-
# @param
|
36
|
+
# Call the synchronous prediction API and return the decoded JSON body.
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
# @param all_words [Boolean]
# @param close_file [Boolean]
# @param cropper [Boolean]
# @return [Hash] parsed response body when the HTTP status is 2xx
# @raise [Mindee::Parsing::Common::HttpError] when the HTTP status is not 2xx
def predict(input_source, all_words, close_file, cropper)
  check_api_key
  response = predict_req_post(input_source, all_words: all_words, close_file: close_file, cropper: cropper)
  hashed_response = JSON.parse(response.body, object_class: Hash)
  return hashed_response if (200..299).cover?(response.code.to_i)

  # An error body is not guaranteed to carry 'api_request' => 'error'; build a
  # minimal payload from the raw response instead of failing on a nil lookup.
  error_payload = hashed_response.dig('api_request', 'error') || {
    'code' => response.code.to_s,
    'message' => 'The server returned an error without an api_request.error payload.',
    'details' => response.body.to_s,
  }
  raise Parsing::Common::HttpError.new(error_payload)
end
|
51
|
+
|
52
|
+
# Send a document to the asynchronous prediction endpoint and return the decoded JSON body.
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
# @param all_words [Boolean]
# @param close_file [Boolean]
# @param cropper [Boolean]
# @return [Hash] parsed response body when the HTTP status is 2xx
# @raise [Mindee::Parsing::Common::HttpError] when the HTTP status is not 2xx
def predict_async(input_source, all_words, close_file, cropper)
  check_api_key
  response = document_queue_req_get(input_source, all_words, close_file, cropper)
  hashed_response = JSON.parse(response.body, object_class: Hash)
  return hashed_response if (200..299).cover?(response.code.to_i)

  # An error body is not guaranteed to carry 'api_request' => 'error'; build a
  # minimal payload from the raw response instead of failing on a nil lookup.
  error_payload = hashed_response.dig('api_request', 'error') || {
    'code' => response.code.to_s,
    'message' => 'The server returned an error without an api_request.error payload.',
    'details' => response.body.to_s,
  }
  raise Parsing::Common::HttpError.new(error_payload)
end
|
66
|
+
|
67
|
+
# Query the document queue for a job and return the decoded JSON body.
# @param job_id [String]
# @return [Hash] parsed response body when the HTTP status is 2xx
# @raise [Mindee::Parsing::Common::HttpError] when the HTTP status is not 2xx
def parse_async(job_id)
  check_api_key
  response = document_queue_req(job_id)
  hashed_response = JSON.parse(response.body, object_class: Hash)
  return hashed_response if (200..299).cover?(response.code.to_i)

  # An error body is not guaranteed to carry 'api_request' => 'error'; build a
  # minimal payload from the raw response instead of failing on a nil lookup.
  error_payload = hashed_response.dig('api_request', 'error') || {
    'code' => response.code.to_s,
    'message' => 'The server returned an error without an api_request.error payload.',
    'details' => response.body.to_s,
  }
  raise Parsing::Common::HttpError.new(error_payload)
end
|
79
|
+
|
80
|
+
private
|
81
|
+
|
82
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
83
|
+
# @param all_words [Boolean]
|
37
84
|
# @param close_file [Boolean]
|
38
85
|
# @param cropper [Boolean]
|
39
86
|
# @return [Net::HTTPResponse]
|
40
|
-
def predict_req_post(
|
87
|
+
def predict_req_post(input_source, all_words: false, close_file: true, cropper: false)
|
41
88
|
uri = URI("#{@url_root}/predict")
|
42
89
|
|
43
90
|
params = {}
|
@@ -49,11 +96,43 @@ module Mindee
|
|
49
96
|
'User-Agent' => USER_AGENT,
|
50
97
|
}
|
51
98
|
req = Net::HTTP::Post.new(uri, headers)
|
99
|
+
form_data = if input_source.is_a?(Mindee::Input::Source::UrlInputSource)
|
100
|
+
[['document', input_source.url]]
|
101
|
+
else
|
102
|
+
[input_source.read_document(close: close_file)]
|
103
|
+
end
|
104
|
+
form_data.push ['include_mvision', 'true'] if all_words
|
105
|
+
|
106
|
+
req.set_form(form_data, 'multipart/form-data')
|
107
|
+
|
108
|
+
Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
|
109
|
+
http.request(req)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
114
|
+
# @param all_words [Boolean]
|
115
|
+
# @param close_file [Boolean]
|
116
|
+
# @param cropper [Boolean]
|
117
|
+
# @return [Net::HTTPResponse]
|
118
|
+
def document_queue_req_get(input_source, all_words, close_file, cropper)
|
119
|
+
uri = URI("#{@url_root}/predict_async")
|
120
|
+
|
121
|
+
params = {}
|
122
|
+
params[:cropper] = 'true' if cropper
|
123
|
+
uri.query = URI.encode_www_form(params)
|
52
124
|
|
53
|
-
|
54
|
-
'
|
125
|
+
headers = {
|
126
|
+
'Authorization' => "Token #{@api_key}",
|
127
|
+
'User-Agent' => USER_AGENT,
|
55
128
|
}
|
56
|
-
|
129
|
+
req = Net::HTTP::Post.new(uri, headers)
|
130
|
+
form_data = if input_source.is_a?(Mindee::Input::Source::UrlInputSource)
|
131
|
+
[['document', input_source.url]]
|
132
|
+
else
|
133
|
+
[input_source.read_document(close: close_file)]
|
134
|
+
end
|
135
|
+
form_data.push ['include_mvision', 'true'] if all_words
|
57
136
|
|
58
137
|
req.set_form(form_data, 'multipart/form-data')
|
59
138
|
|
@@ -61,19 +140,39 @@ module Mindee
|
|
61
140
|
http.request(req)
|
62
141
|
end
|
63
142
|
end
|
64
|
-
end
|
65
143
|
|
66
|
-
|
67
|
-
|
68
|
-
def
|
69
|
-
|
144
|
+
# GET the queue entry for a job, following at most one HTTP redirect.
# @param job_id [String]
# @return [Net::HTTPResponse] the redirect target's response when the first
#   answer is a 3xx carrying a Location header, otherwise the first response
def document_queue_req(job_id)
  uri = URI("#{@url_root}/documents/queue/#{job_id}")

  headers = {
    'Authorization' => "Token #{@api_key}",
    'User-Agent' => USER_AGENT,
  }

  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
    http.request(Net::HTTP::Get.new(uri, headers))
  end

  location = response['location']
  return response unless (300..399).cover?(response.code.to_i) && location

  # Location may be relative or point at another host: resolve it against the
  # original URI and connect to the host it actually names.
  redirect_uri = URI.join(uri, location)
  # Never forward the API token to a host other than the one it was issued for.
  redirect_headers = if redirect_uri.host == uri.host
                       headers
                     else
                       headers.reject { |name, _value| name == 'Authorization' }
                     end

  # TLS stays mandatory on the second hop, as it was for the first request.
  Net::HTTP.start(
    redirect_uri.hostname, redirect_uri.port, use_ssl: true, read_timeout: @request_timeout
  ) do |http|
    http.request(Net::HTTP::Get.new(redirect_uri, redirect_headers))
  end
end
|
71
|
-
end
|
72
168
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
169
|
+
# Ensure an API key is configured before issuing a request.
# A nil, empty or whitespace-only key is treated as missing.
# @return [nil] when a usable key is set
# @raise [RuntimeError] when the key is missing
def check_api_key
  return unless @api_key.to_s.strip.empty?

  raise "Missing API key for product \"'#{@url_name}' v#{@version}\" (belonging to \"#{@owner}\"), " \
        "check your Client Configuration.\n" \
        'You can set this using the ' \
        "'#{HTTP::API_KEY_ENV_NAME}' environment variable."
end
|
78
177
|
end
|
79
178
|
end
|
data/lib/mindee/http.rb
ADDED
data/lib/mindee/input/sources.rb
CHANGED
@@ -3,94 +3,111 @@
|
|
3
3
|
require 'stringio'
|
4
4
|
require 'marcel'
|
5
5
|
|
6
|
-
require_relative '
|
6
|
+
require_relative '../pdf'
|
7
7
|
|
8
8
|
module Mindee
|
9
9
|
module Input
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
# Base class for loading documents.
|
20
|
-
class InputDocument
|
21
|
-
# @return [String]
|
22
|
-
attr_reader :filename
|
23
|
-
# @return [String]
|
24
|
-
attr_reader :file_mimetype
|
25
|
-
# @return [StreamIO]
|
26
|
-
attr_reader :io_stream
|
27
|
-
|
28
|
-
# @param io_stream [StreamIO]
|
29
|
-
def initialize(io_stream, filename)
|
30
|
-
@io_stream = io_stream
|
31
|
-
@filename = filename
|
32
|
-
@file_mimetype = Marcel::MimeType.for @io_stream, name: @filename
|
33
|
-
|
34
|
-
return if ALLOWED_MIME_TYPES.include? @file_mimetype
|
35
|
-
|
36
|
-
raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
|
37
|
-
end
|
10
|
+
module Source
|
11
|
+
ALLOWED_MIME_TYPES = [
|
12
|
+
'application/pdf',
|
13
|
+
'image/heic',
|
14
|
+
'image/png',
|
15
|
+
'image/jpeg',
|
16
|
+
'image/tiff',
|
17
|
+
'image/webp',
|
18
|
+
].freeze
|
38
19
|
|
39
|
-
|
40
|
-
|
41
|
-
|
20
|
+
# Common behaviour for documents read from a local IO-like stream.
class LocalInputSource
  # @return [String] name given to the document
  attr_reader :filename
  # @return [String] MIME type detected by Marcel from the stream and the filename
  attr_reader :file_mimetype
  # @return [#read, #seek, #close] stream holding the document bytes
  attr_reader :io_stream

  # @param io_stream [#read, #seek, #close]
  # @param filename [String]
  # @raise [RuntimeError] when the detected MIME type is not in ALLOWED_MIME_TYPES
  def initialize(io_stream, filename)
    @io_stream = io_stream
    @filename = filename
    @file_mimetype = Marcel::MimeType.for(@io_stream, name: @filename)

    unless ALLOWED_MIME_TYPES.include?(@file_mimetype)
      raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
    end
  end

  # @return [Boolean] whether the detected MIME type is 'application/pdf'
  def pdf?
    @file_mimetype.to_s.eql?('application/pdf')
  end

  # Replaces the stream with the result of PdfProcessor.parse.
  # @param options [Object] passed through to PdfProcessor.parse
  def process_pdf(options)
    @io_stream.seek(0)
    @io_stream = PdfProcessor.parse(@io_stream, options)
  end

  # Builds the form-data triple for the 'document' field.
  # PDF bytes are passed as-is; any other content is base64-packed.
  # @param close [Boolean] close the stream once its content has been read
  # @return [Array] ['document', payload, { filename: ... }]
  def read_document(close: true)
    @io_stream.seek(0)
    data = @io_stream.read
    @io_stream.close if close

    payload = pdf? ? data : [data].pack('m')
    ['document', payload, { filename: @filename }]
  end
end
|
47
60
|
|
48
|
-
#
|
49
|
-
|
50
|
-
@
|
51
|
-
|
52
|
-
|
53
|
-
|
61
|
+
# Load a document from a path.
class PathInputSource < LocalInputSource
  # Opens the file in binary mode and hands the handle to the parent class.
  # @param filepath [String]
  def initialize(filepath)
    io_stream = File.open(filepath, 'rb')
    begin
      super(io_stream, File.basename(filepath))
    rescue StandardError
      # The handle was opened here, so release it when the parent constructor
      # rejects the file instead of leaking the descriptor.
      io_stream.close
      raise
    end
  end
end
|
55
|
-
end
|
56
69
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
70
|
+
# Document source built from a base64-encoded string.
class Base64InputSource < LocalInputSource
  # Decodes the string and exposes the result as a binary in-memory stream.
  # @param base64_string [String]
  # @param filename [String]
  def initialize(base64_string, filename)
    decoded = base64_string.unpack1('m*')
    binary_stream = StringIO.new(decoded)
    binary_stream.set_encoding(Encoding::BINARY)
    super(binary_stream, filename)
  end
end
|
64
|
-
end
|
65
80
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
81
|
+
# Load a document from raw bytes.
class BytesInputSource < LocalInputSource
  # @param raw_bytes [String]
  # @param filename [String]
  def initialize(raw_bytes, filename)
    # Wrap a binary-tagged copy: calling set_encoding on a StringIO re-tags the
    # string it wraps, which would otherwise change the caller's own object.
    io_stream = StringIO.new(raw_bytes.b)
    io_stream.set_encoding Encoding::BINARY
    super(io_stream, filename)
  end
end
|
75
|
-
end
|
76
91
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
super(io_stream, filename)
|
92
|
+
# Document source backed by an already opened file handle.
class FileInputSource < LocalInputSource
  # @param file_handle [#read, #seek, #close] used directly as the document stream
  # @param filename [String]
  def initialize(file_handle, filename)
    super(file_handle, filename)
  end
end
|
86
|
-
end
|
87
100
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
101
|
+
# Load a remote document from a file url.
class UrlInputSource
  # @return [String]
  attr_reader :url

  # @param url [String] must use the https scheme (compared case-insensitively)
  # @raise [RuntimeError] when the URL is nil or does not start with 'https://'
  def initialize(url)
    # to_s makes a nil URL fail with the same explicit message instead of a NoMethodError.
    raise 'URL must be HTTPS' unless url.to_s.match?(%r{\Ahttps://}i)

    @url = url
  end
end
|
95
112
|
end
|
96
113
|
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'document'
|
4
|
+
require 'time'
|
5
|
+
|
6
|
+
module Mindee
|
7
|
+
module Parsing
|
8
|
+
module Common
|
9
|
+
# Symbols used as values for Job#status.
module JobStatus
  # Set when the server reports 'waiting'.
  WAITING = :waiting
  # Set when the server reports 'processing'.
  PROCESSING = :processing
  # Set when the server reports 'completed'.
  COMPLETED = :completed
end
|
14
|
+
|
15
|
+
# Symbols used as values for ApiRequest#status.
module RequestStatus
  # Set when the server reports 'failure'.
  FAILURE = :failure
  # Set when the server reports 'success'.
  SUCCESS = :success
end
|
19
|
+
|
20
|
+
# Job (queue) information on async parsing.
class Job
  # @return [String] value of the 'id' field of the job payload
  attr_reader :id
  # @return [Time] time parsed from the 'issued_at' field
  attr_reader :issued_at
  # @return [Time, nil] time parsed from 'available_at'; nil when that field is absent or null
  attr_reader :available_at
  # @return [Symbol, nil] a JobStatus value, or the raw status converted to a Symbol
  attr_reader :status
  # @return [Integer, nil] milliseconds between issued_at and available_at; nil without available_at
  attr_reader :millisecs_taken

  # @param http_response [Hash] the 'job' section of the server response
  def initialize(http_response)
    @id = http_response['id']
    @issued_at = Time.iso8601(http_response['issued_at'])
    # Assigned up front so both readers are defined even while the job is pending.
    @available_at = nil
    @millisecs_taken = nil

    available_at = http_response['available_at']
    unless available_at.nil?
      @available_at = Time.iso8601(available_at)
      # Time - Time yields seconds as a Float.
      @millisecs_taken = (1000 * (@available_at - @issued_at)).to_i
    end

    @status = parse_status(http_response['status'])
  end

  private

  # Maps the raw status string to a JobStatus value; unknown values become Symbols.
  # @param raw_status [String, nil]
  # @return [Symbol, nil]
  def parse_status(raw_status)
    case raw_status
    when 'waiting' then JobStatus::WAITING
    when 'processing' then JobStatus::PROCESSING
    when 'completed' then JobStatus::COMPLETED
    else raw_status&.to_sym
    end
  end
end
|
53
|
+
|
54
|
+
# HTTP request response.
class ApiRequest
  # @return [Hash]
  attr_reader :error
  # @return [Array<String>, nil] kept under its historical (misspelled) name for compatibility
  attr_reader :ressources
  # Correctly spelled accessor for the same value.
  alias resources ressources
  # @return [Symbol, nil] a RequestStatus value, or the raw status converted to a Symbol
  attr_reader :status
  # @return [Integer]
  attr_reader :status_code
  # @return [String]
  attr_reader :url

  # @param server_response [Hash] the 'api_request' section of the server response
  def initialize(server_response)
    @error = server_response['error']
    # NOTE(review): the API is believed to send this list under 'resources';
    # confirm against the API reference. The misspelled key is still honoured
    # as a fallback so payloads using it keep working.
    @ressources = server_response['resources'] || server_response['ressources']
    @status = parse_status(server_response['status'])
    @status_code = server_response['status_code']
    @url = server_response['url']
  end

  private

  # Maps the raw status string to a RequestStatus value; unknown values become Symbols.
  # @param raw_status [String, nil]
  # @return [Symbol, nil]
  def parse_status(raw_status)
    case raw_status
    when 'failure' then RequestStatus::FAILURE
    when 'success' then RequestStatus::SUCCESS
    else raw_status&.to_sym
    end
  end
end
|
82
|
+
|
83
|
+
# Wrapper class for all predictions (synchronous and asynchronous)
class ApiResponse
  # @return [Mindee::Parsing::Common::Document, nil]
  attr_reader :document
  # @return [Mindee::Parsing::Common::Job, nil]
  attr_reader :job
  # @return [Mindee::Parsing::Common::ApiRequest, nil] nil when the response has no 'api_request' section
  attr_reader :api_request

  # @param product_class [Class<Mindee::Product>]
  # @param http_response [Hash]
  def initialize(product_class, http_response)
    @api_request = nil
    @job = nil
    @document = nil

    request_section = http_response['api_request']
    @api_request = Mindee::Parsing::Common::ApiRequest.new(request_section) unless request_section.nil?

    job_section = http_response['job']
    @job = Mindee::Parsing::Common::Job.new(job_section) unless job_section.nil?

    return unless document_ready?(http_response)

    @document = Mindee::Parsing::Common::Document.new(product_class, http_response['document'])
  end

  private

  # A document is only built for a successful request and, when the response
  # carries a 'job' key, only once that job reports 'completed'.
  # @param http_response [Hash]
  # @return [Boolean]
  def document_ready?(http_response)
    return false if http_response['document'].nil?
    # Safe navigation: a response without 'api_request' must not raise here.
    return false unless @api_request&.status == RequestStatus::SUCCESS

    !http_response.key?('job') || http_response.dig('job', 'status') == 'completed'
  end
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'inference'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
module Parsing
|
7
|
+
module Common
|
8
|
+
# Holds the attributes of the 'document' section of a response.
class Document
  # @return [Object] instance of the product class given to the constructor
  attr_reader :inference
  # @return [String] value of the 'name' field
  attr_reader :name
  # @return [String] value of the 'id' field
  attr_reader :id
  # @return [Mindee::Parsing::Common::Ocr::Ocr, nil]
  attr_reader :ocr

  # Builds the OCR wrapper when the response carries an 'mvision-v1' OCR section.
  # @param http_response [Hash]
  # @return [Mindee::Parsing::Common::Ocr::Ocr, nil]
  def self.load_ocr(http_response)
    ocr_section = http_response.fetch('ocr', nil)
    return nil if ocr_section.nil?
    return nil if ocr_section.fetch('mvision-v1', nil).nil?

    Ocr::Ocr.new(ocr_section)
  end

  # @param product_class [Class<Mindee::Product>] instantiated with the 'inference' section
  # @param http_response [Hash]
  def initialize(product_class, http_response)
    @id = http_response['id']
    @name = http_response['name']
    @inference = product_class.new(http_response['inference'])
    @ocr = self.class.load_ocr(http_response)
  end

  # @return [String]
  def to_s
    [
      "########\nDocument\n########",
      "\n:Mindee ID: #{@id}",
      "\n:Filename: #{@name}",
      "\n\n#{@inference}",
    ].join
  end
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mindee
|
4
|
+
module Parsing
|
5
|
+
module Common
|
6
|
+
# Error raised from the error payload returned by the API.
class HttpError < StandardError
  # @return [String] value of the 'code' field
  attr_reader :api_code
  # @return [String] value of the 'details' field
  attr_reader :api_details
  # @return [String] value of the 'message' field
  attr_reader :api_message

  # @param error [Hash] payload holding the 'code', 'details' and 'message' fields
  def initialize(error)
    @api_code = error['code']
    @api_details = error['details']
    @api_message = error['message']

    summary = "#{@api_code}: #{@api_details} - #{@api_message}"
    super(summary)
  end
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|