mindee 2.2.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +2 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +36 -0
- data/README.md +29 -16
- data/bin/mindee.rb +23 -26
- data/docs/code_samples/bank_account_details_v1.txt +10 -5
- data/docs/code_samples/bank_account_details_v2.txt +19 -0
- data/docs/code_samples/bank_check_v1.txt +10 -5
- data/docs/code_samples/carte_vitale_v1.txt +10 -5
- data/docs/code_samples/custom_v1.txt +19 -10
- data/docs/code_samples/default.txt +10 -2
- data/docs/code_samples/expense_receipts_v4.txt +10 -5
- data/docs/code_samples/expense_receipts_v5.txt +11 -6
- data/docs/code_samples/financial_document_v1.txt +10 -5
- data/docs/code_samples/idcard_fr_v1.txt +10 -5
- data/docs/code_samples/invoice_splitter_v1_async.txt +66 -0
- data/docs/code_samples/invoices_v4.txt +10 -5
- data/docs/code_samples/license_plates_v1.txt +10 -5
- data/docs/code_samples/passport_v1.txt +10 -5
- data/docs/code_samples/proof_of_address_v1.txt +10 -5
- data/docs/ruby-api-builder.md +30 -31
- data/docs/ruby-getting-started.md +64 -23
- data/docs/ruby-invoice-ocr.md +70 -59
- data/docs/ruby-passport-ocr.md +49 -40
- data/docs/ruby-receipt-ocr.md +45 -32
- data/lib/mindee/client.rb +150 -148
- data/lib/mindee/geometry/min_max.rb +23 -0
- data/lib/mindee/geometry/point.rb +35 -0
- data/lib/mindee/geometry/polygon.rb +23 -0
- data/lib/mindee/geometry/quadrilateral.rb +45 -0
- data/lib/mindee/geometry/utils.rb +81 -0
- data/lib/mindee/geometry.rb +5 -116
- data/lib/mindee/http/endpoint.rb +123 -16
- data/lib/mindee/http.rb +3 -0
- data/lib/mindee/input/sources.rb +87 -73
- data/lib/mindee/parsing/common/api_response.rb +109 -0
- data/lib/mindee/parsing/common/document.rb +48 -0
- data/lib/mindee/parsing/common/error.rb +24 -0
- data/lib/mindee/parsing/common/inference.rb +43 -0
- data/lib/mindee/parsing/common/ocr/mvision_v1.rb +34 -0
- data/lib/mindee/parsing/common/ocr/ocr.rb +169 -0
- data/lib/mindee/parsing/common/ocr.rb +3 -0
- data/lib/mindee/parsing/common/orientation.rb +26 -0
- data/lib/mindee/parsing/common/page.rb +40 -0
- data/lib/mindee/parsing/common/prediction.rb +15 -0
- data/lib/mindee/parsing/common/product.rb +19 -0
- data/lib/mindee/parsing/common.rb +10 -0
- data/lib/mindee/parsing/custom/classification_field.rb +28 -0
- data/lib/mindee/parsing/custom/list_field.rb +76 -0
- data/lib/mindee/parsing/custom.rb +4 -0
- data/lib/mindee/parsing/standard/amount_field.rb +26 -0
- data/lib/mindee/parsing/standard/base_field.rb +104 -0
- data/lib/mindee/parsing/standard/classification_field.rb +16 -0
- data/lib/mindee/parsing/standard/company_registration_field.rb +21 -0
- data/lib/mindee/parsing/standard/date_field.rb +34 -0
- data/lib/mindee/parsing/standard/locale_field.rb +50 -0
- data/lib/mindee/parsing/standard/payment_details_field.rb +42 -0
- data/lib/mindee/parsing/standard/position_field.rb +44 -0
- data/lib/mindee/parsing/standard/tax_field.rb +108 -0
- data/lib/mindee/parsing/standard/text_field.rb +16 -0
- data/lib/mindee/parsing/standard.rb +12 -0
- data/lib/mindee/parsing.rb +3 -2
- data/lib/mindee/{input → pdf}/pdf_processing.rb +4 -32
- data/lib/mindee/pdf/pdf_tools.rb +34 -0
- data/lib/mindee/pdf.rb +3 -0
- data/lib/mindee/product/.rubocop.yml +5 -0
- data/lib/mindee/product/custom/custom_v1.rb +35 -0
- data/lib/mindee/product/custom/custom_v1_document.rb +60 -0
- data/lib/mindee/product/custom/custom_v1_page.rb +32 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +38 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +37 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_page.rb +34 -0
- data/lib/mindee/product/financial_document/financial_document_v1.rb +36 -0
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +188 -0
- data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +90 -0
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +32 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +43 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_page.rb +34 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +71 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +58 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rb +34 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +38 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +52 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb +34 -0
- data/lib/mindee/product/fr/id_card/id_card_v1.rb +38 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +82 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_page.rb +48 -0
- data/lib/mindee/product/invoice/invoice_v4.rb +37 -0
- data/lib/mindee/product/invoice/invoice_v4_document.rb +212 -0
- data/lib/mindee/product/invoice/invoice_v4_line_item.rb +66 -0
- data/lib/mindee/product/invoice/invoice_v4_page.rb +32 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +36 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +65 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +32 -0
- data/lib/mindee/product/passport/passport_v1.rb +36 -0
- data/lib/mindee/{parsing/prediction/fr/id_card/id_card_v1.rb → product/passport/passport_v1_document.rb} +45 -45
- data/lib/mindee/product/passport/passport_v1_page.rb +32 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +36 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +83 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v4.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v4_document.rb +86 -0
- data/lib/mindee/product/receipt/receipt_v4_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v5.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v5_document.rb +138 -0
- data/lib/mindee/product/receipt/receipt_v5_line_item.rb +69 -0
- data/lib/mindee/product/receipt/receipt_v5_page.rb +32 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1.rb +38 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +73 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_page.rb +34 -0
- data/lib/mindee/product.rb +16 -0
- data/lib/mindee/version.rb +2 -1
- data/lib/mindee.rb +3 -1
- metadata +87 -38
- data/docs/code_samples/shipping_containers_v1.txt +0 -14
- data/lib/mindee/document_config.rb +0 -60
- data/lib/mindee/parsing/document.rb +0 -31
- data/lib/mindee/parsing/error.rb +0 -22
- data/lib/mindee/parsing/inference.rb +0 -53
- data/lib/mindee/parsing/page.rb +0 -46
- data/lib/mindee/parsing/prediction/base.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/amount.rb +0 -21
- data/lib/mindee/parsing/prediction/common_fields/base.rb +0 -72
- data/lib/mindee/parsing/prediction/common_fields/company_registration.rb +0 -17
- data/lib/mindee/parsing/prediction/common_fields/date.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/locale.rb +0 -45
- data/lib/mindee/parsing/prediction/common_fields/payment_details.rb +0 -33
- data/lib/mindee/parsing/prediction/common_fields/position.rb +0 -39
- data/lib/mindee/parsing/prediction/common_fields/tax.rb +0 -40
- data/lib/mindee/parsing/prediction/common_fields/text.rb +0 -12
- data/lib/mindee/parsing/prediction/common_fields.rb +0 -11
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +0 -58
- data/lib/mindee/parsing/prediction/custom/fields.rb +0 -91
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +0 -34
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +0 -237
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +0 -40
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +0 -49
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +0 -212
- data/lib/mindee/parsing/prediction/invoice/invoice_v4_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +0 -121
- data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +0 -80
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +0 -87
- data/lib/mindee/parsing/prediction/receipt/receipt_v5.rb +0 -136
- data/lib/mindee/parsing/prediction/receipt/receipt_v5_line_item.rb +0 -37
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +0 -38
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +0 -70
- data/lib/mindee/parsing/prediction.rb +0 -15
data/lib/mindee/geometry.rb
CHANGED
|
@@ -1,118 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
# @return [Float]
|
|
9
|
-
attr_accessor :x
|
|
10
|
-
# @return [Float]
|
|
11
|
-
attr_accessor :y
|
|
12
|
-
|
|
13
|
-
# @param x [Float]
|
|
14
|
-
# @param y [Float]
|
|
15
|
-
# rubocop:disable Naming/MethodParameterName
|
|
16
|
-
def initialize(x, y)
|
|
17
|
-
@x = x
|
|
18
|
-
@y = y
|
|
19
|
-
end
|
|
20
|
-
# rubocop:enable Naming/MethodParameterName
|
|
21
|
-
|
|
22
|
-
# @return [Float]
|
|
23
|
-
def [](key)
|
|
24
|
-
case key
|
|
25
|
-
when 0
|
|
26
|
-
@x
|
|
27
|
-
when 1
|
|
28
|
-
@y
|
|
29
|
-
else
|
|
30
|
-
throw '0 or 1 only'
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# Contains exactly 4 relative vertices coordinates (Points).
|
|
36
|
-
class Quadrilateral
|
|
37
|
-
# @return [Mindee::Geometry::Point]
|
|
38
|
-
attr_accessor :top_left
|
|
39
|
-
# @return [Mindee::Geometry::Point]
|
|
40
|
-
attr_accessor :top_right
|
|
41
|
-
# @return [Mindee::Geometry::Point]
|
|
42
|
-
attr_accessor :bottom_right
|
|
43
|
-
# @return [Mindee::Geometry::Point]
|
|
44
|
-
attr_accessor :bottom_left
|
|
45
|
-
|
|
46
|
-
# @param top_left [Mindee::Geometry::Point]
|
|
47
|
-
# @param top_right [Mindee::Geometry::Point]
|
|
48
|
-
# @param bottom_right [Mindee::Geometry::Point]
|
|
49
|
-
# @param bottom_left [Mindee::Geometry::Point]
|
|
50
|
-
def initialize(top_left, top_right, bottom_right, bottom_left)
|
|
51
|
-
@top_left = top_left
|
|
52
|
-
@top_right = top_right
|
|
53
|
-
@bottom_right = bottom_right
|
|
54
|
-
@bottom_left = bottom_left
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
# @return [Mindee::Geometry::Point]
|
|
58
|
-
def [](key)
|
|
59
|
-
case key
|
|
60
|
-
when 0
|
|
61
|
-
@top_left
|
|
62
|
-
when 1
|
|
63
|
-
@top_right
|
|
64
|
-
when 2
|
|
65
|
-
@bottom_right
|
|
66
|
-
when 3
|
|
67
|
-
@bottom_left
|
|
68
|
-
else
|
|
69
|
-
throw '0, 1, 2, 3 only'
|
|
70
|
-
end
|
|
71
|
-
end
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
class Polygon < Array
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
# Transform a prediction into a Quadrilateral.
|
|
78
|
-
def self.quadrilateral_from_prediction(prediction)
|
|
79
|
-
throw "Prediction must have exactly 4 points, found #{prediction.size}" if prediction.size != 4
|
|
80
|
-
|
|
81
|
-
Quadrilateral.new(
|
|
82
|
-
Point.new(prediction[0][0], prediction[0][1]),
|
|
83
|
-
Point.new(prediction[1][0], prediction[1][1]),
|
|
84
|
-
Point.new(prediction[2][0], prediction[2][1]),
|
|
85
|
-
Point.new(prediction[3][0], prediction[3][1])
|
|
86
|
-
)
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
# Transform a prediction into a Polygon.
|
|
90
|
-
def self.polygon_from_prediction(prediction)
|
|
91
|
-
polygon = Polygon.new
|
|
92
|
-
return polygon if prediction.nil?
|
|
93
|
-
|
|
94
|
-
prediction.each do |point|
|
|
95
|
-
polygon << Point.new(point[0], point[1])
|
|
96
|
-
end
|
|
97
|
-
polygon
|
|
98
|
-
end
|
|
99
|
-
|
|
100
|
-
# @return [Array<Float>]
|
|
101
|
-
def self.get_bbox(vertices)
|
|
102
|
-
x_coords = vertices.map(&:x)
|
|
103
|
-
y_coords = vertices.map(&:y)
|
|
104
|
-
[x_coords.min, y_coords.min, x_coords.max, y_coords.max]
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
# @return [Mindee::Geometry::Quadrilateral]
|
|
108
|
-
def self.get_bounding_box(vertices)
|
|
109
|
-
x_min, y_min, x_max, y_max = get_bbox(vertices)
|
|
110
|
-
Quadrilateral.new(
|
|
111
|
-
Point.new(x_min, y_min),
|
|
112
|
-
Point.new(x_max, y_min),
|
|
113
|
-
Point.new(x_max, y_max),
|
|
114
|
-
Point.new(x_min, y_max)
|
|
115
|
-
)
|
|
116
|
-
end
|
|
117
|
-
end
|
|
118
|
-
end
|
|
3
|
+
require_relative 'geometry/min_max'
|
|
4
|
+
require_relative 'geometry/point'
|
|
5
|
+
require_relative 'geometry/quadrilateral'
|
|
6
|
+
require_relative 'geometry/polygon'
|
|
7
|
+
require_relative 'geometry/utils'
|
data/lib/mindee/http/endpoint.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'json'
|
|
3
4
|
require 'net/http'
|
|
4
5
|
require_relative '../version'
|
|
5
6
|
|
|
@@ -32,12 +33,58 @@ module Mindee
|
|
|
32
33
|
@url_root = "#{BASE_URL_DEFAULT}/products/#{@owner}/#{@url_name}/v#{@version}"
|
|
33
34
|
end
|
|
34
35
|
|
|
35
|
-
#
|
|
36
|
-
# @param
|
|
36
|
+
# Call the prediction API.
|
|
37
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
|
38
|
+
# @param all_words [Boolean]
|
|
39
|
+
# @param close_file [Boolean]
|
|
40
|
+
# @param cropper [Boolean]
|
|
41
|
+
# @return [Hash]
|
|
42
|
+
def predict(input_source, all_words, close_file, cropper)
|
|
43
|
+
check_api_key
|
|
44
|
+
response = predict_req_post(input_source, all_words: all_words, close_file: close_file, cropper: cropper)
|
|
45
|
+
hashed_response = JSON.parse(response.body, object_class: Hash)
|
|
46
|
+
return hashed_response if (200..299).include?(response.code.to_i)
|
|
47
|
+
|
|
48
|
+
error = Parsing::Common::HttpError.new(hashed_response['api_request']['error'])
|
|
49
|
+
raise error
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# Call the prediction API.
|
|
53
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
|
54
|
+
# @param close_file [Boolean]
|
|
55
|
+
# @param cropper [Boolean]
|
|
56
|
+
# @return [Hash]
|
|
57
|
+
def predict_async(input_source, all_words, close_file, cropper)
|
|
58
|
+
check_api_key
|
|
59
|
+
response = document_queue_req_get(input_source, all_words, close_file, cropper)
|
|
60
|
+
hashed_response = JSON.parse(response.body, object_class: Hash)
|
|
61
|
+
return hashed_response if (200..299).include?(response.code.to_i)
|
|
62
|
+
|
|
63
|
+
error = Parsing::Common::HttpError.new(hashed_response['api_request']['error'])
|
|
64
|
+
raise error
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# Calls the parsed async doc.
|
|
68
|
+
# @param job_id [String]
|
|
69
|
+
# @return [Hash]
|
|
70
|
+
def parse_async(job_id)
|
|
71
|
+
check_api_key
|
|
72
|
+
response = document_queue_req(job_id)
|
|
73
|
+
hashed_response = JSON.parse(response.body, object_class: Hash)
|
|
74
|
+
return hashed_response if (200..299).include?(response.code.to_i)
|
|
75
|
+
|
|
76
|
+
error = Parsing::Common::HttpError.new(hashed_response['api_request']['error'])
|
|
77
|
+
raise error
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
private
|
|
81
|
+
|
|
82
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
|
83
|
+
# @param all_words [Boolean]
|
|
37
84
|
# @param close_file [Boolean]
|
|
38
85
|
# @param cropper [Boolean]
|
|
39
86
|
# @return [Net::HTTPResponse]
|
|
40
|
-
def predict_req_post(
|
|
87
|
+
def predict_req_post(input_source, all_words: false, close_file: true, cropper: false)
|
|
41
88
|
uri = URI("#{@url_root}/predict")
|
|
42
89
|
|
|
43
90
|
params = {}
|
|
@@ -49,11 +96,51 @@ module Mindee
|
|
|
49
96
|
'User-Agent' => USER_AGENT,
|
|
50
97
|
}
|
|
51
98
|
req = Net::HTTP::Post.new(uri, headers)
|
|
99
|
+
form_data = if input_source.is_a?(Mindee::Input::Source::UrlInputSource)
|
|
100
|
+
{
|
|
101
|
+
'document' => input_source.url,
|
|
102
|
+
}
|
|
103
|
+
else
|
|
104
|
+
{
|
|
105
|
+
'document' => input_source.read_document(close: close_file),
|
|
106
|
+
}
|
|
107
|
+
end
|
|
108
|
+
form_data.push ['include_mvision', 'true'] if all_words
|
|
109
|
+
|
|
110
|
+
req.set_form(form_data, 'multipart/form-data')
|
|
111
|
+
|
|
112
|
+
Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
|
|
113
|
+
http.request(req)
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
|
118
|
+
# @param all_words [Boolean]
|
|
119
|
+
# @param close_file [Boolean]
|
|
120
|
+
# @param cropper [Boolean]
|
|
121
|
+
# @return [Net::HTTPResponse]
|
|
122
|
+
def document_queue_req_get(input_source, all_words, close_file, cropper)
|
|
123
|
+
uri = URI("#{@url_root}/predict_async")
|
|
124
|
+
|
|
125
|
+
params = {}
|
|
126
|
+
params[:cropper] = 'true' if cropper
|
|
127
|
+
uri.query = URI.encode_www_form(params)
|
|
52
128
|
|
|
53
|
-
|
|
54
|
-
'
|
|
129
|
+
headers = {
|
|
130
|
+
'Authorization' => "Token #{@api_key}",
|
|
131
|
+
'User-Agent' => USER_AGENT,
|
|
55
132
|
}
|
|
56
|
-
|
|
133
|
+
req = Net::HTTP::Post.new(uri, headers)
|
|
134
|
+
form_data = if input_source.is_a?(Mindee::Input::Source::UrlInputSource)
|
|
135
|
+
{
|
|
136
|
+
'document' => input_source.url,
|
|
137
|
+
}
|
|
138
|
+
else
|
|
139
|
+
{
|
|
140
|
+
'document' => input_source.read_document(close: close_file),
|
|
141
|
+
}
|
|
142
|
+
end
|
|
143
|
+
form_data.push ['include_mvision', 'true'] if all_words
|
|
57
144
|
|
|
58
145
|
req.set_form(form_data, 'multipart/form-data')
|
|
59
146
|
|
|
@@ -61,19 +148,39 @@ module Mindee
|
|
|
61
148
|
http.request(req)
|
|
62
149
|
end
|
|
63
150
|
end
|
|
64
|
-
end
|
|
65
151
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def
|
|
69
|
-
|
|
152
|
+
# @param job_id [String]
|
|
153
|
+
# @return [Net::HTTPResponse]
|
|
154
|
+
def document_queue_req(job_id)
|
|
155
|
+
uri = URI("#{@url_root}/documents/queue/#{job_id}")
|
|
156
|
+
|
|
157
|
+
headers = {
|
|
158
|
+
'Authorization' => "Token #{@api_key}",
|
|
159
|
+
'User-Agent' => USER_AGENT,
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
req = Net::HTTP::Get.new(uri, headers)
|
|
163
|
+
|
|
164
|
+
response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
|
|
165
|
+
http.request(req)
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
if response.code.to_i > 299 && response.code.to_i < 400
|
|
169
|
+
req = Net::HTTP::Get.new(response['location'], headers)
|
|
170
|
+
response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http|
|
|
171
|
+
http.request(req)
|
|
172
|
+
end
|
|
173
|
+
end
|
|
174
|
+
response
|
|
70
175
|
end
|
|
71
|
-
end
|
|
72
176
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
177
|
+
def check_api_key
|
|
178
|
+
return unless @api_key.nil? || @api_key.empty?
|
|
179
|
+
|
|
180
|
+
raise "Missing API key for product \"'#{@url_name}' v#{@version}\" (belonging to \"#{@owner}\"), " \
|
|
181
|
+
"check your Client Configuration.\n" \
|
|
182
|
+
'You can set this using the ' \
|
|
183
|
+
"'#{HTTP::API_KEY_ENV_NAME}' environment variable."
|
|
77
184
|
end
|
|
78
185
|
end
|
|
79
186
|
end
|
data/lib/mindee/http.rb
ADDED
data/lib/mindee/input/sources.rb
CHANGED
|
@@ -3,94 +3,108 @@
|
|
|
3
3
|
require 'stringio'
|
|
4
4
|
require 'marcel'
|
|
5
5
|
|
|
6
|
-
require_relative '
|
|
6
|
+
require_relative '../pdf'
|
|
7
7
|
|
|
8
8
|
module Mindee
|
|
9
9
|
module Input
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
# Base class for loading documents.
|
|
20
|
-
class InputDocument
|
|
21
|
-
# @return [String]
|
|
22
|
-
attr_reader :filename
|
|
23
|
-
# @return [String]
|
|
24
|
-
attr_reader :file_mimetype
|
|
25
|
-
# @return [StreamIO]
|
|
26
|
-
attr_reader :io_stream
|
|
27
|
-
|
|
28
|
-
# @param io_stream [StreamIO]
|
|
29
|
-
def initialize(io_stream, filename)
|
|
30
|
-
@io_stream = io_stream
|
|
31
|
-
@filename = filename
|
|
32
|
-
@file_mimetype = Marcel::MimeType.for @io_stream, name: @filename
|
|
33
|
-
|
|
34
|
-
return if ALLOWED_MIME_TYPES.include? @file_mimetype
|
|
35
|
-
|
|
36
|
-
raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
|
|
37
|
-
end
|
|
10
|
+
module Source
|
|
11
|
+
ALLOWED_MIME_TYPES = [
|
|
12
|
+
'application/pdf',
|
|
13
|
+
'image/heic',
|
|
14
|
+
'image/png',
|
|
15
|
+
'image/jpeg',
|
|
16
|
+
'image/tiff',
|
|
17
|
+
'image/webp',
|
|
18
|
+
].freeze
|
|
38
19
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
20
|
+
# Base class for loading documents.
|
|
21
|
+
class LocalInputSource
|
|
22
|
+
# @return [String]
|
|
23
|
+
attr_reader :filename
|
|
24
|
+
# @return [String]
|
|
25
|
+
attr_reader :file_mimetype
|
|
26
|
+
# @return [StreamIO]
|
|
27
|
+
attr_reader :io_stream
|
|
28
|
+
|
|
29
|
+
# @param io_stream [StreamIO]
|
|
30
|
+
def initialize(io_stream, filename)
|
|
31
|
+
@io_stream = io_stream
|
|
32
|
+
@filename = filename
|
|
33
|
+
@file_mimetype = Marcel::MimeType.for @io_stream, name: @filename
|
|
34
|
+
|
|
35
|
+
return if ALLOWED_MIME_TYPES.include? @file_mimetype
|
|
36
|
+
|
|
37
|
+
raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def pdf?
|
|
41
|
+
@file_mimetype == 'application/pdf'
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def process_pdf(options)
|
|
45
|
+
@io_stream.seek(0)
|
|
46
|
+
@io_stream = PdfProcessor.parse(@io_stream, options)
|
|
47
|
+
end
|
|
42
48
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
49
|
+
# @param close [Boolean]
|
|
50
|
+
def read_document(close: true)
|
|
51
|
+
@io_stream.seek(0)
|
|
52
|
+
data = @io_stream.read
|
|
53
|
+
@io_stream.close if close
|
|
54
|
+
[data].pack('m')
|
|
55
|
+
end
|
|
46
56
|
end
|
|
47
57
|
|
|
48
|
-
#
|
|
49
|
-
|
|
50
|
-
@
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
58
|
+
# Load a document from a path.
|
|
59
|
+
class PathInputSource < LocalInputSource
|
|
60
|
+
# @param filepath [String]
|
|
61
|
+
def initialize(filepath)
|
|
62
|
+
io_stream = File.open(filepath, 'rb')
|
|
63
|
+
super(io_stream, File.basename(filepath))
|
|
64
|
+
end
|
|
54
65
|
end
|
|
55
|
-
end
|
|
56
66
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
67
|
+
# Load a document from a base64 string.
|
|
68
|
+
class Base64InputSource < LocalInputSource
|
|
69
|
+
# @param base64_string [String]
|
|
70
|
+
# @param filename [String]
|
|
71
|
+
def initialize(base64_string, filename)
|
|
72
|
+
io_stream = StringIO.new(base64_string.unpack1('m*'))
|
|
73
|
+
io_stream.set_encoding Encoding::BINARY
|
|
74
|
+
super(io_stream, filename)
|
|
75
|
+
end
|
|
63
76
|
end
|
|
64
|
-
end
|
|
65
77
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
78
|
+
# Load a document from raw bytes.
|
|
79
|
+
class BytesInputSource < LocalInputSource
|
|
80
|
+
# @param raw_bytes [String]
|
|
81
|
+
# @param filename [String]
|
|
82
|
+
def initialize(raw_bytes, filename)
|
|
83
|
+
io_stream = StringIO.new(raw_bytes)
|
|
84
|
+
io_stream.set_encoding Encoding::BINARY
|
|
85
|
+
super(io_stream, filename)
|
|
86
|
+
end
|
|
74
87
|
end
|
|
75
|
-
end
|
|
76
88
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
super(io_stream, filename)
|
|
89
|
+
# Load a document from a file handle.
|
|
90
|
+
class FileInputSource < LocalInputSource
|
|
91
|
+
# @param filename [String]
|
|
92
|
+
def initialize(file_handle, filename)
|
|
93
|
+
io_stream = file_handle
|
|
94
|
+
super(io_stream, filename)
|
|
95
|
+
end
|
|
85
96
|
end
|
|
86
|
-
end
|
|
87
97
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
98
|
+
# Load a remote document from a file url.
|
|
99
|
+
class UrlInputSource
|
|
100
|
+
# @return [String]
|
|
101
|
+
attr_reader :url
|
|
102
|
+
|
|
103
|
+
def initialize(url)
|
|
104
|
+
raise 'URL must be HTTPS' unless url.start_with? 'https://'
|
|
105
|
+
|
|
106
|
+
@url = url
|
|
107
|
+
end
|
|
94
108
|
end
|
|
95
109
|
end
|
|
96
110
|
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'document'
|
|
4
|
+
require 'time'
|
|
5
|
+
|
|
6
|
+
module Mindee
|
|
7
|
+
module Parsing
|
|
8
|
+
module Common
|
|
9
|
+
module JobStatus
|
|
10
|
+
WAITING = :waiting
|
|
11
|
+
PROCESSING = :processing
|
|
12
|
+
COMPLETED = :completed
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
module RequestStatus
|
|
16
|
+
FAILURE = :failure
|
|
17
|
+
SUCCESS = :success
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Job (queue) information on async parsing.
|
|
21
|
+
class Job
|
|
22
|
+
# @return [String] Mindee ID of the document
|
|
23
|
+
attr_reader :id
|
|
24
|
+
# @return [Mindee::Parsing::Standard::DateField]
|
|
25
|
+
attr_reader :issued_at
|
|
26
|
+
# @return [Mindee::Parsing::Standard::DateField, nil]
|
|
27
|
+
attr_reader :available_at
|
|
28
|
+
# @return [JobStatus, Symbol]
|
|
29
|
+
attr_reader :status
|
|
30
|
+
# @return [Integer, nil]
|
|
31
|
+
attr_reader :millisecs_taken
|
|
32
|
+
|
|
33
|
+
# @param http_response [Hash]
|
|
34
|
+
def initialize(http_response)
|
|
35
|
+
@id = http_response['id']
|
|
36
|
+
@issued_at = Time.iso8601(http_response['issued_at'])
|
|
37
|
+
if http_response.key?('available_at') && !http_response['available_at'].nil?
|
|
38
|
+
@available_at = Time.iso8601(http_response['available_at'])
|
|
39
|
+
@millisecs_taken = (1000 * (@available_at.to_time - @issued_at.to_time).to_f).to_i
|
|
40
|
+
end
|
|
41
|
+
@status = case http_response['status']
|
|
42
|
+
when 'waiting'
|
|
43
|
+
JobStatus::WAITING
|
|
44
|
+
when 'processing'
|
|
45
|
+
JobStatus::PROCESSING
|
|
46
|
+
when 'completed'
|
|
47
|
+
JobStatus::COMPLETED
|
|
48
|
+
else
|
|
49
|
+
http_response['status']&.to_sym
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# HTTP request response.
|
|
55
|
+
class ApiRequest
|
|
56
|
+
# @return [Hash]
|
|
57
|
+
attr_reader :error
|
|
58
|
+
# @return [Array<String>]
|
|
59
|
+
attr_reader :ressources
|
|
60
|
+
# @return [RequestStatus, Symbol]
|
|
61
|
+
attr_reader :status
|
|
62
|
+
# @return [Integer]
|
|
63
|
+
attr_reader :status_code
|
|
64
|
+
# @return [String]
|
|
65
|
+
attr_reader :url
|
|
66
|
+
|
|
67
|
+
def initialize(server_response)
|
|
68
|
+
@error = server_response['error']
|
|
69
|
+
@ressources = server_response['ressources']
|
|
70
|
+
|
|
71
|
+
@status = if server_response['status'] == 'failure'
|
|
72
|
+
RequestStatus::FAILURE
|
|
73
|
+
elsif server_response['status'] == 'success'
|
|
74
|
+
RequestStatus::SUCCESS
|
|
75
|
+
else
|
|
76
|
+
server_response['status']&.to_sym
|
|
77
|
+
end
|
|
78
|
+
@status_code = server_response['status_code']
|
|
79
|
+
@url = server_response['url']
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# Wrapper class for all predictions (synchronous and asynchronous)
|
|
84
|
+
class ApiResponse
|
|
85
|
+
# @return [Mindee::Parsing::Common::Document, nil]
|
|
86
|
+
attr_reader :document
|
|
87
|
+
# @return [Mindee::Parsing::Common::Job, nil]
|
|
88
|
+
attr_reader :job
|
|
89
|
+
# @return [Mindee::Parsing::Common::ApiRequest]
|
|
90
|
+
attr_reader :api_request
|
|
91
|
+
|
|
92
|
+
# @param product_class [Class<Mindee::Product>]
|
|
93
|
+
# @param http_response [Hash]
|
|
94
|
+
def initialize(product_class, http_response)
|
|
95
|
+
if http_response.key?('api_request')
|
|
96
|
+
@api_request = Mindee::Parsing::Common::ApiRequest.new(http_response['api_request'])
|
|
97
|
+
end
|
|
98
|
+
if http_response.key?('document') &&
|
|
99
|
+
(!http_response.key?('job') ||
|
|
100
|
+
http_response['job']['status'] == 'completed') &&
|
|
101
|
+
@api_request.status == RequestStatus::SUCCESS
|
|
102
|
+
@document = Mindee::Parsing::Common::Document.new(product_class, http_response['document'])
|
|
103
|
+
end
|
|
104
|
+
@job = Mindee::Parsing::Common::Job.new(http_response['job']) if http_response.key?('job')
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'inference'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
module Parsing
|
|
7
|
+
module Common
|
|
8
|
+
# Stores all response attributes.
|
|
9
|
+
class Document
|
|
10
|
+
# @return [Mindee::Inference]
|
|
11
|
+
attr_reader :inference
|
|
12
|
+
# @return [String] Filename sent to the API
|
|
13
|
+
attr_reader :name
|
|
14
|
+
# @return [String] Mindee ID of the document
|
|
15
|
+
attr_reader :id
|
|
16
|
+
# @return [Mindee::Parsing::Common::Ocr::Ocr, nil]
|
|
17
|
+
attr_reader :ocr
|
|
18
|
+
|
|
19
|
+
# @param http_response [Hash]
|
|
20
|
+
# @return [Mindee::Parsing::Common::Ocr::Ocr]
|
|
21
|
+
def self.load_ocr(http_response)
|
|
22
|
+
ocr_prediction = http_response.fetch('ocr', nil)
|
|
23
|
+
return nil if ocr_prediction.nil? || ocr_prediction.fetch('mvision-v1', nil).nil?
|
|
24
|
+
|
|
25
|
+
Ocr(ocr_prediction)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# @param product_class [Class<Mindee::Product>]
|
|
29
|
+
# @param http_response [Hash]
|
|
30
|
+
def initialize(product_class, http_response)
|
|
31
|
+
@id = http_response['id']
|
|
32
|
+
@name = http_response['name']
|
|
33
|
+
@inference = product_class.new(http_response['inference'])
|
|
34
|
+
@ocr = self.class.load_ocr(http_response)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# @return [String]
|
|
38
|
+
def to_s
|
|
39
|
+
out_str = String.new
|
|
40
|
+
out_str << "########\nDocument\n########"
|
|
41
|
+
out_str << "\n:Mindee ID: #{@id}"
|
|
42
|
+
out_str << "\n:Filename: #{@name}"
|
|
43
|
+
out_str << "\n\n#{@inference}"
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
module Parsing
|
|
5
|
+
module Common
|
|
6
|
+
# API HttpError
|
|
7
|
+
class HttpError < StandardError
|
|
8
|
+
# @return [String]
|
|
9
|
+
attr_reader :api_code
|
|
10
|
+
# @return [String]
|
|
11
|
+
attr_reader :api_details
|
|
12
|
+
# @return [String]
|
|
13
|
+
attr_reader :api_message
|
|
14
|
+
|
|
15
|
+
def initialize(error)
|
|
16
|
+
@api_code = error['code']
|
|
17
|
+
@api_details = error['details']
|
|
18
|
+
@api_message = error['message']
|
|
19
|
+
super("#{@api_code}: #{@api_details} - #{@api_message}")
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|