mindee 2.2.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +2 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +36 -0
- data/README.md +29 -16
- data/bin/mindee.rb +23 -26
- data/docs/code_samples/bank_account_details_v1.txt +10 -5
- data/docs/code_samples/bank_account_details_v2.txt +19 -0
- data/docs/code_samples/bank_check_v1.txt +10 -5
- data/docs/code_samples/carte_vitale_v1.txt +10 -5
- data/docs/code_samples/custom_v1.txt +19 -10
- data/docs/code_samples/default.txt +10 -2
- data/docs/code_samples/expense_receipts_v4.txt +10 -5
- data/docs/code_samples/expense_receipts_v5.txt +11 -6
- data/docs/code_samples/financial_document_v1.txt +10 -5
- data/docs/code_samples/idcard_fr_v1.txt +10 -5
- data/docs/code_samples/invoice_splitter_v1_async.txt +66 -0
- data/docs/code_samples/invoices_v4.txt +10 -5
- data/docs/code_samples/license_plates_v1.txt +10 -5
- data/docs/code_samples/passport_v1.txt +10 -5
- data/docs/code_samples/proof_of_address_v1.txt +10 -5
- data/docs/ruby-api-builder.md +30 -31
- data/docs/ruby-getting-started.md +64 -23
- data/docs/ruby-invoice-ocr.md +70 -59
- data/docs/ruby-passport-ocr.md +49 -40
- data/docs/ruby-receipt-ocr.md +45 -32
- data/lib/mindee/client.rb +150 -148
- data/lib/mindee/geometry/min_max.rb +23 -0
- data/lib/mindee/geometry/point.rb +35 -0
- data/lib/mindee/geometry/polygon.rb +23 -0
- data/lib/mindee/geometry/quadrilateral.rb +45 -0
- data/lib/mindee/geometry/utils.rb +81 -0
- data/lib/mindee/geometry.rb +5 -116
- data/lib/mindee/http/endpoint.rb +123 -16
- data/lib/mindee/http.rb +3 -0
- data/lib/mindee/input/sources.rb +87 -73
- data/lib/mindee/parsing/common/api_response.rb +109 -0
- data/lib/mindee/parsing/common/document.rb +48 -0
- data/lib/mindee/parsing/common/error.rb +24 -0
- data/lib/mindee/parsing/common/inference.rb +43 -0
- data/lib/mindee/parsing/common/ocr/mvision_v1.rb +34 -0
- data/lib/mindee/parsing/common/ocr/ocr.rb +169 -0
- data/lib/mindee/parsing/common/ocr.rb +3 -0
- data/lib/mindee/parsing/common/orientation.rb +26 -0
- data/lib/mindee/parsing/common/page.rb +40 -0
- data/lib/mindee/parsing/common/prediction.rb +15 -0
- data/lib/mindee/parsing/common/product.rb +19 -0
- data/lib/mindee/parsing/common.rb +10 -0
- data/lib/mindee/parsing/custom/classification_field.rb +28 -0
- data/lib/mindee/parsing/custom/list_field.rb +76 -0
- data/lib/mindee/parsing/custom.rb +4 -0
- data/lib/mindee/parsing/standard/amount_field.rb +26 -0
- data/lib/mindee/parsing/standard/base_field.rb +104 -0
- data/lib/mindee/parsing/standard/classification_field.rb +16 -0
- data/lib/mindee/parsing/standard/company_registration_field.rb +21 -0
- data/lib/mindee/parsing/standard/date_field.rb +34 -0
- data/lib/mindee/parsing/standard/locale_field.rb +50 -0
- data/lib/mindee/parsing/standard/payment_details_field.rb +42 -0
- data/lib/mindee/parsing/standard/position_field.rb +44 -0
- data/lib/mindee/parsing/standard/tax_field.rb +108 -0
- data/lib/mindee/parsing/standard/text_field.rb +16 -0
- data/lib/mindee/parsing/standard.rb +12 -0
- data/lib/mindee/parsing.rb +3 -2
- data/lib/mindee/{input → pdf}/pdf_processing.rb +4 -32
- data/lib/mindee/pdf/pdf_tools.rb +34 -0
- data/lib/mindee/pdf.rb +3 -0
- data/lib/mindee/product/.rubocop.yml +5 -0
- data/lib/mindee/product/custom/custom_v1.rb +35 -0
- data/lib/mindee/product/custom/custom_v1_document.rb +60 -0
- data/lib/mindee/product/custom/custom_v1_page.rb +32 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +38 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +37 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_page.rb +34 -0
- data/lib/mindee/product/financial_document/financial_document_v1.rb +36 -0
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +188 -0
- data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +90 -0
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +32 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +43 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_page.rb +34 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +71 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +58 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rb +34 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +38 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +52 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb +34 -0
- data/lib/mindee/product/fr/id_card/id_card_v1.rb +38 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +82 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_page.rb +48 -0
- data/lib/mindee/product/invoice/invoice_v4.rb +37 -0
- data/lib/mindee/product/invoice/invoice_v4_document.rb +212 -0
- data/lib/mindee/product/invoice/invoice_v4_line_item.rb +66 -0
- data/lib/mindee/product/invoice/invoice_v4_page.rb +32 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +36 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +65 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +32 -0
- data/lib/mindee/product/passport/passport_v1.rb +36 -0
- data/lib/mindee/{parsing/prediction/fr/id_card/id_card_v1.rb → product/passport/passport_v1_document.rb} +45 -45
- data/lib/mindee/product/passport/passport_v1_page.rb +32 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +36 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +83 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v4.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v4_document.rb +86 -0
- data/lib/mindee/product/receipt/receipt_v4_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v5.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v5_document.rb +138 -0
- data/lib/mindee/product/receipt/receipt_v5_line_item.rb +69 -0
- data/lib/mindee/product/receipt/receipt_v5_page.rb +32 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1.rb +38 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +73 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_page.rb +34 -0
- data/lib/mindee/product.rb +16 -0
- data/lib/mindee/version.rb +2 -1
- data/lib/mindee.rb +3 -1
- metadata +87 -38
- data/docs/code_samples/shipping_containers_v1.txt +0 -14
- data/lib/mindee/document_config.rb +0 -60
- data/lib/mindee/parsing/document.rb +0 -31
- data/lib/mindee/parsing/error.rb +0 -22
- data/lib/mindee/parsing/inference.rb +0 -53
- data/lib/mindee/parsing/page.rb +0 -46
- data/lib/mindee/parsing/prediction/base.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/amount.rb +0 -21
- data/lib/mindee/parsing/prediction/common_fields/base.rb +0 -72
- data/lib/mindee/parsing/prediction/common_fields/company_registration.rb +0 -17
- data/lib/mindee/parsing/prediction/common_fields/date.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/locale.rb +0 -45
- data/lib/mindee/parsing/prediction/common_fields/payment_details.rb +0 -33
- data/lib/mindee/parsing/prediction/common_fields/position.rb +0 -39
- data/lib/mindee/parsing/prediction/common_fields/tax.rb +0 -40
- data/lib/mindee/parsing/prediction/common_fields/text.rb +0 -12
- data/lib/mindee/parsing/prediction/common_fields.rb +0 -11
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +0 -58
- data/lib/mindee/parsing/prediction/custom/fields.rb +0 -91
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +0 -34
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +0 -237
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +0 -40
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +0 -49
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +0 -212
- data/lib/mindee/parsing/prediction/invoice/invoice_v4_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +0 -121
- data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +0 -80
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +0 -87
- data/lib/mindee/parsing/prediction/receipt/receipt_v5.rb +0 -136
- data/lib/mindee/parsing/prediction/receipt/receipt_v5_line_item.rb +0 -37
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +0 -38
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +0 -70
- data/lib/mindee/parsing/prediction.rb +0 -15
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
module Parsing
|
|
5
|
+
module Standard
|
|
6
|
+
# Locale information (language, country and currency) read from a prediction.
class Locale
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_reader :confidence
  # Language code in ISO 639-1 format.
  # @return [String]
  attr_reader :language
  # Country code in ISO 3166-1 alpha-2 format.
  # @return [String, nil]
  attr_reader :country
  # Currency code in ISO 4217 format.
  # @return [String]
  attr_reader :currency
  # Language code, with country code when available.
  # @return [String]
  attr_reader :value

  # Reads the locale attributes from the raw prediction hash.
  # The 'value' entry is used when present; otherwise 'language' is used
  # as the value.
  # @param prediction [Hash]
  # @param _page_id [Integer, nil] accepted but not used by this class
  def initialize(prediction, _page_id = nil)
    @confidence = prediction['confidence']
    @value = prediction[prediction.include?('value') ? 'value' : 'language']
    @language, @country, @currency = prediction.values_at('language', 'country', 'currency')
  end

  # Joins every attribute that is set, each one followed by a semicolon.
  # @return [String]
  def to_s
    [@value, @language, @country, @currency]
      .each_with_object(String.new) { |part, text| text << "#{part}; " if part }
      .strip
  end
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'base_field'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
module Parsing
|
|
7
|
+
module Standard
|
|
8
|
+
# Payment details (bank coordinates) found on invoices and receipts.
class PaymentDetails < Field
  # @return [String, nil]
  attr_reader :account_number
  # @return [String, nil]
  attr_reader :iban
  # @return [String, nil]
  attr_reader :routing_number
  # @return [String, nil]
  attr_reader :swift

  # Forwards all arguments to the parent field, then reads the four
  # payment attributes from the prediction hash.
  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    super
    @account_number, @iban, @routing_number, @swift =
      prediction.values_at('account_number', 'iban', 'routing_number', 'swift')
  end

  # Joins every attribute that is set, each one followed by a semicolon.
  # @return [String]
  def to_s
    [@account_number, @iban, @routing_number, @swift]
      .each_with_object(String.new) { |detail, text| text << "#{detail}; " if detail }
      .strip
  end
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
module Parsing
|
|
5
|
+
module Standard
|
|
6
|
+
# An element's position on the image
class PositionField
  # @return [Mindee::Geometry::Polygon, nil]
  attr_reader :polygon
  # Same object as {#polygon}.
  # @return [Mindee::Geometry::Polygon, nil]
  attr_reader :value
  # @return [Mindee::Geometry::Quadrilateral, nil]
  attr_reader :quadrangle
  # @return [Mindee::Geometry::Quadrilateral, nil]
  attr_reader :rectangle
  # @return [Mindee::Geometry::Quadrilateral, nil]
  attr_reader :bounding_box

  # Builds the geometry objects from the raw prediction.
  # A shape whose entry is missing, nil or empty is left as nil instead
  # of raising.
  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  def initialize(prediction, page_id)
    polygon_points = prediction['polygon']
    @polygon = Geometry.polygon_from_prediction(polygon_points) unless blank?(polygon_points)
    @quadrangle = to_quadrilateral(prediction, 'quadrangle')
    @rectangle = to_quadrilateral(prediction, 'rectangle')
    @bounding_box = to_quadrilateral(prediction, 'bounding_box')
    @page_id = page_id || prediction['page_id']
    @value = @polygon
  end

  # @return [String] a short description of the polygon, or an empty
  #   string when no polygon was found
  def to_s
    # @polygon is nil whenever the prediction carried no polygon points.
    return '' if @polygon.nil?

    "Polygon with #{@polygon.size} points."
  end

  private

  # @param points [Array, nil]
  # @return [Boolean] true when there are no points to build a shape from
  def blank?(points)
    points.nil? || points.empty?
  end

  # @param prediction [Hash]
  # @param key [String]
  # @return [Mindee::Geometry::Quadrilateral, nil]
  def to_quadrilateral(prediction, key)
    Geometry.quadrilateral_from_prediction(prediction[key]) unless blank?(prediction[key])
  end
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'base_field'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
module Parsing
|
|
7
|
+
module Standard
|
|
8
|
+
# A single tax entry: amount, rate, code and base.
class TaxField < Field
  # Tax value as 3 decimal float
  # @return [Float, nil]
  attr_reader :value
  # Tax rate percentage
  # @return [Float]
  attr_reader :rate
  # Tax code
  # @return [String]
  attr_reader :code
  # Tax base
  # @return [Float]
  attr_reader :base

  # Forwards to the parent field, then reads the tax attributes.
  # The amount is rounded to 3 decimals; a code of 'None' is ignored.
  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  def initialize(prediction, page_id)
    super
    amount = prediction['value']
    rate = prediction['rate']
    base = prediction['base']
    code = prediction['code']
    @value = amount.round(3) unless amount.nil?
    @rate = rate.to_f unless rate.nil?
    @base = base.to_f unless base.nil?
    @code = code if code != 'None'
  end

  # Formats a number with two decimals.
  # @param value [Float]
  # @return [String]
  def print_float(value)
    sprintf('%.2f', value)
  end

  # One-line summary of the tax entry.
  # @return [String]
  def to_s
    cells = printable_values
    "Base: #{cells[:base]}, Code: #{cells[:code]}, Rate (%): #{cells[:rate]}, Amount: #{cells[:value]}".strip
  end

  # Printable form of every attribute; unset attributes become ''.
  # @return [Hash]
  def printable_values
    {
      code: @code.nil? ? '' : @code,
      base: @base.nil? ? '' : print_float(@base),
      rate: @rate.nil? ? '' : print_float(@rate),
      value: @value.nil? ? '' : print_float(@value),
    }
  end

  # The entry rendered as one row of the taxes table.
  # @return [String]
  def to_table_line
    cells = printable_values
    base_cell = cells[:base].ljust(13, ' ')
    code_cell = cells[:code].ljust(6, ' ')
    rate_cell = cells[:rate].ljust(8, ' ')
    value_cell = cells[:value].ljust(13, ' ')
    "| #{base_cell} | #{code_cell} | #{rate_cell} | #{value_cell} |".strip
  end
end
|
|
70
|
+
|
|
71
|
+
# Represents tax information, grouped as an array.
class Taxes < Array
  # Table column titles with the cell widths used by
  # TaxField#to_table_line, so that header, rows and separators line up.
  COLUMNS = [['Base', 13], ['Code', 6], ['Rate (%)', 8], ['Amount', 13]].freeze

  # Wraps every entry of the prediction in a TaxField.
  # @param prediction [Array<Hash>]
  # @param page_id [Integer, nil]
  def initialize(prediction, page_id)
    super(prediction.map { |entry| TaxField.new(entry, page_id) })
  end

  # Horizontal rule of the table, one segment per column.
  # Each segment is the cell width plus the two padding spaces.
  # @param char [String]
  # @return [String]
  def line_separator(char)
    segments = COLUMNS.map { |_title, width| char * (width + 2) }
    " +#{segments.join('+')}+"
  end

  # Renders all entries as a text table; empty when there are no entries.
  # @return [String]
  def to_s
    return '' if empty?

    header = COLUMNS.map { |title, width| title.ljust(width) }.join(' | ')
    out_str = String.new
    out_str << "\n#{line_separator('-')}"
    out_str << "\n | #{header} |"
    out_str << "\n#{line_separator('=')}"
    each do |entry|
      out_str << "\n #{entry.to_table_line}\n#{line_separator('-')}"
    end
    out_str
  end
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'base_field'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
module Parsing
|
|
7
|
+
module Standard
|
|
8
|
+
# A field whose value is plain text.
class TextField < Field
  # Value as String
  # @return [String, nil]
  attr_reader :value
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'standard/amount_field'
|
|
4
|
+
require_relative 'standard/base_field'
|
|
5
|
+
require_relative 'standard/classification_field'
|
|
6
|
+
require_relative 'standard/company_registration_field'
|
|
7
|
+
require_relative 'standard/date_field'
|
|
8
|
+
require_relative 'standard/payment_details_field'
|
|
9
|
+
require_relative 'standard/position_field'
|
|
10
|
+
require_relative 'standard/tax_field'
|
|
11
|
+
require_relative 'standard/text_field'
|
|
12
|
+
require_relative 'standard/locale_field'
|
data/lib/mindee/parsing.rb
CHANGED
|
@@ -2,41 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
require 'set'
|
|
4
4
|
require 'origami'
|
|
5
|
-
|
|
6
|
-
# Monkey-patching for Origami
|
|
7
|
-
module PDFTools
|
|
8
|
-
def to_io_stream(params = {})
|
|
9
|
-
options = {
|
|
10
|
-
delinearize: true,
|
|
11
|
-
recompile: true,
|
|
12
|
-
decrypt: false,
|
|
13
|
-
}
|
|
14
|
-
options.update(params)
|
|
15
|
-
|
|
16
|
-
if frozen? # incompatible flags with frozen doc (signed)
|
|
17
|
-
options[:recompile] = nil
|
|
18
|
-
options[:rebuild_xrefs] = nil
|
|
19
|
-
options[:noindent] = nil
|
|
20
|
-
options[:obfuscate] = false
|
|
21
|
-
end
|
|
22
|
-
load_all_objects unless @loaded
|
|
23
|
-
|
|
24
|
-
intents_as_pdfa1 if options[:intent] =~ %r{pdf[/-]?A1?/i}
|
|
25
|
-
delinearize! if options[:delinearize] && linearized?
|
|
26
|
-
compile(options) if options[:recompile]
|
|
27
|
-
|
|
28
|
-
io_stream = StringIO.new(output(options))
|
|
29
|
-
io_stream.set_encoding Encoding::BINARY
|
|
30
|
-
io_stream
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
Origami::PDF.class_eval { include PDFTools }
|
|
5
|
+
require_relative 'pdf_tools'
|
|
35
6
|
|
|
36
7
|
module Mindee
|
|
37
|
-
module
|
|
38
|
-
#
|
|
8
|
+
module PDF
|
|
9
|
+
# Module for PDF document handling
|
|
39
10
|
module PdfProcessor
|
|
11
|
+
Origami::PDF.class_eval { include PDFTools }
|
|
40
12
|
DEFAULT_OPTIONS = {
|
|
41
13
|
page_indexes: [0],
|
|
42
14
|
operation: :KEEP_ONLY,
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
module PDF
|
|
5
|
+
# Monkey-patching for Origami
# Mixed into the PDF document class; every bare method call below
# (load_all_objects, linearized?, compile, output, ...) is expected to be
# provided by the including class.
module PDFTools
  # Writes the document into an in-memory binary stream.
  # @param params [Hash] options overriding the defaults
  #   (delinearize: true, recompile: true, decrypt: false)
  # @return [StringIO]
  def to_io_stream(params = {})
    options = {
      delinearize: true,
      recompile: true,
      decrypt: false,
    }.merge(params)

    if frozen? # incompatible flags with frozen doc (signed)
      options[:recompile] = nil
      options[:rebuild_xrefs] = nil
      options[:noindent] = nil
      options[:obfuscate] = false
    end
    load_all_objects unless @loaded

    # The case-insensitive flag must sit outside the pattern: accepts
    # e.g. "pdfa", "PDF/A1" or "pdf-a1".
    intents_as_pdfa1 if options[:intent].to_s.match?(%r{pdf[/-]?A1?}i)
    delinearize! if options[:delinearize] && linearized?
    compile(options) if options[:recompile]

    io_stream = StringIO.new(output(options))
    io_stream.set_encoding Encoding::BINARY
    io_stream
  end
end
|
|
33
|
+
end
|
|
34
|
+
end
|
data/lib/mindee/pdf.rb
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'custom_v1_document'
|
|
4
|
+
require_relative 'custom_v1_page'
|
|
5
|
+
|
|
6
|
+
module Mindee
|
|
7
|
+
module Product
|
|
8
|
+
module Custom
|
|
9
|
+
# Inference for the Custom Document V1 product: one document-level
# prediction plus one entry per page.
class CustomV1 < Mindee::Parsing::Common::Inference
  @endpoint_name = ''
  @endpoint_version = ''

  class << self
    # Name of the endpoint for this product.
    # @return [String]
    attr_reader :endpoint_name
    # Version for this product.
    # @return [String]
    attr_reader :endpoint_version
  end

  # @param prediction [Hash]
  def initialize(prediction)
    super
    @prediction = CustomV1Document.new(prediction['prediction'], nil)
    @pages = prediction['pages'].map { |page| CustomV1Page.new(page) }
  end
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../../parsing'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
module Product
|
|
7
|
+
module Custom
|
|
8
|
+
# Custom Document V1 prediction
class CustomV1Document < Mindee::Parsing::Common::Prediction
  # All value fields in the document
  # @return [Hash<Symbol, Mindee::Parsing::Custom::ListField>]
  attr_reader :fields
  # All classifications in the document
  # @return [Hash<Symbol, Mindee::Parsing::Custom::ClassificationField>]
  attr_reader :classifications

  # Sorts every entry of the prediction into {#fields} or
  # {#classifications}, keyed by the field name as a Symbol.
  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @raise [ArgumentError] when an entry has neither 'values' nor 'value'
  def initialize(prediction, page_id)
    super()
    @fields = {}
    @classifications = {}
    prediction.each do |field_name, field_prediction|
      set_field(field_name.to_sym, field_prediction, page_id)
    end
  end

  # One ":name: value" line per entry, classifications first.
  # @return [String]
  def to_s
    out_str = String.new
    [@classifications, @fields].each do |group|
      group.each { |name, info| out_str << "\n:#{name}: #{info}".rstrip }
    end
    # Drop the leading newline; an empty document yields ''.
    out_str[1..].to_s
  end

  private

  # Currently two types of fields are possible in a custom API response:
  # fields having a list of values, and classification fields.
  # Only value lists have the 'values' attribute.
  # @param field_sym [Symbol]
  # @param field_prediction [Hash]
  # @param page_id [Integer, nil]
  # @raise [ArgumentError] when the entry matches neither type
  def set_field(field_sym, field_prediction, page_id)
    if field_prediction.key? 'values'
      @fields[field_sym] = Parsing::Custom::ListField.new(field_prediction, page_id)
    elsif field_prediction.key? 'value'
      @classifications[field_sym] = Parsing::Custom::ClassificationField.new(field_prediction)
    else
      # `raise`, not `throw`: throw is for catch/throw control flow and
      # without a matching catch only surfaces as an UncaughtThrowError.
      raise ArgumentError, "Unknown API field type for '#{field_sym}'"
    end
  end
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../../parsing'
|
|
4
|
+
require_relative 'custom_v1_document'
|
|
5
|
+
|
|
6
|
+
module Mindee
|
|
7
|
+
module Product
|
|
8
|
+
module Custom
|
|
9
|
+
# A single page of a Custom Document V1 response.
class CustomV1Page < Mindee::Parsing::Common::Page
  # Builds the page-level prediction from the page's 'prediction' and
  # 'id' entries.
  # @param prediction [Hash]
  def initialize(prediction)
    super(prediction)
    page_prediction, page_id = prediction.values_at('prediction', 'id')
    @prediction = CustomV1PagePrediction.new(page_prediction, page_id)
  end
end
|
|
20
|
+
|
|
21
|
+
# Page-level prediction for Custom Document V1.
class CustomV1PagePrediction < CustomV1Document
  # Same text as the document prediction, preceded by a newline.
  # @return [String]
  def to_s
    "\n#{super}"
  end
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../../../parsing'
|
|
4
|
+
require_relative 'license_plate_v1_document'
|
|
5
|
+
require_relative 'license_plate_v1_page'
|
|
6
|
+
|
|
7
|
+
module Mindee
|
|
8
|
+
module Product
|
|
9
|
+
module EU
|
|
10
|
+
module LicensePlate
|
|
11
|
+
# Inference for the License Plate V1 product: one document-level
# prediction plus one entry per page.
class LicensePlateV1 < Mindee::Parsing::Common::Inference
  @endpoint_name = 'license_plates'
  @endpoint_version = '1'

  class << self
    # Name of the endpoint for this product.
    # @return [String]
    attr_reader :endpoint_name
    # Version for this product.
    # @return [String]
    attr_reader :endpoint_version
  end

  # @param prediction [Hash]
  def initialize(prediction)
    super
    @prediction = LicensePlateV1Document.new(prediction['prediction'], nil)
    @pages = prediction['pages'].map { |page| LicensePlateV1Page.new(page) }
  end
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../../../parsing'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
module Product
|
|
7
|
+
module EU
|
|
8
|
+
module LicensePlate
|
|
9
|
+
# Document-level prediction for License Plate V1.
class LicensePlateV1Document < Mindee::Parsing::Common::Prediction
  include Mindee::Parsing::Standard
  # List of all license plates found in the image.
  # @return [Array<Mindee::Parsing::Standard::TextField>]
  attr_reader :license_plates

  # Wraps every 'license_plates' entry in a TextField.
  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  def initialize(prediction, page_id)
    super()
    @license_plates = prediction['license_plates'].map { |item| TextField.new(item, page_id) }
  end

  # All plates under a single ":License Plates:" label, one per line.
  # @return [String]
  def to_s
    plates = @license_plates.join("\n #{' ' * 16}")
    ":License Plates: #{plates}".rstrip
  end
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../../../parsing'
|
|
4
|
+
require_relative 'license_plate_v1_document'
|
|
5
|
+
|
|
6
|
+
module Mindee
|
|
7
|
+
module Product
|
|
8
|
+
module EU
|
|
9
|
+
module LicensePlate
|
|
10
|
+
# A single page of a License Plate V1 response.
class LicensePlateV1Page < Mindee::Parsing::Common::Page
  # Builds the page-level prediction from the page's 'prediction' and
  # 'id' entries.
  # @param prediction [Hash]
  def initialize(prediction)
    super(prediction)
    page_prediction, page_id = prediction.values_at('prediction', 'id')
    @prediction = LicensePlateV1PagePrediction.new(page_prediction, page_id)
  end
end
|
|
21
|
+
|
|
22
|
+
# Page-level prediction for License Plate V1.
class LicensePlateV1PagePrediction < LicensePlateV1Document
  # Same text as the document prediction, preceded by a newline.
  # @return [String]
  def to_s
    "\n#{super}"
  end
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../../parsing'
|
|
4
|
+
require_relative 'financial_document_v1_document'
|
|
5
|
+
require_relative 'financial_document_v1_page'
|
|
6
|
+
|
|
7
|
+
module Mindee
|
|
8
|
+
module Product
|
|
9
|
+
module FinancialDocument
|
|
10
|
+
# Inference for the Financial Document V1 product: one document-level
# prediction plus one entry per page.
class FinancialDocumentV1 < Mindee::Parsing::Common::Inference
  @endpoint_name = 'financial_document'
  @endpoint_version = '1'

  class << self
    # Name of the endpoint for this product.
    # @return [String]
    attr_reader :endpoint_name
    # Version for this product.
    # @return [String]
    attr_reader :endpoint_version
  end

  # @param prediction [Hash]
  def initialize(prediction)
    super
    @prediction = FinancialDocumentV1Document.new(prediction['prediction'], nil)
    @pages = prediction['pages'].map { |page| FinancialDocumentV1Page.new(page) }
  end
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|