mindee 2.2.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +2 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +36 -0
- data/README.md +29 -16
- data/bin/mindee.rb +23 -26
- data/docs/code_samples/bank_account_details_v1.txt +10 -5
- data/docs/code_samples/bank_account_details_v2.txt +19 -0
- data/docs/code_samples/bank_check_v1.txt +10 -5
- data/docs/code_samples/carte_vitale_v1.txt +10 -5
- data/docs/code_samples/custom_v1.txt +19 -10
- data/docs/code_samples/default.txt +10 -2
- data/docs/code_samples/expense_receipts_v4.txt +10 -5
- data/docs/code_samples/expense_receipts_v5.txt +11 -6
- data/docs/code_samples/financial_document_v1.txt +10 -5
- data/docs/code_samples/idcard_fr_v1.txt +10 -5
- data/docs/code_samples/invoice_splitter_v1_async.txt +66 -0
- data/docs/code_samples/invoices_v4.txt +10 -5
- data/docs/code_samples/license_plates_v1.txt +10 -5
- data/docs/code_samples/passport_v1.txt +10 -5
- data/docs/code_samples/proof_of_address_v1.txt +10 -5
- data/docs/ruby-api-builder.md +30 -31
- data/docs/ruby-getting-started.md +64 -23
- data/docs/ruby-invoice-ocr.md +70 -59
- data/docs/ruby-passport-ocr.md +49 -40
- data/docs/ruby-receipt-ocr.md +45 -32
- data/lib/mindee/client.rb +150 -148
- data/lib/mindee/geometry/min_max.rb +23 -0
- data/lib/mindee/geometry/point.rb +35 -0
- data/lib/mindee/geometry/polygon.rb +23 -0
- data/lib/mindee/geometry/quadrilateral.rb +45 -0
- data/lib/mindee/geometry/utils.rb +81 -0
- data/lib/mindee/geometry.rb +5 -116
- data/lib/mindee/http/endpoint.rb +123 -16
- data/lib/mindee/http.rb +3 -0
- data/lib/mindee/input/sources.rb +87 -73
- data/lib/mindee/parsing/common/api_response.rb +109 -0
- data/lib/mindee/parsing/common/document.rb +48 -0
- data/lib/mindee/parsing/common/error.rb +24 -0
- data/lib/mindee/parsing/common/inference.rb +43 -0
- data/lib/mindee/parsing/common/ocr/mvision_v1.rb +34 -0
- data/lib/mindee/parsing/common/ocr/ocr.rb +169 -0
- data/lib/mindee/parsing/common/ocr.rb +3 -0
- data/lib/mindee/parsing/common/orientation.rb +26 -0
- data/lib/mindee/parsing/common/page.rb +40 -0
- data/lib/mindee/parsing/common/prediction.rb +15 -0
- data/lib/mindee/parsing/common/product.rb +19 -0
- data/lib/mindee/parsing/common.rb +10 -0
- data/lib/mindee/parsing/custom/classification_field.rb +28 -0
- data/lib/mindee/parsing/custom/list_field.rb +76 -0
- data/lib/mindee/parsing/custom.rb +4 -0
- data/lib/mindee/parsing/standard/amount_field.rb +26 -0
- data/lib/mindee/parsing/standard/base_field.rb +104 -0
- data/lib/mindee/parsing/standard/classification_field.rb +16 -0
- data/lib/mindee/parsing/standard/company_registration_field.rb +21 -0
- data/lib/mindee/parsing/standard/date_field.rb +34 -0
- data/lib/mindee/parsing/standard/locale_field.rb +50 -0
- data/lib/mindee/parsing/standard/payment_details_field.rb +42 -0
- data/lib/mindee/parsing/standard/position_field.rb +44 -0
- data/lib/mindee/parsing/standard/tax_field.rb +108 -0
- data/lib/mindee/parsing/standard/text_field.rb +16 -0
- data/lib/mindee/parsing/standard.rb +12 -0
- data/lib/mindee/parsing.rb +3 -2
- data/lib/mindee/{input → pdf}/pdf_processing.rb +4 -32
- data/lib/mindee/pdf/pdf_tools.rb +34 -0
- data/lib/mindee/pdf.rb +3 -0
- data/lib/mindee/product/.rubocop.yml +5 -0
- data/lib/mindee/product/custom/custom_v1.rb +35 -0
- data/lib/mindee/product/custom/custom_v1_document.rb +60 -0
- data/lib/mindee/product/custom/custom_v1_page.rb +32 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +38 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +37 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_page.rb +34 -0
- data/lib/mindee/product/financial_document/financial_document_v1.rb +36 -0
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +188 -0
- data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +90 -0
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +32 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +43 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_page.rb +34 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +71 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +58 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rb +34 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +38 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +52 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb +34 -0
- data/lib/mindee/product/fr/id_card/id_card_v1.rb +38 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +82 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_page.rb +48 -0
- data/lib/mindee/product/invoice/invoice_v4.rb +37 -0
- data/lib/mindee/product/invoice/invoice_v4_document.rb +212 -0
- data/lib/mindee/product/invoice/invoice_v4_line_item.rb +66 -0
- data/lib/mindee/product/invoice/invoice_v4_page.rb +32 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +36 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +65 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +32 -0
- data/lib/mindee/product/passport/passport_v1.rb +36 -0
- data/lib/mindee/{parsing/prediction/fr/id_card/id_card_v1.rb → product/passport/passport_v1_document.rb} +45 -45
- data/lib/mindee/product/passport/passport_v1_page.rb +32 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +36 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +83 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v4.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v4_document.rb +86 -0
- data/lib/mindee/product/receipt/receipt_v4_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v5.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v5_document.rb +138 -0
- data/lib/mindee/product/receipt/receipt_v5_line_item.rb +69 -0
- data/lib/mindee/product/receipt/receipt_v5_page.rb +32 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1.rb +38 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +73 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_page.rb +34 -0
- data/lib/mindee/product.rb +16 -0
- data/lib/mindee/version.rb +2 -1
- data/lib/mindee.rb +3 -1
- metadata +87 -38
- data/docs/code_samples/shipping_containers_v1.txt +0 -14
- data/lib/mindee/document_config.rb +0 -60
- data/lib/mindee/parsing/document.rb +0 -31
- data/lib/mindee/parsing/error.rb +0 -22
- data/lib/mindee/parsing/inference.rb +0 -53
- data/lib/mindee/parsing/page.rb +0 -46
- data/lib/mindee/parsing/prediction/base.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/amount.rb +0 -21
- data/lib/mindee/parsing/prediction/common_fields/base.rb +0 -72
- data/lib/mindee/parsing/prediction/common_fields/company_registration.rb +0 -17
- data/lib/mindee/parsing/prediction/common_fields/date.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/locale.rb +0 -45
- data/lib/mindee/parsing/prediction/common_fields/payment_details.rb +0 -33
- data/lib/mindee/parsing/prediction/common_fields/position.rb +0 -39
- data/lib/mindee/parsing/prediction/common_fields/tax.rb +0 -40
- data/lib/mindee/parsing/prediction/common_fields/text.rb +0 -12
- data/lib/mindee/parsing/prediction/common_fields.rb +0 -11
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +0 -58
- data/lib/mindee/parsing/prediction/custom/fields.rb +0 -91
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +0 -34
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +0 -237
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +0 -40
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +0 -49
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +0 -212
- data/lib/mindee/parsing/prediction/invoice/invoice_v4_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +0 -121
- data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +0 -80
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +0 -87
- data/lib/mindee/parsing/prediction/receipt/receipt_v5.rb +0 -136
- data/lib/mindee/parsing/prediction/receipt/receipt_v5_line_item.rb +0 -37
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +0 -38
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +0 -70
- data/lib/mindee/parsing/prediction.rb +0 -15
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'product'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
module Parsing
|
7
|
+
# Common fields used for most documents.
|
8
|
+
module Common
|
9
|
+
# Base class for inference results.
# Groups the product details, the document-level prediction and the page-level predictions.
class Inference
  # Rotation flag, read from the 'is_rotation_applied' entry of the raw prediction.
  # @return [Boolean]
  attr_reader :is_rotation_applied
  # Page-level results; starts out as an empty list.
  # @return [Array<Mindee::Parsing::Common::Page>]
  attr_reader :pages
  # Document-level prediction; this class never assigns it.
  # @return [Mindee::Parsing::Common::Prediction]
  attr_reader :prediction
  # Product details, built from the 'product' entry of the raw prediction.
  # @return [Mindee::Parsing::Common::Product]
  attr_reader :product

  # @param raw_prediction [Hash]
  def initialize(raw_prediction)
    @pages = []
    @product = Product.new(raw_prediction['product'])
    @is_rotation_applied = raw_prediction['is_rotation_applied']
  end

  # Text summary: header, product line, rotation flag, prediction, then every page.
  # @return [String]
  def to_s
    rotation_label = @is_rotation_applied ? 'Yes' : 'No'
    [
      "Inference\n#########",
      "\n:Product: #{@product.name} v#{@product.version}",
      "\n:Rotation applied: #{rotation_label}",
      "\n\nPrediction\n==========",
      "\n#{@prediction}",
      "\n\nPage Predictions\n================\n\n",
      @pages.map(&:to_s).join("\n\n")
    ].join
  end
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mindee
|
4
|
+
module Parsing
|
5
|
+
module Common
|
6
|
+
module Ocr
|
7
|
+
# Mindee Vision V1 OCR results.
class MVisionV1
  # OCR results, one entry per element of the prediction's 'pages' collection.
  # @return [Array<OcrPage>]
  attr_reader :pages

  # @param prediction [Hash] must hold a 'pages' collection of raw page predictions.
  def initialize(prediction)
    @pages = prediction['pages'].map { |page_prediction| OcrPage.new(page_prediction) }
  end

  # Text of every page, one page after the other, separated by a line break.
  # Leading and trailing whitespace of the whole result is removed.
  # @return [String]
  def to_s
    @pages.map(&:to_s).join("\n").strip
  end
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,169 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'mvision_v1'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
module Parsing
|
7
|
+
module Common
|
8
|
+
module Ocr
|
9
|
+
# One word found by the OCR.
class OcrWord
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence
  # Text of the word.
  # @return [String]
  attr_reader :text
  # Box computed from the polygon; stays nil when the polygon is nil or empty.
  # @return [Mindee::Geometry::Quadrilateral]
  attr_reader :bounding_box
  # Polygon built from the 'polygon' entry of the prediction.
  # @return [Mindee::Geometry::Polygon]
  attr_reader :polygon

  # @param prediction [Hash]
  def initialize(prediction)
    @confidence = prediction['confidence']
    @text = prediction['text']
    @polygon = Geometry.polygon_from_prediction(prediction['polygon'])
    # No polygon points means there is nothing to compute a box from.
    return if @polygon.nil? || @polygon.empty?

    @bounding_box = Geometry.get_bounding_box(@polygon)
  end

  # @return [String] the text of the word, or an empty string when there is none.
  def to_s
    text.to_s
  end
end
|
34
|
+
|
35
|
+
# A list of words which are on the same line.
class OcrLine < Array
  # Builds the line either from raw word predictions or from ready-made words.
  # When both arguments are given, `prediction` wins; when neither is given the line is empty.
  # @param prediction [Array<Hash>, nil] raw word predictions, each one is wrapped in an OcrWord.
  # @param from_array [Array, nil] words to copy into the line as they are.
  def initialize(prediction = nil, from_array = nil)
    if !prediction.nil?
      super(prediction.map { |word_prediction| OcrWord.new(word_prediction) })
    elsif !from_array.nil?
      super(from_array)
    else
      super()
    end
  end

  # Sort the words on the line from left to right.
  # The receiver is left untouched; the key is the smallest x of each word's polygon.
  # The relative order of words sharing the same key is unspecified.
  # @return [OcrLine]
  def sort_on_x
    OcrLine.new(nil, sort_by { |word| Geometry.get_min_max_x(word.polygon).min })
  end

  # Text of every word, separated by single spaces.
  # @return [String]
  def to_s
    map(&:to_s).join(' ')
  end
end
|
61
|
+
|
62
|
+
# OCR extraction for a single page.
class OcrPage
  # All the words on the page, in semi-random order.
  # @return [Array<OcrWord>]
  attr_reader :all_words
  # Cached lines; stays empty until #all_lines has been called.
  # @return [Array<OcrLine>]
  attr_reader :lines

  # @param prediction [Hash] must hold an 'all_words' collection of raw word predictions.
  def initialize(prediction)
    @lines = []
    @all_words = prediction['all_words'].map { |word_prediction| OcrWord.new(word_prediction) }
  end

  # All the words on the page, ordered in lines.
  # The grouping is computed on first use and kept for later calls.
  # @return [Array<OcrLine>]
  def all_lines
    @lines = to_lines if @lines.empty?
    @lines
  end

  # Text of the page, one row per line; lines holding only whitespace are left out.
  # @return [String]
  def to_s
    all_lines.map(&:to_s).reject { |text| text.strip.empty? }.join("\n").strip
  end

  private

  # Order all the words on the page into lines.
  # @return [Array<OcrLine>]
  def to_lines
    # Sort top to bottom so that every line is anchored on its topmost remaining word.
    remaining = @all_words.sort_by { |word| Geometry.get_min_max_y(word.polygon).min }
    lines = []
    until remaining.empty?
      anchor, *candidates = remaining
      # Words are only compared with the anchor of the line, never with each other.
      same_line, remaining = candidates.partition { |word| words_on_same_line?(anchor, word) }
      lines.push(OcrLine.new(nil, [anchor, *same_line]).sort_on_x)
    end
    lines
  end

  # Determine if two words are on the same line.
  # True when the centroid of either polygon passes the other polygon's `point_in_y?` check.
  # @param current_word [Mindee::Parsing::Common::Ocr::OcrWord]
  # @param next_word [Mindee::Parsing::Common::Ocr::OcrWord]
  # @return [Boolean]
  def words_on_same_line?(current_word, next_word)
    current_in_next = current_word.polygon.point_in_y?(next_word.polygon.centroid)
    next_in_current = next_word.polygon.point_in_y?(current_word.polygon.centroid)
    current_in_next || next_in_current
  end
end
|
149
|
+
|
150
|
+
# OCR results covering the whole document.
class Ocr
  # Results produced by Mindee Vision v1.
  # @return [Mindee::Parsing::Common::Ocr::MVisionV1]
  attr_reader :mvision_v1

  # @param prediction [Hash] raw OCR section; its 'mvision-v1' entry is handed to MVisionV1.
  def initialize(prediction)
    raw_mvision = prediction['mvision-v1']
    @mvision_v1 = MVisionV1.new(raw_mvision)
  end

  # @return [String] the text form of the Mindee Vision v1 results.
  def to_s
    mvision_v1.to_s
  end
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mindee
|
4
|
+
module Parsing
|
5
|
+
module Common
|
6
|
+
# Orientation of a page.
class Orientation
  # Id of the page this orientation belongs to, as passed to the constructor.
  # @return [Integer]
  attr_reader :page_id
  # A prediction among these 3 possible outputs:
  #   * 0 degrees: the page is already upright
  #   * 90 degrees: the page must be rotated clockwise to be upright
  #   * 270 degrees: the page must be rotated counterclockwise to be upright
  # @return [Integer, nil]
  attr_reader :value

  # @param prediction [Hash] raw orientation; only its 'value' entry is read.
  # @param page_id [Integer]
  def initialize(prediction, page_id)
    @page_id = page_id
    @value = prediction['value']
  end
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'product'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
module Parsing
|
7
|
+
# Common fields used for most documents.
|
8
|
+
module Common
|
9
|
+
# Base wrapper class for page-level predictions.
# Holds the prediction for a page as well as its orientation and id.
class Page
  # Id of the page (as given by the API).
  # @return [Integer]
  attr_reader :page_id
  # Orientation of the page.
  # @return [Mindee::Parsing::Common::Orientation]
  attr_reader :orientation
  # Page prediction; this class never assigns it.
  # @return [Mindee::Parsing::Common::Prediction]
  attr_reader :prediction

  # @param raw_prediction [Hash] raw page data; the 'id' and 'orientation' entries are read.
  def initialize(raw_prediction)
    @page_id = raw_prediction['id']
    raw_orientation = raw_prediction['orientation']
    @orientation = Orientation.new(raw_orientation, @page_id)
  end

  # Underlined page title, directly followed by the text of the prediction.
  # @return [String]
  def to_s
    heading = "Page #{@page_id}"
    [heading, "\n", '-' * heading.size, @prediction.to_s].join
  end
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mindee
|
4
|
+
module Parsing
|
5
|
+
module Common
|
6
|
+
# Details about the product.
class Product
  # 'name' entry of the raw product data.
  attr_reader :name
  # 'type' entry of the raw product data.
  attr_reader :type
  # 'version' entry of the raw product data.
  attr_reader :version

  # @param prediction [Hash]
  def initialize(prediction)
    @name, @type, @version = prediction.values_at('name', 'type', 'version')
  end
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'common/api_response'
|
4
|
+
require_relative 'common/document'
|
5
|
+
require_relative 'common/error'
|
6
|
+
require_relative 'common/inference'
|
7
|
+
require_relative 'common/ocr'
|
8
|
+
require_relative 'common/prediction'
|
9
|
+
require_relative 'common/orientation'
|
10
|
+
require_relative 'common/page'
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mindee
|
4
|
+
module Parsing
|
5
|
+
module Custom
|
6
|
+
# Document classification (custom docs)
class ClassificationField
  # The classification value
  # @return [String]
  attr_reader :value
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence

  # @param prediction [Hash] raw classification; the 'value' and 'confidence' entries are read.
  def initialize(prediction)
    @value = prediction['value']
    @confidence = prediction['confidence']
  end

  # Always answers with a String, whatever the type of the stored value;
  # a missing value gives an empty string.
  # @return [String]
  def to_s
    @value.to_s
  end
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mindee
|
4
|
+
module Parsing
|
5
|
+
module Custom
|
6
|
+
# One entry of a list field.
class ListFieldItem
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence
  # Box computed from the polygon; stays nil when the polygon is nil or empty.
  # @return [Mindee::Geometry::Quadrilateral]
  attr_reader :bounding_box
  # Polygon built from the 'polygon' entry of the prediction.
  # @return [Mindee::Geometry::Polygon]
  attr_reader :polygon
  # 'content' entry of the prediction, kept as received.
  # @return [Array, Hash, String, nil]
  attr_reader :content

  # @param prediction [Hash]
  def initialize(prediction)
    @confidence = prediction['confidence']
    @content = prediction['content']
    @polygon = Geometry.polygon_from_prediction(prediction['polygon'])
    # No polygon points means there is nothing to compute a box from.
    return if @polygon.nil? || @polygon.empty?

    @bounding_box = Geometry.get_bounding_box(@polygon)
  end

  # @return [String] the content converted to text.
  def to_s
    content.to_s
  end
end
|
31
|
+
|
32
|
+
# Custom-document field whose values are stored as a list of items.
class ListField
  # Items built from the 'values' entry of the prediction.
  # @return [Array<Mindee::Parsing::Custom::ListFieldItem>]
  attr_reader :values
  # Explicit page id when one was given, otherwise the 'page_id' entry of the prediction.
  # @return [Integer, nil]
  attr_reader :page_id
  # true if the field was reconstructed or computed using other fields.
  # @return [Boolean]
  attr_reader :reconstructed
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence

  # @param prediction [Hash] must hold a 'values' collection of raw items.
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    @reconstructed = reconstructed
    @page_id = page_id || prediction['page_id']
    @confidence = prediction['confidence']
    @values = prediction['values'].map { |raw_item| ListFieldItem.new(raw_item) }
  end

  # Content of every item, kept in its original type.
  # @return [Array]
  def contents_list
    values.map(&:content)
  end

  # Text of every item, glued together with the separator.
  # @param separator [String]
  # @return [String]
  def contents_str(separator: ' ')
    item_texts = values.map(&:to_s)
    item_texts.join(separator)
  end

  # Same as #contents_str with a single space as separator.
  # @return [String]
  def to_s
    contents_str(separator: ' ')
  end
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'base_field'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
module Parsing
|
7
|
+
module Standard
|
8
|
+
# Represents an amount.
class AmountField < Field
  # Amount value, rounded to 3 decimal places.
  # @return [Float, nil]
  attr_reader :value

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    super(prediction, page_id, reconstructed: reconstructed)
    return if @value.nil?

    @value = @value.round(3)
  end

  # Amount rendered through Field.float_to_string; a missing amount gives an empty string.
  # @return [String]
  def to_s
    Field.float_to_string(@value)
  end
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../../geometry'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
module Parsing
|
7
|
+
module Standard
|
8
|
+
# Common ancestor of fields and feature fields.
class AbstractField
  # Box computed from the polygon; stays nil when the polygon is missing or empty.
  # @return [Mindee::Geometry::Quadrilateral, nil]
  attr_reader :bounding_box
  # Polygon built from the 'polygon' entry of the prediction, when there is one.
  # @return [Mindee::Geometry::Polygon, nil]
  attr_reader :polygon
  # Explicit page id when one was given, otherwise the 'page_id' entry of the prediction.
  # @return [Integer, nil]
  attr_reader :page_id
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float, nil]
  attr_accessor :confidence

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  def initialize(prediction, page_id)
    @page_id = page_id || prediction['page_id']
    @confidence = prediction['confidence'] if prediction.key?('confidence')
    return unless prediction.key?('polygon')

    @polygon = Geometry.polygon_from_prediction(prediction['polygon'])
    return if @polygon.nil? || @polygon.empty?

    @bounding_box = Geometry.get_bounding_box(@polygon)
  end

  # Text form of the value; a nil or false value gives an empty string.
  # @return [String]
  def to_s
    return '' unless @value

    @value.to_s
  end

  # Multiply all the Mindee::Parsing::Standard::Field confidences in the array.
  # Gives 0.0 as soon as one confidence is nil, and 1.0 for an empty array.
  # @return [Float]
  def self.array_confidence(field_array)
    combined = field_array.inject(1) do |running, field|
      return 0.0 if field.confidence.nil?

      running * field.confidence
    end
    combined.to_f
  end

  # Add all the Mindee::Parsing::Standard::Field values in the array.
  # Gives 0.0 as soon as one value is nil, and also for an empty array.
  # @return [Float]
  def self.array_sum(field_array)
    total = field_array.inject(0) do |running, field|
      return 0.0 if field.value.nil?

      running + field.value
    end
    total.to_f
  end

  # Formats a number with as many decimals as its float representation shows,
  # and never fewer than `min_precision`. A nil value gives an empty string.
  # @param value [Float]
  # @param min_precision [Integer]
  # @return [String]
  def self.float_to_string(value, min_precision = 2)
    return String.new if value.nil?

    shown_decimals = value.to_f.to_s.split('.')[1].size
    format("%.#{[shown_decimals, min_precision].max}f", value)
  end
end
|
70
|
+
|
71
|
+
# Standard field carrying a single value.
class Field < AbstractField
  # 'value' entry of the prediction, kept as received.
  # @return [String, Float, Integer, Boolean]
  attr_reader :value
  # true if the field was reconstructed or computed using other fields.
  # @return [Boolean]
  attr_reader :reconstructed

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    # The parent constructor takes no keyword, so only the positional arguments are forwarded.
    super(prediction, page_id)
    @reconstructed = reconstructed
    @value = prediction['value']
  end
end
|
88
|
+
|
89
|
+
# Feature field object wrapper for specialized methods.
|
90
|
+
class FeatureField < AbstractField
|
91
|
+
# Format strings for display by shortening long strings and assigning empty ones.
# A string that fits in `max_col_size` characters is returned untouched; a longer one
# is cut so that the kept text plus the trailing "..." is exactly `max_col_size` long.
# When `max_col_size` is smaller than 3, only the "..." marker is returned.
# @param in_str [String, nil]
# @param max_col_size [Integer, nil] nil means no limit.
# @return [String]
def format_for_display(in_str, max_col_size = nil)
  return '' if in_str.nil?
  return in_str if max_col_size.nil? || in_str.length <= max_col_size

  # Leave room for the three dots of the marker.
  kept_size = [max_col_size - 3, 0].max
  "#{in_str[0, kept_size]}..."
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'base_field'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
module Parsing
|
7
|
+
module Standard
|
8
|
+
# Field holding the value produced by a classifier.
class ClassificationField < Field
  # The classification, as a String.
  # @return [String]
  attr_reader :value
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mindee
|
4
|
+
module Parsing
|
5
|
+
module Standard
|
6
|
+
# A company registration number or code, together with its type.
class CompanyRegistration < Field
  # 'type' entry of the prediction.
  # @return [String]
  attr_reader :type

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    super(prediction, page_id, reconstructed: reconstructed)
    @type = prediction['type']
  end
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
require_relative 'base_field'
|
6
|
+
|
7
|
+
module Mindee
|
8
|
+
module Parsing
|
9
|
+
module Standard
|
10
|
+
# Represents a date.
class DateField < Field
  # The date as a standard Ruby `Date` object; nil when there is no value.
  # @return [Date, nil]
  attr_reader :date_object
  # The ISO 8601 representation of the date, regardless of the `raw` contents.
  # @return [String, nil]
  attr_reader :value
  # The textual representation of the date as found on the document.
  # @return [String, nil]
  attr_reader :raw

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @raise [Date::Error] when the value cannot be parsed as a date.
  def initialize(prediction, page_id)
    super
    # Keep the text read on the document even when no normalized value came with it.
    @raw = prediction['raw']
    return unless @value

    @date_object = Date.parse(@value)
  end
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|