mindee 2.2.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +2 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +36 -0
- data/README.md +29 -16
- data/bin/mindee.rb +23 -26
- data/docs/code_samples/bank_account_details_v1.txt +10 -5
- data/docs/code_samples/bank_account_details_v2.txt +19 -0
- data/docs/code_samples/bank_check_v1.txt +10 -5
- data/docs/code_samples/carte_vitale_v1.txt +10 -5
- data/docs/code_samples/custom_v1.txt +19 -10
- data/docs/code_samples/default.txt +10 -2
- data/docs/code_samples/expense_receipts_v4.txt +10 -5
- data/docs/code_samples/expense_receipts_v5.txt +11 -6
- data/docs/code_samples/financial_document_v1.txt +10 -5
- data/docs/code_samples/idcard_fr_v1.txt +10 -5
- data/docs/code_samples/invoice_splitter_v1_async.txt +66 -0
- data/docs/code_samples/invoices_v4.txt +10 -5
- data/docs/code_samples/license_plates_v1.txt +10 -5
- data/docs/code_samples/passport_v1.txt +10 -5
- data/docs/code_samples/proof_of_address_v1.txt +10 -5
- data/docs/ruby-api-builder.md +30 -31
- data/docs/ruby-getting-started.md +64 -23
- data/docs/ruby-invoice-ocr.md +70 -59
- data/docs/ruby-passport-ocr.md +49 -40
- data/docs/ruby-receipt-ocr.md +45 -32
- data/lib/mindee/client.rb +150 -148
- data/lib/mindee/geometry/min_max.rb +23 -0
- data/lib/mindee/geometry/point.rb +35 -0
- data/lib/mindee/geometry/polygon.rb +23 -0
- data/lib/mindee/geometry/quadrilateral.rb +45 -0
- data/lib/mindee/geometry/utils.rb +81 -0
- data/lib/mindee/geometry.rb +5 -116
- data/lib/mindee/http/endpoint.rb +123 -16
- data/lib/mindee/http.rb +3 -0
- data/lib/mindee/input/sources.rb +87 -73
- data/lib/mindee/parsing/common/api_response.rb +109 -0
- data/lib/mindee/parsing/common/document.rb +48 -0
- data/lib/mindee/parsing/common/error.rb +24 -0
- data/lib/mindee/parsing/common/inference.rb +43 -0
- data/lib/mindee/parsing/common/ocr/mvision_v1.rb +34 -0
- data/lib/mindee/parsing/common/ocr/ocr.rb +169 -0
- data/lib/mindee/parsing/common/ocr.rb +3 -0
- data/lib/mindee/parsing/common/orientation.rb +26 -0
- data/lib/mindee/parsing/common/page.rb +40 -0
- data/lib/mindee/parsing/common/prediction.rb +15 -0
- data/lib/mindee/parsing/common/product.rb +19 -0
- data/lib/mindee/parsing/common.rb +10 -0
- data/lib/mindee/parsing/custom/classification_field.rb +28 -0
- data/lib/mindee/parsing/custom/list_field.rb +76 -0
- data/lib/mindee/parsing/custom.rb +4 -0
- data/lib/mindee/parsing/standard/amount_field.rb +26 -0
- data/lib/mindee/parsing/standard/base_field.rb +104 -0
- data/lib/mindee/parsing/standard/classification_field.rb +16 -0
- data/lib/mindee/parsing/standard/company_registration_field.rb +21 -0
- data/lib/mindee/parsing/standard/date_field.rb +34 -0
- data/lib/mindee/parsing/standard/locale_field.rb +50 -0
- data/lib/mindee/parsing/standard/payment_details_field.rb +42 -0
- data/lib/mindee/parsing/standard/position_field.rb +44 -0
- data/lib/mindee/parsing/standard/tax_field.rb +108 -0
- data/lib/mindee/parsing/standard/text_field.rb +16 -0
- data/lib/mindee/parsing/standard.rb +12 -0
- data/lib/mindee/parsing.rb +3 -2
- data/lib/mindee/{input → pdf}/pdf_processing.rb +4 -32
- data/lib/mindee/pdf/pdf_tools.rb +34 -0
- data/lib/mindee/pdf.rb +3 -0
- data/lib/mindee/product/.rubocop.yml +5 -0
- data/lib/mindee/product/custom/custom_v1.rb +35 -0
- data/lib/mindee/product/custom/custom_v1_document.rb +60 -0
- data/lib/mindee/product/custom/custom_v1_page.rb +32 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +38 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +37 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1_page.rb +34 -0
- data/lib/mindee/product/financial_document/financial_document_v1.rb +36 -0
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +188 -0
- data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +90 -0
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +32 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +43 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_page.rb +34 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +38 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +71 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +58 -0
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rb +34 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +38 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +52 -0
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb +34 -0
- data/lib/mindee/product/fr/id_card/id_card_v1.rb +38 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +82 -0
- data/lib/mindee/product/fr/id_card/id_card_v1_page.rb +48 -0
- data/lib/mindee/product/invoice/invoice_v4.rb +37 -0
- data/lib/mindee/product/invoice/invoice_v4_document.rb +212 -0
- data/lib/mindee/product/invoice/invoice_v4_line_item.rb +66 -0
- data/lib/mindee/product/invoice/invoice_v4_page.rb +32 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +36 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +65 -0
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +32 -0
- data/lib/mindee/product/passport/passport_v1.rb +36 -0
- data/lib/mindee/{parsing/prediction/fr/id_card/id_card_v1.rb → product/passport/passport_v1_document.rb} +45 -45
- data/lib/mindee/product/passport/passport_v1_page.rb +32 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +36 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +83 -0
- data/lib/mindee/product/proof_of_address/proof_of_address_v1_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v4.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v4_document.rb +86 -0
- data/lib/mindee/product/receipt/receipt_v4_page.rb +32 -0
- data/lib/mindee/product/receipt/receipt_v5.rb +36 -0
- data/lib/mindee/product/receipt/receipt_v5_document.rb +138 -0
- data/lib/mindee/product/receipt/receipt_v5_line_item.rb +69 -0
- data/lib/mindee/product/receipt/receipt_v5_page.rb +32 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1.rb +38 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +73 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1_page.rb +34 -0
- data/lib/mindee/product.rb +16 -0
- data/lib/mindee/version.rb +2 -1
- data/lib/mindee.rb +3 -1
- metadata +87 -38
- data/docs/code_samples/shipping_containers_v1.txt +0 -14
- data/lib/mindee/document_config.rb +0 -60
- data/lib/mindee/parsing/document.rb +0 -31
- data/lib/mindee/parsing/error.rb +0 -22
- data/lib/mindee/parsing/inference.rb +0 -53
- data/lib/mindee/parsing/page.rb +0 -46
- data/lib/mindee/parsing/prediction/base.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/amount.rb +0 -21
- data/lib/mindee/parsing/prediction/common_fields/base.rb +0 -72
- data/lib/mindee/parsing/prediction/common_fields/company_registration.rb +0 -17
- data/lib/mindee/parsing/prediction/common_fields/date.rb +0 -30
- data/lib/mindee/parsing/prediction/common_fields/locale.rb +0 -45
- data/lib/mindee/parsing/prediction/common_fields/payment_details.rb +0 -33
- data/lib/mindee/parsing/prediction/common_fields/position.rb +0 -39
- data/lib/mindee/parsing/prediction/common_fields/tax.rb +0 -40
- data/lib/mindee/parsing/prediction/common_fields/text.rb +0 -12
- data/lib/mindee/parsing/prediction/common_fields.rb +0 -11
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +0 -58
- data/lib/mindee/parsing/prediction/custom/fields.rb +0 -91
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +0 -34
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +0 -237
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +0 -40
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +0 -49
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +0 -212
- data/lib/mindee/parsing/prediction/invoice/invoice_v4_line_item.rb +0 -58
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +0 -121
- data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +0 -80
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +0 -87
- data/lib/mindee/parsing/prediction/receipt/receipt_v5.rb +0 -136
- data/lib/mindee/parsing/prediction/receipt/receipt_v5_line_item.rb +0 -37
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +0 -38
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +0 -70
- data/lib/mindee/parsing/prediction.rb +0 -15
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'product'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
module Parsing
|
|
7
|
+
# Common fields used for most documents.
|
|
8
|
+
module Common
|
|
9
|
+
# Base class for prediction inferences.
# Stores the product information, the rotation flag and the list of pages;
# the constructor itself does not assign the document-level prediction.
class Inference
  # Raw 'is_rotation_applied' value of the inference.
  # @return [Boolean]
  attr_reader :is_rotation_applied
  # Page-level results; starts out empty.
  # @return [Array<Mindee::Parsing::Common::Page>]
  attr_reader :pages
  # Document-level prediction.
  # @return [Mindee::Parsing::Common::Prediction]
  attr_reader :prediction
  # Product the inference was produced by.
  # @return [Mindee::Parsing::Common::Product]
  attr_reader :product

  # @param raw_prediction [Hash] must provide the 'product' and 'is_rotation_applied' keys.
  def initialize(raw_prediction)
    @pages = []
    @product = Product.new(raw_prediction['product'])
    @is_rotation_applied = raw_prediction['is_rotation_applied']
  end

  # Renders the inference header, the document prediction and every page.
  # @return [String]
  def to_s
    rotation_label = @is_rotation_applied ? 'Yes' : 'No'
    [
      "Inference\n#########",
      "\n:Product: #{@product.name} v#{@product.version}",
      "\n:Rotation applied: #{rotation_label}",
      "\n\nPrediction\n==========",
      "\n#{@prediction}",
      "\n\nPage Predictions\n================\n\n",
      @pages.map(&:to_s).join("\n\n")
    ].join
  end
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
module Parsing
|
|
5
|
+
module Common
|
|
6
|
+
module Ocr
|
|
7
|
+
# OCR results returned under the Mindee Vision V1 key.
class MVisionV1
  # List of pages, in the order given by the prediction.
  # @return [Array<OcrPage>]
  attr_reader :pages

  # @param prediction [Hash] must provide a 'pages' collection.
  def initialize(prediction)
    @pages = prediction['pages'].map { |raw_page| OcrPage.new(raw_page) }
  end

  # Text of every page, each preceded by a newline, with the
  # surrounding whitespace stripped from the final result.
  # @return [String]
  def to_s
    @pages.reduce(String.new) { |text, page| text << "\n" << page.to_s }.strip
  end
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'mvision_v1'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
module Parsing
|
|
7
|
+
module Common
|
|
8
|
+
module Ocr
|
|
9
|
+
# One word detected by the OCR.
class OcrWord
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence
  # Text of the word.
  # @return [String]
  attr_reader :text
  # Bounding box computed from the polygon; left unset when the polygon is nil or empty.
  # @return [Mindee::Geometry::Quadrilateral]
  attr_reader :bounding_box
  # Polygon built from the prediction's 'polygon' entry.
  # @return [Mindee::Geometry::Polygon]
  attr_reader :polygon

  # @param prediction [Hash]
  def initialize(prediction)
    @text = prediction['text']
    @confidence = prediction['confidence']
    @polygon = Geometry.polygon_from_prediction(prediction['polygon'])
    # No bounding box can be derived without at least one point.
    return if @polygon.nil? || @polygon.empty?

    @bounding_box = Geometry.get_bounding_box(@polygon)
  end

  # @return [String]
  def to_s
    text.to_s
  end
end
|
|
34
|
+
|
|
35
|
+
# A list of words which are on the same line.
class OcrLine < Array
  # Builds the line either from raw word predictions or from existing words.
  # When both arguments are nil the line is empty.
  # @param prediction [Array<Hash>, nil] raw word predictions, each wrapped in an OcrWord.
  # @param from_array [Array, nil] already-built words, used only when `prediction` is nil.
  def initialize(prediction = nil, from_array = nil)
    if !prediction.nil?
      super(prediction.map { |word_prediction| OcrWord.new(word_prediction) })
    elsif !from_array.nil?
      super(from_array)
    end
  end

  # Sort the words on the line from left to right.
  # Words are ordered by the minimum X value of their polygon.
  # @return [OcrLine] a new line; the receiver is left untouched.
  def sort_on_x
    from_array = sort do |word1, word2|
      Geometry.get_min_max_x(word1.polygon).min <=> Geometry.get_min_max_x(word2.polygon).min
    end
    OcrLine.new(nil, from_array)
  end

  # Words of the line separated by a single space.
  # @return [String]
  def to_s
    # `map` (not `each`) so the string conversion is explicit rather than
    # relying on `join` converting the elements implicitly.
    map(&:to_s).join(' ')
  end
end
|
|
61
|
+
|
|
62
|
+
# OCR extraction for a single page.
class OcrPage
  # All the words on the page, in semi-random order.
  # @return [Array<OcrWord>]
  attr_reader :all_words
  # Lines computed so far; empty until the words have been grouped by #all_lines.
  # @return [Array<OcrLine>]
  attr_reader :lines

  # @param prediction [Hash] must provide an 'all_words' collection of word predictions.
  def initialize(prediction)
    @lines = []
    @all_words = prediction['all_words'].map { |word_prediction| OcrWord.new(word_prediction) }
  end

  # All the words on the page, ordered in lines.
  # The grouping is computed on first use and reused while it is non-empty.
  # @return [Array<OcrLine>]
  def all_lines
    @lines = to_lines if @lines.empty?
    @lines
  end

  # One line of text per non-blank OCR line, surrounding whitespace stripped.
  # @return [String]
  def to_s
    lines = all_lines
    return '' if lines.empty?

    out_str = String.new
    lines.each do |line|
      line_text = line.to_s
      out_str << "#{line_text}\n" unless line_text.strip.empty?
    end
    out_str.strip
  end

  private

  # Builds one line: the first word not yet placed becomes the reference
  # (unless `current` is given), then every other unplaced word that shares
  # its line is added. The line is appended to `lines` sorted left to right.
  # @param sorted_words [Array<OcrWord>]
  # @param current [OcrWord, nil] reference word; nil to pick the first unplaced word.
  # @param indexes [Array<Integer>] positions in `sorted_words` already placed; updated in place.
  # @param lines [Array<OcrLine>] accumulator; updated in place.
  def parse_one(sorted_words, current, indexes, lines)
    line = OcrLine.new([])
    sorted_words.each_with_index do |word, idx|
      next if indexes.include?(idx)

      if current.nil?
        current = word
      elsif !words_on_same_line?(current, word)
        next
      end
      line.push(word)
      indexes.push(idx)
    end
    lines.push(line.sort_on_x) if line.any?
  end

  # Order all the words on the page into lines.
  # @return [Array<OcrLine>]
  def to_lines
    indexes = []
    lines = []

    # make sure words are sorted from top to bottom
    sorted_words = @all_words.sort_by { |word| Geometry.get_min_max_y(word.polygon).min }
    # Every pass places at least the first unplaced word, so the loop ends
    # as soon as all words belong to a line instead of running once per word.
    parse_one(sorted_words, nil, indexes, lines) while indexes.size < sorted_words.size
    lines
  end

  # Determine if two words are on the same line.
  # True when the centroid of either polygon passes the other polygon's
  # `point_in_y?` check.
  # @param current_word [Mindee::Parsing::Common::Ocr::OcrWord]
  # @param next_word [Mindee::Parsing::Common::Ocr::OcrWord]
  # @return [Boolean]
  def words_on_same_line?(current_word, next_word)
    current_in_next = current_word.polygon.point_in_y?(next_word.polygon.centroid)
    next_in_current = next_word.polygon.point_in_y?(current_word.polygon.centroid)
    current_in_next || next_in_current
  end
end
|
|
149
|
+
|
|
150
|
+
# Document-wide OCR extraction.
class Ocr
  # Mindee Vision v1 results.
  # @return [Mindee::Parsing::Common::Ocr::MVisionV1]
  attr_reader :mvision_v1

  # @param prediction [Hash] the 'mvision-v1' entry is handed to MVisionV1.
  def initialize(prediction)
    raw_mvision = prediction['mvision-v1']
    @mvision_v1 = MVisionV1.new(raw_mvision)
  end

  # Delegates to the Mindee Vision v1 results.
  # @return [String]
  def to_s
    mvision_v1.to_s
  end
end
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
module Parsing
|
|
5
|
+
module Common
|
|
6
|
+
# Orientation of a page.
class Orientation
  # Page the orientation belongs to, as passed to the constructor.
  # @return [Integer]
  attr_reader :page_id
  # A prediction among these 3 possible outputs:
  # * 0 degrees: the page is already upright
  # * 90 degrees: the page must be rotated clockwise to be upright
  # * 270 degrees: the page must be rotated counterclockwise to be upright
  # @return [Integer, nil]
  attr_reader :value

  # @param prediction [Hash] its 'value' entry is stored as the orientation.
  # @param page_id [Integer]
  def initialize(prediction, page_id)
    @page_id = page_id
    @value = prediction['value']
  end
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'product'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
module Parsing
|
|
7
|
+
# Common fields used for most documents.
|
|
8
|
+
module Common
|
|
9
|
+
# Abstract wrapper class for prediction Pages
# Holds the prediction for a page as well as its orientation and id.
class Page
  # Id of the page (as given by the API).
  # @return [Integer]
  attr_reader :page_id
  # Orientation of the page.
  # @return [Mindee::Parsing::Common::Orientation]
  attr_reader :orientation
  # Page prediction; not assigned by this constructor.
  # @return [Mindee::Parsing::Common::Prediction]
  attr_reader :prediction

  # @param raw_prediction [Hash] must provide the 'id' and 'orientation' keys.
  def initialize(raw_prediction)
    @page_id = raw_prediction['id']
    @orientation = Orientation.new(raw_prediction['orientation'], @page_id)
  end

  # Page title underlined with dashes, immediately followed by the prediction text.
  # @return [String]
  def to_s
    heading = "Page #{@page_id}"
    [heading, "\n", '-' * heading.size, @prediction.to_s].join
  end
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
module Parsing
|
|
5
|
+
module Common
|
|
6
|
+
# Information about the product that produced an inference.
class Product
  # Value of the 'name' entry of the raw product hash.
  attr_reader :name
  # Value of the 'type' entry of the raw product hash.
  attr_reader :type
  # Value of the 'version' entry of the raw product hash.
  attr_reader :version

  # @param prediction [Hash]
  def initialize(prediction)
    @name, @type, @version = %w[name type version].map { |key| prediction[key] }
  end
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'common/api_response'
|
|
4
|
+
require_relative 'common/document'
|
|
5
|
+
require_relative 'common/error'
|
|
6
|
+
require_relative 'common/inference'
|
|
7
|
+
require_relative 'common/ocr'
|
|
8
|
+
require_relative 'common/prediction'
|
|
9
|
+
require_relative 'common/orientation'
|
|
10
|
+
require_relative 'common/page'
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
module Parsing
|
|
5
|
+
module Custom
|
|
6
|
+
# Document classification (custom docs)
class ClassificationField
  # The classification value
  # @return [String]
  attr_reader :value
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence

  # @param prediction [Hash] its 'value' and 'confidence' entries are stored as-is.
  def initialize(prediction)
    @value = prediction['value']
    @confidence = prediction['confidence']
  end

  # String form of the value; an empty string when there is no value.
  # Always returns a String, even when the raw value is not one.
  # @return [String]
  def to_s
    @value.to_s
  end
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
module Parsing
|
|
5
|
+
module Custom
|
|
6
|
+
# A single entry of a list field.
class ListFieldItem
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence
  # Bounding box computed from the polygon; left unset when the polygon is nil or empty.
  # @return [Mindee::Geometry::Quadrilateral]
  attr_reader :bounding_box
  # Polygon built from the prediction's 'polygon' entry.
  # @return [Mindee::Geometry::Polygon]
  attr_reader :polygon
  # Raw 'content' entry of the prediction.
  # @return [Array, Hash, String, nil]
  attr_reader :content

  # @param prediction [Hash]
  def initialize(prediction)
    @content = prediction['content']
    @confidence = prediction['confidence']
    @polygon = Geometry.polygon_from_prediction(prediction['polygon'])
    # No bounding box can be derived without at least one point.
    return if @polygon.nil? || @polygon.empty?

    @bounding_box = Geometry.get_bounding_box(@polygon)
  end

  # @return [String]
  def to_s
    content.to_s
  end
end
|
|
31
|
+
|
|
32
|
+
# Custom-document field whose values are stored as a list of items.
class ListField
  # Items of the field, in prediction order.
  # @return [Array<Mindee::Parsing::Custom::ListFieldItem>]
  attr_reader :values
  # Explicit page id when given, otherwise the prediction's 'page_id' entry.
  # @return [Integer, nil]
  attr_reader :page_id
  # true if the field was reconstructed or computed using other fields.
  # @return [Boolean]
  attr_reader :reconstructed
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence

  # @param prediction [Hash] must provide a 'values' collection.
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    @confidence = prediction['confidence']
    @page_id = page_id || prediction['page_id']
    @reconstructed = reconstructed
    @values = prediction['values'].map { |raw_item| ListFieldItem.new(raw_item) }
  end

  # Raw content of every item.
  # @return [Array]
  def contents_list
    @values.map { |item| item.content }
  end

  # String form of every item, joined with `separator`.
  # @param separator [String]
  # @return [String]
  def contents_str(separator: ' ')
    item_texts = @values.map(&:to_s)
    item_texts.join(separator)
  end

  # Items joined with a single space.
  # @return [String]
  def to_s
    contents_str(separator: ' ')
  end
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'base_field'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
module Parsing
|
|
7
|
+
module Standard
|
|
8
|
+
# Represents an amount.
class AmountField < Field
  # Amount value, rounded to 3 decimal places
  # @return [Float, nil]
  attr_reader :value

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    super(prediction, page_id, reconstructed: reconstructed)
    # A missing value stays nil; anything else is rounded.
    @value = @value&.round(3)
  end

  # Amount formatted through Field.float_to_string.
  # @return [String]
  def to_s
    Field.float_to_string(@value)
  end
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../../geometry'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
module Parsing
|
|
7
|
+
module Standard
|
|
8
|
+
# Base Field object, upon which fields and feature fields are built
class AbstractField
  # Bounding box computed from the polygon; left unset when the polygon is nil or empty.
  # @return [Mindee::Geometry::Quadrilateral, nil]
  attr_reader :bounding_box
  # Polygon built from the prediction's 'polygon' entry, when present.
  # @return [Mindee::Geometry::Polygon, nil]
  attr_reader :polygon
  # Explicit page id when given, otherwise the prediction's 'page_id' entry.
  # @return [Integer, nil]
  attr_reader :page_id
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float, nil]
  attr_accessor :confidence

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  def initialize(prediction, page_id)
    @confidence = prediction['confidence'] if prediction.key?('confidence')
    @polygon = Geometry.polygon_from_prediction(prediction['polygon']) if prediction.key?('polygon')
    @bounding_box = Geometry.get_bounding_box(@polygon) unless @polygon.nil? || @polygon.empty?
    @page_id = page_id || prediction['page_id']
  end

  # String form of the value; empty when the value is nil or false.
  # @return [String]
  def to_s
    @value ? @value.to_s : ''
  end

  # Multiply all the Mindee::Parsing::Standard::Field confidences in the array.
  # Returns 0.0 as soon as a field without confidence is met.
  # @param field_array [Array] objects responding to `confidence`.
  # @return [Float]
  def self.array_confidence(field_array)
    product = 1
    field_array.each do |field|
      return 0.0 if field.confidence.nil?

      product *= field.confidence
    end
    product.to_f
  end

  # Add all the Mindee::Parsing::Standard::Field values in the array.
  # Returns 0.0 as soon as a field without value is met.
  # @param field_array [Array] objects responding to `value`.
  # @return [Float]
  def self.array_sum(field_array)
    arr_sum = 0
    field_array.each do |field|
      return 0.0 if field.value.nil?

      arr_sum += field.value
    end
    arr_sum.to_f
  end

  # Formats a number with as many decimals as its shortest float
  # representation needs, and never fewer than `min_precision`.
  # @param value [Float]
  # @param min_precision [Integer]
  # @return [String] empty when `value` is nil.
  def self.float_to_string(value, min_precision = 2)
    return String.new if value.nil?

    # Float#to_s may use exponent notation ("1.5e-05"); the exponent has to be
    # taken into account, otherwise the decimal count is read from "5e-05".
    mantissa, exponent = value.to_f.to_s.split('e')
    fraction = mantissa.split('.')[1].to_s
    # In exponent form a ".0" mantissa carries no significant decimal.
    fraction = fraction.sub(/0+\z/, '') if exponent
    precision = [fraction.size - exponent.to_i, min_precision].max
    format("%.#{precision}f", value)
  end
end

# Base field object.
class Field < AbstractField
  # Raw 'value' entry of the prediction.
  # @return [String, Float, Integer, Boolean]
  attr_reader :value
  # true if the field was reconstructed or computed using other fields.
  # @return [Boolean]
  attr_reader :reconstructed

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    super(prediction, page_id)
    @value = prediction['value']
    @reconstructed = reconstructed
  end
end

# Feature field object wrapper for specialized methods.
class FeatureField < AbstractField
  # Format strings for display by shortening long strings and assigning empty ones.
  # A string longer than `max_col_size` is cut and suffixed with '...' so that
  # the result is exactly `max_col_size` characters long (for sizes of 3 or more).
  # @param in_str [String, nil]
  # @param max_col_size [Integer, nil] nil disables shortening.
  # @return [String]
  def format_for_display(in_str, max_col_size = nil)
    return '' if in_str.nil?
    return in_str if max_col_size.nil?
    return in_str if in_str.length <= max_col_size

    # Reserve three characters for the ellipsis.
    kept_chars = [max_col_size - 3, 0].max
    "#{in_str[0, kept_chars]}..."
  end
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'base_field'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
module Parsing
|
|
7
|
+
module Standard
|
|
8
|
+
# Field holding the value chosen by a classifier.
class ClassificationField < Field
  # The classification, as a String
  # @return [String]
  attr_reader :value
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
module Parsing
|
|
5
|
+
module Standard
|
|
6
|
+
# A company registration number or code, together with its type.
class CompanyRegistration < Field
  # Raw 'type' entry of the prediction.
  # @return [String]
  attr_reader :type

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    super(prediction, page_id, reconstructed: reconstructed)
    @type = prediction['type']
  end
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'date'
|
|
4
|
+
|
|
5
|
+
require_relative 'base_field'
|
|
6
|
+
|
|
7
|
+
module Mindee
|
|
8
|
+
module Parsing
|
|
9
|
+
module Standard
|
|
10
|
+
# A date field.
class DateField < Field
  # The date as a standard Ruby `Date` object.
  # @return [Date, nil]
  attr_reader :date_object
  # The ISO 8601 representation of the date, regardless of the `raw` contents.
  # @return [String, nil]
  attr_reader :value
  # The textual representation of the date as found on the document.
  # @return [String, nil]
  attr_reader :raw

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  def initialize(prediction, page_id)
    super(prediction, page_id)
    # NOTE(review): `raw` is only stored when a value is present, so a
    # prediction carrying raw text without a value loses it -- confirm intent.
    if @value
      @date_object = Date.parse(@value)
      @raw = prediction['raw']
    end
  end
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|