mindee 2.2.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (152) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +2 -0
  4. data/.yardopts +1 -0
  5. data/CHANGELOG.md +36 -0
  6. data/README.md +29 -16
  7. data/bin/mindee.rb +23 -26
  8. data/docs/code_samples/bank_account_details_v1.txt +10 -5
  9. data/docs/code_samples/bank_account_details_v2.txt +19 -0
  10. data/docs/code_samples/bank_check_v1.txt +10 -5
  11. data/docs/code_samples/carte_vitale_v1.txt +10 -5
  12. data/docs/code_samples/custom_v1.txt +19 -10
  13. data/docs/code_samples/default.txt +10 -2
  14. data/docs/code_samples/expense_receipts_v4.txt +10 -5
  15. data/docs/code_samples/expense_receipts_v5.txt +11 -6
  16. data/docs/code_samples/financial_document_v1.txt +10 -5
  17. data/docs/code_samples/idcard_fr_v1.txt +10 -5
  18. data/docs/code_samples/invoice_splitter_v1_async.txt +66 -0
  19. data/docs/code_samples/invoices_v4.txt +10 -5
  20. data/docs/code_samples/license_plates_v1.txt +10 -5
  21. data/docs/code_samples/passport_v1.txt +10 -5
  22. data/docs/code_samples/proof_of_address_v1.txt +10 -5
  23. data/docs/ruby-api-builder.md +30 -31
  24. data/docs/ruby-getting-started.md +64 -23
  25. data/docs/ruby-invoice-ocr.md +70 -59
  26. data/docs/ruby-passport-ocr.md +49 -40
  27. data/docs/ruby-receipt-ocr.md +45 -32
  28. data/lib/mindee/client.rb +150 -148
  29. data/lib/mindee/geometry/min_max.rb +23 -0
  30. data/lib/mindee/geometry/point.rb +35 -0
  31. data/lib/mindee/geometry/polygon.rb +23 -0
  32. data/lib/mindee/geometry/quadrilateral.rb +45 -0
  33. data/lib/mindee/geometry/utils.rb +81 -0
  34. data/lib/mindee/geometry.rb +5 -116
  35. data/lib/mindee/http/endpoint.rb +123 -16
  36. data/lib/mindee/http.rb +3 -0
  37. data/lib/mindee/input/sources.rb +87 -73
  38. data/lib/mindee/parsing/common/api_response.rb +109 -0
  39. data/lib/mindee/parsing/common/document.rb +48 -0
  40. data/lib/mindee/parsing/common/error.rb +24 -0
  41. data/lib/mindee/parsing/common/inference.rb +43 -0
  42. data/lib/mindee/parsing/common/ocr/mvision_v1.rb +34 -0
  43. data/lib/mindee/parsing/common/ocr/ocr.rb +169 -0
  44. data/lib/mindee/parsing/common/ocr.rb +3 -0
  45. data/lib/mindee/parsing/common/orientation.rb +26 -0
  46. data/lib/mindee/parsing/common/page.rb +40 -0
  47. data/lib/mindee/parsing/common/prediction.rb +15 -0
  48. data/lib/mindee/parsing/common/product.rb +19 -0
  49. data/lib/mindee/parsing/common.rb +10 -0
  50. data/lib/mindee/parsing/custom/classification_field.rb +28 -0
  51. data/lib/mindee/parsing/custom/list_field.rb +76 -0
  52. data/lib/mindee/parsing/custom.rb +4 -0
  53. data/lib/mindee/parsing/standard/amount_field.rb +26 -0
  54. data/lib/mindee/parsing/standard/base_field.rb +104 -0
  55. data/lib/mindee/parsing/standard/classification_field.rb +16 -0
  56. data/lib/mindee/parsing/standard/company_registration_field.rb +21 -0
  57. data/lib/mindee/parsing/standard/date_field.rb +34 -0
  58. data/lib/mindee/parsing/standard/locale_field.rb +50 -0
  59. data/lib/mindee/parsing/standard/payment_details_field.rb +42 -0
  60. data/lib/mindee/parsing/standard/position_field.rb +44 -0
  61. data/lib/mindee/parsing/standard/tax_field.rb +108 -0
  62. data/lib/mindee/parsing/standard/text_field.rb +16 -0
  63. data/lib/mindee/parsing/standard.rb +12 -0
  64. data/lib/mindee/parsing.rb +3 -2
  65. data/lib/mindee/{input → pdf}/pdf_processing.rb +4 -32
  66. data/lib/mindee/pdf/pdf_tools.rb +34 -0
  67. data/lib/mindee/pdf.rb +3 -0
  68. data/lib/mindee/product/.rubocop.yml +5 -0
  69. data/lib/mindee/product/custom/custom_v1.rb +35 -0
  70. data/lib/mindee/product/custom/custom_v1_document.rb +60 -0
  71. data/lib/mindee/product/custom/custom_v1_page.rb +32 -0
  72. data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +38 -0
  73. data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +37 -0
  74. data/lib/mindee/product/eu/license_plate/license_plate_v1_page.rb +34 -0
  75. data/lib/mindee/product/financial_document/financial_document_v1.rb +36 -0
  76. data/lib/mindee/product/financial_document/financial_document_v1_document.rb +188 -0
  77. data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +90 -0
  78. data/lib/mindee/product/financial_document/financial_document_v1_page.rb +32 -0
  79. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +38 -0
  80. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +43 -0
  81. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_page.rb +34 -0
  82. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +38 -0
  83. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +71 -0
  84. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +58 -0
  85. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rb +34 -0
  86. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +38 -0
  87. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +52 -0
  88. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb +34 -0
  89. data/lib/mindee/product/fr/id_card/id_card_v1.rb +38 -0
  90. data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +82 -0
  91. data/lib/mindee/product/fr/id_card/id_card_v1_page.rb +48 -0
  92. data/lib/mindee/product/invoice/invoice_v4.rb +37 -0
  93. data/lib/mindee/product/invoice/invoice_v4_document.rb +212 -0
  94. data/lib/mindee/product/invoice/invoice_v4_line_item.rb +66 -0
  95. data/lib/mindee/product/invoice/invoice_v4_page.rb +32 -0
  96. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +36 -0
  97. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +65 -0
  98. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +32 -0
  99. data/lib/mindee/product/passport/passport_v1.rb +36 -0
  100. data/lib/mindee/{parsing/prediction/fr/id_card/id_card_v1.rb → product/passport/passport_v1_document.rb} +45 -45
  101. data/lib/mindee/product/passport/passport_v1_page.rb +32 -0
  102. data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +36 -0
  103. data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +83 -0
  104. data/lib/mindee/product/proof_of_address/proof_of_address_v1_page.rb +32 -0
  105. data/lib/mindee/product/receipt/receipt_v4.rb +36 -0
  106. data/lib/mindee/product/receipt/receipt_v4_document.rb +86 -0
  107. data/lib/mindee/product/receipt/receipt_v4_page.rb +32 -0
  108. data/lib/mindee/product/receipt/receipt_v5.rb +36 -0
  109. data/lib/mindee/product/receipt/receipt_v5_document.rb +138 -0
  110. data/lib/mindee/product/receipt/receipt_v5_line_item.rb +69 -0
  111. data/lib/mindee/product/receipt/receipt_v5_page.rb +32 -0
  112. data/lib/mindee/product/us/bank_check/bank_check_v1.rb +38 -0
  113. data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +73 -0
  114. data/lib/mindee/product/us/bank_check/bank_check_v1_page.rb +34 -0
  115. data/lib/mindee/product.rb +16 -0
  116. data/lib/mindee/version.rb +2 -1
  117. data/lib/mindee.rb +3 -1
  118. metadata +87 -38
  119. data/docs/code_samples/shipping_containers_v1.txt +0 -14
  120. data/lib/mindee/document_config.rb +0 -60
  121. data/lib/mindee/parsing/document.rb +0 -31
  122. data/lib/mindee/parsing/error.rb +0 -22
  123. data/lib/mindee/parsing/inference.rb +0 -53
  124. data/lib/mindee/parsing/page.rb +0 -46
  125. data/lib/mindee/parsing/prediction/base.rb +0 -30
  126. data/lib/mindee/parsing/prediction/common_fields/amount.rb +0 -21
  127. data/lib/mindee/parsing/prediction/common_fields/base.rb +0 -72
  128. data/lib/mindee/parsing/prediction/common_fields/company_registration.rb +0 -17
  129. data/lib/mindee/parsing/prediction/common_fields/date.rb +0 -30
  130. data/lib/mindee/parsing/prediction/common_fields/locale.rb +0 -45
  131. data/lib/mindee/parsing/prediction/common_fields/payment_details.rb +0 -33
  132. data/lib/mindee/parsing/prediction/common_fields/position.rb +0 -39
  133. data/lib/mindee/parsing/prediction/common_fields/tax.rb +0 -40
  134. data/lib/mindee/parsing/prediction/common_fields/text.rb +0 -12
  135. data/lib/mindee/parsing/prediction/common_fields.rb +0 -11
  136. data/lib/mindee/parsing/prediction/custom/custom_v1.rb +0 -58
  137. data/lib/mindee/parsing/prediction/custom/fields.rb +0 -91
  138. data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +0 -34
  139. data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +0 -237
  140. data/lib/mindee/parsing/prediction/financial_document/financial_document_v1_line_item.rb +0 -58
  141. data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +0 -40
  142. data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +0 -49
  143. data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +0 -212
  144. data/lib/mindee/parsing/prediction/invoice/invoice_v4_line_item.rb +0 -58
  145. data/lib/mindee/parsing/prediction/passport/passport_v1.rb +0 -121
  146. data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +0 -80
  147. data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +0 -87
  148. data/lib/mindee/parsing/prediction/receipt/receipt_v5.rb +0 -136
  149. data/lib/mindee/parsing/prediction/receipt/receipt_v5_line_item.rb +0 -37
  150. data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +0 -38
  151. data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +0 -70
  152. data/lib/mindee/parsing/prediction.rb +0 -15
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'product'
4
+
5
+ module Mindee
6
+ module Parsing
7
+ # Common fields used for most documents.
8
+ module Common
9
# Base class for inference results.
# Stores the product information and the rotation flag read from the raw
# prediction. `@pages` starts out empty and `@prediction` is not assigned
# in this class.
class Inference
  # @return [Boolean]
  attr_reader :is_rotation_applied
  # @return [Array<Mindee::Parsing::Common::Page>]
  attr_reader :pages
  # @return [Mindee::Parsing::Common::Prediction]
  attr_reader :prediction
  # @return [Mindee::Parsing::Common::Product]
  attr_reader :product

  # @param raw_prediction [Hash]
  def initialize(raw_prediction)
    @pages = []
    @product = Product.new(raw_prediction['product'])
    @is_rotation_applied = raw_prediction['is_rotation_applied']
  end

  # Renders the product line, the rotation flag, the prediction and every page.
  # @return [String]
  def to_s
    rotation_label = @is_rotation_applied ? 'Yes' : 'No'
    sections = [
      "Inference\n#########",
      "\n:Product: #{@product.name} v#{@product.version}",
      "\n:Rotation applied: #{rotation_label}",
      "\n\nPrediction\n==========",
      "\n#{@prediction}",
      "\n\nPage Predictions\n================\n\n",
      @pages.map(&:to_s).join("\n\n"),
    ]
    # Append into a fresh mutable buffer, section by section.
    sections.each_with_object(String.new) { |section, buffer| buffer << section }
  end
end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Common
6
+ module Ocr
7
# Mindee Vision V1.
class MVisionV1
  # List of pages.
  # @return [Array<OcrPage>]
  attr_reader :pages

  # Builds one OcrPage for each entry of the 'pages' array.
  # @param prediction [Hash]
  def initialize(prediction)
    @pages = prediction['pages'].map { |page_prediction| OcrPage.new(page_prediction) }
  end

  # Text of every page, one page after the other, separated by a newline and
  # with surrounding whitespace removed.
  # @return [String]
  def to_s
    @pages.map(&:to_s).join("\n").strip
  end
end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,169 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'mvision_v1'
4
+
5
+ module Mindee
6
+ module Parsing
7
+ module Common
8
+ module Ocr
9
# A single word, with its confidence and position.
class OcrWord
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence
  # @return [String]
  attr_reader :text
  # Only set when the polygon is present and not empty.
  # @return [Mindee::Geometry::Quadrilateral]
  attr_reader :bounding_box
  # @return [Mindee::Geometry::Polygon]
  attr_reader :polygon

  # @param prediction [Hash]
  def initialize(prediction)
    @text = prediction['text']
    @confidence = prediction['confidence']
    @polygon = Geometry.polygon_from_prediction(prediction['polygon'])
    return if @polygon.nil? || @polygon.empty?

    @bounding_box = Geometry.get_bounding_box(@polygon)
  end

  # @return [String]
  def to_s
    text.to_s
  end
end
34
+
35
# A list of words which are on the same line.
class OcrLine < Array
  # Builds the line either from raw word predictions (each one is wrapped in
  # an OcrWord) or from an array of ready-made elements. `prediction` takes
  # precedence when both are given; with neither, the line is empty.
  # @param prediction [Hash, nil]
  # @param from_array [Array, nil]
  def initialize(prediction = nil, from_array = nil)
    if prediction.nil?
      super(from_array || [])
    else
      super(prediction.map { |word_prediction| OcrWord.new(word_prediction) })
    end
  end

  # Sort the words on the line from left to right.
  # The receiver is left untouched; a new line is returned.
  # @return [OcrLine]
  def sort_on_x
    # sort_by computes the left-most x of each word once instead of on every comparison.
    OcrLine.new(nil, sort_by { |word| Geometry.get_min_max_x(word.polygon).min })
  end

  # The words of the line separated by single spaces.
  # @return [String]
  def to_s
    map(&:to_s).join(' ')
  end
end
61
+
62
# OCR extraction for a single page.
class OcrPage
  # All the words on the page, in semi-random order.
  # @return [Array<OcrWord>]
  attr_reader :all_words
  # Lines cached by #all_lines; empty until #all_lines or #to_s has been called.
  # @return [Array<OcrLine>]
  attr_reader :lines

  # @param prediction [Hash]
  def initialize(prediction)
    @lines = []
    @all_words = prediction['all_words'].map { |word_prediction| OcrWord.new(word_prediction) }
  end

  # All the words on the page, ordered in lines.
  # The result is computed on first use and kept in `@lines`.
  # @return [Array<OcrLine>]
  def all_lines
    @lines = to_lines if @lines.empty?
    @lines
  end

  # One text line per non-blank OCR line, joined with newlines.
  # @return [String]
  def to_s
    all_lines.map(&:to_s).reject { |text| text.strip.empty? }.join("\n").strip
  end

  private

  # Order all the words on the page into lines.
  #
  # Words are sorted from top to bottom. The top-most remaining word anchors a
  # line and every remaining word that shares its line (see
  # #words_on_same_line?) joins it; the line is then sorted from left to right.
  # Each word is assigned to exactly one line.
  # @return [Array<OcrLine>]
  def to_lines
    remaining = @all_words.sort_by { |word| Geometry.get_min_max_y(word.polygon).min }
    lines = []
    until remaining.empty?
      anchor = remaining.shift
      same_line, remaining = remaining.partition { |word| words_on_same_line?(anchor, word) }
      lines.push(OcrLine.new(nil, [anchor, *same_line]).sort_on_x)
    end
    lines
  end

  # Determine if two words are on the same line: true when the centroid of one
  # word falls within the vertical extent of the other's polygon.
  # @param current_word [Mindee::Parsing::Common::Ocr::OcrWord]
  # @param next_word [Mindee::Parsing::Common::Ocr::OcrWord]
  # @return [Boolean]
  def words_on_same_line?(current_word, next_word)
    current_in_next = current_word.polygon.point_in_y?(next_word.polygon.centroid)
    next_in_current = next_word.polygon.point_in_y?(current_word.polygon.centroid)
    current_in_next || next_in_current
  end
end
149
+
150
# OCR extraction from the entire document.
class Ocr
  # Mindee Vision v1 results.
  # @return [Mindee::Parsing::Common::Ocr::MVisionV1]
  attr_reader :mvision_v1

  # Wraps the 'mvision-v1' entry of the raw prediction.
  # @param prediction [Hash]
  def initialize(prediction)
    raw_mvision = prediction['mvision-v1']
    @mvision_v1 = MVisionV1.new(raw_mvision)
  end

  # Delegates to the Mindee Vision v1 results.
  # @return [String]
  def to_s
    mvision_v1.to_s
  end
end
166
+ end
167
+ end
168
+ end
169
+ end
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ocr/ocr'
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Common
6
# Orientation of a single page.
class Orientation
  # @return [Integer]
  attr_reader :page_id
  # A prediction among these 3 possible outputs:
  # * 0 degrees: the page is already upright
  # * 90 degrees: the page must be rotated clockwise to be upright
  # * 270 degrees: the page must be rotated counterclockwise to be upright
  # @return [Integer, nil]
  attr_reader :value

  # Reads the orientation from the 'value' key of the prediction.
  # @param prediction [Hash]
  # @param page_id [Integer]
  def initialize(prediction, page_id)
    orientation_value = prediction['value']
    @value = orientation_value
    @page_id = page_id
  end
end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'product'
4
+
5
+ module Mindee
6
+ module Parsing
7
+ # Common fields used for most documents.
8
+ module Common
9
# Abstract wrapper class for prediction Pages
# Holds the prediction for a page as well as its orientation and id.
# `@prediction` is not assigned in this class.
class Page
  # Id of the page (as given by the API).
  # @return [Integer]
  attr_reader :page_id
  # Orientation of the page.
  # @return [Mindee::Parsing::Common::Orientation]
  attr_reader :orientation
  # Page prediction
  # @return [Mindee::Parsing::Common::Prediction]
  attr_reader :prediction

  # @param raw_prediction [Hash]
  def initialize(raw_prediction)
    @page_id = raw_prediction['id']
    raw_orientation = raw_prediction['orientation']
    @orientation = Orientation.new(raw_orientation, @page_id)
  end

  # A "Page <id>" title underlined with dashes, directly followed by the
  # string form of the prediction.
  # @return [String]
  def to_s
    heading = "Page #{@page_id}"
    underline = '-' * heading.size
    String.new << "#{heading}\n" << underline << @prediction.to_s
  end
end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Common
6
# Abstract base class for all predictions.
class Prediction
  # This base implementation has nothing to render and returns an empty string.
  # @return [String]
  def to_s
    ''
  end
end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Common
6
# Product information: the name, type and version read from the prediction.
class Product
  # Value of the 'name' key.
  attr_reader :name
  # Value of the 'type' key.
  attr_reader :type
  # Value of the 'version' key.
  attr_reader :version

  # @param prediction [Hash]
  def initialize(prediction)
    @name, @type, @version = prediction.values_at('name', 'type', 'version')
  end
end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'common/api_response'
4
+ require_relative 'common/document'
5
+ require_relative 'common/error'
6
+ require_relative 'common/inference'
7
+ require_relative 'common/ocr'
8
+ require_relative 'common/prediction'
9
+ require_relative 'common/orientation'
10
+ require_relative 'common/page'
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Custom
6
# Document classification (custom docs)
class ClassificationField
  # The classification value
  # @return [String]
  attr_reader :value
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence

  # @param prediction [Hash]
  def initialize(prediction)
    @value = prediction['value']
    @confidence = prediction['confidence']
  end

  # The value as a String; an empty string when there is no value.
  # @return [String]
  def to_s
    # to_s (rather than returning @value itself) keeps the String contract
    # even if the value is not a String.
    @value.to_s
  end
end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Custom
6
# A single item of a list field.
class ListFieldItem
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence
  # Only set when the polygon is present and not empty.
  # @return [Mindee::Geometry::Quadrilateral]
  attr_reader :bounding_box
  # @return [Mindee::Geometry::Polygon]
  attr_reader :polygon
  # @return [Array, Hash, String, nil]
  attr_reader :content

  # @param prediction [Hash]
  def initialize(prediction)
    @content = prediction['content']
    @confidence = prediction['confidence']
    @polygon = Geometry.polygon_from_prediction(prediction['polygon'])
    return if @polygon.nil? || @polygon.empty?

    @bounding_box = Geometry.get_bounding_box(@polygon)
  end

  # String form of the content.
  # @return [String]
  def to_s
    content.to_s
  end
end
31
+
32
# Field where actual values are kept in a list (custom docs).
class ListField
  # @return [Array<Mindee::Parsing::Custom::ListFieldItem>]
  attr_reader :values
  # @return [Integer, nil]
  attr_reader :page_id
  # true if the field was reconstructed or computed using other fields.
  # @return [Boolean]
  attr_reader :reconstructed
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence

  # @param prediction [Hash]
  # @param page_id [Integer, nil] when nil, the prediction's own 'page_id' is used.
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    @confidence = prediction['confidence']
    @page_id = page_id || prediction['page_id']
    @reconstructed = reconstructed
    @values = prediction['values'].map { |field| ListFieldItem.new(field) }
  end

  # The content of each item, in order.
  # @return [Array]
  def contents_list
    @values.map(&:content)
  end

  # The string form of each item, joined with `separator`.
  # @param separator [String]
  # @return [String]
  def contents_str(separator: ' ')
    @values.map(&:to_s).join(separator)
  end

  # Same as #contents_str with a single space as separator.
  # @return [String]
  def to_s
    contents_str(separator: ' ')
  end
end
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'custom/classification_field'
4
+ require_relative 'custom/list_field'
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base_field'
4
+
5
+ module Mindee
6
+ module Parsing
7
+ module Standard
8
# Represents an amount.
class AmountField < Field
  # Amount value, rounded to 3 decimal places.
  # @return [Float, nil]
  attr_reader :value

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    super(prediction, page_id, reconstructed: reconstructed)
    return if @value.nil?

    @value = @value.round(3)
  end

  # The amount formatted by Field.float_to_string.
  # @return [String]
  def to_s
    Field.float_to_string(value)
  end
end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../geometry'
4
+
5
+ module Mindee
6
+ module Parsing
7
+ module Standard
8
# Base Field object, upon which fields and feature fields are built
class AbstractField
  # Only set when the polygon is present and not empty.
  # @return [Mindee::Geometry::Quadrilateral, nil]
  attr_reader :bounding_box
  # @return [Mindee::Geometry::Polygon, nil]
  attr_reader :polygon
  # @return [Integer, nil]
  attr_reader :page_id
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float, nil]
  attr_accessor :confidence

  # @param prediction [Hash]
  # @param page_id [Integer, nil] when nil, the prediction's own 'page_id' is used.
  def initialize(prediction, page_id)
    @confidence = prediction['confidence'] if prediction.key?('confidence')
    @polygon = Geometry.polygon_from_prediction(prediction['polygon']) if prediction.key?('polygon')
    @bounding_box = Geometry.get_bounding_box(@polygon) unless @polygon.nil? || @polygon.empty?
    @page_id = page_id || prediction['page_id']
  end

  # String form of `@value`; a nil or false value renders as an empty string.
  # @return [String]
  def to_s
    @value ? @value.to_s : ''
  end

  # Multiply all the Mindee::Parsing::Standard::Field confidences in the array.
  # Returns 0.0 as soon as one confidence is missing, and 1.0 for an empty array.
  # @param field_array [Array]
  # @return [Float]
  def self.array_confidence(field_array)
    confidences = field_array.map(&:confidence)
    return 0.0 if confidences.any?(&:nil?)

    confidences.reduce(1, :*).to_f
  end

  # Add all the Mindee::Parsing::Standard::Field values in the array.
  # Returns 0.0 when any value is missing or when the array is empty.
  # @param field_array [Array]
  # @return [Float]
  def self.array_sum(field_array)
    values = field_array.map(&:value)
    return 0.0 if values.any?(&:nil?)

    # Plain left-to-right addition, on purpose: Array#sum may use compensated
    # summation for floats and could give slightly different results.
    values.reduce(0, :+).to_f
  end

  # Formats a number with as many decimals as its Float string form shows,
  # and at least `min_precision` of them.
  # @param value [Float]
  # @param min_precision [Integer]
  # @return [String] an empty string when `value` is nil.
  def self.float_to_string(value, min_precision = 2)
    return String.new if value.nil?

    # Non-finite floats ("Infinity", "NaN") have no decimal part in their
    # string form: count that as zero decimals instead of failing on nil.
    decimals = value.to_f.to_s.split('.')[1].to_s
    precision = [decimals.size, min_precision].max
    format("%.#{precision}f", value)
  end
end
70
+
71
# Base field object: an AbstractField that also carries a value and a
# `reconstructed` flag.
class Field < AbstractField
  # @return [String, Float, Integer, Boolean]
  attr_reader :value
  # true if the field was reconstructed or computed using other fields.
  # @return [Boolean]
  attr_reader :reconstructed

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    super(prediction, page_id)
    raw_value = prediction['value']
    @value = raw_value
    @reconstructed = reconstructed
  end
end
88
+
89
+ # Feature field object wrapper for specialized methods.
90
+ class FeatureField < AbstractField
91
# Format a string for display: nil becomes an empty string, and a string
# longer than `max_col_size` is cut and suffixed with '...' so that the
# result is exactly `max_col_size` characters long.
# When `max_col_size` is smaller than the ellipsis itself (< 3), the result
# is just '...'.
# @param in_str [String, nil]
# @param max_col_size [Integer, nil] no shortening is done when nil.
# @return [String]
def format_for_display(in_str, max_col_size = nil)
  return '' if in_str.nil?
  return in_str if max_col_size.nil? || in_str.length <= max_col_size

  # Keep room for the 3 characters of the ellipsis; never use a negative length.
  kept = [max_col_size - 3, 0].max
  "#{in_str[0, kept]}..."
end
101
+ end
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base_field'
4
+
5
+ module Mindee
6
+ module Parsing
7
+ module Standard
8
# A classifier value. Behaves like a plain Field; only the documented type
# of `value` is narrowed.
class ClassificationField < Field
  # The classification, as a String.
  # @return [String]
  attr_reader :value
end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Standard
6
# Company registration number or code, and its type.
class CompanyRegistration < Field
  # Value of the 'type' key of the prediction.
  # @return [String]
  attr_reader :type

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    super(prediction, page_id, reconstructed: reconstructed)
    registration_type = prediction['type']
    @type = registration_type
  end
end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date'
4
+
5
+ require_relative 'base_field'
6
+
7
+ module Mindee
8
+ module Parsing
9
+ module Standard
10
# Represents a date.
class DateField < Field
  # The date as a standard Ruby `Date` object.
  # Only set when `value` is present.
  # @return [Date, nil]
  attr_reader :date_object
  # The ISO 8601 representation of the date, regardless of the `raw` contents.
  # @return [String, nil]
  attr_reader :value
  # The textual representation of the date as found on the document.
  # @return [String, nil]
  attr_reader :raw

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean] accepted like on the other fields, so a
  #   date can be built the same way as any other Field.
  # @raise [ArgumentError] from `Date.parse` when `value` is not a valid date.
  def initialize(prediction, page_id, reconstructed: false)
    super(prediction, page_id, reconstructed: reconstructed)
    # Keep the raw text even when no value is available.
    @raw = prediction['raw']
    return unless @value

    @date_object = Date.parse(@value)
  end
end
32
+ end
33
+ end
34
+ end