mindee 2.2.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +2 -0
  4. data/.yardopts +1 -0
  5. data/CHANGELOG.md +36 -0
  6. data/README.md +29 -16
  7. data/bin/mindee.rb +23 -26
  8. data/docs/code_samples/bank_account_details_v1.txt +10 -5
  9. data/docs/code_samples/bank_account_details_v2.txt +19 -0
  10. data/docs/code_samples/bank_check_v1.txt +10 -5
  11. data/docs/code_samples/carte_vitale_v1.txt +10 -5
  12. data/docs/code_samples/custom_v1.txt +19 -10
  13. data/docs/code_samples/default.txt +10 -2
  14. data/docs/code_samples/expense_receipts_v4.txt +10 -5
  15. data/docs/code_samples/expense_receipts_v5.txt +11 -6
  16. data/docs/code_samples/financial_document_v1.txt +10 -5
  17. data/docs/code_samples/idcard_fr_v1.txt +10 -5
  18. data/docs/code_samples/invoice_splitter_v1_async.txt +66 -0
  19. data/docs/code_samples/invoices_v4.txt +10 -5
  20. data/docs/code_samples/license_plates_v1.txt +10 -5
  21. data/docs/code_samples/passport_v1.txt +10 -5
  22. data/docs/code_samples/proof_of_address_v1.txt +10 -5
  23. data/docs/ruby-api-builder.md +30 -31
  24. data/docs/ruby-getting-started.md +64 -23
  25. data/docs/ruby-invoice-ocr.md +70 -59
  26. data/docs/ruby-passport-ocr.md +49 -40
  27. data/docs/ruby-receipt-ocr.md +45 -32
  28. data/lib/mindee/client.rb +150 -148
  29. data/lib/mindee/geometry/min_max.rb +23 -0
  30. data/lib/mindee/geometry/point.rb +35 -0
  31. data/lib/mindee/geometry/polygon.rb +23 -0
  32. data/lib/mindee/geometry/quadrilateral.rb +45 -0
  33. data/lib/mindee/geometry/utils.rb +81 -0
  34. data/lib/mindee/geometry.rb +5 -116
  35. data/lib/mindee/http/endpoint.rb +123 -16
  36. data/lib/mindee/http.rb +3 -0
  37. data/lib/mindee/input/sources.rb +87 -73
  38. data/lib/mindee/parsing/common/api_response.rb +109 -0
  39. data/lib/mindee/parsing/common/document.rb +48 -0
  40. data/lib/mindee/parsing/common/error.rb +24 -0
  41. data/lib/mindee/parsing/common/inference.rb +43 -0
  42. data/lib/mindee/parsing/common/ocr/mvision_v1.rb +34 -0
  43. data/lib/mindee/parsing/common/ocr/ocr.rb +169 -0
  44. data/lib/mindee/parsing/common/ocr.rb +3 -0
  45. data/lib/mindee/parsing/common/orientation.rb +26 -0
  46. data/lib/mindee/parsing/common/page.rb +40 -0
  47. data/lib/mindee/parsing/common/prediction.rb +15 -0
  48. data/lib/mindee/parsing/common/product.rb +19 -0
  49. data/lib/mindee/parsing/common.rb +10 -0
  50. data/lib/mindee/parsing/custom/classification_field.rb +28 -0
  51. data/lib/mindee/parsing/custom/list_field.rb +76 -0
  52. data/lib/mindee/parsing/custom.rb +4 -0
  53. data/lib/mindee/parsing/standard/amount_field.rb +26 -0
  54. data/lib/mindee/parsing/standard/base_field.rb +104 -0
  55. data/lib/mindee/parsing/standard/classification_field.rb +16 -0
  56. data/lib/mindee/parsing/standard/company_registration_field.rb +21 -0
  57. data/lib/mindee/parsing/standard/date_field.rb +34 -0
  58. data/lib/mindee/parsing/standard/locale_field.rb +50 -0
  59. data/lib/mindee/parsing/standard/payment_details_field.rb +42 -0
  60. data/lib/mindee/parsing/standard/position_field.rb +44 -0
  61. data/lib/mindee/parsing/standard/tax_field.rb +108 -0
  62. data/lib/mindee/parsing/standard/text_field.rb +16 -0
  63. data/lib/mindee/parsing/standard.rb +12 -0
  64. data/lib/mindee/parsing.rb +3 -2
  65. data/lib/mindee/{input → pdf}/pdf_processing.rb +4 -32
  66. data/lib/mindee/pdf/pdf_tools.rb +34 -0
  67. data/lib/mindee/pdf.rb +3 -0
  68. data/lib/mindee/product/.rubocop.yml +5 -0
  69. data/lib/mindee/product/custom/custom_v1.rb +35 -0
  70. data/lib/mindee/product/custom/custom_v1_document.rb +60 -0
  71. data/lib/mindee/product/custom/custom_v1_page.rb +32 -0
  72. data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +38 -0
  73. data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +37 -0
  74. data/lib/mindee/product/eu/license_plate/license_plate_v1_page.rb +34 -0
  75. data/lib/mindee/product/financial_document/financial_document_v1.rb +36 -0
  76. data/lib/mindee/product/financial_document/financial_document_v1_document.rb +188 -0
  77. data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +90 -0
  78. data/lib/mindee/product/financial_document/financial_document_v1_page.rb +32 -0
  79. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +38 -0
  80. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +43 -0
  81. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_page.rb +34 -0
  82. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +38 -0
  83. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +71 -0
  84. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +58 -0
  85. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_page.rb +34 -0
  86. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +38 -0
  87. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +52 -0
  88. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb +34 -0
  89. data/lib/mindee/product/fr/id_card/id_card_v1.rb +38 -0
  90. data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +82 -0
  91. data/lib/mindee/product/fr/id_card/id_card_v1_page.rb +48 -0
  92. data/lib/mindee/product/invoice/invoice_v4.rb +37 -0
  93. data/lib/mindee/product/invoice/invoice_v4_document.rb +212 -0
  94. data/lib/mindee/product/invoice/invoice_v4_line_item.rb +66 -0
  95. data/lib/mindee/product/invoice/invoice_v4_page.rb +32 -0
  96. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +36 -0
  97. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +65 -0
  98. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_page.rb +32 -0
  99. data/lib/mindee/product/passport/passport_v1.rb +36 -0
  100. data/lib/mindee/{parsing/prediction/fr/id_card/id_card_v1.rb → product/passport/passport_v1_document.rb} +45 -45
  101. data/lib/mindee/product/passport/passport_v1_page.rb +32 -0
  102. data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +36 -0
  103. data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +83 -0
  104. data/lib/mindee/product/proof_of_address/proof_of_address_v1_page.rb +32 -0
  105. data/lib/mindee/product/receipt/receipt_v4.rb +36 -0
  106. data/lib/mindee/product/receipt/receipt_v4_document.rb +86 -0
  107. data/lib/mindee/product/receipt/receipt_v4_page.rb +32 -0
  108. data/lib/mindee/product/receipt/receipt_v5.rb +36 -0
  109. data/lib/mindee/product/receipt/receipt_v5_document.rb +138 -0
  110. data/lib/mindee/product/receipt/receipt_v5_line_item.rb +69 -0
  111. data/lib/mindee/product/receipt/receipt_v5_page.rb +32 -0
  112. data/lib/mindee/product/us/bank_check/bank_check_v1.rb +38 -0
  113. data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +73 -0
  114. data/lib/mindee/product/us/bank_check/bank_check_v1_page.rb +34 -0
  115. data/lib/mindee/product.rb +16 -0
  116. data/lib/mindee/version.rb +2 -1
  117. data/lib/mindee.rb +3 -1
  118. metadata +87 -38
  119. data/docs/code_samples/shipping_containers_v1.txt +0 -14
  120. data/lib/mindee/document_config.rb +0 -60
  121. data/lib/mindee/parsing/document.rb +0 -31
  122. data/lib/mindee/parsing/error.rb +0 -22
  123. data/lib/mindee/parsing/inference.rb +0 -53
  124. data/lib/mindee/parsing/page.rb +0 -46
  125. data/lib/mindee/parsing/prediction/base.rb +0 -30
  126. data/lib/mindee/parsing/prediction/common_fields/amount.rb +0 -21
  127. data/lib/mindee/parsing/prediction/common_fields/base.rb +0 -72
  128. data/lib/mindee/parsing/prediction/common_fields/company_registration.rb +0 -17
  129. data/lib/mindee/parsing/prediction/common_fields/date.rb +0 -30
  130. data/lib/mindee/parsing/prediction/common_fields/locale.rb +0 -45
  131. data/lib/mindee/parsing/prediction/common_fields/payment_details.rb +0 -33
  132. data/lib/mindee/parsing/prediction/common_fields/position.rb +0 -39
  133. data/lib/mindee/parsing/prediction/common_fields/tax.rb +0 -40
  134. data/lib/mindee/parsing/prediction/common_fields/text.rb +0 -12
  135. data/lib/mindee/parsing/prediction/common_fields.rb +0 -11
  136. data/lib/mindee/parsing/prediction/custom/custom_v1.rb +0 -58
  137. data/lib/mindee/parsing/prediction/custom/fields.rb +0 -91
  138. data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +0 -34
  139. data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +0 -237
  140. data/lib/mindee/parsing/prediction/financial_document/financial_document_v1_line_item.rb +0 -58
  141. data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +0 -40
  142. data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +0 -49
  143. data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +0 -212
  144. data/lib/mindee/parsing/prediction/invoice/invoice_v4_line_item.rb +0 -58
  145. data/lib/mindee/parsing/prediction/passport/passport_v1.rb +0 -121
  146. data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +0 -80
  147. data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +0 -87
  148. data/lib/mindee/parsing/prediction/receipt/receipt_v5.rb +0 -136
  149. data/lib/mindee/parsing/prediction/receipt/receipt_v5_line_item.rb +0 -37
  150. data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +0 -38
  151. data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +0 -70
  152. data/lib/mindee/parsing/prediction.rb +0 -15
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'product'
4
+
5
+ module Mindee
6
+ module Parsing
7
+ # Common fields used for most documents.
8
+ module Common
9
# Base class for prediction inferences.
# Keeps the product information, the rotation flag, a document-level
# prediction and the list of page-level predictions.
class Inference
  # Value of the `is_rotation_applied` key of the raw prediction.
  # @return [Boolean]
  attr_reader :is_rotation_applied
  # Page-level predictions; the constructor leaves this list empty.
  # @return [Array<Mindee::Parsing::Common::Page>]
  attr_reader :pages
  # Document-level prediction; not assigned by this constructor.
  # @return [Mindee::Parsing::Common::Prediction]
  attr_reader :prediction
  # Product built from the `product` key of the raw prediction.
  # @return [Mindee::Parsing::Common::Product]
  attr_reader :product

  # @param raw_prediction [Hash]
  def initialize(raw_prediction)
    @pages = []
    @product = Product.new(raw_prediction['product'])
    @is_rotation_applied = raw_prediction['is_rotation_applied']
  end

  # Renders the product line, the rotation flag, the document prediction
  # and every page prediction as one report.
  # @return [String]
  def to_s
    rotation_label = @is_rotation_applied ? 'Yes' : 'No'
    [
      "Inference\n#########",
      "\n:Product: #{@product.name} v#{@product.version}",
      "\n:Rotation applied: #{rotation_label}",
      "\n\nPrediction\n==========",
      "\n#{@prediction}",
      "\n\nPage Predictions\n================\n\n",
      @pages.map(&:to_s).join("\n\n")
    ].join
  end
end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Common
6
+ module Ocr
7
# Mindee Vision V1 OCR results.
class MVisionV1
  # One OcrPage per entry of the prediction's `pages` list.
  # @return [Array<OcrPage>]
  attr_reader :pages

  # @param prediction [Hash]
  def initialize(prediction)
    @pages = prediction['pages'].map { |page_prediction| OcrPage.new(page_prediction) }
  end

  # Text of every page, each preceded by a newline, with the surrounding
  # whitespace stripped from the final result.
  # @return [String]
  def to_s
    @pages.map { |page| "\n#{page}" }.join.strip
  end
end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,169 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'mvision_v1'
4
+
5
+ module Mindee
6
+ module Parsing
7
+ module Common
8
+ module Ocr
9
# A single word found by the OCR.
class OcrWord
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence
  # @return [String]
  attr_reader :text
  # Only set when the polygon is present and non-empty.
  # @return [Mindee::Geometry::Quadrilateral]
  attr_reader :bounding_box
  # @return [Mindee::Geometry::Polygon]
  attr_reader :polygon

  # @param prediction [Hash]
  def initialize(prediction)
    @confidence = prediction['confidence']
    @text = prediction['text']
    @polygon = Geometry.polygon_from_prediction(prediction['polygon'])
    # Without any point there is nothing to compute a box from.
    return if @polygon.nil? || @polygon.empty?

    @bounding_box = Geometry.get_bounding_box(@polygon)
  end

  # @return [String]
  def to_s
    @text.to_s
  end
end
34
+
35
# A list of words which are on the same line.
class OcrLine < Array
  # Builds the line either from raw word predictions or from an existing
  # array of words; with neither argument the line is empty.
  # @param prediction [Hash, nil] raw word predictions, each wrapped in an OcrWord
  # @param from_array [Array, nil] already-built words, copied as-is
  def initialize(prediction = nil, from_array = nil)
    if !prediction.nil?
      super(prediction.map { |word_prediction| OcrWord.new(word_prediction) })
    elsif !from_array.nil?
      super(from_array)
    else
      super()
    end
  end

  # Sort the words on the line from left to right.
  # The sort key (minimum X of the word polygon) is computed once per word.
  # @return [OcrLine] a new line; the receiver is left untouched
  def sort_on_x
    ordered = sort_by { |word| Geometry.get_min_max_x(word.polygon).min }
    OcrLine.new(nil, ordered)
  end

  # Words joined by a single space.
  # @return [String]
  def to_s
    map(&:to_s).join(' ')
  end
end
61
+
62
# OCR extraction for a single page.
class OcrPage
  # All the words on the page, in semi-random order.
  # @return [Array<OcrWord>]
  attr_reader :all_words
  # Lines computed so far; empty until #all_lines (or #to_s) is called.
  # @return [Array<OcrLine>]
  attr_reader :lines

  # @param prediction [Hash] must hold the raw words under `all_words`
  def initialize(prediction)
    @lines = []
    @all_words = prediction['all_words'].map { |word_prediction| OcrWord.new(word_prediction) }
  end

  # All the words on the page, ordered in lines.
  # The result is cached once it is non-empty.
  # @return [Array<OcrLine>]
  def all_lines
    @lines = to_lines if @lines.empty?
    @lines
  end

  # One text line per OCR line; lines that are blank once stripped are skipped.
  # @return [String]
  def to_s
    all_lines.map(&:to_s).reject { |text| text.strip.empty? }.join("\n").strip
  end

  private

  # Builds a single line: the first word not yet consumed becomes the
  # reference (unless one is given), and every remaining word that shares
  # its vertical span is added to the line.
  # @param sorted_words [Array<OcrWord>]
  # @param current [OcrWord, nil] reference word, or nil to pick the first free one
  # @param indexes [Array<Integer>] positions already consumed; updated in place
  # @param lines [Array<OcrLine>] receives the new line, sorted left to right
  def parse_one(sorted_words, current, indexes, lines)
    line = OcrLine.new([])
    sorted_words.each_with_index do |word, idx|
      next if indexes.include?(idx)

      if current.nil?
        current = word
      elsif !words_on_same_line?(current, word)
        next
      end
      indexes.push(idx)
      line.push(word)
    end
    lines.push(line.sort_on_x) if line.any?
  end

  # Order all the words on the page into lines.
  # @return [Array<OcrLine>]
  def to_lines
    indexes = []
    lines = []

    # make sure words are sorted from top to bottom
    sorted_words = @all_words.sort_by { |word| Geometry.get_min_max_y(word.polygon).min }
    # Each pass consumes at least one word, so stop as soon as all are placed.
    parse_one(sorted_words, nil, indexes, lines) while indexes.size < sorted_words.size
    lines
  end

  # Determine if two words are on the same line: true when the centroid of
  # either polygon passes the other polygon's `point_in_y?` check.
  # @param current_word [Mindee::Parsing::Common::Ocr::OcrWord]
  # @param next_word [Mindee::Parsing::Common::Ocr::OcrWord]
  # @return [Boolean]
  def words_on_same_line?(current_word, next_word)
    current_in_next = current_word.polygon.point_in_y?(next_word.polygon.centroid)
    next_in_current = next_word.polygon.point_in_y?(current_word.polygon.centroid)
    current_in_next || next_in_current
  end
end
149
+
150
# OCR extraction covering the entire document.
class Ocr
  # Mindee Vision v1 results, built from the `mvision-v1` key.
  # @return [Mindee::Parsing::Common::Ocr::MVisionV1]
  attr_reader :mvision_v1

  # @param prediction [Hash]
  def initialize(prediction)
    raw_mvision = prediction['mvision-v1']
    @mvision_v1 = MVisionV1.new(raw_mvision)
  end

  # Delegates to the Mindee Vision v1 results.
  # @return [String]
  def to_s
    mvision_v1.to_s
  end
end
166
+ end
167
+ end
168
+ end
169
+ end
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ocr/ocr'
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Common
6
# Orientation of a page.
class Orientation
  # Id of the page this orientation belongs to, as passed to the constructor.
  # @return [Integer]
  attr_reader :page_id
  # A prediction among these 3 possible outputs:
  # * 0 degrees: the page is already upright
  # * 90 degrees: the page must be rotated clockwise to be upright
  # * 270 degrees: the page must be rotated counterclockwise to be upright
  # @return [Integer, nil]
  attr_reader :value

  # @param prediction [Hash] the orientation is read from its `value` key
  # @param page_id [Integer]
  def initialize(prediction, page_id)
    @page_id = page_id
    @value = prediction['value']
  end
end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'product'
4
+
5
+ module Mindee
6
+ module Parsing
7
+ # Common fields used for most documents.
8
+ module Common
9
# Abstract wrapper class for prediction pages.
# Holds the prediction for a page as well as its orientation and id.
class Page
  # Id of the page (as given by the API).
  # @return [Integer]
  attr_reader :page_id
  # Orientation of the page.
  # @return [Mindee::Parsing::Common::Orientation]
  attr_reader :orientation
  # Page prediction; not assigned by this constructor.
  # @return [Mindee::Parsing::Common::Prediction]
  attr_reader :prediction

  # @param raw_prediction [Hash] read for its `id` and `orientation` keys
  def initialize(raw_prediction)
    @page_id = raw_prediction['id']
    @orientation = Orientation.new(raw_prediction['orientation'], @page_id)
  end

  # Title line, a dash underline of the same width, then the prediction
  # text appended directly after the underline.
  # @return [String]
  def to_s
    title = "Page #{@page_id}"
    underline = '-' * title.size
    [title, "\n", underline, @prediction.to_s].join
  end
end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Common
6
# Abstract base class shared by all predictions.
class Prediction
  # Default rendering: an empty string.
  # @return [String]
  def to_s
    ''
  end
end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Common
6
# Product information, read from the `name`, `type` and `version` keys
# of the raw prediction.
class Product
  # Value of the `name` key.
  attr_reader :name
  # Value of the `type` key.
  attr_reader :type
  # Value of the `version` key.
  attr_reader :version

  # @param prediction [Hash]
  def initialize(prediction)
    @name, @type, @version = prediction.values_at('name', 'type', 'version')
  end
end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'common/api_response'
4
+ require_relative 'common/document'
5
+ require_relative 'common/error'
6
+ require_relative 'common/inference'
7
+ require_relative 'common/ocr'
8
+ require_relative 'common/prediction'
9
+ require_relative 'common/orientation'
10
+ require_relative 'common/page'
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Custom
6
# Document classification (custom docs)
class ClassificationField
  # The classification value
  # @return [String]
  attr_reader :value
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence

  # @param prediction [Hash] read for its `value` and `confidence` keys
  def initialize(prediction)
    @value = prediction['value']
    @confidence = prediction['confidence']
  end

  # Always a String: empty when there is no value, and the value's string
  # form otherwise, even if the raw value was not a String.
  # @return [String]
  def to_s
    @value.to_s
  end
end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Custom
6
# One item of a list field.
class ListFieldItem
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence
  # Only set when the polygon is present and non-empty.
  # @return [Mindee::Geometry::Quadrilateral]
  attr_reader :bounding_box
  # @return [Mindee::Geometry::Polygon]
  attr_reader :polygon
  # @return [Array, Hash, String, nil]
  attr_reader :content

  # @param prediction [Hash]
  def initialize(prediction)
    @confidence = prediction['confidence']
    @content = prediction['content']
    @polygon = Geometry.polygon_from_prediction(prediction['polygon'])
    # Without any point there is nothing to compute a box from.
    return if @polygon.nil? || @polygon.empty?

    @bounding_box = Geometry.get_bounding_box(@polygon)
  end

  # @return [String]
  def to_s
    @content.to_s
  end
end
31
+
32
# Field whose actual values are kept in a list (custom docs).
class ListField
  # One item per entry of the prediction's `values` list.
  # @return [Array<Mindee::Parsing::Custom::ListFieldItem>]
  attr_reader :values
  # The explicit page id when given, otherwise the prediction's `page_id`.
  # @return [Integer, nil]
  attr_reader :page_id
  # true if the field was reconstructed or computed using other fields.
  # @return [Boolean]
  attr_reader :reconstructed
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    @reconstructed = reconstructed
    @page_id = page_id || prediction['page_id']
    @confidence = prediction['confidence']
    @values = prediction['values'].map { |field| ListFieldItem.new(field) }
  end

  # Raw content of every item, in order.
  # @return [Array]
  def contents_list
    @values.map(&:content)
  end

  # String form of every item, joined with the separator.
  # @param separator [String]
  # @return [String]
  def contents_str(separator: ' ')
    @values.map(&:to_s).join(separator)
  end

  # Items joined by a single space.
  # @return [String]
  def to_s
    contents_str(separator: ' ')
  end
end
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'custom/classification_field'
4
+ require_relative 'custom/list_field'
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base_field'
4
+
5
+ module Mindee
6
+ module Parsing
7
+ module Standard
8
# Represents a numeric amount, rounded to 3 decimal places.
class AmountField < Field
  # Amount value as 3 decimal float
  # @return [Float, nil]
  attr_reader :value

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    super
    # A missing value stays nil; anything else is rounded.
    @value = @value&.round(3)
  end

  # Delegates to Field.float_to_string with its default precision.
  # @return [String]
  def to_s
    Field.float_to_string(@value)
  end
end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../geometry'
4
+
5
+ module Mindee
6
+ module Parsing
7
+ module Standard
8
# Base Field object, upon which fields and feature fields are built
class AbstractField
  # Only set when the polygon is present and non-empty.
  # @return [Mindee::Geometry::Quadrilateral, nil]
  attr_reader :bounding_box
  # @return [Mindee::Geometry::Polygon, nil]
  attr_reader :polygon
  # The explicit page id when given, otherwise the prediction's `page_id`.
  # @return [Integer, nil]
  attr_reader :page_id
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float, nil]
  attr_accessor :confidence

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  def initialize(prediction, page_id)
    @confidence = prediction['confidence'] if prediction.key?('confidence')
    @polygon = Geometry.polygon_from_prediction(prediction['polygon']) if prediction.key?('polygon')
    @bounding_box = Geometry.get_bounding_box(@polygon) unless @polygon.nil? || @polygon.empty?
    @page_id = page_id || prediction['page_id']
  end

  # String form of the value; empty when the value is nil or false.
  # @return [String]
  def to_s
    @value ? @value.to_s : ''
  end

  # Multiply all the Mindee::Parsing::Standard::Field confidences in the array.
  # @param field_array [Array] objects responding to `confidence`
  # @return [Float] 0.0 as soon as one confidence is missing; 1.0 for an empty array
  def self.array_confidence(field_array)
    return 0.0 if field_array.any? { |field| field.confidence.nil? }

    field_array.inject(1) { |product, field| product * field.confidence }.to_f
  end

  # Add all the Mindee::Parsing::Standard::Field values in the array.
  # @param field_array [Array] objects responding to `value`
  # @return [Float] 0.0 as soon as one value is missing
  def self.array_sum(field_array)
    return 0.0 if field_array.any? { |field| field.value.nil? }

    field_array.inject(0) { |total, field| total + field.value }.to_f
  end

  # Formats a number with as many decimals as its float representation
  # shows, but never fewer than `min_precision`.
  # @param value [Float]
  # @param min_precision [Integer]
  # @return [String] empty when value is nil
  def self.float_to_string(value, min_precision = 2)
    return String.new if value.nil?

    # Non-finite floats ("Infinity", "NaN") have no fractional part in
    # their string form; treat that as zero decimals instead of failing.
    fractional = value.to_f.to_s.split('.')[1].to_s
    precision = [fractional.size, min_precision].max
    format("%.#{precision}f", value)
  end
end

# Base field object.
class Field < AbstractField
  # @return [String, Float, Integer, Boolean]
  attr_reader :value
  # true if the field was reconstructed or computed using other fields.
  # @return [Boolean]
  attr_reader :reconstructed

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    # Explicit arguments: the parent constructor takes no keyword.
    super(prediction, page_id)
    @value = prediction['value']
    @reconstructed = reconstructed
  end
end

# Feature field object wrapper for specialized methods.
class FeatureField < AbstractField
  # Format strings for display by shortening long strings and assigning empty ones.
  # A string that fits in the column is returned unchanged; a longer one is
  # cut so that the text plus the trailing "..." is exactly `max_col_size` long.
  # @param in_str [String, nil]
  # @param max_col_size [Integer, nil] nil disables truncation
  # @return [String]
  def format_for_display(in_str, max_col_size = nil)
    return '' if in_str.nil?
    return in_str if max_col_size.nil? || in_str.length <= max_col_size

    # Reserve three characters for the ellipsis; never ask for a negative length.
    kept = [max_col_size - 3, 0].max
    "#{in_str[0, kept]}..."
  end
end
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base_field'
4
+
5
+ module Mindee
6
+ module Parsing
7
+ module Standard
8
# Represents a classifier value.
# Adds nothing to Field beyond documenting the value as a String.
class ClassificationField < Field
  # Value as String
  # @return [String]
  attr_reader :value
end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Standard
6
# Company registration number or code, together with its type.
class CompanyRegistration < Field
  # Value of the prediction's `type` key.
  # @return [String]
  attr_reader :type

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    # Bare `super` forwards all three arguments to Field.
    super
    @type = prediction['type']
  end
end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date'
4
+
5
+ require_relative 'base_field'
6
+
7
+ module Mindee
8
+ module Parsing
9
+ module Standard
10
# Represents a date.
class DateField < Field
  # The date as a standard Ruby `Date` object.
  # @return [Date, nil]
  attr_reader :date_object
  # The ISO 8601 representation of the date, regardless of the `raw` contents.
  # @return [String, nil]
  attr_reader :value
  # The textual representation of the date as found on the document.
  # Only populated when a value is present.
  # @return [String, nil]
  attr_reader :raw

  # Accepts the same `reconstructed:` keyword as Field and the other field
  # classes, so every field can be built with one call shape.
  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  # @raise [ArgumentError] when the value is present but `Date.parse` rejects it
  def initialize(prediction, page_id, reconstructed: false)
    # Bare `super` forwards the positional arguments and the keyword to Field.
    super
    return unless @value

    @date_object = Date.parse(@value)
    @raw = prediction['raw']
  end
end
32
+ end
33
+ end
34
+ end