mindee 3.12.0 → 3.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +26 -0
- data/README.md +23 -23
- data/Rakefile +5 -0
- data/docs/bank_account_details_v2.md +5 -1
- data/docs/bank_check_v1.md +6 -2
- data/docs/bank_statement_fr_v1.md +3 -0
- data/docs/barcode_reader_v1.md +5 -1
- data/docs/bill_of_lading_v1.md +251 -0
- data/docs/carte_grise_v1.md +5 -1
- data/docs/carte_vitale_v1.md +5 -1
- data/docs/code_samples/bill_of_lading_v1_async.txt +19 -0
- data/docs/code_samples/energy_bill_fra_v1_async.txt +19 -0
- data/docs/code_samples/invoices_v4_async.txt +19 -0
- data/docs/code_samples/nutrition_facts_v1_async.txt +19 -0
- data/docs/code_samples/payslip_fra_v2_async.txt +19 -0
- data/docs/cropper_v1.md +6 -2
- data/docs/custom_v1.md +5 -3
- data/docs/energy_bill_fra_v1.md +309 -0
- data/docs/eu_driver_license_v1.md +6 -2
- data/docs/expense_receipts_v5.md +30 -5
- data/docs/financial_document_v1.md +43 -1
- data/docs/generated_v1.md +3 -0
- data/docs/getting_started.md +3 -0
- data/docs/idcard_fr_v2.md +15 -2
- data/docs/international_id_v2.md +13 -1
- data/docs/invoice_splitter_v1.md +16 -13
- data/docs/invoices_v4.md +70 -23
- data/docs/license_plates_v1.md +5 -1
- data/docs/multi_receipts_detector_v1.md +5 -1
- data/docs/nutrition_facts_v1.md +374 -0
- data/docs/passport_v1.md +5 -1
- data/docs/payslip_fra_v2.md +294 -0
- data/docs/proof_of_address_v1.md +5 -1
- data/docs/resume_v1.md +24 -1
- data/docs/us_driver_license_v1.md +6 -2
- data/docs/us_healthcare_cards_v1.md +5 -1
- data/docs/us_mail_v2.md +6 -2
- data/docs/us_w9_v1.md +6 -2
- data/examples/auto_invoice_splitter_extraction.rb +48 -0
- data/examples/auto_multi_receipts_detector_extraction.rb +31 -0
- data/lib/mindee/client.rb +20 -8
- data/lib/mindee/{image_extraction → extraction}/common/extracted_image.rb +1 -1
- data/lib/mindee/extraction/common/image_extractor.rb +192 -0
- data/lib/mindee/{image_extraction → extraction}/common.rb +1 -0
- data/lib/mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.rb +32 -0
- data/lib/mindee/extraction/pdf_extractor/extracted_pdf.rb +55 -0
- data/lib/mindee/extraction/pdf_extractor/pdf_extractor.rb +111 -0
- data/lib/mindee/extraction/pdf_extractor.rb +4 -0
- data/lib/mindee/extraction/tax_extractor/tax_extractor.rb +322 -0
- data/lib/mindee/extraction/tax_extractor.rb +1 -320
- data/lib/mindee/extraction.rb +3 -0
- data/lib/mindee/http/endpoint.rb +18 -6
- data/lib/mindee/parsing/common/api_response.rb +1 -1
- data/lib/mindee/parsing/common/document.rb +31 -1
- data/lib/mindee/parsing/common/extras/cropper_extra.rb +29 -0
- data/lib/mindee/parsing/common/extras/extras.rb +50 -0
- data/lib/mindee/parsing/common/extras/full_text_ocr_extra.rb +32 -0
- data/lib/mindee/parsing/common/extras.rb +5 -0
- data/lib/mindee/parsing/common/page.rb +5 -0
- data/lib/mindee/parsing/standard/base_field.rb +1 -0
- data/lib/mindee/parsing/standard/date_field.rb +4 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1.rb +39 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier.rb +52 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_item.rb +95 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_consignee.rb +58 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_document.rb +136 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_notify_party.rb +58 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_page.rb +32 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_shipper.rb +58 -0
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +11 -1
- data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +15 -1
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +1 -1
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +4 -15
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1.rb +41 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_document.rb +235 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_consumer.rb +48 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_supplier.rb +48 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usage.rb +97 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_meter_detail.rb +54 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_page.rb +34 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_subscription.rb +97 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contribution.rb +97 -0
- data/lib/mindee/product/fr/payslip/payslip_v2.rb +41 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_bank_account_detail.rb +54 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_document.rb +128 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_employee.rb +78 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_employer.rb +78 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_employment.rb +72 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_page.rb +34 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_pay_detail.rb +100 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_pay_period.rb +66 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_pto.rb +56 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_salary_detail.rb +81 -0
- data/lib/mindee/product/invoice/invoice_v4_document.rb +11 -1
- data/lib/mindee/product/invoice/invoice_v4_line_item.rb +15 -1
- data/lib/mindee/product/invoice/invoice_v4_page.rb +1 -1
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +1 -1
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1.rb +39 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_added_sugar.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_calorie.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_cholesterol.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_dietary_fiber.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_document.rb +173 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrient.rb +87 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_page.rb +32 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_protein.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_saturated_fat.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_serving_size.rb +46 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_sodium.rb +58 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_carbohydrate.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_fat.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_sugar.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_trans_fat.rb +52 -0
- data/lib/mindee/product/receipt/receipt_v5_line_item.rb +11 -1
- data/lib/mindee/product/resume/resume_v1_certificate.rb +11 -1
- data/lib/mindee/product/resume/resume_v1_education.rb +14 -1
- data/lib/mindee/product/resume/resume_v1_language.rb +9 -1
- data/lib/mindee/product/resume/resume_v1_professional_experience.rb +15 -1
- data/lib/mindee/product/resume/resume_v1_social_networks_url.rb +9 -1
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copay.rb +9 -1
- data/lib/mindee/product/us/us_mail/us_mail_v2_recipient_address.rb +14 -1
- data/lib/mindee/product/us/us_mail/us_mail_v2_sender_address.rb +5 -17
- data/lib/mindee/product.rb +5 -1
- data/lib/mindee/version.rb +1 -1
- metadata +71 -9
- data/lib/mindee/image_extraction/common/image_extractor.rb +0 -191
- data/lib/mindee/image_extraction/multi_receipts_extractor/multi_receipts_extractor.rb +0 -26
- data/lib/mindee/image_extraction.rb +0 -4
- /data/lib/mindee/{image_extraction → extraction}/multi_receipts_extractor.rb +0 -0
- /data/lib/mindee/extraction/{ocr_extractor.rb → tax_extractor/ocr_extractor.rb} +0 -0
@@ -26,6 +26,14 @@ module Mindee
|
|
26
26
|
|
27
27
|
# @return [Hash]
|
28
28
|
def printable_values
|
29
|
+
printable = {}
|
30
|
+
printable[:name] = format_for_display(@name)
|
31
|
+
printable[:url] = format_for_display(@url)
|
32
|
+
printable
|
33
|
+
end
|
34
|
+
|
35
|
+
# @return [Hash]
|
36
|
+
def table_printable_values
|
29
37
|
printable = {}
|
30
38
|
printable[:name] = format_for_display(@name, 20)
|
31
39
|
printable[:url] = format_for_display(@url, 50)
|
@@ -34,7 +42,7 @@ module Mindee
|
|
34
42
|
|
35
43
|
# @return [String]
|
36
44
|
def to_table_line
|
37
|
-
printable = printable_values
|
45
|
+
printable = table_printable_values
|
38
46
|
out_str = String.new
|
39
47
|
out_str << format('| %- 21s', printable[:name])
|
40
48
|
out_str << format('| %- 51s', printable[:url])
|
@@ -27,6 +27,14 @@ module Mindee
|
|
27
27
|
|
28
28
|
# @return [Hash]
|
29
29
|
def printable_values
|
30
|
+
printable = {}
|
31
|
+
printable[:service_fees] = @service_fees.nil? ? '' : Field.float_to_string(@service_fees)
|
32
|
+
printable[:service_name] = format_for_display(@service_name)
|
33
|
+
printable
|
34
|
+
end
|
35
|
+
|
36
|
+
# @return [Hash]
|
37
|
+
def table_printable_values
|
30
38
|
printable = {}
|
31
39
|
printable[:service_fees] = @service_fees.nil? ? '' : Field.float_to_string(@service_fees)
|
32
40
|
printable[:service_name] = format_for_display(@service_name, nil)
|
@@ -35,7 +43,7 @@ module Mindee
|
|
35
43
|
|
36
44
|
# @return [String]
|
37
45
|
def to_table_line
|
38
|
-
printable = printable_values
|
46
|
+
printable = table_printable_values
|
39
47
|
out_str = String.new
|
40
48
|
out_str << format('| %- 13s', printable[:service_fees])
|
41
49
|
out_str << format('| %- 13s', printable[:service_name])
|
@@ -47,6 +47,19 @@ module Mindee
|
|
47
47
|
|
48
48
|
# @return [Hash]
|
49
49
|
def printable_values
|
50
|
+
printable = {}
|
51
|
+
printable[:city] = format_for_display(@city)
|
52
|
+
printable[:complete] = format_for_display(@complete)
|
53
|
+
printable[:is_address_change] = format_for_display(@is_address_change)
|
54
|
+
printable[:postal_code] = format_for_display(@postal_code)
|
55
|
+
printable[:private_mailbox_number] = format_for_display(@private_mailbox_number)
|
56
|
+
printable[:state] = format_for_display(@state)
|
57
|
+
printable[:street] = format_for_display(@street)
|
58
|
+
printable
|
59
|
+
end
|
60
|
+
|
61
|
+
# @return [Hash]
|
62
|
+
def table_printable_values
|
50
63
|
printable = {}
|
51
64
|
printable[:city] = format_for_display(@city, 15)
|
52
65
|
printable[:complete] = format_for_display(@complete, 35)
|
@@ -60,7 +73,7 @@ module Mindee
|
|
60
73
|
|
61
74
|
# @return [String]
|
62
75
|
def to_table_line
|
63
|
-
printable = printable_values
|
76
|
+
printable = table_printable_values
|
64
77
|
out_str = String.new
|
65
78
|
out_str << format('| %- 16s', printable[:city])
|
66
79
|
out_str << format('| %- 36s', printable[:complete])
|
@@ -40,26 +40,14 @@ module Mindee
|
|
40
40
|
# @return [Hash]
|
41
41
|
def printable_values
|
42
42
|
printable = {}
|
43
|
-
printable[:city] = format_for_display(@city
|
44
|
-
printable[:complete] = format_for_display(@complete
|
45
|
-
printable[:postal_code] = format_for_display(@postal_code
|
46
|
-
printable[:state] = format_for_display(@state
|
47
|
-
printable[:street] = format_for_display(@street
|
43
|
+
printable[:city] = format_for_display(@city)
|
44
|
+
printable[:complete] = format_for_display(@complete)
|
45
|
+
printable[:postal_code] = format_for_display(@postal_code)
|
46
|
+
printable[:state] = format_for_display(@state)
|
47
|
+
printable[:street] = format_for_display(@street)
|
48
48
|
printable
|
49
49
|
end
|
50
50
|
|
51
|
-
# @return [String]
|
52
|
-
def to_table_line
|
53
|
-
printable = printable_values
|
54
|
-
out_str = String.new
|
55
|
-
out_str << format('| %- 16s', printable[:city])
|
56
|
-
out_str << format('| %- 36s', printable[:complete])
|
57
|
-
out_str << format('| %- 12s', printable[:postal_code])
|
58
|
-
out_str << format('| %- 6s', printable[:state])
|
59
|
-
out_str << format('| %- 26s', printable[:street])
|
60
|
-
out_str << '|'
|
61
|
-
end
|
62
|
-
|
63
51
|
# @return [String]
|
64
52
|
def to_s
|
65
53
|
printable = printable_values
|
data/lib/mindee/product.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'product/barcode_reader/barcode_reader_v1'
|
4
|
+
require_relative 'product/bill_of_lading/bill_of_lading_v1'
|
4
5
|
require_relative 'product/custom/custom_v1'
|
5
6
|
require_relative 'product/proof_of_address/proof_of_address_v1'
|
6
7
|
require_relative 'product/financial_document/financial_document_v1'
|
@@ -15,14 +16,17 @@ require_relative 'product/eu/driver_license/driver_license_v1'
|
|
15
16
|
require_relative 'product/fr/bank_account_details/bank_account_details_v1'
|
16
17
|
require_relative 'product/fr/bank_account_details/bank_account_details_v2'
|
17
18
|
require_relative 'product/fr/bank_statement/bank_statement_v1'
|
19
|
+
require_relative 'product/fr/carte_grise/carte_grise_v1'
|
18
20
|
require_relative 'product/fr/carte_vitale/carte_vitale_v1'
|
19
21
|
require_relative 'product/fr/id_card/id_card_v1'
|
20
22
|
require_relative 'product/fr/id_card/id_card_v2'
|
21
|
-
require_relative 'product/fr/carte_grise/carte_grise_v1'
|
23
|
+
require_relative 'product/fr/energy_bill/energy_bill_v1'
|
24
|
+
require_relative 'product/fr/payslip/payslip_v2'
|
22
25
|
require_relative 'product/generated/generated_v1'
|
23
26
|
require_relative 'product/invoice_splitter/invoice_splitter_v1'
|
24
27
|
require_relative 'product/international_id/international_id_v1'
|
25
28
|
require_relative 'product/international_id/international_id_v2'
|
29
|
+
require_relative 'product/nutrition_facts_label/nutrition_facts_label_v1'
|
26
30
|
require_relative 'product/resume/resume_v1'
|
27
31
|
require_relative 'product/us/bank_check/bank_check_v1'
|
28
32
|
require_relative 'product/us/driver_license/driver_license_v1'
|
data/lib/mindee/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mindee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.12.0
|
4
|
+
version: 3.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mindee, SA
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-10-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: marcel
|
@@ -135,6 +135,7 @@ files:
|
|
135
135
|
- docs/bank_check_v1.md
|
136
136
|
- docs/bank_statement_fr_v1.md
|
137
137
|
- docs/barcode_reader_v1.md
|
138
|
+
- docs/bill_of_lading_v1.md
|
138
139
|
- docs/carte_grise_v1.md
|
139
140
|
- docs/carte_vitale_v1.md
|
140
141
|
- docs/code_samples/bank_account_details_v1.txt
|
@@ -142,12 +143,14 @@ files:
|
|
142
143
|
- docs/code_samples/bank_check_v1.txt
|
143
144
|
- docs/code_samples/bank_statement_fr_v1_async.txt
|
144
145
|
- docs/code_samples/barcode_reader_v1.txt
|
146
|
+
- docs/code_samples/bill_of_lading_v1_async.txt
|
145
147
|
- docs/code_samples/carte_grise_v1.txt
|
146
148
|
- docs/code_samples/carte_vitale_v1.txt
|
147
149
|
- docs/code_samples/cropper_v1.txt
|
148
150
|
- docs/code_samples/custom_v1.txt
|
149
151
|
- docs/code_samples/default.txt
|
150
152
|
- docs/code_samples/default_async.txt
|
153
|
+
- docs/code_samples/energy_bill_fra_v1_async.txt
|
151
154
|
- docs/code_samples/eu_driver_license_v1.txt
|
152
155
|
- docs/code_samples/expense_receipts_v4.txt
|
153
156
|
- docs/code_samples/expense_receipts_v5.txt
|
@@ -159,9 +162,12 @@ files:
|
|
159
162
|
- docs/code_samples/international_id_v2_async.txt
|
160
163
|
- docs/code_samples/invoice_splitter_v1_async.txt
|
161
164
|
- docs/code_samples/invoices_v4.txt
|
165
|
+
- docs/code_samples/invoices_v4_async.txt
|
162
166
|
- docs/code_samples/license_plates_v1.txt
|
163
167
|
- docs/code_samples/multi_receipts_detector_v1.txt
|
168
|
+
- docs/code_samples/nutrition_facts_v1_async.txt
|
164
169
|
- docs/code_samples/passport_v1.txt
|
170
|
+
- docs/code_samples/payslip_fra_v2_async.txt
|
165
171
|
- docs/code_samples/proof_of_address_v1.txt
|
166
172
|
- docs/code_samples/resume_v1_async.txt
|
167
173
|
- docs/code_samples/us_driver_license_v1.txt
|
@@ -170,6 +176,7 @@ files:
|
|
170
176
|
- docs/code_samples/us_w9_v1.txt
|
171
177
|
- docs/cropper_v1.md
|
172
178
|
- docs/custom_v1.md
|
179
|
+
- docs/energy_bill_fra_v1.md
|
173
180
|
- docs/eu_driver_license_v1.md
|
174
181
|
- docs/expense_receipts_v5.md
|
175
182
|
- docs/financial_document_v1.md
|
@@ -181,18 +188,31 @@ files:
|
|
181
188
|
- docs/invoices_v4.md
|
182
189
|
- docs/license_plates_v1.md
|
183
190
|
- docs/multi_receipts_detector_v1.md
|
191
|
+
- docs/nutrition_facts_v1.md
|
184
192
|
- docs/passport_v1.md
|
193
|
+
- docs/payslip_fra_v2.md
|
185
194
|
- docs/proof_of_address_v1.md
|
186
195
|
- docs/resume_v1.md
|
187
196
|
- docs/us_driver_license_v1.md
|
188
197
|
- docs/us_healthcare_cards_v1.md
|
189
198
|
- docs/us_mail_v2.md
|
190
199
|
- docs/us_w9_v1.md
|
200
|
+
- examples/auto_invoice_splitter_extraction.rb
|
201
|
+
- examples/auto_multi_receipts_detector_extraction.rb
|
191
202
|
- lib/mindee.rb
|
192
203
|
- lib/mindee/client.rb
|
193
204
|
- lib/mindee/extraction.rb
|
194
|
-
- lib/mindee/extraction/ocr_extractor.rb
|
205
|
+
- lib/mindee/extraction/common.rb
|
206
|
+
- lib/mindee/extraction/common/extracted_image.rb
|
207
|
+
- lib/mindee/extraction/common/image_extractor.rb
|
208
|
+
- lib/mindee/extraction/multi_receipts_extractor.rb
|
209
|
+
- lib/mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.rb
|
210
|
+
- lib/mindee/extraction/pdf_extractor.rb
|
211
|
+
- lib/mindee/extraction/pdf_extractor/extracted_pdf.rb
|
212
|
+
- lib/mindee/extraction/pdf_extractor/pdf_extractor.rb
|
195
213
|
- lib/mindee/extraction/tax_extractor.rb
|
214
|
+
- lib/mindee/extraction/tax_extractor/ocr_extractor.rb
|
215
|
+
- lib/mindee/extraction/tax_extractor/tax_extractor.rb
|
196
216
|
- lib/mindee/geometry.rb
|
197
217
|
- lib/mindee/geometry/min_max.rb
|
198
218
|
- lib/mindee/geometry/point.rb
|
@@ -204,12 +224,6 @@ files:
|
|
204
224
|
- lib/mindee/http/endpoint.rb
|
205
225
|
- lib/mindee/http/error.rb
|
206
226
|
- lib/mindee/http/response_validation.rb
|
207
|
-
- lib/mindee/image_extraction.rb
|
208
|
-
- lib/mindee/image_extraction/common.rb
|
209
|
-
- lib/mindee/image_extraction/common/extracted_image.rb
|
210
|
-
- lib/mindee/image_extraction/common/image_extractor.rb
|
211
|
-
- lib/mindee/image_extraction/multi_receipts_extractor.rb
|
212
|
-
- lib/mindee/image_extraction/multi_receipts_extractor/multi_receipts_extractor.rb
|
213
227
|
- lib/mindee/input.rb
|
214
228
|
- lib/mindee/input/local_response.rb
|
215
229
|
- lib/mindee/input/sources.rb
|
@@ -217,6 +231,10 @@ files:
|
|
217
231
|
- lib/mindee/parsing/common.rb
|
218
232
|
- lib/mindee/parsing/common/api_response.rb
|
219
233
|
- lib/mindee/parsing/common/document.rb
|
234
|
+
- lib/mindee/parsing/common/extras.rb
|
235
|
+
- lib/mindee/parsing/common/extras/cropper_extra.rb
|
236
|
+
- lib/mindee/parsing/common/extras/extras.rb
|
237
|
+
- lib/mindee/parsing/common/extras/full_text_ocr_extra.rb
|
220
238
|
- lib/mindee/parsing/common/inference.rb
|
221
239
|
- lib/mindee/parsing/common/ocr.rb
|
222
240
|
- lib/mindee/parsing/common/ocr/mvision_v1.rb
|
@@ -251,6 +269,14 @@ files:
|
|
251
269
|
- lib/mindee/product/barcode_reader/barcode_reader_v1.rb
|
252
270
|
- lib/mindee/product/barcode_reader/barcode_reader_v1_document.rb
|
253
271
|
- lib/mindee/product/barcode_reader/barcode_reader_v1_page.rb
|
272
|
+
- lib/mindee/product/bill_of_lading/bill_of_lading_v1.rb
|
273
|
+
- lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier.rb
|
274
|
+
- lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_item.rb
|
275
|
+
- lib/mindee/product/bill_of_lading/bill_of_lading_v1_consignee.rb
|
276
|
+
- lib/mindee/product/bill_of_lading/bill_of_lading_v1_document.rb
|
277
|
+
- lib/mindee/product/bill_of_lading/bill_of_lading_v1_notify_party.rb
|
278
|
+
- lib/mindee/product/bill_of_lading/bill_of_lading_v1_page.rb
|
279
|
+
- lib/mindee/product/bill_of_lading/bill_of_lading_v1_shipper.rb
|
254
280
|
- lib/mindee/product/cropper/cropper_v1.rb
|
255
281
|
- lib/mindee/product/cropper/cropper_v1_document.rb
|
256
282
|
- lib/mindee/product/cropper/cropper_v1_page.rb
|
@@ -284,12 +310,32 @@ files:
|
|
284
310
|
- lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb
|
285
311
|
- lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb
|
286
312
|
- lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb
|
313
|
+
- lib/mindee/product/fr/energy_bill/energy_bill_v1.rb
|
314
|
+
- lib/mindee/product/fr/energy_bill/energy_bill_v1_document.rb
|
315
|
+
- lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_consumer.rb
|
316
|
+
- lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_supplier.rb
|
317
|
+
- lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usage.rb
|
318
|
+
- lib/mindee/product/fr/energy_bill/energy_bill_v1_meter_detail.rb
|
319
|
+
- lib/mindee/product/fr/energy_bill/energy_bill_v1_page.rb
|
320
|
+
- lib/mindee/product/fr/energy_bill/energy_bill_v1_subscription.rb
|
321
|
+
- lib/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contribution.rb
|
287
322
|
- lib/mindee/product/fr/id_card/id_card_v1.rb
|
288
323
|
- lib/mindee/product/fr/id_card/id_card_v1_document.rb
|
289
324
|
- lib/mindee/product/fr/id_card/id_card_v1_page.rb
|
290
325
|
- lib/mindee/product/fr/id_card/id_card_v2.rb
|
291
326
|
- lib/mindee/product/fr/id_card/id_card_v2_document.rb
|
292
327
|
- lib/mindee/product/fr/id_card/id_card_v2_page.rb
|
328
|
+
- lib/mindee/product/fr/payslip/payslip_v2.rb
|
329
|
+
- lib/mindee/product/fr/payslip/payslip_v2_bank_account_detail.rb
|
330
|
+
- lib/mindee/product/fr/payslip/payslip_v2_document.rb
|
331
|
+
- lib/mindee/product/fr/payslip/payslip_v2_employee.rb
|
332
|
+
- lib/mindee/product/fr/payslip/payslip_v2_employer.rb
|
333
|
+
- lib/mindee/product/fr/payslip/payslip_v2_employment.rb
|
334
|
+
- lib/mindee/product/fr/payslip/payslip_v2_page.rb
|
335
|
+
- lib/mindee/product/fr/payslip/payslip_v2_pay_detail.rb
|
336
|
+
- lib/mindee/product/fr/payslip/payslip_v2_pay_period.rb
|
337
|
+
- lib/mindee/product/fr/payslip/payslip_v2_pto.rb
|
338
|
+
- lib/mindee/product/fr/payslip/payslip_v2_salary_detail.rb
|
293
339
|
- lib/mindee/product/generated/generated_v1.rb
|
294
340
|
- lib/mindee/product/generated/generated_v1_document.rb
|
295
341
|
- lib/mindee/product/generated/generated_v1_page.rb
|
@@ -310,6 +356,22 @@ files:
|
|
310
356
|
- lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1.rb
|
311
357
|
- lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb
|
312
358
|
- lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb
|
359
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1.rb
|
360
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_added_sugar.rb
|
361
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_calorie.rb
|
362
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_cholesterol.rb
|
363
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_dietary_fiber.rb
|
364
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_document.rb
|
365
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrient.rb
|
366
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_page.rb
|
367
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_protein.rb
|
368
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_saturated_fat.rb
|
369
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_serving_size.rb
|
370
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_sodium.rb
|
371
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_carbohydrate.rb
|
372
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_fat.rb
|
373
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_sugar.rb
|
374
|
+
- lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_trans_fat.rb
|
313
375
|
- lib/mindee/product/passport/passport_v1.rb
|
314
376
|
- lib/mindee/product/passport/passport_v1_document.rb
|
315
377
|
- lib/mindee/product/passport/passport_v1_page.rb
|
@@ -1,191 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'mini_magick'
|
4
|
-
require 'origami'
|
5
|
-
require 'stringio'
|
6
|
-
require 'tempfile'
|
7
|
-
require_relative '../../input/sources'
|
8
|
-
require_relative 'extracted_image'
|
9
|
-
|
10
|
-
module Mindee
|
11
|
-
# Image Extraction Module.
|
12
|
-
module ImageExtraction
|
13
|
-
def attach_image_as_new_file(input_buffer)
|
14
|
-
# Attaches an image as a new page in a PdfDocument object.
|
15
|
-
#
|
16
|
-
# @param [StringIO] input_buffer Input buffer. Only supports JPEG.
|
17
|
-
# @return [Origami::PDF] A PdfDocument handle.
|
18
|
-
|
19
|
-
magick_image = MiniMagick::Image.read(input_buffer)
|
20
|
-
# NOTE: some jpeg images get rendered as three different versions of themselves per output if the format isn't
|
21
|
-
# converted.
|
22
|
-
magick_image.format('jpg')
|
23
|
-
original_density = magick_image.resolution
|
24
|
-
scale_factor = original_density[0].to_f / 4.166666 # No clue why bit the resolution needs to be reduced for
|
25
|
-
# the pdf otherwise the resulting image shrinks.
|
26
|
-
magick_image.format('pdf', 0, { density: scale_factor.to_s })
|
27
|
-
io_buffer = StringIO.new
|
28
|
-
magick_image.write(io_buffer)
|
29
|
-
Origami::PDF.read(io_buffer)
|
30
|
-
end
|
31
|
-
|
32
|
-
# Extracts multiple images from a given local input source.
|
33
|
-
#
|
34
|
-
# @param [Mindee::Input::Source::LocalInputSource] input_source
|
35
|
-
# @param [Integer] page_id ID of the Page to extract from.
|
36
|
-
# @param [Array<Array<Mindee::Geometry::Point>>, Array<Mindee::Geometry::Quadrangle>] polygons List of coordinates
|
37
|
-
# to extract.
|
38
|
-
# @return [Array<Mindee::ImageExtraction::ExtractedImage>] Extracted Images.
|
39
|
-
def extract_multiple_images_from_source(input_source, page_id, polygons)
|
40
|
-
new_stream = load_doc(input_source, page_id)
|
41
|
-
new_stream.seek(0)
|
42
|
-
|
43
|
-
extract_images_from_polygons(input_source, new_stream, page_id, polygons)
|
44
|
-
end
|
45
|
-
|
46
|
-
# Retrieves a PDF document's page.
|
47
|
-
#
|
48
|
-
# @param [Origami::PDF] pdf_doc Origami PDF handle.
|
49
|
-
# @param [Integer] page_id Page ID.
|
50
|
-
def get_page(pdf_doc, page_id)
|
51
|
-
stream = StringIO.new
|
52
|
-
pdf_doc.save(stream)
|
53
|
-
|
54
|
-
options = {
|
55
|
-
page_indexes: [page_id - 1],
|
56
|
-
}
|
57
|
-
|
58
|
-
Mindee::PDF::PdfProcessor.parse(stream, options)
|
59
|
-
end
|
60
|
-
|
61
|
-
# Extracts images from their positions on a file (as polygons).
|
62
|
-
#
|
63
|
-
# @param [Mindee::Input::Source::LocalInputSource] input_source Local input source.
|
64
|
-
# @param [StringIO] pdf_stream Buffer of the PDF.
|
65
|
-
# @param [Integer] page_id Page ID.
|
66
|
-
# @param [Array<Mindee::Geometry::Point, Mindee::Geometry::Polygon, Mindee::Geometry::Quadrangle>] polygons
|
67
|
-
# @return [Array<Mindee::ImageExtraction::ExtractedImage>] Extracted Images.
|
68
|
-
def extract_images_from_polygons(input_source, pdf_stream, page_id, polygons)
|
69
|
-
extracted_elements = []
|
70
|
-
|
71
|
-
polygons.each_with_index do |polygon, element_id|
|
72
|
-
polygon = normalize_polygon(polygon)
|
73
|
-
page_content = read_page_content(pdf_stream)
|
74
|
-
|
75
|
-
min_max_x = Geometry.get_min_max_x([
|
76
|
-
polygon.top_left,
|
77
|
-
polygon.bottom_right,
|
78
|
-
polygon.top_right,
|
79
|
-
polygon.bottom_left,
|
80
|
-
])
|
81
|
-
min_max_y = Geometry.get_min_max_y([
|
82
|
-
polygon.top_left,
|
83
|
-
polygon.bottom_right,
|
84
|
-
polygon.top_right,
|
85
|
-
polygon.bottom_left,
|
86
|
-
])
|
87
|
-
file_extension = determine_file_extension(input_source)
|
88
|
-
cropped_image = crop_image(page_content, min_max_x, min_max_y)
|
89
|
-
if file_extension == 'pdf'
|
90
|
-
cropped_image.format('jpg')
|
91
|
-
else
|
92
|
-
cropped_image.format(file_extension)
|
93
|
-
end
|
94
|
-
|
95
|
-
buffer = StringIO.new
|
96
|
-
write_image_to_buffer(cropped_image, buffer)
|
97
|
-
file_name = "#{input_source.filename}_page#{page_id}-#{element_id}.#{file_extension}"
|
98
|
-
|
99
|
-
extracted_elements << create_extracted_image(buffer, file_name, page_id, element_id)
|
100
|
-
end
|
101
|
-
|
102
|
-
extracted_elements
|
103
|
-
end
|
104
|
-
|
105
|
-
# Retrieves the bounding box of a polygon.
|
106
|
-
#
|
107
|
-
# @param [Array<Point>, Mindee::Geometry::Polygon] polygon
|
108
|
-
def normalize_polygon(polygon)
|
109
|
-
if polygon.is_a?(Mindee::Geometry::Polygon)
|
110
|
-
Mindee::Geometry.get_bounding_box(polygon)
|
111
|
-
else
|
112
|
-
polygon
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
# Loads a buffer into a MiniMagick Image.
|
117
|
-
#
|
118
|
-
# @param [StringIO] pdf_stream Buffer containg the PDF
|
119
|
-
# @return [MiniMagick::Image] a valid MiniMagick image handle.
|
120
|
-
def read_page_content(pdf_stream)
|
121
|
-
pdf_stream.rewind
|
122
|
-
MiniMagick::Image.read(pdf_stream)
|
123
|
-
end
|
124
|
-
|
125
|
-
# Crops a MiniMagick Image from a the given bounding box.
|
126
|
-
#
|
127
|
-
# @param [MiniMagick::Image] image Input Image.
|
128
|
-
# @param [Mindee::Geometry::MinMax] min_max_x minimum & maximum values for the x coordinates.
|
129
|
-
# @param [Mindee::Geometry::MinMax] min_max_y minimum & maximum values for the y coordinates.
|
130
|
-
def crop_image(image, min_max_x, min_max_y)
|
131
|
-
width = image[:width].to_i
|
132
|
-
height = image[:height].to_i
|
133
|
-
|
134
|
-
image.format('jpg')
|
135
|
-
new_width = (min_max_x.max - min_max_x.min) * width
|
136
|
-
new_height = (min_max_y.max - min_max_y.min) * height
|
137
|
-
image.crop("#{new_width}x#{new_height}+#{min_max_x.min * width}+#{min_max_y.min * height}")
|
138
|
-
|
139
|
-
image
|
140
|
-
end
|
141
|
-
|
142
|
-
# Writes a MiniMagick::Image to a buffer.
|
143
|
-
#
|
144
|
-
# @param [MiniMagick::Image] image a valid MiniMagick image.
|
145
|
-
# @param [StringIO] buffer
|
146
|
-
def write_image_to_buffer(image, buffer)
|
147
|
-
image.write(buffer)
|
148
|
-
end
|
149
|
-
|
150
|
-
# Retrieves the file extension from the main file to apply it to the extracted images. Note: coerces pdf as jpg.
|
151
|
-
#
|
152
|
-
# @param [Mindee::Input::Source::LocalInputSource] input_source Local input source.
|
153
|
-
# @return [String] A valid file extension.
|
154
|
-
def determine_file_extension(input_source)
|
155
|
-
if input_source.pdf? || input_source.filename.downcase.end_with?('pdf')
|
156
|
-
'jpg'
|
157
|
-
else
|
158
|
-
File.extname(input_source.filename).strip.downcase[1..]
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
# Generates an ExtractedImage.
|
163
|
-
#
|
164
|
-
# @param [StringIO] buffer Buffer containing the image.
|
165
|
-
# @param [String] file_name Name for the file.
|
166
|
-
# @param [Object] page_id ID of the page the file was generated from.
|
167
|
-
# @param [Object] element_id ID of the element of a given page.
|
168
|
-
def create_extracted_image(buffer, file_name, page_id, element_id)
|
169
|
-
buffer.rewind
|
170
|
-
ExtractedImage.new(
|
171
|
-
Mindee::Input::Source::BytesInputSource.new(buffer.read, file_name),
|
172
|
-
page_id,
|
173
|
-
element_id
|
174
|
-
)
|
175
|
-
end
|
176
|
-
|
177
|
-
# Loads a single_page from an image file or a pdf document.
|
178
|
-
#
|
179
|
-
# @param input_file [LocalInputSource] Local input.
|
180
|
-
# @param [Integer] page_id Page ID.
|
181
|
-
# @return [MiniMagick::Image] A valid PdfDocument handle.
|
182
|
-
def load_doc(input_file, page_id)
|
183
|
-
input_file.io_stream.rewind
|
184
|
-
if input_file.pdf?
|
185
|
-
get_page(Origami::PDF.read(input_file.io_stream), page_id)
|
186
|
-
else
|
187
|
-
input_file.io_stream
|
188
|
-
end
|
189
|
-
end
|
190
|
-
end
|
191
|
-
end
|
@@ -1,26 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Mindee
|
4
|
-
# Image Extraction Module.
|
5
|
-
module ImageExtraction
|
6
|
-
def extract_receipts(input_source, inference)
|
7
|
-
# Extracts individual receipts from multi-receipts documents.
|
8
|
-
#
|
9
|
-
# @param input_source [LocalInputSource] Local Input Source to extract sub-receipts from.
|
10
|
-
# @param inference [Inference] Results of the inference.
|
11
|
-
# @return [Array<ExtractedImage>] Individual extracted receipts as an array of ExtractedMultiReceiptsImage.
|
12
|
-
|
13
|
-
images = []
|
14
|
-
raise 'No possible receipts candidates found for MultiReceipts extraction.' unless inference.prediction.receipts
|
15
|
-
|
16
|
-
(0...input_source.count_pdf_pages).each do |page_id|
|
17
|
-
receipt_positions = inference.pages[page_id].prediction.receipts.map(&:bounding_box)
|
18
|
-
images.concat(
|
19
|
-
extract_multiple_images_from_source(input_source, page_id + 1, receipt_positions)
|
20
|
-
)
|
21
|
-
end
|
22
|
-
|
23
|
-
images
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
File without changes
|
File without changes
|