mindee 3.13.0 → 3.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/docs/bill_of_lading_v1.md +50 -1
- data/docs/energy_bill_fra_v1.md +61 -1
- data/docs/expense_receipts_v5.md +4 -4
- data/docs/financial_document_v1.md +14 -0
- data/docs/invoices_v4.md +16 -2
- data/docs/nutrition_facts_v1.md +80 -1
- data/docs/payslip_fra_v2.md +77 -1
- data/docs/us_mail_v2.md +1 -1
- data/examples/auto_invoice_splitter_extraction.rb +36 -31
- data/examples/auto_multi_receipts_detector_extraction.rb +31 -0
- data/lib/mindee/client.rb +1 -0
- data/lib/mindee/extraction/common/extracted_image.rb +1 -2
- data/lib/mindee/extraction/common/image_extractor.rb +147 -159
- data/lib/mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.rb +22 -16
- data/lib/mindee/extraction/pdf_extractor/pdf_extractor.rb +3 -1
- data/lib/mindee/extraction/tax_extractor/tax_extractor.rb +1 -0
- data/lib/mindee/geometry/point.rb +2 -1
- data/lib/mindee/image/image_compressor.rb +29 -0
- data/lib/mindee/image/image_utils.rb +104 -0
- data/lib/mindee/image.rb +4 -0
- data/lib/mindee/input/sources.rb +36 -0
- data/lib/mindee/parsing/standard/date_field.rb +4 -0
- data/lib/mindee/parsing/standard/position_field.rb +3 -0
- data/lib/mindee/pdf/pdf_compressor.rb +117 -0
- data/lib/mindee/pdf/{pdf_processing.rb → pdf_processor.rb} +17 -0
- data/lib/mindee/pdf/pdf_tools.rb +100 -0
- data/lib/mindee/pdf.rb +3 -1
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +11 -1
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +1 -1
- data/lib/mindee/product/invoice/invoice_v4_document.rb +11 -1
- data/lib/mindee/product/invoice/invoice_v4_page.rb +1 -1
- data/lib/mindee/version.rb +1 -1
- data/lib/mindee.rb +10 -0
- data/mindee.gemspec +2 -1
- metadata +32 -7
@@ -0,0 +1,117 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'pdf-reader'
|
4
|
+
PDFReader = PDF
|
5
|
+
|
6
|
+
module Mindee
|
7
|
+
module PDF
|
8
|
+
# PDF compressor module to handle PDF compression.
|
9
|
+
module PDFCompressor
|
10
|
+
# Compresses each page of a provided PDF stream.
# When source text is detected, the input is returned untouched unless force_source_text_compression is set.
# @param pdf_data [StringIO] Stream representation of the PDF to compress.
# @param quality [Integer] Compression quality (70-100 for most JPG images in the test dataset).
# @param force_source_text_compression [Boolean] If true, compresses the PDF even when source text is detected.
# @param disable_source_text [Boolean] If true, doesn't re-apply source text to the output PDF.
# @return [StringIO] Stream holding the compressed PDF, or pdf_data itself when the operation is aborted.
def self.compress_pdf(pdf_data, quality: 85, force_source_text_compression: false, disable_source_text: true)
  if PDFTools.source_text?(pdf_data)
    unless force_source_text_compression
      puts "\e[33m[WARNING] Source-text detected in input PDF. Aborting operation.\e[0m"
      return pdf_data
    end

    if disable_source_text
      # Text is only injected back when disable_source_text is false, so the output will be image-only.
      puts "\e[33m[WARNING] Source-file contains text, but disable_source_text flag is set. " \
           "Resulting file will not contain any embedded text.\e[0m"
    else
      puts "\e[33m[WARNING] Re-writing PDF source-text is an EXPERIMENTAL feature.\e[0m"
    end
  end

  pdf_data.rewind
  pdf = Origami::PDF.read(pdf_data)
  pages = process_pdf_pages(pdf, quality)

  output_pdf = create_output_pdf(pages, disable_source_text, pdf_data)

  output_stream = StringIO.new
  output_pdf.save(output_stream)
  output_stream
end
|
39
|
+
|
40
|
+
# Runs every page of the PDF through process_pdf_page.
# @param pdf [Origami::PDF] The Origami PDF object to process.
# @param quality [Integer] Compression quality.
# @return [Array<Origami::Page>] Processed pages.
def self.process_pdf_pages(pdf, quality)
  pdf.pages.each_with_index.map do |source_page, position|
    # NOTE(review): the page stream is fetched by position through PdfProcessor.get_page while the MediaBox
    # comes from source_page; confirm both always designate the same page.
    page_stream = Mindee::PDF::PdfProcessor.get_page(pdf, position)
    process_pdf_page(page_stream, position, quality, source_page[:MediaBox])
  end
end
|
49
|
+
|
50
|
+
# Creates the output PDF with processed pages.
# The pages array received from the caller is left untouched; re-ordering happens on a copy.
# @param pages [Array] Processed pages.
# @param disable_source_text [Boolean] Whether to disable source text.
# @param pdf_data [StringIO] Original PDF data.
# @return [Origami::PDF] Output PDF object.
def self.create_output_pdf(pages, disable_source_text, pdf_data)
  output_pdf = Origami::PDF.new
  # NOTE: Page order and XObject handling require adjustment due to origami adding the last page first.
  # Array#rotate is a no-op for empty and single-element arrays, so no size guard is needed.
  ordered_pages = pages.rotate(1)

  inject_text(pdf_data, ordered_pages) unless disable_source_text

  ordered_pages.each { |page| output_pdf.append_page(page) }

  output_pdf
end
|
66
|
+
|
67
|
+
# Extracts text from a source text PDF, and injects it into a newly-created one.
# Each text run of an original page is appended as a BT/ET text object to the content of the page
# located at the same index in pages. Original pages without a counterpart in pages are ignored.
# @param pdf_data [StringIO] Stream representation of the PDF.
# @param pages [Array<Origami::Page>] Array of pages containing the rasterized version of the initial pages.
def self.inject_text(pdf_data, pages)
  reader = PDFReader::Reader.new(pdf_data)

  reader.pages.each_with_index do |original_page, index|
    break if index >= pages.length

    receiver = PDFReader::Reader::PageTextReceiver.new
    original_page.walk(receiver)

    # NOTE(review): the /F1 font is referenced here but not registered by this method; confirm the target
    # pages expose it in their resources.
    text_objects = receiver.runs.map do |text_run|
      # Backslashes and parentheses delimit/escape PDF literal strings and must be escaped inside them.
      escaped_text = text_run.text.gsub(/[\\()]/) { |char| "\\#{char}" }
      "BT\n/F1 #{text_run.font_size} Tf\n" \
        "#{text_run.origin.x} #{text_run.origin.y} Td\n" \
        "(#{escaped_text}) Tj\nET\n"
    end
    next if text_objects.empty?

    # Single write per page instead of re-assigning the page content once per text run.
    pages[index].Contents.data += text_objects.join
  end
end
|
93
|
+
|
94
|
+
# Takes in a page stream, rasterizes it into a JPEG image, and applies the result onto a new Origami PDF page.
# @param page_stream [StringIO] Stream representation of a single page from the initial PDF.
# @param page_index [Integer] Index of the current page. Used to name the page's image XObject ("X<index + 1>").
# @param image_quality [Integer] Quality to apply to the rasterized page.
# @param media_box [Array<Integer>, nil] Extracted media box from the page. Can be nil.
# @return [Origami::Page]
def self.process_pdf_page(page_stream, page_index, image_quality, media_box)
  target_page = Origami::Page.new
  rasterized = Mindee::Image::ImageUtils.pdf_to_magick_image(page_stream, image_quality)
  page_width, page_height = Mindee::Image::ImageUtils.calculate_dimensions_from_media_box(rasterized, media_box)

  image_xobject = PDF::PDFTools.create_xobject(rasterized)
  PDF::PDFTools.set_xobject_properties(image_xobject, rasterized)

  # The same resource name must be used by the content stream and by the XObject registration.
  resource_name = "X#{page_index + 1}"
  PDF::PDFTools.add_content_to_page(target_page, resource_name, page_width, page_height)
  target_page.add_xobject(image_xobject, resource_name)

  PDF::PDFTools.set_page_dimensions(target_page, page_width, page_height)
  target_page
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
@@ -18,6 +18,7 @@ module Mindee
|
|
18
18
|
|
19
19
|
# @param io_stream [StringIO]
|
20
20
|
# @param options [Hash]
|
21
|
+
# @return [StringIO]
|
21
22
|
def self.parse(io_stream, options)
|
22
23
|
options = DEFAULT_OPTIONS.merge(options)
|
23
24
|
|
@@ -74,6 +75,22 @@ module Mindee
|
|
74
75
|
io_stream.seek(0)
|
75
76
|
pdf_parser.parse(io_stream)
|
76
77
|
end
|
78
|
+
|
79
|
+
# Retrieves a PDF document's page.
# The document is serialized into an in-memory stream, which is then handed to parse.
#
# @param [Origami::PDF] pdf_doc Origami PDF handle.
# @param [Integer] page_id Page ID. The page index requested from parse is page_id - 1.
# @return [StringIO]
def self.get_page(pdf_doc, page_id)
  serialized_doc = StringIO.new
  pdf_doc.save(serialized_doc)

  parse(serialized_doc, { page_indexes: [page_id - 1] })
end
|
77
94
|
end
|
78
95
|
end
|
79
96
|
end
|
data/lib/mindee/pdf/pdf_tools.rb
CHANGED
@@ -29,6 +29,106 @@ module Mindee
|
|
29
29
|
io_stream.set_encoding Encoding::BINARY
|
30
30
|
io_stream
|
31
31
|
end
|
32
|
+
|
33
|
+
# Checks a PDF stream's content for text operators.
# See https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf page 243-251.
# Operators are only recognized as standalone tokens, i.e. when preceded by the start of the data, a
# whitespace or a closing delimiter, and followed by the end of the data, a whitespace or an opening
# delimiter. This keeps resource names or string contents that merely contain an operator's characters
# from being reported as text.
# @param [#data] stream Stream object from a PDF's page. Its data is expected to be a String or nil.
# @return [Boolean] True if a text operator is found in the stream.
def self.stream_has_text?(stream)
  data = stream.data
  return false if data.nil? || data.empty?

  # Same operator set as before: Tc Tw Th TL Tf Tk Tr Tm T* Tj TJ ' "
  # Matching runs on a binary copy so that arbitrary bytes cannot raise encoding errors.
  data.b.match?(/(?:\A|[\s)\]>])(?:T[cwhLfkrmjJ*]|['"])(?=\z|[\s(\[<\/%])/)
end
|
44
|
+
|
45
|
+
# Checks whether the file has source text. Returns false if the file isn't a valid PDF.
# Every content stream of every page is inspected until one containing a text operator is found.
# @param [StringIO] pdf_data
# @return [Boolean] True if the pdf has source text, false otherwise.
def self.source_text?(pdf_data)
  pdf_data.rewind
  document = Origami::PDF.read(pdf_data)

  document.each_page do |page|
    next unless page[:Contents]

    resolved = page[:Contents].solve
    # A page's contents may resolve to a single stream or to an array of them.
    content_refs = resolved.is_a?(Origami::Array) ? resolved : [resolved]

    content_refs.each do |content_ref|
      return true if stream_has_text?(content_ref.solve)
    end
  end

  false
rescue Origami::InvalidPDFError
  false
end
|
72
|
+
|
73
|
+
# Builds an Origami image XObject out of an image.
# The image is first converted to a stream through ImageUtils.image_to_stringio, then loaded as 'jpg'.
# @param image [Object] Image to embed (presumably a MiniMagick::Image - confirm against callers).
# @return [Origami::Graphics::ImageXObject]
def self.create_xobject(image)
  Origami::Graphics::ImageXObject.from_image_file(
    Mindee::Image::ImageUtils.image_to_stringio(image),
    'jpg'
  )
end
|
77
|
+
|
78
|
+
# Fills in the dictionary entries of an image XObject from the image it embeds.
# @param xobject [Origami::Graphics::ImageXObject] XObject whose dictionary gets updated.
# @param image [Object] Source image; read through image[:width], image[:height] and image.data.
def self.set_xobject_properties(xobject, image)
  properties = xobject.dictionary
  properties[:BitsPerComponent] = 8
  properties[:Filter] = determine_filter(image)
  properties[:Width] = image[:width]
  properties[:Height] = image[:height]
  properties[:ColorSpace] = determine_colorspace(image)
end
|
85
|
+
|
86
|
+
# Determines the PDF stream filter matching the image's 'filter' property.
# Falls back to :DCTDecode when the property is missing or doesn't mention Zip or LZW.
# @param image [#data] Image whose data hash may hold a 'properties' => 'filter' entry.
# @return [Symbol] :FlateDecode, :LZWDecode or :DCTDecode.
def self.determine_filter(image)
  # dig tolerates an absent 'properties' entry, where chained [] calls would raise NoMethodError.
  filter = image.data.dig('properties', 'filter')
  case filter
  when /Zip/i then :FlateDecode
  when /LZW/i then :LZWDecode
  else :DCTDecode
  end
end
|
94
|
+
|
95
|
+
# Maps the image's 'colorspace' entry onto a PDF device colorspace.
# Anything other than CMYK or the gray variants listed below is treated as RGB.
# @param image [#data] Image whose data hash holds a 'colorspace' entry.
# @return [Symbol] :DeviceCMYK, :DeviceGray or :DeviceRGB.
def self.determine_colorspace(image)
  device_colorspaces = {
    'CMYK' => :DeviceCMYK,
    'Gray' => :DeviceGray,
    'PseudoClass Gray' => :DeviceGray
  }
  device_colorspaces.fetch(image.data['colorspace'], :DeviceRGB)
end
|
103
|
+
|
104
|
+
# Replaces the page's content with a stream drawing the named XObject scaled to width x height.
# @param page [Origami::Page] Page receiving the content stream.
# @param xobject_name [String] Resource name of the XObject to draw.
# @param width [Numeric] Horizontal scale applied to the XObject.
# @param height [Numeric] Vertical scale applied to the XObject.
def self.add_content_to_page(page, xobject_name, width, height)
  # q/Q save and restore the graphics state around the transformation matrix (cm) and the draw call (Do).
  drawing_operators = <<~CONTENT
    q
    #{width} 0 0 #{height} 0 0 cm
    /#{xobject_name} Do
    Q
  CONTENT
  page.Contents = Origami::Stream.new(drawing_operators)
end
|
109
|
+
|
110
|
+
# Sets both the MediaBox and the CropBox of a page to a width x height rectangle anchored at the origin.
# @param page [Origami::Page] Page to resize.
# @param width [Numeric] Page width.
# @param height [Numeric] Page height.
def self.set_page_dimensions(page, width, height)
  bounds = [0, 0, width, height]
  # Each entry gets its own array so that the two boxes never share the same object.
  page[:MediaBox] = bounds.dup
  page[:CropBox] = bounds
end
|
114
|
+
|
115
|
+
# Compresses image data and wraps the result into a new 8-bit DeviceRGB image XObject.
# NOTE(review): Width and Height are set to the requested maximum dimensions rather than read back from
# the compressed output; confirm the compressor always yields exactly that size.
# @param image_data [Object] Image data handed over to ImageCompressor.compress_image.
# @param image_quality [Integer] Quality passed to the compressor.
# @param width [Integer] Maximum width for the compressor, also stored as the XObject's Width.
# @param height [Integer] Maximum height for the compressor, also stored as the XObject's Height.
# @return [Origami::Graphics::ImageXObject]
def self.process_image_xobject(image_data, image_quality, width, height)
  compressed_payload = Image::ImageCompressor.compress_image(
    image_data,
    quality: image_quality,
    max_width: width,
    max_height: height
  )

  Origami::Graphics::ImageXObject.new.tap do |image_xobject|
    image_xobject.data = compressed_payload
    image_xobject.Width = width
    image_xobject.Height = height
    image_xobject.ColorSpace = :DeviceRGB
    image_xobject.BitsPerComponent = 8
  end
end
|
32
132
|
end
|
33
133
|
end
|
34
134
|
end
|
data/lib/mindee/pdf.rb
CHANGED
@@ -6,7 +6,7 @@ require_relative 'financial_document_v1_line_item'
|
|
6
6
|
module Mindee
|
7
7
|
module Product
|
8
8
|
module FinancialDocument
|
9
|
-
# Financial Document API version 1.
|
9
|
+
# Financial Document API version 1.10 document data.
|
10
10
|
class FinancialDocumentV1Document < Mindee::Parsing::Common::Prediction
|
11
11
|
include Mindee::Parsing::Standard
|
12
12
|
# The customer's address used for billing.
|
@@ -48,6 +48,12 @@ module Mindee
|
|
48
48
|
# The locale detected on the document.
|
49
49
|
# @return [Mindee::Parsing::Standard::LocaleField]
|
50
50
|
attr_reader :locale
|
51
|
+
# The date on which the payment is due / fulfilled.
|
52
|
+
# @return [Mindee::Parsing::Standard::DateField]
|
53
|
+
attr_reader :payment_date
|
54
|
+
# The purchase order number.
|
55
|
+
# @return [Mindee::Parsing::Standard::StringField]
|
56
|
+
attr_reader :po_number
|
51
57
|
# The receipt number or identifier only if document is a receipt.
|
52
58
|
# @return [Mindee::Parsing::Standard::StringField]
|
53
59
|
attr_reader :receipt_number
|
@@ -123,6 +129,8 @@ module Mindee
|
|
123
129
|
@line_items.push(FinancialDocumentV1LineItem.new(item, page_id))
|
124
130
|
end
|
125
131
|
@locale = LocaleField.new(prediction['locale'], page_id)
|
132
|
+
@payment_date = DateField.new(prediction['payment_date'], page_id)
|
133
|
+
@po_number = StringField.new(prediction['po_number'], page_id)
|
126
134
|
@receipt_number = StringField.new(prediction['receipt_number'], page_id)
|
127
135
|
@reference_numbers = []
|
128
136
|
prediction['reference_numbers'].each do |item|
|
@@ -161,11 +169,13 @@ module Mindee
|
|
161
169
|
out_str = String.new
|
162
170
|
out_str << "\n:Locale: #{@locale}".rstrip
|
163
171
|
out_str << "\n:Invoice Number: #{@invoice_number}".rstrip
|
172
|
+
out_str << "\n:Purchase Order Number: #{@po_number}".rstrip
|
164
173
|
out_str << "\n:Receipt Number: #{@receipt_number}".rstrip
|
165
174
|
out_str << "\n:Document Number: #{@document_number}".rstrip
|
166
175
|
out_str << "\n:Reference Numbers: #{reference_numbers}".rstrip
|
167
176
|
out_str << "\n:Purchase Date: #{@date}".rstrip
|
168
177
|
out_str << "\n:Due Date: #{@due_date}".rstrip
|
178
|
+
out_str << "\n:Payment Date: #{@payment_date}".rstrip
|
169
179
|
out_str << "\n:Total Net: #{@total_net}".rstrip
|
170
180
|
out_str << "\n:Total Amount: #{@total_amount}".rstrip
|
171
181
|
out_str << "\n:Taxes:#{@taxes}".rstrip
|
@@ -6,7 +6,7 @@ require_relative 'financial_document_v1_document'
|
|
6
6
|
module Mindee
|
7
7
|
module Product
|
8
8
|
module FinancialDocument
|
9
|
-
# Financial Document API version 1.
|
9
|
+
# Financial Document API version 1.10 page data.
|
10
10
|
class FinancialDocumentV1Page < Mindee::Parsing::Common::Page
|
11
11
|
# @param prediction [Hash]
|
12
12
|
def initialize(prediction)
|
@@ -6,7 +6,7 @@ require_relative 'invoice_v4_line_item'
|
|
6
6
|
module Mindee
|
7
7
|
module Product
|
8
8
|
module Invoice
|
9
|
-
# Invoice API version 4.
|
9
|
+
# Invoice API version 4.8 document data.
|
10
10
|
class InvoiceV4Document < Mindee::Parsing::Common::Prediction
|
11
11
|
include Mindee::Parsing::Standard
|
12
12
|
# The customer's address used for billing.
|
@@ -42,6 +42,12 @@ module Mindee
|
|
42
42
|
# The locale detected on the document.
|
43
43
|
# @return [Mindee::Parsing::Standard::LocaleField]
|
44
44
|
attr_reader :locale
|
45
|
+
# The date on which the payment is due / was fulfilled.
|
46
|
+
# @return [Mindee::Parsing::Standard::DateField]
|
47
|
+
attr_reader :payment_date
|
48
|
+
# The purchase order number.
|
49
|
+
# @return [Mindee::Parsing::Standard::StringField]
|
50
|
+
attr_reader :po_number
|
45
51
|
# List of Reference numbers, including PO number.
|
46
52
|
# @return [Array<Mindee::Parsing::Standard::StringField>]
|
47
53
|
attr_reader :reference_numbers
|
@@ -103,6 +109,8 @@ module Mindee
|
|
103
109
|
@line_items.push(InvoiceV4LineItem.new(item, page_id))
|
104
110
|
end
|
105
111
|
@locale = LocaleField.new(prediction['locale'], page_id)
|
112
|
+
@payment_date = DateField.new(prediction['payment_date'], page_id)
|
113
|
+
@po_number = StringField.new(prediction['po_number'], page_id)
|
106
114
|
@reference_numbers = []
|
107
115
|
prediction['reference_numbers'].each do |item|
|
108
116
|
@reference_numbers.push(StringField.new(item, page_id))
|
@@ -137,9 +145,11 @@ module Mindee
|
|
137
145
|
out_str = String.new
|
138
146
|
out_str << "\n:Locale: #{@locale}".rstrip
|
139
147
|
out_str << "\n:Invoice Number: #{@invoice_number}".rstrip
|
148
|
+
out_str << "\n:Purchase Order Number: #{@po_number}".rstrip
|
140
149
|
out_str << "\n:Reference Numbers: #{reference_numbers}".rstrip
|
141
150
|
out_str << "\n:Purchase Date: #{@date}".rstrip
|
142
151
|
out_str << "\n:Due Date: #{@due_date}".rstrip
|
152
|
+
out_str << "\n:Payment Date: #{@payment_date}".rstrip
|
143
153
|
out_str << "\n:Total Net: #{@total_net}".rstrip
|
144
154
|
out_str << "\n:Total Amount: #{@total_amount}".rstrip
|
145
155
|
out_str << "\n:Total Tax: #{@total_tax}".rstrip
|
@@ -6,7 +6,7 @@ require_relative 'invoice_v4_document'
|
|
6
6
|
module Mindee
|
7
7
|
module Product
|
8
8
|
module Invoice
|
9
|
-
# Invoice API version 4.
|
9
|
+
# Invoice API version 4.8 page data.
|
10
10
|
class InvoiceV4Page < Mindee::Parsing::Common::Page
|
11
11
|
# @param prediction [Hash]
|
12
12
|
def initialize(prediction)
|
data/lib/mindee/version.rb
CHANGED
data/lib/mindee.rb
CHANGED
@@ -19,6 +19,16 @@ module Mindee
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
+
module Image
|
23
|
+
# Miscellaneous image operations.
|
24
|
+
module ImageUtils
|
25
|
+
end
|
26
|
+
|
27
|
+
# Image compressor module to handle image compression.
|
28
|
+
module ImageCompressor
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
22
32
|
# Custom extraction module
|
23
33
|
module Extraction
|
24
34
|
end
|
data/mindee.gemspec
CHANGED
@@ -30,8 +30,9 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.required_ruby_version = Gem::Requirement.new('>= 2.6')
|
31
31
|
|
32
32
|
spec.add_runtime_dependency 'marcel', '~> 1.0.2'
|
33
|
-
spec.add_runtime_dependency 'mini_magick', '
|
33
|
+
spec.add_runtime_dependency 'mini_magick', '>=4', '< 6'
|
34
34
|
spec.add_runtime_dependency 'origamindee', '~> 3.1.0'
|
35
|
+
spec.add_runtime_dependency 'pdf-reader', '~> 2.12.0'
|
35
36
|
|
36
37
|
spec.add_development_dependency 'rake', '~> 12.3.3'
|
37
38
|
spec.add_development_dependency 'rspec', '~> 3.12.0'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mindee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.15.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mindee, SA
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-10-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: marcel
|
@@ -28,16 +28,22 @@ dependencies:
|
|
28
28
|
name: mini_magick
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '4'
|
34
|
+
- - "<"
|
32
35
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
36
|
+
version: '6'
|
34
37
|
type: :runtime
|
35
38
|
prerelease: false
|
36
39
|
version_requirements: !ruby/object:Gem::Requirement
|
37
40
|
requirements:
|
38
|
-
- - "
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '4'
|
44
|
+
- - "<"
|
39
45
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
46
|
+
version: '6'
|
41
47
|
- !ruby/object:Gem::Dependency
|
42
48
|
name: origamindee
|
43
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,6 +58,20 @@ dependencies:
|
|
52
58
|
- - "~>"
|
53
59
|
- !ruby/object:Gem::Version
|
54
60
|
version: 3.1.0
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: pdf-reader
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - "~>"
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: 2.12.0
|
68
|
+
type: :runtime
|
69
|
+
prerelease: false
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - "~>"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: 2.12.0
|
55
75
|
- !ruby/object:Gem::Dependency
|
56
76
|
name: rake
|
57
77
|
requirement: !ruby/object:Gem::Requirement
|
@@ -198,6 +218,7 @@ files:
|
|
198
218
|
- docs/us_mail_v2.md
|
199
219
|
- docs/us_w9_v1.md
|
200
220
|
- examples/auto_invoice_splitter_extraction.rb
|
221
|
+
- examples/auto_multi_receipts_detector_extraction.rb
|
201
222
|
- lib/mindee.rb
|
202
223
|
- lib/mindee/client.rb
|
203
224
|
- lib/mindee/extraction.rb
|
@@ -223,6 +244,9 @@ files:
|
|
223
244
|
- lib/mindee/http/endpoint.rb
|
224
245
|
- lib/mindee/http/error.rb
|
225
246
|
- lib/mindee/http/response_validation.rb
|
247
|
+
- lib/mindee/image.rb
|
248
|
+
- lib/mindee/image/image_compressor.rb
|
249
|
+
- lib/mindee/image/image_utils.rb
|
226
250
|
- lib/mindee/input.rb
|
227
251
|
- lib/mindee/input/local_response.rb
|
228
252
|
- lib/mindee/input/sources.rb
|
@@ -261,7 +285,8 @@ files:
|
|
261
285
|
- lib/mindee/parsing/standard/string_field.rb
|
262
286
|
- lib/mindee/parsing/standard/tax_field.rb
|
263
287
|
- lib/mindee/pdf.rb
|
264
|
-
- lib/mindee/pdf/
|
288
|
+
- lib/mindee/pdf/pdf_compressor.rb
|
289
|
+
- lib/mindee/pdf/pdf_processor.rb
|
265
290
|
- lib/mindee/pdf/pdf_tools.rb
|
266
291
|
- lib/mindee/product.rb
|
267
292
|
- lib/mindee/product/.rubocop.yml
|