mindee 3.13.0 → 3.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/docs/bill_of_lading_v1.md +50 -1
  4. data/docs/energy_bill_fra_v1.md +61 -1
  5. data/docs/expense_receipts_v5.md +4 -4
  6. data/docs/financial_document_v1.md +14 -0
  7. data/docs/invoices_v4.md +16 -2
  8. data/docs/nutrition_facts_v1.md +80 -1
  9. data/docs/payslip_fra_v2.md +77 -1
  10. data/docs/us_mail_v2.md +1 -1
  11. data/examples/auto_invoice_splitter_extraction.rb +36 -31
  12. data/examples/auto_multi_receipts_detector_extraction.rb +31 -0
  13. data/lib/mindee/client.rb +1 -0
  14. data/lib/mindee/extraction/common/extracted_image.rb +1 -2
  15. data/lib/mindee/extraction/common/image_extractor.rb +147 -159
  16. data/lib/mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.rb +22 -16
  17. data/lib/mindee/extraction/pdf_extractor/pdf_extractor.rb +3 -1
  18. data/lib/mindee/extraction/tax_extractor/tax_extractor.rb +1 -0
  19. data/lib/mindee/geometry/point.rb +2 -1
  20. data/lib/mindee/image/image_compressor.rb +29 -0
  21. data/lib/mindee/image/image_utils.rb +104 -0
  22. data/lib/mindee/image.rb +4 -0
  23. data/lib/mindee/input/sources.rb +36 -0
  24. data/lib/mindee/parsing/standard/date_field.rb +4 -0
  25. data/lib/mindee/parsing/standard/position_field.rb +3 -0
  26. data/lib/mindee/pdf/pdf_compressor.rb +117 -0
  27. data/lib/mindee/pdf/{pdf_processing.rb → pdf_processor.rb} +17 -0
  28. data/lib/mindee/pdf/pdf_tools.rb +100 -0
  29. data/lib/mindee/pdf.rb +3 -1
  30. data/lib/mindee/product/financial_document/financial_document_v1_document.rb +11 -1
  31. data/lib/mindee/product/financial_document/financial_document_v1_page.rb +1 -1
  32. data/lib/mindee/product/invoice/invoice_v4_document.rb +11 -1
  33. data/lib/mindee/product/invoice/invoice_v4_page.rb +1 -1
  34. data/lib/mindee/version.rb +1 -1
  35. data/lib/mindee.rb +10 -0
  36. data/mindee.gemspec +2 -1
  37. metadata +32 -7
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pdf-reader'
4
+ PDFReader = PDF
5
+
6
+ module Mindee
7
+ module PDF
8
+ # PDF compressor module to handle PDF compression.
9
+ module PDFCompressor
10
+ # Compresses each page of a provided PDF stream. Skips if force_source_text_compression isn't set and source text is detected.
11
+ # @param quality [Integer] Compression quality (70-100 for most JPG images in the test dataset).
12
+ # @param force_source_text_compression [Boolean] If true, attempts to re-write detected text.
13
+ # @param disable_source_text [Boolean] If true, doesn't re-apply source text to the original PDF.
14
+ def self.compress_pdf(pdf_data, quality: 85, force_source_text_compression: false, disable_source_text: true)
15
+ if PDFTools.source_text?(pdf_data)
16
+ if force_source_text_compression
17
+ if disable_source_text
18
+ puts "\e[33m[WARNING] Re-writing PDF source-text is an EXPERIMENTAL feature.\e[0m"
19
+ else
20
+ puts "\e[33m[WARNING] Source-file contains text, but disable_source_text flag is ignored. " \
21
+ "Resulting file will not contain any embedded text.\e[0m"
22
+ end
23
+ else
24
+ puts "\e[33m[WARNING] Source-text detected in input PDF. Aborting operation.\e[0m"
25
+ return pdf_data
26
+ end
27
+ end
28
+
29
+ pdf_data.rewind
30
+ pdf = Origami::PDF.read(pdf_data)
31
+ pages = process_pdf_pages(pdf, quality)
32
+
33
+ output_pdf = create_output_pdf(pages, disable_source_text, pdf_data)
34
+
35
+ output_stream = StringIO.new
36
+ output_pdf.save(output_stream)
37
+ output_stream
38
+ end
39
+
40
+ # Processes all pages in the PDF.
41
+ # @param pdf [Origami::PDF] The Origami PDF object to process.
42
+ # @param quality [Integer] Compression quality.
43
+ # @return [Array<Origami::Page>] Processed pages.
44
+ def self.process_pdf_pages(pdf, quality)
45
+ pdf.pages.map.with_index do |page, index|
46
+ process_pdf_page(Mindee::PDF::PdfProcessor.get_page(pdf, index), index, quality, page[:MediaBox])
47
+ end
48
+ end
49
+
50
+ # Creates the output PDF with processed pages.
51
+ # @param pages [Array] Processed pages.
52
+ # @param disable_source_text [Boolean] Whether to disable source text.
53
+ # @param pdf_data [StringIO] Original PDF data.
54
+ # @return [Origami::PDF] Output PDF object.
55
+ def self.create_output_pdf(pages, disable_source_text, pdf_data)
56
+ output_pdf = Origami::PDF.new
57
+ # NOTE: Page order and XObject handling require adjustment due to origami adding the last page first.
58
+ pages.rotate!(1) if pages.count >= 2
59
+
60
+ inject_text(pdf_data, pages) unless disable_source_text
61
+
62
+ pages.each { |page| output_pdf.append_page(page) }
63
+
64
+ output_pdf
65
+ end
66
+
67
+ # Extracts text from a source text PDF, and injects it into a newly-created one.
68
+ # @param pdf_data [StringIO] Stream representation of the PDF.
69
+ # @param pages [Array<Origami::Page>] Array of pages containing the rasterized version of the initial pages.
70
+ def self.inject_text(pdf_data, pages)
71
+ reader = PDFReader::Reader.new(pdf_data)
72
+
73
+ reader.pages.each_with_index do |original_page, index|
74
+ break if index >= pages.length
75
+
76
+ receiver = PDFReader::Reader::PageTextReceiver.new
77
+ original_page.walk(receiver)
78
+
79
+ receiver.runs.each do |text_run|
80
+ x = text_run.origin.x
81
+ y = text_run.origin.y
82
+ text = text_run.text
83
+ font_size = text_run.font_size
84
+
85
+ content_stream = Origami::Stream.new
86
+ content_stream.dictionary[:Filter] = :FlateDecode
87
+ content_stream.data = "BT\n/F1 #{font_size} Tf\n#{x} #{y} Td\n(#{text}) Tj\nET\n"
88
+
89
+ pages[index].Contents.data += content_stream.data
90
+ end
91
+ end
92
+ end
93
+
94
+ # Takes in a page stream, rasterizes it into a JPEG image, and applies the result onto a new Origami PDF page.
95
+ # @param page_stream [StringIO] Stream representation of a single page from the initial PDF.
96
+ # @param page_index [Integer] Index of the current page. Technically not needed, but left for debugging purposes.
97
+ # @param image_quality [Integer] Quality to apply to the rasterized page.
98
+ # @param media_box [Array<Integer>, nil] Extracted media box from the page. Can be nil.
99
+ # @return [Origami::Page]
100
+ def self.process_pdf_page(page_stream, page_index, image_quality, media_box)
101
+ new_page = Origami::Page.new
102
+ compressed_image = Mindee::Image::ImageUtils.pdf_to_magick_image(page_stream, image_quality)
103
+ width, height = Mindee::Image::ImageUtils.calculate_dimensions_from_media_box(compressed_image, media_box)
104
+
105
+ compressed_xobject = PDF::PDFTools.create_xobject(compressed_image)
106
+ PDF::PDFTools.set_xobject_properties(compressed_xobject, compressed_image)
107
+
108
+ xobject_name = "X#{page_index + 1}"
109
+ PDF::PDFTools.add_content_to_page(new_page, xobject_name, width, height)
110
+ new_page.add_xobject(compressed_xobject, xobject_name)
111
+
112
+ PDF::PDFTools.set_page_dimensions(new_page, width, height)
113
+ new_page
114
+ end
115
+ end
116
+ end
117
+ end
@@ -18,6 +18,7 @@ module Mindee
18
18
 
19
19
  # @param io_stream [StringIO]
20
20
  # @param options [Hash]
21
+ # @return [StringIO]
21
22
  def self.parse(io_stream, options)
22
23
  options = DEFAULT_OPTIONS.merge(options)
23
24
 
@@ -74,6 +75,22 @@ module Mindee
74
75
  io_stream.seek(0)
75
76
  pdf_parser.parse(io_stream)
76
77
  end
78
+
79
+ # Retrieves a PDF document's page.
80
+ #
81
+ # @param [Origami::PDF] pdf_doc Origami PDF handle.
82
+ # @param [Integer] page_id Page ID.
83
+ # @return [StringIO]
84
+ def self.get_page(pdf_doc, page_id)
85
+ stream = StringIO.new
86
+ pdf_doc.save(stream)
87
+
88
+ options = {
89
+ page_indexes: [page_id - 1],
90
+ }
91
+
92
+ parse(stream, options)
93
+ end
77
94
  end
78
95
  end
79
96
  end
@@ -29,6 +29,106 @@ module Mindee
29
29
  io_stream.set_encoding Encoding::BINARY
30
30
  io_stream
31
31
  end
32
+
33
+ # Checks a PDF's stream content for text operators.
34
+ # See https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf page 243-251.
35
+ # @param [StringIO] stream Stream object from a PDF's page.
36
+ # @return [Boolean] True if a text operator is found in the stream.
37
+ def self.stream_has_text?(stream)
38
+ data = stream.data
39
+ return false if data.nil? || data.empty?
40
+
41
+ text_operators = ['Tc', 'Tw', 'Th', 'TL', 'Tf', 'Tk', 'Tr', 'Tm', 'T*', 'Tj', 'TJ', "'", '"']
42
+ text_operators.any? { |op| data.include?(op) }
43
+ end
44
+
45
+ # Checks whether the file has source text. Returns false if the file isn't a PDF.
46
+ # @param [StringIO] pdf_data
47
+ # @return [Boolean] True if the pdf has source text, false otherwise.
48
+ def self.source_text?(pdf_data)
49
+ begin
50
+ pdf_data.rewind
51
+ pdf = Origami::PDF.read(pdf_data)
52
+
53
+ pdf.each_page do |page|
54
+ next unless page[:Contents]
55
+
56
+ contents = page[:Contents].solve
57
+ contents = [contents] unless contents.is_a?(Origami::Array)
58
+
59
+ contents.each do |stream_ref|
60
+ stream = stream_ref.solve
61
+ return true if stream_has_text?(stream)
62
+ end
63
+ end
64
+
65
+ false
66
+ end
67
+
68
+ false
69
+ rescue Origami::InvalidPDFError
70
+ false
71
+ end
72
+
73
+ def self.create_xobject(image)
74
+ image_io = Mindee::Image::ImageUtils.image_to_stringio(image)
75
+ Origami::Graphics::ImageXObject.from_image_file(image_io, 'jpg')
76
+ end
77
+
78
+ def self.set_xobject_properties(xobject, image)
79
+ xobject.dictionary[:BitsPerComponent] = 8
80
+ xobject.dictionary[:Filter] = determine_filter(image)
81
+ xobject.dictionary[:Width] = image[:width]
82
+ xobject.dictionary[:Height] = image[:height]
83
+ xobject.dictionary[:ColorSpace] = determine_colorspace(image)
84
+ end
85
+
86
+ def self.determine_filter(image)
87
+ filter = image.data['properties']['filter']
88
+ case filter
89
+ when %r{Zip}i then :FlateDecode
90
+ when %r{LZW}i then :LZWDecode
91
+ else :DCTDecode
92
+ end
93
+ end
94
+
95
+ def self.determine_colorspace(image)
96
+ colorspace = image.data['colorspace']
97
+ case colorspace
98
+ when 'CMYK' then :DeviceCMYK
99
+ when 'Gray', 'PseudoClass Gray' then :DeviceGray
100
+ else :DeviceRGB
101
+ end
102
+ end
103
+
104
+ def self.add_content_to_page(page, xobject_name, width, height)
105
+ content = "q\n#{width} 0 0 #{height} 0 0 cm\n/#{xobject_name} Do\nQ\n"
106
+ content_stream = Origami::Stream.new(content)
107
+ page.Contents = content_stream
108
+ end
109
+
110
+ def self.set_page_dimensions(page, width, height)
111
+ page[:MediaBox] = [0, 0, width, height]
112
+ page[:CropBox] = [0, 0, width, height]
113
+ end
114
+
115
+ def self.process_image_xobject(image_data, image_quality, width, height)
116
+ compressed_data = Image::ImageCompressor.compress_image(
117
+ image_data,
118
+ quality: image_quality,
119
+ max_width: width,
120
+ max_height: height
121
+ )
122
+
123
+ new_image = Origami::Graphics::ImageXObject.new
124
+ new_image.data = compressed_data
125
+ new_image.Width = width
126
+ new_image.Height = height
127
+ new_image.ColorSpace = :DeviceRGB
128
+ new_image.BitsPerComponent = 8
129
+
130
+ new_image
131
+ end
32
132
  end
33
133
  end
34
134
  end
data/lib/mindee/pdf.rb CHANGED
@@ -1,3 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'pdf/pdf_processing'
3
+ require_relative 'pdf/pdf_compressor'
4
+ require_relative 'pdf/pdf_processor'
5
+ require_relative 'pdf/pdf_tools'
@@ -6,7 +6,7 @@ require_relative 'financial_document_v1_line_item'
6
6
  module Mindee
7
7
  module Product
8
8
  module FinancialDocument
9
- # Financial Document API version 1.9 document data.
9
+ # Financial Document API version 1.10 document data.
10
10
  class FinancialDocumentV1Document < Mindee::Parsing::Common::Prediction
11
11
  include Mindee::Parsing::Standard
12
12
  # The customer's address used for billing.
@@ -48,6 +48,12 @@ module Mindee
48
48
  # The locale detected on the document.
49
49
  # @return [Mindee::Parsing::Standard::LocaleField]
50
50
  attr_reader :locale
51
+ # The date on which the payment is due / fulfilled.
52
+ # @return [Mindee::Parsing::Standard::DateField]
53
+ attr_reader :payment_date
54
+ # The purchase order number.
55
+ # @return [Mindee::Parsing::Standard::StringField]
56
+ attr_reader :po_number
51
57
  # The receipt number or identifier only if document is a receipt.
52
58
  # @return [Mindee::Parsing::Standard::StringField]
53
59
  attr_reader :receipt_number
@@ -123,6 +129,8 @@ module Mindee
123
129
  @line_items.push(FinancialDocumentV1LineItem.new(item, page_id))
124
130
  end
125
131
  @locale = LocaleField.new(prediction['locale'], page_id)
132
+ @payment_date = DateField.new(prediction['payment_date'], page_id)
133
+ @po_number = StringField.new(prediction['po_number'], page_id)
126
134
  @receipt_number = StringField.new(prediction['receipt_number'], page_id)
127
135
  @reference_numbers = []
128
136
  prediction['reference_numbers'].each do |item|
@@ -161,11 +169,13 @@ module Mindee
161
169
  out_str = String.new
162
170
  out_str << "\n:Locale: #{@locale}".rstrip
163
171
  out_str << "\n:Invoice Number: #{@invoice_number}".rstrip
172
+ out_str << "\n:Purchase Order Number: #{@po_number}".rstrip
164
173
  out_str << "\n:Receipt Number: #{@receipt_number}".rstrip
165
174
  out_str << "\n:Document Number: #{@document_number}".rstrip
166
175
  out_str << "\n:Reference Numbers: #{reference_numbers}".rstrip
167
176
  out_str << "\n:Purchase Date: #{@date}".rstrip
168
177
  out_str << "\n:Due Date: #{@due_date}".rstrip
178
+ out_str << "\n:Payment Date: #{@payment_date}".rstrip
169
179
  out_str << "\n:Total Net: #{@total_net}".rstrip
170
180
  out_str << "\n:Total Amount: #{@total_amount}".rstrip
171
181
  out_str << "\n:Taxes:#{@taxes}".rstrip
@@ -6,7 +6,7 @@ require_relative 'financial_document_v1_document'
6
6
  module Mindee
7
7
  module Product
8
8
  module FinancialDocument
9
- # Financial Document API version 1.9 page data.
9
+ # Financial Document API version 1.10 page data.
10
10
  class FinancialDocumentV1Page < Mindee::Parsing::Common::Page
11
11
  # @param prediction [Hash]
12
12
  def initialize(prediction)
@@ -6,7 +6,7 @@ require_relative 'invoice_v4_line_item'
6
6
  module Mindee
7
7
  module Product
8
8
  module Invoice
9
- # Invoice API version 4.7 document data.
9
+ # Invoice API version 4.8 document data.
10
10
  class InvoiceV4Document < Mindee::Parsing::Common::Prediction
11
11
  include Mindee::Parsing::Standard
12
12
  # The customer's address used for billing.
@@ -42,6 +42,12 @@ module Mindee
42
42
  # The locale detected on the document.
43
43
  # @return [Mindee::Parsing::Standard::LocaleField]
44
44
  attr_reader :locale
45
+ # The date on which the payment is due / was fulfilled.
46
+ # @return [Mindee::Parsing::Standard::DateField]
47
+ attr_reader :payment_date
48
+ # The purchase order number.
49
+ # @return [Mindee::Parsing::Standard::StringField]
50
+ attr_reader :po_number
45
51
  # List of Reference numbers, including PO number.
46
52
  # @return [Array<Mindee::Parsing::Standard::StringField>]
47
53
  attr_reader :reference_numbers
@@ -103,6 +109,8 @@ module Mindee
103
109
  @line_items.push(InvoiceV4LineItem.new(item, page_id))
104
110
  end
105
111
  @locale = LocaleField.new(prediction['locale'], page_id)
112
+ @payment_date = DateField.new(prediction['payment_date'], page_id)
113
+ @po_number = StringField.new(prediction['po_number'], page_id)
106
114
  @reference_numbers = []
107
115
  prediction['reference_numbers'].each do |item|
108
116
  @reference_numbers.push(StringField.new(item, page_id))
@@ -137,9 +145,11 @@ module Mindee
137
145
  out_str = String.new
138
146
  out_str << "\n:Locale: #{@locale}".rstrip
139
147
  out_str << "\n:Invoice Number: #{@invoice_number}".rstrip
148
+ out_str << "\n:Purchase Order Number: #{@po_number}".rstrip
140
149
  out_str << "\n:Reference Numbers: #{reference_numbers}".rstrip
141
150
  out_str << "\n:Purchase Date: #{@date}".rstrip
142
151
  out_str << "\n:Due Date: #{@due_date}".rstrip
152
+ out_str << "\n:Payment Date: #{@payment_date}".rstrip
143
153
  out_str << "\n:Total Net: #{@total_net}".rstrip
144
154
  out_str << "\n:Total Amount: #{@total_amount}".rstrip
145
155
  out_str << "\n:Total Tax: #{@total_tax}".rstrip
@@ -6,7 +6,7 @@ require_relative 'invoice_v4_document'
6
6
  module Mindee
7
7
  module Product
8
8
  module Invoice
9
- # Invoice API version 4.7 page data.
9
+ # Invoice API version 4.8 page data.
10
10
  class InvoiceV4Page < Mindee::Parsing::Common::Page
11
11
  # @param prediction [Hash]
12
12
  def initialize(prediction)
@@ -3,7 +3,7 @@
3
3
  # Mindee
4
4
  module Mindee
5
5
  # Current version.
6
- VERSION = '3.13.0'
6
+ VERSION = '3.15.0'
7
7
 
8
8
  # Finds and returns the current platform.
9
9
  # @return [String]
data/lib/mindee.rb CHANGED
@@ -19,6 +19,16 @@ module Mindee
19
19
  end
20
20
  end
21
21
 
22
+ module Image
23
+ # Miscellaneous image operations.
24
+ module ImageUtils
25
+ end
26
+
27
+ # Image compressor module to handle image compression.
28
+ module ImageCompressor
29
+ end
30
+ end
31
+
22
32
  # Custom extraction module
23
33
  module Extraction
24
34
  end
data/mindee.gemspec CHANGED
@@ -30,8 +30,9 @@ Gem::Specification.new do |spec|
30
30
  spec.required_ruby_version = Gem::Requirement.new('>= 2.6')
31
31
 
32
32
  spec.add_runtime_dependency 'marcel', '~> 1.0.2'
33
- spec.add_runtime_dependency 'mini_magick', '~> 4.13.0'
33
+ spec.add_runtime_dependency 'mini_magick', '>=4', '< 6'
34
34
  spec.add_runtime_dependency 'origamindee', '~> 3.1.0'
35
+ spec.add_runtime_dependency 'pdf-reader', '~> 2.12.0'
35
36
 
36
37
  spec.add_development_dependency 'rake', '~> 12.3.3'
37
38
  spec.add_development_dependency 'rspec', '~> 3.12.0'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mindee
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.13.0
4
+ version: 3.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mindee, SA
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-18 00:00:00.000000000 Z
11
+ date: 2024-10-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: marcel
@@ -28,16 +28,22 @@ dependencies:
28
28
  name: mini_magick
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '4'
34
+ - - "<"
32
35
  - !ruby/object:Gem::Version
33
- version: 4.13.0
36
+ version: '6'
34
37
  type: :runtime
35
38
  prerelease: false
36
39
  version_requirements: !ruby/object:Gem::Requirement
37
40
  requirements:
38
- - - "~>"
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '4'
44
+ - - "<"
39
45
  - !ruby/object:Gem::Version
40
- version: 4.13.0
46
+ version: '6'
41
47
  - !ruby/object:Gem::Dependency
42
48
  name: origamindee
43
49
  requirement: !ruby/object:Gem::Requirement
@@ -52,6 +58,20 @@ dependencies:
52
58
  - - "~>"
53
59
  - !ruby/object:Gem::Version
54
60
  version: 3.1.0
61
+ - !ruby/object:Gem::Dependency
62
+ name: pdf-reader
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: 2.12.0
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: 2.12.0
55
75
  - !ruby/object:Gem::Dependency
56
76
  name: rake
57
77
  requirement: !ruby/object:Gem::Requirement
@@ -198,6 +218,7 @@ files:
198
218
  - docs/us_mail_v2.md
199
219
  - docs/us_w9_v1.md
200
220
  - examples/auto_invoice_splitter_extraction.rb
221
+ - examples/auto_multi_receipts_detector_extraction.rb
201
222
  - lib/mindee.rb
202
223
  - lib/mindee/client.rb
203
224
  - lib/mindee/extraction.rb
@@ -223,6 +244,9 @@ files:
223
244
  - lib/mindee/http/endpoint.rb
224
245
  - lib/mindee/http/error.rb
225
246
  - lib/mindee/http/response_validation.rb
247
+ - lib/mindee/image.rb
248
+ - lib/mindee/image/image_compressor.rb
249
+ - lib/mindee/image/image_utils.rb
226
250
  - lib/mindee/input.rb
227
251
  - lib/mindee/input/local_response.rb
228
252
  - lib/mindee/input/sources.rb
@@ -261,7 +285,8 @@ files:
261
285
  - lib/mindee/parsing/standard/string_field.rb
262
286
  - lib/mindee/parsing/standard/tax_field.rb
263
287
  - lib/mindee/pdf.rb
264
- - lib/mindee/pdf/pdf_processing.rb
288
+ - lib/mindee/pdf/pdf_compressor.rb
289
+ - lib/mindee/pdf/pdf_processor.rb
265
290
  - lib/mindee/pdf/pdf_tools.rb
266
291
  - lib/mindee/product.rb
267
292
  - lib/mindee/product/.rubocop.yml