ocr-file 0.0.2 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f188bc0b29f4232b379e5e15d924c57a64a1758f04d8e168d2a44a744d20d1af
4
- data.tar.gz: 5b54d844f01a5a5249572dd0abc270ae1fb37ff0070df9ad47eb84cf5f233fe7
3
+ metadata.gz: 9660e3d19c210789a7aeab56e63b002b507c12917edd1418202900d05647773b
4
+ data.tar.gz: 28861544f58374db141e5e3cc3ee8569201c4ffd611458e77774c98835e2f882
5
5
  SHA512:
6
- metadata.gz: c51ab724a77e8b22568dc0c7cefcf3ba28407f7050976d6900824954221d4f04e677b31b58ae644c87752e60024e1667194eda8b00c89dfab30f9a81d53ba1d5
7
- data.tar.gz: 9b521be6e75808899398e77cf0c0b9dee842350a5c81c0ba513ad56125725607906c8c19e6b493201750ba331521db4ba247723a1c09d82dfb61e8caec857428
6
+ metadata.gz: 55f4266d6877a7f2c8a4175f5601930c9940aa1a8a06fc1d6d84faca455232173f2ae4be887dc9d20246cb224498a213b91871b66ee3fb3b02699399be33453a
7
+ data.tar.gz: e78263baffd1ae1ff1246f705250d788ae6b5c24b3d59d78e169b3ddf93e139ac9b9688cf9d954053a913a1c3bd6be54c40466d372f915c4430d55d93282f1bb
data/Gemfile.lock CHANGED
@@ -1,12 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ocr-file (0.0.2)
4
+ ocr-file (0.0.6)
5
5
  active_attr (~> 0.15.4)
6
6
  console-style (~> 0.0.1)
7
7
  hexapdf (~> 0.23.0)
8
8
  mini_magick (~> 4.11.0)
9
9
  rtesseract (~> 3.1.2)
10
+ ruby-spellchecker (~> 0.1.5)
10
11
 
11
12
  GEM
12
13
  remote: https://rubygems.org/
@@ -60,7 +61,7 @@ GEM
60
61
  coderay (~> 1.1)
61
62
  method_source (~> 1.0)
62
63
  racc (1.6.0)
63
- rack (2.2.3.1)
64
+ rack (2.2.4)
64
65
  rack-test (1.1.0)
65
66
  rack (>= 1.0, < 3)
66
67
  rails-dom-testing (2.0.3)
@@ -69,6 +70,7 @@ GEM
69
70
  rails-html-sanitizer (1.4.3)
70
71
  loofah (~> 2.3)
71
72
  rtesseract (3.1.2)
73
+ ruby-spellchecker (0.1.5)
72
74
  tzinfo (2.0.4)
73
75
  concurrent-ruby (~> 1.0)
74
76
 
data/README.md CHANGED
@@ -43,13 +43,16 @@ You will need to install `tesseract` with your desired language on your system,
43
43
  ocr_engine: 'tesseract', # 'cloud-vision'
44
44
  # Image Pre-Processing
45
45
  image_preprocess: true,
46
- effects: ['despeckle', 'deskew', 'enhance', 'sharpen', 'bw'], # Applies effects as listed. 'norm' is also available
46
+ effects: ['despeckle', 'deskew', 'enhance', 'sharpen', 'remove_shadow', 'bw'], # Applies effects as listed. 'norm' is also available
47
+ automatic_reprocess: true, # May more than double the number of operations, but can produce better results automatically
47
48
  # PDF to Image Processing
48
49
  optimise_pdf: true,
49
50
  extract_pdf_images: true, # if false will screenshot each PDF page
50
51
  temp_filename_prefix: 'image',
52
+ spelling_correction: true, # Will attempt to fix text at the end (not used for searchable pdf output)
51
53
  # Console Output
52
54
  verbose: true,
55
+ timing: true,
53
56
  }
54
57
 
55
58
  doc = OcrFile::Document.new(
@@ -74,6 +77,7 @@ You will need to install `tesseract` with your desired language on your system,
74
77
  doc.to_pdf
75
78
 
76
79
  # How to merge files into a single PDF:
80
+ # The files can be images or other PDFs
77
81
  file_paths = []
78
82
  documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.open_pdf(path, password: '') }
79
83
  merged_document = OcrFile::ImageEngines::PdfEngine.merge(documents)
@@ -83,7 +87,9 @@ You will need to install `tesseract` with your desired language on your system,
83
87
  ### Notes / Tips
84
88
  Set `extract_pdf_images` to `false` for higher quality OCR. However this will consume more temporary space per PDF page and also be considerably slower.
85
89
 
86
- Image pre-processing only thresholds (bw), normalises the colour space, removes speckles and tries to straighten the image. Will make the end result Black and White but have far more accurate OCR (PDFs). The order of operations is important, but steps can be removed when necessary.
90
+ Image pre-processing only thresholds (bw), normalises the colour space, removes speckles, removes shadows and tries to straighten the image. This makes the end result black and white but gives far more accurate OCR (PDFs). The order of operations is important, but steps can be removed when necessary. Expanding the colour dynamic range with `'norm'` can also be done but isn't recommended.
91
+
92
+ `automatic_reprocess` is much slower as it has to re-do operations per image (in some cases) but will select the best result for each page.
87
93
 
88
94
  ### Simple CLI
89
95
  Once installed you can use `ocr-file` as a CLI. It's currently a reduced set of options. These are subject to change in future versions.
@@ -108,7 +114,6 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
108
114
  ### TODOs
109
115
  - input validation
110
116
  - Better CLI
111
- - image processing
112
117
  - password
113
118
  - Base64 encoding
114
119
  - requirements checking (installed dependencies etc ...)
@@ -117,7 +122,11 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
117
122
  - Improve console output
118
123
  - Fix spaces in file names
119
124
  - Better verbosity
120
- - Timing
125
+ - Docker
126
+ - pdftk / pdf merge for text and bookmarks etc ...
127
+ - https://github.com/tesseract-ocr/tesseract/issues/660
128
+ - tesseract -c naked_pdf=true
129
+ -
121
130
 
122
131
  ### Tests
123
132
  To run tests execute:
@@ -5,6 +5,7 @@ module OcrFile
5
5
 
6
6
  ACCEPTED_IMAGE_TYPES = ['png', 'jpeg', 'jpg', 'tiff', 'bmp']
7
7
  PAGE_BREAK = "\n\r\n" # TODO: Make configurable
8
+ EFFECTS_TO_REMOVE = ['', 'norm', 'remove_shadow', 'bw']
8
9
  DEFAULT_CONFIG = {
9
10
  # Images from PDF
10
11
  filetype: 'png',
@@ -22,13 +23,16 @@ module OcrFile
22
23
  ocr_engine: 'tesseract', # 'cloud-vision'
23
24
  # Image Pre-Processing
24
25
  image_preprocess: true,
25
- effects: ['despeckle', 'deskew', 'enhance', 'sharpen', 'bw'],
26
+ effects: ['despeckle', 'deskew', 'enhance', 'sharpen', 'remove_shadow', 'bw'],
27
+ automatic_reprocess: true,
26
28
  # PDF to Image Processing
27
29
  optimise_pdf: true,
28
30
  extract_pdf_images: true, # if false will screenshot each PDF page
29
31
  temp_filename_prefix: 'image',
32
+ spelling_correction: true,
30
33
  # Console Output
31
34
  verbose: true,
35
+ timing: true,
32
36
  }
33
37
 
34
38
  attr_reader :original_file_path,
@@ -36,7 +40,9 @@ module OcrFile
36
40
  :save_file_path,
37
41
  :final_save_file,
38
42
  :config,
39
- :ocr_engine
43
+ :ocr_engine,
44
+ :start_time,
45
+ :end_time
40
46
 
41
47
  # save_file_path will also generate a tmp path for tmp files. Expected folder path
42
48
  # TODO: Add in more input validation
@@ -69,72 +75,46 @@ module OcrFile
69
75
 
70
76
  # Trigger OCR pipeline
71
77
  def to_pdf
72
- if pdf?
73
- create_temp_folder
74
- image_paths = extract_image_paths_from_pdf(@original_file_path)
75
-
76
- pdfs_to_merge = []
77
-
78
- image_paths.each do |image_path|
79
- pdfs_to_merge << @ocr_engine.ocr_to_pdf(process_image(image_path), options: @config)
80
- end
81
-
82
- merged_pdf = OcrFile::ImageEngines::PdfEngine.merge(pdfs_to_merge)
78
+ @start_time = Time.now
79
+ find_best_image_processing(save: false) if config[:automatic_reprocess] && !text?
83
80
 
84
- OcrFile::ImageEngines::PdfEngine
85
- .save_pdf(merged_pdf, "#{@final_save_file}.pdf", optimise: @config[:optimise_pdf])
81
+ if pdf?
82
+ ocr_pdf_to_searchable_pdf
86
83
  elsif text?
87
- text = ::OcrFile::FileHelpers.open_text_file(@original_file_path)
88
- pdf_file = OcrFile::ImageEngines::PdfEngine.pdf_from_text(text, @config)
89
-
90
- OcrFile::ImageEngines::PdfEngine
91
- .save_pdf(pdf_file, "#{@final_save_file}.pdf", optimise: @config[:optimise_pdf])
84
+ text_to_pdf
92
85
  else # is an image
93
86
  ocr_image_to_pdf
94
87
  end
95
88
 
96
89
  close
90
+
91
+ @end_time = Time.now
92
+ print_time
97
93
  end
98
94
 
99
95
  def to_text
100
- if pdf?
101
- create_temp_folder
102
- image_paths = extract_image_paths_from_pdf(@original_file_path)
103
-
104
- image_paths.each do |image_path|
105
- text = @ocr_engine.ocr_to_text(process_image(image_path), options: @config)
106
- ::OcrFile::FileHelpers.append_file("#{@final_save_file}.txt", "#{text}#{PAGE_BREAK}")
107
- end
108
- elsif text?
109
- ::OcrFile::FileHelpers.open_text_file(@original_file_path)
110
- else # is an image
111
- ocr_image_to_text(save: true)
112
- end
96
+ @start_time = Time.now
97
+ return ::OcrFile::FileHelpers.open_text_file(@original_file_path) if text?
113
98
 
99
+ find_best_image_processing(save: true)
114
100
  close
101
+
102
+ @end_time = Time.now
103
+ print_time
115
104
  end
116
105
 
117
106
  def to_s
118
- if pdf?
119
- create_temp_folder
120
- image_paths = extract_image_paths_from_pdf(@original_file_path)
107
+ @start_time = Time.now
108
+ return ::OcrFile::FileHelpers.open_text_file(@original_file_path) if text?
121
109
 
122
- text = ''
110
+ text = find_best_image_processing(save: false)
123
111
 
124
- image_paths.each do |image_path|
125
- text = "#{text}#{PAGE_BREAK}#{@ocr_engine.ocr_to_text(process_image(image_path), options: @config)}"
126
- end
112
+ close
127
113
 
128
- close
129
- text
130
- elsif text?
131
- ::OcrFile::FileHelpers.open_text_file(@original_file_path)
132
- else # is an image
133
- text = ocr_image_to_text(save: false)
114
+ @end_time = Time.now
115
+ print_time
134
116
 
135
- close
136
- text
137
- end
117
+ text
138
118
  end
139
119
 
140
120
  def close
@@ -185,14 +165,67 @@ module OcrFile
185
165
  image_processor.convert!
186
166
  end
187
167
 
168
+ def ocr_pdf_to_searchable_pdf
169
+ create_temp_folder
170
+ image_paths = extract_image_paths_from_pdf(@original_file_path)
171
+
172
+ pdfs_to_merge = []
173
+
174
+ image_paths.each do |image_path|
175
+ puts image_path
176
+ pdfs_to_merge << @ocr_engine.ocr_to_pdf(process_image(image_path), options: @config)
177
+ end
178
+
179
+ merged_pdf = OcrFile::ImageEngines::PdfEngine.merge(pdfs_to_merge)
180
+
181
+ OcrFile::ImageEngines::PdfEngine
182
+ .save_pdf(merged_pdf, "#{@final_save_file}.pdf", optimise: @config[:optimise_pdf])
183
+ end
184
+
185
+ def text_to_pdf
186
+ text = ::OcrFile::FileHelpers.open_text_file(@original_file_path)
187
+ text = OcrFile::TextEngines::ResultProcessor.new(text).correct if config[:spelling_correction]
188
+
189
+ pdf_file = OcrFile::ImageEngines::PdfEngine.pdf_from_text(text, @config)
190
+
191
+ OcrFile::ImageEngines::PdfEngine
192
+ .save_pdf(pdf_file, "#{@final_save_file}.pdf", optimise: @config[:optimise_pdf])
193
+ end
194
+
188
195
  def ocr_image_to_pdf
196
+ find_best_image_processing(save: false) if config[:automatic_reprocess]
197
+
189
198
  pdf_document = @ocr_engine.ocr_to_pdf(process_image(@original_file_path), options: @config)
190
199
  OcrFile::ImageEngines::PdfEngine
191
200
  .save_pdf(pdf_document, "#{@final_save_file}.pdf", optimise: @config[:optimise_pdf])
192
201
  end
193
202
 
194
- def ocr_image_to_text(save: true)
203
+ def ocr_pdf_to_text(save:)
204
+ create_temp_folder
205
+ image_paths = extract_image_paths_from_pdf(@original_file_path)
206
+
207
+ text = ''
208
+
209
+ image_paths.each do |image_path|
210
+ puts image_path
211
+ text = @ocr_engine.ocr_to_text(process_image(image_path), options: @config) || ''
212
+
213
+ text = OcrFile::TextEngines::ResultProcessor.new(text).correct if config[:spelling_correction]
214
+ text = "#{text}#{PAGE_BREAK}#{text}"
215
+ end
216
+
217
+ if save
218
+ ::OcrFile::FileHelpers.append_file("#{@final_save_file}.txt", "#{text}#{PAGE_BREAK}")
219
+ else
220
+ text
221
+ end
222
+ end
223
+
224
+ def ocr_image_to_text(save:)
225
+ create_temp_folder
226
+
195
227
  text = @ocr_engine.ocr_to_text(process_image(@original_file_path), options: @config)
228
+ text = OcrFile::TextEngines::ResultProcessor.new(text).correct if config[:spelling_correction]
196
229
 
197
230
  if save
198
231
  ::OcrFile::FileHelpers.append_file("#{@final_save_file}.txt", text)
@@ -201,6 +234,57 @@ module OcrFile
201
234
  end
202
235
  end
203
236
 
237
+ def ocr_file_to_text(save:)
238
+ if pdf?
239
+ ocr_pdf_to_text(save: save)
240
+ else # is an image
241
+ ocr_image_to_text(save: save)
242
+ end
243
+ end
244
+
245
+ def find_best_image_processing(save:)
246
+ ocr_file_to_text(save: save) unless config[:automatic_reprocess]
247
+
248
+ text = ''
249
+ best_text_count = 0
250
+ best_effects = config[:effects]
251
+
252
+ effects_to_test = [''] + (EFFECTS_TO_REMOVE - (EFFECTS_TO_REMOVE - config[:effects]))
253
+ effects_to_test.each do |effect|
254
+ text = test_ocr_settings(effect)
255
+ processed_result = OcrFile::TextEngines::ResultProcessor.new(text)
256
+
257
+ if processed_result.count_of_issues < best_text_count
258
+ best_text_count = processed_result.count_of_issues
259
+ best_effects = config[:effects]
260
+ end
261
+
262
+ break if processed_result.valid_words?
263
+ end
264
+
265
+ # Fallback
266
+ if OcrFile::TextEngines::ResultProcessor.new(text).invalid_words?
267
+ config[:effects] = best_effects
268
+ text = ocr_file_to_text(save: false)
269
+ end
270
+
271
+ # Adds in extra operations which is unfortunately inefficient
272
+ if save
273
+ ocr_file_to_text(save: save)
274
+ else
275
+ text
276
+ end
277
+ end
278
+
279
+ def test_ocr_settings(effect)
280
+ config[:effects] = config[:effects] - [effect]
281
+ ocr_file_to_text(save: false)
282
+ end
283
+
284
+ def print_time
285
+ puts "Total Time: #{end_time-start_time} secs.\n\n" if config[:timing]
286
+ end
287
+
204
288
  def find_ocr_engine(engine_id)
205
289
  ocr_engine_constants
206
290
  .map { |c| ocr_module(c) }
@@ -61,6 +61,13 @@ module OcrFile
61
61
  @image.sharpen('0x4') # radiusXsigma
62
62
  end
63
63
 
64
+ # https://github.com/ImageMagick/ImageMagick/discussions/4145
65
+ def remove_shadow
66
+ @image.negate
67
+ @image.lat("20x20+10\%")
68
+ @image.negate
69
+ end
70
+
64
71
  def deskew
65
72
  @image.deskew('40%') # threshold recommended in the docs
66
73
  end
@@ -61,11 +61,38 @@ module OcrFile
61
61
  image_paths
62
62
  end
63
63
 
64
+ def insert_image(document, image_path)
65
+ canvas = document.pages.add.canvas
66
+ canvas.image(image_path, at: [0, 0], height: 700)
67
+ end
68
+
69
+ def combine(text, pdf_of_images)
70
+ return unless pdf_of_images.is_a?(::HexaPDF::Document)
71
+
72
+ if text.is_a?(::HexaPDF::Document)
73
+ pages_of_text = text.pages
74
+ else # Assume raw text with PAGE_BREAK
75
+ pages_of_text = text.split(PAGE_BREAK)
76
+ end
77
+
78
+ return unless pages_of_text.size == pdf_of_images.pages.size
79
+
80
+ if text.is_a?(::HexaPDF::Document) # Keep the page structure
81
+
82
+ else # Just text to embed
83
+
84
+ end
85
+ end
86
+
64
87
  def merge(documents)
65
88
  target = ::HexaPDF::Document.new
66
89
 
67
90
  documents.each do |document|
68
- document.pages.each { |page| target.pages << target.import(page) }
91
+ if document.is_a?(::HexaPDF::Document)
92
+ document.pages.each { |page| target.pages << target.import(page) }
93
+ else # Assume an image
94
+ insert_image(target, document)
95
+ end
69
96
  end
70
97
 
71
98
  target
@@ -0,0 +1,82 @@
1
+ module OcrFile
2
+ module TextEngines
3
+ class ResultProcessor
4
+ MINIMUM_WORD_LENGTH = 4
5
+ ACCEPTABLE_NUMBER_OF_ERRORS = 8 # Random number I pulled out of nowhere
6
+ ACCEPTABLE_UNIDENTIFIED_WORDS = 8 # Random number I pulled out of nowhere
7
+
8
+ # REGEX
9
+ ASCII_ONLY = /[^\u{0000}-\u{007f}]/
10
+ NOISE_CHARACTERS = /[^\w\s\/-;:]/
11
+ DUPLICATE_WORDS = /\b(\w+)\s+\1\b/
12
+ EVERYTHING_BUT_CHARACTERS = /[^\w\s]|(\d)/
13
+
14
+ attr_reader :text, :clear_text
15
+
16
+ def initialize(text)
17
+ @text = text
18
+ @clear_text = generate_clear_text || text || ''
19
+ end
20
+
21
+ def correct
22
+ Spellchecker.correct(text.gsub(NOISE_CHARACTERS, '')).gsub("\n ", "\n").strip
23
+ end
24
+
25
+ # This is a very naive way of determining if we should re-do OCR with
26
+ # shifted options
27
+ def valid_words?
28
+ word_size_average >= MINIMUM_WORD_LENGTH &&
29
+ spelling_error_count <= ACCEPTABLE_NUMBER_OF_ERRORS &&
30
+ unidentified_word_count <= ACCEPTABLE_UNIDENTIFIED_WORDS
31
+ end
32
+
33
+ def invalid_words?
34
+ !valid_words?
35
+ end
36
+
37
+ def word_count
38
+ return 0 if empty_text?
39
+ @_word_count ||= clear_words.size
40
+ end
41
+
42
+ def word_size_average
43
+ return 0 if empty_text?
44
+ @_word_size_average ||= clear_words.map(&:size).sum / word_count
45
+ end
46
+
47
+ # Assume English
48
+ def unidentified_word_count
49
+ clear_words.reject { |word| Spellchecker::Dictionaries::EnglishWords.include?(word) }.count
50
+ end
51
+
52
+ def spelling_error_count
53
+ Spellchecker.check(clear_text).count
54
+ end
55
+
56
+ def count_of_issues
57
+ spelling_error_count + unidentified_word_count
58
+ end
59
+
60
+ private
61
+
62
+ def empty_text?
63
+ clear_text.nil? || clear_text == ''
64
+ end
65
+
66
+ def clear_words
67
+ @clear_words ||= clear_text.gsub(EVERYTHING_BUT_CHARACTERS, '').split(' ')
68
+ end
69
+
70
+ def generate_clear_text
71
+ remove_lines
72
+ &.gsub(ASCII_ONLY, '')
73
+ &.gsub(NOISE_CHARACTERS, '')
74
+ &.gsub(DUPLICATE_WORDS, '')
75
+ end
76
+
77
+ def remove_lines
78
+ text&.gsub("\n", ' ')&.gsub("\r", ' ')&.gsub(' ', '')
79
+ end
80
+ end
81
+ end
82
+ end
@@ -1,3 +1,3 @@
1
1
  module OcrFile
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.6"
3
3
  end
data/lib/ocr-file.rb CHANGED
@@ -2,6 +2,7 @@ require 'hexapdf'
2
2
  require 'hexapdf/cli/images'
3
3
  require 'rtesseract'
4
4
  require 'mini_magick'
5
+ require 'ruby-spellchecker'
5
6
 
6
7
  require 'ocr-file/version'
7
8
 
@@ -10,6 +11,7 @@ require 'ocr-file/image_engines/image_magick'
10
11
  require 'ocr-file/image_engines/pdftoppm'
11
12
  require 'ocr-file/ocr_engines/tesseract'
12
13
  require 'ocr-file/ocr_engines/cloud_vision'
14
+ require 'ocr-file/text_engines/result_processor'
13
15
  require 'ocr-file/file_helpers'
14
16
  require 'ocr-file/document'
15
17
  require 'ocr-file/cli'
data/ocr-file.gemspec CHANGED
@@ -32,6 +32,7 @@ Gem::Specification.new do |spec|
32
32
  spec.add_dependency "hexapdf", "~> 0.23.0"
33
33
  spec.add_dependency "rtesseract", "~> 3.1.2"
34
34
  spec.add_dependency "mini_magick", "~> 4.11.0"
35
+ spec.add_dependency "ruby-spellchecker", "~> 0.1.5"
35
36
 
36
37
  # Development Dependencies
37
38
  spec.add_development_dependency "pry", "~> 0.14.1"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ocr-file
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-20 00:00:00.000000000 Z
11
+ date: 2022-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: console-style
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: 4.11.0
83
+ - !ruby/object:Gem::Dependency
84
+ name: ruby-spellchecker
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.1.5
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.1.5
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: pry
85
99
  requirement: !ruby/object:Gem::Requirement
@@ -122,6 +136,7 @@ files:
122
136
  - lib/ocr-file/image_engines/pdftoppm.rb
123
137
  - lib/ocr-file/ocr_engines/cloud_vision.rb
124
138
  - lib/ocr-file/ocr_engines/tesseract.rb
139
+ - lib/ocr-file/text_engines/result_processor.rb
125
140
  - lib/ocr-file/version.rb
126
141
  - ocr-file.gemspec
127
142
  homepage: https://github.com/TRex22/ocr-file