rtesseract 2.2.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +5 -5
  2. data/.document +1 -2
  3. data/.gitignore +12 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +13 -10
  6. data/CODE_OF_CONDUCT.md +74 -0
  7. data/Gemfile +4 -17
  8. data/Gemfile.lock +40 -85
  9. data/LICENSE.txt +18 -17
  10. data/README.md +137 -0
  11. data/Rakefile +4 -48
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/lib/rtesseract.rb +22 -220
  15. data/lib/rtesseract/box.rb +15 -60
  16. data/lib/rtesseract/check.rb +14 -0
  17. data/lib/rtesseract/command.rb +41 -0
  18. data/lib/rtesseract/configuration.rb +15 -64
  19. data/lib/rtesseract/pdf.rb +18 -0
  20. data/lib/rtesseract/text.rb +9 -0
  21. data/lib/rtesseract/tsv.rb +18 -0
  22. data/lib/rtesseract/version.rb +3 -0
  23. data/rtesseract.gemspec +27 -98
  24. metadata +36 -85
  25. data/README.rdoc +0 -156
  26. data/VERSION +0 -1
  27. data/lib/processors/mini_magick.rb +0 -43
  28. data/lib/processors/none.rb +0 -34
  29. data/lib/processors/rmagick.rb +0 -46
  30. data/lib/rtesseract/blob.rb +0 -34
  31. data/lib/rtesseract/box_char.rb +0 -31
  32. data/lib/rtesseract/errors.rb +0 -21
  33. data/lib/rtesseract/mixed.rb +0 -54
  34. data/lib/rtesseract/processor.rb +0 -19
  35. data/lib/rtesseract/utils.rb +0 -44
  36. data/lib/rtesseract/uzn.rb +0 -47
  37. data/spec/configs/eng.user-words.txt +0 -13
  38. data/spec/images/README.pdf +0 -0
  39. data/spec/images/blank.tif +0 -0
  40. data/spec/images/mixed.tif +0 -0
  41. data/spec/images/orientation_reverse.png +0 -0
  42. data/spec/images/test with spaces.tif +0 -0
  43. data/spec/images/test-pdf.png +0 -0
  44. data/spec/images/test.bmp +0 -0
  45. data/spec/images/test.jpg +0 -0
  46. data/spec/images/test.png +0 -0
  47. data/spec/images/test.tif +0 -0
  48. data/spec/images/test1.tif +0 -0
  49. data/spec/images/test_words.png +0 -0
  50. data/spec/rtesseract_box_char_spec.rb +0 -82
  51. data/spec/rtesseract_box_spec.rb +0 -36
  52. data/spec/rtesseract_mixed_spec.rb +0 -49
  53. data/spec/rtesseract_spec.rb +0 -282
  54. data/spec/rtesseract_uzn_spec.rb +0 -56
  55. data/spec/spec_helper.rb +0 -21
@@ -1,44 +0,0 @@
1
- require 'open3'
2
-
3
- # RTesseract
4
- class RTesseract
5
- # Some utils methods
6
- module Utils
7
- # Remove files or Tempfile
8
- def self.remove_files(files = [])
9
- files.each do |file|
10
- self.remove_file(file)
11
- end
12
- true
13
- rescue => error
14
- raise RTesseract::TempFilesNotRemovedError.new(error: error, files: files)
15
- end
16
-
17
- # Remove file or Tempfile
18
- def self.remove_file(file)
19
- if file.is_a?(Tempfile)
20
- file.close
21
- file.unlink
22
- else
23
- File.unlink(file)
24
- end
25
- true
26
- end
27
-
28
- # Extract tesseract version number
29
- def self.version_number
30
- output, st = Open3.capture2e(RTesseract.default_command, "--version")
31
-
32
- version = output.split("\n")[0].split(" ")[1].split('.')[0, 2].join('.')
33
- Float(version) rescue nil
34
- end
35
- end
36
- end
37
-
38
- # Hash
39
- class Hash
40
- # return the value and remove from hash
41
- def option(attr_name, default)
42
- delete(attr_name.to_s) || delete(attr_name) || default
43
- end
44
- end
@@ -1,47 +0,0 @@
1
- # encoding: UTF-8
2
- # RTesseract
3
- class RTesseract
4
- # Alternative approach to Mixed when you want to read from specific areas.
5
- # Requires `-psm 4` which means the text must be "a single column of text of variable sizes".
6
- class Uzn < RTesseract
7
- attr_reader :areas
8
- DEFAULT_ALPHABET = 'Text/Latin'
9
-
10
- def initialize(src = '', options = {})
11
- @areas = options.delete(:areas) || []
12
- @alphabet = options.delete(:alphabet) || DEFAULT_ALPHABET
13
- super(src, options.merge(psm: 4))
14
- yield self if block_given?
15
- end
16
-
17
- # Add areas
18
- def area(points)
19
- areas << points
20
- end
21
-
22
- def convert_command
23
- @image = image
24
- write_uzn_file
25
- `#{configuration.command} "#{@image}" "#{file_dest}" #{lang} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{options_cmd.join(' ')}`
26
- end
27
-
28
- def after_convert_hook
29
- RTesseract::Utils.remove_files([@uzn_file])
30
- end
31
-
32
- private
33
-
34
- def write_uzn_file
35
- folder = File.dirname(@image)
36
- basename = File.basename(@image, '.tif')
37
- @uzn_file = File.new("#{folder}/#{basename}.uzn", File::CREAT|File::TRUNC|File::RDWR)
38
-
39
- areas.each do |points|
40
- s = "#{points[:x]} #{points[:y]} #{points[:w]} #{points[:h]} #{@alphabet}\n"
41
- @uzn_file.write(s)
42
- @uzn_file.flush
43
- end
44
- end
45
-
46
- end
47
- end
@@ -1,13 +0,0 @@
1
- you
2
- are
3
- a
4
- friend
5
- you
6
- speak
7
- the
8
- password
9
- and
10
- the
11
- doors
12
- will
13
- open
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
data/spec/images/test.bmp DELETED
Binary file
data/spec/images/test.jpg DELETED
Binary file
data/spec/images/test.png DELETED
Binary file
data/spec/images/test.tif DELETED
Binary file
Binary file
Binary file
@@ -1,82 +0,0 @@
1
- # encoding: UTF-8
2
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
-
4
- describe 'Rtesseract::BoxChar' do
5
- before do
6
- @path = Pathname.new(__FILE__.gsub('rtesseract_box_char_spec.rb', '')).expand_path
7
- @image_tiff = @path.join('images', 'test.tif').to_s
8
- @words_image = @path.join('images', 'test_words.png').to_s
9
- @values = [
10
- { char: 'I', x_start: 52, y_start: 91, x_end: 54, y_end: 104 },
11
- { char: 'f', x_start: 56, y_start: 91, x_end: 63, y_end: 105 },
12
- { char: 'y', x_start: 69, y_start: 87, x_end: 79, y_end: 101 },
13
- { char: 'o', x_start: 80, y_start: 91, x_end: 90, y_end: 101 },
14
- { char: 'u', x_start: 92, y_start: 91, x_end: 100, y_end: 101 },
15
- { char: 'a', x_start: 108, y_start: 91, x_end: 116, y_end: 101 },
16
- { char: 'r', x_start: 119, y_start: 91, x_end: 125, y_end: 101 },
17
- { char: 'e', x_start: 126, y_start: 91, x_end: 136, y_end: 101 },
18
- { char: 'a', x_start: 143, y_start: 91, x_end: 151, y_end: 101 },
19
- { char: 'f', x_start: 158, y_start: 91, x_end: 165, y_end: 105 },
20
- { char: 'r', x_start: 166, y_start: 91, x_end: 172, y_end: 101 },
21
- { char: 'i', x_start: 174, y_start: 91, x_end: 176, y_end: 105 },
22
- { char: 'e', x_start: 178, y_start: 91, x_end: 188, y_end: 101 },
23
- { char: 'n', x_start: 190, y_start: 91, x_end: 198, y_end: 101 },
24
- { char: 'd', x_start: 200, y_start: 91, x_end: 209, y_end: 105 },
25
- { char: ',', x_start: 211, y_start: 89, x_end: 214, y_end: 93 },
26
- { char: 'y', x_start: 51, y_start: 65, x_end: 61, y_end: 79 },
27
- { char: 'o', x_start: 62, y_start: 69, x_end: 72, y_end: 79 },
28
- { char: 'u', x_start: 74, y_start: 69, x_end: 82, y_end: 79 },
29
- { char: 's', x_start: 90, y_start: 69, x_end: 97, y_end: 79 },
30
- { char: 'p', x_start: 99, y_start: 65, x_end: 108, y_end: 79 },
31
- { char: 'e', x_start: 109, y_start: 69, x_end: 119, y_end: 79 },
32
- { char: 'a', x_start: 120, y_start: 69, x_end: 128, y_end: 79 },
33
- { char: 'k', x_start: 131, y_start: 69, x_end: 140, y_end: 83 },
34
- { char: 't', x_start: 146, y_start: 69, x_end: 152, y_end: 82 },
35
- { char: 'h', x_start: 154, y_start: 69, x_end: 162, y_end: 83 },
36
- { char: 'e', x_start: 164, y_start: 69, x_end: 174, y_end: 79 },
37
- { char: 'p', x_start: 182, y_start: 65, x_end: 191, y_end: 79 },
38
- { char: 'a', x_start: 192, y_start: 69, x_end: 200, y_end: 79 },
39
- { char: 's', x_start: 202, y_start: 69, x_end: 209, y_end: 79 },
40
- { char: 's', x_start: 210, y_start: 69, x_end: 217, y_end: 79 },
41
- { char: 'w', x_start: 219, y_start: 69, x_end: 232, y_end: 79 },
42
- { char: 'o', x_start: 234, y_start: 69, x_end: 244, y_end: 79 },
43
- { char: 'r', x_start: 246, y_start: 69, x_end: 252, y_end: 79 },
44
- { char: 'd', x_start: 253, y_start: 69, x_end: 262, y_end: 83 },
45
- { char: ',', x_start: 264, y_start: 67, x_end: 267, y_end: 71 },
46
- { char: 'a', x_start: 51, y_start: 47, x_end: 59, y_end: 57 },
47
- { char: 'n', x_start: 62, y_start: 47, x_end: 70, y_end: 57 },
48
- { char: 'd', x_start: 72, y_start: 47, x_end: 81, y_end: 61 },
49
- { char: 't', x_start: 89, y_start: 47, x_end: 95, y_end: 60 },
50
- { char: 'h', x_start: 97, y_start: 47, x_end: 105, y_end: 61 },
51
- { char: 'e', x_start: 107, y_start: 47, x_end: 117, y_end: 57 },
52
- { char: 'd', x_start: 124, y_start: 47, x_end: 133, y_end: 61 },
53
- { char: 'o', x_start: 135, y_start: 47, x_end: 145, y_end: 57 },
54
- { char: 'o', x_start: 146, y_start: 47, x_end: 156, y_end: 57 },
55
- { char: 'r', x_start: 158, y_start: 47, x_end: 164, y_end: 57 },
56
- { char: 's', x_start: 165, y_start: 47, x_end: 172, y_end: 57 },
57
- { char: 'w', x_start: 180, y_start: 47, x_end: 193, y_end: 57 },
58
- { char: 'i', x_start: 196, y_start: 47, x_end: 198, y_end: 61 },
59
- { char: 'l', x_start: 201, y_start: 47, x_end: 203, y_end: 61 },
60
- { char: 'l', x_start: 206, y_start: 47, x_end: 208, y_end: 61 },
61
- { char: 'o', x_start: 216, y_start: 47, x_end: 226, y_end: 57 },
62
- { char: 'p', x_start: 228, y_start: 43, x_end: 237, y_end: 57 },
63
- { char: 'e', x_start: 238, y_start: 47, x_end: 248, y_end: 57 },
64
- { char: 'n', x_start: 250, y_start: 47, x_end: 258, y_end: 57 },
65
- { char: '.', x_start: 261, y_start: 47, x_end: 263, y_end: 49 }]
66
- end
67
-
68
- it 'bounding box by char' do
69
- expect(RTesseract::BoxChar.new(@image_tiff).characters.is_a?(Array)).to eql(true)
70
- expect(RTesseract::BoxChar.new(@image_tiff).characters).to eql([
71
- { char: '4', x_start: 145, y_start: 14, x_end: 159, y_end: 33 },
72
- { char: '3', x_start: 184, y_start: 14, x_end: 196, y_end: 33 },
73
- { char: 'X', x_start: 222, y_start: 14, x_end: 238, y_end: 32 },
74
- { char: 'F', x_start: 260, y_start: 14, x_end: 273, y_end: 32 }])
75
-
76
- expect(RTesseract::BoxChar.new(@words_image).characters).to eql(@values)
77
-
78
- expect { RTesseract::BoxChar.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
79
- expect { RTesseract::BoxChar.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
80
- # expect(RTesseract::BoxChar.new(@path.join('images', 'blank.tif').to_s, options: :digits).characters).to eql([])
81
- end
82
- end
@@ -1,36 +0,0 @@
1
- # encoding: UTF-8
2
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
-
4
- describe 'Rtesseract::Box' do
5
- before do
6
- @path = Pathname.new(__FILE__.gsub('rtesseract_box_spec.rb', '')).expand_path
7
- @image_tiff = @path.join('images', 'test.tif').to_s
8
- @words_image = @path.join('images', 'test_words.png').to_s
9
- end
10
-
11
- it 'bounding box' do
12
- expect(RTesseract.new(@words_image).to_s).to eql("If you are a friend,\nyou speak the password,\nand the doors will open.\n\n")
13
- expect(RTesseract::Box.new(@words_image).words).to eql([
14
- { word: 'If', x_start: 52, y_start: 13, x_end: 63, y_end: 27 },
15
- { word: 'you', x_start: 69, y_start: 17, x_end: 100, y_end: 31 },
16
- { word: 'are', x_start: 108, y_start: 17, x_end: 136, y_end: 27 },
17
- { word: 'a', x_start: 143, y_start: 17, x_end: 151, y_end: 27 },
18
- { word: 'friend,', x_start: 158, y_start: 13, x_end: 214, y_end: 29 },
19
- { word: 'you', x_start: 51, y_start: 39, x_end: 82, y_end: 53 },
20
- { word: 'speak', x_start: 90, y_start: 35, x_end: 140, y_end: 53 },
21
- { word: 'the', x_start: 146, y_start: 35, x_end: 174, y_end: 49 },
22
- { word: 'password,', x_start: 182, y_start: 35, x_end: 267, y_end: 53 },
23
- { word: 'and', x_start: 51, y_start: 57, x_end: 81, y_end: 71 },
24
- { word: 'the', x_start: 89, y_start: 57, x_end: 117, y_end: 71 },
25
- { word: 'doors', x_start: 124, y_start: 57, x_end: 172, y_end: 71 },
26
- { word: 'will', x_start: 180, y_start: 57, x_end: 208, y_end: 71 },
27
- { word: 'open.', x_start: 216, y_start: 61, x_end: 263, y_end: 75 }
28
- ])
29
-
30
- expect(RTesseract::Box.new(@image_tiff).words.is_a?(Array)).to eql(true)
31
- expect(RTesseract::Box.new(@words_image).to_s).to eql('If you are a friend, you speak the password, and the doors will open.')
32
- expect { RTesseract::Box.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
33
- expect { RTesseract::Box.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
34
- # expect(RTesseract::Box.new(@path.join('images', 'blank.tif').to_s, options: :digits).words).to eql([])
35
- end
36
- end
@@ -1,49 +0,0 @@
1
- # encoding: UTF-8
2
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
-
4
- describe 'Rtesseract::Mixed' do
5
- before do
6
- @path = Pathname.new(__FILE__.gsub('rtesseract_mixed_spec.rb', '')).expand_path
7
- @image_tif = @path.join('images', 'mixed.tif').to_s
8
- @image2_tif = @path.join('images', 'mixed2.tif').to_s
9
- end
10
-
11
- it 'should be instantiable' do
12
- expect(RTesseract::Mixed.new.class).to eql(RTesseract::Mixed)
13
- expect(RTesseract::Mixed.new(@image_tif).class).to eql(RTesseract::Mixed)
14
- end
15
-
16
- it 'should translate parts of the image to text' do
17
- mix_block = RTesseract::Mixed.new(@image_tif, psm: 7) do |image|
18
- image.area(x: 28, y: 19, w: 25, h: 25) # position of 4
19
- image.area(x: 180, y: 22, w: 20, h: 28) # position of 3
20
- image.area(x: 218, y: 22, w: 24, h: 28) # position of F
21
- image.area(x: 248, y: 24, w: 22, h: 22) # position of F
22
- end
23
- expect(mix_block.to_s_without_spaces).to eql('43FF')
24
- mix_block.clear_areas
25
- expect(mix_block.areas).to eql([])
26
-
27
- @areas = []
28
- @areas << { x: 28, y: 19, w: 25, h: 25 } # position of 4
29
- @areas << { x: 180, y: 22, w: 20, h: 28 } # position of 3
30
- @areas << { x: 218, y: 22, w: 24, h: 28 } # position of f
31
- @areas << { x: 248, y: 24, w: 22, h: 22 } # position of f
32
-
33
- mix_block = RTesseract::Mixed.new(@image_tif, areas: @areas, psm: 7)
34
- expect(mix_block.to_s_without_spaces).to eql('43FF')
35
-
36
- mix_block = RTesseract::Mixed.new(@path.join('images', 'blank.tif').to_s, areas: @areas, psm: 7)
37
- expect(mix_block.to_s_without_spaces).to eql('')
38
- end
39
-
40
- it ' get a error' do
41
- @areas = [{ x: 28, y: 19, w: 25, h: 25 }]
42
-
43
- mix_block = RTesseract::Mixed.new(@path.join('images', 'test_not_exists.png').to_s, areas: @areas, psm: 7)
44
- expect { mix_block.to_s_without_spaces }.to raise_error(RTesseract::ImageNotSelectedError)
45
-
46
- mix_block = RTesseract::Mixed.new(@image_tif, areas: @areas, psm: 7, command: 'tesseract_error')
47
- expect { mix_block.to_s }.to raise_error(RTesseract::ConversionError)
48
- end
49
- end
@@ -1,282 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
- # encoding: UTF-8
3
- require 'pathname'
4
- RTesseract::Processor::RMagickProcessor.setup
5
-
6
- # Class to rise error
7
- class MakeStringError
8
- def to_s
9
- fail 'error'
10
- end
11
- end
12
-
13
- describe 'Rtesseract' do
14
- before do
15
- @path = Pathname.new(__FILE__.gsub('rtesseract_spec.rb', '')).expand_path
16
- @image_tif = @path.join('images', 'test.tif').to_s
17
- @image_for_pdf = @path.join('images', 'test-pdf.png').to_s
18
- end
19
-
20
- it ' be instantiable' do
21
- expect(RTesseract.new.class).to eql(RTesseract)
22
- expect(RTesseract.new('').class).to eql(RTesseract)
23
- expect(RTesseract.new(@image_tif).class).to eql(RTesseract)
24
- end
25
-
26
- it ' translate image to text' do
27
- expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql('43XF')
28
- expect(RTesseract.new(@image_tif, processor: 'mini_magick').to_s_without_spaces).to eql('43XF')
29
- expect(RTesseract.new(@path.join('images', 'test1.tif').to_s).to_s_without_spaces).to eql('V2V4')
30
- expect(RTesseract.new(@path.join('images', 'test with spaces.tif').to_s).to_s_without_spaces).to eql('V2V4')
31
- end
32
-
33
- it ' translate images .png, .jpg, .bmp' do
34
- expect(RTesseract.new(@path.join('images', 'test.png').to_s).to_s_without_spaces).to eql('HW9W')
35
- expect(RTesseract.new(@path.join('images', 'test.jpg').to_s).to_s_without_spaces).to eql('3R8F')
36
- expect(RTesseract.new(@path.join('images', 'test.bmp').to_s).to_s_without_spaces).to eql('FLA6')
37
- end
38
-
39
- it ' should not error with depth > 32' do
40
- # expect(RTesseract.new(@path.join('images', 'README.pdf').to_s, debug: true).to_s_without_spaces).to eql('')
41
- end
42
-
43
- it ' support different processors' do
44
- # Rmagick
45
- expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql('43XF')
46
- expect(RTesseract.new(@image_tif, processor: 'rmagick').to_s_without_spaces).to eql('43XF')
47
- expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'rmagick').to_s_without_spaces).to eql('HW9W')
48
-
49
- # MiniMagick
50
- expect(RTesseract.new(@image_tif, processor: 'mini_magick').to_s_without_spaces).to eql('43XF')
51
- expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'mini_magick').to_s_without_spaces).to eql('HW9W')
52
-
53
- # NoneMagick
54
- expect(RTesseract.new(@image_tif, processor: 'none').to_s_without_spaces).to eql('43XF')
55
- end
56
-
57
- it ' change the image' do
58
- image = RTesseract.new(@image_tif)
59
- expect(image.to_s_without_spaces).to eql('43XF')
60
- image.source = @path.join('images', 'test1.tif').to_s
61
- expect(image.to_s_without_spaces).to eql('V2V4')
62
- end
63
-
64
- it ' returns the source' do
65
- image = RTesseract.new(@image_tif)
66
- expect(image.source).to eql(Pathname.new(@image_tif))
67
- end
68
-
69
- it ' select the language' do
70
- # English
71
- expect(RTesseract.new(@image_tif, lang: 'eng').lang).to eql(' -l eng ')
72
- expect(RTesseract.new(@image_tif, lang: 'en').lang).to eql(' -l eng ')
73
- expect(RTesseract.new(@image_tif, lang: 'en-US').lang).to eql(' -l eng ')
74
- expect(RTesseract.new(@image_tif, lang: 'english').lang).to eql(' -l eng ')
75
-
76
- # Portuguese
77
- expect(RTesseract.new(@image_tif, lang: 'por').lang).to eql(' -l por ')
78
- expect(RTesseract.new(@image_tif, lang: 'pt-BR').lang).to eql(' -l por ')
79
- expect(RTesseract.new(@image_tif, lang: 'pt-br').lang).to eql(' -l por ')
80
- expect(RTesseract.new(@image_tif, lang: 'pt').lang).to eql(' -l por ')
81
- expect(RTesseract.new(@image_tif, lang: 'portuguese').lang).to eql(' -l por ')
82
-
83
- expect(RTesseract.new(@image_tif, lang: 'eng').to_s_without_spaces).to eql('43XF')
84
-
85
- expect(RTesseract.new(@image_tif, lang: 'eng').lang).to eql(' -l eng ')
86
- expect(RTesseract.new(@image_tif, lang: 'it').lang).to eql(' -l ita ')
87
-
88
- # Invalid lang object
89
- expect(RTesseract.new(@image_tif, lang: MakeStringError.new).lang).to eql('')
90
- end
91
-
92
- it ' select options' do
93
- expect(RTesseract.new(@image_tif).options_cmd).to eql([])
94
- expect(RTesseract.new(@image_tif, options: 'digits').options_cmd).to eql(['digits'])
95
- expect(RTesseract.new(@image_tif, options: :digits).options_cmd).to eql([:digits])
96
- expect(RTesseract.new(@image_tif, options: [:digits, :quiet]).options_cmd).to eql([:digits, :quiet])
97
- end
98
-
99
- it ' support pdf output mode' do
100
- # Internal test. Consider 'pdf' option only when #to_pdf is called.
101
- expect(RTesseract.new(@image_tif, options: 'pdf').options_cmd).to eql([])
102
- expect(RTesseract.new(@image_for_pdf, options: :pdf).options_cmd).to eql([])
103
-
104
- pdf_ocr = RTesseract.new(@image_for_pdf)
105
- expect(File.exists?(pdf_ocr.to_pdf)).to eql(true)
106
- expect(File.extname(pdf_ocr.to_pdf)).to eql('.pdf')
107
- # Comment next line and go to tmp dir to see generated pdf.
108
- expect(pdf_ocr.clean).to eq(true)
109
- expect(File.exists?(pdf_ocr.to_pdf)).to eql(false)
110
-
111
- # Still have original functionality (i.e. #to_s, #to_s_without_spaces).
112
- pdf_ocr = RTesseract.new(@image_tif)
113
- expect(File.exists?(pdf_ocr.to_pdf)).to eql(true)
114
- expect(File.extname(pdf_ocr.to_pdf)).to eql('.pdf')
115
- expect(pdf_ocr.to_s_without_spaces).to eql('43XF')
116
- expect(pdf_ocr.clean).to eq(true)
117
- expect(File.exists?(pdf_ocr.to_pdf)).to eql(false)
118
- end
119
-
120
- it ' warn when tesseract cannot give pdf' do
121
- rtesseract = RTesseract.new(@image_for_pdf)
122
-
123
- allow(rtesseract).to receive(:tesseract_version).and_return(3.02)
124
- expect { rtesseract.to_pdf }.to raise_error(RTesseract::TesseractVersionError)
125
-
126
- allow(rtesseract).to receive(:tesseract_version).and_return(3.03)
127
- expect { rtesseract.to_pdf }.not_to raise_error
128
- end
129
-
130
- it ' be configurable' do
131
- expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0, display_text: 0).config).to eql("chop_enable 0\nenable_assoc 0\ndisplay_text 0")
132
- expect(RTesseract.new(@image_tif, chop_enable: 0).config).to eql('chop_enable 0')
133
- expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0).config).to eql("chop_enable 0\nenable_assoc 0")
134
- expect(RTesseract.new(@image_tif, chop_enable: 0).to_s_without_spaces).to eql('43XF')
135
- expect(RTesseract.new(@image_tif, tessedit_char_whitelist: 'ABCDEF12345').to_s_without_spaces).to eql('43F')
136
- end
137
-
138
- it ' crop image' do
139
- expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 140, y: 10).to_s_without_spaces).to eql('4')
140
- expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 180, y: 10).to_s_without_spaces).to eql('3')
141
- expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 20, h: 40, x: 216, y: 10).to_s_without_spaces).to eql('X')
142
- expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 30, h: 40, x: 240, y: 10).to_s_without_spaces).to eql('F')
143
- end
144
-
145
- it ' read image from blob' do
146
- image = Magick::Image.read(@path.join('images', 'test.png').to_s).first
147
- blob = image.quantize(256, Magick::GRAYColorspace).to_blob
148
-
149
- test = RTesseract.new('', psm: 7)
150
- test.from_blob(blob)
151
- expect(test.to_s_without_spaces).to eql('HW9W')
152
-
153
- test = RTesseract.new('', psm: 7)
154
- expect { test.from_blob('') }.to raise_error(RTesseract::ConversionError)
155
- end
156
-
157
- it ' use a instance' do
158
- expect(RTesseract.new(Magick::Image.read(@image_tif.to_s).first).to_s_without_spaces).to eql('43XF')
159
- expect(RTesseract::Processor::RMagickProcessor.a_name?('teste')).to eql(false)
160
- expect(RTesseract::Processor::RMagickProcessor.a_name?('rmagick')).to eql(true)
161
- expect(RTesseract::Processor::RMagickProcessor.a_name?('RMagickProcessor')).to eql(true)
162
- expect(RTesseract::Processor::MiniMagickProcessor.a_name?('teste')).to eql(false)
163
- expect(RTesseract::Processor::MiniMagickProcessor.a_name?('mini_magick')).to eql(true)
164
- expect(RTesseract::Processor::MiniMagickProcessor.a_name?('MiniMagickProcessor')).to eql(true)
165
- expect(RTesseract::Processor::NoneProcessor.a_name?('none')).to eql(true)
166
- expect(RTesseract::Processor::NoneProcessor.a_name?('NoneProcessor')).to eql(true)
167
- end
168
-
169
- it ' change image in a block' do
170
- test = RTesseract.read(@path.join('images', 'test.png').to_s) {}
171
- expect(test.class).to eql(RTesseract)
172
-
173
- test = RTesseract.new(@image_tif)
174
- test.read do |_image|
175
- _image = _image.quantize(256, Magick::GRAYColorspace)
176
- end
177
- expect(test.to_s_without_spaces).to eql('43XF')
178
-
179
- test = RTesseract.new(@path.join('images', 'blank.tif').to_s)
180
- test.read do |_image|
181
- _image
182
- end
183
- expect(test.to_s_without_spaces).to eql('')
184
-
185
- test = RTesseract.read(@path.join('images', 'test.png').to_s) do |_image|
186
- _image.rotate(90)
187
- end
188
- expect(test.to_s_without_spaces).to eql('HW9W')
189
-
190
- test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en') do |_image|
191
- _image = _image.white_threshold(245).quantize(256, Magick::GRAYColorspace)
192
- end
193
- expect(test.to_s_without_spaces).to eql('3R8F')
194
-
195
- test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en', processor: 'mini_magick') do |_image|
196
- _image.gravity 'south'
197
- end
198
- expect(test.to_s_without_spaces).to eql('3R8F')
199
- end
200
-
201
- it 'does not raise on read with image_magick processor' do
202
- expect {
203
- instance = RTesseract.read(@image_tif, processor: 'mini_magick') {}
204
- expect(instance.processor.a_name?('mini_magick')).to be_truthy
205
- }.not_to raise_error
206
- end
207
-
208
- it ' get a error' do
209
- expect { RTesseract.new(@path.join('images', 'test.jpg').to_s, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
210
- expect { RTesseract.new(@path.join('images', 'test_not_exists.png').to_s).to_s }.to raise_error(RTesseract::ImageNotSelectedError)
211
-
212
- # Invalid psm object
213
- expect(RTesseract.new(@image_tif, psm: MakeStringError.new).psm).to eql('')
214
- end
215
-
216
- it 'remove a file' do
217
- RTesseract::Utils.remove_files(Tempfile.new('config'))
218
-
219
- expect { RTesseract::Utils.remove_files(Pathname.new(Dir.tmpdir).join('test_not_exists')) }.to raise_error(RTesseract::TempFilesNotRemovedError)
220
- end
221
-
222
- it ' get a numeric value for tesseract version' do
223
- expect(RTesseract::Utils.version_number).to be_a Float
224
- end
225
-
226
- it ' support default config processors' do
227
- # Rmagick
228
- RTesseract.configure { |config| config.processor = 'rmagick' }
229
- expect(RTesseract.new(@image_tif).processor.a_name?('rmagick')).to eql(true)
230
-
231
- # MiniMagick
232
- RTesseract.configure { |config| config.processor = 'mini_magick' }
233
- expect(RTesseract.new(@image_tif).processor.a_name?('mini_magick')).to eql(true)
234
-
235
- # NoneMagick
236
- RTesseract.configure { |config| config.processor = 'none' }
237
- expect(RTesseract.new(@image_tif).processor.a_name?('none')).to eql(true)
238
-
239
- # overwrite default
240
- RTesseract.configure { |config| config.processor = 'rmagick' }
241
- expect(RTesseract.new(@image_tif, processor: 'mini_magick').processor.a_name?('mini_magick')).to eql(true)
242
-
243
- RTesseract.configure { |config| config.lang = 'portuguese' }
244
- expect(RTesseract.new(@image_tif).lang).to eql(' -l por ')
245
-
246
- RTesseract.configure { |config| config.psm = 7 }
247
- expect(RTesseract.new(@image_tif).psm).to eql(' -psm 7 ')
248
-
249
- RTesseract.configure { |config| config.tessdata_dir = '/tmp/test' }
250
- expect(RTesseract.new(@image_tif).tessdata_dir).to eql(' --tessdata-dir /tmp/test ')
251
-
252
- RTesseract.configure { |config| config.user_words = '/tmp/test' }
253
- expect(RTesseract.new(@image_tif).user_words).to eql(' --user-words /tmp/test ')
254
-
255
- RTesseract.configure { |config| config.user_patterns = '/tmp/test' }
256
- expect(RTesseract.new(@image_tif).user_patterns).to eql(' --user-patterns /tmp/test ')
257
- end
258
-
259
- it ' configure pdf has no effect and kept in-house' do
260
- # So it does not interfere with #to_s outputting.
261
- RTesseract.configure { |config| config.options_cmd = ['pdf'] }
262
- expect(RTesseract.new(@image_tif).options_cmd).to eql([])
263
-
264
- RTesseract.configure { |config| config.options_cmd = [:pdf] }
265
- expect(RTesseract.new(@image_tif).options_cmd).to eql([])
266
-
267
- RTesseract.configure { |config| config.options_cmd = [:pdf, 'pdf'] }
268
- expect(RTesseract.new(@image_tif).options_cmd).to eql([])
269
- end
270
-
271
- it ' support new configs' do
272
- expect(RTesseract.new(@image_tif, tessdata_dir: '/tmp/test').tessdata_dir).to eql(' --tessdata-dir /tmp/test ')
273
- expect(RTesseract.new(@image_tif, user_words: '/tmp/test').user_words).to eql(' --user-words /tmp/test ')
274
- expect(RTesseract.new(@image_tif, user_patterns: '/tmp/test').user_patterns).to eql(' --user-patterns /tmp/test ')
275
-
276
- expect(RTesseract.new(@image_tif, tessdata_dir: MakeStringError.new).tessdata_dir).to eql('')
277
- expect(RTesseract.new(@image_tif, user_words: MakeStringError.new).user_words).to eql('')
278
- expect(RTesseract.new(@image_tif, user_patterns: MakeStringError.new).user_patterns).to eql('')
279
-
280
- # expect(RTesseract.new(@path.join('images', 'test_words.png').to_s, psm: 3, user_words: @path.join('configs', 'eng.user-words.txt').to_s).to_s).to eql("If you are a friend,\nyou speak the password,\nand the doors will open.\n\n")
281
- end
282
- end