rtesseract 2.2.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +5 -5
  2. data/.document +1 -2
  3. data/.gitignore +12 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +13 -10
  6. data/CODE_OF_CONDUCT.md +74 -0
  7. data/Gemfile +4 -17
  8. data/Gemfile.lock +40 -85
  9. data/LICENSE.txt +18 -17
  10. data/README.md +137 -0
  11. data/Rakefile +4 -48
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/lib/rtesseract.rb +22 -220
  15. data/lib/rtesseract/box.rb +15 -60
  16. data/lib/rtesseract/check.rb +14 -0
  17. data/lib/rtesseract/command.rb +41 -0
  18. data/lib/rtesseract/configuration.rb +15 -64
  19. data/lib/rtesseract/pdf.rb +18 -0
  20. data/lib/rtesseract/text.rb +9 -0
  21. data/lib/rtesseract/tsv.rb +18 -0
  22. data/lib/rtesseract/version.rb +3 -0
  23. data/rtesseract.gemspec +27 -98
  24. metadata +36 -85
  25. data/README.rdoc +0 -156
  26. data/VERSION +0 -1
  27. data/lib/processors/mini_magick.rb +0 -43
  28. data/lib/processors/none.rb +0 -34
  29. data/lib/processors/rmagick.rb +0 -46
  30. data/lib/rtesseract/blob.rb +0 -34
  31. data/lib/rtesseract/box_char.rb +0 -31
  32. data/lib/rtesseract/errors.rb +0 -21
  33. data/lib/rtesseract/mixed.rb +0 -54
  34. data/lib/rtesseract/processor.rb +0 -19
  35. data/lib/rtesseract/utils.rb +0 -44
  36. data/lib/rtesseract/uzn.rb +0 -47
  37. data/spec/configs/eng.user-words.txt +0 -13
  38. data/spec/images/README.pdf +0 -0
  39. data/spec/images/blank.tif +0 -0
  40. data/spec/images/mixed.tif +0 -0
  41. data/spec/images/orientation_reverse.png +0 -0
  42. data/spec/images/test with spaces.tif +0 -0
  43. data/spec/images/test-pdf.png +0 -0
  44. data/spec/images/test.bmp +0 -0
  45. data/spec/images/test.jpg +0 -0
  46. data/spec/images/test.png +0 -0
  47. data/spec/images/test.tif +0 -0
  48. data/spec/images/test1.tif +0 -0
  49. data/spec/images/test_words.png +0 -0
  50. data/spec/rtesseract_box_char_spec.rb +0 -82
  51. data/spec/rtesseract_box_spec.rb +0 -36
  52. data/spec/rtesseract_mixed_spec.rb +0 -49
  53. data/spec/rtesseract_spec.rb +0 -282
  54. data/spec/rtesseract_uzn_spec.rb +0 -56
  55. data/spec/spec_helper.rb +0 -21
@@ -1,44 +0,0 @@
1
- require 'open3'
2
-
3
- # RTesseract
4
- class RTesseract
5
- # Some utils methods
6
- module Utils
7
- # Remove files or Tempfile
8
- def self.remove_files(files = [])
9
- files.each do |file|
10
- self.remove_file(file)
11
- end
12
- true
13
- rescue => error
14
- raise RTesseract::TempFilesNotRemovedError.new(error: error, files: files)
15
- end
16
-
17
- # Remove file or Tempfile
18
- def self.remove_file(file)
19
- if file.is_a?(Tempfile)
20
- file.close
21
- file.unlink
22
- else
23
- File.unlink(file)
24
- end
25
- true
26
- end
27
-
28
- # Extract tesseract version number
29
- def self.version_number
30
- output, st = Open3.capture2e(RTesseract.default_command, "--version")
31
-
32
- version = output.split("\n")[0].split(" ")[1].split('.')[0, 2].join('.')
33
- Float(version) rescue nil
34
- end
35
- end
36
- end
37
-
38
- # Hash
39
- class Hash
40
- # return the value and remove from hash
41
- def option(attr_name, default)
42
- delete(attr_name.to_s) || delete(attr_name) || default
43
- end
44
- end
@@ -1,47 +0,0 @@
1
- # encoding: UTF-8
2
- # RTesseract
3
- class RTesseract
4
- # Alternative approach to Mixed when you want to read from specific areas.
5
- # Requires `-psm 4` which means the text must be "a single column of text of variable sizes".
6
- class Uzn < RTesseract
7
- attr_reader :areas
8
- DEFAULT_ALPHABET = 'Text/Latin'
9
-
10
- def initialize(src = '', options = {})
11
- @areas = options.delete(:areas) || []
12
- @alphabet = options.delete(:alphabet) || DEFAULT_ALPHABET
13
- super(src, options.merge(psm: 4))
14
- yield self if block_given?
15
- end
16
-
17
- # Add areas
18
- def area(points)
19
- areas << points
20
- end
21
-
22
- def convert_command
23
- @image = image
24
- write_uzn_file
25
- `#{configuration.command} "#{@image}" "#{file_dest}" #{lang} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{options_cmd.join(' ')}`
26
- end
27
-
28
- def after_convert_hook
29
- RTesseract::Utils.remove_files([@uzn_file])
30
- end
31
-
32
- private
33
-
34
- def write_uzn_file
35
- folder = File.dirname(@image)
36
- basename = File.basename(@image, '.tif')
37
- @uzn_file = File.new("#{folder}/#{basename}.uzn", File::CREAT|File::TRUNC|File::RDWR)
38
-
39
- areas.each do |points|
40
- s = "#{points[:x]} #{points[:y]} #{points[:w]} #{points[:h]} #{@alphabet}\n"
41
- @uzn_file.write(s)
42
- @uzn_file.flush
43
- end
44
- end
45
-
46
- end
47
- end
@@ -1,13 +0,0 @@
1
- you
2
- are
3
- a
4
- friend
5
- you
6
- speak
7
- the
8
- password
9
- and
10
- the
11
- doors
12
- will
13
- open
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
data/spec/images/test.bmp DELETED
Binary file
data/spec/images/test.jpg DELETED
Binary file
data/spec/images/test.png DELETED
Binary file
data/spec/images/test.tif DELETED
Binary file
Binary file
Binary file
@@ -1,82 +0,0 @@
1
- # encoding: UTF-8
2
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
-
4
- describe 'Rtesseract::BoxChar' do
5
- before do
6
- @path = Pathname.new(__FILE__.gsub('rtesseract_box_char_spec.rb', '')).expand_path
7
- @image_tiff = @path.join('images', 'test.tif').to_s
8
- @words_image = @path.join('images', 'test_words.png').to_s
9
- @values = [
10
- { char: 'I', x_start: 52, y_start: 91, x_end: 54, y_end: 104 },
11
- { char: 'f', x_start: 56, y_start: 91, x_end: 63, y_end: 105 },
12
- { char: 'y', x_start: 69, y_start: 87, x_end: 79, y_end: 101 },
13
- { char: 'o', x_start: 80, y_start: 91, x_end: 90, y_end: 101 },
14
- { char: 'u', x_start: 92, y_start: 91, x_end: 100, y_end: 101 },
15
- { char: 'a', x_start: 108, y_start: 91, x_end: 116, y_end: 101 },
16
- { char: 'r', x_start: 119, y_start: 91, x_end: 125, y_end: 101 },
17
- { char: 'e', x_start: 126, y_start: 91, x_end: 136, y_end: 101 },
18
- { char: 'a', x_start: 143, y_start: 91, x_end: 151, y_end: 101 },
19
- { char: 'f', x_start: 158, y_start: 91, x_end: 165, y_end: 105 },
20
- { char: 'r', x_start: 166, y_start: 91, x_end: 172, y_end: 101 },
21
- { char: 'i', x_start: 174, y_start: 91, x_end: 176, y_end: 105 },
22
- { char: 'e', x_start: 178, y_start: 91, x_end: 188, y_end: 101 },
23
- { char: 'n', x_start: 190, y_start: 91, x_end: 198, y_end: 101 },
24
- { char: 'd', x_start: 200, y_start: 91, x_end: 209, y_end: 105 },
25
- { char: ',', x_start: 211, y_start: 89, x_end: 214, y_end: 93 },
26
- { char: 'y', x_start: 51, y_start: 65, x_end: 61, y_end: 79 },
27
- { char: 'o', x_start: 62, y_start: 69, x_end: 72, y_end: 79 },
28
- { char: 'u', x_start: 74, y_start: 69, x_end: 82, y_end: 79 },
29
- { char: 's', x_start: 90, y_start: 69, x_end: 97, y_end: 79 },
30
- { char: 'p', x_start: 99, y_start: 65, x_end: 108, y_end: 79 },
31
- { char: 'e', x_start: 109, y_start: 69, x_end: 119, y_end: 79 },
32
- { char: 'a', x_start: 120, y_start: 69, x_end: 128, y_end: 79 },
33
- { char: 'k', x_start: 131, y_start: 69, x_end: 140, y_end: 83 },
34
- { char: 't', x_start: 146, y_start: 69, x_end: 152, y_end: 82 },
35
- { char: 'h', x_start: 154, y_start: 69, x_end: 162, y_end: 83 },
36
- { char: 'e', x_start: 164, y_start: 69, x_end: 174, y_end: 79 },
37
- { char: 'p', x_start: 182, y_start: 65, x_end: 191, y_end: 79 },
38
- { char: 'a', x_start: 192, y_start: 69, x_end: 200, y_end: 79 },
39
- { char: 's', x_start: 202, y_start: 69, x_end: 209, y_end: 79 },
40
- { char: 's', x_start: 210, y_start: 69, x_end: 217, y_end: 79 },
41
- { char: 'w', x_start: 219, y_start: 69, x_end: 232, y_end: 79 },
42
- { char: 'o', x_start: 234, y_start: 69, x_end: 244, y_end: 79 },
43
- { char: 'r', x_start: 246, y_start: 69, x_end: 252, y_end: 79 },
44
- { char: 'd', x_start: 253, y_start: 69, x_end: 262, y_end: 83 },
45
- { char: ',', x_start: 264, y_start: 67, x_end: 267, y_end: 71 },
46
- { char: 'a', x_start: 51, y_start: 47, x_end: 59, y_end: 57 },
47
- { char: 'n', x_start: 62, y_start: 47, x_end: 70, y_end: 57 },
48
- { char: 'd', x_start: 72, y_start: 47, x_end: 81, y_end: 61 },
49
- { char: 't', x_start: 89, y_start: 47, x_end: 95, y_end: 60 },
50
- { char: 'h', x_start: 97, y_start: 47, x_end: 105, y_end: 61 },
51
- { char: 'e', x_start: 107, y_start: 47, x_end: 117, y_end: 57 },
52
- { char: 'd', x_start: 124, y_start: 47, x_end: 133, y_end: 61 },
53
- { char: 'o', x_start: 135, y_start: 47, x_end: 145, y_end: 57 },
54
- { char: 'o', x_start: 146, y_start: 47, x_end: 156, y_end: 57 },
55
- { char: 'r', x_start: 158, y_start: 47, x_end: 164, y_end: 57 },
56
- { char: 's', x_start: 165, y_start: 47, x_end: 172, y_end: 57 },
57
- { char: 'w', x_start: 180, y_start: 47, x_end: 193, y_end: 57 },
58
- { char: 'i', x_start: 196, y_start: 47, x_end: 198, y_end: 61 },
59
- { char: 'l', x_start: 201, y_start: 47, x_end: 203, y_end: 61 },
60
- { char: 'l', x_start: 206, y_start: 47, x_end: 208, y_end: 61 },
61
- { char: 'o', x_start: 216, y_start: 47, x_end: 226, y_end: 57 },
62
- { char: 'p', x_start: 228, y_start: 43, x_end: 237, y_end: 57 },
63
- { char: 'e', x_start: 238, y_start: 47, x_end: 248, y_end: 57 },
64
- { char: 'n', x_start: 250, y_start: 47, x_end: 258, y_end: 57 },
65
- { char: '.', x_start: 261, y_start: 47, x_end: 263, y_end: 49 }]
66
- end
67
-
68
- it 'bounding box by char' do
69
- expect(RTesseract::BoxChar.new(@image_tiff).characters.is_a?(Array)).to eql(true)
70
- expect(RTesseract::BoxChar.new(@image_tiff).characters).to eql([
71
- { char: '4', x_start: 145, y_start: 14, x_end: 159, y_end: 33 },
72
- { char: '3', x_start: 184, y_start: 14, x_end: 196, y_end: 33 },
73
- { char: 'X', x_start: 222, y_start: 14, x_end: 238, y_end: 32 },
74
- { char: 'F', x_start: 260, y_start: 14, x_end: 273, y_end: 32 }])
75
-
76
- expect(RTesseract::BoxChar.new(@words_image).characters).to eql(@values)
77
-
78
- expect { RTesseract::BoxChar.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
79
- expect { RTesseract::BoxChar.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
80
- # expect(RTesseract::BoxChar.new(@path.join('images', 'blank.tif').to_s, options: :digits).characters).to eql([])
81
- end
82
- end
@@ -1,36 +0,0 @@
1
- # encoding: UTF-8
2
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
-
4
- describe 'Rtesseract::Box' do
5
- before do
6
- @path = Pathname.new(__FILE__.gsub('rtesseract_box_spec.rb', '')).expand_path
7
- @image_tiff = @path.join('images', 'test.tif').to_s
8
- @words_image = @path.join('images', 'test_words.png').to_s
9
- end
10
-
11
- it 'bounding box' do
12
- expect(RTesseract.new(@words_image).to_s).to eql("If you are a friend,\nyou speak the password,\nand the doors will open.\n\n")
13
- expect(RTesseract::Box.new(@words_image).words).to eql([
14
- { word: 'If', x_start: 52, y_start: 13, x_end: 63, y_end: 27 },
15
- { word: 'you', x_start: 69, y_start: 17, x_end: 100, y_end: 31 },
16
- { word: 'are', x_start: 108, y_start: 17, x_end: 136, y_end: 27 },
17
- { word: 'a', x_start: 143, y_start: 17, x_end: 151, y_end: 27 },
18
- { word: 'friend,', x_start: 158, y_start: 13, x_end: 214, y_end: 29 },
19
- { word: 'you', x_start: 51, y_start: 39, x_end: 82, y_end: 53 },
20
- { word: 'speak', x_start: 90, y_start: 35, x_end: 140, y_end: 53 },
21
- { word: 'the', x_start: 146, y_start: 35, x_end: 174, y_end: 49 },
22
- { word: 'password,', x_start: 182, y_start: 35, x_end: 267, y_end: 53 },
23
- { word: 'and', x_start: 51, y_start: 57, x_end: 81, y_end: 71 },
24
- { word: 'the', x_start: 89, y_start: 57, x_end: 117, y_end: 71 },
25
- { word: 'doors', x_start: 124, y_start: 57, x_end: 172, y_end: 71 },
26
- { word: 'will', x_start: 180, y_start: 57, x_end: 208, y_end: 71 },
27
- { word: 'open.', x_start: 216, y_start: 61, x_end: 263, y_end: 75 }
28
- ])
29
-
30
- expect(RTesseract::Box.new(@image_tiff).words.is_a?(Array)).to eql(true)
31
- expect(RTesseract::Box.new(@words_image).to_s).to eql('If you are a friend, you speak the password, and the doors will open.')
32
- expect { RTesseract::Box.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
33
- expect { RTesseract::Box.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
34
- # expect(RTesseract::Box.new(@path.join('images', 'blank.tif').to_s, options: :digits).words).to eql([])
35
- end
36
- end
@@ -1,49 +0,0 @@
1
- # encoding: UTF-8
2
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
-
4
- describe 'Rtesseract::Mixed' do
5
- before do
6
- @path = Pathname.new(__FILE__.gsub('rtesseract_mixed_spec.rb', '')).expand_path
7
- @image_tif = @path.join('images', 'mixed.tif').to_s
8
- @image2_tif = @path.join('images', 'mixed2.tif').to_s
9
- end
10
-
11
- it 'should be instantiable' do
12
- expect(RTesseract::Mixed.new.class).to eql(RTesseract::Mixed)
13
- expect(RTesseract::Mixed.new(@image_tif).class).to eql(RTesseract::Mixed)
14
- end
15
-
16
- it 'should translate parts of the image to text' do
17
- mix_block = RTesseract::Mixed.new(@image_tif, psm: 7) do |image|
18
- image.area(x: 28, y: 19, w: 25, h: 25) # position of 4
19
- image.area(x: 180, y: 22, w: 20, h: 28) # position of 3
20
- image.area(x: 218, y: 22, w: 24, h: 28) # position of F
21
- image.area(x: 248, y: 24, w: 22, h: 22) # position of F
22
- end
23
- expect(mix_block.to_s_without_spaces).to eql('43FF')
24
- mix_block.clear_areas
25
- expect(mix_block.areas).to eql([])
26
-
27
- @areas = []
28
- @areas << { x: 28, y: 19, w: 25, h: 25 } # position of 4
29
- @areas << { x: 180, y: 22, w: 20, h: 28 } # position of 3
30
- @areas << { x: 218, y: 22, w: 24, h: 28 } # position of f
31
- @areas << { x: 248, y: 24, w: 22, h: 22 } # position of f
32
-
33
- mix_block = RTesseract::Mixed.new(@image_tif, areas: @areas, psm: 7)
34
- expect(mix_block.to_s_without_spaces).to eql('43FF')
35
-
36
- mix_block = RTesseract::Mixed.new(@path.join('images', 'blank.tif').to_s, areas: @areas, psm: 7)
37
- expect(mix_block.to_s_without_spaces).to eql('')
38
- end
39
-
40
- it ' get a error' do
41
- @areas = [{ x: 28, y: 19, w: 25, h: 25 }]
42
-
43
- mix_block = RTesseract::Mixed.new(@path.join('images', 'test_not_exists.png').to_s, areas: @areas, psm: 7)
44
- expect { mix_block.to_s_without_spaces }.to raise_error(RTesseract::ImageNotSelectedError)
45
-
46
- mix_block = RTesseract::Mixed.new(@image_tif, areas: @areas, psm: 7, command: 'tesseract_error')
47
- expect { mix_block.to_s }.to raise_error(RTesseract::ConversionError)
48
- end
49
- end
@@ -1,282 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
- # encoding: UTF-8
3
- require 'pathname'
4
- RTesseract::Processor::RMagickProcessor.setup
5
-
6
- # Class to rise error
7
- class MakeStringError
8
- def to_s
9
- fail 'error'
10
- end
11
- end
12
-
13
- describe 'Rtesseract' do
14
- before do
15
- @path = Pathname.new(__FILE__.gsub('rtesseract_spec.rb', '')).expand_path
16
- @image_tif = @path.join('images', 'test.tif').to_s
17
- @image_for_pdf = @path.join('images', 'test-pdf.png').to_s
18
- end
19
-
20
- it ' be instantiable' do
21
- expect(RTesseract.new.class).to eql(RTesseract)
22
- expect(RTesseract.new('').class).to eql(RTesseract)
23
- expect(RTesseract.new(@image_tif).class).to eql(RTesseract)
24
- end
25
-
26
- it ' translate image to text' do
27
- expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql('43XF')
28
- expect(RTesseract.new(@image_tif, processor: 'mini_magick').to_s_without_spaces).to eql('43XF')
29
- expect(RTesseract.new(@path.join('images', 'test1.tif').to_s).to_s_without_spaces).to eql('V2V4')
30
- expect(RTesseract.new(@path.join('images', 'test with spaces.tif').to_s).to_s_without_spaces).to eql('V2V4')
31
- end
32
-
33
- it ' translate images .png, .jpg, .bmp' do
34
- expect(RTesseract.new(@path.join('images', 'test.png').to_s).to_s_without_spaces).to eql('HW9W')
35
- expect(RTesseract.new(@path.join('images', 'test.jpg').to_s).to_s_without_spaces).to eql('3R8F')
36
- expect(RTesseract.new(@path.join('images', 'test.bmp').to_s).to_s_without_spaces).to eql('FLA6')
37
- end
38
-
39
- it ' should not error with depth > 32' do
40
- # expect(RTesseract.new(@path.join('images', 'README.pdf').to_s, debug: true).to_s_without_spaces).to eql('')
41
- end
42
-
43
- it ' support different processors' do
44
- # Rmagick
45
- expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql('43XF')
46
- expect(RTesseract.new(@image_tif, processor: 'rmagick').to_s_without_spaces).to eql('43XF')
47
- expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'rmagick').to_s_without_spaces).to eql('HW9W')
48
-
49
- # MiniMagick
50
- expect(RTesseract.new(@image_tif, processor: 'mini_magick').to_s_without_spaces).to eql('43XF')
51
- expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'mini_magick').to_s_without_spaces).to eql('HW9W')
52
-
53
- # NoneMagick
54
- expect(RTesseract.new(@image_tif, processor: 'none').to_s_without_spaces).to eql('43XF')
55
- end
56
-
57
- it ' change the image' do
58
- image = RTesseract.new(@image_tif)
59
- expect(image.to_s_without_spaces).to eql('43XF')
60
- image.source = @path.join('images', 'test1.tif').to_s
61
- expect(image.to_s_without_spaces).to eql('V2V4')
62
- end
63
-
64
- it ' returns the source' do
65
- image = RTesseract.new(@image_tif)
66
- expect(image.source).to eql(Pathname.new(@image_tif))
67
- end
68
-
69
- it ' select the language' do
70
- # English
71
- expect(RTesseract.new(@image_tif, lang: 'eng').lang).to eql(' -l eng ')
72
- expect(RTesseract.new(@image_tif, lang: 'en').lang).to eql(' -l eng ')
73
- expect(RTesseract.new(@image_tif, lang: 'en-US').lang).to eql(' -l eng ')
74
- expect(RTesseract.new(@image_tif, lang: 'english').lang).to eql(' -l eng ')
75
-
76
- # Portuguese
77
- expect(RTesseract.new(@image_tif, lang: 'por').lang).to eql(' -l por ')
78
- expect(RTesseract.new(@image_tif, lang: 'pt-BR').lang).to eql(' -l por ')
79
- expect(RTesseract.new(@image_tif, lang: 'pt-br').lang).to eql(' -l por ')
80
- expect(RTesseract.new(@image_tif, lang: 'pt').lang).to eql(' -l por ')
81
- expect(RTesseract.new(@image_tif, lang: 'portuguese').lang).to eql(' -l por ')
82
-
83
- expect(RTesseract.new(@image_tif, lang: 'eng').to_s_without_spaces).to eql('43XF')
84
-
85
- expect(RTesseract.new(@image_tif, lang: 'eng').lang).to eql(' -l eng ')
86
- expect(RTesseract.new(@image_tif, lang: 'it').lang).to eql(' -l ita ')
87
-
88
- # Invalid lang object
89
- expect(RTesseract.new(@image_tif, lang: MakeStringError.new).lang).to eql('')
90
- end
91
-
92
- it ' select options' do
93
- expect(RTesseract.new(@image_tif).options_cmd).to eql([])
94
- expect(RTesseract.new(@image_tif, options: 'digits').options_cmd).to eql(['digits'])
95
- expect(RTesseract.new(@image_tif, options: :digits).options_cmd).to eql([:digits])
96
- expect(RTesseract.new(@image_tif, options: [:digits, :quiet]).options_cmd).to eql([:digits, :quiet])
97
- end
98
-
99
- it ' support pdf output mode' do
100
- # Internal test. Consider 'pdf' option only when #to_pdf is called.
101
- expect(RTesseract.new(@image_tif, options: 'pdf').options_cmd).to eql([])
102
- expect(RTesseract.new(@image_for_pdf, options: :pdf).options_cmd).to eql([])
103
-
104
- pdf_ocr = RTesseract.new(@image_for_pdf)
105
- expect(File.exists?(pdf_ocr.to_pdf)).to eql(true)
106
- expect(File.extname(pdf_ocr.to_pdf)).to eql('.pdf')
107
- # Comment next line and go to tmp dir to see generated pdf.
108
- expect(pdf_ocr.clean).to eq(true)
109
- expect(File.exists?(pdf_ocr.to_pdf)).to eql(false)
110
-
111
- # Still have original functionality (i.e. #to_s, #to_s_without_spaces).
112
- pdf_ocr = RTesseract.new(@image_tif)
113
- expect(File.exists?(pdf_ocr.to_pdf)).to eql(true)
114
- expect(File.extname(pdf_ocr.to_pdf)).to eql('.pdf')
115
- expect(pdf_ocr.to_s_without_spaces).to eql('43XF')
116
- expect(pdf_ocr.clean).to eq(true)
117
- expect(File.exists?(pdf_ocr.to_pdf)).to eql(false)
118
- end
119
-
120
- it ' warn when tesseract cannot give pdf' do
121
- rtesseract = RTesseract.new(@image_for_pdf)
122
-
123
- allow(rtesseract).to receive(:tesseract_version).and_return(3.02)
124
- expect { rtesseract.to_pdf }.to raise_error(RTesseract::TesseractVersionError)
125
-
126
- allow(rtesseract).to receive(:tesseract_version).and_return(3.03)
127
- expect { rtesseract.to_pdf }.not_to raise_error
128
- end
129
-
130
- it ' be configurable' do
131
- expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0, display_text: 0).config).to eql("chop_enable 0\nenable_assoc 0\ndisplay_text 0")
132
- expect(RTesseract.new(@image_tif, chop_enable: 0).config).to eql('chop_enable 0')
133
- expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0).config).to eql("chop_enable 0\nenable_assoc 0")
134
- expect(RTesseract.new(@image_tif, chop_enable: 0).to_s_without_spaces).to eql('43XF')
135
- expect(RTesseract.new(@image_tif, tessedit_char_whitelist: 'ABCDEF12345').to_s_without_spaces).to eql('43F')
136
- end
137
-
138
- it ' crop image' do
139
- expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 140, y: 10).to_s_without_spaces).to eql('4')
140
- expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 180, y: 10).to_s_without_spaces).to eql('3')
141
- expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 20, h: 40, x: 216, y: 10).to_s_without_spaces).to eql('X')
142
- expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 30, h: 40, x: 240, y: 10).to_s_without_spaces).to eql('F')
143
- end
144
-
145
- it ' read image from blob' do
146
- image = Magick::Image.read(@path.join('images', 'test.png').to_s).first
147
- blob = image.quantize(256, Magick::GRAYColorspace).to_blob
148
-
149
- test = RTesseract.new('', psm: 7)
150
- test.from_blob(blob)
151
- expect(test.to_s_without_spaces).to eql('HW9W')
152
-
153
- test = RTesseract.new('', psm: 7)
154
- expect { test.from_blob('') }.to raise_error(RTesseract::ConversionError)
155
- end
156
-
157
- it ' use a instance' do
158
- expect(RTesseract.new(Magick::Image.read(@image_tif.to_s).first).to_s_without_spaces).to eql('43XF')
159
- expect(RTesseract::Processor::RMagickProcessor.a_name?('teste')).to eql(false)
160
- expect(RTesseract::Processor::RMagickProcessor.a_name?('rmagick')).to eql(true)
161
- expect(RTesseract::Processor::RMagickProcessor.a_name?('RMagickProcessor')).to eql(true)
162
- expect(RTesseract::Processor::MiniMagickProcessor.a_name?('teste')).to eql(false)
163
- expect(RTesseract::Processor::MiniMagickProcessor.a_name?('mini_magick')).to eql(true)
164
- expect(RTesseract::Processor::MiniMagickProcessor.a_name?('MiniMagickProcessor')).to eql(true)
165
- expect(RTesseract::Processor::NoneProcessor.a_name?('none')).to eql(true)
166
- expect(RTesseract::Processor::NoneProcessor.a_name?('NoneProcessor')).to eql(true)
167
- end
168
-
169
- it ' change image in a block' do
170
- test = RTesseract.read(@path.join('images', 'test.png').to_s) {}
171
- expect(test.class).to eql(RTesseract)
172
-
173
- test = RTesseract.new(@image_tif)
174
- test.read do |_image|
175
- _image = _image.quantize(256, Magick::GRAYColorspace)
176
- end
177
- expect(test.to_s_without_spaces).to eql('43XF')
178
-
179
- test = RTesseract.new(@path.join('images', 'blank.tif').to_s)
180
- test.read do |_image|
181
- _image
182
- end
183
- expect(test.to_s_without_spaces).to eql('')
184
-
185
- test = RTesseract.read(@path.join('images', 'test.png').to_s) do |_image|
186
- _image.rotate(90)
187
- end
188
- expect(test.to_s_without_spaces).to eql('HW9W')
189
-
190
- test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en') do |_image|
191
- _image = _image.white_threshold(245).quantize(256, Magick::GRAYColorspace)
192
- end
193
- expect(test.to_s_without_spaces).to eql('3R8F')
194
-
195
- test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en', processor: 'mini_magick') do |_image|
196
- _image.gravity 'south'
197
- end
198
- expect(test.to_s_without_spaces).to eql('3R8F')
199
- end
200
-
201
- it 'does not raise on read with image_magick processor' do
202
- expect {
203
- instance = RTesseract.read(@image_tif, processor: 'mini_magick') {}
204
- expect(instance.processor.a_name?('mini_magick')).to be_truthy
205
- }.not_to raise_error
206
- end
207
-
208
- it ' get a error' do
209
- expect { RTesseract.new(@path.join('images', 'test.jpg').to_s, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
210
- expect { RTesseract.new(@path.join('images', 'test_not_exists.png').to_s).to_s }.to raise_error(RTesseract::ImageNotSelectedError)
211
-
212
- # Invalid psm object
213
- expect(RTesseract.new(@image_tif, psm: MakeStringError.new).psm).to eql('')
214
- end
215
-
216
- it 'remove a file' do
217
- RTesseract::Utils.remove_files(Tempfile.new('config'))
218
-
219
- expect { RTesseract::Utils.remove_files(Pathname.new(Dir.tmpdir).join('test_not_exists')) }.to raise_error(RTesseract::TempFilesNotRemovedError)
220
- end
221
-
222
- it ' get a numeric value for tesseract version' do
223
- expect(RTesseract::Utils.version_number).to be_a Float
224
- end
225
-
226
- it ' support default config processors' do
227
- # Rmagick
228
- RTesseract.configure { |config| config.processor = 'rmagick' }
229
- expect(RTesseract.new(@image_tif).processor.a_name?('rmagick')).to eql(true)
230
-
231
- # MiniMagick
232
- RTesseract.configure { |config| config.processor = 'mini_magick' }
233
- expect(RTesseract.new(@image_tif).processor.a_name?('mini_magick')).to eql(true)
234
-
235
- # NoneMagick
236
- RTesseract.configure { |config| config.processor = 'none' }
237
- expect(RTesseract.new(@image_tif).processor.a_name?('none')).to eql(true)
238
-
239
- # overwrite default
240
- RTesseract.configure { |config| config.processor = 'rmagick' }
241
- expect(RTesseract.new(@image_tif, processor: 'mini_magick').processor.a_name?('mini_magick')).to eql(true)
242
-
243
- RTesseract.configure { |config| config.lang = 'portuguese' }
244
- expect(RTesseract.new(@image_tif).lang).to eql(' -l por ')
245
-
246
- RTesseract.configure { |config| config.psm = 7 }
247
- expect(RTesseract.new(@image_tif).psm).to eql(' -psm 7 ')
248
-
249
- RTesseract.configure { |config| config.tessdata_dir = '/tmp/test' }
250
- expect(RTesseract.new(@image_tif).tessdata_dir).to eql(' --tessdata-dir /tmp/test ')
251
-
252
- RTesseract.configure { |config| config.user_words = '/tmp/test' }
253
- expect(RTesseract.new(@image_tif).user_words).to eql(' --user-words /tmp/test ')
254
-
255
- RTesseract.configure { |config| config.user_patterns = '/tmp/test' }
256
- expect(RTesseract.new(@image_tif).user_patterns).to eql(' --user-patterns /tmp/test ')
257
- end
258
-
259
- it ' configure pdf has no effect and kept in-house' do
260
- # So it does not interfere with #to_s outputting.
261
- RTesseract.configure { |config| config.options_cmd = ['pdf'] }
262
- expect(RTesseract.new(@image_tif).options_cmd).to eql([])
263
-
264
- RTesseract.configure { |config| config.options_cmd = [:pdf] }
265
- expect(RTesseract.new(@image_tif).options_cmd).to eql([])
266
-
267
- RTesseract.configure { |config| config.options_cmd = [:pdf, 'pdf'] }
268
- expect(RTesseract.new(@image_tif).options_cmd).to eql([])
269
- end
270
-
271
- it ' support new configs' do
272
- expect(RTesseract.new(@image_tif, tessdata_dir: '/tmp/test').tessdata_dir).to eql(' --tessdata-dir /tmp/test ')
273
- expect(RTesseract.new(@image_tif, user_words: '/tmp/test').user_words).to eql(' --user-words /tmp/test ')
274
- expect(RTesseract.new(@image_tif, user_patterns: '/tmp/test').user_patterns).to eql(' --user-patterns /tmp/test ')
275
-
276
- expect(RTesseract.new(@image_tif, tessdata_dir: MakeStringError.new).tessdata_dir).to eql('')
277
- expect(RTesseract.new(@image_tif, user_words: MakeStringError.new).user_words).to eql('')
278
- expect(RTesseract.new(@image_tif, user_patterns: MakeStringError.new).user_patterns).to eql('')
279
-
280
- # expect(RTesseract.new(@path.join('images', 'test_words.png').to_s, psm: 3, user_words: @path.join('configs', 'eng.user-words.txt').to_s).to_s).to eql("If you are a friend,\nyou speak the password,\nand the doors will open.\n\n")
281
- end
282
- end