rtesseract 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.travis.yml +1 -0
 - data/CHANGELOG.md +22 -0
 - data/Gemfile.lock +6 -20
 - data/VERSION +1 -1
 - data/lib/processors/mini_magick.rb +37 -26
 - data/lib/processors/none.rb +27 -19
 - data/lib/processors/rmagick.rb +39 -28
 - data/lib/rtesseract.rb +46 -93
 - data/lib/rtesseract/blob.rb +34 -0
 - data/lib/rtesseract/box.rb +10 -1
 - data/lib/rtesseract/box_char.rb +3 -0
 - data/lib/rtesseract/configuration.rb +16 -8
 - data/lib/rtesseract/errors.rb +1 -0
 - data/lib/rtesseract/mixed.rb +7 -4
 - data/lib/rtesseract/processor.rb +19 -0
 - data/lib/rtesseract/utils.rb +34 -0
 - data/rtesseract.gemspec +8 -4
 - data/spec/configs/eng.user-words.txt +13 -0
 - data/spec/rtesseract_box_char_spec.rb +13 -12
 - data/spec/rtesseract_spec.rb +14 -12
 - metadata +7 -3
 - data/lib/utils.rb +0 -5
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 6eae58279cf744227e79b7bbc9180f7aea852547
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 3836aa96d24b7f1a0b957cf803553f547cc33544
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 0ef57359c7c7f43094a50838b6d29d28d7808c9cadd8f2b8514c613be030161f8d640c41ba3d403c00fb59fdf85ffcbc57795f6c65b8418ad348eb1a6c07e901
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: ff5f0f94c8039bd0b38b0c9ec2618b4c38b07b9707e28ff29a3bb943abc85d5afaa543dfba1ba2b9e565d056ea558eda9b7f6d222a6adb43614cd86c6e8fdcac
         
     | 
    
        data/.travis.yml
    CHANGED
    
    
    
        data/CHANGELOG.md
    ADDED
    
    | 
         @@ -0,0 +1,22 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ## v2.0.1
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            #### Changed
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            * Refactoring of some small classes
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            ## v2.0.0
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            #### Added
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            * Support to options --tessdata-dir, --user-words and --user-patterns
         
     | 
| 
      
 12 
     | 
    
         
            +
            * Ruby 2.3.0 to travis tests.
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            #### Changed
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            * Refactoring of some classes
         
     | 
| 
      
 17 
     | 
    
         
            +
            * Crop options is a hash with x,y,w,h keys.
         
     | 
| 
      
 18 
     | 
    
         
            +
            * Areas of RTesseract::Mixed now changed :width to :w and :height to :h.
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
            #### Removed
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
            * Support to quick_magick gem.
         
     | 
    
        data/Gemfile.lock
    CHANGED
    
    | 
         @@ -3,9 +3,8 @@ GEM 
     | 
|
| 
       3 
3 
     | 
    
         
             
              specs:
         
     | 
| 
       4 
4 
     | 
    
         
             
                addressable (2.4.0)
         
     | 
| 
       5 
5 
     | 
    
         
             
                builder (3.2.2)
         
     | 
| 
       6 
     | 
    
         
            -
                coveralls (0.8. 
     | 
| 
      
 6 
     | 
    
         
            +
                coveralls (0.8.13)
         
     | 
| 
       7 
7 
     | 
    
         
             
                  json (~> 1.8)
         
     | 
| 
       8 
     | 
    
         
            -
                  rest-client (>= 1.6.8, < 2)
         
     | 
| 
       9 
8 
     | 
    
         
             
                  simplecov (~> 0.11.0)
         
     | 
| 
       10 
9 
     | 
    
         
             
                  term-ansicolor (~> 1.3)
         
     | 
| 
       11 
10 
     | 
    
         
             
                  thor (~> 0.19.1)
         
     | 
| 
         @@ -14,8 +13,6 @@ GEM 
     | 
|
| 
       14 
13 
     | 
    
         
             
                  thread_safe (~> 0.3, >= 0.3.1)
         
     | 
| 
       15 
14 
     | 
    
         
             
                diff-lcs (1.2.5)
         
     | 
| 
       16 
15 
     | 
    
         
             
                docile (1.1.5)
         
     | 
| 
       17 
     | 
    
         
            -
                domain_name (0.5.25)
         
     | 
| 
       18 
     | 
    
         
            -
                  unf (>= 0.0.5, < 1.0.0)
         
     | 
| 
       19 
16 
     | 
    
         
             
                faraday (0.9.2)
         
     | 
| 
       20 
17 
     | 
    
         
             
                  multipart-post (>= 1.2, < 3)
         
     | 
| 
       21 
18 
     | 
    
         
             
                git (1.3.0)
         
     | 
| 
         @@ -28,8 +25,6 @@ GEM 
     | 
|
| 
       28 
25 
     | 
    
         
             
                  oauth2
         
     | 
| 
       29 
26 
     | 
    
         
             
                hashie (3.4.3)
         
     | 
| 
       30 
27 
     | 
    
         
             
                highline (1.7.8)
         
     | 
| 
       31 
     | 
    
         
            -
                http-cookie (1.0.2)
         
     | 
| 
       32 
     | 
    
         
            -
                  domain_name (~> 0.5)
         
     | 
| 
       33 
28 
     | 
    
         
             
                jeweler (2.1.1)
         
     | 
| 
       34 
29 
     | 
    
         
             
                  builder
         
     | 
| 
       35 
30 
     | 
    
         
             
                  bundler (>= 1.0)
         
     | 
| 
         @@ -42,13 +37,11 @@ GEM 
     | 
|
| 
       42 
37 
     | 
    
         
             
                  semver
         
     | 
| 
       43 
38 
     | 
    
         
             
                json (1.8.3)
         
     | 
| 
       44 
39 
     | 
    
         
             
                jwt (1.5.1)
         
     | 
| 
       45 
     | 
    
         
            -
                 
     | 
| 
       46 
     | 
    
         
            -
                mini_magick (4.3.6)
         
     | 
| 
      
 40 
     | 
    
         
            +
                mini_magick (4.5.1)
         
     | 
| 
       47 
41 
     | 
    
         
             
                mini_portile2 (2.0.0)
         
     | 
| 
       48 
42 
     | 
    
         
             
                multi_json (1.11.2)
         
     | 
| 
       49 
43 
     | 
    
         
             
                multi_xml (0.5.5)
         
     | 
| 
       50 
44 
     | 
    
         
             
                multipart-post (2.0.0)
         
     | 
| 
       51 
     | 
    
         
            -
                netrc (0.11.0)
         
     | 
| 
       52 
45 
     | 
    
         
             
                nokogiri (1.6.7.2)
         
     | 
| 
       53 
46 
     | 
    
         
             
                  mini_portile2 (~> 2.0.0.rc2)
         
     | 
| 
       54 
47 
     | 
    
         
             
                oauth2 (1.1.0)
         
     | 
| 
         @@ -61,26 +54,22 @@ GEM 
     | 
|
| 
       61 
54 
     | 
    
         
             
                rake (11.1.2)
         
     | 
| 
       62 
55 
     | 
    
         
             
                rdoc (4.2.2)
         
     | 
| 
       63 
56 
     | 
    
         
             
                  json (~> 1.4)
         
     | 
| 
       64 
     | 
    
         
            -
                rest-client (1.8.0)
         
     | 
| 
       65 
     | 
    
         
            -
                  http-cookie (>= 1.0.2, < 2.0)
         
     | 
| 
       66 
     | 
    
         
            -
                  mime-types (>= 1.16, < 3.0)
         
     | 
| 
       67 
     | 
    
         
            -
                  netrc (~> 0.7)
         
     | 
| 
       68 
57 
     | 
    
         
             
                rmagick (2.15.4)
         
     | 
| 
       69 
58 
     | 
    
         
             
                rspec (3.4.0)
         
     | 
| 
       70 
59 
     | 
    
         
             
                  rspec-core (~> 3.4.0)
         
     | 
| 
       71 
60 
     | 
    
         
             
                  rspec-expectations (~> 3.4.0)
         
     | 
| 
       72 
61 
     | 
    
         
             
                  rspec-mocks (~> 3.4.0)
         
     | 
| 
       73 
     | 
    
         
            -
                rspec-core (3.4. 
     | 
| 
      
 62 
     | 
    
         
            +
                rspec-core (3.4.4)
         
     | 
| 
       74 
63 
     | 
    
         
             
                  rspec-support (~> 3.4.0)
         
     | 
| 
       75 
64 
     | 
    
         
             
                rspec-expectations (3.4.0)
         
     | 
| 
       76 
65 
     | 
    
         
             
                  diff-lcs (>= 1.2.0, < 2.0)
         
     | 
| 
       77 
66 
     | 
    
         
             
                  rspec-support (~> 3.4.0)
         
     | 
| 
       78 
     | 
    
         
            -
                rspec-mocks (3.4. 
     | 
| 
      
 67 
     | 
    
         
            +
                rspec-mocks (3.4.1)
         
     | 
| 
       79 
68 
     | 
    
         
             
                  diff-lcs (>= 1.2.0, < 2.0)
         
     | 
| 
       80 
69 
     | 
    
         
             
                  rspec-support (~> 3.4.0)
         
     | 
| 
       81 
70 
     | 
    
         
             
                rspec-support (3.4.1)
         
     | 
| 
       82 
71 
     | 
    
         
             
                semver (1.0.1)
         
     | 
| 
       83 
     | 
    
         
            -
                simplecov (0.11. 
     | 
| 
      
 72 
     | 
    
         
            +
                simplecov (0.11.2)
         
     | 
| 
       84 
73 
     | 
    
         
             
                  docile (~> 1.1.0)
         
     | 
| 
       85 
74 
     | 
    
         
             
                  json (~> 1.8)
         
     | 
| 
       86 
75 
     | 
    
         
             
                  simplecov-html (~> 0.10.0)
         
     | 
| 
         @@ -90,9 +79,6 @@ GEM 
     | 
|
| 
       90 
79 
     | 
    
         
             
                thor (0.19.1)
         
     | 
| 
       91 
80 
     | 
    
         
             
                thread_safe (0.3.5)
         
     | 
| 
       92 
81 
     | 
    
         
             
                tins (1.6.0)
         
     | 
| 
       93 
     | 
    
         
            -
                unf (0.1.4)
         
     | 
| 
       94 
     | 
    
         
            -
                  unf_ext
         
     | 
| 
       95 
     | 
    
         
            -
                unf_ext (0.0.7.1)
         
     | 
| 
       96 
82 
     | 
    
         | 
| 
       97 
83 
     | 
    
         
             
            PLATFORMS
         
     | 
| 
       98 
84 
     | 
    
         
             
              ruby
         
     | 
| 
         @@ -109,4 +95,4 @@ DEPENDENCIES 
     | 
|
| 
       109 
95 
     | 
    
         
             
              simplecov
         
     | 
| 
       110 
96 
     | 
    
         | 
| 
       111 
97 
     | 
    
         
             
            BUNDLED WITH
         
     | 
| 
       112 
     | 
    
         
            -
               1. 
     | 
| 
      
 98 
     | 
    
         
            +
               1.11.2
         
     | 
    
        data/VERSION
    CHANGED
    
    | 
         @@ -1 +1 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            2.0. 
     | 
| 
      
 1 
     | 
    
         
            +
            2.0.1
         
     | 
    
        data/lib/processors/mini_magick.rb
    CHANGED
    
    | 
         @@ -1,32 +1,43 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # encoding: UTF-8
         
     | 
| 
       2 
     | 
    
         
            -
            #  
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
               
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
      
 2 
     | 
    
         
            +
            # RTesseract class
         
     | 
| 
      
 3 
     | 
    
         
            +
            class RTesseract
         
     | 
| 
      
 4 
     | 
    
         
            +
              # Processor Module
         
     | 
| 
      
 5 
     | 
    
         
            +
              module Processor
         
     | 
| 
      
 6 
     | 
    
         
            +
                # Add to rtesseract a image manipulation with MiniMagick
         
     | 
| 
      
 7 
     | 
    
         
            +
                module MiniMagickProcessor
         
     | 
| 
      
 8 
     | 
    
         
            +
                  # Setup Processor
         
     | 
| 
      
 9 
     | 
    
         
            +
                  def self.setup
         
     | 
| 
      
 10 
     | 
    
         
            +
                    require 'mini_magick'
         
     | 
| 
      
 11 
     | 
    
         
            +
                  end
         
     | 
| 
       7 
12 
     | 
    
         | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
      
 13 
     | 
    
         
            +
                  # Check if is this Processor
         
     | 
| 
      
 14 
     | 
    
         
            +
                  def self.a_name?(name)
         
     | 
| 
      
 15 
     | 
    
         
            +
                    %w(mini_magick MiniMagickProcessor).include?(name.to_s)
         
     | 
| 
      
 16 
     | 
    
         
            +
                  end
         
     | 
| 
       11 
17 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
      
 18 
     | 
    
         
            +
                  # Convert Image to Tiff
         
     | 
| 
      
 19 
     | 
    
         
            +
                  def self.image_to_tif(source, points = {})
         
     | 
| 
      
 20 
     | 
    
         
            +
                    tmp_file = Tempfile.new(['', '.tif'])
         
     | 
| 
      
 21 
     | 
    
         
            +
                    cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
         
     | 
| 
      
 22 
     | 
    
         
            +
                    cat.format('tif') do |c|
         
     | 
| 
      
 23 
     | 
    
         
            +
                      c.compress 'None'
         
     | 
| 
      
 24 
     | 
    
         
            +
                      c.alpha 'off'
         
     | 
| 
      
 25 
     | 
    
         
            +
                    end
         
     | 
| 
      
 26 
     | 
    
         
            +
                    cat.crop("#{points[:w]}x#{points[:h]}+#{points[:x]}+#{points[:y]}") if points.is_a?(Hash) && points.values.compact != []
         
     | 
| 
      
 27 
     | 
    
         
            +
                    cat.alpha 'off'
         
     | 
| 
      
 28 
     | 
    
         
            +
                    cat.write tmp_file.path.to_s
         
     | 
| 
      
 29 
     | 
    
         
            +
                    tmp_file
         
     | 
| 
      
 30 
     | 
    
         
            +
                  end
         
     | 
| 
       24 
31 
     | 
    
         | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
             
     | 
| 
      
 32 
     | 
    
         
            +
                  # Cast instance of image
         
     | 
| 
      
 33 
     | 
    
         
            +
                  def self.read_with_processor(path)
         
     | 
| 
      
 34 
     | 
    
         
            +
                    MiniMagick::Image.open(path.to_s)
         
     | 
| 
      
 35 
     | 
    
         
            +
                  end
         
     | 
| 
       28 
36 
     | 
    
         | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
      
 37 
     | 
    
         
            +
                  # Check if is a MiniMagick image
         
     | 
| 
      
 38 
     | 
    
         
            +
                  def self.image?(object)
         
     | 
| 
      
 39 
     | 
    
         
            +
                    object.class == MiniMagick::Image
         
     | 
| 
      
 40 
     | 
    
         
            +
                  end
         
     | 
| 
      
 41 
     | 
    
         
            +
                end
         
     | 
| 
       31 
42 
     | 
    
         
             
              end
         
     | 
| 
       32 
     | 
    
         
            -
            end
         
     | 
| 
      
 43 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/processors/none.rb
    CHANGED
    
    | 
         @@ -1,26 +1,34 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # encoding: UTF-8
         
     | 
| 
       2 
     | 
    
         
            -
            #  
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
               
     | 
| 
       5 
     | 
    
         
            -
               
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
      
 2 
     | 
    
         
            +
            # RTesseract class
         
     | 
| 
      
 3 
     | 
    
         
            +
            class RTesseract
         
     | 
| 
      
 4 
     | 
    
         
            +
              # Processor Module
         
     | 
| 
      
 5 
     | 
    
         
            +
              module Processor
         
     | 
| 
      
 6 
     | 
    
         
            +
                # Add to rtesseract a image without manipulation
         
     | 
| 
      
 7 
     | 
    
         
            +
                module NoneProcessor
         
     | 
| 
      
 8 
     | 
    
         
            +
                  # Setup Processor
         
     | 
| 
      
 9 
     | 
    
         
            +
                  def self.setup
         
     | 
| 
      
 10 
     | 
    
         
            +
                  end
         
     | 
| 
       10 
11 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
              end
         
     | 
| 
      
 12 
     | 
    
         
            +
                  # Check if is this Processor
         
     | 
| 
      
 13 
     | 
    
         
            +
                  def self.a_name?(name)
         
     | 
| 
      
 14 
     | 
    
         
            +
                    %w(none NoneProcessor).include?(name.to_s)
         
     | 
| 
      
 15 
     | 
    
         
            +
                  end
         
     | 
| 
       16 
16 
     | 
    
         | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
      
 17 
     | 
    
         
            +
                  # Convert Image to Tiff
         
     | 
| 
      
 18 
     | 
    
         
            +
                  def self.image_to_tif(source, _points = {})
         
     | 
| 
      
 19 
     | 
    
         
            +
                    tmp_file = Tempfile.new(['', '.tif'])
         
     | 
| 
      
 20 
     | 
    
         
            +
                    tmp_file.write(read_with_processor(source))
         
     | 
| 
      
 21 
     | 
    
         
            +
                    tmp_file
         
     | 
| 
      
 22 
     | 
    
         
            +
                  end
         
     | 
| 
       19 
23 
     | 
    
         | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
      
 24 
     | 
    
         
            +
                  # Cast instance of image
         
     | 
| 
      
 25 
     | 
    
         
            +
                  def self.read_with_processor(path)
         
     | 
| 
      
 26 
     | 
    
         
            +
                    File.read(path)
         
     | 
| 
      
 27 
     | 
    
         
            +
                  end
         
     | 
| 
       23 
28 
     | 
    
         | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
      
 29 
     | 
    
         
            +
                  # Check if is a image
         
     | 
| 
      
 30 
     | 
    
         
            +
                  def self.image?(*)
         
     | 
| 
      
 31 
     | 
    
         
            +
                  end
         
     | 
| 
      
 32 
     | 
    
         
            +
                end
         
     | 
| 
       25 
33 
     | 
    
         
             
              end
         
     | 
| 
       26 
34 
     | 
    
         
             
            end
         
     | 
    
        data/lib/processors/rmagick.rb
    CHANGED
    
    | 
         @@ -1,35 +1,46 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # encoding: UTF-8
         
     | 
| 
       2 
     | 
    
         
            -
            #  
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
               
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
                 
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
      
 2 
     | 
    
         
            +
            # RTesseract class
         
     | 
| 
      
 3 
     | 
    
         
            +
            class RTesseract
         
     | 
| 
      
 4 
     | 
    
         
            +
              # Processor Module
         
     | 
| 
      
 5 
     | 
    
         
            +
              module Processor
         
     | 
| 
      
 6 
     | 
    
         
            +
                # Add to rtesseract a image manipulation with RMagick
         
     | 
| 
      
 7 
     | 
    
         
            +
                module RMagickProcessor
         
     | 
| 
      
 8 
     | 
    
         
            +
                  # Setup Processor
         
     | 
| 
      
 9 
     | 
    
         
            +
                  def self.setup
         
     | 
| 
      
 10 
     | 
    
         
            +
                    require 'rmagick'
         
     | 
| 
      
 11 
     | 
    
         
            +
                  rescue LoadError
         
     | 
| 
      
 12 
     | 
    
         
            +
                    # :nocov:
         
     | 
| 
      
 13 
     | 
    
         
            +
                    require 'RMagick'
         
     | 
| 
      
 14 
     | 
    
         
            +
                    # :nocov:
         
     | 
| 
      
 15 
     | 
    
         
            +
                  end
         
     | 
| 
       11 
16 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
      
 17 
     | 
    
         
            +
                  # Check if is this Processor
         
     | 
| 
      
 18 
     | 
    
         
            +
                  def self.a_name?(name)
         
     | 
| 
      
 19 
     | 
    
         
            +
                    %w(rmagick RMagickProcessor).include?(name.to_s)
         
     | 
| 
      
 20 
     | 
    
         
            +
                  end
         
     | 
| 
       15 
21 
     | 
    
         | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
      
 22 
     | 
    
         
            +
                  # Convert Image to Tiff
         
     | 
| 
      
 23 
     | 
    
         
            +
                  def self.image_to_tif(source, points = {})
         
     | 
| 
      
 24 
     | 
    
         
            +
                    tmp_file = Tempfile.new(['', '.tif'])
         
     | 
| 
      
 25 
     | 
    
         
            +
                    cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
         
     | 
| 
      
 26 
     | 
    
         
            +
                    cat.crop!(points[:x], points[:y], points[:w], points[:h]) if points.is_a?(Hash) && points.values.compact != []
         
     | 
| 
      
 27 
     | 
    
         
            +
                    cat.alpha Magick::DeactivateAlphaChannel
         
     | 
| 
      
 28 
     | 
    
         
            +
                    cat.write(tmp_file.path.to_s) do
         
     | 
| 
      
 29 
     | 
    
         
            +
                      # self.depth = 16
         
     | 
| 
      
 30 
     | 
    
         
            +
                      self.compression = Magick::NoCompression
         
     | 
| 
      
 31 
     | 
    
         
            +
                    end
         
     | 
| 
      
 32 
     | 
    
         
            +
                    tmp_file
         
     | 
| 
      
 33 
     | 
    
         
            +
                  end
         
     | 
| 
       27 
34 
     | 
    
         | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
      
 35 
     | 
    
         
            +
                  # Cast instance of image
         
     | 
| 
      
 36 
     | 
    
         
            +
                  def self.read_with_processor(path)
         
     | 
| 
      
 37 
     | 
    
         
            +
                    Magick::Image.read(path.to_s).first
         
     | 
| 
      
 38 
     | 
    
         
            +
                  end
         
     | 
| 
       31 
39 
     | 
    
         | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
      
 40 
     | 
    
         
            +
                  # Check if is a RMagick image
         
     | 
| 
      
 41 
     | 
    
         
            +
                  def self.image?(object)
         
     | 
| 
      
 42 
     | 
    
         
            +
                    object.class == Magick::Image
         
     | 
| 
      
 43 
     | 
    
         
            +
                  end
         
     | 
| 
      
 44 
     | 
    
         
            +
                end
         
     | 
| 
       34 
45 
     | 
    
         
             
              end
         
     | 
| 
       35 
46 
     | 
    
         
             
            end
         
     | 
    
        data/lib/rtesseract.rb
    CHANGED
    
    | 
         @@ -1,18 +1,10 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # encoding: UTF-8
         
     | 
| 
       2 
2 
     | 
    
         
             
            require 'pathname'
         
     | 
| 
       3 
3 
     | 
    
         
             
            require 'tempfile'
         
     | 
| 
       4 
     | 
    
         
            -
            require 'utils'
         
     | 
| 
       5 
4 
     | 
    
         | 
| 
      
 5 
     | 
    
         
            +
            require 'rtesseract/utils'
         
     | 
| 
       6 
6 
     | 
    
         
             
            require 'rtesseract/configuration'
         
     | 
| 
       7 
7 
     | 
    
         
             
            require 'rtesseract/errors'
         
     | 
| 
       8 
     | 
    
         
            -
            require 'rtesseract/mixed'
         
     | 
| 
       9 
     | 
    
         
            -
            require 'rtesseract/box'
         
     | 
| 
       10 
     | 
    
         
            -
            require 'rtesseract/box_char'
         
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
            # Processors
         
     | 
| 
       13 
     | 
    
         
            -
            require 'processors/rmagick.rb'
         
     | 
| 
       14 
     | 
    
         
            -
            require 'processors/mini_magick.rb'
         
     | 
| 
       15 
     | 
    
         
            -
            require 'processors/none.rb'
         
     | 
| 
       16 
8 
     | 
    
         | 
| 
       17 
9 
     | 
    
         
             
            # Ruby wrapper for Tesseract OCR
         
     | 
| 
       18 
10 
     | 
    
         
             
            class RTesseract
         
     | 
| 
         @@ -23,58 +15,30 @@ class RTesseract 
     | 
|
| 
       23 
15 
     | 
    
         
             
              def initialize(src = '', options = {})
         
     | 
| 
       24 
16 
     | 
    
         
             
                self.configuration = RTesseract.local_config(options)
         
     | 
| 
       25 
17 
     | 
    
         
             
                @options = options || {}
         
     | 
| 
       26 
     | 
    
         
            -
                @value 
     | 
| 
       27 
     | 
    
         
            -
                @ 
     | 
| 
      
 18 
     | 
    
         
            +
                @value = nil
         
     | 
| 
      
 19 
     | 
    
         
            +
                @points = {}
         
     | 
| 
      
 20 
     | 
    
         
            +
                @processor = RTesseract::Processor.choose_processor!(configuration.processor)
         
     | 
| 
       28 
21 
     | 
    
         
             
                @source = @processor.image?(src) ? src : Pathname.new(src)
         
     | 
| 
       29 
22 
     | 
    
         
             
                initialize_hook
         
     | 
| 
       30 
23 
     | 
    
         
             
              end
         
     | 
| 
       31 
24 
     | 
    
         | 
| 
      
 25 
     | 
    
         
            +
              # Hook called at the end of the initialize method
         
     | 
| 
       32 
26 
     | 
    
         
             
              def initialize_hook
         
     | 
| 
       33 
27 
     | 
    
         
             
              end
         
     | 
| 
       34 
28 
     | 
    
         | 
| 
       35 
     | 
    
         
            -
               
     | 
| 
       36 
     | 
    
         
            -
                fail RTesseract::ImageNotSelectedError if src.nil?
         
     | 
| 
       37 
     | 
    
         
            -
                processor = RTesseract.choose_processor!(options.option(:processor, nil))
         
     | 
| 
       38 
     | 
    
         
            -
                image = processor.read_with_processor(src.to_s)
         
     | 
| 
       39 
     | 
    
         
            -
                yield(image)
         
     | 
| 
       40 
     | 
    
         
            -
                object = RTesseract.new('', options).from_blob(image.to_blob)
         
     | 
| 
       41 
     | 
    
         
            -
                object
         
     | 
| 
       42 
     | 
    
         
            -
              end
         
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
              def read
         
     | 
| 
       45 
     | 
    
         
            -
                image = @processor.read_with_processor(@source.to_s)
         
     | 
| 
       46 
     | 
    
         
            -
                new_image = yield(image)
         
     | 
| 
       47 
     | 
    
         
            -
                from_blob(new_image.to_blob, File.extname(@source.to_s))
         
     | 
| 
       48 
     | 
    
         
            -
                self
         
     | 
| 
       49 
     | 
    
         
            -
              end
         
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
      
 29 
     | 
    
         
            +
              # Define the source
         
     | 
| 
       51 
30 
     | 
    
         
             
              def source=(src)
         
     | 
| 
       52 
31 
     | 
    
         
             
                @value = nil
         
     | 
| 
       53 
32 
     | 
    
         
             
                @source = @processor.image?(src) ? src : Pathname.new(src)
         
     | 
| 
       54 
33 
     | 
    
         
             
              end
         
     | 
| 
       55 
34 
     | 
    
         | 
| 
       56 
35 
     | 
    
         
             
              # Crop image to convert
         
     | 
| 
       57 
     | 
    
         
            -
              def crop!( 
     | 
| 
      
 36 
     | 
    
         
            +
              def crop!(points = {})
         
     | 
| 
       58 
37 
     | 
    
         
             
                @value = nil
         
     | 
| 
       59 
     | 
    
         
            -
                @points =  
     | 
| 
      
 38 
     | 
    
         
            +
                @points = points
         
     | 
| 
       60 
39 
     | 
    
         
             
                self
         
     | 
| 
       61 
40 
     | 
    
         
             
              end
         
     | 
| 
       62 
41 
     | 
    
         | 
| 
       63 
     | 
    
         
            -
              # Remove files
         
     | 
| 
       64 
     | 
    
         
            -
              def remove_file(files = [])
         
     | 
| 
       65 
     | 
    
         
            -
                files.each do |file|
         
     | 
| 
       66 
     | 
    
         
            -
                  if file.is_a?(Tempfile)
         
     | 
| 
       67 
     | 
    
         
            -
                    file.close
         
     | 
| 
       68 
     | 
    
         
            -
                    file.unlink
         
     | 
| 
       69 
     | 
    
         
            -
                  else
         
     | 
| 
       70 
     | 
    
         
            -
                    File.unlink(file)
         
     | 
| 
       71 
     | 
    
         
            -
                  end
         
     | 
| 
       72 
     | 
    
         
            -
                end
         
     | 
| 
       73 
     | 
    
         
            -
                true
         
     | 
| 
       74 
     | 
    
         
            -
              rescue => error
         
     | 
| 
       75 
     | 
    
         
            -
                raise RTesseract::TempFilesNotRemovedError.new(error: error, files: files)
         
     | 
| 
       76 
     | 
    
         
            -
              end
         
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
       78 
42 
     | 
    
         
             
              # Select the language
         
     | 
| 
       79 
43 
     | 
    
         
             
              # ===Languages
         
     | 
| 
       80 
44 
     | 
    
         
             
              ## * eng   - English
         
     | 
| 
         @@ -88,58 +52,56 @@ class RTesseract 
     | 
|
| 
       88 
52 
     | 
    
         
             
              ## * vie   - Vietnamese
         
     | 
| 
       89 
53 
     | 
    
         
             
              ## Note: Make sure you have installed the language to tesseract
         
     | 
| 
       90 
54 
     | 
    
         
             
              def lang
         
     | 
| 
       91 
     | 
    
         
            -
                language =  
     | 
| 
       92 
     | 
    
         
            -
                LANGUAGES 
     | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
       94 
     | 
    
         
            -
                end
         
     | 
| 
       95 
     | 
    
         
            -
                return " -l #{language} " if language.size > 0
         
     | 
| 
      
 55 
     | 
    
         
            +
                language = (configuration.lang || 'eng').to_s.strip.downcase
         
     | 
| 
      
 56 
     | 
    
         
            +
                " -l #{LANGUAGES[language] || language} "
         
     | 
| 
      
 57 
     | 
    
         
            +
              rescue
         
     | 
| 
       96 
58 
     | 
    
         
             
                ''
         
     | 
| 
      
 59 
     | 
    
         
            +
              end
         
     | 
| 
      
 60 
     | 
    
         
            +
             
     | 
| 
      
 61 
     | 
    
         
            +
              # Convert option to command
         
     | 
| 
      
 62 
     | 
    
         
            +
              def option_to_string(prefix, value = nil)
         
     | 
| 
      
 63 
     | 
    
         
            +
                (value.nil? ? '' : " #{prefix} #{value} ")
         
     | 
| 
       97 
64 
     | 
    
         
             
              rescue
         
     | 
| 
       98 
65 
     | 
    
         
             
                ''
         
     | 
| 
       99 
66 
     | 
    
         
             
              end
         
     | 
| 
       100 
67 
     | 
    
         | 
| 
       101 
68 
     | 
    
         
             
              # Page Segment Mode
         
     | 
| 
       102 
69 
     | 
    
         
             
              def psm
         
     | 
| 
       103 
     | 
    
         
            -
                ( 
     | 
| 
       104 
     | 
    
         
            -
              rescue
         
     | 
| 
       105 
     | 
    
         
            -
                ''
         
     | 
| 
      
 70 
     | 
    
         
            +
                option_to_string('-psm', configuration.psm)
         
     | 
| 
       106 
71 
     | 
    
         
             
              end
         
     | 
| 
       107 
72 
     | 
    
         | 
| 
       108 
73 
     | 
    
         
             
              # Tessdata Dir
         
     | 
| 
       109 
74 
     | 
    
         
             
              def tessdata_dir
         
     | 
| 
       110 
     | 
    
         
            -
                ( 
     | 
| 
       111 
     | 
    
         
            -
              rescue
         
     | 
| 
       112 
     | 
    
         
            -
                ''
         
     | 
| 
      
 75 
     | 
    
         
            +
                option_to_string('--tessdata-dir', configuration.tessdata_dir)
         
     | 
| 
       113 
76 
     | 
    
         
             
              end
         
     | 
| 
       114 
77 
     | 
    
         | 
| 
       115 
78 
     | 
    
         
             
              # User Words
         
     | 
| 
       116 
79 
     | 
    
         
             
              def user_words
         
     | 
| 
       117 
     | 
    
         
            -
                ( 
     | 
| 
       118 
     | 
    
         
            -
              rescue
         
     | 
| 
       119 
     | 
    
         
            -
                ''
         
     | 
| 
      
 80 
     | 
    
         
            +
                option_to_string('--user-words', configuration.user_words)
         
     | 
| 
       120 
81 
     | 
    
         
             
              end
         
     | 
| 
       121 
82 
     | 
    
         | 
| 
       122 
83 
     | 
    
         
             
              # User Patterns
         
     | 
| 
       123 
84 
     | 
    
         
             
              def user_patterns
         
     | 
| 
       124 
     | 
    
         
            -
                ( 
     | 
| 
       125 
     | 
    
         
            -
              rescue
         
     | 
| 
       126 
     | 
    
         
            -
                ''
         
     | 
| 
      
 85 
     | 
    
         
            +
                option_to_string('--user-patterns', configuration.user_patterns)
         
     | 
| 
       127 
86 
     | 
    
         
             
              end
         
     | 
| 
       128 
87 
     | 
    
         | 
| 
       129 
88 
     | 
    
         
             
              # Options on line
         
     | 
| 
       130 
89 
     | 
    
         
             
              def options_cmd
         
     | 
| 
       131 
     | 
    
         
            -
                 
     | 
| 
      
 90 
     | 
    
         
            +
                configuration.options_cmd
         
     | 
| 
       132 
91 
     | 
    
         
             
              end
         
     | 
| 
       133 
92 
     | 
    
         | 
| 
      
 93 
     | 
    
         
            +
              # Hook called before the config is generated
         
     | 
| 
       134 
94 
     | 
    
         
             
              def config_hook
         
     | 
| 
       135 
95 
     | 
    
         
             
              end
         
     | 
| 
       136 
96 
     | 
    
         | 
| 
      
 97 
     | 
    
         
            +
              # Convert configurations
         
     | 
| 
       137 
98 
     | 
    
         
             
              def config
         
     | 
| 
       138 
99 
     | 
    
         
             
                @options ||= {}
         
     | 
| 
       139 
100 
     | 
    
         
             
                config_hook
         
     | 
| 
       140 
101 
     | 
    
         
             
                @options.map { |k, v| "#{k} #{v}" }.join("\n")
         
     | 
| 
       141 
102 
     | 
    
         
             
              end
         
     | 
| 
       142 
103 
     | 
    
         | 
| 
      
 104 
     | 
    
         
            +
              # Write config to file
         
     | 
| 
       143 
105 
     | 
    
         
             
              def config_file
         
     | 
| 
       144 
106 
     | 
    
         
             
                config_hook
         
     | 
| 
       145 
107 
     | 
    
         
             
                return '' if @options == {}
         
     | 
| 
         @@ -151,34 +113,41 @@ class RTesseract 
     | 
|
| 
       151 
113 
     | 
    
         | 
| 
       152 
114 
     | 
    
         
             
              # TODO: Clear console for MacOS or Windows
         
     | 
| 
       153 
115 
     | 
    
         
             
              def clear_console_output
         
     | 
| 
       154 
     | 
    
         
            -
                return '' if  
     | 
| 
      
 116 
     | 
    
         
            +
                return '' if configuration.debug
         
     | 
| 
       155 
117 
     | 
    
         
             
                return '2>/dev/null' if File.exist?('/dev/null') # Linux console clear
         
     | 
| 
       156 
118 
     | 
    
         
             
              end
         
     | 
| 
       157 
119 
     | 
    
         | 
| 
      
 120 
     | 
    
         
            +
              # Get image
         
     | 
| 
       158 
121 
     | 
    
         
             
              def image
         
     | 
| 
       159 
122 
     | 
    
         
             
                (@image = @processor.image_to_tif(@source, @points)).path
         
     | 
| 
       160 
123 
     | 
    
         
             
              end
         
     | 
| 
       161 
124 
     | 
    
         | 
| 
      
 125 
     | 
    
         
            +
              # Extension of file
         
     | 
| 
       162 
126 
     | 
    
         
             
              def file_ext
         
     | 
| 
       163 
127 
     | 
    
         
             
                '.txt'
         
     | 
| 
       164 
128 
     | 
    
         
             
              end
         
     | 
| 
       165 
129 
     | 
    
         | 
| 
      
 130 
     | 
    
         
            +
              # Random temporary file path (without extension)
         
     | 
| 
       166 
131 
     | 
    
         
             
              def text_file
         
     | 
| 
       167 
132 
     | 
    
         
             
                @text_file = Pathname.new(Dir.tmpdir).join("#{Time.now.to_f}#{rand(1500)}").to_s
         
     | 
| 
       168 
133 
     | 
    
         
             
              end
         
     | 
| 
       169 
134 
     | 
    
         | 
| 
      
 135 
     | 
    
         
            +
              # Full path of file with extension
         
     | 
| 
       170 
136 
     | 
    
         
             
              def text_file_with_ext(ext = nil)
         
     | 
| 
       171 
137 
     | 
    
         
             
                [@text_file, ext || file_ext].join('')
         
     | 
| 
       172 
138 
     | 
    
         
             
              end
         
     | 
| 
       173 
139 
     | 
    
         | 
| 
      
 140 
     | 
    
         
            +
              # Run command
         
     | 
| 
       174 
141 
     | 
    
         
             
              def convert_command
         
     | 
| 
       175 
     | 
    
         
            -
                `#{ 
     | 
| 
      
 142 
     | 
    
         
            +
                `#{configuration.command} "#{image}" "#{text_file}" #{lang} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{configuration.options_cmd.join(' ')}`
         
     | 
| 
       176 
143 
     | 
    
         
             
              end
         
     | 
| 
       177 
144 
     | 
    
         | 
| 
      
 145 
     | 
    
         
            +
              # Read result file
         
     | 
| 
       178 
146 
     | 
    
         
             
              def convert_text
         
     | 
| 
       179 
147 
     | 
    
         
             
                @value = File.read(text_file_with_ext).to_s
         
     | 
| 
       180 
148 
     | 
    
         
             
              end
         
     | 
| 
       181 
149 
     | 
    
         | 
| 
      
 150 
     | 
    
         
            +
              # Hook called after the convert command runs
         
     | 
| 
       182 
151 
     | 
    
         
             
              def after_convert_hook
         
     | 
| 
       183 
152 
     | 
    
         
             
              end
         
     | 
| 
       184 
153 
     | 
    
         | 
| 
         @@ -187,21 +156,7 @@ class RTesseract 
     | 
|
| 
       187 
156 
     | 
    
         
             
                convert_command
         
     | 
| 
       188 
157 
     | 
    
         
             
                after_convert_hook
         
     | 
| 
       189 
158 
     | 
    
         
             
                convert_text
         
     | 
| 
       190 
     | 
    
         
            -
                 
     | 
| 
       191 
     | 
    
         
            -
              rescue => error
         
     | 
| 
       192 
     | 
    
         
            -
                raise RTesseract::ConversionError.new(error), error, caller
         
     | 
| 
       193 
     | 
    
         
            -
              end
         
     | 
| 
       194 
     | 
    
         
            -
             
     | 
| 
       195 
     | 
    
         
            -
              # Read image from memory blob
         
     | 
| 
       196 
     | 
    
         
            -
              def from_blob(blob, ext = '')
         
     | 
| 
       197 
     | 
    
         
            -
                blob_file = Tempfile.new(['blob', ext], encoding: 'ascii-8bit')
         
     | 
| 
       198 
     | 
    
         
            -
                blob_file.binmode.write(blob)
         
     | 
| 
       199 
     | 
    
         
            -
                blob_file.rewind
         
     | 
| 
       200 
     | 
    
         
            -
                blob_file.flush
         
     | 
| 
       201 
     | 
    
         
            -
                self.source = blob_file.path
         
     | 
| 
       202 
     | 
    
         
            -
                convert
         
     | 
| 
       203 
     | 
    
         
            -
                remove_file([blob_file])
         
     | 
| 
       204 
     | 
    
         
            -
                self
         
     | 
| 
      
 159 
     | 
    
         
            +
                RTesseract::Utils.remove_files([@image, text_file_with_ext])
         
     | 
| 
       205 
160 
     | 
    
         
             
              rescue => error
         
     | 
| 
       206 
161 
     | 
    
         
             
                raise RTesseract::ConversionError.new(error), error, caller
         
     | 
| 
       207 
162 
     | 
    
         
             
              end
         
     | 
| 
         @@ -220,19 +175,17 @@ class RTesseract 
     | 
|
| 
       220 
175 
     | 
    
         | 
| 
       221 
176 
     | 
    
         
             
              # Remove spaces and break-lines
         
     | 
| 
       222 
177 
     | 
    
         
             
              def to_s_without_spaces
         
     | 
| 
       223 
     | 
    
         
            -
                to_s. 
     | 
| 
       224 
     | 
    
         
            -
              end
         
     | 
| 
       225 
     | 
    
         
            -
             
     | 
| 
       226 
     | 
    
         
            -
              def self.choose_processor!(processor)
         
     | 
| 
       227 
     | 
    
         
            -
                processor =
         
     | 
| 
       228 
     | 
    
         
            -
                if MiniMagickProcessor.a_name?(processor.to_s)
         
     | 
| 
       229 
     | 
    
         
            -
                  MiniMagickProcessor
         
     | 
| 
       230 
     | 
    
         
            -
                elsif NoneProcessor.a_name?(processor.to_s)
         
     | 
| 
       231 
     | 
    
         
            -
                  NoneProcessor
         
     | 
| 
       232 
     | 
    
         
            -
                else
         
     | 
| 
       233 
     | 
    
         
            -
                  RMagickProcessor
         
     | 
| 
       234 
     | 
    
         
            -
                end
         
     | 
| 
       235 
     | 
    
         
            -
                processor.setup
         
     | 
| 
       236 
     | 
    
         
            -
                processor
         
     | 
| 
      
 178 
     | 
    
         
            +
                to_s.delete(' ').delete("\n").delete("\r")
         
     | 
| 
       237 
179 
     | 
    
         
             
              end
         
     | 
| 
       238 
180 
     | 
    
         
             
            end
         
     | 
| 
      
 181 
     | 
    
         
            +
             
     | 
| 
      
 182 
     | 
    
         
            +
            require 'rtesseract/mixed'
         
     | 
| 
      
 183 
     | 
    
         
            +
            require 'rtesseract/box'
         
     | 
| 
      
 184 
     | 
    
         
            +
            require 'rtesseract/box_char'
         
     | 
| 
      
 185 
     | 
    
         
            +
            require 'rtesseract/blob'
         
     | 
| 
      
 186 
     | 
    
         
            +
            require 'rtesseract/processor'
         
     | 
| 
      
 187 
     | 
    
         
            +
             
     | 
| 
      
 188 
     | 
    
         
            +
            # Processors
         
     | 
| 
      
 189 
     | 
    
         
            +
            require 'processors/rmagick.rb'
         
     | 
| 
      
 190 
     | 
    
         
            +
            require 'processors/mini_magick.rb'
         
     | 
| 
      
 191 
     | 
    
         
            +
            require 'processors/none.rb'
         
     | 
| 
         @@ -0,0 +1,34 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Blob methods
         
     | 
| 
      
 2 
     | 
    
         
            +
            class RTesseract
         
     | 
| 
      
 3 
     | 
    
         
            +
              # Read the image at src, yield it to the block, then convert it from its blob
         
     | 
| 
      
 4 
     | 
    
         
            +
              def self.read(src = nil, options = {})
         
     | 
| 
      
 5 
     | 
    
         
            +
                fail RTesseract::ImageNotSelectedError if src.nil?
         
     | 
| 
      
 6 
     | 
    
         
            +
                processor = RTesseract::Processor.choose_processor!(options.option(:processor, nil))
         
     | 
| 
      
 7 
     | 
    
         
            +
                image = processor.read_with_processor(src.to_s)
         
     | 
| 
      
 8 
     | 
    
         
            +
                yield(image)
         
     | 
| 
      
 9 
     | 
    
         
            +
                object = RTesseract.new('', options).from_blob(image.to_blob)
         
     | 
| 
      
 10 
     | 
    
         
            +
                object
         
     | 
| 
      
 11 
     | 
    
         
            +
              end
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
              # Read the source image, yield it to the block, then convert the image the block returns
         
     | 
| 
      
 14 
     | 
    
         
            +
              def read
         
     | 
| 
      
 15 
     | 
    
         
            +
                image = @processor.read_with_processor(@source.to_s)
         
     | 
| 
      
 16 
     | 
    
         
            +
                new_image = yield(image)
         
     | 
| 
      
 17 
     | 
    
         
            +
                from_blob(new_image.to_blob, File.extname(@source.to_s))
         
     | 
| 
      
 18 
     | 
    
         
            +
                self
         
     | 
| 
      
 19 
     | 
    
         
            +
              end
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
              # Read image from memory blob
         
     | 
| 
      
 22 
     | 
    
         
            +
              def from_blob(blob, ext = '')
         
     | 
| 
      
 23 
     | 
    
         
            +
                blob_file = Tempfile.new(['blob', ext], encoding: 'ascii-8bit')
         
     | 
| 
      
 24 
     | 
    
         
            +
                blob_file.binmode.write(blob)
         
     | 
| 
      
 25 
     | 
    
         
            +
                blob_file.rewind
         
     | 
| 
      
 26 
     | 
    
         
            +
                blob_file.flush
         
     | 
| 
      
 27 
     | 
    
         
            +
                self.source = blob_file.path
         
     | 
| 
      
 28 
     | 
    
         
            +
                convert
         
     | 
| 
      
 29 
     | 
    
         
            +
                RTesseract::Utils.remove_files([blob_file])
         
     | 
| 
      
 30 
     | 
    
         
            +
                self
         
     | 
| 
      
 31 
     | 
    
         
            +
              rescue => error
         
     | 
| 
      
 32 
     | 
    
         
            +
                raise RTesseract::ConversionError.new(error), error, caller
         
     | 
| 
      
 33 
     | 
    
         
            +
              end
         
     | 
| 
      
 34 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/rtesseract/box.rb
    CHANGED
    
    | 
         @@ -2,37 +2,45 @@ 
     | 
|
| 
       2 
2 
     | 
    
         
             
            require 'nokogiri'
         
     | 
| 
       3 
3 
     | 
    
         
             
            require 'fileutils'
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
      
 5 
     | 
    
         
            +
            # RTesseract
         
     | 
| 
       5 
6 
     | 
    
         
             
            class RTesseract
         
     | 
| 
       6 
7 
     | 
    
         
             
              # Class to read char positions from an image
         
     | 
| 
       7 
8 
     | 
    
         
             
              class Box < RTesseract
         
     | 
| 
      
 9 
     | 
    
         
            +
                # Setting value as blank array
         
     | 
| 
       8 
10 
     | 
    
         
             
                def initialize_hook
         
     | 
| 
       9 
     | 
    
         
            -
                  @value 
     | 
| 
      
 11 
     | 
    
         
            +
                  @value = []
         
     | 
| 
       10 
12 
     | 
    
         
             
                end
         
     | 
| 
       11 
13 
     | 
    
         | 
| 
      
 14 
     | 
    
         
            +
                # Additional options to config file
         
     | 
| 
       12 
15 
     | 
    
         
             
                def config_hook
         
     | 
| 
       13 
16 
     | 
    
         
             
                  @options['tessedit_create_hocr'] = 1 # Split Words configuration
         
     | 
| 
       14 
17 
     | 
    
         
             
                end
         
     | 
| 
       15 
18 
     | 
    
         | 
| 
      
 19 
     | 
    
         
            +
                # Words converted
         
     | 
| 
       16 
20 
     | 
    
         
             
                def words
         
     | 
| 
       17 
21 
     | 
    
         
             
                  convert if @value == []
         
     | 
| 
       18 
22 
     | 
    
         
             
                  @value
         
     | 
| 
       19 
23 
     | 
    
         
             
                end
         
     | 
| 
       20 
24 
     | 
    
         | 
| 
      
 25 
     | 
    
         
            +
                # Extension of file
         
     | 
| 
       21 
26 
     | 
    
         
             
                def file_ext
         
     | 
| 
       22 
27 
     | 
    
         
             
                  '.hocr'
         
     | 
| 
       23 
28 
     | 
    
         
             
                end
         
     | 
| 
       24 
29 
     | 
    
         | 
| 
      
 30 
     | 
    
         
            +
                # Read the result file
         
     | 
| 
       25 
31 
     | 
    
         
             
                def parse_file
         
     | 
| 
       26 
32 
     | 
    
         
             
                  html = Nokogiri::HTML(File.read(text_file_with_ext))
         
     | 
| 
       27 
33 
     | 
    
         
             
                  html.css('span.ocrx_word, span.ocr_word')
         
     | 
| 
       28 
34 
     | 
    
         
             
                end
         
     | 
| 
       29 
35 
     | 
    
         | 
| 
      
 36 
     | 
    
         
            +
                # Return words to value
         
     | 
| 
       30 
37 
     | 
    
         
             
                def convert_text
         
     | 
| 
       31 
38 
     | 
    
         
             
                  text_objects =  []
         
     | 
| 
       32 
39 
     | 
    
         
             
                  parse_file.each { |word| text_objects << BoxParser.new(word).to_h }
         
     | 
| 
       33 
40 
     | 
    
         
             
                  @value = text_objects
         
     | 
| 
       34 
41 
     | 
    
         
             
                end
         
     | 
| 
       35 
42 
     | 
    
         | 
| 
      
 43 
     | 
    
         
            +
                # Move file html to hocr
         
     | 
| 
       36 
44 
     | 
    
         
             
                def after_convert_hook
         
     | 
| 
       37 
45 
     | 
    
         
             
                  FileUtils.mv(text_file_with_ext('.html'), text_file_with_ext) rescue nil
         
     | 
| 
       38 
46 
     | 
    
         
             
                end
         
     | 
| 
         @@ -56,6 +64,7 @@ class RTesseract 
     | 
|
| 
       56 
64 
     | 
    
         
             
                    @attributes = title.gsub(';', '').split(' ')
         
     | 
| 
       57 
65 
     | 
    
         
             
                  end
         
     | 
| 
       58 
66 
     | 
    
         | 
| 
      
 67 
     | 
    
         
            +
                  # Hash of word and position
         
     | 
| 
       59 
68 
     | 
    
         
             
                  def to_h
         
     | 
| 
       60 
69 
     | 
    
         
             
                    {
         
     | 
| 
       61 
70 
     | 
    
         
             
                      word: @word.text,
         
     | 
    
        data/lib/rtesseract/box_char.rb
    CHANGED
    
    | 
         @@ -1,4 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # encoding: UTF-8
         
     | 
| 
      
 2 
     | 
    
         
            +
            # RTesseract
         
     | 
| 
       2 
3 
     | 
    
         
             
            class RTesseract
         
     | 
| 
       3 
4 
     | 
    
         
             
              # Class to read char positions from an image
         
     | 
| 
       4 
5 
     | 
    
         
             
              class BoxChar < Box
         
     | 
| 
         @@ -8,10 +9,12 @@ class RTesseract 
     | 
|
| 
       8 
9 
     | 
    
         | 
| 
       9 
10 
     | 
    
         
             
                alias_method :characters, :words
         
     | 
| 
       10 
11 
     | 
    
         | 
| 
      
 12 
     | 
    
         
            +
                # Extension of file
         
     | 
| 
       11 
13 
     | 
    
         
             
                def file_ext
         
     | 
| 
       12 
14 
     | 
    
         
             
                  '.box'
         
     | 
| 
       13 
15 
     | 
    
         
             
                end
         
     | 
| 
       14 
16 
     | 
    
         | 
| 
      
 17 
     | 
    
         
            +
                # Read the result file
         
     | 
| 
       15 
18 
     | 
    
         
             
                def parse_file
         
     | 
| 
       16 
19 
     | 
    
         
             
                  File.read(text_file_with_ext).to_s
         
     | 
| 
       17 
20 
     | 
    
         
             
                end
         
     | 
| 
         @@ -1,12 +1,16 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            #  
     | 
| 
      
 1 
     | 
    
         
            +
            # RTesseract
         
     | 
| 
       2 
2 
     | 
    
         
             
            class RTesseract
         
     | 
| 
       3 
3 
     | 
    
         
             
              # Aliases to languages names
         
     | 
| 
       4 
4 
     | 
    
         
             
              LANGUAGES = {
         
     | 
| 
       5 
     | 
    
         
            -
                ' 
     | 
| 
       6 
     | 
    
         
            -
                ' 
     | 
| 
       7 
     | 
    
         
            -
                ' 
     | 
| 
       8 
     | 
    
         
            -
                ' 
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
      
 5 
     | 
    
         
            +
                'en' => 'eng',
         
     | 
| 
      
 6 
     | 
    
         
            +
                'en-us' => 'eng',
         
     | 
| 
      
 7 
     | 
    
         
            +
                'english' => 'eng',
         
     | 
| 
      
 8 
     | 
    
         
            +
                'pt' => 'por',
         
     | 
| 
      
 9 
     | 
    
         
            +
                'pt-br' => 'por',
         
     | 
| 
      
 10 
     | 
    
         
            +
                'portuguese' => 'por',
         
     | 
| 
      
 11 
     | 
    
         
            +
                'it' => 'ita',
         
     | 
| 
      
 12 
     | 
    
         
            +
                'sp' => 'spa'
         
     | 
| 
      
 13 
     | 
    
         
            +
              }.freeze
         
     | 
| 
       10 
14 
     | 
    
         | 
| 
       11 
15 
     | 
    
         
             
              # Configuration class
         
     | 
| 
       12 
16 
     | 
    
         
             
              class Configuration
         
     | 
| 
         @@ -16,16 +20,19 @@ class RTesseract 
     | 
|
| 
       16 
20 
     | 
    
         
             
                  @processor = 'rmagick'
         
     | 
| 
       17 
21 
     | 
    
         
             
                end
         
     | 
| 
       18 
22 
     | 
    
         | 
| 
      
 23 
     | 
    
         
            +
                # Global configuration
         
     | 
| 
       19 
24 
     | 
    
         
             
                def parent
         
     | 
| 
       20 
25 
     | 
    
         
             
                  @parent ||= RTesseract.configuration || RTesseract::Configuration.new
         
     | 
| 
       21 
26 
     | 
    
         
             
                end
         
     | 
| 
       22 
27 
     | 
    
         | 
| 
      
 28 
     | 
    
         
            +
                # Set value of option
         
     | 
| 
       23 
29 
     | 
    
         
             
                def option(options, name, default = nil)
         
     | 
| 
       24 
30 
     | 
    
         
             
                  self.instance_variable_set("@#{name}", options.option(name, parent.send(name)) || default)
         
     | 
| 
       25 
31 
     | 
    
         
             
                end
         
     | 
| 
       26 
32 
     | 
    
         | 
| 
      
 33 
     | 
    
         
            +
                # Return the values of options
         
     | 
| 
       27 
34 
     | 
    
         
             
                def load_options(options, names = [])
         
     | 
| 
       28 
     | 
    
         
            -
                  names.each{ |name| option(options, name, nil) }
         
     | 
| 
      
 35 
     | 
    
         
            +
                  names.each { |name| option(options, name, nil) }
         
     | 
| 
       29 
36 
     | 
    
         
             
                end
         
     | 
| 
       30 
37 
     | 
    
         
             
              end
         
     | 
| 
       31 
38 
     | 
    
         | 
| 
         @@ -38,6 +45,7 @@ class RTesseract 
     | 
|
| 
       38 
45 
     | 
    
         
             
                yield(configuration)
         
     | 
| 
       39 
46 
     | 
    
         
             
              end
         
     | 
| 
       40 
47 
     | 
    
         | 
| 
      
 48 
     | 
    
         
            +
              # Default command
         
     | 
| 
       41 
49 
     | 
    
         
             
              def self.default_command
         
     | 
| 
       42 
50 
     | 
    
         
             
                TesseractBin::Executables[:tesseract] || 'tesseract'
         
     | 
| 
       43 
51 
     | 
    
         
             
              rescue
         
     | 
| 
         @@ -49,7 +57,7 @@ class RTesseract 
     | 
|
| 
       49 
57 
     | 
    
         
             
                RTesseract::Configuration.new.tap do |config|
         
     | 
| 
       50 
58 
     | 
    
         
             
                  config.command = config.option(options, :command, RTesseract.default_command)
         
     | 
| 
       51 
59 
     | 
    
         
             
                  config.processor = config.option(options, :processor, 'rmagick')
         
     | 
| 
       52 
     | 
    
         
            -
                  config.load_options(options, [ 
     | 
| 
      
 60 
     | 
    
         
            +
                  config.load_options(options, [:lang, :psm, :tessdata_dir, :user_words, :user_patterns])
         
     | 
| 
       53 
61 
     | 
    
         
             
                  config.debug = config.option(options, :debug, false)
         
     | 
| 
       54 
62 
     | 
    
         
             
                  config.options_cmd = [options.option(:options, nil)].flatten.compact
         
     | 
| 
       55 
63 
     | 
    
         
             
                end
         
     | 
    
        data/lib/rtesseract/errors.rb
    CHANGED
    
    
    
        data/lib/rtesseract/mixed.rb
    CHANGED
    
    | 
         @@ -1,4 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # encoding: UTF-8
         
     | 
| 
      
 2 
     | 
    
         
            +
            # RTesseract
         
     | 
| 
       2 
3 
     | 
    
         
             
            class RTesseract
         
     | 
| 
       3 
4 
     | 
    
         
             
              # Class to read an image from specified areas
         
     | 
| 
       4 
5 
     | 
    
         
             
              class Mixed
         
     | 
| 
         @@ -12,11 +13,13 @@ class RTesseract 
     | 
|
| 
       12 
13 
     | 
    
         
             
                  yield self if block_given?
         
     | 
| 
       13 
14 
     | 
    
         
             
                end
         
     | 
| 
       14 
15 
     | 
    
         | 
| 
       15 
     | 
    
         
            -
                 
     | 
| 
      
 16 
     | 
    
         
            +
                # Add areas
         
     | 
| 
      
 17 
     | 
    
         
            +
                def area(points)
         
     | 
| 
       16 
18 
     | 
    
         
             
                  @value = ''
         
     | 
| 
       17 
     | 
    
         
            -
                  @areas <<  
     | 
| 
      
 19 
     | 
    
         
            +
                  @areas << points
         
     | 
| 
       18 
20 
     | 
    
         
             
                end
         
     | 
| 
       19 
21 
     | 
    
         | 
| 
      
 22 
     | 
    
         
            +
                # Clear areas
         
     | 
| 
       20 
23 
     | 
    
         
             
                def clear_areas
         
     | 
| 
       21 
24 
     | 
    
         
             
                  @areas = []
         
     | 
| 
       22 
25 
     | 
    
         
             
                end
         
     | 
| 
         @@ -25,7 +28,7 @@ class RTesseract 
     | 
|
| 
       25 
28 
     | 
    
         
             
                def convert
         
     | 
| 
       26 
29 
     | 
    
         
             
                  @value = []
         
     | 
| 
       27 
30 
     | 
    
         
             
                  @areas.each_with_object(RTesseract.new(@source.to_s, @options.dup)) do |area, image|
         
     | 
| 
       28 
     | 
    
         
            -
                    image.crop!(area) 
     | 
| 
      
 31 
     | 
    
         
            +
                    image.crop!(area)
         
     | 
| 
       29 
32 
     | 
    
         
             
                    @value << image.to_s
         
     | 
| 
       30 
33 
     | 
    
         
             
                  end
         
     | 
| 
       31 
34 
     | 
    
         
             
                rescue => error
         
     | 
| 
         @@ -45,7 +48,7 @@ class RTesseract 
     | 
|
| 
       45 
48 
     | 
    
         | 
| 
       46 
49 
     | 
    
         
             
                # Remove spaces and break-lines
         
     | 
| 
       47 
50 
     | 
    
         
             
                def to_s_without_spaces
         
     | 
| 
       48 
     | 
    
         
            -
                  to_s. 
     | 
| 
      
 51 
     | 
    
         
            +
                  to_s.delete(' ').delete("\n").delete("\r")
         
     | 
| 
       49 
52 
     | 
    
         
             
                end
         
     | 
| 
       50 
53 
     | 
    
         
             
              end
         
     | 
| 
       51 
54 
     | 
    
         
             
            end
         
     | 
| 
         @@ -0,0 +1,19 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # RTesseract
         
     | 
| 
      
 2 
     | 
    
         
            +
            class RTesseract
         
     | 
| 
      
 3 
     | 
    
         
            +
              # Processor management
         
     | 
| 
      
 4 
     | 
    
         
            +
              module Processor
         
     | 
| 
      
 5 
     | 
    
         
            +
                # Return the processor
         
     | 
| 
      
 6 
     | 
    
         
            +
                def self.choose_processor!(processor)
         
     | 
| 
      
 7 
     | 
    
         
            +
                  processor =
         
     | 
| 
      
 8 
     | 
    
         
            +
                  if RTesseract::Processor::MiniMagickProcessor.a_name?(processor.to_s)
         
     | 
| 
      
 9 
     | 
    
         
            +
                    MiniMagickProcessor
         
     | 
| 
      
 10 
     | 
    
         
            +
                  elsif RTesseract::Processor::NoneProcessor.a_name?(processor.to_s)
         
     | 
| 
      
 11 
     | 
    
         
            +
                    NoneProcessor
         
     | 
| 
      
 12 
     | 
    
         
            +
                  else
         
     | 
| 
      
 13 
     | 
    
         
            +
                    RMagickProcessor
         
     | 
| 
      
 14 
     | 
    
         
            +
                  end
         
     | 
| 
      
 15 
     | 
    
         
            +
                  processor.setup
         
     | 
| 
      
 16 
     | 
    
         
            +
                  processor
         
     | 
| 
      
 17 
     | 
    
         
            +
                end
         
     | 
| 
      
 18 
     | 
    
         
            +
              end
         
     | 
| 
      
 19 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,34 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # RTesseract
         
     | 
| 
      
 2 
     | 
    
         
            +
            class RTesseract
         
     | 
| 
      
 3 
     | 
    
         
            +
              # Some utils methods
         
     | 
| 
      
 4 
     | 
    
         
            +
              module Utils
         
     | 
| 
      
 5 
     | 
    
         
            +
                # Remove files or Tempfile
         
     | 
| 
      
 6 
     | 
    
         
            +
                def self.remove_files(files = [])
         
     | 
| 
      
 7 
     | 
    
         
            +
                  files.each do |file|
         
     | 
| 
      
 8 
     | 
    
         
            +
                    self.remove_file(file)
         
     | 
| 
      
 9 
     | 
    
         
            +
                  end
         
     | 
| 
      
 10 
     | 
    
         
            +
                  true
         
     | 
| 
      
 11 
     | 
    
         
            +
                rescue => error
         
     | 
| 
      
 12 
     | 
    
         
            +
                  raise RTesseract::TempFilesNotRemovedError.new(error: error, files: files)
         
     | 
| 
      
 13 
     | 
    
         
            +
                end
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
                # Remove file or Tempfile
         
     | 
| 
      
 16 
     | 
    
         
            +
                def self.remove_file(file)
         
     | 
| 
      
 17 
     | 
    
         
            +
                  if file.is_a?(Tempfile)
         
     | 
| 
      
 18 
     | 
    
         
            +
                    file.close
         
     | 
| 
      
 19 
     | 
    
         
            +
                    file.unlink
         
     | 
| 
      
 20 
     | 
    
         
            +
                  else
         
     | 
| 
      
 21 
     | 
    
         
            +
                    File.unlink(file)
         
     | 
| 
      
 22 
     | 
    
         
            +
                  end
         
     | 
| 
      
 23 
     | 
    
         
            +
                  true
         
     | 
| 
      
 24 
     | 
    
         
            +
                end
         
     | 
| 
      
 25 
     | 
    
         
            +
              end
         
     | 
| 
      
 26 
     | 
    
         
            +
            end
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
            # Hash
         
     | 
| 
      
 29 
     | 
    
         
            +
            class Hash
         
     | 
| 
      
 30 
     | 
    
         
            +
              # return the value and remove from hash
         
     | 
| 
      
 31 
     | 
    
         
            +
              def option(attr_name, default)
         
     | 
| 
      
 32 
     | 
    
         
            +
                delete(attr_name.to_s) || delete(attr_name) || default
         
     | 
| 
      
 33 
     | 
    
         
            +
              end
         
     | 
| 
      
 34 
     | 
    
         
            +
            end
         
     | 
    
        data/rtesseract.gemspec
    CHANGED
    
    | 
         @@ -2,16 +2,16 @@ 
     | 
|
| 
       2 
2 
     | 
    
         
             
            # DO NOT EDIT THIS FILE DIRECTLY
         
     | 
| 
       3 
3 
     | 
    
         
             
            # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
         
     | 
| 
       4 
4 
     | 
    
         
             
            # -*- encoding: utf-8 -*-
         
     | 
| 
       5 
     | 
    
         
            -
            # stub: rtesseract 2.0. 
     | 
| 
      
 5 
     | 
    
         
            +
            # stub: rtesseract 2.0.1 ruby lib
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
       7 
7 
     | 
    
         
             
            Gem::Specification.new do |s|
         
     | 
| 
       8 
8 
     | 
    
         
             
              s.name = "rtesseract"
         
     | 
| 
       9 
     | 
    
         
            -
              s.version = "2.0. 
     | 
| 
      
 9 
     | 
    
         
            +
              s.version = "2.0.1"
         
     | 
| 
       10 
10 
     | 
    
         | 
| 
       11 
11 
     | 
    
         
             
              s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
         
     | 
| 
       12 
12 
     | 
    
         
             
              s.require_paths = ["lib"]
         
     | 
| 
       13 
13 
     | 
    
         
             
              s.authors = ["Danilo Jeremias da Silva"]
         
     | 
| 
       14 
     | 
    
         
            -
              s.date = "2016- 
     | 
| 
      
 14 
     | 
    
         
            +
              s.date = "2016-05-17"
         
     | 
| 
       15 
15 
     | 
    
         
             
              s.description = "Ruby library for working with the Tesseract OCR."
         
     | 
| 
       16 
16 
     | 
    
         
             
              s.email = "dannnylo@gmail.com"
         
     | 
| 
       17 
17 
     | 
    
         
             
              s.extra_rdoc_files = [
         
     | 
| 
         @@ -22,6 +22,7 @@ Gem::Specification.new do |s| 
     | 
|
| 
       22 
22 
     | 
    
         
             
                ".document",
         
     | 
| 
       23 
23 
     | 
    
         
             
                ".rspec",
         
     | 
| 
       24 
24 
     | 
    
         
             
                ".travis.yml",
         
     | 
| 
      
 25 
     | 
    
         
            +
                "CHANGELOG.md",
         
     | 
| 
       25 
26 
     | 
    
         
             
                "Gemfile",
         
     | 
| 
       26 
27 
     | 
    
         
             
                "Gemfile.lock",
         
     | 
| 
       27 
28 
     | 
    
         
             
                "LICENSE.txt",
         
     | 
| 
         @@ -32,13 +33,16 @@ Gem::Specification.new do |s| 
     | 
|
| 
       32 
33 
     | 
    
         
             
                "lib/processors/none.rb",
         
     | 
| 
       33 
34 
     | 
    
         
             
                "lib/processors/rmagick.rb",
         
     | 
| 
       34 
35 
     | 
    
         
             
                "lib/rtesseract.rb",
         
     | 
| 
      
 36 
     | 
    
         
            +
                "lib/rtesseract/blob.rb",
         
     | 
| 
       35 
37 
     | 
    
         
             
                "lib/rtesseract/box.rb",
         
     | 
| 
       36 
38 
     | 
    
         
             
                "lib/rtesseract/box_char.rb",
         
     | 
| 
       37 
39 
     | 
    
         
             
                "lib/rtesseract/configuration.rb",
         
     | 
| 
       38 
40 
     | 
    
         
             
                "lib/rtesseract/errors.rb",
         
     | 
| 
       39 
41 
     | 
    
         
             
                "lib/rtesseract/mixed.rb",
         
     | 
| 
       40 
     | 
    
         
            -
                "lib/ 
     | 
| 
      
 42 
     | 
    
         
            +
                "lib/rtesseract/processor.rb",
         
     | 
| 
      
 43 
     | 
    
         
            +
                "lib/rtesseract/utils.rb",
         
     | 
| 
       41 
44 
     | 
    
         
             
                "rtesseract.gemspec",
         
     | 
| 
      
 45 
     | 
    
         
            +
                "spec/configs/eng.user-words.txt",
         
     | 
| 
       42 
46 
     | 
    
         
             
                "spec/images/README.pdf",
         
     | 
| 
       43 
47 
     | 
    
         
             
                "spec/images/blank.tif",
         
     | 
| 
       44 
48 
     | 
    
         
             
                "spec/images/mixed.tif",
         
     | 
| 
         @@ -6,17 +6,7 @@ describe 'Rtesseract::BoxChar' do 
     | 
|
| 
       6 
6 
     | 
    
         
             
                @path = Pathname.new(__FILE__.gsub('rtesseract_box_char_spec.rb', '')).expand_path
         
     | 
| 
       7 
7 
     | 
    
         
             
                @image_tiff = @path.join('images', 'test.tif').to_s
         
     | 
| 
       8 
8 
     | 
    
         
             
                @words_image = @path.join('images', 'test_words.png').to_s
         
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
              it 'bounding box by char' do
         
     | 
| 
       12 
     | 
    
         
            -
                expect(RTesseract::BoxChar.new(@image_tiff).characters.is_a?(Array)).to eql(true)
         
     | 
| 
       13 
     | 
    
         
            -
                expect(RTesseract::BoxChar.new(@image_tiff).characters).to eql([
         
     | 
| 
       14 
     | 
    
         
            -
                  { char: '4', x_start: 145, y_start: 14, x_end: 159, y_end: 33 },
         
     | 
| 
       15 
     | 
    
         
            -
                  { char: '3', x_start: 184, y_start: 14, x_end: 196, y_end: 33 },
         
     | 
| 
       16 
     | 
    
         
            -
                  { char: 'X', x_start: 222, y_start: 14, x_end: 238, y_end: 32 },
         
     | 
| 
       17 
     | 
    
         
            -
                  { char: 'F', x_start: 260, y_start: 14, x_end: 273, y_end: 32 }])
         
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
                expect(RTesseract::BoxChar.new(@words_image).characters).to eql([
         
     | 
| 
      
 9 
     | 
    
         
            +
                @values = [
         
     | 
| 
       20 
10 
     | 
    
         
             
                  { char: 'I', x_start: 52, y_start: 91, x_end: 54, y_end: 104 },
         
     | 
| 
       21 
11 
     | 
    
         
             
                  { char: 'f', x_start: 56, y_start: 91, x_end: 63, y_end: 105 },
         
     | 
| 
       22 
12 
     | 
    
         
             
                  { char: 'y', x_start: 69, y_start: 87, x_end: 79, y_end: 101 },
         
     | 
| 
         @@ -72,7 +62,18 @@ describe 'Rtesseract::BoxChar' do 
     | 
|
| 
       72 
62 
     | 
    
         
             
                  { char: 'p', x_start: 228, y_start: 43, x_end: 237, y_end: 57 },
         
     | 
| 
       73 
63 
     | 
    
         
             
                  { char: 'e', x_start: 238, y_start: 47, x_end: 248, y_end: 57 },
         
     | 
| 
       74 
64 
     | 
    
         
             
                  { char: 'n', x_start: 250, y_start: 47, x_end: 258, y_end: 57 },
         
     | 
| 
       75 
     | 
    
         
            -
                  { char: '.', x_start: 261, y_start: 47, x_end: 263, y_end: 49 }] 
     | 
| 
      
 65 
     | 
    
         
            +
                  { char: '.', x_start: 261, y_start: 47, x_end: 263, y_end: 49 }]
         
     | 
| 
      
 66 
     | 
    
         
            +
              end
         
     | 
| 
      
 67 
     | 
    
         
            +
             
     | 
| 
      
 68 
     | 
    
         
            +
              it 'bounding box by char' do
         
     | 
| 
      
 69 
     | 
    
         
            +
                expect(RTesseract::BoxChar.new(@image_tiff).characters.is_a?(Array)).to eql(true)
         
     | 
| 
      
 70 
     | 
    
         
            +
                expect(RTesseract::BoxChar.new(@image_tiff).characters).to eql([
         
     | 
| 
      
 71 
     | 
    
         
            +
                  { char: '4', x_start: 145, y_start: 14, x_end: 159, y_end: 33 },
         
     | 
| 
      
 72 
     | 
    
         
            +
                  { char: '3', x_start: 184, y_start: 14, x_end: 196, y_end: 33 },
         
     | 
| 
      
 73 
     | 
    
         
            +
                  { char: 'X', x_start: 222, y_start: 14, x_end: 238, y_end: 32 },
         
     | 
| 
      
 74 
     | 
    
         
            +
                  { char: 'F', x_start: 260, y_start: 14, x_end: 273, y_end: 32 }])
         
     | 
| 
      
 75 
     | 
    
         
            +
             
     | 
| 
      
 76 
     | 
    
         
            +
                expect(RTesseract::BoxChar.new(@words_image).characters).to eql(@values)
         
     | 
| 
       76 
77 
     | 
    
         | 
| 
       77 
78 
     | 
    
         
             
                expect { RTesseract::BoxChar.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
         
     | 
| 
       78 
79 
     | 
    
         
             
                expect { RTesseract::BoxChar.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
         
     | 
    
        data/spec/rtesseract_spec.rb
    CHANGED
    
    | 
         @@ -81,6 +81,7 @@ describe 'Rtesseract' do 
     | 
|
| 
       81 
81 
     | 
    
         
             
                expect(RTesseract.new(@image_tif, lang: 'eng').to_s_without_spaces).to eql('43XF')
         
     | 
| 
       82 
82 
     | 
    
         | 
| 
       83 
83 
     | 
    
         
             
                expect(RTesseract.new(@image_tif, lang: 'eng').lang).to eql(' -l eng ')
         
     | 
| 
      
 84 
     | 
    
         
            +
                expect(RTesseract.new(@image_tif, lang: 'it').lang).to eql(' -l ita ')
         
     | 
| 
       84 
85 
     | 
    
         | 
| 
       85 
86 
     | 
    
         
             
                # Invalid lang object
         
     | 
| 
       86 
87 
     | 
    
         
             
                expect(RTesseract.new(@image_tif, lang: MakeStringError.new).lang).to eql('')
         
     | 
| 
         @@ -98,6 +99,7 @@ describe 'Rtesseract' do 
     | 
|
| 
       98 
99 
     | 
    
         
             
                expect(RTesseract.new(@image_tif, chop_enable: 0).config).to eql('chop_enable 0')
         
     | 
| 
       99 
100 
     | 
    
         
             
                expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0).config).to eql("chop_enable 0\nenable_assoc 0")
         
     | 
| 
       100 
101 
     | 
    
         
             
                expect(RTesseract.new(@image_tif, chop_enable: 0).to_s_without_spaces).to eql('43XF')
         
     | 
| 
      
 102 
     | 
    
         
            +
                expect(RTesseract.new(@image_tif, tessedit_char_whitelist: "ABCDEF12345").to_s_without_spaces).to eql('43F')
         
     | 
| 
       101 
103 
     | 
    
         
             
              end
         
     | 
| 
       102 
104 
     | 
    
         | 
| 
       103 
105 
     | 
    
         
             
              it ' crop image' do
         
     | 
| 
         @@ -121,14 +123,14 @@ describe 'Rtesseract' do 
     | 
|
| 
       121 
123 
     | 
    
         | 
| 
       122 
124 
     | 
    
         
             
              it ' use a instance' do
         
     | 
| 
       123 
125 
     | 
    
         
             
                expect(RTesseract.new(Magick::Image.read(@image_tif.to_s).first).to_s_without_spaces).to eql('43XF')
         
     | 
| 
       124 
     | 
    
         
            -
                expect(RMagickProcessor.a_name?('teste')).to eql(false)
         
     | 
| 
       125 
     | 
    
         
            -
                expect(RMagickProcessor.a_name?('rmagick')).to eql(true)
         
     | 
| 
       126 
     | 
    
         
            -
                expect(RMagickProcessor.a_name?('RMagickProcessor')).to eql(true)
         
     | 
| 
       127 
     | 
    
         
            -
                expect(MiniMagickProcessor.a_name?('teste')).to eql(false)
         
     | 
| 
       128 
     | 
    
         
            -
                expect(MiniMagickProcessor.a_name?('mini_magick')).to eql(true)
         
     | 
| 
       129 
     | 
    
         
            -
                expect(MiniMagickProcessor.a_name?('MiniMagickProcessor')).to eql(true)
         
     | 
| 
       130 
     | 
    
         
            -
                expect(NoneProcessor.a_name?('none')).to eql(true)
         
     | 
| 
       131 
     | 
    
         
            -
                expect(NoneProcessor.a_name?('NoneProcessor')).to eql(true)
         
     | 
| 
      
 126 
     | 
    
         
            +
                expect(RTesseract::Processor::RMagickProcessor.a_name?('teste')).to eql(false)
         
     | 
| 
      
 127 
     | 
    
         
            +
                expect(RTesseract::Processor::RMagickProcessor.a_name?('rmagick')).to eql(true)
         
     | 
| 
      
 128 
     | 
    
         
            +
                expect(RTesseract::Processor::RMagickProcessor.a_name?('RMagickProcessor')).to eql(true)
         
     | 
| 
      
 129 
     | 
    
         
            +
                expect(RTesseract::Processor::MiniMagickProcessor.a_name?('teste')).to eql(false)
         
     | 
| 
      
 130 
     | 
    
         
            +
                expect(RTesseract::Processor::MiniMagickProcessor.a_name?('mini_magick')).to eql(true)
         
     | 
| 
      
 131 
     | 
    
         
            +
                expect(RTesseract::Processor::MiniMagickProcessor.a_name?('MiniMagickProcessor')).to eql(true)
         
     | 
| 
      
 132 
     | 
    
         
            +
                expect(RTesseract::Processor::NoneProcessor.a_name?('none')).to eql(true)
         
     | 
| 
      
 133 
     | 
    
         
            +
                expect(RTesseract::Processor::NoneProcessor.a_name?('NoneProcessor')).to eql(true)
         
     | 
| 
       132 
134 
     | 
    
         
             
              end
         
     | 
| 
       133 
135 
     | 
    
         | 
| 
       134 
136 
     | 
    
         
             
              it ' change image in a block' do
         
     | 
| 
         @@ -172,10 +174,9 @@ describe 'Rtesseract' do 
     | 
|
| 
       172 
174 
     | 
    
         
             
              end
         
     | 
| 
       173 
175 
     | 
    
         | 
| 
       174 
176 
     | 
    
         
             
              it 'remove a file' do
         
     | 
| 
       175 
     | 
    
         
            -
                 
     | 
| 
       176 
     | 
    
         
            -
                rtesseract.remove_file(Tempfile.new('config'))
         
     | 
| 
      
 177 
     | 
    
         
            +
                RTesseract::Utils.remove_files(Tempfile.new('config'))
         
     | 
| 
       177 
178 
     | 
    
         | 
| 
       178 
     | 
    
         
            -
                expect {  
     | 
| 
      
 179 
     | 
    
         
            +
                expect { RTesseract::Utils.remove_files(Pathname.new(Dir.tmpdir).join('test_not_exists')) }.to raise_error(RTesseract::TempFilesNotRemovedError)
         
     | 
| 
       179 
180 
     | 
    
         
             
              end
         
     | 
| 
       180 
181 
     | 
    
         | 
| 
       181 
182 
     | 
    
         
             
              it ' support  default config processors' do
         
     | 
| 
         @@ -201,7 +202,6 @@ describe 'Rtesseract' do 
     | 
|
| 
       201 
202 
     | 
    
         
             
                RTesseract.configure { |config| config.psm = 7 }
         
     | 
| 
       202 
203 
     | 
    
         
             
                expect(RTesseract.new(@image_tif).psm).to eql(' -psm 7 ')
         
     | 
| 
       203 
204 
     | 
    
         | 
| 
       204 
     | 
    
         
            -
             
     | 
| 
       205 
205 
     | 
    
         
             
                RTesseract.configure { |config| config.tessdata_dir = '/tmp/test' }
         
     | 
| 
       206 
206 
     | 
    
         
             
                expect(RTesseract.new(@image_tif).tessdata_dir).to eql(' --tessdata-dir /tmp/test ')
         
     | 
| 
       207 
207 
     | 
    
         | 
| 
         @@ -220,5 +220,7 @@ describe 'Rtesseract' do 
     | 
|
| 
       220 
220 
     | 
    
         
             
                expect(RTesseract.new(@image_tif, tessdata_dir: MakeStringError.new).tessdata_dir).to eql('')
         
     | 
| 
       221 
221 
     | 
    
         
             
                expect(RTesseract.new(@image_tif, user_words: MakeStringError.new).user_words).to eql('')
         
     | 
| 
       222 
222 
     | 
    
         
             
                expect(RTesseract.new(@image_tif, user_patterns: MakeStringError.new).user_patterns).to eql('')
         
     | 
| 
      
 223 
     | 
    
         
            +
             
     | 
| 
      
 224 
     | 
    
         
            +
                # expect(RTesseract.new(@path.join('images', 'test_words.png').to_s, psm: 3, user_words: @path.join('configs', 'eng.user-words.txt').to_s).to_s).to eql("If you are a friend,\nyou speak the password,\nand the doors will open.\n\n")
         
     | 
| 
       223 
225 
     | 
    
         
             
              end
         
     | 
| 
       224 
226 
     | 
    
         
             
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: rtesseract
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 2.0.0
     | 
| 
      
 4 
     | 
    
         
            +
              version: 2.0.1
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Danilo Jeremias da Silva
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2016- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2016-05-17 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: nokogiri
         
     | 
| 
         @@ -119,6 +119,7 @@ files: 
     | 
|
| 
       119 
119 
     | 
    
         
             
            - ".document"
         
     | 
| 
       120 
120 
     | 
    
         
             
            - ".rspec"
         
     | 
| 
       121 
121 
     | 
    
         
             
            - ".travis.yml"
         
     | 
| 
      
 122 
     | 
    
         
            +
            - CHANGELOG.md
         
     | 
| 
       122 
123 
     | 
    
         
             
            - Gemfile
         
     | 
| 
       123 
124 
     | 
    
         
             
            - Gemfile.lock
         
     | 
| 
       124 
125 
     | 
    
         
             
            - LICENSE.txt
         
     | 
| 
         @@ -129,13 +130,16 @@ files: 
     | 
|
| 
       129 
130 
     | 
    
         
             
            - lib/processors/none.rb
         
     | 
| 
       130 
131 
     | 
    
         
             
            - lib/processors/rmagick.rb
         
     | 
| 
       131 
132 
     | 
    
         
             
            - lib/rtesseract.rb
         
     | 
| 
      
 133 
     | 
    
         
            +
            - lib/rtesseract/blob.rb
         
     | 
| 
       132 
134 
     | 
    
         
             
            - lib/rtesseract/box.rb
         
     | 
| 
       133 
135 
     | 
    
         
             
            - lib/rtesseract/box_char.rb
         
     | 
| 
       134 
136 
     | 
    
         
             
            - lib/rtesseract/configuration.rb
         
     | 
| 
       135 
137 
     | 
    
         
             
            - lib/rtesseract/errors.rb
         
     | 
| 
       136 
138 
     | 
    
         
             
            - lib/rtesseract/mixed.rb
         
     | 
| 
       137 
     | 
    
         
            -
            - lib/utils.rb
     | 
| 
      
 139 
     | 
    
         
            +
            - lib/rtesseract/processor.rb
         
     | 
| 
      
 140 
     | 
    
         
            +
            - lib/rtesseract/utils.rb
         
     | 
| 
       138 
141 
     | 
    
         
             
            - rtesseract.gemspec
         
     | 
| 
      
 142 
     | 
    
         
            +
            - spec/configs/eng.user-words.txt
         
     | 
| 
       139 
143 
     | 
    
         
             
            - spec/images/README.pdf
         
     | 
| 
       140 
144 
     | 
    
         
             
            - spec/images/blank.tif
         
     | 
| 
       141 
145 
     | 
    
         
             
            - spec/images/mixed.tif
         
     |