simple_tesseract 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/extconf.rb +6 -0
- data/ext/simple_tesseract_ext.cpp +41 -0
- data/lib/tesseract.rb +72 -0
- metadata +78 -0
    
        data/ext/simple_tesseract_ext.cpp
    ADDED
    
    
| @@ -0,0 +1,41 @@ | |
| 1 | 
            +
            #include <stdio.h>
         | 
| 2 | 
            +
            #include <ruby/ruby.h>
         | 
| 3 | 
            +
            #include <tiffio.h>
         | 
| 4 | 
            +
            #include <tesseract/img.h>
         | 
| 5 | 
            +
            #include <tesseract/imgs.h>
         | 
| 6 | 
            +
            #include <tesseract/baseapi.h>
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            // Forward declaration only: no definition appears in this file, so the
            // symbol has to be resolved at link time.
            // NOTE(review): presumably supplied by the Tesseract library; confirm.
            void read_tiff_image(TIFF* tif, IMAGE* image);
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            extern "C" {
         | 
| 11 | 
            +
              // Handle to the Ruby class "Tesseract"; assigned in
              // Init_simple_tesseract_ext().
              VALUE rb_cTesseract;
         | 
| 12 | 
            +
             | 
| 13 | 
            +
              // Tesseract#get_text(language, file, x, y, width, height) -> String
              //
              // Runs OCR on a rectangle of the TIFF image stored at `file`.
              //
              //   language  String language code handed to Tesseract, or nil
              //   file      String path of the TIFF image to read
              //   rx, ry    Integer top-left corner of the rectangle
              //   rwidth, rheight
              //             Integer size of the rectangle
              //
              // Returns the recognised text as a new Ruby String (empty when the
              // engine produces no result).
              // Raises TypeError / ArgumentError / RangeError for arguments that
              // cannot be converted, and IOError when the image cannot be opened.
              VALUE
              rb_cTesseract_get_text(VALUE self, VALUE language, VALUE file,
                  VALUE rx, VALUE ry, VALUE rwidth, VALUE rheight) {
                // Every conversion below may raise (a longjmp out of this frame), so
                // all of them run before any native resource is acquired.
                int x = NUM2INT(rx),
                    y = NUM2INT(ry),
                    width = NUM2INT(rwidth),
                    height = NUM2INT(rheight);
                // StringValueCStr type-checks the value and rejects embedded NUL
                // bytes, unlike a bare RSTRING_PTR.
                const char *lang = NIL_P(language) ? NULL : StringValueCStr(language);
                const char *path = StringValueCStr(file);

                TIFF *tif = TIFFOpen(path, "r");
                if (tif == NULL) {
                  rb_raise(rb_eIOError, "unable to open TIFF image: %s", path);
                }

                char *text = NULL;
                {
                  // Scoped so the IMAGE destructor has run before any Ruby API that
                  // might raise is called again.
                  IMAGE img;
                  read_tiff_image(tif, &img);
                  int bytes_per_line = check_legal_image_size(img.get_xsize(),
                      img.get_ysize(), img.get_bpp());

                  TessBaseAPI::InitWithLanguage(NULL, NULL, lang, NULL, false, 0, NULL);
                  text = TessBaseAPI::TesseractRect(img.get_buffer(), img.get_bpp()/8,
                      bytes_per_line, x, y, width, height);
                  TessBaseAPI::End();
                }
                TIFFClose(tif);

                // rb_str_new2 raises on a NULL pointer; report "no text" instead in
                // case the engine hands back nothing.
                if (text == NULL) {
                  return rb_str_new2("");
                }

                VALUE result = rb_str_new2(text);
                // The buffer returned by TesseractRect is owned by the caller and,
                // per the Tesseract base API contract, released with delete [].
                delete [] text;
                return result;
              }
         | 
| 35 | 
            +
             | 
| 36 | 
            +
              void
         | 
| 37 | 
            +
              Init_simple_tesseract_ext() {
         | 
| 38 | 
            +
                rb_cTesseract = rb_define_class("Tesseract", rb_cObject);
         | 
| 39 | 
            +
                rb_define_private_method(rb_cTesseract, "get_text", (VALUE (*)(...))rb_cTesseract_get_text, 6);
         | 
| 40 | 
            +
              }
         | 
| 41 | 
            +
            }
         | 
    
        data/lib/tesseract.rb
    ADDED
    
    | @@ -0,0 +1,72 @@ | |
| 1 | 
            +
            require 'RMagick'
         | 
| 2 | 
            +
            require 'simple_tesseract_ext'
         | 
| 3 | 
            +
            require 'stringio'
            require 'tempfile'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            # Ruby front-end for the native +get_text+ OCR method.
            #
            # The input image (a file path in +src+ or raw bytes in +blob+) is passed
            # through the +editor+ callable, written to a scratch TIFF file with
            # RMagick, and that file is handed to +get_text+ together with the
            # language code and the rectangle to recognise.
            class Tesseract
              attr_reader :src, :blob
              attr_accessor :lang, :editor

              # Builds a recogniser. The options hash is only read, never modified.
              #
              # @param opts [Hash] recognised keys:
              #   :lang / :language          language code (default 'eng')
              #   :src / :source / :image    path of the image to read
              #   :blob                      image bytes (takes precedence over the path)
              #   :editor                    callable receiving the loaded image and
              #                              returning the image to recognise
              #                              (default: identity)
              #   :strip                     truthy to strip whitespace from results
              def initialize(opts = {})
                @lang = opts[:lang] || opts[:language] || 'eng'
                self.src = opts[:src] || opts[:source] || opts[:image]
                self.blob = opts[:blob]
                @editor = opts[:editor] || lambda { |image| image }
                self.strip = opts[:strip]

                # Keep the Tempfile object referenced for the lifetime of this
                # instance: were it dropped, its own finalizer could unlink the
                # scratch file while #solve is still using it. The same finalizer
                # removes the file once this instance is garbage collected, so no
                # ObjectSpace finalizer (which would have to capture +self+ and so
                # keep the instance alive) is registered.
                @tmpfile = Tempfile.new(['rbtesseract', '.tiff'])
                @tmpfile.close
                @tmp = @tmpfile.path
              end

              # Sets the image path; a non-nil path discards any blob set earlier.
              def src=(file)
                @blob = nil if file
                @src = file
              end

              # Sets the image bytes; a non-nil blob discards any path set earlier.
              def blob=(string)
                @src = nil if string
                @blob = string
              end

              # Enables or disables stripping of the recognised text.
              def strip=(bool)
                @strip = !!bool
              end

              # @return [Boolean] whether results are stripped
              def strip?
                @strip
              end

              alias language lang
              alias language= lang=
              alias source src
              alias source= src=
              alias image src
              alias image= src=

              # Recognises the text inside a rectangle of the image.
              #
              # @param x [Integer, nil] left edge (nil means 0)
              # @param y [Integer, nil] top edge (nil means 0)
              # @param width [Integer, nil] rectangle width (nil means image width)
              # @param height [Integer, nil] rectangle height (nil means image height)
              # @return [String] recognised text, stripped when #strip? is true
              # @raise [ArgumentError] when neither src nor blob has been set
              def solve(x = 0, y = 0, width = nil, height = nil)
                raise ArgumentError, 'no image given: set src or blob first' unless @src || @blob

                frames = @src ? Magick::Image.read(@src) : Magick::Image.from_blob(@blob)
                editor.call(frames.first).write(@tmp)

                x ||= 0
                y ||= 0
                # Only read the scratch file back when a dimension is actually missing.
                unless width && height
                  written = Magick::Image.read(@tmp).first
                  width ||= written.columns
                  height ||= written.rows
                end

                text = get_text(@lang, @tmp, x, y, width, height)
                text.strip! if strip?
                text
              end

              # Recognises several rectangles and concatenates the results.
              #
              # @param areas [Array<Array>] one [x, y, width, height] list per rectangle
              # @return [String]
              def crops(*areas)
                areas.map { |area| solve(*area) }.join
              end

              # @return [String] text of the whole image
              def to_s
                solve
              end

              # Removes the scratch file. Safe to call more than once.
              #
              # @return [Integer, nil] result of File.unlink, or nil when the file
              #   could not be removed (for example because it is already gone)
              def finalize
                File.unlink(@tmp)
              rescue SystemCallError
                nil
              end
              alias close finalize
            end
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,78 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification 
         | 
| 2 | 
            +
            name: simple_tesseract
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            +
              prerelease: false
         | 
| 5 | 
            +
              segments: 
         | 
| 6 | 
            +
              - 0
         | 
| 7 | 
            +
              - 0
         | 
| 8 | 
            +
              - 1
         | 
| 9 | 
            +
              version: 0.0.1
         | 
| 10 | 
            +
            platform: ruby
         | 
| 11 | 
            +
            authors: 
         | 
| 12 | 
            +
            - shura
         | 
| 13 | 
            +
            autorequire: 
         | 
| 14 | 
            +
            bindir: bin
         | 
| 15 | 
            +
            cert_chain: []
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            date: 2011-09-06 00:00:00 +02:00
         | 
| 18 | 
            +
            default_executable: 
         | 
| 19 | 
            +
            dependencies: 
         | 
| 20 | 
            +
            - !ruby/object:Gem::Dependency 
         | 
| 21 | 
            +
              name: rmagick
         | 
| 22 | 
            +
              prerelease: false
         | 
| 23 | 
            +
              requirement: &id001 !ruby/object:Gem::Requirement 
         | 
| 24 | 
            +
                none: false
         | 
| 25 | 
            +
                requirements: 
         | 
| 26 | 
            +
                - - ">="
         | 
| 27 | 
            +
                  - !ruby/object:Gem::Version 
         | 
| 28 | 
            +
                    segments: 
         | 
| 29 | 
            +
                    - 0
         | 
| 30 | 
            +
                    version: "0"
         | 
| 31 | 
            +
              type: :runtime
         | 
| 32 | 
            +
              version_requirements: *id001
         | 
| 33 | 
            +
            description: tesseract ruby bindings
         | 
| 34 | 
            +
            email: shura1991@gmail.com
         | 
| 35 | 
            +
            executables: []
         | 
| 36 | 
            +
             | 
| 37 | 
            +
            extensions: 
         | 
| 38 | 
            +
            - ext/extconf.rb
         | 
| 39 | 
            +
            extra_rdoc_files: []
         | 
| 40 | 
            +
             | 
| 41 | 
            +
            files: 
         | 
| 42 | 
            +
            - lib/tesseract.rb
         | 
| 43 | 
            +
            - ext/simple_tesseract_ext.cpp
         | 
| 44 | 
            +
            - ext/extconf.rb
         | 
| 45 | 
            +
            has_rdoc: true
         | 
| 46 | 
            +
            homepage: http://github.com/shurizzle/simple_tesseract
         | 
| 47 | 
            +
            licenses: []
         | 
| 48 | 
            +
             | 
| 49 | 
            +
            post_install_message: 
         | 
| 50 | 
            +
            rdoc_options: []
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            require_paths: 
         | 
| 53 | 
            +
            - lib
         | 
| 54 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement 
         | 
| 55 | 
            +
              none: false
         | 
| 56 | 
            +
              requirements: 
         | 
| 57 | 
            +
              - - ">="
         | 
| 58 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 59 | 
            +
                  segments: 
         | 
| 60 | 
            +
                  - 0
         | 
| 61 | 
            +
                  version: "0"
         | 
| 62 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement 
         | 
| 63 | 
            +
              none: false
         | 
| 64 | 
            +
              requirements: 
         | 
| 65 | 
            +
              - - ">="
         | 
| 66 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 67 | 
            +
                  segments: 
         | 
| 68 | 
            +
                  - 0
         | 
| 69 | 
            +
                  version: "0"
         | 
| 70 | 
            +
            requirements: []
         | 
| 71 | 
            +
             | 
| 72 | 
            +
            rubyforge_project: 
         | 
| 73 | 
            +
            rubygems_version: 1.3.7
         | 
| 74 | 
            +
            signing_key: 
         | 
| 75 | 
            +
            specification_version: 3
         | 
| 76 | 
            +
            summary: tesseract ruby bindings
         | 
| 77 | 
            +
            test_files: []
         | 
| 78 | 
            +
             |