simple_tesseract 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,6 @@
1
require 'mkmf'

# Build configuration for the simple_tesseract_ext native extension.
#
# Both libraries are mandatory for the extension to link, so stop right here
# with an explicit message instead of emitting a Makefile that can only fail
# later with an obscure linker or load error.
%w[tiff tesseract_full].each do |lib|
  abort "simple_tesseract: required library '#{lib}' was not found" unless have_library(lib)
end

create_makefile('simple_tesseract_ext')
@@ -0,0 +1,41 @@
1
+ #include <stdio.h>
2
+ #include <ruby/ruby.h>
3
+ #include <tiffio.h>
4
+ #include <tesseract/img.h>
5
+ #include <tesseract/imgs.h>
6
+ #include <tesseract/baseapi.h>
7
+
8
+ void read_tiff_image(TIFF* tif, IMAGE* image);
9
+
10
+ extern "C" {
11
+ VALUE rb_cTesseract;
12
+
13
+ VALUE
14
+ rb_cTesseract_get_text(VALUE self, VALUE language, VALUE file,
15
+ VALUE rx, VALUE ry, VALUE rwidth, VALUE rheight) {
16
+ const char *lang = (const char *)(language == Qnil ? NULL : RSTRING_PTR(language));
17
+ TIFF * tif = TIFFOpen(RSTRING_PTR(file), "r");
18
+ int x = NUM2INT(rx),
19
+ y = NUM2INT(ry),
20
+ width = NUM2INT(rwidth),
21
+ height = NUM2INT(rheight);
22
+ IMAGE img;
23
+ read_tiff_image(tif, &img);
24
+ int bytes_per_line = check_legal_image_size(img.get_xsize(),
25
+ img.get_ysize(), img.get_bpp());
26
+
27
+ TessBaseAPI::InitWithLanguage(NULL, NULL, lang, NULL, false, 0, NULL);
28
+ char *text = TessBaseAPI::TesseractRect(img.get_buffer(), img.get_bpp()/8,
29
+ bytes_per_line, x, y, width, height);
30
+ TessBaseAPI::End();
31
+ TIFFClose(tif);
32
+
33
+ return rb_str_new2(text);
34
+ }
35
+
36
+ void
37
+ Init_simple_tesseract_ext() {
38
+ rb_cTesseract = rb_define_class("Tesseract", rb_cObject);
39
+ rb_define_private_method(rb_cTesseract, "get_text", (VALUE (*)(...))rb_cTesseract_get_text, 6);
40
+ }
41
+ }
@@ -0,0 +1,72 @@
1
require 'stringio'
require 'tempfile'

require 'RMagick'
require 'simple_tesseract_ext'
4
+
5
# Ruby front-end for the simple_tesseract_ext native extension.
#
# An instance holds either a source path (+src+) or raw image data (+blob+),
# converts it to a temporary TIFF through RMagick and hands that file to the
# native, private +get_text+ method for recognition.
class Tesseract
  attr_reader :src, :blob
  attr_accessor :lang, :editor

  # @param opts [Hash] configuration; the hash itself is left untouched.
  #   Recognised keys: +:lang+ / +:language+ (defaults to 'eng'),
  #   +:src+ / +:source+ / +:image+, +:blob+, +:editor+ (callable applied to
  #   the image before it is written out; identity by default) and +:strip+.
  def initialize(opts = {})
    @lang = opts[:lang] || opts[:language] || 'eng'
    self.src = opts[:src] || opts[:source] || opts[:image]
    self.blob = opts[:blob]
    @editor = opts[:editor] || lambda { |image| image }
    self.strip = opts[:strip]

    # Keep the Tempfile object referenced for as long as this instance lives.
    # If it were dropped, its own finalizer could unlink the path at any GC
    # run, including between writing the TIFF and reading it back. Holding it
    # also means the file is cleaned up when this object is collected, without
    # registering a finalizer that captures +self+.
    @tmpfile = Tempfile.new(['rbtesseract', '.tiff'])
    @tmpfile.close
    @tmp = @tmpfile.path
  end

  # Sets the source file; a non-nil value discards any previously set blob.
  def src=(file)
    @blob = nil if file
    @src = file
  end

  # Sets the raw image data; a non-nil value discards any previously set src.
  def blob=(string)
    @src = nil if string
    @blob = string
  end

  # Enables or disables stripping of the recognised text (coerced to boolean).
  def strip=(bool)
    @strip = !!bool
  end

  # @return [Boolean] whether recognised text is stripped before being returned
  def strip?
    @strip
  end

  alias language lang
  alias language= lang=
  alias source src
  alias source= src=
  alias image src
  alias image= src=

  # Recognises the text inside the given rectangle of the image.
  #
  # @param x [Integer, nil] left offset, 0 when nil
  # @param y [Integer, nil] top offset, 0 when nil
  # @param width [Integer, nil] rectangle width, full image width when nil
  # @param height [Integer, nil] rectangle height, full image height when nil
  # @return [String] the recognised text
  # @raise [ArgumentError] when neither a source nor a blob has been set
  def solve(x = 0, y = 0, width = nil, height = nil)
    raise ArgumentError, 'no image source or blob given' unless @src || @blob

    frames = @src ? Magick::Image.read(@src) : Magick::Image.from_blob(@blob)
    editor.call(frames.first).write(@tmp)

    x ||= 0
    y ||= 0
    # Only decode the written file again when a dimension is actually missing.
    unless width && height
      written = Magick::Image.read(@tmp).first
      width ||= written.columns
      height ||= written.rows
    end

    text = get_text(@lang, @tmp, x, y, width, height)
    text.strip! if strip?
    text
  end

  # Runs #solve once per area (each an argument list for #solve) and
  # concatenates the results.
  #
  # @return [String]
  def crops(*areas)
    areas.map { |area| solve(*area) }.join
  end

  # @return [String] the text of the whole image
  def to_s
    solve
  end

  # Removes the temporary TIFF. Best effort: file-system errors such as the
  # file already being gone are ignored, other errors still surface.
  #
  # @return [nil]
  def finalize
    File.unlink(@tmp) if File.exist?(@tmp)
    nil
  rescue SystemCallError
    nil
  end
  alias close finalize
end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simple_tesseract
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - shura
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-09-06 00:00:00 +02:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rmagick
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ type: :runtime
32
+ version_requirements: *id001
33
+ description: tesseract ruby bindings
34
+ email: shura1991@gmail.com
35
+ executables: []
36
+
37
+ extensions:
38
+ - ext/extconf.rb
39
+ extra_rdoc_files: []
40
+
41
+ files:
42
+ - lib/tesseract.rb
43
+ - ext/simple_tesseract_ext.cpp
44
+ - ext/extconf.rb
45
+ has_rdoc: true
46
+ homepage: http://github.com/shurizzle/simple_tesseract
47
+ licenses: []
48
+
49
+ post_install_message:
50
+ rdoc_options: []
51
+
52
+ require_paths:
53
+ - lib
54
+ required_ruby_version: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ segments:
60
+ - 0
61
+ version: "0"
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ requirements: []
71
+
72
+ rubyforge_project:
73
+ rubygems_version: 1.3.7
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: tesseract ruby bindings
77
+ test_files: []
78
+