simple_tesseract 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
require 'mkmf'

# Probe for the native libraries the extension is linked against, in link
# order (libtiff first, then the Tesseract library). The result of each probe
# is deliberately not inspected here.
%w[tiff tesseract_full].each { |lib| have_library(lib) }

# Emit the Makefile that builds the simple_tesseract_ext shared object.
create_makefile('simple_tesseract_ext')
@@ -0,0 +1,41 @@
1
+ #include <stdio.h>
2
+ #include <ruby/ruby.h>
3
+ #include <tiffio.h>
4
+ #include <tesseract/img.h>
5
+ #include <tesseract/imgs.h>
6
+ #include <tesseract/baseapi.h>
7
+
8
+ void read_tiff_image(TIFF* tif, IMAGE* image);
9
+
10
+ extern "C" {
11
+ VALUE rb_cTesseract;
12
+
13
+ VALUE
14
+ rb_cTesseract_get_text(VALUE self, VALUE language, VALUE file,
15
+ VALUE rx, VALUE ry, VALUE rwidth, VALUE rheight) {
16
+ const char *lang = (const char *)(language == Qnil ? NULL : RSTRING_PTR(language));
17
+ TIFF * tif = TIFFOpen(RSTRING_PTR(file), "r");
18
+ int x = NUM2INT(rx),
19
+ y = NUM2INT(ry),
20
+ width = NUM2INT(rwidth),
21
+ height = NUM2INT(rheight);
22
+ IMAGE img;
23
+ read_tiff_image(tif, &img);
24
+ int bytes_per_line = check_legal_image_size(img.get_xsize(),
25
+ img.get_ysize(), img.get_bpp());
26
+
27
+ TessBaseAPI::InitWithLanguage(NULL, NULL, lang, NULL, false, 0, NULL);
28
+ char *text = TessBaseAPI::TesseractRect(img.get_buffer(), img.get_bpp()/8,
29
+ bytes_per_line, x, y, width, height);
30
+ TessBaseAPI::End();
31
+ TIFFClose(tif);
32
+
33
+ return rb_str_new2(text);
34
+ }
35
+
36
+ void
37
+ Init_simple_tesseract_ext() {
38
+ rb_cTesseract = rb_define_class("Tesseract", rb_cObject);
39
+ rb_define_private_method(rb_cTesseract, "get_text", (VALUE (*)(...))rb_cTesseract_get_text, 6);
40
+ }
41
+ }
@@ -0,0 +1,72 @@
1
require 'RMagick'
require 'simple_tesseract_ext'
require 'stringio'
require 'tempfile'
4
+
5
# Ruby front-end for the native +get_text+ method supplied by the
# simple_tesseract_ext extension.
#
# An instance holds either an image path (+src+) or raw image data (+blob+),
# a language code, an optional +editor+ callable applied to the loaded image
# before recognition, and a scratch TIFF file that is handed to the extension.
class Tesseract
  attr_reader :src, :blob
  attr_accessor :lang, :editor

  # Builds a recogniser.
  #
  # opts - Hash of options (it is only read, never modified):
  #        :lang / :language        - language code, defaults to 'eng'.
  #        :src / :source / :image  - path of the image to read.
  #        :blob                    - raw image data; takes over from the
  #                                   path when given.
  #        :editor                  - callable receiving the loaded image and
  #                                   returning the image to recognise;
  #                                   defaults to the identity.
  #        :strip                   - truthy to strip the recognised text.
  def initialize(opts = {})
    @lang = opts[:lang] || opts[:language] || 'eng'
    self.src = opts[:src] || opts[:source] || opts[:image]
    self.blob = opts[:blob]
    @editor = opts[:editor] || lambda { |image| image }
    self.strip = opts[:strip]

    # Keep the Tempfile object referenced for the lifetime of this instance.
    # A Tempfile removes its file when it is garbage collected, so dropping
    # the object (and keeping only the path) lets the scratch file disappear
    # while it is still in use. Holding it also makes a separate finalizer on
    # this object unnecessary: the file goes away once both are collected.
    @tmpfile = Tempfile.new(['rbtesseract', '.tiff'])
    @tmpfile.close
    @tmp = @tmpfile.path
  end

  # Sets the image path; a non-nil path discards any blob set earlier.
  def src=(file)
    @blob = nil if file
    @src = file
  end

  # Sets the raw image data; non-nil data discards any path set earlier.
  def blob=(string)
    @src = nil if string
    @blob = string
  end

  # Stores the strip flag coerced to true/false.
  def strip=(bool)
    @strip = !!bool
  end

  # Returns true when recognised text is stripped before being returned.
  def strip?
    @strip
  end

  alias language lang
  alias language= lang=
  alias source src
  alias source= src=
  alias image src
  alias image= src=

  # Recognises the text inside one rectangle of the image.
  #
  # x, y          - origin of the rectangle; nil is treated as 0.
  # width, height - size of the rectangle; nil means the full image width
  #                 or height.
  #
  # Returns the String produced by the extension, stripped when #strip? is
  # set.
  def solve(x = 0, y = 0, width = nil, height = nil)
    loaded = @src ? Magick::Image.read(@src) : Magick::Image.from_blob(@blob)
    # The edited image is written to the scratch TIFF the extension reads.
    editor.call(loaded.first).write(@tmp)

    img = Magick::Image.read(@tmp).first
    x ||= 0
    y ||= 0
    width ||= img.columns
    height ||= img.rows

    text = get_text(@lang, @tmp, x, y, width, height)
    text.strip! if strip?
    text
  end

  # Recognises several areas and concatenates the results.
  #
  # areas - each one an Array of arguments for #solve.
  #
  # Returns a String.
  def crops(*areas)
    areas.map { |area| solve(*area) }.join
  end

  # Recognises the whole image.
  def to_s
    solve
  end

  # Removes the scratch file. Best effort: a file that is already gone or
  # cannot be removed is ignored, so calling this repeatedly is safe.
  def finalize
    File.unlink(@tmp) if @tmp
  rescue SystemCallError
    nil
  end
  alias close finalize
end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simple_tesseract
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - shura
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-09-06 00:00:00 +02:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rmagick
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ type: :runtime
32
+ version_requirements: *id001
33
+ description: tesseract ruby bindings
34
+ email: shura1991@gmail.com
35
+ executables: []
36
+
37
+ extensions:
38
+ - ext/extconf.rb
39
+ extra_rdoc_files: []
40
+
41
+ files:
42
+ - lib/tesseract.rb
43
+ - ext/simple_tesseract_ext.cpp
44
+ - ext/extconf.rb
45
+ has_rdoc: true
46
+ homepage: http://github.com/shurizzle/simple_tesseract
47
+ licenses: []
48
+
49
+ post_install_message:
50
+ rdoc_options: []
51
+
52
+ require_paths:
53
+ - lib
54
+ required_ruby_version: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ segments:
60
+ - 0
61
+ version: "0"
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ requirements: []
71
+
72
+ rubyforge_project:
73
+ rubygems_version: 1.3.7
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: tesseract ruby bindings
77
+ test_files: []
78
+