tesseract-ocr 0.0.1.4 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -19,15 +19,14 @@ Example
19
19
  require 'tesseract'
20
20
 
21
21
  e = Tesseract::Engine.new {|e|
22
- e.language = :eng
23
-
24
- e.blacklist '|'
22
+ e.language = :eng
23
+ e.blacklist = '|'
25
24
  }
26
25
 
27
26
  e.text_for('test/first.png').strip # => 'ABC'
28
27
  e.words_for('test/second.png') # => ["I'm", "12", "and", "what", "is", "this.", "INSTALL", "GENTOO", "OH", "HAI", "1234"]
29
28
 
30
- e.with { |e| e.whitelist '1234567890' }.text_for('test/second.png') # => "11111 12 3116 1111113115111151\n11157411 6511700\n014 11141 1234\n\n"
29
+ e.with { |e| e.whitelist = '1234567890' }.text_for('test/second.png') # => "11111 12 3116 1111113115111151\n11157411 6511700\n014 11141 1234\n\n"
31
30
  ```
32
31
 
33
32
  You can pass to `#text_for` either a path, an IO object or a string containing the image,
data/lib/tesseract/api.rb CHANGED
@@ -22,7 +22,7 @@
22
22
  # or implied, of meh.
23
23
  #++
24
24
 
25
- require 'iso-639'
25
+ require 'tesseract/extensions'
26
26
  require 'tesseract/c'
27
27
 
28
28
  module Tesseract
@@ -32,33 +32,35 @@ class API
32
32
  # Get a pointer to a tesseract-ocr usable image from a path, a string
33
33
  # with the data or an IO stream.
34
34
  def self.image_for (image)
35
- if image.is_a?(String) && File.exists?(File.expand_path(image))
36
- C::pix_read(File.expand_path(image))
37
- elsif image.is_a?(String)
38
- C::pix_read_mem(image, image.bytesize)
39
- elsif image.is_a?(IO)
40
- C::pix_read_stream(image.to_i)
41
- else
42
- raise ArgumentError, 'invalid image'
43
- end.tap {|image|
44
- class << image
45
- def width
46
- C::pix_get_width(self)
47
- end
48
-
49
- def height
50
- C::pix_get_height(self)
51
- end
35
+ image = suppress_stderr {
36
+ if image.is_a?(String) && (File.exists?(File.expand_path(image)) rescue nil)
37
+ C::pix_read(File.expand_path(image))
38
+ elsif image.is_a?(String)
39
+ C::pix_read_mem(image, image.bytesize)
40
+ elsif image.is_a?(IO)
41
+ C::pix_read_stream(image.to_i)
52
42
  end
53
-
54
- ObjectSpace.define_finalizer image, image_finalizer(image)
55
43
  }
44
+
45
+ raise ArgumentError, 'invalid image' if image.nil? || image.null?
46
+
47
+ image = FFI::AutoPointer.new(image, method(:image_finalizer))
48
+
49
+ class << image
50
+ def width
51
+ C::pix_get_width(self)
52
+ end
53
+
54
+ def height
55
+ C::pix_get_height(self)
56
+ end
57
+ end
58
+
59
+ image
56
60
  end
57
61
 
58
- def self.image_finalizer (image) # :nodoc:
59
- proc {
60
- C::pix_destroy(pointer)
61
- }
62
+ def self.image_finalizer (pointer) # :nodoc:
63
+ C::pix_destroy(pointer)
62
64
  end
63
65
 
64
66
  ##
@@ -77,15 +79,11 @@ class API
77
79
  }
78
80
 
79
81
  def initialize
80
- @internal = C::create
81
-
82
- ObjectSpace.define_finalizer self, self.class.finalizer(to_ffi)
82
+ @internal = FFI::AutoPointer.new(C::create, self.class.method(:finalizer))
83
83
  end
84
84
 
85
85
  def self.finalizer (pointer) # :nodoc:
86
- proc {
87
- C::destroy(pointer)
88
- }
86
+ C::destroy(pointer)
89
87
  end
90
88
 
91
89
  def version
data/lib/tesseract/c.rb CHANGED
@@ -44,7 +44,7 @@ module C
44
44
 
45
45
  cpp.function %{
46
46
  Pix* pix_read_fd (int fd) {
47
- return pixReadStream(fdopen(fd, "r"), 0);
47
+ return pixReadStream(fdopen(fd, "rb"), 0);
48
48
  }
49
49
  }
50
50
 
@@ -22,7 +22,6 @@
22
22
  # or implied, of meh.
23
23
  #++
24
24
 
25
- require 'namedic'
26
25
  require 'tesseract/api'
27
26
 
28
27
  module Tesseract
@@ -91,20 +90,20 @@ class Engine
91
90
  end
92
91
  }
93
92
 
94
- def blacklist (what = nil)
95
- if what
96
- set('tessedit_char_blacklist', what.respond_to?(:to_a) ? what.to_a.join : what.to_s)
97
- else
98
- get('tessedit_char_blacklist').chars.to_a
99
- end
93
+ def blacklist
94
+ get('tessedit_char_blacklist').chars.to_a
100
95
  end
101
96
 
102
- def whitelist (what = nil)
103
- if what
104
- set('tessedit_char_whitelist', what.respond_to?(:to_a) ? what.to_a.join : what.to_s)
105
- else
106
- get('tessedit_char_whitelist').chars.to_a
107
- end
97
+ def blacklist= (value)
98
+ set('tessedit_char_blacklist', value.respond_to?(:to_a) ? value.to_a.join : value.to_s)
99
+ end
100
+
101
+ def whitelist
102
+ get('tessedit_char_whitelist').chars.to_a
103
+ end
104
+
105
+ def whitelist= (value)
106
+ set('tessedit_char_whitelist', value.respond_to?(:to_a) ? value.to_a.join : value.to_s)
108
107
  end
109
108
 
110
109
  def page_segmentation_mode
@@ -0,0 +1,38 @@
1
+ #--
2
+ # Copyright 2011 meh. All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without modification, are
5
+ # permitted provided that the following conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright notice, this list of
8
+ # conditions and the following disclaimer.
9
+ #
10
+ # THIS SOFTWARE IS PROVIDED BY meh ''AS IS'' AND ANY EXPRESS OR IMPLIED
11
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
12
+ # FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL meh OR
13
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
14
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
15
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
16
+ # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
17
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
18
+ # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
+ #
20
+ # The views and conclusions contained in the software and documentation are those of the
21
+ # authors and should not be interpreted as representing official policies, either expressed
22
+ # or implied, of meh.
23
+ #++
24
+
25
+ require 'namedic'
26
+ require 'iso-639'
27
+
28
+ module Kernel
29
+ def suppress_stderr
30
+ old = IO.pipe.last.reopen($stderr)
31
+
32
+ $stderr.reopen(IO.pipe.last)
33
+ result = yield
34
+ $stderr.reopen(old)
35
+
36
+ result
37
+ end
38
+ end
@@ -24,6 +24,6 @@
24
24
 
25
25
  module Tesseract
26
26
  def self.version
27
- '0.0.1.4'
27
+ '0.0.2'
28
28
  end
29
29
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tesseract-ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.4
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-11-27 00:00:00.000000000 Z
12
+ date: 2011-11-28 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: namedic
16
- requirement: &14007960 !ruby/object:Gem::Requirement
16
+ requirement: &10345380 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *14007960
24
+ version_requirements: *10345380
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: iso-639
27
- requirement: &14007440 !ruby/object:Gem::Requirement
27
+ requirement: &10344840 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *14007440
35
+ version_requirements: *10344840
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: ffi-extra
38
- requirement: &14006760 !ruby/object:Gem::Requirement
38
+ requirement: &10341680 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *14006760
46
+ version_requirements: *10341680
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: ffi-inliner
49
- requirement: &14005940 !ruby/object:Gem::Requirement
49
+ requirement: &10340760 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *14005940
57
+ version_requirements: *10340760
58
58
  description:
59
59
  email: meh@paranoici.org
60
60
  executables:
@@ -70,6 +70,7 @@ files:
70
70
  - lib/tesseract/api.rb
71
71
  - lib/tesseract/c.rb
72
72
  - lib/tesseract/engine.rb
73
+ - lib/tesseract/extensions.rb
73
74
  - lib/tesseract/version.rb
74
75
  - tesseract-ocr.gemspec
75
76
  - test/first.png