tesseract-ocr 0.0.1.4 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -19,15 +19,14 @@ Example
19
19
  require 'tesseract'
20
20
 
21
21
  e = Tesseract::Engine.new {|e|
22
- e.language = :eng
23
-
24
- e.blacklist '|'
22
+ e.language = :eng
23
+ e.blacklist = '|'
25
24
  }
26
25
 
27
26
  e.text_for('test/first.png').strip # => 'ABC'
28
27
  e.words_for('test/second.png') # => ["I'm", "12", "and", "what", "is", "this.", "INSTALL", "GENTOO", "OH", "HAI", "1234"]
29
28
 
30
- e.with { |e| e.whitelist '1234567890' }.text_for('test/second.png') # => "11111 12 3116 1111113115111151\n11157411 6511700\n014 11141 1234\n\n"
29
+ e.with { |e| e.whitelist = '1234567890' }.text_for('test/second.png') # => "11111 12 3116 1111113115111151\n11157411 6511700\n014 11141 1234\n\n"
31
30
  ```
32
31
 
33
32
  You can pass to `#text_for` either a path, an IO object or a string containing the image,
data/lib/tesseract/api.rb CHANGED
@@ -22,7 +22,7 @@
22
22
  # or implied, of meh.
23
23
  #++
24
24
 
25
- require 'iso-639'
25
+ require 'tesseract/extensions'
26
26
  require 'tesseract/c'
27
27
 
28
28
  module Tesseract
@@ -32,33 +32,35 @@ class API
32
32
  # Get a pointer to a tesseract-ocr usable image from a path, a string
33
33
  # with the data or an IO stream.
34
34
  def self.image_for (image)
35
- if image.is_a?(String) && File.exists?(File.expand_path(image))
36
- C::pix_read(File.expand_path(image))
37
- elsif image.is_a?(String)
38
- C::pix_read_mem(image, image.bytesize)
39
- elsif image.is_a?(IO)
40
- C::pix_read_stream(image.to_i)
41
- else
42
- raise ArgumentError, 'invalid image'
43
- end.tap {|image|
44
- class << image
45
- def width
46
- C::pix_get_width(self)
47
- end
48
-
49
- def height
50
- C::pix_get_height(self)
51
- end
35
+ image = suppress_stderr {
36
+ if image.is_a?(String) && (File.exists?(File.expand_path(image)) rescue nil)
37
+ C::pix_read(File.expand_path(image))
38
+ elsif image.is_a?(String)
39
+ C::pix_read_mem(image, image.bytesize)
40
+ elsif image.is_a?(IO)
41
+ C::pix_read_stream(image.to_i)
52
42
  end
53
-
54
- ObjectSpace.define_finalizer image, image_finalizer(image)
55
43
  }
44
+
45
+ raise ArgumentError, 'invalid image' if image.nil? || image.null?
46
+
47
+ image = FFI::AutoPointer.new(image, method(:image_finalizer))
48
+
49
+ class << image
50
+ def width
51
+ C::pix_get_width(self)
52
+ end
53
+
54
+ def height
55
+ C::pix_get_height(self)
56
+ end
57
+ end
58
+
59
+ image
56
60
  end
57
61
 
58
- def self.image_finalizer (image) # :nodoc:
59
- proc {
60
- C::pix_destroy(pointer)
61
- }
62
+ def self.image_finalizer (pointer) # :nodoc:
63
+ C::pix_destroy(pointer)
62
64
  end
63
65
 
64
66
  ##
@@ -77,15 +79,11 @@ class API
77
79
  }
78
80
 
79
81
  def initialize
80
- @internal = C::create
81
-
82
- ObjectSpace.define_finalizer self, self.class.finalizer(to_ffi)
82
+ @internal = FFI::AutoPointer.new(C::create, self.class.method(:finalizer))
83
83
  end
84
84
 
85
85
  def self.finalizer (pointer) # :nodoc:
86
- proc {
87
- C::destroy(pointer)
88
- }
86
+ C::destroy(pointer)
89
87
  end
90
88
 
91
89
  def version
data/lib/tesseract/c.rb CHANGED
@@ -44,7 +44,7 @@ module C
44
44
 
45
45
  cpp.function %{
46
46
  Pix* pix_read_fd (int fd) {
47
- return pixReadStream(fdopen(fd, "r"), 0);
47
+ return pixReadStream(fdopen(fd, "rb"), 0);
48
48
  }
49
49
  }
50
50
 
@@ -22,7 +22,6 @@
22
22
  # or implied, of meh.
23
23
  #++
24
24
 
25
- require 'namedic'
26
25
  require 'tesseract/api'
27
26
 
28
27
  module Tesseract
@@ -91,20 +90,20 @@ class Engine
91
90
  end
92
91
  }
93
92
 
94
- def blacklist (what = nil)
95
- if what
96
- set('tessedit_char_blacklist', what.respond_to?(:to_a) ? what.to_a.join : what.to_s)
97
- else
98
- get('tessedit_char_blacklist').chars.to_a
99
- end
93
+ def blacklist
94
+ get('tessedit_char_blacklist').chars.to_a
100
95
  end
101
96
 
102
- def whitelist (what = nil)
103
- if what
104
- set('tessedit_char_whitelist', what.respond_to?(:to_a) ? what.to_a.join : what.to_s)
105
- else
106
- get('tessedit_char_whitelist').chars.to_a
107
- end
97
+ def blacklist= (value)
98
+ set('tessedit_char_blacklist', value.respond_to?(:to_a) ? value.to_a.join : value.to_s)
99
+ end
100
+
101
+ def whitelist
102
+ get('tessedit_char_whitelist').chars.to_a
103
+ end
104
+
105
+ def whitelist= (value)
106
+ set('tessedit_char_whitelist', value.respond_to?(:to_a) ? value.to_a.join : value.to_s)
108
107
  end
109
108
 
110
109
  def page_segmentation_mode
@@ -0,0 +1,38 @@
1
+ #--
2
+ # Copyright 2011 meh. All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without modification, are
5
+ # permitted provided that the following conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright notice, this list of
8
+ # conditions and the following disclaimer.
9
+ #
10
+ # THIS SOFTWARE IS PROVIDED BY meh ''AS IS'' AND ANY EXPRESS OR IMPLIED
11
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
12
+ # FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL meh OR
13
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
14
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
15
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
16
+ # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
17
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
18
+ # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
+ #
20
+ # The views and conclusions contained in the software and documentation are those of the
21
+ # authors and should not be interpreted as representing official policies, either expressed
22
+ # or implied, of meh.
23
+ #++
24
+
25
+ require 'namedic'
26
+ require 'iso-639'
27
+
28
+ module Kernel
29
+ def suppress_stderr
30
+ old = IO.pipe.last.reopen($stderr)
31
+
32
+ $stderr.reopen(IO.pipe.last)
33
+ result = yield
34
+ $stderr.reopen(old)
35
+
36
+ result
37
+ end
38
+ end
@@ -24,6 +24,6 @@
24
24
 
25
25
  module Tesseract
26
26
  def self.version
27
- '0.0.1.4'
27
+ '0.0.2'
28
28
  end
29
29
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tesseract-ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.4
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-11-27 00:00:00.000000000 Z
12
+ date: 2011-11-28 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: namedic
16
- requirement: &14007960 !ruby/object:Gem::Requirement
16
+ requirement: &10345380 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *14007960
24
+ version_requirements: *10345380
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: iso-639
27
- requirement: &14007440 !ruby/object:Gem::Requirement
27
+ requirement: &10344840 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *14007440
35
+ version_requirements: *10344840
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: ffi-extra
38
- requirement: &14006760 !ruby/object:Gem::Requirement
38
+ requirement: &10341680 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *14006760
46
+ version_requirements: *10341680
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: ffi-inliner
49
- requirement: &14005940 !ruby/object:Gem::Requirement
49
+ requirement: &10340760 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *14005940
57
+ version_requirements: *10340760
58
58
  description:
59
59
  email: meh@paranoici.org
60
60
  executables:
@@ -70,6 +70,7 @@ files:
70
70
  - lib/tesseract/api.rb
71
71
  - lib/tesseract/c.rb
72
72
  - lib/tesseract/engine.rb
73
+ - lib/tesseract/extensions.rb
73
74
  - lib/tesseract/version.rb
74
75
  - tesseract-ocr.gemspec
75
76
  - test/first.png