tesseract-ocr 0.0.1.4 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +3 -4
- data/lib/tesseract/api.rb +28 -30
- data/lib/tesseract/c.rb +1 -1
- data/lib/tesseract/engine.rb +12 -13
- data/lib/tesseract/extensions.rb +38 -0
- data/lib/tesseract/version.rb +1 -1
- metadata +11 -10
data/README.md
CHANGED
@@ -19,15 +19,14 @@ Example
|
|
19
19
|
require 'tesseract'
|
20
20
|
|
21
21
|
e = Tesseract::Engine.new {|e|
|
22
|
-
e.language
|
23
|
-
|
24
|
-
e.blacklist '|'
|
22
|
+
e.language = :eng
|
23
|
+
e.blacklist = '|'
|
25
24
|
}
|
26
25
|
|
27
26
|
e.text_for('test/first.png').strip # => 'ABC'
|
28
27
|
e.words_for('test/second.png') # => ["I'm", "12", "and", "what", "is", "this.", "INSTALL", "GENTOO", "OH", "HAI", "1234"]
|
29
28
|
|
30
|
-
e.with { |e| e.whitelist '1234567890' }.text_for('test/second.png') # => "11111 12 3116 1111113115111151\n11157411 6511700\n014 11141 1234\n\n"
|
29
|
+
e.with { |e| e.whitelist = '1234567890' }.text_for('test/second.png') # => "11111 12 3116 1111113115111151\n11157411 6511700\n014 11141 1234\n\n"
|
31
30
|
```
|
32
31
|
|
33
32
|
You can pass `#text_for` either a path, an IO object, or a string containing the image,
|
data/lib/tesseract/api.rb
CHANGED
@@ -22,7 +22,7 @@
|
|
22
22
|
# or implied, of meh.
|
23
23
|
#++
|
24
24
|
|
25
|
-
require '
|
25
|
+
require 'tesseract/extensions'
|
26
26
|
require 'tesseract/c'
|
27
27
|
|
28
28
|
module Tesseract
|
@@ -32,33 +32,35 @@ class API
|
|
32
32
|
# Get a pointer to a tesseract-ocr usable image from a path, a string
|
33
33
|
# with the data or an IO stream.
|
34
34
|
def self.image_for (image)
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
raise ArgumentError, 'invalid image'
|
43
|
-
end.tap {|image|
|
44
|
-
class << image
|
45
|
-
def width
|
46
|
-
C::pix_get_width(self)
|
47
|
-
end
|
48
|
-
|
49
|
-
def height
|
50
|
-
C::pix_get_height(self)
|
51
|
-
end
|
35
|
+
image = suppress_stderr {
|
36
|
+
if image.is_a?(String) && (File.exists?(File.expand_path(image)) rescue nil)
|
37
|
+
C::pix_read(File.expand_path(image))
|
38
|
+
elsif image.is_a?(String)
|
39
|
+
C::pix_read_mem(image, image.bytesize)
|
40
|
+
elsif image.is_a?(IO)
|
41
|
+
C::pix_read_stream(image.to_i)
|
52
42
|
end
|
53
|
-
|
54
|
-
ObjectSpace.define_finalizer image, image_finalizer(image)
|
55
43
|
}
|
44
|
+
|
45
|
+
raise ArgumentError, 'invalid image' if image.nil? || image.null?
|
46
|
+
|
47
|
+
image = FFI::AutoPointer.new(image, method(:image_finalizer))
|
48
|
+
|
49
|
+
class << image
|
50
|
+
def width
|
51
|
+
C::pix_get_width(self)
|
52
|
+
end
|
53
|
+
|
54
|
+
def height
|
55
|
+
C::pix_get_height(self)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
image
|
56
60
|
end
|
57
61
|
|
58
|
-
def self.image_finalizer (
|
59
|
-
|
60
|
-
C::pix_destroy(pointer)
|
61
|
-
}
|
62
|
+
def self.image_finalizer (pointer) # :nodoc:
|
63
|
+
C::pix_destroy(pointer)
|
62
64
|
end
|
63
65
|
|
64
66
|
##
|
@@ -77,15 +79,11 @@ class API
|
|
77
79
|
}
|
78
80
|
|
79
81
|
def initialize
|
80
|
-
@internal = C::create
|
81
|
-
|
82
|
-
ObjectSpace.define_finalizer self, self.class.finalizer(to_ffi)
|
82
|
+
@internal = FFI::AutoPointer.new(C::create, self.class.method(:finalizer))
|
83
83
|
end
|
84
84
|
|
85
85
|
def self.finalizer (pointer) # :nodoc:
|
86
|
-
|
87
|
-
C::destroy(pointer)
|
88
|
-
}
|
86
|
+
C::destroy(pointer)
|
89
87
|
end
|
90
88
|
|
91
89
|
def version
|
data/lib/tesseract/c.rb
CHANGED
data/lib/tesseract/engine.rb
CHANGED
@@ -22,7 +22,6 @@
|
|
22
22
|
# or implied, of meh.
|
23
23
|
#++
|
24
24
|
|
25
|
-
require 'namedic'
|
26
25
|
require 'tesseract/api'
|
27
26
|
|
28
27
|
module Tesseract
|
@@ -91,20 +90,20 @@ class Engine
|
|
91
90
|
end
|
92
91
|
}
|
93
92
|
|
94
|
-
def blacklist
|
95
|
-
|
96
|
-
set('tessedit_char_blacklist', what.respond_to?(:to_a) ? what.to_a.join : what.to_s)
|
97
|
-
else
|
98
|
-
get('tessedit_char_blacklist').chars.to_a
|
99
|
-
end
|
93
|
+
def blacklist
|
94
|
+
get('tessedit_char_blacklist').chars.to_a
|
100
95
|
end
|
101
96
|
|
102
|
-
def
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
97
|
+
def blacklist= (value)
|
98
|
+
set('tessedit_char_blacklist', value.respond_to?(:to_a) ? value.to_a.join : value.to_s)
|
99
|
+
end
|
100
|
+
|
101
|
+
def whitelist
|
102
|
+
get('tessedit_char_whitelist').chars.to_a
|
103
|
+
end
|
104
|
+
|
105
|
+
def whitelist= (value)
|
106
|
+
set('tessedit_char_whitelist', value.respond_to?(:to_a) ? value.to_a.join : value.to_s)
|
108
107
|
end
|
109
108
|
|
110
109
|
def page_segmentation_mode
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright 2011 meh. All rights reserved.
|
3
|
+
#
|
4
|
+
# Redistribution and use in source and binary forms, with or without modification, are
|
5
|
+
# permitted provided that the following conditions are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above copyright notice, this list of
|
8
|
+
# conditions and the following disclaimer.
|
9
|
+
#
|
10
|
+
# THIS SOFTWARE IS PROVIDED BY meh ''AS IS'' AND ANY EXPRESS OR IMPLIED
|
11
|
+
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
12
|
+
# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL meh OR
|
13
|
+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
14
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
15
|
+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
16
|
+
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
17
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
18
|
+
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
19
|
+
#
|
20
|
+
# The views and conclusions contained in the software and documentation are those of the
|
21
|
+
# authors and should not be interpreted as representing official policies, either expressed
|
22
|
+
# or implied, of meh.
|
23
|
+
#++
|
24
|
+
|
25
|
+
require 'namedic'
|
26
|
+
require 'iso-639'
|
27
|
+
|
28
|
+
module Kernel
|
29
|
+
def suppress_stderr
|
30
|
+
old = IO.pipe.last.reopen($stderr)
|
31
|
+
|
32
|
+
$stderr.reopen(IO.pipe.last)
|
33
|
+
result = yield
|
34
|
+
$stderr.reopen(old)
|
35
|
+
|
36
|
+
result
|
37
|
+
end
|
38
|
+
end
|
data/lib/tesseract/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tesseract-ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1.4
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-11-
|
12
|
+
date: 2011-11-28 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: namedic
|
16
|
-
requirement: &
|
16
|
+
requirement: &10345380 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *10345380
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: iso-639
|
27
|
-
requirement: &
|
27
|
+
requirement: &10344840 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *10344840
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ffi-extra
|
38
|
-
requirement: &
|
38
|
+
requirement: &10341680 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *10341680
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: ffi-inliner
|
49
|
-
requirement: &
|
49
|
+
requirement: &10340760 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *10340760
|
58
58
|
description:
|
59
59
|
email: meh@paranoici.org
|
60
60
|
executables:
|
@@ -70,6 +70,7 @@ files:
|
|
70
70
|
- lib/tesseract/api.rb
|
71
71
|
- lib/tesseract/c.rb
|
72
72
|
- lib/tesseract/engine.rb
|
73
|
+
- lib/tesseract/extensions.rb
|
73
74
|
- lib/tesseract/version.rb
|
74
75
|
- tesseract-ocr.gemspec
|
75
76
|
- test/first.png
|