tesseract-ocr 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -19,19 +19,30 @@ Example
19
19
  require 'tesseract'
20
20
 
21
21
  e = Tesseract::Engine.new {|e|
22
- e.language = :eng
23
- e.blacklist = '|'
22
+ e.language = :eng
23
+ e.blacklist = '|'
24
24
  }
25
25
 
26
26
  e.text_for('test/first.png').strip # => 'ABC'
27
- e.words_for('test/second.png') # => ["I'm", "12", "and", "what", "is", "this.", "INSTALL", "GENTOO", "OH", "HAI", "1234"]
28
27
 
29
- e.with { |e| e.whitelist = '1234567890' }.text_for('test/second.png') # => "11111 12 3116 1111113115111151\n11157411 6511700\n014 11141 1234\n\n"
28
+ e.words_for('test/second.png') # [
29
+ # [ 0] #<Tesseract(93.41653442382812): "|'m">,
30
+ # [ 1] #<Tesseract(91.11811828613281): "12">,
31
+ # [ 2] #<Tesseract(85.71760559082031): "and">,
32
+ # [ 3] #<Tesseract(83.4853515625): "what">,
33
+ # [ 4] #<Tesseract(86.71072387695312): "is">,
34
+ # [ 5] #<Tesseract(83.2227783203125): "this.">,
35
+ # [ 6] #<Tesseract(82.81439208984375): "INSTALL">,
36
+ # [ 7] #<Tesseract(86.46566772460938): "GENTOO">,
37
+ # [ 8] #<Tesseract(93.19613647460938): "OH">,
38
+ # [ 9] #<Tesseract(82.81439208984375): "HAI">,
39
+ # [10] #<Tesseract(85.9158935546875): "1234">
40
+ # ]
30
41
  ```
31
42
 
32
- You can pass to `#text_for` either a path, an IO object or a string containing the image,
33
- there are few supported formats so try to stay in the BMP/JPEG/PNG boundaries. This means
34
- that you can also work on an image with RMagick or similar and then pass the raw data.
43
+ You can pass `#text_for` a path, an IO object, a string containing the image data, or
44
+ an object that responds to `#to_blob` (for example `Magick::Image`). Keep in mind that
45
+ the image format has to be supported by Leptonica.
35
46
 
36
47
  Using the binary
37
48
  ----------------
data/Rakefile CHANGED
@@ -5,6 +5,10 @@ task :default => :test
5
5
 
6
6
  task :test do
7
7
  Dir.chdir 'test'
8
-
9
8
  sh 'rspec tesseract_spec.rb --color --format doc'
10
9
  end
10
+
11
+ task :bench do
12
+ Dir.chdir 'test'
13
+ sh 'ruby tesseract_bench.rb'
14
+ end
@@ -0,0 +1,56 @@
1
+ #! /usr/bin/env ruby
2
+ require 'tesseract'
3
+ require 'RMagick'
4
+
5
+ def near (x, y)
6
+ [
7
+ [x - 1, y - 1],
8
+ [x, y - 1],
9
+ [x + 1, y - 1],
10
+ [x - 1, y ],
11
+ # FIRE IN THE HOLE
12
+ [x + 1, y ],
13
+ [x - 1, y + 1],
14
+ [x, y + 1],
15
+ [x + 1, y + 1]
16
+ ]
17
+ end
18
+
19
+ Tesseract::Engine.new.tap {|engine|
20
+ ARGV.each {|path|
21
+ image = Magick::Image.read(path).first
22
+ pixels = Hash.new { |h, k| h[k] = 0 }
23
+
24
+ image.each_pixel {|p|
25
+ pixels[p] += 1
26
+ }
27
+
28
+ pixels.delete(Magick::Pixel.from_color('black'))
29
+
30
+ text_color, count = pixels.max { |a, b| a.last <=> b.last }
31
+
32
+ image.each_pixel {|p, x, y|
33
+ next unless p == text_color or p.to_color == 'black'
34
+
35
+ image.pixel_color x, y, p == text_color ? 'black' : 'white'
36
+ }
37
+
38
+ image.each_pixel {|p, x, y|
39
+ next if p.to_color == 'black' || p.to_color == 'white'
40
+
41
+ if near(x, y).map { |(x, y)| image.pixel_color x, y }.any? { |p| p.to_color == 'black' }
42
+ image.pixel_color x, y, 'gray'
43
+ else
44
+ image.pixel_color x, y, 'white'
45
+ end
46
+ }
47
+
48
+ image.each_pixel {|p, x, y|
49
+ next unless p.to_color == 'gray'
50
+
51
+ image.pixel_color x, y, 'black'
52
+ }
53
+
54
+ puts engine.text_for(image.resize 10).strip
55
+ }
56
+ }
data/lib/tesseract/api.rb CHANGED
@@ -25,6 +25,9 @@
25
25
  require 'tesseract/extensions'
26
26
  require 'tesseract/c'
27
27
 
28
+ require 'tesseract/api/image'
29
+ require 'tesseract/api/iterator'
30
+
28
31
  module Tesseract
29
32
 
30
33
  class API
@@ -32,35 +35,7 @@ class API
32
35
  # Get a pointer to a tesseract-ocr usable image from a path, a string
33
36
  # with the data or an IO stream.
34
37
  def self.image_for (image)
35
- image = suppress_stderr {
36
- if image.is_a?(String) && (File.exists?(File.expand_path(image)) rescue nil)
37
- C::pix_read(File.expand_path(image))
38
- elsif image.is_a?(String)
39
- C::pix_read_mem(image, image.bytesize)
40
- elsif image.is_a?(IO)
41
- C::pix_read_stream(image.to_i)
42
- end
43
- }
44
-
45
- raise ArgumentError, 'invalid image' if image.nil? || image.null?
46
-
47
- image = FFI::AutoPointer.new(image, method(:image_finalizer))
48
-
49
- class << image
50
- def width
51
- C::pix_get_width(self)
52
- end
53
-
54
- def height
55
- C::pix_get_height(self)
56
- end
57
- end
58
-
59
- image
60
- end
61
-
62
- def self.image_finalizer (pointer) # :nodoc:
63
- C::pix_destroy(pointer)
38
+ Image.new(image)
64
39
  end
65
40
 
66
41
  ##
@@ -79,35 +54,35 @@ class API
79
54
  }
80
55
 
81
56
  def initialize
82
- @internal = FFI::AutoPointer.new(C::create, self.class.method(:finalizer))
57
+ @internal = FFI::AutoPointer.new(C::BaseAPI.create, self.class.method(:finalize))
83
58
  end
84
59
 
85
- def self.finalizer (pointer) # :nodoc:
86
- C::destroy(pointer)
60
+ def self.finalize (pointer) # :nodoc:
61
+ C::BaseAPI.destroy(pointer)
87
62
  end
88
63
 
89
64
  def version
90
- C::version(to_ffi)
65
+ C::BaseAPI.version(to_ffi)
91
66
  end
92
67
 
93
68
  def input_name= (name)
94
- C::set_input_name(to_ffi, name)
69
+ C::BaseAPI.set_input_name(to_ffi, name)
95
70
  end
96
71
 
97
72
  def output_name= (name)
98
- C::set_output_name(to_ffi, name)
73
+ C::BaseAPI.set_output_name(to_ffi, name)
99
74
  end
100
75
 
101
76
  def set_variable (name, value)
102
- C::set_variable(to_ffi, name, value)
77
+ C::BaseAPI.set_variable(to_ffi, name, value)
103
78
  end
104
79
 
105
80
  def get_variable (name, type = nil)
106
81
  if type.nil?
107
- type = Types.keys.find { |type| C.__send__ "has_#{type}_variable", to_ffi, name }
82
+ type = Types.keys.find { |type| C::BaseAPI.__send__ "has_#{type}_variable", to_ffi, name }
108
83
 
109
84
  if type
110
- C.__send__ "get_#{type}_variable", to_ffi, name
85
+ C::BaseAPI.__send__ "get_#{type}_variable", to_ffi, name
111
86
  end
112
87
  else
113
88
  unless Types.has_key?(type)
@@ -118,87 +93,83 @@ class API
118
93
  type = name
119
94
  end
120
95
 
121
- if C.__send__ "has_#{type}_variable", to_ffi, name
122
- C.__send__ "get_#{type}_variable", to_ffi, name
96
+ if C::BaseAPI.__send__ "has_#{type}_variable", to_ffi, name
97
+ C::BaseAPI.__send__ "get_#{type}_variable", to_ffi, name
123
98
  end
124
99
  end
125
100
  end
126
101
 
127
102
  def init (datapath = '.', language = 'eng', mode = :DEFAULT)
128
- unless C::init(to_ffi, datapath, language.to_s, mode).zero?
103
+ unless C::BaseAPI.init(to_ffi, datapath, language.to_s, mode).zero?
129
104
  raise 'the API did not Init correctly'
130
105
  end
131
106
  end
132
107
 
133
108
  def read_config_file (path, init_only = false)
134
- C::read_config_file(to_ffi, path, init_only)
109
+ C::BaseAPI.read_config_file(to_ffi, path, init_only)
135
110
  end
136
111
 
137
- def page_seg_mode
138
- C::get_page_seg_mode(to_ffi)
112
+ def get_page_seg_mode
113
+ C::BaseAPI.get_page_seg_mode(to_ffi)
139
114
  end
140
115
 
141
- def page_seg_mode= (value)
142
- C::set_page_seg_mode(to_ffi, value)
116
+ def set_page_seg_mode (value)
117
+ C::BaseAPI.set_page_seg_mode(to_ffi, value)
143
118
  end
144
119
 
145
120
  def set_image (pix)
146
- C::set_image(to_ffi, pix)
121
+ C::BaseAPI.set_image(to_ffi, pix.is_a?(Image) ? pix.to_ffi : pix)
147
122
  end
148
123
 
149
124
  def set_rectangle (left, top, width, height)
150
- C::set_rectangle(to_ffi, left, top, width, height)
125
+ C::BaseAPI.set_rectangle(to_ffi, left, top, width, height)
151
126
  end
152
127
 
153
- def get_text
154
- pointer = C::get_utf8_text(to_ffi)
155
- result = pointer.read_string
156
- result.force_encoding 'UTF-8'
157
- C::free_string(pointer)
158
-
159
- result
128
+ def get_iterator
129
+ Iterator.new(C::BaseAPI.get_iterator(to_ffi))
160
130
  end
161
131
 
162
- def get_hocr (page = 0)
163
- pointer = C::get_hocr_text(to_ffi, page)
132
+ def get_text
133
+ pointer = C::BaseAPI.get_utf8_text(to_ffi)
164
134
  result = pointer.read_string
165
135
  result.force_encoding 'UTF-8'
136
+ C.free_string(pointer)
166
137
 
167
138
  result
168
139
  end
169
140
 
170
141
  def get_box (page = 0)
171
- pointer = C::get_box_text(to_ffi, page)
142
+ pointer = C::BaseAPI.get_box_text(to_ffi, page)
172
143
  result = pointer.read_string
173
144
  result.force_encoding 'UTF-8'
174
- C::free_string(pointer)
145
+ C.free_string(pointer)
175
146
 
176
147
  result
177
148
  end
178
149
 
179
150
  def get_unlv
180
- pointer = C::get_unlv_text(to_ffi)
151
+ pointer = C::BaseAPI.get_unlv_text(to_ffi)
181
152
  result = pointer.read_string
182
153
  result.force_encoding 'ISO8859-1'
183
- C::free_string(pointer)
154
+ C.free_string(pointer)
184
155
 
185
156
  result
186
157
  end
187
158
 
188
159
  def mean_text_confidence
189
- C::mean_text_conf(to_ffi)
160
+ C::BaseAPI.mean_text_conf(to_ffi)
190
161
  end
191
162
 
192
163
  def all_word_confidences
193
- C::all_word_confidences(to_ffi)
164
+ C::BaseAPI.all_word_confidences(to_ffi)
194
165
  end
195
166
 
196
167
  def clear
197
- C::clear(to_ffi)
168
+ C::BaseAPI.clear(to_ffi)
198
169
  end
199
170
 
200
171
  def end
201
- C::end(to_ffi)
172
+ C::BaseAPI.end(to_ffi)
202
173
  end
203
174
 
204
175
  def to_ffi
@@ -0,0 +1,84 @@
1
+ #--
2
+ # Copyright 2011 meh. All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without modification, are
5
+ # permitted provided that the following conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright notice, this list of
8
+ # conditions and the following disclaimer.
9
+ #
10
+ # THIS SOFTWARE IS PROVIDED BY meh ''AS IS'' AND ANY EXPRESS OR IMPLIED
11
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
12
+ # FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL meh OR
13
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
14
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
15
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
16
+ # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
17
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
18
+ # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
+ #
20
+ # The views and conclusions contained in the software and documentation are those of the
21
+ # authors and should not be interpreted as representing official policies, either expressed
22
+ # or implied, of meh.
23
+ #++
24
+
25
+ module Tesseract; class API
26
+
27
+ class Image
28
+ def self.new (image)
29
+ image = if image.is_a?(String) && (File.exists?(File.expand_path(image)) rescue nil)
30
+ C::Leptonica.pix_read(File.expand_path(image))
31
+ elsif image.is_a?(String)
32
+ C::Leptonica.pix_read_mem(image, image.bytesize)
33
+ elsif image.is_a?(IO)
34
+ C::Leptonica.pix_read_stream(image.to_i)
35
+ elsif image.respond_to? :to_blob
36
+ image = image.to_blob
37
+
38
+ C::Leptonica.pix_read_mem(image, image.bytesize)
39
+ end
40
+
41
+ raise ArgumentError, 'invalid image' if image.nil? || image.null?
42
+
43
+ super(image)
44
+ end
45
+
46
+ attr_accessor :x, :y
47
+
48
+ def initialize (pointer, x = 0, y = 0)
49
+ @internal = FFI::AutoPointer.new(pointer, self.class.method(:finalize))
50
+ @x = x
51
+ @y = y
52
+ end
53
+
54
+ def self.finalize (pointer)
55
+ C::Leptonica.pix_destroy(pointer)
56
+ end
57
+
58
+ def width
59
+ C::Leptonica.pix_get_width(to_ffi)
60
+ end
61
+
62
+ def height
63
+ C::Leptonica.pix_get_height(to_ffi)
64
+ end
65
+
66
+ def to_blob (format = :default)
67
+ data = FFI::MemoryPointer.new(:pointer)
68
+ size = FFI::MemoryPointer.new(:size_t)
69
+
70
+ C::Leptonica.pix_write_mem(to_ffi, data, size, C.for_enum(format))
71
+
72
+ result = data.typecast(:pointer).read_string(size.typecast(:size_t))
73
+
74
+ data.typecast(:pointer).free
75
+
76
+ result
77
+ end
78
+
79
+ def to_ffi
80
+ @internal
81
+ end
82
+ end
83
+
84
+ end; end
@@ -0,0 +1,122 @@
1
+ #--
2
+ # Copyright 2011 meh. All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without modification, are
5
+ # permitted provided that the following conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright notice, this list of
8
+ # conditions and the following disclaimer.
9
+ #
10
+ # THIS SOFTWARE IS PROVIDED BY meh ''AS IS'' AND ANY EXPRESS OR IMPLIED
11
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
12
+ # FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL meh OR
13
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
14
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
15
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
16
+ # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
17
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
18
+ # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
+ #
20
+ # The views and conclusions contained in the software and documentation are those of the
21
+ # authors and should not be interpreted as representing official policies, either expressed
22
+ # or implied, of meh.
23
+ #++
24
+
25
+ module Tesseract; class API
26
+
27
+ class Iterator
28
+ def initialize (pointer)
29
+ raise ArgumentError, 'the pointer is null' if pointer.nil? || pointer.null?
30
+
31
+ @internal = FFI::AutoPointer.new(pointer, self.class.method(:finalize))
32
+ end
33
+
34
+ def self.finalize (pointer) # :nodoc:
35
+ C::Iterator.destroy(pointer)
36
+ end
37
+
38
+ def begin
39
+ C::Iterator.begin(to_ffi)
40
+ end
41
+
42
+ def beginning? (level = :word)
43
+ C::Iterator.is_at_beginning_of(to_ffi, C.for_enum(level))
44
+ end
45
+
46
+ def end? (level, element)
47
+ C::Iterator.is_at_final_element(to_ffi, C.for_enum(level), C.for_enum(element))
48
+ end
49
+
50
+ def next (level = :word)
51
+ C::Iterator.next(to_ffi, C.for_enum(level))
52
+ end
53
+
54
+ def bounding_box (level = :word)
55
+ C::Iterator.bounding_box(to_ffi, C.for_enum(level))
56
+ end
57
+
58
+ def get_binary_image (level = :word)
59
+ Image.new(C::Iterator.get_binary_image(to_ffi, C.for_enum(level)))
60
+ end
61
+
62
+ def get_image (level = :word, padding = 0)
63
+ image = C::Iterator.get_image(to_ffi, C.for_enum(level), padding)
64
+
65
+ Image.new(image.pix, image.x, image.y)
66
+ end
67
+
68
+ def baseline (level = :word)
69
+ C::Iterator.baseline(to_ffi, C.for_enum(level))
70
+ end
71
+
72
+ def orientation
73
+ C::Iterator.orientation(to_ffi)
74
+ end
75
+
76
+ def get_text (level = :word)
77
+ pointer = C::Iterator.get_utf8_text(to_ffi, C.for_enum(level))
78
+ result = pointer.read_string
79
+ result.force_encoding 'UTF-8'
80
+ C.free_string(pointer)
81
+
82
+ result
83
+ end
84
+
85
+ def confidence (level = :word)
86
+ C::Iterator.confidence(to_ffi, C.for_enum(level))
87
+ end
88
+
89
+ def block_type
90
+ C::Iterator.block_type(to_ffi)
91
+ end
92
+
93
+ def word_font_attributes
94
+ C::Iterator.word_font_attributes(to_ffi)
95
+ end
96
+
97
+ def word_is_from_dictionary?
98
+ C::Iterator.word_is_from_dictionary(to_ffi)
99
+ end
100
+
101
+ def word_is_numeric?
102
+ C::Iterator.word_is_numeric(to_ffi)
103
+ end
104
+
105
+ def symbol_is_superscript?
106
+ C::Iterator.symbol_is_superscript(to_ffi)
107
+ end
108
+
109
+ def symbol_is_subscript?
110
+ C::Iterator.symbol_is_subscript(to_ffi)
111
+ end
112
+
113
+ def symbol_is_dropcap?
114
+ C::Iterator.symbol_is_dropcap(to_ffi)
115
+ end
116
+
117
+ def to_ffi
118
+ @internal
119
+ end
120
+ end
121
+
122
+ end; end