tesseract-ocr 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,12 +7,12 @@ require 'shellwords'
7
7
  options = {}
8
8
 
9
9
  OptionParser.new do |o|
10
- o.on '-b', '--box FILE', 'the box file to use' do |value|
11
- options[:box] = File.realpath(value)
10
+ o.on '-b', '--box FILE...', Array, 'the box files to use' do |value|
11
+ options[:box] = value.map { |path| File.realpath(path) }
12
12
  end
13
13
 
14
- o.on '-i', '--image FILE', 'the image file to use' do |value|
15
- options[:image] = File.realpath(value)
14
+ o.on '-i', '--image FILE...', Array, 'the image files to use' do |value|
15
+ options[:image] = value.map { |path| File.realpath(path) }
16
16
  end
17
17
 
18
18
  o.on '-o', '--output FILE', 'the path where to output the traineddata' do |value|
@@ -25,24 +25,28 @@ if language = ARGV.shift
25
25
  options[:image] = File.realpath("#{language}.tif")
26
26
  options[:output] = File.expand_path("#{language}.traineddata")
27
27
  else
28
- language = options[:box][/^(.*?)\./, 1]
28
+ language = options[:output][/^(.*?)\./, 1]
29
29
  end
30
30
 
31
31
  Dir.chdir FileUtils.mkpath(File.join(Dir.tmpdir, rand.to_s)).first
32
32
 
33
33
  language = language.shellescape
34
34
 
35
- %x{
36
- cp #{options[:box].shellescape} #{language}.box
37
- cp #{options[:image].shellescape} #{language}#{File.extname(options[:image])}
35
+ options[:box].each_with_index {|box, index|
36
+ %x{
37
+ cp #{box.shellescape} #{language}.#{index}.box
38
+ cp #{options[:image][index].shellescape} #{language}.#{index}#{File.extname(options[:image][index]}
39
+
40
+ tesseract #{language}#{File.extname(options[:image])} #{language} nobatch box.train.stderr
38
41
 
39
- tesseract #{language}#{File.extname(options[:image])} #{language} nobatch box.train.stderr
42
+ unicharset_extractor #{language}.box
40
43
 
41
- unicharset_extractor #{language}.box
44
+ echo #{language}.#{index} 0 0 0 0 0 >> font_properties
45
+ mftraining -F font_properties -U unicharset -O #{language}.unicharset #{language}.tr
46
+ }
47
+ }
42
48
 
43
- echo #{language} 0 0 0 0 0 > font_properties
44
- mftraining -F font_properties -U unicharset #{language}.tr
45
- mftraining -F font_properties -U unicharset -O #{language}.unicharset #{language}.tr
49
+ %x{
46
50
  cntraining #{language}.tr
47
51
 
48
52
  mv Microfeat #{language}.Microfeat
@@ -18,6 +18,14 @@ def near (x, y)
18
18
  ]
19
19
  end
20
20
 
21
+ class Magick::Pixel
22
+ def =~ (other)
23
+ other = Magick::Pixel.from_color(other) if other.is_a?(String)
24
+
25
+ red == other.red && green == other.green && blue == other.blue
26
+ end
27
+ end
28
+
21
29
  ENV['TESSDATA_PREFIX'] = './'
22
30
 
23
31
  Tesseract::Engine.new {|engine|
@@ -33,20 +41,20 @@ Tesseract::Engine.new {|engine|
33
41
  pixels[p] += 1
34
42
  }
35
43
 
36
- pixels.delete(Magick::Pixel.from_color('black'))
44
+ pixels.reject! { |p| p =~ 'black' }
37
45
 
38
46
  text_color, count = pixels.max { |a, b| a.last <=> b.last }
39
47
 
40
48
  image.each_pixel {|p, x, y|
41
- next unless p == text_color or p.to_color == 'black'
49
+ next unless p =~ text_color or p =~ 'black'
42
50
 
43
- image.pixel_color x, y, p == text_color ? 'black' : 'white'
51
+ image.pixel_color x, y, p =~ text_color ? 'black' : 'white'
44
52
  }
45
53
 
46
54
  image.each_pixel {|p, x, y|
47
- next if p.to_color == 'black' || p.to_color == 'white'
55
+ next if p =~ 'black' || p =~ 'white'
48
56
 
49
- if near(x, y).map { |(x, y)| image.pixel_color x, y }.any? { |p| p.to_color == 'black' }
57
+ if near(x, y).map { |(x, y)| image.pixel_color x, y }.any? { |p| p =~ 'black' }
50
58
  image.pixel_color x, y, 'gray'
51
59
  else
52
60
  image.pixel_color x, y, 'white'
@@ -54,7 +62,7 @@ Tesseract::Engine.new {|engine|
54
62
  }
55
63
 
56
64
  image.each_pixel {|p, x, y|
57
- next unless p.to_color == 'gray'
65
+ next unless p =~ 'gray'
58
66
 
59
67
  image.pixel_color x, y, 'black'
60
68
  }
data/lib/tesseract.rb CHANGED
@@ -22,5 +22,4 @@
22
22
  # or implied, of meh.
23
23
  #++
24
24
 
25
- require 'tesseract/api'
26
- require 'tesseract/engine'
25
+ require 'tesseract-ocr'
@@ -31,7 +31,7 @@ module Tesseract
31
31
  class Engine
32
32
  attr_reader :config
33
33
 
34
- namedic :path, :language, :mode, :variables,
34
+ named :path, :language, :mode, :variables,
35
35
  :optional => { :path => '.', :language => :eng, :mode => :DEFAULT, :variables => {}, :config => [] },
36
36
  :alias => { :data => :path, :lang => :language }
37
37
  def initialize (path = '.', language = :eng, mode = :DEFAULT, variables = {}, config = [], &block) # :yields: self
@@ -135,14 +135,14 @@ class Engine
135
135
  @image = image
136
136
  end
137
137
 
138
- namedic :x, :y, :width, :height,
138
+ named :x, :y, :width, :height,
139
139
  :optional => 0 .. -1,
140
140
  :alias => { :w => :width, :h => :height }
141
141
  def select (x = nil, y = nil, width = nil, height = nil)
142
142
  @rectangle = [x, y, width, height]
143
143
  end
144
144
 
145
- namedic :image, :x, :y, :width, :height,
145
+ named :image, :x, :y, :width, :height,
146
146
  :optional => 0 .. -1,
147
147
  :alias => { :w => :width, :h => :height }
148
148
  def text_for (image = nil, x = nil, y = nil, width = nil, height = nil)
@@ -160,7 +160,7 @@ class Engine
160
160
  }
161
161
  end
162
162
 
163
- namedic :x, :y, :width, :height,
163
+ named :x, :y, :width, :height,
164
164
  :optional => 0 .. -1,
165
165
  :alias => { :w => :width, :h => :height }
166
166
  def text_at (x = nil, y = nil, width = nil, height = nil)
@@ -178,7 +178,7 @@ class Engine
178
178
  _iterator.__send__ "each_#{level}", &block
179
179
  end
180
180
 
181
- namedic :image, :x, :y, :width, :height,
181
+ named :image, :x, :y, :width, :height,
182
182
  :optional => 0 .. -1,
183
183
  :alias => { :w => :width, :h => :height }
184
184
  define_method "each_#{level}_for" do |image = nil, x = nil, y = nil, width = nil, height = nil, &block|
@@ -188,7 +188,7 @@ class Engine
188
188
  __send__ "each_#{level}", &block
189
189
  end
190
190
 
191
- namedic :x, :y, :width, :height,
191
+ named :x, :y, :width, :height,
192
192
  :optional => 0 .. -1,
193
193
  :alias => { :w => :width, :h => :height }
194
194
  define_method "each_#{level}_at" do |x = nil, y = nil, width = nil, height = nil, &block|
@@ -199,7 +199,7 @@ class Engine
199
199
  _iterator.__send__ "#{level}s"
200
200
  end
201
201
 
202
- namedic :image, :x, :y, :width, :height,
202
+ named :image, :x, :y, :width, :height,
203
203
  :optional => 0 .. -1,
204
204
  :alias => { :w => :width, :h => :height }
205
205
  define_method "#{level}s_for" do |image = nil, x = nil, y = nil, width = nil, height = nil|
@@ -209,7 +209,7 @@ class Engine
209
209
  __send__ "#{level}s"
210
210
  end
211
211
 
212
- namedic :x, :y, :width, :height,
212
+ named :x, :y, :width, :height,
213
213
  :optional => 0 .. -1,
214
214
  :alias => { :w => :width, :h => :height }
215
215
  define_method "#{level}s_at" do |x = nil, y = nil, width = nil, height = nil|
@@ -143,7 +143,7 @@ class Iterator
143
143
  define_method "#{level}s" do
144
144
  __send__("each_#{level}").map {|e|
145
145
  e.methods.each {|name|
146
- if e.respond_to? "__memoized_#{name}"
146
+ if e.is_memoized?(name)
147
147
  e.__send__ name
148
148
  end
149
149
  }
@@ -22,6 +22,6 @@
22
22
  # or implied, of meh.
23
23
  #++
24
24
 
25
- require 'namedic'
26
- require 'memoized'
25
+ require 'call-me/named'
26
+ require 'call-me/memoize'
27
27
  require 'iso-639'
@@ -24,6 +24,6 @@
24
24
 
25
25
  module Tesseract
26
26
  def self.version
27
- '0.1.0'
27
+ '0.1.1'
28
28
  end
29
29
  end
@@ -14,8 +14,7 @@ Gem::Specification.new {|s|
14
14
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
15
15
  s.require_paths = ['lib']
16
16
 
17
- s.add_dependency 'namedic'
18
- s.add_dependency 'memoized'
17
+ s.add_dependency 'call-me'
19
18
  s.add_dependency 'iso-639'
20
19
 
21
20
  s.add_dependency 'ffi-extra'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tesseract-ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-12-03 00:00:00.000000000 Z
12
+ date: 2011-12-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: namedic
16
- requirement: &20715740 !ruby/object:Gem::Requirement
15
+ name: call-me
16
+ requirement: &15076460 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,21 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *20715740
25
- - !ruby/object:Gem::Dependency
26
- name: memoized
27
- requirement: &20730960 !ruby/object:Gem::Requirement
28
- none: false
29
- requirements:
30
- - - ! '>='
31
- - !ruby/object:Gem::Version
32
- version: '0'
33
- type: :runtime
34
- prerelease: false
35
- version_requirements: *20730960
24
+ version_requirements: *15076460
36
25
  - !ruby/object:Gem::Dependency
37
26
  name: iso-639
38
- requirement: &20729480 !ruby/object:Gem::Requirement
27
+ requirement: &15075360 !ruby/object:Gem::Requirement
39
28
  none: false
40
29
  requirements:
41
30
  - - ! '>='
@@ -43,10 +32,10 @@ dependencies:
43
32
  version: '0'
44
33
  type: :runtime
45
34
  prerelease: false
46
- version_requirements: *20729480
35
+ version_requirements: *15075360
47
36
  - !ruby/object:Gem::Dependency
48
37
  name: ffi-extra
49
- requirement: &20727420 !ruby/object:Gem::Requirement
38
+ requirement: &15074140 !ruby/object:Gem::Requirement
50
39
  none: false
51
40
  requirements:
52
41
  - - ! '>='
@@ -54,10 +43,10 @@ dependencies:
54
43
  version: '0'
55
44
  type: :runtime
56
45
  prerelease: false
57
- version_requirements: *20727420
46
+ version_requirements: *15074140
58
47
  - !ruby/object:Gem::Dependency
59
48
  name: ffi-inliner
60
- requirement: &20726120 !ruby/object:Gem::Requirement
49
+ requirement: &15072700 !ruby/object:Gem::Requirement
61
50
  none: false
62
51
  requirements:
63
52
  - - ! '>='
@@ -65,7 +54,7 @@ dependencies:
65
54
  version: '0'
66
55
  type: :runtime
67
56
  prerelease: false
68
- version_requirements: *20726120
57
+ version_requirements: *15072700
69
58
  description:
70
59
  email: meh@paranoici.org
71
60
  executables: