tesseract-ocr 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,12 +7,12 @@ require 'shellwords'
7
7
  options = {}
8
8
 
9
9
  OptionParser.new do |o|
10
- o.on '-b', '--box FILE', 'the box file to use' do |value|
11
- options[:box] = File.realpath(value)
10
+ o.on '-b', '--box FILE...', Array, 'the box files to use' do |value|
11
+ options[:box] = value.map { |path| File.realpath(path) }
12
12
  end
13
13
 
14
- o.on '-i', '--image FILE', 'the image file to use' do |value|
15
- options[:image] = File.realpath(value)
14
+ o.on '-i', '--image FILE...', Array, 'the image files to use' do |value|
15
+ options[:image] = value.map { |path| File.realpath(path) }
16
16
  end
17
17
 
18
18
  o.on '-o', '--output FILE', 'the path where to output the traineddata' do |value|
@@ -25,24 +25,28 @@ if language = ARGV.shift
25
25
  options[:image] = File.realpath("#{language}.tif")
26
26
  options[:output] = File.expand_path("#{language}.traineddata")
27
27
  else
28
- language = options[:box][/^(.*?)\./, 1]
28
+ language = options[:output][/^(.*?)\./, 1]
29
29
  end
30
30
 
31
31
  Dir.chdir FileUtils.mkpath(File.join(Dir.tmpdir, rand.to_s)).first
32
32
 
33
33
  language = language.shellescape
34
34
 
35
- %x{
36
- cp #{options[:box].shellescape} #{language}.box
37
- cp #{options[:image].shellescape} #{language}#{File.extname(options[:image])}
35
+ options[:box].each_with_index {|box, index|
36
+ %x{
37
+ cp #{box.shellescape} #{language}.#{index}.box
38
+ cp #{options[:image][index].shellescape} #{language}.#{index}#{File.extname(options[:image][index]}
39
+
40
+ tesseract #{language}#{File.extname(options[:image])} #{language} nobatch box.train.stderr
38
41
 
39
- tesseract #{language}#{File.extname(options[:image])} #{language} nobatch box.train.stderr
42
+ unicharset_extractor #{language}.box
40
43
 
41
- unicharset_extractor #{language}.box
44
+ echo #{language}.#{index} 0 0 0 0 0 >> font_properties
45
+ mftraining -F font_properties -U unicharset -O #{language}.unicharset #{language}.tr
46
+ }
47
+ }
42
48
 
43
- echo #{language} 0 0 0 0 0 > font_properties
44
- mftraining -F font_properties -U unicharset #{language}.tr
45
- mftraining -F font_properties -U unicharset -O #{language}.unicharset #{language}.tr
49
+ %x{
46
50
  cntraining #{language}.tr
47
51
 
48
52
  mv Microfeat #{language}.Microfeat
@@ -18,6 +18,14 @@ def near (x, y)
18
18
  ]
19
19
  end
20
20
 
21
+ class Magick::Pixel
22
+ def =~ (other)
23
+ other = Magick::Pixel.from_color(other) if other.is_a?(String)
24
+
25
+ red == other.red && green == other.green && blue == other.blue
26
+ end
27
+ end
28
+
21
29
  ENV['TESSDATA_PREFIX'] = './'
22
30
 
23
31
  Tesseract::Engine.new {|engine|
@@ -33,20 +41,20 @@ Tesseract::Engine.new {|engine|
33
41
  pixels[p] += 1
34
42
  }
35
43
 
36
- pixels.delete(Magick::Pixel.from_color('black'))
44
+ pixels.reject! { |p| p =~ 'black' }
37
45
 
38
46
  text_color, count = pixels.max { |a, b| a.last <=> b.last }
39
47
 
40
48
  image.each_pixel {|p, x, y|
41
- next unless p == text_color or p.to_color == 'black'
49
+ next unless p =~ text_color or p =~ 'black'
42
50
 
43
- image.pixel_color x, y, p == text_color ? 'black' : 'white'
51
+ image.pixel_color x, y, p =~ text_color ? 'black' : 'white'
44
52
  }
45
53
 
46
54
  image.each_pixel {|p, x, y|
47
- next if p.to_color == 'black' || p.to_color == 'white'
55
+ next if p =~ 'black' || p =~ 'white'
48
56
 
49
- if near(x, y).map { |(x, y)| image.pixel_color x, y }.any? { |p| p.to_color == 'black' }
57
+ if near(x, y).map { |(x, y)| image.pixel_color x, y }.any? { |p| p =~ 'black' }
50
58
  image.pixel_color x, y, 'gray'
51
59
  else
52
60
  image.pixel_color x, y, 'white'
@@ -54,7 +62,7 @@ Tesseract::Engine.new {|engine|
54
62
  }
55
63
 
56
64
  image.each_pixel {|p, x, y|
57
- next unless p.to_color == 'gray'
65
+ next unless p =~ 'gray'
58
66
 
59
67
  image.pixel_color x, y, 'black'
60
68
  }
data/lib/tesseract.rb CHANGED
@@ -22,5 +22,4 @@
22
22
  # or implied, of meh.
23
23
  #++
24
24
 
25
- require 'tesseract/api'
26
- require 'tesseract/engine'
25
+ require 'tesseract-ocr'
@@ -31,7 +31,7 @@ module Tesseract
31
31
  class Engine
32
32
  attr_reader :config
33
33
 
34
- namedic :path, :language, :mode, :variables,
34
+ named :path, :language, :mode, :variables,
35
35
  :optional => { :path => '.', :language => :eng, :mode => :DEFAULT, :variables => {}, :config => [] },
36
36
  :alias => { :data => :path, :lang => :language }
37
37
  def initialize (path = '.', language = :eng, mode = :DEFAULT, variables = {}, config = [], &block) # :yields: self
@@ -135,14 +135,14 @@ class Engine
135
135
  @image = image
136
136
  end
137
137
 
138
- namedic :x, :y, :width, :height,
138
+ named :x, :y, :width, :height,
139
139
  :optional => 0 .. -1,
140
140
  :alias => { :w => :width, :h => :height }
141
141
  def select (x = nil, y = nil, width = nil, height = nil)
142
142
  @rectangle = [x, y, width, height]
143
143
  end
144
144
 
145
- namedic :image, :x, :y, :width, :height,
145
+ named :image, :x, :y, :width, :height,
146
146
  :optional => 0 .. -1,
147
147
  :alias => { :w => :width, :h => :height }
148
148
  def text_for (image = nil, x = nil, y = nil, width = nil, height = nil)
@@ -160,7 +160,7 @@ class Engine
160
160
  }
161
161
  end
162
162
 
163
- namedic :x, :y, :width, :height,
163
+ named :x, :y, :width, :height,
164
164
  :optional => 0 .. -1,
165
165
  :alias => { :w => :width, :h => :height }
166
166
  def text_at (x = nil, y = nil, width = nil, height = nil)
@@ -178,7 +178,7 @@ class Engine
178
178
  _iterator.__send__ "each_#{level}", &block
179
179
  end
180
180
 
181
- namedic :image, :x, :y, :width, :height,
181
+ named :image, :x, :y, :width, :height,
182
182
  :optional => 0 .. -1,
183
183
  :alias => { :w => :width, :h => :height }
184
184
  define_method "each_#{level}_for" do |image = nil, x = nil, y = nil, width = nil, height = nil, &block|
@@ -188,7 +188,7 @@ class Engine
188
188
  __send__ "each_#{level}", &block
189
189
  end
190
190
 
191
- namedic :x, :y, :width, :height,
191
+ named :x, :y, :width, :height,
192
192
  :optional => 0 .. -1,
193
193
  :alias => { :w => :width, :h => :height }
194
194
  define_method "each_#{level}_at" do |x = nil, y = nil, width = nil, height = nil, &block|
@@ -199,7 +199,7 @@ class Engine
199
199
  _iterator.__send__ "#{level}s"
200
200
  end
201
201
 
202
- namedic :image, :x, :y, :width, :height,
202
+ named :image, :x, :y, :width, :height,
203
203
  :optional => 0 .. -1,
204
204
  :alias => { :w => :width, :h => :height }
205
205
  define_method "#{level}s_for" do |image = nil, x = nil, y = nil, width = nil, height = nil|
@@ -209,7 +209,7 @@ class Engine
209
209
  __send__ "#{level}s"
210
210
  end
211
211
 
212
- namedic :x, :y, :width, :height,
212
+ named :x, :y, :width, :height,
213
213
  :optional => 0 .. -1,
214
214
  :alias => { :w => :width, :h => :height }
215
215
  define_method "#{level}s_at" do |x = nil, y = nil, width = nil, height = nil|
@@ -143,7 +143,7 @@ class Iterator
143
143
  define_method "#{level}s" do
144
144
  __send__("each_#{level}").map {|e|
145
145
  e.methods.each {|name|
146
- if e.respond_to? "__memoized_#{name}"
146
+ if e.is_memoized?(name)
147
147
  e.__send__ name
148
148
  end
149
149
  }
@@ -22,6 +22,6 @@
22
22
  # or implied, of meh.
23
23
  #++
24
24
 
25
- require 'namedic'
26
- require 'memoized'
25
+ require 'call-me/named'
26
+ require 'call-me/memoize'
27
27
  require 'iso-639'
@@ -24,6 +24,6 @@
24
24
 
25
25
  module Tesseract
26
26
  def self.version
27
- '0.1.0'
27
+ '0.1.1'
28
28
  end
29
29
  end
@@ -14,8 +14,7 @@ Gem::Specification.new {|s|
14
14
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
15
15
  s.require_paths = ['lib']
16
16
 
17
- s.add_dependency 'namedic'
18
- s.add_dependency 'memoized'
17
+ s.add_dependency 'call-me'
19
18
  s.add_dependency 'iso-639'
20
19
 
21
20
  s.add_dependency 'ffi-extra'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tesseract-ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-12-03 00:00:00.000000000 Z
12
+ date: 2011-12-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: namedic
16
- requirement: &20715740 !ruby/object:Gem::Requirement
15
+ name: call-me
16
+ requirement: &15076460 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,21 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *20715740
25
- - !ruby/object:Gem::Dependency
26
- name: memoized
27
- requirement: &20730960 !ruby/object:Gem::Requirement
28
- none: false
29
- requirements:
30
- - - ! '>='
31
- - !ruby/object:Gem::Version
32
- version: '0'
33
- type: :runtime
34
- prerelease: false
35
- version_requirements: *20730960
24
+ version_requirements: *15076460
36
25
  - !ruby/object:Gem::Dependency
37
26
  name: iso-639
38
- requirement: &20729480 !ruby/object:Gem::Requirement
27
+ requirement: &15075360 !ruby/object:Gem::Requirement
39
28
  none: false
40
29
  requirements:
41
30
  - - ! '>='
@@ -43,10 +32,10 @@ dependencies:
43
32
  version: '0'
44
33
  type: :runtime
45
34
  prerelease: false
46
- version_requirements: *20729480
35
+ version_requirements: *15075360
47
36
  - !ruby/object:Gem::Dependency
48
37
  name: ffi-extra
49
- requirement: &20727420 !ruby/object:Gem::Requirement
38
+ requirement: &15074140 !ruby/object:Gem::Requirement
50
39
  none: false
51
40
  requirements:
52
41
  - - ! '>='
@@ -54,10 +43,10 @@ dependencies:
54
43
  version: '0'
55
44
  type: :runtime
56
45
  prerelease: false
57
- version_requirements: *20727420
46
+ version_requirements: *15074140
58
47
  - !ruby/object:Gem::Dependency
59
48
  name: ffi-inliner
60
- requirement: &20726120 !ruby/object:Gem::Requirement
49
+ requirement: &15072700 !ruby/object:Gem::Requirement
61
50
  none: false
62
51
  requirements:
63
52
  - - ! '>='
@@ -65,7 +54,7 @@ dependencies:
65
54
  version: '0'
66
55
  type: :runtime
67
56
  prerelease: false
68
- version_requirements: *20726120
57
+ version_requirements: *15072700
69
58
  description:
70
59
  email: meh@paranoici.org
71
60
  executables: