tesseract-ocr 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,12 +7,8 @@ require 'shellwords'
7
7
  options = {}
8
8
 
9
9
  OptionParser.new do |o|
10
- o.on '-b', '--box FILE...', Array, 'the box files to use' do |value|
11
- options[:box] = value.map { |path| File.realpath(path) }
12
- end
13
-
14
- o.on '-i', '--image FILE...', Array, 'the image files to use' do |value|
15
- options[:image] = value.map { |path| File.realpath(path) }
10
+ o.on '-d', '--data DATA...', Array, 'the data to use' do |value|
11
+ options[:data] = Hash[value.map { |e| e.split(?:).map { |p| File.realpath(p) } }]
16
12
  end
17
13
 
18
14
  o.on '-o', '--output FILE', 'the path where to output the traineddata' do |value|
@@ -32,12 +28,12 @@ Dir.chdir FileUtils.mkpath(File.join(Dir.tmpdir, rand.to_s)).first
32
28
 
33
29
  language = language.shellescape
34
30
 
35
- options[:box].each_with_index {|box, index|
31
+ options[:data].each_with_index {|(box, image), index|
36
32
  %x{
37
33
  cp #{box.shellescape} #{language}.#{index}.box
38
- cp #{options[:image][index].shellescape} #{language}.#{index}#{File.extname(options[:image][index]}
34
+ cp #{image.shellescape} #{language}.#{index}#{File.extname(image)}
39
35
 
40
- tesseract #{language}#{File.extname(options[:image])} #{language} nobatch box.train.stderr
36
+ tesseract #{language}.#{index}#{File.extname(image)} #{language} nobatch box.train.stderr
41
37
 
42
38
  unicharset_extractor #{language}.box
43
39
 
@@ -60,8 +56,10 @@ options[:box].each_with_index {|box, index|
60
56
  mv #{language}.traineddata #{options[:output].shellescape}
61
57
  }
62
58
 
59
+ =begin
63
60
  path = File.realpath(Dir.pwd)
64
61
 
65
62
  Dir.chdir '/'
66
63
 
67
64
  FileUtils.rm_rf path
65
+ =end
data/bin/tesseract.rb CHANGED
@@ -44,6 +44,14 @@ OptionParser.new do |o|
44
44
  o.on '-w', '--whitelist LIST', 'whitelist the following chars' do |value|
45
45
  options[:whitelist] = value
46
46
  end
47
+
48
+ o.on '-s', '--scale VALUE', Float, 'scale the image before analyzing it' do |value|
49
+ options[:scale] = value
50
+ end
51
+
52
+ o.on '-r', '--resize VALUE', Float, 'resize the image before analyzing it' do |value|
53
+ options[:resize] = value
54
+ end
47
55
  end.parse!
48
56
 
49
57
  Tesseract::Engine.new(options[:path], options[:language], options[:mode]) {|engine|
@@ -53,11 +61,19 @@ Tesseract::Engine.new(options[:path], options[:language], options[:mode]) {|engi
53
61
  engine.page_segmentation_mode = options[:psm] if options[:psm]
54
62
  engine.load_config options[:config] if options[:config]
55
63
  }.tap {|engine|
64
+ image = if options[:scale]
65
+ require 'RMagick'; Magick::Image.read(ARGV.first).first.scale(options[:scale])
66
+ elsif options[:resize]
67
+ require 'RMagick'; Magick::Image.read(ARGV.first).first.resize(options[:resize])
68
+ else
69
+ ARGV.first
70
+ end
71
+
56
72
  if options[:unlv]
57
- puts engine.text_for(ARGV.first).unlv.strip
73
+ puts engine.text_for(image).unlv.strip
58
74
  elsif options[:confidence]
59
- puts engine.text_for(ARGV.first).confidence
75
+ puts engine.text_for(image).confidence
60
76
  else
61
- puts engine.text_for(ARGV.first).strip
77
+ puts engine.text_for(image).strip
62
78
  end
63
79
  }
@@ -26,7 +26,7 @@ class Magick::Pixel
26
26
  end
27
27
  end
28
28
 
29
- ENV['TESSDATA_PREFIX'] = './'
29
+ Tesseract.prefix = './'
30
30
 
31
31
  Tesseract::Engine.new {|engine|
32
32
  engine.language = :lol
data/lib/tesseract-ocr.rb CHANGED
@@ -22,5 +22,15 @@
22
22
  # or implied, of meh.
23
23
  #++
24
24
 
25
+ module Tesseract
26
+ def prefix
27
+ ENV['TESSDATA_PREFIX']
28
+ end
29
+
30
+ def prefix= (path)
31
+ ENV['TESSDATA_PREFIX'] = path
32
+ end
33
+ end
34
+
25
35
  require 'tesseract/api'
26
36
  require 'tesseract/engine'
@@ -24,6 +24,6 @@
24
24
 
25
25
  module Tesseract
26
26
  def self.version
27
- '0.1.1'
27
+ '0.1.2'
28
28
  end
29
29
  end
@@ -18,5 +18,5 @@ Gem::Specification.new {|s|
18
18
  s.add_dependency 'iso-639'
19
19
 
20
20
  s.add_dependency 'ffi-extra'
21
- s.add_dependency 'ffi-inliner'
21
+ s.add_dependency 'ffi-inline'
22
22
  }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tesseract-ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-12-07 00:00:00.000000000 Z
12
+ date: 2012-02-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: call-me
16
- requirement: &15076460 !ruby/object:Gem::Requirement
16
+ requirement: &19410320 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *15076460
24
+ version_requirements: *19410320
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: iso-639
27
- requirement: &15075360 !ruby/object:Gem::Requirement
27
+ requirement: &19408960 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *15075360
35
+ version_requirements: *19408960
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: ffi-extra
38
- requirement: &15074140 !ruby/object:Gem::Requirement
38
+ requirement: &19407840 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *15074140
46
+ version_requirements: *19407840
47
47
  - !ruby/object:Gem::Dependency
48
- name: ffi-inliner
49
- requirement: &15072700 !ruby/object:Gem::Requirement
48
+ name: ffi-inline
49
+ requirement: &19407000 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *15072700
57
+ version_requirements: *19407000
58
58
  description:
59
59
  email: meh@paranoici.org
60
60
  executables:
@@ -220,7 +220,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
220
220
  version: '0'
221
221
  requirements: []
222
222
  rubyforge_project:
223
- rubygems_version: 1.8.10
223
+ rubygems_version: 1.8.15
224
224
  signing_key:
225
225
  specification_version: 3
226
226
  summary: A wrapper library to the tesseract-ocr API.