tesseract-ocr 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,12 +7,8 @@ require 'shellwords'
7
7
  options = {}
8
8
 
9
9
  OptionParser.new do |o|
10
- o.on '-b', '--box FILE...', Array, 'the box files to use' do |value|
11
- options[:box] = value.map { |path| File.realpath(path) }
12
- end
13
-
14
- o.on '-i', '--image FILE...', Array, 'the image files to use' do |value|
15
- options[:image] = value.map { |path| File.realpath(path) }
10
+ o.on '-d', '--data DATA...', Array, 'the data to use' do |value|
11
+ options[:data] = Hash[value.map { |e| e.split(?:).map { |p| File.realpath(p) } }]
16
12
  end
17
13
 
18
14
  o.on '-o', '--output FILE', 'the path where to output the traineddata' do |value|
@@ -32,12 +28,12 @@ Dir.chdir FileUtils.mkpath(File.join(Dir.tmpdir, rand.to_s)).first
32
28
 
33
29
  language = language.shellescape
34
30
 
35
- options[:box].each_with_index {|box, index|
31
+ options[:data].each_with_index {|(box, image), index|
36
32
  %x{
37
33
  cp #{box.shellescape} #{language}.#{index}.box
38
- cp #{options[:image][index].shellescape} #{language}.#{index}#{File.extname(options[:image][index]}
34
+ cp #{image.shellescape} #{language}.#{index}#{File.extname(image)}
39
35
 
40
- tesseract #{language}#{File.extname(options[:image])} #{language} nobatch box.train.stderr
36
+ tesseract #{language}.#{index}#{File.extname(image)} #{language} nobatch box.train.stderr
41
37
 
42
38
  unicharset_extractor #{language}.box
43
39
 
@@ -60,8 +56,10 @@ options[:box].each_with_index {|box, index|
60
56
  mv #{language}.traineddata #{options[:output].shellescape}
61
57
  }
62
58
 
59
+ =begin
63
60
  path = File.realpath(Dir.pwd)
64
61
 
65
62
  Dir.chdir '/'
66
63
 
67
64
  FileUtils.rm_rf path
65
+ =end
data/bin/tesseract.rb CHANGED
@@ -44,6 +44,14 @@ OptionParser.new do |o|
44
44
  o.on '-w', '--whitelist LIST', 'whitelist the following chars' do |value|
45
45
  options[:whitelist] = value
46
46
  end
47
+
48
+ o.on '-s', '--scale VALUE', Float, 'scale the image before analyzing it' do |value|
49
+ options[:scale] = value
50
+ end
51
+
52
+ o.on '-r', '--resize VALUE', Float, 'resize the image before analyzing it' do |value|
53
+ options[:resize] = value
54
+ end
47
55
  end.parse!
48
56
 
49
57
  Tesseract::Engine.new(options[:path], options[:language], options[:mode]) {|engine|
@@ -53,11 +61,19 @@ Tesseract::Engine.new(options[:path], options[:language], options[:mode]) {|engi
53
61
  engine.page_segmentation_mode = options[:psm] if options[:psm]
54
62
  engine.load_config options[:config] if options[:config]
55
63
  }.tap {|engine|
64
+ image = if options[:scale]
65
+ require 'RMagick'; Magick::Image.read(ARGV.first).first.scale(options[:scale])
66
+ elsif options[:resize]
67
+ require 'RMagick'; Magick::Image.read(ARGV.first).first.resize(options[:resize])
68
+ else
69
+ ARGV.first
70
+ end
71
+
56
72
  if options[:unlv]
57
- puts engine.text_for(ARGV.first).unlv.strip
73
+ puts engine.text_for(image).unlv.strip
58
74
  elsif options[:confidence]
59
- puts engine.text_for(ARGV.first).confidence
75
+ puts engine.text_for(image).confidence
60
76
  else
61
- puts engine.text_for(ARGV.first).strip
77
+ puts engine.text_for(image).strip
62
78
  end
63
79
  }
@@ -26,7 +26,7 @@ class Magick::Pixel
26
26
  end
27
27
  end
28
28
 
29
- ENV['TESSDATA_PREFIX'] = './'
29
+ Tesseract.prefix = './'
30
30
 
31
31
  Tesseract::Engine.new {|engine|
32
32
  engine.language = :lol
data/lib/tesseract-ocr.rb CHANGED
@@ -22,5 +22,15 @@
22
22
  # or implied, of meh.
23
23
  #++
24
24
 
25
+ module Tesseract
26
+ def prefix
27
+ ENV['TESSDATA_PREFIX']
28
+ end
29
+
30
+ def prefix= (path)
31
+ ENV['TESSDATA_PREFIX'] = path
32
+ end
33
+ end
34
+
25
35
  require 'tesseract/api'
26
36
  require 'tesseract/engine'
@@ -24,6 +24,6 @@
24
24
 
25
25
  module Tesseract
26
26
  def self.version
27
- '0.1.1'
27
+ '0.1.2'
28
28
  end
29
29
  end
@@ -18,5 +18,5 @@ Gem::Specification.new {|s|
18
18
  s.add_dependency 'iso-639'
19
19
 
20
20
  s.add_dependency 'ffi-extra'
21
- s.add_dependency 'ffi-inliner'
21
+ s.add_dependency 'ffi-inline'
22
22
  }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tesseract-ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-12-07 00:00:00.000000000 Z
12
+ date: 2012-02-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: call-me
16
- requirement: &15076460 !ruby/object:Gem::Requirement
16
+ requirement: &19410320 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *15076460
24
+ version_requirements: *19410320
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: iso-639
27
- requirement: &15075360 !ruby/object:Gem::Requirement
27
+ requirement: &19408960 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *15075360
35
+ version_requirements: *19408960
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: ffi-extra
38
- requirement: &15074140 !ruby/object:Gem::Requirement
38
+ requirement: &19407840 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *15074140
46
+ version_requirements: *19407840
47
47
  - !ruby/object:Gem::Dependency
48
- name: ffi-inliner
49
- requirement: &15072700 !ruby/object:Gem::Requirement
48
+ name: ffi-inline
49
+ requirement: &19407000 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *15072700
57
+ version_requirements: *19407000
58
58
  description:
59
59
  email: meh@paranoici.org
60
60
  executables:
@@ -220,7 +220,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
220
220
  version: '0'
221
221
  requirements: []
222
222
  rubyforge_project:
223
- rubygems_version: 1.8.10
223
+ rubygems_version: 1.8.15
224
224
  signing_key:
225
225
  specification_version: 3
226
226
  summary: A wrapper library to the tesseract-ocr API.