tesseract-ocr 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
data/bin/tesseract-train.rb
CHANGED
@@ -7,12 +7,8 @@ require 'shellwords'
|
|
7
7
|
options = {}
|
8
8
|
|
9
9
|
OptionParser.new do |o|
|
10
|
-
o.on '-
|
11
|
-
options[:
|
12
|
-
end
|
13
|
-
|
14
|
-
o.on '-i', '--image FILE...', Array, 'the image files to use' do |value|
|
15
|
-
options[:image] = value.map { |path| File.realpath(path) }
|
10
|
+
o.on '-d', '--data DATA...', Array, 'the data to use' do |value|
|
11
|
+
options[:data] = Hash[value.map { |e| e.split(?:).map { |p| File.realpath(p) } }]
|
16
12
|
end
|
17
13
|
|
18
14
|
o.on '-o', '--output FILE', 'the path where to output the traineddata' do |value|
|
@@ -32,12 +28,12 @@ Dir.chdir FileUtils.mkpath(File.join(Dir.tmpdir, rand.to_s)).first
|
|
32
28
|
|
33
29
|
language = language.shellescape
|
34
30
|
|
35
|
-
options[:box].each_with_index {|box, index|
|
31
|
+
options[:data].each_with_index {|(box, image), index|
|
36
32
|
%x{
|
37
33
|
cp #{box.shellescape} #{language}.#{index}.box
|
38
|
-
cp #{
|
34
|
+
cp #{image.shellescape} #{language}.#{index}#{File.extname(image)}
|
39
35
|
|
40
|
-
tesseract #{language}#{File.extname(
|
36
|
+
tesseract #{language}.#{index}#{File.extname(image)} #{language} nobatch box.train.stderr
|
41
37
|
|
42
38
|
unicharset_extractor #{language}.box
|
43
39
|
|
@@ -60,8 +56,10 @@ options[:box].each_with_index {|box, index|
|
|
60
56
|
mv #{language}.traineddata #{options[:output].shellescape}
|
61
57
|
}
|
62
58
|
|
59
|
+
=begin
|
63
60
|
path = File.realpath(Dir.pwd)
|
64
61
|
|
65
62
|
Dir.chdir '/'
|
66
63
|
|
67
64
|
FileUtils.rm_rf path
|
65
|
+
=end
|
data/bin/tesseract.rb
CHANGED
@@ -44,6 +44,14 @@ OptionParser.new do |o|
|
|
44
44
|
o.on '-w', '--whitelist LIST', 'whitelist the following chars' do |value|
|
45
45
|
options[:whitelist] = value
|
46
46
|
end
|
47
|
+
|
48
|
+
o.on '-s', '--scale VALUE', Float, 'scale the image before analyzing it' do |value|
|
49
|
+
options[:scale] = value
|
50
|
+
end
|
51
|
+
|
52
|
+
o.on '-r', '--resize VALUE', Float, 'resize the image before analyzing it' do |value|
|
53
|
+
options[:resize] = value
|
54
|
+
end
|
47
55
|
end.parse!
|
48
56
|
|
49
57
|
Tesseract::Engine.new(options[:path], options[:language], options[:mode]) {|engine|
|
@@ -53,11 +61,19 @@ Tesseract::Engine.new(options[:path], options[:language], options[:mode]) {|engi
|
|
53
61
|
engine.page_segmentation_mode = options[:psm] if options[:psm]
|
54
62
|
engine.load_config options[:config] if options[:config]
|
55
63
|
}.tap {|engine|
|
64
|
+
image = if options[:scale]
|
65
|
+
require 'RMagick'; Magick::Image.read(ARGV.first).first.scale(options[:scale])
|
66
|
+
elsif options[:resize]
|
67
|
+
require 'RMagick'; Magick::Image.read(ARGV.first).first.resize(options[:resize])
|
68
|
+
else
|
69
|
+
ARGV.first
|
70
|
+
end
|
71
|
+
|
56
72
|
if options[:unlv]
|
57
|
-
puts engine.text_for(
|
73
|
+
puts engine.text_for(image).unlv.strip
|
58
74
|
elsif options[:confidence]
|
59
|
-
puts engine.text_for(
|
75
|
+
puts engine.text_for(image).confidence
|
60
76
|
else
|
61
|
-
puts engine.text_for(
|
77
|
+
puts engine.text_for(image).strip
|
62
78
|
end
|
63
79
|
}
|
Binary file
|
data/lib/tesseract-ocr.rb
CHANGED
data/lib/tesseract/version.rb
CHANGED
data/tesseract-ocr.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tesseract-ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-02-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: call-me
|
16
|
-
requirement: &
|
16
|
+
requirement: &19410320 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *19410320
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: iso-639
|
27
|
-
requirement: &
|
27
|
+
requirement: &19408960 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *19408960
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ffi-extra
|
38
|
-
requirement: &
|
38
|
+
requirement: &19407840 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *19407840
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
|
-
name: ffi-
|
49
|
-
requirement: &
|
48
|
+
name: ffi-inline
|
49
|
+
requirement: &19407000 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *19407000
|
58
58
|
description:
|
59
59
|
email: meh@paranoici.org
|
60
60
|
executables:
|
@@ -220,7 +220,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
220
220
|
version: '0'
|
221
221
|
requirements: []
|
222
222
|
rubyforge_project:
|
223
|
-
rubygems_version: 1.8.
|
223
|
+
rubygems_version: 1.8.15
|
224
224
|
signing_key:
|
225
225
|
specification_version: 3
|
226
226
|
summary: A wrapper library to the tesseract-ocr API.
|