rtesseract 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +20 -0
- data/Rakefile +3 -1
- data/lib/rtesseract/errors.rb +2 -0
- data/lib/rtesseract/mixed.rb +1 -1
- data/lib/rtesseract.rb +32 -9
- data/rtesseract.gemspec +5 -2
- data/test/test_mixed.rb +12 -4
- metadata +24 -8
data/README.rdoc
CHANGED
@@ -24,6 +24,26 @@ It's very simple to use rtesseract:
|
|
24
24
|
image.source = "new_image.png"
|
25
25
|
image.to_s
|
26
26
|
|
27
|
+
=== CONVERT PARTS OF IMAGE TO STRING
|
28
|
+
|
29
|
+
mix_block = RTesseract::Mixed.new("test.jpg") do |image|
|
30
|
+
image.area(28, 19, 25, 25)
|
31
|
+
image.area(180, 22, 20, 28)
|
32
|
+
image.area(218, 22, 24, 28)
|
33
|
+
image.area(248, 24, 22, 22)
|
34
|
+
end
|
35
|
+
mix_block.to_s
|
36
|
+
|
37
|
+
OR
|
38
|
+
|
39
|
+
mix_block = RTesseract::Mixed.new("test.jpg",{:areas => [
|
40
|
+
{:x => 28, :y=>19, :width=>25, :height=>25 },
|
41
|
+
{:x => 180, :y=>22, :width=>20, :height=>28},
|
42
|
+
{:x => 218, :y=>22, :width=>24, :height=>28},
|
43
|
+
{:x => 248, :y=>24, :width=>22, :height=>22}
|
44
|
+
]})
|
45
|
+
mix_block.to_s
|
46
|
+
|
27
47
|
== Note on Patches/Pull Requests
|
28
48
|
|
29
49
|
* Fork the project.
|
data/Rakefile
CHANGED
@@ -5,13 +5,15 @@ begin
|
|
5
5
|
require 'jeweler'
|
6
6
|
Jeweler::Tasks.new do |gem|
|
7
7
|
gem.name = "rtesseract"
|
8
|
-
gem.version = '0.0.6'
|
8
|
+
gem.version = '0.0.7'
|
9
9
|
gem.summary = "Ruby library for working with the Tesseract OCR."
|
10
10
|
gem.description = "Ruby library for working with the Tesseract OCR."
|
11
11
|
gem.email = "dannnylo@gmail.com"
|
12
12
|
gem.homepage = "http://github.com/dannnylo/rtesseract"
|
13
13
|
gem.authors = ["Danilo Jeremias da Silva"]
|
14
|
+
gem.add_development_dependency "jeweler", ">=1.4.0"
|
14
15
|
gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
|
16
|
+
|
15
17
|
gem.add_runtime_dependency "rmagick", '>= 2.10.1'
|
16
18
|
end
|
17
19
|
Jeweler::GemcutterTasks.new
|
data/lib/rtesseract/errors.rb
CHANGED
data/lib/rtesseract/mixed.rb
CHANGED
data/lib/rtesseract.rb
CHANGED
@@ -6,15 +6,19 @@ require "rtesseract/errors"
|
|
6
6
|
require "rtesseract/mixed"
|
7
7
|
|
8
8
|
class RTesseract
|
9
|
-
VERSION = '0.0.6'
|
9
|
+
VERSION = '0.0.7'
|
10
10
|
attr_accessor :options
|
11
11
|
attr_writer :lang
|
12
|
+
attr_writer :psm
|
12
13
|
|
13
14
|
def initialize(src="", options={})
|
14
15
|
@uid = options.delete(:uid) || nil
|
15
16
|
@source = Pathname.new src
|
16
17
|
@command = options.delete(:command) || "tesseract"
|
17
18
|
@lang = options.delete(:lang) || options.delete("lang") || ""
|
19
|
+
@psm = options.delete(:psm) || options.delete("psm") || nil
|
20
|
+
@clear_console_output = options.delete(:clear_console_output)
|
21
|
+
@clear_console_output = true if @clear_console_output.nil?
|
18
22
|
@options = options
|
19
23
|
@value = ""
|
20
24
|
@x,@y,@w,@h = []
|
@@ -55,8 +59,8 @@ class RTesseract
|
|
55
59
|
end
|
56
60
|
end
|
57
61
|
true
|
58
|
-
|
59
|
-
|
62
|
+
rescue
|
63
|
+
raise RTesseract::TempFilesNotRemovedError
|
60
64
|
end
|
61
65
|
|
62
66
|
def generate_uid
|
@@ -77,15 +81,28 @@ class RTesseract
|
|
77
81
|
## * vie - Vietnamese
|
78
82
|
## Note: Make sure you have installed the language to tesseract
|
79
83
|
def lang
|
80
|
-
language = "#{@lang}".strip
|
81
|
-
{
|
82
|
-
|
84
|
+
language = "#{@lang}".strip.downcase
|
85
|
+
{ #Aliases to languages names
|
86
|
+
"eng" => ["en","en-us","english"],
|
87
|
+
"ita" => ["it"],
|
88
|
+
"por" => ["pt","pt-br","portuguese"],
|
89
|
+
"spa" => ["sp"]
|
90
|
+
}.each do |value,names|
|
91
|
+
return " -l #{value} " if names.include? language
|
83
92
|
end
|
93
|
+
return " -l #{language} " if language.size > 0
|
84
94
|
""
|
85
95
|
rescue
|
86
96
|
""
|
87
97
|
end
|
88
98
|
|
99
|
+
#Page Segment Mode
|
100
|
+
def psm
|
101
|
+
@psm.nil? ? "" : " -psm #{@psm} "
|
102
|
+
rescue
|
103
|
+
""
|
104
|
+
end
|
105
|
+
|
89
106
|
def config
|
90
107
|
@options ||= {}
|
91
108
|
@options.collect{|k,v| "#{k} #{v}" }.join("\n")
|
@@ -98,17 +115,23 @@ class RTesseract
|
|
98
115
|
conf.path
|
99
116
|
end
|
100
117
|
|
118
|
+
#TODO: Clear console for MacOS or Windows
|
119
|
+
def clear_console_output
|
120
|
+
return "" unless @clear_console_output
|
121
|
+
return "2>/dev/null" if File.exist?("/dev/null") #Linux console clear
|
122
|
+
end
|
123
|
+
|
101
124
|
#Convert image to string
|
102
125
|
def convert
|
103
126
|
generate_uid
|
104
127
|
tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}")
|
105
128
|
tmp_image = image_to_tiff
|
106
|
-
`#{@command} #{tmp_image} #{tmp_file.to_s} #{lang} #{config_file}`
|
129
|
+
`#{@command} #{tmp_image} #{tmp_file.to_s} #{lang} #{psm} #{config_file} #{clear_console_output}`
|
107
130
|
@value = File.read("#{tmp_file.to_s}.txt").to_s
|
108
131
|
@uid = nil
|
109
132
|
remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
|
110
133
|
rescue
|
111
|
-
raise
|
134
|
+
raise RTesseract::ConversionError
|
112
135
|
end
|
113
136
|
|
114
137
|
#Output value
|
@@ -118,7 +141,7 @@ class RTesseract
|
|
118
141
|
convert
|
119
142
|
@value
|
120
143
|
else
|
121
|
-
raise
|
144
|
+
raise RTesseract::ImageNotSelectedError
|
122
145
|
end
|
123
146
|
end
|
124
147
|
|
data/rtesseract.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{rtesseract}
|
8
|
-
s.version = "0.0.6"
|
8
|
+
s.version = "0.0.7"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Danilo Jeremias da Silva"]
|
12
|
-
s.date = %q{2011-03-
|
12
|
+
s.date = %q{2011-03-16}
|
13
13
|
s.description = %q{Ruby library for working with the Tesseract OCR.}
|
14
14
|
s.email = %q{dannnylo@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -49,13 +49,16 @@ Gem::Specification.new do |s|
|
|
49
49
|
s.specification_version = 3
|
50
50
|
|
51
51
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
52
|
+
s.add_development_dependency(%q<jeweler>, [">= 1.4.0"])
|
52
53
|
s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
53
54
|
s.add_runtime_dependency(%q<rmagick>, [">= 2.10.1"])
|
54
55
|
else
|
56
|
+
s.add_dependency(%q<jeweler>, [">= 1.4.0"])
|
55
57
|
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
56
58
|
s.add_dependency(%q<rmagick>, [">= 2.10.1"])
|
57
59
|
end
|
58
60
|
else
|
61
|
+
s.add_dependency(%q<jeweler>, [">= 1.4.0"])
|
59
62
|
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
60
63
|
s.add_dependency(%q<rmagick>, [">= 2.10.1"])
|
61
64
|
end
|
data/test/test_mixed.rb
CHANGED
@@ -16,12 +16,20 @@ class TestMixed < Test::Unit::TestCase
|
|
16
16
|
|
17
17
|
should "translate parts of the image to text" do
|
18
18
|
mix_block = RTesseract::Mixed.new(@image_tiff) do |image|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
image.area(28, 19, 25, 25) #position of 4
|
20
|
+
image.area(180, 22, 20, 28) # position of 3
|
21
|
+
image.area(218, 22, 24, 28) # position of z
|
22
|
+
image.area(248, 24, 22, 22) # position of z
|
23
23
|
end
|
24
24
|
assert_equal mix_block.to_s_without_spaces , "43ZZ"
|
25
|
+
|
26
|
+
mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [
|
27
|
+
{:x => 28, :y=>19, :width=>25, :height=>25 }, #position of 4
|
28
|
+
{:x => 180, :y=>22, :width=>20, :height=>28}, # position of 3
|
29
|
+
{:x => 218, :y=>22, :width=>24, :height=>28}, # position of z
|
30
|
+
{:x => 248, :y=>24, :width=>22, :height=>22} # position of z
|
31
|
+
]})
|
32
|
+
assert_equal mix_block.to_s_without_spaces , "43ZZ"
|
25
33
|
end
|
26
34
|
end
|
27
35
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 6
|
10
|
-
version: 0.0.6
|
9
|
+
- 7
|
10
|
+
version: 0.0.7
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Danilo Jeremias da Silva
|
@@ -15,13 +15,29 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-03-
|
18
|
+
date: 2011-03-16 00:00:00 -03:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
|
-
name: thoughtbot-shoulda
|
22
|
+
name: jeweler
|
23
23
|
prerelease: false
|
24
24
|
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 7
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 4
|
33
|
+
- 0
|
34
|
+
version: 1.4.0
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: thoughtbot-shoulda
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
25
41
|
none: false
|
26
42
|
requirements:
|
27
43
|
- - ">="
|
@@ -31,11 +47,11 @@ dependencies:
|
|
31
47
|
- 0
|
32
48
|
version: "0"
|
33
49
|
type: :development
|
34
|
-
version_requirements: *id001
|
50
|
+
version_requirements: *id002
|
35
51
|
- !ruby/object:Gem::Dependency
|
36
52
|
name: rmagick
|
37
53
|
prerelease: false
|
38
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
54
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
39
55
|
none: false
|
40
56
|
requirements:
|
41
57
|
- - ">="
|
@@ -47,7 +63,7 @@ dependencies:
|
|
47
63
|
- 1
|
48
64
|
version: 2.10.1
|
49
65
|
type: :runtime
|
50
|
-
version_requirements: *id002
|
66
|
+
version_requirements: *id003
|
51
67
|
description: Ruby library for working with the Tesseract OCR.
|
52
68
|
email: dannnylo@gmail.com
|
53
69
|
executables: []
|