mini_tesseract 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/LICENSE +20 -0
- data/README.rdoc +61 -0
- data/Rakefile +58 -0
- data/lib/processors/mini_magick.rb +12 -0
- data/lib/processors/rmagick.rb +12 -0
- data/lib/rtesseract.rb +157 -0
- data/lib/rtesseract/errors.rb +6 -0
- data/lib/rtesseract/mixed.rb +53 -0
- data/mini_tesseract.gemspec +67 -0
- data/rtesseract.gemspec +66 -0
- data/test/helper.rb +11 -0
- data/test/images/mixed.tif +0 -0
- data/test/images/test.bmp +0 -0
- data/test/images/test.jpg +0 -0
- data/test/images/test.png +0 -0
- data/test/images/test.tif +0 -0
- data/test/images/test1.tif +0 -0
- data/test/test_mixed.rb +36 -0
- data/test/test_rtesseract.rb +82 -0
- metadata +120 -0
data/.document
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010 Danilo Jeremias da Silva
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
= rtesseract
|
2
|
+
|
3
|
+
Ruby library for working with the Tesseract OCR.
|
4
|
+
|
5
|
+
== REQUIREMENTS:
|
6
|
+
|
7
|
+
rtesseract needs the following to work properly:
|
8
|
+
* Tesseract - Program
|
9
|
+
* ImageMagick - Program
|
10
|
+
* RMagick - Gem
|
11
|
+
|
12
|
+
== EXAMPLE USAGE
|
13
|
+
|
14
|
+
It's very simple to use rtesseract:
|
15
|
+
|
16
|
+
=== CONVERT IMAGE TO STRING
|
17
|
+
|
18
|
+
image = RTesseract.new("my_image.jpg")
|
19
|
+
image.to_s # Returns the recognized text
|
20
|
+
|
21
|
+
=== CHANGE THE IMAGE
|
22
|
+
|
23
|
+
image = RTesseract.new("my_image.jpg")
|
24
|
+
image.source = "new_image.png"
|
25
|
+
image.to_s
|
26
|
+
|
27
|
+
=== CONVERT PARTS OF IMAGE TO STRING
|
28
|
+
|
29
|
+
mix_block = RTesseract::Mixed.new("test.jpg") do |image|
|
30
|
+
image.area(28, 19, 25, 25)
|
31
|
+
image.area(180, 22, 20, 28)
|
32
|
+
image.area(218, 22, 24, 28)
|
33
|
+
image.area(248, 24, 22, 22)
|
34
|
+
end
|
35
|
+
mix_block.to_s
|
36
|
+
|
37
|
+
OR
|
38
|
+
|
39
|
+
mix_block = RTesseract::Mixed.new("test.jpg",{:areas => [
|
40
|
+
{:x => 28, :y=>19, :width=>25, :height=>25 },
|
41
|
+
{:x => 180, :y=>22, :width=>20, :height=>28},
|
42
|
+
{:x => 218, :y=>22, :width=>24, :height=>28},
|
43
|
+
{:x => 248, :y=>24, :width=>22, :height=>22}
|
44
|
+
]})
|
45
|
+
mix_block.to_s
|
46
|
+
|
47
|
+
== Note on Patches/Pull Requests
|
48
|
+
|
49
|
+
* Fork the project.
|
50
|
+
* Make your feature addition or bug fix.
|
51
|
+
* Send me a pull request. Bonus points for topic branches.
|
52
|
+
|
53
|
+
== Links
|
54
|
+
|
55
|
+
* Github - http://github.com/dannnylo/rtesseract
|
56
|
+
* Rubygems - http://rubygems.org/gems/rtesseract
|
57
|
+
|
58
|
+
== Copyright
|
59
|
+
|
60
|
+
Copyright (c) 2010 Danilo Jeremias da Silva. See LICENSE for details.
|
61
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# Build, test and documentation tasks for the mini_tesseract gem.
require 'rubygems'
require 'rake'

# Packaging tasks come from Jeweler. The remaining tasks must keep working
# when Jeweler is not installed, hence the LoadError rescue.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "mini_tesseract"
    gem.version = '0.0.7'
    gem.summary = "Ruby library for working with the Tesseract OCR."
    gem.description = "Ruby library for working with the Tesseract OCR."
    gem.email = "hoooopo@gmail.com"
    gem.homepage = "http://github.com/hooopo/rtesseract"
    gem.authors = ["Danilo Jeremias da Silva", "Hooopo"]
    gem.add_development_dependency "jeweler", ">=1.4.0"
    gem.add_development_dependency "thoughtbot-shoulda", ">= 0"

    gem.add_runtime_dependency "rmagick", '>= 2.10.1'
    gem.add_runtime_dependency "mini_magick", '~> 3.0'
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage is optional: without rcov the :rcov task only explains how to get it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined in this file (it is expected to come from
# the Jeweler tasks). Only depend on it when it exists, so `rake test` still
# runs after the LoadError fallback above.
task(:test => :check_dependencies) if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# Newer Rake releases no longer ship rake/rdoctask; prefer RDoc's own task and
# fall back to the legacy one on old installations.
begin
  require 'rdoc/task'
  rdoc_task = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task = Rake::RDocTask
end

rdoc_task.new do |rdoc|
  version = File.exist?('VERSION') ? File.read('VERSION') : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "rtesseract #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
58
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'mini_magick'
|
2
|
+
# Image pre-processing backed by MiniMagick. Mixed into objects that provide
# @source (a Pathname), the crop box @x/@y/@w/@h and #generate_uid.
module MiniMagickProcessor
  # Opens @source, converts it to the "tif" format, crops it to the
  # "#{@w}x#{@h}+#{@x}+#{@y}" geometry when any crop value is set, and writes
  # the result into the system temp directory.
  #
  # Returns the path of the written file as a String.
  def image_to_tiff
    generate_uid
    target = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s

    image = MiniMagick::Image.open(@source.to_s)
    image.format("tif")

    crop_box = [@x, @y, @w, @h]
    image.crop("#{@w}x#{@h}+#{@x}+#{@y}") unless crop_box.compact == []

    image.write target
    target
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'rmagick'
|
2
|
+
# Image pre-processing backed by RMagick. Mixed into objects that provide
# @source (a Pathname), the crop box @x/@y/@w/@h and #generate_uid.
module RMagickProcessor
  # Reads the first image of @source, crops it to (@x, @y, @w, @h) when any
  # crop value is set, and writes it to a ".tif" path in the system temp
  # directory.
  #
  # Returns the path of the written file as a String.
  def image_to_tiff
    # No console output here: a library must not write to the host program's
    # stdout on every conversion.
    generate_uid
    tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
    image = Magick::Image.read(@source.to_s).first
    image.crop!(@x, @y, @w, @h) unless [@x, @y, @w, @h].compact.empty?
    image.write tmp_file
    tmp_file
  end
end
|
data/lib/rtesseract.rb
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
require "pathname"
|
2
|
+
require "tempfile"
|
3
|
+
|
4
|
+
require "rtesseract/errors"
|
5
|
+
require "rtesseract/mixed"
|
6
|
+
|
7
|
+
require "shellwords"

# Thin wrapper around the tesseract command-line program: converts an image
# to TIFF through the selected processor, runs tesseract on it and exposes the
# recognized text.
class RTesseract
  VERSION = '0.0.7'

  # Aliases accepted by #lang, keyed by the Tesseract language code.
  LANGUAGE_ALIASES = {
    "eng" => ["en","en-us","english"],
    "ita" => ["it"],
    "por" => ["pt","pt-br","portuguese"],
    "spa" => ["sp"]
  }

  attr_accessor :options
  attr_writer :lang
  attr_writer :psm
  attr_reader :processor

  # src     - path of the image to read.
  # options - Hash. The keys :uid, :command, :lang, :psm,
  #           :clear_console_output and :processor configure this object;
  #           every remaining pair is written to the tesseract config file.
  def initialize(src = "", options = {})
    # Work on a copy: the deletes below must not strip keys from a hash the
    # caller may reuse for other instances.
    options = options.dup
    @uid = options.delete(:uid) || nil
    @source = Pathname.new src
    @command = options.delete(:command) || "tesseract"
    @lang = options.delete(:lang) || options.delete("lang") || ""
    @psm = options.delete(:psm) || options.delete("psm") || nil
    @clear_console_output = options.delete(:clear_console_output)
    @clear_console_output = true if @clear_console_output.nil?
    @processor = options.delete(:processor) || options.delete("processor")
    @options = options
    @value = ""
    @x, @y, @w, @h = []
    choose_processor!
  end

  # Points the object at another image and drops the cached text.
  def source= src
    @value = ""
    @source = Pathname.new src
  end

  def image_name
    @source.basename
  end

  #Crop image to convert
  def crop!(x,y,width,height)
    @x, @y, @w, @h = x, y, width, height
    self
  end

  #Remove files
  #
  # Best effort: a file that cannot be deleted is skipped. Returns true.
  # Raises RTesseract::TempFilesNotRemovedError when +files+ cannot be
  # iterated.
  def remove_file(files=[])
    files.each do |file|
      begin
        File.unlink(file) if File.exist?(file)
      rescue SystemCallError
        # Deliberately ignored. The previous fallback interpolated the file
        # name into a shell `rm -f`, which is unsafe for names containing
        # spaces or shell metacharacters.
      end
    end
    true
  rescue
    raise RTesseract::TempFilesNotRemovedError
  end

  # Returns the uid used to name temporary files, creating one on first use.
  def generate_uid
    @uid = rand.to_s[2,10] if @uid.nil?
    @uid
  end

  # Select the language
  #===Languages
  ## * eng   - English
  ## * deu   - German
  ## * deu-f - German fraktur
  ## * fra   - French
  ## * ita   - Italian
  ## * nld   - Dutch
  ## * por   - Portuguese
  ## * spa   - Spanish
  ## * vie   - Vietnamese
  ## Note: Make sure you have installed the language to tesseract
  #
  # Returns the " -l <code> " command-line fragment, or "" when no language
  # is set.
  def lang
    language = "#{@lang}".strip.downcase
    LANGUAGE_ALIASES.each do |value,names|
      return " -l #{value} " if names.include? language
    end
    return " -l #{language} " if language.size > 0
    ""
  rescue
    ""
  end

  #Page Segment Mode
  #
  # Returns the " -psm <mode> " command-line fragment, or "" when unset.
  def psm
    @psm.nil? ? "" : " -psm #{@psm} "
  rescue
    ""
  end

  # Renders the remaining options as "key value" lines.
  def config
    @options ||= {}
    @options.map { |k,v| "#{k} #{v}" }.join("\n")
  end

  # Writes #config to a temporary file and returns its path, or "" when there
  # are no options to write.
  def config_file
    return "" if @options == {}
    # Keep a reference: a Tempfile nobody references may be unlinked by the
    # garbage collector before tesseract reads it.
    @config_tempfile = Tempfile.new("config")
    @config_tempfile.write(config)
    # Flush so the content is on disk when another process opens the file.
    @config_tempfile.flush
    @config_tempfile.path
  end

  #TODO: Clear console for MacOS or Windows
  def clear_console_output
    return "" unless @clear_console_output
    File.exist?("/dev/null") ? "2>/dev/null" : "" #Linux console clear
  end

  #Convert image to string
  #
  # Runs tesseract on a TIFF copy of the source and stores the text it wrote.
  # Temporary files are removed whether or not the conversion succeeds.
  # Raises RTesseract::ConversionError on any failure.
  def convert
    generate_uid
    tmp_file  = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}")
    text_file = "#{tmp_file}.txt"
    tmp_image = nil
    begin
      tmp_image   = image_to_tiff
      config_path = config_file
      config_arg  = config_path.empty? ? "" : Shellwords.escape(config_path)
      # Paths are escaped because they embed the image's base name, which may
      # contain spaces or shell metacharacters.
      `#{@command} #{Shellwords.escape(tmp_image)} #{Shellwords.escape(tmp_file.to_s)} #{lang} #{psm} #{config_arg} #{clear_console_output}`
      @value = File.read(text_file).to_s
    ensure
      @uid = nil
      discard_config_file
      remove_file([tmp_image, text_file].compact)
    end
    true
  rescue
    raise RTesseract::ConversionError
  end

  #Output value
  #
  # Returns the cached text, converting first when nothing is cached.
  # Raises RTesseract::ImageNotSelectedError when the source is not a file.
  def to_s
    return @value if @value != ""
    if @source.file?
      convert
      @value
    else
      raise RTesseract::ImageNotSelectedError
    end
  end

  #Remove spaces and break-lines
  def to_s_without_spaces
    to_s.delete(" \n\r")
  end

  private

  # Loads the requested processor and attaches it to this instance only.
  # Including it into the class would make the first processors loaded win
  # for every instance, regardless of each instance's :processor option.
  def choose_processor!
    if @processor.to_s == "mini_magick"
      require File.expand_path(File.dirname(__FILE__) + "/processors/mini_magick.rb")
      extend MiniMagickProcessor
    else
      require File.expand_path(File.dirname(__FILE__) + "/processors/rmagick.rb")
      extend RMagickProcessor
    end
  end

  # Closes and unlinks the config Tempfile created by #config_file, if any.
  def discard_config_file
    return if @config_tempfile.nil?
    @config_tempfile.close!
    @config_tempfile = nil
  end
end
|
157
|
+
|
@@ -0,0 +1,53 @@
|
|
1
|
+
class RTesseract
  # Runs OCR on several rectangular areas of one image and concatenates the
  # text of each area, in the order the areas were added.
  class Mixed
    # src     - path of the image to read.
    # options - Hash. :areas is an optional list of
    #           {:x, :y, :width, :height} hashes; every other pair is passed
    #           to RTesseract.new for each area.
    #
    # Yields self when a block is given so areas can be added with #area.
    def initialize(src="", options={})
      @source = Pathname.new src
      # Copies keep the caller's hash and array untouched by the delete below
      # and by later #area calls.
      @options = options.dup
      @value = ""
      @areas = (@options.delete(:areas) || []).dup
      yield self if block_given?
    end

    # Adds an area to convert and drops the cached text.
    def area(x, y, width, height)
      @value = ""
      @areas << {:x => x, :y => y, :width => width, :height => height}
    end

    def areas
      @areas
    end

    # Removes every area. The cached text is dropped too, so #to_s does not
    # keep returning text of areas that no longer exist.
    def clear_areas
      @value = ""
      @areas = []
    end

    #Convert parts of image to string
    #
    # Raises RTesseract::ConversionError on any failure.
    def convert
      @value = ""
      @areas.each do |area|
        # RTesseract.new may consume keys from the hash it is given, so each
        # area gets its own copy and sees the full set of options.
        image = RTesseract.new(@source.to_s, @options.dup)
        image.crop!(area[:x].to_i, area[:y].to_i, area[:width].to_i, area[:height].to_i)
        @value << image.to_s
      end
    rescue
      raise RTesseract::ConversionError
    end

    #Output value
    #
    # Returns the cached text, converting first when nothing is cached.
    # Raises RTesseract::ImageNotSelectedError when the source is not a file.
    def to_s
      return @value if @value != ""
      if @source.file?
        convert
        @value
      else
        raise RTesseract::ImageNotSelectedError
      end
    end

    #Remove spaces and break-lines
    def to_s_without_spaces
      to_s.delete(" \n\r")
    end
  end
end
|
53
|
+
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{mini_tesseract}
|
8
|
+
s.version = "0.0.7"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Danilo Jeremias da Silva", "Hooopo"]
|
12
|
+
s.date = %q{2011-08-23}
|
13
|
+
s.description = %q{Ruby library for working with the Tesseract OCR.}
|
14
|
+
s.email = %q{hoooopo@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
"LICENSE",
|
22
|
+
"README.rdoc",
|
23
|
+
"Rakefile",
|
24
|
+
"lib/processors/mini_magick.rb",
|
25
|
+
"lib/processors/rmagick.rb",
|
26
|
+
"lib/rtesseract.rb",
|
27
|
+
"lib/rtesseract/errors.rb",
|
28
|
+
"lib/rtesseract/mixed.rb",
|
29
|
+
"mini_tesseract.gemspec",
|
30
|
+
"rtesseract.gemspec",
|
31
|
+
"test/helper.rb",
|
32
|
+
"test/images/mixed.tif",
|
33
|
+
"test/images/test.bmp",
|
34
|
+
"test/images/test.jpg",
|
35
|
+
"test/images/test.png",
|
36
|
+
"test/images/test.tif",
|
37
|
+
"test/images/test1.tif",
|
38
|
+
"test/test_mixed.rb",
|
39
|
+
"test/test_rtesseract.rb"
|
40
|
+
]
|
41
|
+
s.homepage = %q{http://github.com/hooopo/rtesseract}
|
42
|
+
s.require_paths = ["lib"]
|
43
|
+
s.rubygems_version = %q{1.3.9.2}
|
44
|
+
s.summary = %q{Ruby library for working with the Tesseract OCR.}
|
45
|
+
|
46
|
+
if s.respond_to? :specification_version then
|
47
|
+
s.specification_version = 3
|
48
|
+
|
49
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
50
|
+
s.add_development_dependency(%q<jeweler>, [">= 1.4.0"])
|
51
|
+
s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
52
|
+
s.add_runtime_dependency(%q<rmagick>, [">= 2.10.1"])
|
53
|
+
s.add_runtime_dependency(%q<mini_magick>, ["~> 3.0"])
|
54
|
+
else
|
55
|
+
s.add_dependency(%q<jeweler>, [">= 1.4.0"])
|
56
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
57
|
+
s.add_dependency(%q<rmagick>, [">= 2.10.1"])
|
58
|
+
s.add_dependency(%q<mini_magick>, ["~> 3.0"])
|
59
|
+
end
|
60
|
+
else
|
61
|
+
s.add_dependency(%q<jeweler>, [">= 1.4.0"])
|
62
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
63
|
+
s.add_dependency(%q<rmagick>, [">= 2.10.1"])
|
64
|
+
s.add_dependency(%q<mini_magick>, ["~> 3.0"])
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
data/rtesseract.gemspec
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{rtesseract}
|
8
|
+
s.version = "0.0.7"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Danilo Jeremias da Silva"]
|
12
|
+
s.date = %q{2011-03-16}
|
13
|
+
s.description = %q{Ruby library for working with the Tesseract OCR.}
|
14
|
+
s.email = %q{dannnylo@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
"LICENSE",
|
22
|
+
"README.rdoc",
|
23
|
+
"Rakefile",
|
24
|
+
"lib/rtesseract.rb",
|
25
|
+
"lib/rtesseract/errors.rb",
|
26
|
+
"lib/rtesseract/mixed.rb",
|
27
|
+
"rtesseract.gemspec",
|
28
|
+
"test/helper.rb",
|
29
|
+
"test/images/mixed.tif",
|
30
|
+
"test/images/test.bmp",
|
31
|
+
"test/images/test.jpg",
|
32
|
+
"test/images/test.png",
|
33
|
+
"test/images/test.tif",
|
34
|
+
"test/images/test1.tif",
|
35
|
+
"test/test_mixed.rb",
|
36
|
+
"test/test_rtesseract.rb"
|
37
|
+
]
|
38
|
+
s.homepage = %q{http://github.com/dannnylo/rtesseract}
|
39
|
+
s.require_paths = ["lib"]
|
40
|
+
s.rubygems_version = %q{1.4.2}
|
41
|
+
s.summary = %q{Ruby library for working with the Tesseract OCR.}
|
42
|
+
s.test_files = [
|
43
|
+
"test/helper.rb",
|
44
|
+
"test/test_mixed.rb",
|
45
|
+
"test/test_rtesseract.rb"
|
46
|
+
]
|
47
|
+
|
48
|
+
if s.respond_to? :specification_version then
|
49
|
+
s.specification_version = 3
|
50
|
+
|
51
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
52
|
+
s.add_development_dependency(%q<jeweler>, [">= 1.4.0"])
|
53
|
+
s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
54
|
+
s.add_runtime_dependency(%q<rmagick>, [">= 2.10.1"])
|
55
|
+
else
|
56
|
+
s.add_dependency(%q<jeweler>, [">= 1.4.0"])
|
57
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
58
|
+
s.add_dependency(%q<rmagick>, [">= 2.10.1"])
|
59
|
+
end
|
60
|
+
else
|
61
|
+
s.add_dependency(%q<jeweler>, [">= 1.4.0"])
|
62
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
63
|
+
s.add_dependency(%q<rmagick>, [">= 2.10.1"])
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
data/test/helper.rb
ADDED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/test/test_mixed.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'pathname'
|
3
|
+
|
4
|
+
# Exercises RTesseract::Mixed against the bundled images/mixed.tif fixture.
class TestMixed < Test::Unit::TestCase
  context "Path" do
    setup do
      @path = Pathname.new(__FILE__.gsub("test_mixed.rb","")).expand_path
      @image_tiff = @path.join("images","mixed.tif").to_s
    end

    should "be instantiable" do
      # assert_equal takes the expected value first, so failure messages
      # label the two values correctly.
      assert_equal RTesseract::Mixed, RTesseract::Mixed.new.class
      assert_equal RTesseract::Mixed, RTesseract::Mixed.new(@image_tiff).class
    end

    should "translate parts of the image to text" do
      # Areas added through the block form.
      mix_block = RTesseract::Mixed.new(@image_tiff) do |image|
        image.area(28,  19, 25, 25) # position of 4
        image.area(180, 22, 20, 28) # position of 3
        image.area(218, 22, 24, 28) # position of z
        image.area(248, 24, 22, 22) # position of z
      end
      assert_equal "43ZZ", mix_block.to_s_without_spaces

      # The same areas passed through the :areas option.
      mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [
        {:x => 28,  :y=>19, :width=>25, :height=>25 }, # position of 4
        {:x => 180, :y=>22, :width=>20, :height=>28},  # position of 3
        {:x => 218, :y=>22, :width=>24, :height=>28},  # position of z
        {:x => 248, :y=>24, :width=>22, :height=>22}   # position of z
      ]})
      assert_equal "43ZZ", mix_block.to_s_without_spaces
    end
  end
end
|
36
|
+
|
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'pathname'
|
3
|
+
|
4
|
+
# Exercises RTesseract against the bundled fixtures under images/.
#
# assert_equal takes the expected value first throughout, so failure messages
# label the two values correctly.
class TestRtesseract < Test::Unit::TestCase
  context "Path" do
    setup do
      @path = Pathname.new(__FILE__.gsub("test_rtesseract.rb","")).expand_path
      @image_tiff = @path.join("images","test.tif").to_s
    end

    should "be instantiable" do
      assert_equal RTesseract, RTesseract.new.class
      assert_equal RTesseract, RTesseract.new("").class
      assert_equal RTesseract, RTesseract.new(@image_tiff).class
    end

    should "translate image to text" do
      assert_equal "43ZZ", RTesseract.new(@image_tiff).to_s_without_spaces
      assert_equal "V2V4", RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces
    end

    should "translate images .png, .jpg, .bmp" do
      assert_equal "HW9W", RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces
      assert_equal "3R8Z", RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces
      assert_equal "ZLA6", RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces
    end

    should "change the image" do
      image = RTesseract.new(@image_tiff)
      assert_equal "43ZZ", image.to_s_without_spaces
      image.source = @path.join("images","test1.tif").to_s
      assert_equal "V2V4", image.to_s_without_spaces
    end

    should "select the language" do
      #English
      assert_equal " -l eng ", RTesseract.new(@image_tiff,{:lang=>"eng"}).lang
      assert_equal " -l eng ", RTesseract.new(@image_tiff,{:lang=>"en"}).lang
      assert_equal " -l eng ", RTesseract.new(@image_tiff,{:lang=>"en-US"}).lang
      assert_equal " -l eng ", RTesseract.new(@image_tiff,{:lang=>"english"}).lang

      #Portuguese
      assert_equal " -l por ", RTesseract.new(@image_tiff,{:lang=>"por"}).lang
      assert_equal " -l por ", RTesseract.new(@image_tiff,{:lang=>"pt-BR"}).lang
      assert_equal " -l por ", RTesseract.new(@image_tiff,{:lang=>"pt-br"}).lang
      assert_equal " -l por ", RTesseract.new(@image_tiff,{:lang=>"pt"}).lang
      assert_equal " -l por ", RTesseract.new(@image_tiff,{:lang=>"portuguese"}).lang

      assert_equal "43ZZ", RTesseract.new(@image_tiff,{:lang=>"eng"}).to_s_without_spaces
      assert_equal "43ZZ", RTesseract.new(@image_tiff,{:lang=>"por"}).to_s_without_spaces

      assert_equal " -l eng ", RTesseract.new(@image_tiff,{:lang=>"eng"}).lang
    end

    should "be configurable" do
      assert_equal "chop_enable 0\nenable_assoc 0\ndisplay_text 0", RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config
      assert_equal "chop_enable 0", RTesseract.new(@image_tiff,{:chop_enable=>0}).config
      assert_equal "chop_enable 0\nenable_assoc 0", RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0}).config
      assert_equal "43ZZ", RTesseract.new(@image_tiff,{:chop_enable=>0}).to_s_without_spaces
    end

    should "crop image" do
      assert_equal "4", RTesseract.new(@image_tiff).crop!(140,10,36,40).to_s_without_spaces
      assert_equal "3", RTesseract.new(@image_tiff).crop!(180,10,36,40).to_s_without_spaces
      assert_equal "Z", RTesseract.new(@image_tiff).crop!(200,10,36,40).to_s_without_spaces
      assert_equal "Z", RTesseract.new(@image_tiff).crop!(220,10,30,40).to_s_without_spaces
    end

    should "unique uid" do
      # Two separate instances must not share a uid.
      assert_not_equal RTesseract.new(@image_tiff).generate_uid , RTesseract.new(@image_tiff).generate_uid
    end

    should "generate a unique id" do
      # The uid is stable until a conversion runs, then a new one is issued.
      reg = RTesseract.new(@image_tiff)
      assert_equal reg.generate_uid , reg.generate_uid
      value = reg.generate_uid
      reg.convert
      assert_not_equal value , reg.generate_uid
    end
  end
end
|
82
|
+
|
metadata
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mini_tesseract
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.7
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Danilo Jeremias da Silva
|
9
|
+
- Hooopo
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
|
14
|
+
date: 2011-08-23 00:00:00 +08:00
|
15
|
+
default_executable:
|
16
|
+
dependencies:
|
17
|
+
- !ruby/object:Gem::Dependency
|
18
|
+
name: jeweler
|
19
|
+
prerelease: false
|
20
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
21
|
+
none: false
|
22
|
+
requirements:
|
23
|
+
- - ">="
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: 1.4.0
|
26
|
+
type: :development
|
27
|
+
version_requirements: *id001
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: thoughtbot-shoulda
|
30
|
+
prerelease: false
|
31
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
32
|
+
none: false
|
33
|
+
requirements:
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: "0"
|
37
|
+
type: :development
|
38
|
+
version_requirements: *id002
|
39
|
+
- !ruby/object:Gem::Dependency
|
40
|
+
name: rmagick
|
41
|
+
prerelease: false
|
42
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 2.10.1
|
48
|
+
type: :runtime
|
49
|
+
version_requirements: *id003
|
50
|
+
- !ruby/object:Gem::Dependency
|
51
|
+
name: mini_magick
|
52
|
+
prerelease: false
|
53
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ~>
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: "3.0"
|
59
|
+
type: :runtime
|
60
|
+
version_requirements: *id004
|
61
|
+
description: Ruby library for working with the Tesseract OCR.
|
62
|
+
email: hoooopo@gmail.com
|
63
|
+
executables: []
|
64
|
+
|
65
|
+
extensions: []
|
66
|
+
|
67
|
+
extra_rdoc_files:
|
68
|
+
- LICENSE
|
69
|
+
- README.rdoc
|
70
|
+
files:
|
71
|
+
- .document
|
72
|
+
- LICENSE
|
73
|
+
- README.rdoc
|
74
|
+
- Rakefile
|
75
|
+
- lib/processors/mini_magick.rb
|
76
|
+
- lib/processors/rmagick.rb
|
77
|
+
- lib/rtesseract.rb
|
78
|
+
- lib/rtesseract/errors.rb
|
79
|
+
- lib/rtesseract/mixed.rb
|
80
|
+
- mini_tesseract.gemspec
|
81
|
+
- rtesseract.gemspec
|
82
|
+
- test/helper.rb
|
83
|
+
- test/images/mixed.tif
|
84
|
+
- test/images/test.bmp
|
85
|
+
- test/images/test.jpg
|
86
|
+
- test/images/test.png
|
87
|
+
- test/images/test.tif
|
88
|
+
- test/images/test1.tif
|
89
|
+
- test/test_mixed.rb
|
90
|
+
- test/test_rtesseract.rb
|
91
|
+
has_rdoc: true
|
92
|
+
homepage: http://github.com/hooopo/rtesseract
|
93
|
+
licenses: []
|
94
|
+
|
95
|
+
post_install_message:
|
96
|
+
rdoc_options: []
|
97
|
+
|
98
|
+
require_paths:
|
99
|
+
- lib
|
100
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
101
|
+
none: false
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: "0"
|
106
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ">="
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: "0"
|
112
|
+
requirements: []
|
113
|
+
|
114
|
+
rubyforge_project:
|
115
|
+
rubygems_version: 1.3.9.2
|
116
|
+
signing_key:
|
117
|
+
specification_version: 3
|
118
|
+
summary: Ruby library for working with the Tesseract OCR.
|
119
|
+
test_files: []
|
120
|
+
|