mini_tesseract 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Danilo Jeremias da Silva
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,61 @@
1
+ = rtesseract
2
+
3
+ Ruby library for working with the Tesseract OCR.
4
+
5
+ == REQUIREMENTS:
6
+
7
+ To work properly, rtesseract needs:
8
+ * Tesseract - Program
9
+ * ImageMagick - Program
10
+ * RMagick - Gem
11
+
12
+ == EXAMPLE USAGE
13
+
14
+ It's very simple to use rtesseract:
15
+
16
+ === CONVERT IMAGE TO STRING
17
+
18
+ image = RTesseract.new("my_image.jpg")
19
+ image.to_s #Getting the value
20
+
21
+ === CHANGE THE IMAGE
22
+
23
+ image = RTesseract.new("my_image.jpg")
24
+ image.source = "new_image.png"
25
+ image.to_s
26
+
27
+ === CONVERT PARTS OF IMAGE TO STRING
28
+
29
+ mix_block = RTesseract::Mixed.new("test.jpg") do |image|
30
+ image.area(28, 19, 25, 25)
31
+ image.area(180, 22, 20, 28)
32
+ image.area(218, 22, 24, 28)
33
+ image.area(248, 24, 22, 22)
34
+ end
35
+ mix_block.to_s
36
+
37
+ OR
38
+
39
+ mix_block = RTesseract::Mixed.new("test.jpg",{:areas => [
40
+ {:x => 28, :y=>19, :width=>25, :height=>25 },
41
+ {:x => 180, :y=>22, :width=>20, :height=>28},
42
+ {:x => 218, :y=>22, :width=>24, :height=>28},
43
+ {:x => 248, :y=>24, :width=>22, :height=>22}
44
+ ]})
45
+ mix_block.to_s
46
+
47
+ == Note on Patches/Pull Requests
48
+
49
+ * Fork the project.
50
+ * Make your feature addition or bug fix.
51
+ * Send me a pull request. Bonus points for topic branches.
52
+
53
+ == Links
54
+
55
+ * Github - http://github.com/dannnylo/rtesseract
56
+ * Rubygems - http://rubygems.org/gems/rtesseract
57
+
58
+ == Copyright
59
+
60
+ Copyright (c) 2010 Danilo Jeremias da Silva. See LICENSE for details.
61
+
@@ -0,0 +1,58 @@
1
# Build script for the mini_tesseract gem: gem packaging (via jeweler), the
# unit-test task, an optional rcov coverage task and RDoc generation.
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional; without it only the test and
# documentation tasks below are available.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "mini_tesseract"
    gem.version = '0.0.7'
    gem.summary = "Ruby library for working with the Tesseract OCR."
    gem.description = "Ruby library for working with the Tesseract OCR."
    gem.email = "hoooopo@gmail.com"
    gem.homepage = "http://github.com/hooopo/rtesseract"
    gem.authors = ["Danilo Jeremias da Silva", "Hooopo"]
    gem.add_development_dependency "jeweler", ">=1.4.0"
    gem.add_development_dependency "thoughtbot-shoulda", ">= 0"

    gem.add_runtime_dependency "rmagick", '>= 2.10.1'
    gem.add_runtime_dependency "mini_magick", '~> 3.0'
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task. When rcov is missing, define a placeholder task that explains
# how to install it instead of failing at load time.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is only defined when jeweler supplied it. Depending on it
# unconditionally makes `rake test` abort with "Don't know how to build task"
# whenever jeweler is missing or does not define that task.
task :test => :check_dependencies if Rake::Task.task_defined?('check_dependencies')

task :default => :test

# Prefer the RDoc-provided task; fall back to the legacy Rake one so that old
# toolchains keep working.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  version = File.exist?('VERSION') ? File.read('VERSION') : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "rtesseract #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
58
+
@@ -0,0 +1,12 @@
1
+ require 'mini_magick'
2
# Image pre-processing backend that uses MiniMagick.
#
# Meant to be mixed into an object that provides +generate_uid+ and the
# instance variables @source (a Pathname), @uid and the optional crop
# rectangle @x, @y, @w, @h.
module MiniMagickProcessor
  # Converts @source to the "tif" format, crops it when any crop coordinate
  # has been set, writes the result into the system temp directory and
  # returns the path of the written file as a String.
  def image_to_tiff
    generate_uid
    output_name = "#{@uid}_#{@source.basename}.tif"
    output_path = Pathname.new(Dir::tmpdir).join(output_name).to_s

    image = MiniMagick::Image.open(@source.to_s)
    image.format("tif")

    # Crop as soon as at least one coordinate is present.
    crop_box = [@x, @y, @w, @h]
    if crop_box.any? { |edge| !edge.nil? }
      image.crop("#{@w}x#{@h}+#{@x}+#{@y}")
    end

    image.write(output_path)
    output_path
  end
end
@@ -0,0 +1,12 @@
1
+ require 'rmagick'
2
# Image pre-processing backend that uses RMagick.
#
# Meant to be mixed into an object that provides +generate_uid+ and the
# instance variables @source (a Pathname), @uid and the optional crop
# rectangle @x, @y, @w, @h.
module RMagickProcessor
  # Reads the first frame of @source, crops it in place when any crop
  # coordinate has been set, writes it to a ".tif" path inside the system temp
  # directory and returns that path as a String.
  #
  # Nothing is printed: a library must not write debug output to the host
  # application's stdout.
  def image_to_tiff
    generate_uid
    tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
    cat = Magick::Image.read(@source.to_s).first
    cat.crop!(@x, @y, @w, @h) unless [@x, @y, @w, @h].compact == []
    cat.write tmp_file
    tmp_file
  end
end
@@ -0,0 +1,157 @@
1
+ require "pathname"
2
+ require "tempfile"
3
+
4
+ require "rtesseract/errors"
5
+ require "rtesseract/mixed"
6
+
7
# Thin wrapper around the +tesseract+ command-line OCR program.
#
# The source image is first converted to a temporary TIFF by an image
# processor (RMagick by default, MiniMagick when <tt>:processor</tt> is
# "mini_magick"), then tesseract is run on it and the recognised text is cached
# in the instance until the source changes.
class RTesseract
  VERSION = '0.0.7'

  # Human-friendly aliases accepted by #lang, keyed by the tesseract code.
  LANGUAGE_ALIASES = {
    "eng" => ["en", "en-us", "english"],
    "ita" => ["it"],
    "por" => ["pt", "pt-br", "portuguese"],
    "spa" => ["sp"]
  }.freeze

  attr_accessor :options
  attr_writer :lang
  attr_writer :psm
  attr_reader :processor

  # src::     path of the image to read.
  # options:: recognised keys are :uid, :command, :lang, :psm,
  #           :clear_console_output and :processor ("lang", "psm" and
  #           "processor" are also accepted as String keys). Every remaining
  #           pair is written to the tesseract config file (see #config).
  #
  # The hash is copied before keys are consumed, so the caller's hash is left
  # untouched and can safely be reused for several instances.
  def initialize(src = "", options = {})
    options = options.dup
    @uid = options.delete(:uid) || nil
    @source = Pathname.new src
    @command = options.delete(:command) || "tesseract"
    @lang = options.delete(:lang) || options.delete("lang") || ""
    @psm = options.delete(:psm) || options.delete("psm") || nil
    @clear_console_output = options.delete(:clear_console_output)
    @clear_console_output = true if @clear_console_output.nil?
    @processor = options.delete(:processor) || options.delete("processor")
    @options = options
    @value = ""
    @x, @y, @w, @h = []
    choose_processor!
  end

  # Points the instance at another image and drops the cached text.
  def source= src
    @value = ""
    @source = Pathname.new src
  end

  # File name (without directories) of the current source image.
  def image_name
    @source.basename
  end

  # Restricts the conversion to the given rectangle. Returns self so calls can
  # be chained.
  def crop!(x, y, width, height)
    @x, @y, @w, @h = x, y, width, height
    self
  end

  # Deletes the given files, skipping those that do not exist. When unlinking
  # fails, falls back to the external +rm+ program; it is invoked without a
  # shell so that unusual characters in the path cannot be interpreted as
  # shell syntax.
  #
  # Returns true. Raises RTesseract::TempFilesNotRemovedError if the list
  # itself cannot be processed.
  def remove_file(files = [])
    files.each do |file|
      begin
        File.unlink(file) if File.exist?(file)
      rescue
        system("rm", "-f", file.to_s)
      end
    end
    true
  rescue
    raise RTesseract::TempFilesNotRemovedError
  end

  # Returns the id used to name temporary files, generating a random one when
  # none is set. The id is cleared again after each successful #convert.
  def generate_uid
    @uid = rand.to_s[2, 10] if @uid.nil?
    @uid
  end

  # Command-line fragment selecting the language (" -l CODE "), or "" when no
  # language is configured.
  #===Languages
  ## * eng - English
  ## * deu - German
  ## * deu-f - German fraktur
  ## * fra - French
  ## * ita - Italian
  ## * nld - Dutch
  ## * por - Portuguese
  ## * spa - Spanish
  ## * vie - Vietnamese
  ## Note: Make sure you have installed the language to tesseract
  def lang
    language = @lang.to_s.strip.downcase
    code, = LANGUAGE_ALIASES.find { |_, names| names.include?(language) }
    code ||= language
    code.empty? ? "" : " -l #{code} "
  rescue
    ""
  end

  # Command-line fragment selecting the page segmentation mode, or "" when
  # none is configured.
  def psm
    @psm.nil? ? "" : " -psm #{@psm} "
  rescue
    ""
  end

  # Contents of the tesseract config file: one "key value" line per remaining
  # option.
  def config
    @options ||= {}
    @options.map { |key, value| "#{key} #{value}" }.join("\n")
  end

  # Writes #config to a temporary file and returns its path, or "" when there
  # is nothing to configure.
  #
  # The content is flushed so tesseract does not read an empty file, and the
  # Tempfile is kept in an instance variable because a Tempfile removes its
  # file once it is garbage collected.
  def config_file
    return "" if @options == {}
    @config_tempfile = Tempfile.new("config")
    @config_tempfile.write(config)
    @config_tempfile.flush
    @config_tempfile.path
  end

  # Shell redirection that silences tesseract's stderr, or "" when disabled or
  # when /dev/null does not exist.
  #TODO: Clear console for MacOS or Windows
  def clear_console_output
    return "" unless @clear_console_output
    return "2>/dev/null" if File.exist?("/dev/null") #Linux console clear
    ""
  end

  # Runs tesseract on the source image, stores the recognised text in @value
  # and removes the temporary files.
  #
  # Raises RTesseract::ConversionError when any step fails.
  #
  # NOTE(review): the paths are interpolated into the shell command unescaped,
  # so file names containing spaces or shell metacharacters will likely break
  # the call - confirm before feeding untrusted file names.
  def convert
    generate_uid
    tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}")
    tmp_image = image_to_tiff
    `#{@command} #{tmp_image} #{tmp_file.to_s} #{lang} #{psm} #{config_file} #{clear_console_output}`
    @value = File.read("#{tmp_file.to_s}.txt").to_s
    @uid = nil
    remove_file([tmp_image, "#{tmp_file.to_s}.txt"])
  rescue
    raise RTesseract::ConversionError
  end

  # Recognised text. Converts on first use and returns the cached value
  # afterwards.
  #
  # Raises RTesseract::ImageNotSelectedError when the source is not a file.
  def to_s
    return @value if @value != ""
    raise RTesseract::ImageNotSelectedError unless @source.file?
    convert
    @value
  end

  # Recognised text with spaces, line feeds and carriage returns removed.
  def to_s_without_spaces
    to_s.delete(" \n\r")
  end

  private

  # Loads the requested image processor and mixes it into this instance only.
  # Extending the instance (rather than including the module into the class)
  # keeps the choice from leaking to other instances.
  def choose_processor!
    if @processor.to_s == "mini_magick"
      require File.expand_path(File.dirname(__FILE__) + "/processors/mini_magick.rb")
      extend MiniMagickProcessor
    else
      require File.expand_path(File.dirname(__FILE__) + "/processors/rmagick.rb")
      extend RMagickProcessor
    end
  end
end
157
+
@@ -0,0 +1,6 @@
1
class RTesseract
  # Signals that running the OCR conversion failed.
  class ConversionError < StandardError
  end

  # Signals that text was requested although the source is not a file.
  class ImageNotSelectedError < StandardError
  end

  # Signals that the temporary files could not be cleaned up.
  class TempFilesNotRemovedError < StandardError
  end
end
6
+
@@ -0,0 +1,53 @@
1
class RTesseract
  # Runs OCR on several rectangular areas of one image and concatenates the
  # recognised text in the order the areas were declared.
  class Mixed
    # src::     path of the image to read.
    # options:: options forwarded to every RTesseract instance; the optional
    #           :areas key holds an array of
    #           {:x, :y, :width, :height} hashes.
    #
    # Yields self when a block is given so areas can be declared with #area.
    # Both the options hash and the areas array are copied, so the caller's
    # objects are never modified.
    def initialize(src = "", options = {})
      @source = Pathname.new src
      @options = options.dup
      @value = ""
      @areas = (@options.delete(:areas) || []).dup
      yield self if block_given?
    end

    # Adds an area to convert and drops the cached text. Returns the list of
    # areas.
    def area(x, y, width, height)
      @value = ""
      @areas << {:x => x, :y => y, :width => width, :height => height}
    end

    # Areas declared so far.
    def areas
      @areas
    end

    # Forgets every declared area together with the text cached for them.
    def clear_areas
      @value = ""
      @areas = []
    end

    # Converts every area to text and stores the concatenation in @value.
    # Returns the list of areas.
    #
    # Each RTesseract instance receives its own copy of the options:
    # the RTesseract constructor consumes keys such as :lang, so sharing one
    # hash would strip those settings from every area after the first.
    # The cached value is only replaced once all areas succeeded.
    #
    # Raises RTesseract::ConversionError when any area fails.
    def convert
      text = @areas.map do |area|
        image = RTesseract.new(@source.to_s, @options.dup)
        image.crop!(area[:x].to_i, area[:y].to_i, area[:width].to_i, area[:height].to_i)
        image.to_s
      end.join
      @value = text
      @areas
    rescue
      raise RTesseract::ConversionError
    end

    # Recognised text of all areas. Converts on first use and returns the
    # cached value afterwards.
    #
    # Raises RTesseract::ImageNotSelectedError when the source is not a file.
    def to_s
      return @value if @value != ""
      raise RTesseract::ImageNotSelectedError unless @source.file?
      convert
      @value
    end

    # Recognised text with spaces, line feeds and carriage returns removed.
    def to_s_without_spaces
      to_s.delete(" \n\r")
    end
  end
end
53
+
@@ -0,0 +1,67 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{mini_tesseract}
8
+ s.version = "0.0.7"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Danilo Jeremias da Silva", "Hooopo"]
12
+ s.date = %q{2011-08-23}
13
+ s.description = %q{Ruby library for working with the Tesseract OCR.}
14
+ s.email = %q{hoooopo@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ "LICENSE",
22
+ "README.rdoc",
23
+ "Rakefile",
24
+ "lib/processors/mini_magick.rb",
25
+ "lib/processors/rmagick.rb",
26
+ "lib/rtesseract.rb",
27
+ "lib/rtesseract/errors.rb",
28
+ "lib/rtesseract/mixed.rb",
29
+ "mini_tesseract.gemspec",
30
+ "rtesseract.gemspec",
31
+ "test/helper.rb",
32
+ "test/images/mixed.tif",
33
+ "test/images/test.bmp",
34
+ "test/images/test.jpg",
35
+ "test/images/test.png",
36
+ "test/images/test.tif",
37
+ "test/images/test1.tif",
38
+ "test/test_mixed.rb",
39
+ "test/test_rtesseract.rb"
40
+ ]
41
+ s.homepage = %q{http://github.com/hooopo/rtesseract}
42
+ s.require_paths = ["lib"]
43
+ s.rubygems_version = %q{1.3.9.2}
44
+ s.summary = %q{Ruby library for working with the Tesseract OCR.}
45
+
46
+ if s.respond_to? :specification_version then
47
+ s.specification_version = 3
48
+
49
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
50
+ s.add_development_dependency(%q<jeweler>, [">= 1.4.0"])
51
+ s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
52
+ s.add_runtime_dependency(%q<rmagick>, [">= 2.10.1"])
53
+ s.add_runtime_dependency(%q<mini_magick>, ["~> 3.0"])
54
+ else
55
+ s.add_dependency(%q<jeweler>, [">= 1.4.0"])
56
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
57
+ s.add_dependency(%q<rmagick>, [">= 2.10.1"])
58
+ s.add_dependency(%q<mini_magick>, ["~> 3.0"])
59
+ end
60
+ else
61
+ s.add_dependency(%q<jeweler>, [">= 1.4.0"])
62
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
63
+ s.add_dependency(%q<rmagick>, [">= 2.10.1"])
64
+ s.add_dependency(%q<mini_magick>, ["~> 3.0"])
65
+ end
66
+ end
67
+
@@ -0,0 +1,66 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{rtesseract}
8
+ s.version = "0.0.7"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Danilo Jeremias da Silva"]
12
+ s.date = %q{2011-03-16}
13
+ s.description = %q{Ruby library for working with the Tesseract OCR.}
14
+ s.email = %q{dannnylo@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ "LICENSE",
22
+ "README.rdoc",
23
+ "Rakefile",
24
+ "lib/rtesseract.rb",
25
+ "lib/rtesseract/errors.rb",
26
+ "lib/rtesseract/mixed.rb",
27
+ "rtesseract.gemspec",
28
+ "test/helper.rb",
29
+ "test/images/mixed.tif",
30
+ "test/images/test.bmp",
31
+ "test/images/test.jpg",
32
+ "test/images/test.png",
33
+ "test/images/test.tif",
34
+ "test/images/test1.tif",
35
+ "test/test_mixed.rb",
36
+ "test/test_rtesseract.rb"
37
+ ]
38
+ s.homepage = %q{http://github.com/dannnylo/rtesseract}
39
+ s.require_paths = ["lib"]
40
+ s.rubygems_version = %q{1.4.2}
41
+ s.summary = %q{Ruby library for working with the Tesseract OCR.}
42
+ s.test_files = [
43
+ "test/helper.rb",
44
+ "test/test_mixed.rb",
45
+ "test/test_rtesseract.rb"
46
+ ]
47
+
48
+ if s.respond_to? :specification_version then
49
+ s.specification_version = 3
50
+
51
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
52
+ s.add_development_dependency(%q<jeweler>, [">= 1.4.0"])
53
+ s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
54
+ s.add_runtime_dependency(%q<rmagick>, [">= 2.10.1"])
55
+ else
56
+ s.add_dependency(%q<jeweler>, [">= 1.4.0"])
57
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
58
+ s.add_dependency(%q<rmagick>, [">= 2.10.1"])
59
+ end
60
+ else
61
+ s.add_dependency(%q<jeweler>, [">= 1.4.0"])
62
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
63
+ s.add_dependency(%q<rmagick>, [">= 2.10.1"])
64
+ end
65
+ end
66
+
@@ -0,0 +1,11 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'rtesseract'
8
+
9
+ class Test::Unit::TestCase
10
+ end
11
+
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,36 @@
1
+ require 'helper'
2
+ require 'pathname'
3
+
4
# Tests for RTesseract::Mixed, which converts selected areas of an image.
# assert_equal takes the expected value first so failure messages read
# correctly.
class TestMixed < Test::Unit::TestCase
  context "Path" do
    setup do
      @path = Pathname.new(File.dirname(__FILE__)).expand_path
      @image_tiff = @path.join("images", "mixed.tif").to_s
    end

    should "be instantiable" do
      assert_equal RTesseract::Mixed, RTesseract::Mixed.new.class
      assert_equal RTesseract::Mixed, RTesseract::Mixed.new(@image_tiff).class
    end

    should "translate parts of the image to text" do
      # Areas declared through the block form.
      mix_block = RTesseract::Mixed.new(@image_tiff) do |image|
        image.area(28, 19, 25, 25)  # position of 4
        image.area(180, 22, 20, 28) # position of 3
        image.area(218, 22, 24, 28) # position of z
        image.area(248, 24, 22, 22) # position of z
      end
      assert_equal "43ZZ", mix_block.to_s_without_spaces

      # The same areas declared through the :areas option.
      mix_block = RTesseract::Mixed.new(@image_tiff, {:areas => [
        {:x => 28, :y => 19, :width => 25, :height => 25},  # position of 4
        {:x => 180, :y => 22, :width => 20, :height => 28}, # position of 3
        {:x => 218, :y => 22, :width => 24, :height => 28}, # position of z
        {:x => 248, :y => 24, :width => 22, :height => 22}  # position of z
      ]})
      assert_equal "43ZZ", mix_block.to_s_without_spaces
    end
  end
end
36
+
@@ -0,0 +1,82 @@
1
+ require 'helper'
2
+ require 'pathname'
3
+
4
# Tests for RTesseract against the fixture images in test/images.
# assert_equal / assert_not_equal take the expected value first so failure
# messages read correctly.
class TestRtesseract < Test::Unit::TestCase
  context "Path" do
    setup do
      @path = Pathname.new(File.dirname(__FILE__)).expand_path
      @image_tiff = @path.join("images", "test.tif").to_s
    end

    should "be instantiable" do
      assert_equal RTesseract, RTesseract.new.class
      assert_equal RTesseract, RTesseract.new("").class
      assert_equal RTesseract, RTesseract.new(@image_tiff).class
    end

    should "translate image to text" do
      assert_equal "43ZZ", RTesseract.new(@image_tiff).to_s_without_spaces
      assert_equal "V2V4", RTesseract.new(@path.join("images", "test1.tif").to_s).to_s_without_spaces
    end

    should "translate images .png, .jpg, .bmp" do
      assert_equal "HW9W", RTesseract.new(@path.join("images", "test.png").to_s).to_s_without_spaces
      assert_equal "3R8Z", RTesseract.new(@path.join("images", "test.jpg").to_s).to_s_without_spaces
      assert_equal "ZLA6", RTesseract.new(@path.join("images", "test.bmp").to_s).to_s_without_spaces
    end

    should "change the image" do
      image = RTesseract.new(@image_tiff)
      assert_equal "43ZZ", image.to_s_without_spaces
      image.source = @path.join("images", "test1.tif").to_s
      assert_equal "V2V4", image.to_s_without_spaces
    end

    should "select the language" do
      #English
      assert_equal " -l eng ", RTesseract.new(@image_tiff, {:lang => "eng"}).lang
      assert_equal " -l eng ", RTesseract.new(@image_tiff, {:lang => "en"}).lang
      assert_equal " -l eng ", RTesseract.new(@image_tiff, {:lang => "en-US"}).lang
      assert_equal " -l eng ", RTesseract.new(@image_tiff, {:lang => "english"}).lang

      #Portuguese
      assert_equal " -l por ", RTesseract.new(@image_tiff, {:lang => "por"}).lang
      assert_equal " -l por ", RTesseract.new(@image_tiff, {:lang => "pt-BR"}).lang
      assert_equal " -l por ", RTesseract.new(@image_tiff, {:lang => "pt-br"}).lang
      assert_equal " -l por ", RTesseract.new(@image_tiff, {:lang => "pt"}).lang
      assert_equal " -l por ", RTesseract.new(@image_tiff, {:lang => "portuguese"}).lang

      assert_equal "43ZZ", RTesseract.new(@image_tiff, {:lang => "eng"}).to_s_without_spaces
      assert_equal "43ZZ", RTesseract.new(@image_tiff, {:lang => "por"}).to_s_without_spaces

      assert_equal " -l eng ", RTesseract.new(@image_tiff, {:lang => "eng"}).lang
    end

    should "be configurable" do
      assert_equal "chop_enable 0\nenable_assoc 0\ndisplay_text 0", RTesseract.new(@image_tiff, {:chop_enable => 0, :enable_assoc => 0, :display_text => 0}).config
      assert_equal "chop_enable 0", RTesseract.new(@image_tiff, {:chop_enable => 0}).config
      assert_equal "chop_enable 0\nenable_assoc 0", RTesseract.new(@image_tiff, {:chop_enable => 0, :enable_assoc => 0}).config
      assert_equal "43ZZ", RTesseract.new(@image_tiff, {:chop_enable => 0}).to_s_without_spaces
    end

    should "crop image" do
      assert_equal "4", RTesseract.new(@image_tiff).crop!(140, 10, 36, 40).to_s_without_spaces
      assert_equal "3", RTesseract.new(@image_tiff).crop!(180, 10, 36, 40).to_s_without_spaces
      assert_equal "Z", RTesseract.new(@image_tiff).crop!(200, 10, 36, 40).to_s_without_spaces
      assert_equal "Z", RTesseract.new(@image_tiff).crop!(220, 10, 30, 40).to_s_without_spaces
    end

    should "unique uid" do
      # Two separate instances must not share an id.
      assert_not_equal RTesseract.new(@image_tiff).generate_uid, RTesseract.new(@image_tiff).generate_uid
    end

    should "generate a unique id" do
      reg = RTesseract.new(@image_tiff)
      # The id is stable until a conversion happens...
      assert_equal reg.generate_uid, reg.generate_uid
      value = reg.generate_uid
      reg.convert
      # ...and a fresh one is generated afterwards.
      assert_not_equal value, reg.generate_uid
    end
  end
end
82
+
metadata ADDED
@@ -0,0 +1,120 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mini_tesseract
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.7
6
+ platform: ruby
7
+ authors:
8
+ - Danilo Jeremias da Silva
9
+ - Hooopo
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+
14
+ date: 2011-08-23 00:00:00 +08:00
15
+ default_executable:
16
+ dependencies:
17
+ - !ruby/object:Gem::Dependency
18
+ name: jeweler
19
+ prerelease: false
20
+ requirement: &id001 !ruby/object:Gem::Requirement
21
+ none: false
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: 1.4.0
26
+ type: :development
27
+ version_requirements: *id001
28
+ - !ruby/object:Gem::Dependency
29
+ name: thoughtbot-shoulda
30
+ prerelease: false
31
+ requirement: &id002 !ruby/object:Gem::Requirement
32
+ none: false
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: "0"
37
+ type: :development
38
+ version_requirements: *id002
39
+ - !ruby/object:Gem::Dependency
40
+ name: rmagick
41
+ prerelease: false
42
+ requirement: &id003 !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 2.10.1
48
+ type: :runtime
49
+ version_requirements: *id003
50
+ - !ruby/object:Gem::Dependency
51
+ name: mini_magick
52
+ prerelease: false
53
+ requirement: &id004 !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ~>
57
+ - !ruby/object:Gem::Version
58
+ version: "3.0"
59
+ type: :runtime
60
+ version_requirements: *id004
61
+ description: Ruby library for working with the Tesseract OCR.
62
+ email: hoooopo@gmail.com
63
+ executables: []
64
+
65
+ extensions: []
66
+
67
+ extra_rdoc_files:
68
+ - LICENSE
69
+ - README.rdoc
70
+ files:
71
+ - .document
72
+ - LICENSE
73
+ - README.rdoc
74
+ - Rakefile
75
+ - lib/processors/mini_magick.rb
76
+ - lib/processors/rmagick.rb
77
+ - lib/rtesseract.rb
78
+ - lib/rtesseract/errors.rb
79
+ - lib/rtesseract/mixed.rb
80
+ - mini_tesseract.gemspec
81
+ - rtesseract.gemspec
82
+ - test/helper.rb
83
+ - test/images/mixed.tif
84
+ - test/images/test.bmp
85
+ - test/images/test.jpg
86
+ - test/images/test.png
87
+ - test/images/test.tif
88
+ - test/images/test1.tif
89
+ - test/test_mixed.rb
90
+ - test/test_rtesseract.rb
91
+ has_rdoc: true
92
+ homepage: http://github.com/hooopo/rtesseract
93
+ licenses: []
94
+
95
+ post_install_message:
96
+ rdoc_options: []
97
+
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: "0"
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ">="
110
+ - !ruby/object:Gem::Version
111
+ version: "0"
112
+ requirements: []
113
+
114
+ rubyforge_project:
115
+ rubygems_version: 1.3.9.2
116
+ signing_key:
117
+ specification_version: 3
118
+ summary: Ruby library for working with the Tesseract OCR.
119
+ test_files: []
120
+