mini_tesseract 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Danilo Jeremias da Silva
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,61 @@
1
+ = rtesseract
2
+
3
+ Ruby library for working with the Tesseract OCR.
4
+
5
+ == REQUIREMENTS:
6
+
7
+ To work properly, rtesseract needs the following:
8
+ * Tesseract - Program
9
+ * ImageMagick - Program
10
+ * RMagick - Gem
11
+
12
+ == EXAMPLE USAGE
13
+
14
+ It's very simple to use rtesseract:
15
+
16
+ === CONVERT IMAGE TO STRING
17
+
18
+ image = RTesseract.new("my_image.jpg")
19
+ image.to_s #Getting the value
20
+
21
+ === CHANGE THE IMAGE
22
+
23
+ image = RTesseract.new("my_image.jpg")
24
+ image.source = "new_image.png"
25
+ image.to_s
26
+
27
+ === CONVERT PARTS OF IMAGE TO STRING
28
+
29
+ mix_block = RTesseract::Mixed.new("test.jpg") do |image|
30
+ image.area(28, 19, 25, 25)
31
+ image.area(180, 22, 20, 28)
32
+ image.area(218, 22, 24, 28)
33
+ image.area(248, 24, 22, 22)
34
+ end
35
+ mix_block.to_s
36
+
37
+ OR
38
+
39
+ mix_block = RTesseract::Mixed.new("test.jpg",{:areas => [
40
+ {:x => 28, :y=>19, :width=>25, :height=>25 },
41
+ {:x => 180, :y=>22, :width=>20, :height=>28},
42
+ {:x => 218, :y=>22, :width=>24, :height=>28},
43
+ {:x => 248, :y=>24, :width=>22, :height=>22}
44
+ ]})
45
+ mix_block.to_s
46
+
47
+ == Note on Patches/Pull Requests
48
+
49
+ * Fork the project.
50
+ * Make your feature addition or bug fix.
51
+ * Send me a pull request. Bonus points for topic branches.
52
+
53
+ == Links
54
+
55
+ * Github - http://github.com/dannnylo/rtesseract
56
+ * Rubygems - http://rubygems.org/gems/rtesseract
57
+
58
+ == Copyright
59
+
60
+ Copyright (c) 2010 Danilo Jeremias da Silva. See LICENSE for details.
61
+
@@ -0,0 +1,58 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "mini_tesseract"
8
+ gem.version = '0.0.7'
9
+ gem.summary = "Ruby library for working with the Tesseract OCR."
10
+ gem.description = "Ruby library for working with the Tesseract OCR."
11
+ gem.email = "hoooopo@gmail.com"
12
+ gem.homepage = "http://github.com/hooopo/rtesseract"
13
+ gem.authors = ["Danilo Jeremias da Silva", "Hooopo"]
14
+ gem.add_development_dependency "jeweler", ">=1.4.0"
15
+ gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
16
+
17
+ gem.add_runtime_dependency "rmagick", '>= 2.10.1'
18
+ gem.add_runtime_dependency "mini_magick", '~> 3.0'
19
+ end
20
+ Jeweler::GemcutterTasks.new
21
+ rescue LoadError
22
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
23
+ end
24
+
25
+ require 'rake/testtask'
26
+ Rake::TestTask.new(:test) do |test|
27
+ test.libs << 'lib' << 'test'
28
+ test.pattern = 'test/**/test_*.rb'
29
+ test.verbose = true
30
+ end
31
+
32
+ begin
33
+ require 'rcov/rcovtask'
34
+ Rcov::RcovTask.new do |test|
35
+ test.libs << 'test'
36
+ test.pattern = 'test/**/test_*.rb'
37
+ test.verbose = true
38
+ end
39
+ rescue LoadError
40
+ task :rcov do
41
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
42
+ end
43
+ end
44
+
45
+ task :test => :check_dependencies
46
+
47
+ task :default => :test
48
+
49
+ require 'rake/rdoctask'
50
+ Rake::RDocTask.new do |rdoc|
51
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
52
+
53
+ rdoc.rdoc_dir = 'rdoc'
54
+ rdoc.title = "rtesseract #{version}"
55
+ rdoc.rdoc_files.include('README*')
56
+ rdoc.rdoc_files.include('lib/**/*.rb')
57
+ end
58
+
@@ -0,0 +1,12 @@
1
+ require 'mini_magick'
2
# Image pre-processing backend built on the mini_magick gem.
#
# Meant to be mixed into an object that provides @source (responding to
# +basename+ and +to_s+), the optional crop coordinates @x, @y, @w and @h,
# and a +generate_uid+ method that populates @uid.
module MiniMagickProcessor
  # Converts the source image to TIFF inside the system temp directory,
  # cropping it when at least one crop coordinate has been set.
  #
  # Returns the path of the generated TIFF file as a String.
  def image_to_tiff
    generate_uid
    tiff_name = "#{@uid}_#{@source.basename}.tif"
    tiff_path = Pathname.new(Dir::tmpdir).join(tiff_name).to_s

    picture = MiniMagick::Image.open(@source.to_s)
    picture.format("tif")

    crop_requested = ![@x, @y, @w, @h].compact.empty?
    picture.crop("#{@w}x#{@h}+#{@x}+#{@y}") if crop_requested

    picture.write tiff_path.to_s
    tiff_path
  end
end
@@ -0,0 +1,12 @@
1
+ require 'rmagick'
2
# Image pre-processing backend built on the rmagick gem.
#
# Meant to be mixed into an object that provides @source (responding to
# +basename+ and +to_s+), the optional crop coordinates @x, @y, @w and @h,
# and a +generate_uid+ method that populates @uid.
module RMagickProcessor
  # Writes the first frame of the source image as a TIFF file inside the
  # system temp directory, cropping it when at least one crop coordinate
  # has been set.
  #
  # Returns the path of the generated TIFF file as a String.
  def image_to_tiff
    # NOTE: a library must not print to stdout on every call; the former
    # "processing with rmagick" debug line was removed for that reason.
    generate_uid
    tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
    cat = Magick::Image.read(@source.to_s).first
    cat.crop!(@x, @y, @w, @h) unless [@x, @y, @w, @h].compact.empty?
    cat.write tmp_file
    tmp_file
  end
end
@@ -0,0 +1,157 @@
1
require "pathname"
require "shellwords"
require "tempfile"

require "rtesseract/errors"
require "rtesseract/mixed"
6
+
7
# Thin wrapper around the +tesseract+ command-line OCR program.
#
# The source image is first converted to TIFF by an image processor mixin
# (RMagick by default, MiniMagick when <tt>:processor => "mini_magick"</tt>),
# then handed to tesseract; the recognised text is cached and exposed via #to_s.
class RTesseract
  VERSION = '0.0.7'

  # Friendly language names accepted by #lang, keyed by the Tesseract
  # language code they are translated to.
  LANGUAGE_ALIASES = {
    "eng" => ["en", "en-us", "english"],
    "ita" => ["it"],
    "por" => ["pt", "pt-br", "portuguese"],
    "spa" => ["sp"]
  }.freeze

  attr_accessor :options
  attr_writer :lang
  attr_writer :psm
  attr_reader :processor

  # src     - path of the image to read (String or Pathname).
  # options - Hash; the keys below are consumed here, every remaining pair is
  #           written to the tesseract config file (see #config):
  #           :uid, :command, :lang / "lang", :psm / "psm",
  #           :clear_console_output, :processor / "processor".
  def initialize(src = "", options = {})
    # Work on a copy: the deletes below used to strip keys from the caller's
    # hash, so a hash reused for several instances lost :lang, :psm,
    # :processor, ... after the first one.
    options = options.dup
    @uid = options.delete(:uid) || nil
    @source = Pathname.new src
    @command = options.delete(:command) || "tesseract"
    @lang = options.delete(:lang) || options.delete("lang") || ""
    @psm = options.delete(:psm) || options.delete("psm") || nil
    @clear_console_output = options.delete(:clear_console_output)
    @clear_console_output = true if @clear_console_output.nil?
    @processor = options.delete(:processor) || options.delete("processor")
    @options = options
    @value = ""
    @x, @y, @w, @h = []
    choose_processor!
  end

  # Points the instance at another image and drops the cached text.
  def source=(src)
    @value = ""
    @source = Pathname.new src
  end

  # Base name of the current source image (a Pathname).
  def image_name
    @source.basename
  end

  # Restricts the conversion to the given rectangle. Returns self so the call
  # can be chained with #to_s.
  def crop!(x, y, width, height)
    @x, @y, @w, @h = x, y, width, height
    self
  end

  # Deletes the given files, ignoring the ones that do not exist.
  #
  # Returns true. Raises RTesseract::TempFilesNotRemovedError when the list
  # itself cannot be processed.
  def remove_file(files = [])
    files.each do |file|
      begin
        File.unlink(file) if File.exist?(file)
      rescue StandardError
        # Fallback kept from the original behaviour, but invoked without a
        # shell so unusual file names cannot be interpreted as shell syntax.
        system("rm", "-f", file.to_s)
      end
    end
    true
  rescue StandardError
    raise RTesseract::TempFilesNotRemovedError
  end

  # Returns the identifier used to name temp files, generating one on demand.
  # #convert resets it, so every conversion gets a fresh identifier.
  def generate_uid
    @uid ||= rand.to_s[2, 10]
  end

  # Command-line fragment selecting the recognition language, e.g. " -l eng ",
  # or "" when no language is set.
  #
  # Known aliases (see LANGUAGE_ALIASES) are translated; any other value is
  # passed through lower-cased. Make sure the language data is installed for
  # tesseract.
  def lang
    language = @lang.to_s.strip.downcase
    LANGUAGE_ALIASES.each do |code, names|
      return " -l #{code} " if names.include?(language)
    end
    language.empty? ? "" : " -l #{language} "
  rescue StandardError
    ""
  end

  # Command-line fragment for the page segmentation mode, e.g. " -psm 7 ",
  # or "" when none is set.
  def psm
    @psm.nil? ? "" : " -psm #{@psm} "
  rescue StandardError
    ""
  end

  # Remaining options rendered as tesseract config lines ("key value").
  def config
    @options ||= {}
    @options.collect { |key, value| "#{key} #{value}" }.join("\n")
  end

  # Writes #config to a temp file and returns its path, or "" when there is
  # nothing to configure.
  def config_file
    return "" if (@options || {}).empty?
    # Keep a reference so the Tempfile is not garbage-collected (and thereby
    # unlinked) before tesseract reads it, and flush so the content is really
    # on disk when the path is handed to the external process.
    @config_tempfile = Tempfile.new("config")
    @config_tempfile.write(config)
    @config_tempfile.flush
    @config_tempfile.path
  end

  # Shell redirection silencing tesseract's stderr, or "" when disabled or
  # when /dev/null does not exist.
  # TODO: Clear console for MacOS or Windows
  def clear_console_output
    return "" unless @clear_console_output
    File.exist?("/dev/null") ? "2>/dev/null" : ""
  end

  # Runs tesseract on the (optionally cropped) source image and caches the
  # recognised text in @value.
  #
  # Raises RTesseract::ConversionError when any step fails; the message of the
  # underlying error is preserved.
  def convert
    generate_uid
    output_base = Pathname.new(Dir.tmpdir).join("#{@uid}_#{@source.basename}")
    output_text = "#{output_base}.txt"
    tmp_image = image_to_tiff
    config_path = config_file
    # Paths are escaped so file names containing spaces or shell
    # metacharacters do not break (or hijack) the command line. @command,
    # #lang, #psm and #clear_console_output are pre-formatted fragments.
    command = [
      @command,
      Shellwords.escape(tmp_image.to_s),
      Shellwords.escape(output_base.to_s),
      lang,
      psm,
      config_path.empty? ? "" : Shellwords.escape(config_path),
      clear_console_output
    ].join(" ")
    `#{command}`
    @value = File.read(output_text).to_s
    @uid = nil
    remove_file([tmp_image, output_text])
  rescue StandardError => e
    raise RTesseract::ConversionError, e.message
  ensure
    release_config_file
  end

  # Recognised text; converts the image on first use.
  #
  # Raises RTesseract::ImageNotSelectedError when the source is not a file.
  def to_s
    return @value if @value != ""
    raise RTesseract::ImageNotSelectedError unless @source.file?
    convert
    @value
  end

  # Recognised text with spaces, line feeds and carriage returns removed.
  def to_s_without_spaces
    to_s.delete(" \n\r")
  end

  private

  # Loads the requested image backend and mixes it into this instance only.
  # Including it into the class made the choice global, and because
  # re-including an already included module is a no-op, the backend could no
  # longer be switched once both had been loaded.
  def choose_processor!
    if @processor.to_s == "mini_magick"
      require File.expand_path(File.dirname(__FILE__) + "/processors/mini_magick.rb")
      extend MiniMagickProcessor
    else
      require File.expand_path(File.dirname(__FILE__) + "/processors/rmagick.rb")
      extend RMagickProcessor
    end
  end

  # Closes and unlinks the config temp file created by #config_file, if any.
  def release_config_file
    @config_tempfile.close! if @config_tempfile
    @config_tempfile = nil
  end
end
157
+
@@ -0,0 +1,6 @@
1
class RTesseract
  # Common ancestor of every error defined by this library, so callers can
  # rescue all of them with a single clause. It still descends from
  # StandardError, so existing `rescue` clauses keep working.
  class Error < StandardError; end

  # The image could not be converted to text.
  class ConversionError < Error; end

  # Text was requested but the configured source is not a file.
  class ImageNotSelectedError < Error; end

  # Temporary files could not be cleaned up.
  class TempFilesNotRemovedError < Error; end
end
6
+
@@ -0,0 +1,53 @@
1
class RTesseract
  # Recognises several rectangular areas of one image and concatenates the
  # text of each area, in the order the areas were added.
  class Mixed
    # Areas queued for recognition: Hashes with :x, :y, :width and :height.
    attr_reader :areas

    # src     - path of the image to read.
    # options - Hash forwarded to RTesseract.new for every area; the :areas
    #           key (Array of area Hashes) is consumed here.
    #
    # Yields self when a block is given so areas can be added with #area.
    def initialize(src = "", options = {})
      @source = Pathname.new src
      # Copy both the options and the areas so the caller's objects are
      # neither stripped of :areas nor grown by later #area calls.
      @options = options.dup
      @value = ""
      @areas = (@options.delete(:areas) || []).dup
      yield self if block_given?
    end

    # Queues one more rectangle and invalidates the cached text.
    def area(x, y, width, height)
      @value = ""
      @areas << {:x => x, :y => y, :width => width, :height => height}
    end

    # Removes every queued area and invalidates the cached text, which would
    # otherwise still describe the removed areas.
    def clear_areas
      @value = ""
      @areas = []
    end

    # Converts every queued area to text and caches the concatenation.
    #
    # Returns the list of areas. Raises RTesseract::ConversionError when any
    # area fails; the cache is only updated when all areas succeed.
    def convert
      @value = @areas.map do |area|
        # A fresh copy per area: RTesseract#initialize removes the keys it
        # consumes, which would drop :lang, :psm, ... for the following areas.
        image = RTesseract.new(@source.to_s, @options.dup)
        image.crop!(area[:x].to_i, area[:y].to_i, area[:width].to_i, area[:height].to_i)
        image.to_s
      end.join
      @areas
    rescue StandardError => e
      raise RTesseract::ConversionError, e.message
    end

    # Recognised text of all areas; converts on first use.
    #
    # Raises RTesseract::ImageNotSelectedError when the source is not a file.
    def to_s
      return @value if @value != ""
      raise RTesseract::ImageNotSelectedError unless @source.file?
      convert
      @value
    end

    # Recognised text with spaces, line feeds and carriage returns removed.
    def to_s_without_spaces
      to_s.delete(" \n\r")
    end
  end
end
53
+
@@ -0,0 +1,67 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{mini_tesseract}
8
+ s.version = "0.0.7"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Danilo Jeremias da Silva", "Hooopo"]
12
+ s.date = %q{2011-08-23}
13
+ s.description = %q{Ruby library for working with the Tesseract OCR.}
14
+ s.email = %q{hoooopo@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ "LICENSE",
22
+ "README.rdoc",
23
+ "Rakefile",
24
+ "lib/processors/mini_magick.rb",
25
+ "lib/processors/rmagick.rb",
26
+ "lib/rtesseract.rb",
27
+ "lib/rtesseract/errors.rb",
28
+ "lib/rtesseract/mixed.rb",
29
+ "mini_tesseract.gemspec",
30
+ "rtesseract.gemspec",
31
+ "test/helper.rb",
32
+ "test/images/mixed.tif",
33
+ "test/images/test.bmp",
34
+ "test/images/test.jpg",
35
+ "test/images/test.png",
36
+ "test/images/test.tif",
37
+ "test/images/test1.tif",
38
+ "test/test_mixed.rb",
39
+ "test/test_rtesseract.rb"
40
+ ]
41
+ s.homepage = %q{http://github.com/hooopo/rtesseract}
42
+ s.require_paths = ["lib"]
43
+ s.rubygems_version = %q{1.3.9.2}
44
+ s.summary = %q{Ruby library for working with the Tesseract OCR.}
45
+
46
+ if s.respond_to? :specification_version then
47
+ s.specification_version = 3
48
+
49
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
50
+ s.add_development_dependency(%q<jeweler>, [">= 1.4.0"])
51
+ s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
52
+ s.add_runtime_dependency(%q<rmagick>, [">= 2.10.1"])
53
+ s.add_runtime_dependency(%q<mini_magick>, ["~> 3.0"])
54
+ else
55
+ s.add_dependency(%q<jeweler>, [">= 1.4.0"])
56
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
57
+ s.add_dependency(%q<rmagick>, [">= 2.10.1"])
58
+ s.add_dependency(%q<mini_magick>, ["~> 3.0"])
59
+ end
60
+ else
61
+ s.add_dependency(%q<jeweler>, [">= 1.4.0"])
62
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
63
+ s.add_dependency(%q<rmagick>, [">= 2.10.1"])
64
+ s.add_dependency(%q<mini_magick>, ["~> 3.0"])
65
+ end
66
+ end
67
+
@@ -0,0 +1,66 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{rtesseract}
8
+ s.version = "0.0.7"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Danilo Jeremias da Silva"]
12
+ s.date = %q{2011-03-16}
13
+ s.description = %q{Ruby library for working with the Tesseract OCR.}
14
+ s.email = %q{dannnylo@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ "LICENSE",
22
+ "README.rdoc",
23
+ "Rakefile",
24
+ "lib/rtesseract.rb",
25
+ "lib/rtesseract/errors.rb",
26
+ "lib/rtesseract/mixed.rb",
27
+ "rtesseract.gemspec",
28
+ "test/helper.rb",
29
+ "test/images/mixed.tif",
30
+ "test/images/test.bmp",
31
+ "test/images/test.jpg",
32
+ "test/images/test.png",
33
+ "test/images/test.tif",
34
+ "test/images/test1.tif",
35
+ "test/test_mixed.rb",
36
+ "test/test_rtesseract.rb"
37
+ ]
38
+ s.homepage = %q{http://github.com/dannnylo/rtesseract}
39
+ s.require_paths = ["lib"]
40
+ s.rubygems_version = %q{1.4.2}
41
+ s.summary = %q{Ruby library for working with the Tesseract OCR.}
42
+ s.test_files = [
43
+ "test/helper.rb",
44
+ "test/test_mixed.rb",
45
+ "test/test_rtesseract.rb"
46
+ ]
47
+
48
+ if s.respond_to? :specification_version then
49
+ s.specification_version = 3
50
+
51
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
52
+ s.add_development_dependency(%q<jeweler>, [">= 1.4.0"])
53
+ s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
54
+ s.add_runtime_dependency(%q<rmagick>, [">= 2.10.1"])
55
+ else
56
+ s.add_dependency(%q<jeweler>, [">= 1.4.0"])
57
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
58
+ s.add_dependency(%q<rmagick>, [">= 2.10.1"])
59
+ end
60
+ else
61
+ s.add_dependency(%q<jeweler>, [">= 1.4.0"])
62
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
63
+ s.add_dependency(%q<rmagick>, [">= 2.10.1"])
64
+ end
65
+ end
66
+
@@ -0,0 +1,11 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'rtesseract'
8
+
9
+ class Test::Unit::TestCase
10
+ end
11
+
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,36 @@
1
+ require 'helper'
2
+ require 'pathname'
3
+
4
# Exercises RTesseract::Mixed against the bundled mixed.tif fixture.
class TestMixed < Test::Unit::TestCase
  context "Path" do
    setup do
      @path = Pathname.new(__FILE__.gsub("test_mixed.rb","")).expand_path
      @image_tiff = @path.join("images","mixed.tif").to_s
    end

    should "be instantiable" do
      assert_equal RTesseract::Mixed, RTesseract::Mixed.new.class
      assert_equal RTesseract::Mixed, RTesseract::Mixed.new(@image_tiff).class
    end

    should "translate parts of the image to text" do
      # Areas supplied through the block form.
      mix_block = RTesseract::Mixed.new(@image_tiff) do |image|
        image.area(28, 19, 25, 25)  # position of 4
        image.area(180, 22, 20, 28) # position of 3
        image.area(218, 22, 24, 28) # position of z
        image.area(248, 24, 22, 22) # position of z
      end
      # assert_equal takes (expected, actual); the reversed order produced
      # misleading failure messages.
      assert_equal "43ZZ", mix_block.to_s_without_spaces

      # The same areas supplied through the :areas option.
      mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [
        {:x => 28,  :y=>19, :width=>25, :height=>25}, # position of 4
        {:x => 180, :y=>22, :width=>20, :height=>28}, # position of 3
        {:x => 218, :y=>22, :width=>24, :height=>28}, # position of z
        {:x => 248, :y=>24, :width=>22, :height=>22}  # position of z
      ]})
      assert_equal "43ZZ", mix_block.to_s_without_spaces
    end
  end
end
36
+
@@ -0,0 +1,82 @@
1
+ require 'helper'
2
+ require 'pathname'
3
+
4
# Exercises RTesseract against the bundled image fixtures.
# assert_equal takes (expected, actual); the arguments were reversed
# throughout, which produced misleading failure messages.
class TestRtesseract < Test::Unit::TestCase
  context "Path" do
    setup do
      @path = Pathname.new(__FILE__.gsub("test_rtesseract.rb","")).expand_path
      @image_tiff = @path.join("images","test.tif").to_s
    end

    should "be instantiable" do
      assert_equal RTesseract, RTesseract.new.class
      assert_equal RTesseract, RTesseract.new("").class
      assert_equal RTesseract, RTesseract.new(@image_tiff).class
    end

    should "translate image to text" do
      assert_equal "43ZZ", RTesseract.new(@image_tiff).to_s_without_spaces
      assert_equal "V2V4", RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces
    end

    should "translate images .png, .jpg, .bmp" do
      assert_equal "HW9W", RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces
      assert_equal "3R8Z", RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces
      assert_equal "ZLA6", RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces
    end

    should "change the image" do
      image = RTesseract.new(@image_tiff)
      assert_equal "43ZZ", image.to_s_without_spaces
      image.source = @path.join("images","test1.tif").to_s
      assert_equal "V2V4", image.to_s_without_spaces
    end

    should "select the language" do
      # English
      assert_equal " -l eng ", RTesseract.new(@image_tiff,{:lang=>"eng"}).lang
      assert_equal " -l eng ", RTesseract.new(@image_tiff,{:lang=>"en"}).lang
      assert_equal " -l eng ", RTesseract.new(@image_tiff,{:lang=>"en-US"}).lang
      assert_equal " -l eng ", RTesseract.new(@image_tiff,{:lang=>"english"}).lang

      # Portuguese
      assert_equal " -l por ", RTesseract.new(@image_tiff,{:lang=>"por"}).lang
      assert_equal " -l por ", RTesseract.new(@image_tiff,{:lang=>"pt-BR"}).lang
      assert_equal " -l por ", RTesseract.new(@image_tiff,{:lang=>"pt-br"}).lang
      assert_equal " -l por ", RTesseract.new(@image_tiff,{:lang=>"pt"}).lang
      assert_equal " -l por ", RTesseract.new(@image_tiff,{:lang=>"portuguese"}).lang

      assert_equal "43ZZ", RTesseract.new(@image_tiff,{:lang=>"eng"}).to_s_without_spaces
      assert_equal "43ZZ", RTesseract.new(@image_tiff,{:lang=>"por"}).to_s_without_spaces

      assert_equal " -l eng ", RTesseract.new(@image_tiff,{:lang=>"eng"}).lang
    end

    should "be configurable" do
      assert_equal "chop_enable 0\nenable_assoc 0\ndisplay_text 0", RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config
      assert_equal "chop_enable 0", RTesseract.new(@image_tiff,{:chop_enable=>0}).config
      assert_equal "chop_enable 0\nenable_assoc 0", RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0}).config
      assert_equal "43ZZ", RTesseract.new(@image_tiff,{:chop_enable=>0}).to_s_without_spaces
    end

    should "crop image" do
      assert_equal "4", RTesseract.new(@image_tiff).crop!(140,10,36,40).to_s_without_spaces
      assert_equal "3", RTesseract.new(@image_tiff).crop!(180,10,36,40).to_s_without_spaces
      assert_equal "Z", RTesseract.new(@image_tiff).crop!(200,10,36,40).to_s_without_spaces
      assert_equal "Z", RTesseract.new(@image_tiff).crop!(220,10,30,40).to_s_without_spaces
    end

    should "unique uid" do
      assert_not_equal RTesseract.new(@image_tiff).generate_uid , RTesseract.new(@image_tiff).generate_uid
    end

    should "generate a unique id" do
      reg = RTesseract.new(@image_tiff)
      # The uid is stable until a conversion resets it.
      assert_equal reg.generate_uid , reg.generate_uid
      value = reg.generate_uid
      reg.convert
      assert_not_equal value , reg.generate_uid
    end
  end
end
82
+
metadata ADDED
@@ -0,0 +1,120 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mini_tesseract
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.7
6
+ platform: ruby
7
+ authors:
8
+ - Danilo Jeremias da Silva
9
+ - Hooopo
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+
14
+ date: 2011-08-23 00:00:00 +08:00
15
+ default_executable:
16
+ dependencies:
17
+ - !ruby/object:Gem::Dependency
18
+ name: jeweler
19
+ prerelease: false
20
+ requirement: &id001 !ruby/object:Gem::Requirement
21
+ none: false
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: 1.4.0
26
+ type: :development
27
+ version_requirements: *id001
28
+ - !ruby/object:Gem::Dependency
29
+ name: thoughtbot-shoulda
30
+ prerelease: false
31
+ requirement: &id002 !ruby/object:Gem::Requirement
32
+ none: false
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: "0"
37
+ type: :development
38
+ version_requirements: *id002
39
+ - !ruby/object:Gem::Dependency
40
+ name: rmagick
41
+ prerelease: false
42
+ requirement: &id003 !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 2.10.1
48
+ type: :runtime
49
+ version_requirements: *id003
50
+ - !ruby/object:Gem::Dependency
51
+ name: mini_magick
52
+ prerelease: false
53
+ requirement: &id004 !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ~>
57
+ - !ruby/object:Gem::Version
58
+ version: "3.0"
59
+ type: :runtime
60
+ version_requirements: *id004
61
+ description: Ruby library for working with the Tesseract OCR.
62
+ email: hoooopo@gmail.com
63
+ executables: []
64
+
65
+ extensions: []
66
+
67
+ extra_rdoc_files:
68
+ - LICENSE
69
+ - README.rdoc
70
+ files:
71
+ - .document
72
+ - LICENSE
73
+ - README.rdoc
74
+ - Rakefile
75
+ - lib/processors/mini_magick.rb
76
+ - lib/processors/rmagick.rb
77
+ - lib/rtesseract.rb
78
+ - lib/rtesseract/errors.rb
79
+ - lib/rtesseract/mixed.rb
80
+ - mini_tesseract.gemspec
81
+ - rtesseract.gemspec
82
+ - test/helper.rb
83
+ - test/images/mixed.tif
84
+ - test/images/test.bmp
85
+ - test/images/test.jpg
86
+ - test/images/test.png
87
+ - test/images/test.tif
88
+ - test/images/test1.tif
89
+ - test/test_mixed.rb
90
+ - test/test_rtesseract.rb
91
+ has_rdoc: true
92
+ homepage: http://github.com/hooopo/rtesseract
93
+ licenses: []
94
+
95
+ post_install_message:
96
+ rdoc_options: []
97
+
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: "0"
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ">="
110
+ - !ruby/object:Gem::Version
111
+ version: "0"
112
+ requirements: []
113
+
114
+ rubyforge_project:
115
+ rubygems_version: 1.3.9.2
116
+ signing_key:
117
+ specification_version: 3
118
+ summary: Ruby library for working with the Tesseract OCR.
119
+ test_files: []
120
+