rtesseract 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +0 -0
- data/.gitignore +0 -0
- data/LICENSE +0 -0
- data/README.rdoc +0 -0
- data/Rakefile +1 -1
- data/lib/rtesseract.rb +40 -6
- data/rtesseract.gemspec +7 -2
- data/test/helper.rb +0 -0
- data/test/images/test.bmp +0 -0
- data/test/images/test.jpg +0 -0
- data/test/images/test.png +0 -0
- data/test/images/test.tif +0 -0
- data/test/images/test1.tif +0 -0
- data/test/test_rtesseract.rb +56 -3
- metadata +9 -4
data/.document
CHANGED
File without changes
|
data/.gitignore
CHANGED
File without changes
|
data/LICENSE
CHANGED
File without changes
|
data/README.rdoc
CHANGED
File without changes
|
data/Rakefile
CHANGED
@@ -5,7 +5,7 @@ begin
|
|
5
5
|
require 'jeweler'
|
6
6
|
Jeweler::Tasks.new do |gem|
|
7
7
|
gem.name = "rtesseract"
|
8
|
-
gem.version = '0.0.2'
|
8
|
+
gem.version = '0.0.3'
|
9
9
|
gem.summary = "Ruby library for working with the Tesseract OCR."
|
10
10
|
gem.description = "Ruby library for working with the Tesseract OCR."
|
11
11
|
gem.email = "dannnylo@gmail.com"
|
data/lib/rtesseract.rb
CHANGED
@@ -3,18 +3,21 @@ require "pathname"
|
|
3
3
|
require "tempfile"
|
4
4
|
|
5
5
|
class RTesseract
|
6
|
-
VERSION = '0.0.2'
|
6
|
+
VERSION = '0.0.3'
|
7
|
+
attr_accessor :options
|
8
|
+
attr_writer :lang
|
7
9
|
|
8
10
|
def initialize(src="", options={})
|
9
|
-
@source
|
10
|
-
@command = options
|
11
|
+
@source = Pathname.new src
|
12
|
+
@command = options.delete(:command) || "tesseract"
|
13
|
+
@lang = options.delete(:lang) || options.delete("lang") || ""
|
11
14
|
@options = options
|
12
15
|
@value = ""
|
13
16
|
end
|
14
17
|
|
15
18
|
def source= src
|
16
|
-
@source = Pathname.new src
|
17
19
|
@value = ""
|
20
|
+
@source = Pathname.new src
|
18
21
|
end
|
19
22
|
|
20
23
|
def image_name
|
@@ -30,7 +33,7 @@ class RTesseract
|
|
30
33
|
end
|
31
34
|
|
32
35
|
#Remove files
|
33
|
-
def remove_file(files)
|
36
|
+
def remove_file(files=[])
|
34
37
|
files.each do |file|
|
35
38
|
begin
|
36
39
|
File.unlink(file) if File.exist?(file)
|
@@ -43,11 +46,42 @@ class RTesseract
|
|
43
46
|
raise "Error on remove file."
|
44
47
|
end
|
45
48
|
|
49
|
+
# Select the language
|
50
|
+
#===Languages
|
51
|
+
## * eng - English
|
52
|
+
## * deu - German
|
53
|
+
## * deu-f - German fraktur
|
54
|
+
## * fra - French
|
55
|
+
## * ita - Italian
|
56
|
+
## * nld - Dutch
|
57
|
+
## * por - Portuguese
|
58
|
+
## * spa - Spanish
|
59
|
+
## * vie - Vietnamese
|
60
|
+
## Note: Make sure you have installed the language to tesseract
|
61
|
+
def lang
|
62
|
+
language = "#{@lang}".strip
|
63
|
+
{"eng" => ["eng","en","en-us","english"], "deu" => ["deu"], "deu-f" => ["deu-f"] , "fra" => ["fra"], "ita" => ["ita","it"] , "nld" => ["nld"] , "por" => ["por","pt","pt-br","portuguese"] , "spa" => ["spa"] , "vie" => ["vie"]}.each do |value,names|
|
64
|
+
return " -l #{value} " if names.include? language.downcase
|
65
|
+
end
|
66
|
+
""
|
67
|
+
rescue
|
68
|
+
""
|
69
|
+
end
|
70
|
+
|
71
|
+
def config
|
72
|
+
@options ||= {}
|
73
|
+
@options.collect{|k,v| "#{k} #{v}" }.join("\n")
|
74
|
+
end
|
75
|
+
|
76
|
+
def config_file
|
77
|
+
#TODO: create the config
|
78
|
+
end
|
79
|
+
|
46
80
|
#Convert image to string
|
47
81
|
def convert
|
48
82
|
tmp_file = Pathname.new(Dir::tmpdir).join("#{@source.basename}")
|
49
83
|
tmp_image = image_to_tiff
|
50
|
-
`#{@command} #{tmp_image} #{tmp_file.to_s}`
|
84
|
+
`#{@command} #{tmp_image} #{tmp_file.to_s} #{lang}`
|
51
85
|
@value = File.read("#{tmp_file.to_s}.txt").to_s
|
52
86
|
remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
|
53
87
|
rescue
|
data/rtesseract.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{rtesseract}
|
8
|
-
s.version = "0.0.2"
|
8
|
+
s.version = "0.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Danilo Jeremias da Silva"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-10-11}
|
13
13
|
s.description = %q{Ruby library for working with the Tesseract OCR.}
|
14
14
|
s.email = %q{dannnylo@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -25,6 +25,11 @@ Gem::Specification.new do |s|
|
|
25
25
|
"lib/rtesseract.rb",
|
26
26
|
"rtesseract.gemspec",
|
27
27
|
"test/helper.rb",
|
28
|
+
"test/images/test.bmp",
|
29
|
+
"test/images/test.jpg",
|
30
|
+
"test/images/test.png",
|
31
|
+
"test/images/test.tif",
|
32
|
+
"test/images/test1.tif",
|
28
33
|
"test/test_rtesseract.rb"
|
29
34
|
]
|
30
35
|
s.homepage = %q{http://github.com/dannnylo/rtesseract}
|
data/test/helper.rb
CHANGED
File without changes
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/test/test_rtesseract.rb
CHANGED
@@ -1,9 +1,62 @@
|
|
1
1
|
require 'helper'
|
2
|
+
require 'pathname'
|
2
3
|
|
3
4
|
class TestRtesseract < Test::Unit::TestCase
|
4
|
-
|
5
|
-
|
6
|
-
|
5
|
+
context "Path" do
|
6
|
+
setup do
|
7
|
+
@path = Pathname.new(__FILE__.gsub("test_rtesseract.rb","")).expand_path
|
8
|
+
@image_tiff = @path.join("images","test.tif").to_s
|
9
|
+
end
|
10
|
+
|
11
|
+
should "be instantiable" do
|
12
|
+
assert_equal RTesseract.new.class , RTesseract
|
13
|
+
assert_equal RTesseract.new("").class , RTesseract
|
14
|
+
assert_equal RTesseract.new(@image_tiff).class , RTesseract
|
15
|
+
end
|
16
|
+
|
17
|
+
should "translate image to text" do
|
18
|
+
assert_equal RTesseract.new(@image_tiff).to_s_without_spaces , "43ZZ"
|
19
|
+
assert_equal RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces , "V2V4"
|
20
|
+
end
|
21
|
+
|
22
|
+
should "translate images .png, .jpg, .bmp" do
|
23
|
+
assert_equal RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces , "HW9W"
|
24
|
+
assert_equal RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces , "3R8Z"
|
25
|
+
assert_equal RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces , "ZLA6"
|
26
|
+
end
|
27
|
+
|
28
|
+
should "change the image" do
|
29
|
+
image = RTesseract.new(@image_tiff)
|
30
|
+
assert_equal image.to_s_without_spaces,"43ZZ"
|
31
|
+
image.source = @path.join("images","test1.tif").to_s
|
32
|
+
assert_equal image.to_s_without_spaces,"V2V4"
|
33
|
+
end
|
7
34
|
|
35
|
+
should "select the language" do
|
36
|
+
#English
|
37
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
|
38
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"en"}).lang , " -l eng "
|
39
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"en-US"}).lang , " -l eng "
|
40
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"english"}).lang , " -l eng "
|
41
|
+
|
42
|
+
#Portuguese
|
43
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).lang , " -l por "
|
44
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-BR"}).lang , " -l por "
|
45
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-br"}).lang , " -l por "
|
46
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"pt"}).lang , " -l por "
|
47
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"portuguese"}).lang , " -l por "
|
48
|
+
|
49
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).to_s_without_spaces , "43ZZ"
|
50
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).to_s_without_spaces , "43ZZ"
|
51
|
+
|
52
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
|
53
|
+
end
|
54
|
+
|
55
|
+
should "be configurable" do
|
56
|
+
assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config , "chop_enable 0\nenable_assoc 0\ndisplay_text 0"
|
57
|
+
assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0}).config , "chop_enable 0"
|
58
|
+
assert_equal RTesseract.new(@image_tiff,{:enable_assoc=>0,:chop_enable=>0}).config , "chop_enable 0\nenable_assoc 0"
|
59
|
+
end
|
60
|
+
end
|
8
61
|
end
|
9
62
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 25
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 2
|
10
|
-
version: 0.0.2
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Danilo Jeremias da Silva
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-10-11 00:00:00 -03:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -66,6 +66,11 @@ files:
|
|
66
66
|
- lib/rtesseract.rb
|
67
67
|
- rtesseract.gemspec
|
68
68
|
- test/helper.rb
|
69
|
+
- test/images/test.bmp
|
70
|
+
- test/images/test.jpg
|
71
|
+
- test/images/test.png
|
72
|
+
- test/images/test.tif
|
73
|
+
- test/images/test1.tif
|
69
74
|
- test/test_rtesseract.rb
|
70
75
|
has_rdoc: true
|
71
76
|
homepage: http://github.com/dannnylo/rtesseract
|