rtesseract 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +0 -0
- data/.gitignore +0 -0
- data/LICENSE +0 -0
- data/README.rdoc +0 -0
- data/Rakefile +1 -1
- data/lib/rtesseract.rb +40 -6
- data/rtesseract.gemspec +7 -2
- data/test/helper.rb +0 -0
- data/test/images/test.bmp +0 -0
- data/test/images/test.jpg +0 -0
- data/test/images/test.png +0 -0
- data/test/images/test.tif +0 -0
- data/test/images/test1.tif +0 -0
- data/test/test_rtesseract.rb +56 -3
- metadata +9 -4
data/.document
CHANGED
File without changes
|
data/.gitignore
CHANGED
File without changes
|
data/LICENSE
CHANGED
File without changes
|
data/README.rdoc
CHANGED
File without changes
|
data/Rakefile
CHANGED
@@ -5,7 +5,7 @@ begin
|
|
5
5
|
require 'jeweler'
|
6
6
|
Jeweler::Tasks.new do |gem|
|
7
7
|
gem.name = "rtesseract"
|
8
|
-
gem.version = '0.0.2'
|
8
|
+
gem.version = '0.0.3'
|
9
9
|
gem.summary = "Ruby library for working with the Tesseract OCR."
|
10
10
|
gem.description = "Ruby library for working with the Tesseract OCR."
|
11
11
|
gem.email = "dannnylo@gmail.com"
|
data/lib/rtesseract.rb
CHANGED
@@ -3,18 +3,21 @@ require "pathname"
|
|
3
3
|
require "tempfile"
|
4
4
|
|
5
5
|
class RTesseract
|
6
|
-
VERSION = '0.0.2'
|
6
|
+
VERSION = '0.0.3'
|
7
|
+
attr_accessor :options
|
8
|
+
attr_writer :lang
|
7
9
|
|
8
10
|
def initialize(src="", options={})
|
9
|
-
@source
|
10
|
-
@command = options
|
11
|
+
@source = Pathname.new src
|
12
|
+
@command = options.delete(:command) || "tesseract"
|
13
|
+
@lang = options.delete(:lang) || options.delete("lang") || ""
|
11
14
|
@options = options
|
12
15
|
@value = ""
|
13
16
|
end
|
14
17
|
|
15
18
|
def source= src
|
16
|
-
@source = Pathname.new src
|
17
19
|
@value = ""
|
20
|
+
@source = Pathname.new src
|
18
21
|
end
|
19
22
|
|
20
23
|
def image_name
|
@@ -30,7 +33,7 @@ class RTesseract
|
|
30
33
|
end
|
31
34
|
|
32
35
|
#Remove files
|
33
|
-
def remove_file(files)
|
36
|
+
def remove_file(files=[])
|
34
37
|
files.each do |file|
|
35
38
|
begin
|
36
39
|
File.unlink(file) if File.exist?(file)
|
@@ -43,11 +46,42 @@ class RTesseract
|
|
43
46
|
raise "Error on remove file."
|
44
47
|
end
|
45
48
|
|
49
|
+
# Select the language
|
50
|
+
#===Languages
|
51
|
+
## * eng - English
|
52
|
+
## * deu - German
|
53
|
+
## * deu-f - German fraktur
|
54
|
+
## * fra - French
|
55
|
+
## * ita - Italian
|
56
|
+
## * nld - Dutch
|
57
|
+
## * por - Portuguese
|
58
|
+
## * spa - Spanish
|
59
|
+
## * vie - Vietnamese
|
60
|
+
## Note: Make sure you have installed the language to tesseract
|
61
|
+
def lang
|
62
|
+
language = "#{@lang}".strip
|
63
|
+
{"eng" => ["eng","en","en-us","english"], "deu" => ["deu"], "deu-f" => ["deu-f"] , "fra" => ["fra"], "ita" => ["ita","it"] , "nld" => ["nld"] , "por" => ["por","pt","pt-br","portuguese"] , "spa" => ["spa"] , "vie" => ["vie"]}.each do |value,names|
|
64
|
+
return " -l #{value} " if names.include? language.downcase
|
65
|
+
end
|
66
|
+
""
|
67
|
+
rescue
|
68
|
+
""
|
69
|
+
end
|
70
|
+
|
71
|
+
def config
|
72
|
+
@options ||= {}
|
73
|
+
@options.collect{|k,v| "#{k} #{v}" }.join("\n")
|
74
|
+
end
|
75
|
+
|
76
|
+
def config_file
|
77
|
+
#TODO: create the config
|
78
|
+
end
|
79
|
+
|
46
80
|
#Convert image to string
|
47
81
|
def convert
|
48
82
|
tmp_file = Pathname.new(Dir::tmpdir).join("#{@source.basename}")
|
49
83
|
tmp_image = image_to_tiff
|
50
|
-
`#{@command} #{tmp_image} #{tmp_file.to_s}`
|
84
|
+
`#{@command} #{tmp_image} #{tmp_file.to_s} #{lang}`
|
51
85
|
@value = File.read("#{tmp_file.to_s}.txt").to_s
|
52
86
|
remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
|
53
87
|
rescue
|
data/rtesseract.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{rtesseract}
|
8
|
-
s.version = "0.0.2"
|
8
|
+
s.version = "0.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Danilo Jeremias da Silva"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-10-11}
|
13
13
|
s.description = %q{Ruby library for working with the Tesseract OCR.}
|
14
14
|
s.email = %q{dannnylo@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -25,6 +25,11 @@ Gem::Specification.new do |s|
|
|
25
25
|
"lib/rtesseract.rb",
|
26
26
|
"rtesseract.gemspec",
|
27
27
|
"test/helper.rb",
|
28
|
+
"test/images/test.bmp",
|
29
|
+
"test/images/test.jpg",
|
30
|
+
"test/images/test.png",
|
31
|
+
"test/images/test.tif",
|
32
|
+
"test/images/test1.tif",
|
28
33
|
"test/test_rtesseract.rb"
|
29
34
|
]
|
30
35
|
s.homepage = %q{http://github.com/dannnylo/rtesseract}
|
data/test/helper.rb
CHANGED
File without changes
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/test/test_rtesseract.rb
CHANGED
@@ -1,9 +1,62 @@
|
|
1
1
|
require 'helper'
|
2
|
+
require 'pathname'
|
2
3
|
|
3
4
|
class TestRtesseract < Test::Unit::TestCase
|
4
|
-
|
5
|
-
|
6
|
-
|
5
|
+
context "Path" do
|
6
|
+
setup do
|
7
|
+
@path = Pathname.new(__FILE__.gsub("test_rtesseract.rb","")).expand_path
|
8
|
+
@image_tiff = @path.join("images","test.tif").to_s
|
9
|
+
end
|
10
|
+
|
11
|
+
should "be instantiable" do
|
12
|
+
assert_equal RTesseract.new.class , RTesseract
|
13
|
+
assert_equal RTesseract.new("").class , RTesseract
|
14
|
+
assert_equal RTesseract.new(@image_tiff).class , RTesseract
|
15
|
+
end
|
16
|
+
|
17
|
+
should "translate image to text" do
|
18
|
+
assert_equal RTesseract.new(@image_tiff).to_s_without_spaces , "43ZZ"
|
19
|
+
assert_equal RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces , "V2V4"
|
20
|
+
end
|
21
|
+
|
22
|
+
should "translate images .png, .jpg, .bmp" do
|
23
|
+
assert_equal RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces , "HW9W"
|
24
|
+
assert_equal RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces , "3R8Z"
|
25
|
+
assert_equal RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces , "ZLA6"
|
26
|
+
end
|
27
|
+
|
28
|
+
should "change the image" do
|
29
|
+
image = RTesseract.new(@image_tiff)
|
30
|
+
assert_equal image.to_s_without_spaces,"43ZZ"
|
31
|
+
image.source = @path.join("images","test1.tif").to_s
|
32
|
+
assert_equal image.to_s_without_spaces,"V2V4"
|
33
|
+
end
|
7
34
|
|
35
|
+
should "select the language" do
|
36
|
+
#English
|
37
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
|
38
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"en"}).lang , " -l eng "
|
39
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"en-US"}).lang , " -l eng "
|
40
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"english"}).lang , " -l eng "
|
41
|
+
|
42
|
+
#Portuguese
|
43
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).lang , " -l por "
|
44
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-BR"}).lang , " -l por "
|
45
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-br"}).lang , " -l por "
|
46
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"pt"}).lang , " -l por "
|
47
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"portuguese"}).lang , " -l por "
|
48
|
+
|
49
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).to_s_without_spaces , "43ZZ"
|
50
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).to_s_without_spaces , "43ZZ"
|
51
|
+
|
52
|
+
assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
|
53
|
+
end
|
54
|
+
|
55
|
+
should "be configurable" do
|
56
|
+
assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config , "chop_enable 0\nenable_assoc 0\ndisplay_text 0"
|
57
|
+
assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0}).config , "chop_enable 0"
|
58
|
+
assert_equal RTesseract.new(@image_tiff,{:enable_assoc=>0,:chop_enable=>0}).config , "chop_enable 0\nenable_assoc 0"
|
59
|
+
end
|
60
|
+
end
|
8
61
|
end
|
9
62
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 25
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 2
|
10
|
-
version: 0.0.2
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Danilo Jeremias da Silva
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-10-11 00:00:00 -03:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -66,6 +66,11 @@ files:
|
|
66
66
|
- lib/rtesseract.rb
|
67
67
|
- rtesseract.gemspec
|
68
68
|
- test/helper.rb
|
69
|
+
- test/images/test.bmp
|
70
|
+
- test/images/test.jpg
|
71
|
+
- test/images/test.png
|
72
|
+
- test/images/test.tif
|
73
|
+
- test/images/test1.tif
|
69
74
|
- test/test_rtesseract.rb
|
70
75
|
has_rdoc: true
|
71
76
|
homepage: http://github.com/dannnylo/rtesseract
|