rtesseract 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +8 -1
- data/Rakefile +1 -2
- data/lib/processors/mini_magick.rb +11 -1
- data/lib/processors/rmagick.rb +9 -0
- data/lib/rtesseract.rb +23 -2
- data/rtesseract.gemspec +10 -10
- data/test/test_rtesseract.rb +22 -1
- metadata +5 -5
data/README.rdoc
CHANGED
@@ -24,6 +24,14 @@ It's very simple to use rtesseract:
|
|
24
24
|
image.source = "new_image.png"
|
25
25
|
image.to_s
|
26
26
|
|
27
|
+
=== TRANSFORM THE IMAGE
|
28
|
+
|
29
|
+
image = RTesseract.read("my_image.jpg") do |img|
|
30
|
+
img = img.white_threshold(245)
|
31
|
+
img = img.quantize(256,Magick::GRAYColorspace)
|
32
|
+
end
|
33
|
+
image.to_s
|
34
|
+
|
27
35
|
=== CONVERT PARTS OF IMAGE TO STRING
|
28
36
|
|
29
37
|
mix_block = RTesseract::Mixed.new("test.jpg") do |image|
|
@@ -58,4 +66,3 @@ It's very simple to use rtesseract:
|
|
58
66
|
== Copyright
|
59
67
|
|
60
68
|
Copyright (c) 2010 Danilo Jeremias da Silva. See LICENSE for details.
|
61
|
-
|
data/Rakefile
CHANGED
@@ -5,7 +5,7 @@ begin
|
|
5
5
|
require 'jeweler'
|
6
6
|
Jeweler::Tasks.new do |gem|
|
7
7
|
gem.name = "rtesseract"
|
8
|
-
gem.version = '0.0.9'
|
8
|
+
gem.version = '0.0.10'
|
9
9
|
gem.summary = "Ruby library for working with the Tesseract OCR."
|
10
10
|
gem.description = "Ruby library for working with the Tesseract OCR."
|
11
11
|
gem.email = "dannnylo@gmail.com"
|
@@ -54,4 +54,3 @@ Rake::RDocTask.new do |rdoc|
|
|
54
54
|
rdoc.rdoc_files.include('README*')
|
55
55
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
56
56
|
end
|
57
|
-
|
@@ -9,4 +9,14 @@ module MiniMagickProcessor
|
|
9
9
|
cat.write tmp_file.to_s
|
10
10
|
return tmp_file
|
11
11
|
end
|
12
|
-
|
12
|
+
|
13
|
+
def image_from_blob(blob)
|
14
|
+
generate_uid
|
15
|
+
tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
|
16
|
+
cat = MiniMagick::Image.read(blob)
|
17
|
+
cat.format("tif")
|
18
|
+
cat.crop("#{@w}x#{@h}+#{@x}+#{@y}") unless [@x, @y, @w, @h].compact == []
|
19
|
+
cat.write tmp_file.to_s
|
20
|
+
return tmp_file
|
21
|
+
end
|
22
|
+
end
|
data/lib/processors/rmagick.rb
CHANGED
@@ -8,4 +8,13 @@ module RMagickProcessor
|
|
8
8
|
cat.write tmp_file.to_s
|
9
9
|
return tmp_file
|
10
10
|
end
|
11
|
+
|
12
|
+
def image_from_blob(blob)
|
13
|
+
generate_uid
|
14
|
+
tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
|
15
|
+
cat = Magick::Image.from_blob(blob).first
|
16
|
+
cat.crop!(@x, @y, @w, @h) unless [@x, @y, @w, @h].compact == []
|
17
|
+
cat.write tmp_file.to_s
|
18
|
+
return tmp_file
|
19
|
+
end
|
11
20
|
end
|
data/lib/rtesseract.rb
CHANGED
@@ -5,7 +5,7 @@ require "rtesseract/errors"
|
|
5
5
|
require "rtesseract/mixed"
|
6
6
|
|
7
7
|
class RTesseract
|
8
|
-
VERSION = '0.0.9'
|
8
|
+
VERSION = '0.0.10'
|
9
9
|
attr_accessor :options
|
10
10
|
attr_writer :lang
|
11
11
|
attr_writer :psm
|
@@ -26,6 +26,15 @@ class RTesseract
|
|
26
26
|
choose_processor!
|
27
27
|
end
|
28
28
|
|
29
|
+
def self.read(src = nil, options = {}, &block)
|
30
|
+
raise RTesseract::ImageNotSelectedError if src == nil
|
31
|
+
image = Magick::Image.read(src.to_s).first
|
32
|
+
yield image
|
33
|
+
object = RTesseract.new("", options)
|
34
|
+
object.from_blob(image.to_blob)
|
35
|
+
object
|
36
|
+
end
|
37
|
+
|
29
38
|
def source= src
|
30
39
|
@value = ""
|
31
40
|
@source = Pathname.new src
|
@@ -127,6 +136,19 @@ class RTesseract
|
|
127
136
|
raise RTesseract::ConversionError
|
128
137
|
end
|
129
138
|
|
139
|
+
#Read image from memory blob
|
140
|
+
def from_blob(blob)
|
141
|
+
generate_uid
|
142
|
+
tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}")
|
143
|
+
tmp_image = image_from_blob(blob)
|
144
|
+
`#{@command} '#{tmp_image}' '#{tmp_file.to_s}' #{lang} #{psm} #{config_file} #{clear_console_output}`
|
145
|
+
@value = File.read("#{tmp_file.to_s}.txt").to_s
|
146
|
+
@uid = nil
|
147
|
+
remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
|
148
|
+
rescue
|
149
|
+
raise RTesseract::ConversionError
|
150
|
+
end
|
151
|
+
|
130
152
|
#Output value
|
131
153
|
def to_s
|
132
154
|
return @value if @value != ""
|
@@ -154,4 +176,3 @@ class RTesseract
|
|
154
176
|
end
|
155
177
|
end
|
156
178
|
end
|
157
|
-
|
data/rtesseract.gemspec
CHANGED
@@ -4,14 +4,14 @@
|
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
|
-
s.name =
|
8
|
-
s.version = "0.0.9"
|
7
|
+
s.name = "rtesseract"
|
8
|
+
s.version = "0.0.10"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.authors = [
|
12
|
-
s.date =
|
13
|
-
s.description =
|
14
|
-
s.email =
|
11
|
+
s.authors = ["Danilo Jeremias da Silva"]
|
12
|
+
s.date = "2012-01-13"
|
13
|
+
s.description = "Ruby library for working with the Tesseract OCR."
|
14
|
+
s.email = "dannnylo@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENSE",
|
17
17
|
"README.rdoc"
|
@@ -37,10 +37,10 @@ Gem::Specification.new do |s|
|
|
37
37
|
"test/test_mixed.rb",
|
38
38
|
"test/test_rtesseract.rb"
|
39
39
|
]
|
40
|
-
s.homepage =
|
41
|
-
s.require_paths = [
|
42
|
-
s.rubygems_version =
|
43
|
-
s.summary =
|
40
|
+
s.homepage = "http://github.com/dannnylo/rtesseract"
|
41
|
+
s.require_paths = ["lib"]
|
42
|
+
s.rubygems_version = "1.8.11"
|
43
|
+
s.summary = "Ruby library for working with the Tesseract OCR."
|
44
44
|
|
45
45
|
if s.respond_to? :specification_version then
|
46
46
|
s.specification_version = 3
|
data/test/test_rtesseract.rb
CHANGED
@@ -77,6 +77,27 @@ class TestRtesseract < Test::Unit::TestCase
|
|
77
77
|
reg.convert
|
78
78
|
assert_not_equal value , reg.generate_uid
|
79
79
|
end
|
80
|
+
|
81
|
+
should "read image from blob" do
|
82
|
+
image = Magick::Image.read(@path.join("images","test.png").to_s).first
|
83
|
+
blob = image.white_threshold(245).quantize(256,Magick::GRAYColorspace).to_blob
|
84
|
+
|
85
|
+
test = RTesseract.new
|
86
|
+
test.from_blob(blob)
|
87
|
+
assert_equal test.to_s_without_spaces , "HW9W"
|
88
|
+
end
|
89
|
+
|
90
|
+
should "change image in a block" do
|
91
|
+
test = RTesseract.read(@path.join("images","test.png").to_s) do |image|
|
92
|
+
image = image.white_threshold(245)
|
93
|
+
image = image.quantize(256,Magick::GRAYColorspace)
|
94
|
+
end
|
95
|
+
assert_equal test.to_s_without_spaces , "HW9W"
|
96
|
+
|
97
|
+
test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en'}) do |image|
|
98
|
+
image = image.white_threshold(245).quantize(256,Magick::GRAYColorspace)
|
99
|
+
end
|
100
|
+
assert_equal test.to_s_without_spaces , "3R8Z"
|
101
|
+
end
|
80
102
|
end
|
81
103
|
end
|
82
|
-
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 11
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 9
|
10
|
-
version: 0.0.9
|
9
|
+
- 10
|
10
|
+
version: 0.0.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Danilo Jeremias da Silva
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2012-01-13 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: jeweler
|
@@ -121,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
121
121
|
requirements: []
|
122
122
|
|
123
123
|
rubyforge_project:
|
124
|
-
rubygems_version: 1.8.
|
124
|
+
rubygems_version: 1.8.11
|
125
125
|
signing_key:
|
126
126
|
specification_version: 3
|
127
127
|
summary: Ruby library for working with the Tesseract OCR.
|