rtesseract 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +8 -1
- data/Rakefile +1 -2
- data/lib/processors/mini_magick.rb +11 -1
- data/lib/processors/rmagick.rb +9 -0
- data/lib/rtesseract.rb +23 -2
- data/rtesseract.gemspec +10 -10
- data/test/test_rtesseract.rb +22 -1
- metadata +5 -5
data/README.rdoc
CHANGED
@@ -24,6 +24,14 @@ It's very simple to use rtesseract:
|
|
24
24
|
image.source = "new_image.png"
|
25
25
|
image.to_s
|
26
26
|
|
27
|
+
=== TRANSFORM THE IMAGE
|
28
|
+
|
29
|
+
image = RTesseract.read("my_image.jpg") do |img|
|
30
|
+
img = img.white_threshold(245)
|
31
|
+
img = img.quantize(256,Magick::GRAYColorspace)
|
32
|
+
end
|
33
|
+
image.to_s
|
34
|
+
|
27
35
|
=== CONVERT PARTS OF IMAGE TO STRING
|
28
36
|
|
29
37
|
mix_block = RTesseract::Mixed.new("test.jpg") do |image|
|
@@ -58,4 +66,3 @@ It's very simple to use rtesseract:
|
|
58
66
|
== Copyright
|
59
67
|
|
60
68
|
Copyright (c) 2010 Danilo Jeremias da Silva. See LICENSE for details.
|
61
|
-
|
data/Rakefile
CHANGED
@@ -5,7 +5,7 @@ begin
|
|
5
5
|
require 'jeweler'
|
6
6
|
Jeweler::Tasks.new do |gem|
|
7
7
|
gem.name = "rtesseract"
|
8
|
-
gem.version = '0.0.9'
|
8
|
+
gem.version = '0.0.10'
|
9
9
|
gem.summary = "Ruby library for working with the Tesseract OCR."
|
10
10
|
gem.description = "Ruby library for working with the Tesseract OCR."
|
11
11
|
gem.email = "dannnylo@gmail.com"
|
@@ -54,4 +54,3 @@ Rake::RDocTask.new do |rdoc|
|
|
54
54
|
rdoc.rdoc_files.include('README*')
|
55
55
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
56
56
|
end
|
57
|
-
|
@@ -9,4 +9,14 @@ module MiniMagickProcessor
|
|
9
9
|
cat.write tmp_file.to_s
|
10
10
|
return tmp_file
|
11
11
|
end
|
12
|
-
|
12
|
+
|
13
|
+
def image_from_blob(blob)
|
14
|
+
generate_uid
|
15
|
+
tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
|
16
|
+
cat = MiniMagick::Image.read(blob)
|
17
|
+
cat.format("tif")
|
18
|
+
cat.crop("#{@w}x#{@h}+#{@x}+#{@y}") unless [@x, @y, @w, @h].compact == []
|
19
|
+
cat.write tmp_file.to_s
|
20
|
+
return tmp_file
|
21
|
+
end
|
22
|
+
end
|
data/lib/processors/rmagick.rb
CHANGED
@@ -8,4 +8,13 @@ module RMagickProcessor
|
|
8
8
|
cat.write tmp_file.to_s
|
9
9
|
return tmp_file
|
10
10
|
end
|
11
|
+
|
12
|
+
def image_from_blob(blob)
|
13
|
+
generate_uid
|
14
|
+
tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
|
15
|
+
cat = Magick::Image.from_blob(blob).first
|
16
|
+
cat.crop!(@x, @y, @w, @h) unless [@x, @y, @w, @h].compact == []
|
17
|
+
cat.write tmp_file.to_s
|
18
|
+
return tmp_file
|
19
|
+
end
|
11
20
|
end
|
data/lib/rtesseract.rb
CHANGED
@@ -5,7 +5,7 @@ require "rtesseract/errors"
|
|
5
5
|
require "rtesseract/mixed"
|
6
6
|
|
7
7
|
class RTesseract
|
8
|
-
VERSION = '0.0.9'
|
8
|
+
VERSION = '0.0.10'
|
9
9
|
attr_accessor :options
|
10
10
|
attr_writer :lang
|
11
11
|
attr_writer :psm
|
@@ -26,6 +26,15 @@ class RTesseract
|
|
26
26
|
choose_processor!
|
27
27
|
end
|
28
28
|
|
29
|
+
def self.read(src = nil, options = {}, &block)
|
30
|
+
raise RTesseract::ImageNotSelectedError if src == nil
|
31
|
+
image = Magick::Image.read(src.to_s).first
|
32
|
+
yield image
|
33
|
+
object = RTesseract.new("", options)
|
34
|
+
object.from_blob(image.to_blob)
|
35
|
+
object
|
36
|
+
end
|
37
|
+
|
29
38
|
def source= src
|
30
39
|
@value = ""
|
31
40
|
@source = Pathname.new src
|
@@ -127,6 +136,19 @@ class RTesseract
|
|
127
136
|
raise RTesseract::ConversionError
|
128
137
|
end
|
129
138
|
|
139
|
+
#Read image from memory blob
|
140
|
+
def from_blob(blob)
|
141
|
+
generate_uid
|
142
|
+
tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}")
|
143
|
+
tmp_image = image_from_blob(blob)
|
144
|
+
`#{@command} '#{tmp_image}' '#{tmp_file.to_s}' #{lang} #{psm} #{config_file} #{clear_console_output}`
|
145
|
+
@value = File.read("#{tmp_file.to_s}.txt").to_s
|
146
|
+
@uid = nil
|
147
|
+
remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
|
148
|
+
rescue
|
149
|
+
raise RTesseract::ConversionError
|
150
|
+
end
|
151
|
+
|
130
152
|
#Output value
|
131
153
|
def to_s
|
132
154
|
return @value if @value != ""
|
@@ -154,4 +176,3 @@ class RTesseract
|
|
154
176
|
end
|
155
177
|
end
|
156
178
|
end
|
157
|
-
|
data/rtesseract.gemspec
CHANGED
@@ -4,14 +4,14 @@
|
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
|
-
s.name =
|
8
|
-
s.version = "0.0.9"
|
7
|
+
s.name = "rtesseract"
|
8
|
+
s.version = "0.0.10"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.authors = [
|
12
|
-
s.date =
|
13
|
-
s.description =
|
14
|
-
s.email =
|
11
|
+
s.authors = ["Danilo Jeremias da Silva"]
|
12
|
+
s.date = "2012-01-13"
|
13
|
+
s.description = "Ruby library for working with the Tesseract OCR."
|
14
|
+
s.email = "dannnylo@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENSE",
|
17
17
|
"README.rdoc"
|
@@ -37,10 +37,10 @@ Gem::Specification.new do |s|
|
|
37
37
|
"test/test_mixed.rb",
|
38
38
|
"test/test_rtesseract.rb"
|
39
39
|
]
|
40
|
-
s.homepage =
|
41
|
-
s.require_paths = [
|
42
|
-
s.rubygems_version =
|
43
|
-
s.summary =
|
40
|
+
s.homepage = "http://github.com/dannnylo/rtesseract"
|
41
|
+
s.require_paths = ["lib"]
|
42
|
+
s.rubygems_version = "1.8.11"
|
43
|
+
s.summary = "Ruby library for working with the Tesseract OCR."
|
44
44
|
|
45
45
|
if s.respond_to? :specification_version then
|
46
46
|
s.specification_version = 3
|
data/test/test_rtesseract.rb
CHANGED
@@ -77,6 +77,27 @@ class TestRtesseract < Test::Unit::TestCase
|
|
77
77
|
reg.convert
|
78
78
|
assert_not_equal value , reg.generate_uid
|
79
79
|
end
|
80
|
+
|
81
|
+
should "read image from blob" do
|
82
|
+
image = Magick::Image.read(@path.join("images","test.png").to_s).first
|
83
|
+
blob = image.white_threshold(245).quantize(256,Magick::GRAYColorspace).to_blob
|
84
|
+
|
85
|
+
test = RTesseract.new
|
86
|
+
test.from_blob(blob)
|
87
|
+
assert_equal test.to_s_without_spaces , "HW9W"
|
88
|
+
end
|
89
|
+
|
90
|
+
should "change image in a block" do
|
91
|
+
test = RTesseract.read(@path.join("images","test.png").to_s) do |image|
|
92
|
+
image = image.white_threshold(245)
|
93
|
+
image = image.quantize(256,Magick::GRAYColorspace)
|
94
|
+
end
|
95
|
+
assert_equal test.to_s_without_spaces , "HW9W"
|
96
|
+
|
97
|
+
test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en'}) do |image|
|
98
|
+
image = image.white_threshold(245).quantize(256,Magick::GRAYColorspace)
|
99
|
+
end
|
100
|
+
assert_equal test.to_s_without_spaces , "3R8Z"
|
101
|
+
end
|
80
102
|
end
|
81
103
|
end
|
82
|
-
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 11
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 9
|
10
|
-
version: 0.0.9
|
9
|
+
- 10
|
10
|
+
version: 0.0.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Danilo Jeremias da Silva
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2012-01-13 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: jeweler
|
@@ -121,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
121
121
|
requirements: []
|
122
122
|
|
123
123
|
rubyforge_project:
|
124
|
-
rubygems_version: 1.8.
|
124
|
+
rubygems_version: 1.8.11
|
125
125
|
signing_key:
|
126
126
|
specification_version: 3
|
127
127
|
summary: Ruby library for working with the Tesseract OCR.
|