rtesseract 1.0.5 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.document +0 -0
- data/.rspec +0 -0
- data/.travis.sh +2 -0
- data/.travis.yml +4 -2
- data/Gemfile +7 -6
- data/Gemfile.lock +47 -42
- data/LICENSE.txt +1 -1
- data/README.rdoc +14 -5
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/processors/mini_magick.rb +17 -13
- data/lib/processors/rmagick.rb +17 -11
- data/lib/rtesseract.rb +106 -90
- data/lib/rtesseract/errors.rb +12 -4
- data/lib/rtesseract/mixed.rb +18 -21
- data/rtesseract.gemspec +19 -18
- data/spec/rtesseract_mixed_spec.rb +13 -0
- data/spec/rtesseract_spec.rb +32 -4
- data/spec/spec_helper.rb +3 -0
- metadata +18 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3b275cd0fced912da404a74ef1bf5805b9377706
|
4
|
+
data.tar.gz: 3572edcdfae3ee0d93c520290b78d94f18ffd4c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2806a98f9a07fcd3c26ebd86980c088fae3c8e08687f8f7ef43444bbc594188624ef6b2286c932a1874e1aa90053e5b33852f6a57ccb5602454a8ce8a0d8401a
|
7
|
+
data.tar.gz: 4ae0d5c34b189d4c72729fbfc7c004beff31695cb8079281211a5717abbf0e74698354999c76c2c6c8f045b3952799487f15b40247aef7e5effff1ebda7fcae4
|
data/.document
CHANGED
File without changes
|
data/.rspec
CHANGED
File without changes
|
data/.travis.sh
ADDED
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -2,15 +2,16 @@ source "http://rubygems.org"
|
|
2
2
|
# Add dependencies to develop your gem here.
|
3
3
|
# Include everything needed to run rake, tests, features, etc.
|
4
4
|
group :development do
|
5
|
-
gem "rspec"
|
6
|
-
gem "rdoc"
|
7
|
-
gem "bundler"
|
8
|
-
gem "jeweler", "~>
|
9
|
-
gem "simplecov"
|
10
|
-
gem '
|
5
|
+
gem "rspec"
|
6
|
+
gem "rdoc"
|
7
|
+
gem "bundler"
|
8
|
+
gem "jeweler", "~> 2.0.1"
|
9
|
+
gem "simplecov"
|
10
|
+
gem 'coveralls', require: false
|
11
11
|
end
|
12
12
|
|
13
13
|
group :test do
|
14
14
|
gem "rmagick"
|
15
|
+
gem "mini_magick"
|
15
16
|
end
|
16
17
|
|
data/Gemfile.lock
CHANGED
@@ -1,59 +1,63 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
-
activesupport (4.0.1)
|
5
|
-
i18n (~> 0.6, >= 0.6.4)
|
6
|
-
minitest (~> 4.2)
|
7
|
-
multi_json (~> 1.3)
|
8
|
-
thread_safe (~> 0.1)
|
9
|
-
tzinfo (~> 0.3.37)
|
10
4
|
addressable (2.3.5)
|
11
|
-
atomic (1.1.14)
|
12
5
|
builder (3.2.2)
|
6
|
+
coveralls (0.7.0)
|
7
|
+
multi_json (~> 1.3)
|
8
|
+
rest-client
|
9
|
+
simplecov (>= 0.7)
|
10
|
+
term-ansicolor
|
11
|
+
thor
|
12
|
+
descendants_tracker (0.0.3)
|
13
13
|
diff-lcs (1.1.3)
|
14
|
-
docile (1.1.
|
15
|
-
faraday (0.
|
16
|
-
multipart-post (
|
14
|
+
docile (1.1.3)
|
15
|
+
faraday (0.9.0)
|
16
|
+
multipart-post (>= 1.2, < 3)
|
17
17
|
git (1.2.6)
|
18
|
-
github_api (0.
|
19
|
-
addressable
|
20
|
-
|
18
|
+
github_api (0.11.2)
|
19
|
+
addressable (~> 2.3)
|
20
|
+
descendants_tracker (~> 0.0.1)
|
21
|
+
faraday (~> 0.8, < 0.10)
|
21
22
|
hashie (>= 1.2)
|
22
|
-
multi_json (
|
23
|
-
nokogiri (~> 1.
|
23
|
+
multi_json (>= 1.7.5, < 2.0)
|
24
|
+
nokogiri (~> 1.6.0)
|
24
25
|
oauth2
|
25
26
|
hashie (2.0.5)
|
26
27
|
highline (1.6.20)
|
27
|
-
|
28
|
-
i18n (0.6.5)
|
29
|
-
jeweler (1.8.8)
|
28
|
+
jeweler (2.0.1)
|
30
29
|
builder
|
31
|
-
bundler (
|
30
|
+
bundler (>= 1.0)
|
32
31
|
git (>= 1.2.5)
|
33
|
-
github_api
|
32
|
+
github_api
|
34
33
|
highline (>= 1.6.15)
|
35
|
-
nokogiri (
|
34
|
+
nokogiri (>= 1.5.10)
|
36
35
|
rake
|
37
36
|
rdoc
|
38
37
|
json (1.8.1)
|
39
|
-
jwt (0.1.
|
38
|
+
jwt (0.1.11)
|
40
39
|
multi_json (>= 1.5)
|
41
|
-
|
42
|
-
|
40
|
+
mime-types (2.1)
|
41
|
+
mini_magick (3.7.0)
|
42
|
+
subexec (~> 0.2.1)
|
43
|
+
mini_portile (0.5.2)
|
44
|
+
multi_json (1.8.4)
|
43
45
|
multi_xml (0.5.5)
|
44
|
-
multipart-post (
|
45
|
-
nokogiri (1.
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
jwt (~> 0.1.
|
50
|
-
multi_json (~> 1.
|
46
|
+
multipart-post (2.0.0)
|
47
|
+
nokogiri (1.6.1)
|
48
|
+
mini_portile (~> 0.5.0)
|
49
|
+
oauth2 (0.9.3)
|
50
|
+
faraday (>= 0.8, < 0.10)
|
51
|
+
jwt (~> 0.1.8)
|
52
|
+
multi_json (~> 1.3)
|
51
53
|
multi_xml (~> 0.5)
|
52
54
|
rack (~> 1.2)
|
53
55
|
rack (1.5.2)
|
54
|
-
rake (10.1.
|
56
|
+
rake (10.1.1)
|
55
57
|
rdoc (3.12.2)
|
56
58
|
json (~> 1.4)
|
59
|
+
rest-client (1.6.7)
|
60
|
+
mime-types (>= 1.16)
|
57
61
|
rmagick (2.13.2)
|
58
62
|
rspec (2.8.0)
|
59
63
|
rspec-core (~> 2.8.0)
|
@@ -63,25 +67,26 @@ GEM
|
|
63
67
|
rspec-expectations (2.8.0)
|
64
68
|
diff-lcs (~> 1.1.2)
|
65
69
|
rspec-mocks (2.8.0)
|
66
|
-
shoulda-matchers (2.4.0)
|
67
|
-
activesupport (>= 3.0.0)
|
68
70
|
simplecov (0.8.2)
|
69
71
|
docile (~> 1.1.0)
|
70
72
|
multi_json
|
71
73
|
simplecov-html (~> 0.8.0)
|
72
74
|
simplecov-html (0.8.0)
|
73
|
-
|
74
|
-
|
75
|
-
|
75
|
+
subexec (0.2.3)
|
76
|
+
term-ansicolor (1.2.2)
|
77
|
+
tins (~> 0.8)
|
78
|
+
thor (0.18.1)
|
79
|
+
tins (0.13.2)
|
76
80
|
|
77
81
|
PLATFORMS
|
78
82
|
ruby
|
79
83
|
|
80
84
|
DEPENDENCIES
|
81
|
-
bundler
|
82
|
-
|
83
|
-
|
85
|
+
bundler
|
86
|
+
coveralls
|
87
|
+
jeweler (~> 2.0.1)
|
88
|
+
mini_magick
|
89
|
+
rdoc
|
84
90
|
rmagick
|
85
|
-
rspec
|
86
|
-
shoulda-matchers
|
91
|
+
rspec
|
87
92
|
simplecov
|
data/LICENSE.txt
CHANGED
data/README.rdoc
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
= rtesseract
|
2
2
|
{<img src="https://badge.fury.io/rb/rtesseract.png" alt="Gem Version" />}[http://badge.fury.io/rb/rtesseract]
|
3
|
+
{<img src="https://travis-ci.org/dannnylo/rtesseract.png?branch=master" alt="Build Status" />}[https://travis-ci.org/dannnylo/rtesseract]
|
4
|
+
{<img src="https://coveralls.io/repos/dannnylo/rtesseract/badge.png?branch=master" alt="Coverage Status" />}[https://coveralls.io/r/dannnylo/rtesseract?branch=master]
|
5
|
+
{<img src="https://codeclimate.com/github/dannnylo/rtesseract.png" />}[https://codeclimate.com/github/dannnylo/rtesseract]
|
3
6
|
|
4
7
|
Ruby library for working with the Tesseract OCR.
|
5
8
|
|
@@ -55,11 +58,16 @@ It's very simple to use rtesseract:
|
|
55
58
|
]})
|
56
59
|
mix_block.to_s
|
57
60
|
|
58
|
-
==
|
59
|
-
|
61
|
+
== Contributing to rtesseract
|
62
|
+
|
63
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
64
|
+
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
|
60
65
|
* Fork the project.
|
61
|
-
*
|
62
|
-
*
|
66
|
+
* Start a feature/bugfix branch.
|
67
|
+
* Commit and push until you are happy with your contribution.
|
68
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
69
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
70
|
+
|
63
71
|
|
64
72
|
== Links
|
65
73
|
|
@@ -68,4 +76,5 @@ It's very simple to use rtesseract:
|
|
68
76
|
|
69
77
|
== Copyright
|
70
78
|
|
71
|
-
Copyright (c)
|
79
|
+
Copyright (c) 2014 Danilo Jeremias da Silva. See LICENSE.txt for
|
80
|
+
further details.
|
data/Rakefile
CHANGED
@@ -13,7 +13,7 @@ require 'rake'
|
|
13
13
|
|
14
14
|
require 'jeweler'
|
15
15
|
Jeweler::Tasks.new do |gem|
|
16
|
-
# gem is a Gem::Specification... see http://
|
16
|
+
# gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
|
17
17
|
gem.name = "rtesseract"
|
18
18
|
gem.homepage = "http://github.com/dannnylo/rtesseract"
|
19
19
|
gem.license = "MIT"
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0
|
1
|
+
1.1.0
|
@@ -1,24 +1,28 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
2
|
+
# Add to rtesseract a image manipulation with MiniMagick
|
3
3
|
module MiniMagickProcessor
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
4
|
+
def self.setup
|
5
|
+
require 'mini_magick'
|
6
|
+
end
|
7
|
+
|
8
|
+
def self.a_name?(name)
|
9
|
+
%w(mini_magick MiniMagickProcessor).include?(name.to_s)
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.image_to_tif(source, x = nil, y = nil, w = nil, h = nil)
|
13
|
+
tmp_file = Tempfile.new(['', '.tif'])
|
14
|
+
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
15
|
+
cat.format('tif') { |c| c.compress 'None' }
|
16
|
+
cat.crop("#{w}x#{h}+#{x}+#{y}") unless [x, y, w, h].compact == []
|
12
17
|
cat.write tmp_file.path.to_s
|
13
|
-
|
18
|
+
tmp_file
|
14
19
|
end
|
15
20
|
|
16
|
-
def read_with_processor(path)
|
21
|
+
def self.read_with_processor(path)
|
17
22
|
MiniMagick::Image.open(path.to_s)
|
18
23
|
end
|
19
24
|
|
20
|
-
def
|
25
|
+
def self.image?(object)
|
21
26
|
object.class == MiniMagick::Image
|
22
27
|
end
|
23
28
|
end
|
24
|
-
|
data/lib/processors/rmagick.rb
CHANGED
@@ -1,21 +1,27 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
2
|
+
# Add to rtesseract a image manipulation with RMagick
|
3
3
|
module RMagickProcessor
|
4
|
-
|
5
|
-
|
6
|
-
tmp_file = Tempfile.new(["",".tif"])
|
7
|
-
cat = @instance || Magick::Image.read(@source.to_s).first
|
8
|
-
cat.crop!(@x, @y, @w, @h) unless [@x, @y, @w, @h].compact == []
|
9
|
-
cat.write(tmp_file.path.to_s){self.compression = Magick::NoCompression}
|
10
|
-
return tmp_file
|
4
|
+
def self.setup
|
5
|
+
require 'RMagick'
|
11
6
|
end
|
12
7
|
|
13
|
-
def
|
8
|
+
def self.a_name?(name)
|
9
|
+
%w(rmagick RMagickProcessor).include?(name.to_s)
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.image_to_tif(source, x = nil, y = nil, w = nil, h = nil)
|
13
|
+
tmp_file = Tempfile.new(['', '.tif'])
|
14
|
+
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
15
|
+
cat.crop!(x, y, w, h) unless [x, y, w, h].compact == []
|
16
|
+
cat.write(tmp_file.path.to_s) { self.compression = Magick::NoCompression }
|
17
|
+
tmp_file
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.read_with_processor(path)
|
14
21
|
Magick::Image.read(path.to_s).first
|
15
22
|
end
|
16
23
|
|
17
|
-
def
|
24
|
+
def self.image?(object)
|
18
25
|
object.class == Magick::Image
|
19
26
|
end
|
20
27
|
end
|
21
|
-
|
data/lib/rtesseract.rb
CHANGED
@@ -1,89 +1,104 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require
|
3
|
-
require
|
2
|
+
require 'pathname'
|
3
|
+
require 'tempfile'
|
4
4
|
|
5
|
-
require
|
6
|
-
require
|
5
|
+
require 'rtesseract/errors'
|
6
|
+
require 'rtesseract/mixed'
|
7
7
|
|
8
|
+
# Processors
|
9
|
+
require 'processors/rmagick.rb'
|
10
|
+
require 'processors/mini_magick.rb'
|
11
|
+
|
12
|
+
# Ruby wrapper for Tesseract OCR
|
8
13
|
class RTesseract
|
9
14
|
attr_accessor :options
|
10
15
|
attr_writer :lang
|
11
16
|
attr_writer :psm
|
12
17
|
attr_reader :processor
|
13
18
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
19
|
+
OPTIONS = %w(command lang psm processor debug clear_console_output)
|
20
|
+
# Aliases to languages names
|
21
|
+
LANGUAGES = {
|
22
|
+
'eng' => %w(en en-us english),
|
23
|
+
'ita' => %w(it),
|
24
|
+
'por' => %w(pt pt-br portuguese),
|
25
|
+
'spa' => %w(sp)
|
26
|
+
}
|
27
|
+
|
28
|
+
def initialize(src = '', options = {})
|
29
|
+
@options = command_line_options(options)
|
30
|
+
@value, @x, @y, @w, @h = ['']
|
24
31
|
choose_processor!
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
+
@source = @processor.image?(src) ? src : Pathname.new(src)
|
33
|
+
end
|
34
|
+
|
35
|
+
def fetch_option(options, name, default)
|
36
|
+
options.fetch(name.to_s, options.fetch(name, default))
|
37
|
+
end
|
38
|
+
|
39
|
+
def command_line_options(options)
|
40
|
+
@command = fetch_option(options, :command, default_command)
|
41
|
+
@lang = fetch_option(options, :lang, '')
|
42
|
+
@psm = fetch_option(options, :psm, nil)
|
43
|
+
@processor = fetch_option(options, :processor, 'rmagick')
|
44
|
+
@debug = fetch_option(options, :debug, false)
|
45
|
+
|
46
|
+
# Disable clear console if debug mode
|
47
|
+
@clear_console_output = @debug ? false : fetch_option(options, :clear_console_output, true)
|
48
|
+
|
49
|
+
options.delete_if { |k, v| OPTIONS.include?(k.to_s) }
|
50
|
+
options
|
32
51
|
end
|
33
52
|
|
34
53
|
def default_command
|
35
54
|
TesseractBin::Executables[:tesseract] || 'tesseract'
|
36
55
|
rescue
|
37
|
-
|
56
|
+
'tesseract'
|
38
57
|
end
|
39
58
|
|
40
59
|
def self.read(src = nil, options = {}, &block)
|
41
|
-
|
42
|
-
processor = options.delete(:processor) || options.delete(
|
43
|
-
if processor ==
|
60
|
+
fail RTesseract::ImageNotSelectedError if src.nil?
|
61
|
+
processor = options.delete(:processor) || options.delete('processor')
|
62
|
+
if processor == 'mini_magick'
|
44
63
|
image = MiniMagickProcessor.read_with_processor(src.to_s)
|
45
64
|
else
|
46
65
|
image = RMagickProcessor.read_with_processor(src.to_s)
|
47
66
|
end
|
48
67
|
yield image
|
49
|
-
object = RTesseract.new(
|
68
|
+
object = RTesseract.new('', options)
|
50
69
|
object.from_blob(image.to_blob)
|
51
70
|
object
|
52
71
|
end
|
53
72
|
|
54
|
-
def source=
|
55
|
-
@value =
|
56
|
-
@source = Pathname.new
|
73
|
+
def source=(src)
|
74
|
+
@value = ''
|
75
|
+
@source = @processor.image?(src) ? src : Pathname.new(src)
|
57
76
|
end
|
58
77
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
#Crop image to convert
|
65
|
-
def crop!(x,y,width,height)
|
66
|
-
@x, @y, @w, @h = x, y, width, height
|
78
|
+
# Crop image to convert
|
79
|
+
def crop!(x, y, width, height)
|
80
|
+
@value = ''
|
81
|
+
@x, @y, @w, @h = x.to_i, y.to_i, width.to_i, height.to_i
|
67
82
|
self
|
68
83
|
end
|
69
84
|
|
70
|
-
#Remove files
|
71
|
-
def remove_file(files=[])
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
end
|
85
|
+
# Remove files
|
86
|
+
def remove_file(files = [])
|
87
|
+
files.each do |file|
|
88
|
+
if file.is_a?(Tempfile)
|
89
|
+
file.close
|
90
|
+
file.unlink
|
91
|
+
else
|
92
|
+
File.unlink(file)
|
79
93
|
end
|
94
|
+
end
|
80
95
|
true
|
81
|
-
rescue
|
82
|
-
raise RTesseract::TempFilesNotRemovedError
|
96
|
+
rescue => error
|
97
|
+
raise RTesseract::TempFilesNotRemovedError.new(:error => error, :files => files)
|
83
98
|
end
|
84
99
|
|
85
100
|
# Select the language
|
86
|
-
|
101
|
+
# ===Languages
|
87
102
|
## * eng - English
|
88
103
|
## * deu - German
|
89
104
|
## * deu-f - German fraktur
|
@@ -96,35 +111,30 @@ class RTesseract
|
|
96
111
|
## Note: Make sure you have installed the language to tesseract
|
97
112
|
def lang
|
98
113
|
language = "#{@lang}".strip.downcase
|
99
|
-
|
100
|
-
"eng" => ["en","en-us","english"],
|
101
|
-
"ita" => ["it"],
|
102
|
-
"por" => ["pt","pt-br","portuguese"],
|
103
|
-
"spa" => ["sp"]
|
104
|
-
}.each do |value,names|
|
114
|
+
LANGUAGES.each do |value, names|
|
105
115
|
return " -l #{value} " if names.include? language
|
106
116
|
end
|
107
117
|
return " -l #{language} " if language.size > 0
|
108
|
-
|
118
|
+
''
|
109
119
|
rescue
|
110
|
-
|
120
|
+
''
|
111
121
|
end
|
112
122
|
|
113
|
-
#Page Segment Mode
|
123
|
+
# Page Segment Mode
|
114
124
|
def psm
|
115
|
-
@psm.nil? ?
|
125
|
+
@psm.nil? ? '' : " -psm #{@psm} "
|
116
126
|
rescue
|
117
|
-
|
127
|
+
''
|
118
128
|
end
|
119
129
|
|
120
130
|
def config
|
121
131
|
@options ||= {}
|
122
|
-
@options.
|
132
|
+
@options.map { |k, v| "#{k} #{v}" }.join("\n")
|
123
133
|
end
|
124
134
|
|
125
135
|
def config_file
|
126
|
-
return
|
127
|
-
conf = Tempfile.new(
|
136
|
+
return '' if @options == {}
|
137
|
+
conf = Tempfile.new('config')
|
128
138
|
conf.write(config)
|
129
139
|
conf.flush
|
130
140
|
conf.path
|
@@ -132,59 +142,65 @@ class RTesseract
|
|
132
142
|
|
133
143
|
#TODO: Clear console for MacOS or Windows
|
134
144
|
def clear_console_output
|
135
|
-
return
|
136
|
-
return
|
145
|
+
return '' unless @clear_console_output
|
146
|
+
return '2>/dev/null' if File.exist?('/dev/null') # Linux console clear
|
147
|
+
end
|
148
|
+
|
149
|
+
def image
|
150
|
+
(@image = @processor.image_to_tif(@source, @x, @y, @w, @h)).path
|
151
|
+
end
|
152
|
+
|
153
|
+
def text_file
|
154
|
+
@text_file = Pathname.new(Dir.tmpdir).join("#{Time.now.to_f}#{rand(1500)}.txt").to_s
|
137
155
|
end
|
138
156
|
|
139
|
-
#Convert image to string
|
157
|
+
# Convert image to string
|
140
158
|
def convert
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
rescue
|
147
|
-
raise RTesseract::ConversionError
|
159
|
+
`#{@command} "#{image}" "#{text_file.gsub('.txt', '')}" #{lang} #{psm} #{config_file} #{clear_console_output}`
|
160
|
+
@value = File.read(@text_file).to_s
|
161
|
+
remove_file([@image, @text_file])
|
162
|
+
rescue => error
|
163
|
+
raise RTesseract::ConversionError.new(error)
|
148
164
|
end
|
149
165
|
|
150
|
-
#Read image from memory blob
|
166
|
+
# Read image from memory blob
|
151
167
|
def from_blob(blob)
|
152
|
-
blob_file = Tempfile.new(
|
168
|
+
blob_file = Tempfile.new('blob')
|
153
169
|
blob_file.write(blob)
|
154
170
|
blob_file.rewind
|
155
171
|
blob_file.flush
|
156
172
|
self.source = blob_file.path
|
157
173
|
convert
|
158
174
|
remove_file([blob_file])
|
159
|
-
rescue
|
160
|
-
raise RTesseract::ConversionError
|
175
|
+
rescue => error
|
176
|
+
raise RTesseract::ConversionError.new(error)
|
161
177
|
end
|
162
178
|
|
163
|
-
#Output value
|
179
|
+
# Output value
|
164
180
|
def to_s
|
165
|
-
return @value if @value !=
|
166
|
-
if @
|
181
|
+
return @value if @value != ''
|
182
|
+
if @processor.image?(@source) || @source.file?
|
167
183
|
convert
|
168
184
|
@value
|
169
185
|
else
|
170
|
-
|
186
|
+
fail RTesseract::ImageNotSelectedError.new(@source)
|
171
187
|
end
|
172
188
|
end
|
173
189
|
|
174
|
-
#Remove spaces and break-lines
|
190
|
+
# Remove spaces and break-lines
|
175
191
|
def to_s_without_spaces
|
176
|
-
to_s.gsub(
|
192
|
+
to_s.gsub(' ', '').gsub("\n", '').gsub("\r", '')
|
177
193
|
end
|
178
194
|
|
179
195
|
private
|
196
|
+
|
180
197
|
def choose_processor!
|
181
|
-
if @processor.to_s
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
end
|
198
|
+
@processor = if MiniMagickProcessor.a_name?(@processor.to_s)
|
199
|
+
MiniMagickProcessor
|
200
|
+
else
|
201
|
+
RMagickProcessor
|
202
|
+
end
|
203
|
+
@processor.setup
|
188
204
|
end
|
189
205
|
end
|
190
206
|
|
data/lib/rtesseract/errors.rb
CHANGED
@@ -1,6 +1,14 @@
|
|
1
1
|
class RTesseract
|
2
|
-
|
3
|
-
class
|
4
|
-
|
5
|
-
|
2
|
+
# Class of error with storage of normal errors
|
3
|
+
class ErrorWithMemory < StandardError
|
4
|
+
attr_accessor :old_error
|
5
|
+
|
6
|
+
def initialize(stored_error = nil)
|
7
|
+
@old_error = stored_error
|
8
|
+
end
|
9
|
+
end
|
6
10
|
|
11
|
+
class ConversionError < ErrorWithMemory; end
|
12
|
+
class ImageNotSelectedError < ErrorWithMemory; end
|
13
|
+
class TempFilesNotRemovedError < ErrorWithMemory; end
|
14
|
+
end
|
data/lib/rtesseract/mixed.rb
CHANGED
@@ -1,54 +1,51 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
class RTesseract
|
3
|
+
# Class to read an image from specified areas
|
3
4
|
class Mixed
|
4
|
-
|
5
|
+
attr_reader :areas
|
6
|
+
|
7
|
+
def initialize(src = '', options = {})
|
5
8
|
@source = Pathname.new src
|
6
9
|
@options = options
|
7
|
-
@value =
|
10
|
+
@value = ''
|
8
11
|
@areas = options.delete(:areas) || []
|
9
12
|
yield self if block_given?
|
10
13
|
end
|
11
14
|
|
12
15
|
def area(x, y, width, height)
|
13
|
-
@value =
|
14
|
-
@areas << {:x => x, :y => y, :width => width, :height => height}
|
15
|
-
end
|
16
|
-
|
17
|
-
def areas
|
18
|
-
@areas
|
16
|
+
@value = ''
|
17
|
+
@areas << { :x => x, :y => y, :width => width, :height => height }
|
19
18
|
end
|
20
19
|
|
21
20
|
def clear_areas
|
22
21
|
@areas = []
|
23
22
|
end
|
24
23
|
|
25
|
-
#Convert parts of image to string
|
24
|
+
# Convert parts of image to string
|
26
25
|
def convert
|
27
|
-
@value =
|
28
|
-
@areas.
|
29
|
-
image
|
30
|
-
image.crop!(area[:x].to_i, area[:y].to_i, area[:width].to_i, area[:height].to_i)
|
26
|
+
@value = ''
|
27
|
+
@areas.each_with_object(RTesseract.new(@source.to_s, @options.dup)) do |area, image|
|
28
|
+
image.crop!(area[:x], area[:y], area[:width], area[:height])
|
31
29
|
@value << image.to_s
|
32
30
|
end
|
33
|
-
rescue
|
34
|
-
raise RTesseract::ConversionError
|
31
|
+
rescue => error
|
32
|
+
raise RTesseract::ConversionError.new(error)
|
35
33
|
end
|
36
34
|
|
37
|
-
#Output value
|
35
|
+
# Output value
|
38
36
|
def to_s
|
39
|
-
return @value if @value !=
|
37
|
+
return @value if @value != ''
|
40
38
|
if @source.file?
|
41
39
|
convert
|
42
40
|
@value
|
43
41
|
else
|
44
|
-
|
42
|
+
fail RTesseract::ImageNotSelectedError.new(@source)
|
45
43
|
end
|
46
44
|
end
|
47
45
|
|
48
|
-
#Remove spaces and break-lines
|
46
|
+
# Remove spaces and break-lines
|
49
47
|
def to_s_without_spaces
|
50
|
-
to_s.gsub(
|
48
|
+
to_s.gsub(' ', '').gsub("\n", '').gsub("\r", '')
|
51
49
|
end
|
52
50
|
end
|
53
51
|
end
|
54
|
-
|
data/rtesseract.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: rtesseract 1.0
|
5
|
+
# stub: rtesseract 1.1.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "rtesseract"
|
9
|
-
s.version = "1.0
|
9
|
+
s.version = "1.1.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Danilo Jeremias da Silva"]
|
14
|
-
s.date = "2014-
|
14
|
+
s.date = "2014-02-07"
|
15
15
|
s.description = "Ruby library for working with the Tesseract OCR."
|
16
16
|
s.email = "dannnylo@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.files = [
|
22
22
|
".document",
|
23
23
|
".rspec",
|
24
|
+
".travis.sh",
|
24
25
|
".travis.yml",
|
25
26
|
"Gemfile",
|
26
27
|
"Gemfile.lock",
|
@@ -54,27 +55,27 @@ Gem::Specification.new do |s|
|
|
54
55
|
s.specification_version = 4
|
55
56
|
|
56
57
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
57
|
-
s.add_development_dependency(%q<rspec>, ["
|
58
|
-
s.add_development_dependency(%q<rdoc>, ["
|
59
|
-
s.add_development_dependency(%q<bundler>, ["
|
60
|
-
s.add_development_dependency(%q<jeweler>, ["~>
|
58
|
+
s.add_development_dependency(%q<rspec>, [">= 0"])
|
59
|
+
s.add_development_dependency(%q<rdoc>, [">= 0"])
|
60
|
+
s.add_development_dependency(%q<bundler>, [">= 0"])
|
61
|
+
s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
|
61
62
|
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
62
|
-
s.add_development_dependency(%q<
|
63
|
+
s.add_development_dependency(%q<coveralls>, [">= 0"])
|
63
64
|
else
|
64
|
-
s.add_dependency(%q<rspec>, ["
|
65
|
-
s.add_dependency(%q<rdoc>, ["
|
66
|
-
s.add_dependency(%q<bundler>, ["
|
67
|
-
s.add_dependency(%q<jeweler>, ["~>
|
65
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
66
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
67
|
+
s.add_dependency(%q<bundler>, [">= 0"])
|
68
|
+
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
68
69
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
69
|
-
s.add_dependency(%q<
|
70
|
+
s.add_dependency(%q<coveralls>, [">= 0"])
|
70
71
|
end
|
71
72
|
else
|
72
|
-
s.add_dependency(%q<rspec>, ["
|
73
|
-
s.add_dependency(%q<rdoc>, ["
|
74
|
-
s.add_dependency(%q<bundler>, ["
|
75
|
-
s.add_dependency(%q<jeweler>, ["~>
|
73
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
74
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
75
|
+
s.add_dependency(%q<bundler>, [">= 0"])
|
76
|
+
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
76
77
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
77
|
-
s.add_dependency(%q<
|
78
|
+
s.add_dependency(%q<coveralls>, [">= 0"])
|
78
79
|
end
|
79
80
|
end
|
80
81
|
|
@@ -21,6 +21,8 @@ describe "Rtesseract::Mixed" do
|
|
21
21
|
image.area(248, 24, 22, 22) # position of z
|
22
22
|
end
|
23
23
|
mix_block.to_s_without_spaces.should eql("43ZZ")
|
24
|
+
mix_block.clear_areas
|
25
|
+
mix_block.areas.should == []
|
24
26
|
|
25
27
|
mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [
|
26
28
|
{:x => 28, :y=>19, :width=>25, :height=>25 }, #position of 4
|
@@ -30,4 +32,15 @@ describe "Rtesseract::Mixed" do
|
|
30
32
|
],:psm=>7})
|
31
33
|
mix_block.to_s_without_spaces.should eql("43ZZ")
|
32
34
|
end
|
35
|
+
|
36
|
+
it " get a error" do
|
37
|
+
mix_block = RTesseract::Mixed.new(@path.join("images","test_not_exists.png").to_s,{:areas => [{:x => 28, :y=>19, :width=>25, :height=>25 }
|
38
|
+
],:psm=>7})
|
39
|
+
expect{ mix_block.to_s_without_spaces }.to raise_error(RTesseract::ImageNotSelectedError)
|
40
|
+
|
41
|
+
|
42
|
+
mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [{:x => 28, :y=>19, :width=>25, :height=>25 }
|
43
|
+
],:psm=>7, :command => "tesseract_error"})
|
44
|
+
expect{ mix_block.to_s }.to raise_error(RTesseract::ConversionError)
|
45
|
+
end
|
33
46
|
end
|
data/spec/rtesseract_spec.rb
CHANGED
@@ -16,12 +16,11 @@ describe "Rtesseract" do
|
|
16
16
|
|
17
17
|
it " translate image to text" do
|
18
18
|
RTesseract.new(@image_tiff).to_s_without_spaces.should eql("43ZZ")
|
19
|
+
RTesseract.new(@image_tiff, {:processor => 'mini_magick'}).to_s_without_spaces.should eql("43ZZ")
|
19
20
|
RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces.should eql("V2V4")
|
20
21
|
RTesseract.new(@path.join("images","test with spaces.tif").to_s).to_s_without_spaces.should eql("V2V4")
|
21
22
|
end
|
22
23
|
|
23
|
-
|
24
|
-
|
25
24
|
it " translate images .png, .jpg, .bmp" do
|
26
25
|
RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces.should eql("HW9W")
|
27
26
|
RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces.should eql("3R8Z")
|
@@ -77,10 +76,20 @@ describe "Rtesseract" do
|
|
77
76
|
test = RTesseract.new("", {:psm => 7})
|
78
77
|
test.from_blob(blob)
|
79
78
|
test.to_s_without_spaces.should eql("HW9W")
|
79
|
+
|
80
|
+
test = RTesseract.new("", {:psm => 7})
|
81
|
+
expect{test.from_blob('') }.to raise_error(RTesseract::ConversionError)
|
80
82
|
end
|
81
83
|
|
82
84
|
it " use a instance" do
|
83
85
|
RTesseract.new(Magick::Image.read(@image_tiff.to_s).first).to_s_without_spaces.should eql("43ZZ")
|
86
|
+
RMagickProcessor.a_name?('teste').should == false
|
87
|
+
RMagickProcessor.a_name?('rmagick').should == true
|
88
|
+
RMagickProcessor.a_name?('RMagickProcessor').should == true
|
89
|
+
|
90
|
+
MiniMagickProcessor.a_name?('teste').should == false
|
91
|
+
MiniMagickProcessor.a_name?('mini_magick').should == true
|
92
|
+
MiniMagickProcessor.a_name?('MiniMagickProcessor').should == true
|
84
93
|
end
|
85
94
|
|
86
95
|
it " change image in a block" do
|
@@ -91,9 +100,28 @@ describe "Rtesseract" do
|
|
91
100
|
test.to_s_without_spaces.should eql("HW9W")
|
92
101
|
|
93
102
|
test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en'}) do |image|
|
94
|
-
image = image.white_threshold(245).quantize(256,Magick::GRAYColorspace)
|
103
|
+
image = image.white_threshold(245).quantize(256, Magick::GRAYColorspace)
|
104
|
+
end
|
105
|
+
test.to_s_without_spaces.should eql("3R8Z")
|
106
|
+
|
107
|
+
require 'mini_magick'
|
108
|
+
|
109
|
+
test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en', :processor => 'mini_magick'}) do |image|
|
110
|
+
#image.white_threshold(245)
|
111
|
+
image.gravity "south"
|
95
112
|
end
|
96
|
-
|
113
|
+
test.to_s_without_spaces.should eql("3R8Z")
|
114
|
+
end
|
115
|
+
|
116
|
+
it " get a error" do
|
117
|
+
expect{ RTesseract.new(@path.join("images","test.jpg").to_s, {:command => "tesseract_error"}).to_s }.to raise_error(RTesseract::ConversionError)
|
118
|
+
expect{ RTesseract.new(@path.join("images","test_not_exists.png").to_s).to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
97
119
|
end
|
98
120
|
|
121
|
+
it "remove a file" do
|
122
|
+
rtesseract = RTesseract.new('.')
|
123
|
+
rtesseract.remove_file(Tempfile.new('config'))
|
124
|
+
|
125
|
+
expect{ rtesseract.remove_file(Pathname.new(Dir.tmpdir).join("test_not_exists")) }.to raise_error(RTesseract::TempFilesNotRemovedError)
|
126
|
+
end
|
99
127
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -2,10 +2,13 @@
|
|
2
2
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
3
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
4
4
|
require 'rspec'
|
5
|
+
require 'coveralls'
|
5
6
|
require "simplecov"
|
6
7
|
SimpleCov.start do
|
7
8
|
add_filter "/spec/"
|
8
9
|
end
|
10
|
+
Coveralls.wear!
|
11
|
+
|
9
12
|
require 'rtesseract'
|
10
13
|
# Requires supporting files with custom matchers and macros, etc,
|
11
14
|
# in ./support/ and its subdirectories.
|
metadata
CHANGED
@@ -1,71 +1,71 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - '>='
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - '>='
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rdoc
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - '>='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - '>='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - '>='
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - '>='
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: jeweler
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 2.0.1
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - ~>
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
68
|
+
version: 2.0.1
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: simplecov
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -81,7 +81,7 @@ dependencies:
|
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: coveralls
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - '>='
|
@@ -104,6 +104,7 @@ extra_rdoc_files:
|
|
104
104
|
files:
|
105
105
|
- .document
|
106
106
|
- .rspec
|
107
|
+
- .travis.sh
|
107
108
|
- .travis.yml
|
108
109
|
- Gemfile
|
109
110
|
- Gemfile.lock
|