rtesseract 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -24,6 +24,26 @@ It's very simple to use rtesseract:
24
24
  image.source = "new_image.png"
25
25
  image.to_s
26
26
 
27
+ === CONVERT PARTS OF IMAGE TO STRING
28
+
29
+ mix_block = RTesseract::Mixed.new("test.jpg") do |image|
30
+ image.area(28, 19, 25, 25)
31
+ image.area(180, 22, 20, 28)
32
+ image.area(218, 22, 24, 28)
33
+ image.area(248, 24, 22, 22)
34
+ end
35
+ mix_block.to_s
36
+
37
+ OR
38
+
39
+ mix_block = RTesseract::Mixed.new("test.jpg",{:areas => [
40
+ {:x => 28, :y=>19, :width=>25, :height=>25 },
41
+ {:x => 180, :y=>22, :width=>20, :height=>28},
42
+ {:x => 218, :y=>22, :width=>24, :height=>28},
43
+ {:x => 248, :y=>24, :width=>22, :height=>22}
44
+ ]})
45
+ mix_block.to_s
46
+
27
47
  == Note on Patches/Pull Requests
28
48
 
29
49
  * Fork the project.
data/Rakefile CHANGED
@@ -5,13 +5,15 @@ begin
5
5
  require 'jeweler'
6
6
  Jeweler::Tasks.new do |gem|
7
7
  gem.name = "rtesseract"
8
- gem.version = '0.0.6'
8
+ gem.version = '0.0.7'
9
9
  gem.summary = "Ruby library for working with the Tesseract OCR."
10
10
  gem.description = "Ruby library for working with the Tesseract OCR."
11
11
  gem.email = "dannnylo@gmail.com"
12
12
  gem.homepage = "http://github.com/dannnylo/rtesseract"
13
13
  gem.authors = ["Danilo Jeremias da Silva"]
14
+ gem.add_development_dependency "jeweler", ">=1.4.0"
14
15
  gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
16
+
15
17
  gem.add_runtime_dependency "rmagick", '>= 2.10.1'
16
18
  end
17
19
  Jeweler::GemcutterTasks.new
@@ -1,4 +1,6 @@
1
1
  class RTesseract
2
2
  class ConversionError < StandardError ;end
3
+ class ImageNotSelectedError < StandardError ;end
4
+ class TempFilesNotRemovedError < StandardError ;end
3
5
  end
4
6
 
@@ -40,7 +40,7 @@ class RTesseract
40
40
  convert
41
41
  @value
42
42
  else
43
- raise "Select a image file."
43
+ raise RTesseract::ImageNotSelectedError
44
44
  end
45
45
  end
46
46
 
data/lib/rtesseract.rb CHANGED
@@ -6,15 +6,19 @@ require "rtesseract/errors"
6
6
  require "rtesseract/mixed"
7
7
 
8
8
  class RTesseract
9
- VERSION = '0.0.6'
9
+ VERSION = '0.0.7'
10
10
  attr_accessor :options
11
11
  attr_writer :lang
12
+ attr_writer :psm
12
13
 
13
14
  def initialize(src="", options={})
14
15
  @uid = options.delete(:uid) || nil
15
16
  @source = Pathname.new src
16
17
  @command = options.delete(:command) || "tesseract"
17
18
  @lang = options.delete(:lang) || options.delete("lang") || ""
19
+ @psm = options.delete(:psm) || options.delete("psm") || nil
20
+ @clear_console_output = options.delete(:clear_console_output)
21
+ @clear_console_output = true if @clear_console_output.nil?
18
22
  @options = options
19
23
  @value = ""
20
24
  @x,@y,@w,@h = []
@@ -55,8 +59,8 @@ class RTesseract
55
59
  end
56
60
  end
57
61
  true
58
- rescue
59
- raise "Error on remove file."
62
+ rescue
63
+ raise RTesseract::TempFilesNotRemovedError
60
64
  end
61
65
 
62
66
  def generate_uid
@@ -77,15 +81,28 @@ class RTesseract
77
81
  ## * vie - Vietnamese
78
82
  ## Note: Make sure you have installed the language to tesseract
79
83
  def lang
80
- language = "#{@lang}".strip
81
- {"eng" => ["eng","en","en-us","english"], "deu" => ["deu"], "deu-f" => ["deu-f"] , "fra" => ["fra"], "ita" => ["ita","it"] , "nld" => ["nld"] , "por" => ["por","pt","pt-br","portuguese"] , "spa" => ["spa"] , "vie" => ["vie"]}.each do |value,names|
82
- return " -l #{value} " if names.include? language.downcase
84
+ language = "#{@lang}".strip.downcase
85
+ { # Aliases for language names
86
+ "eng" => ["en","en-us","english"],
87
+ "ita" => ["it"],
88
+ "por" => ["pt","pt-br","portuguese"],
89
+ "spa" => ["sp"]
90
+ }.each do |value,names|
91
+ return " -l #{value} " if names.include? language
83
92
  end
93
+ return " -l #{language} " if language.size > 0
84
94
  ""
85
95
  rescue
86
96
  ""
87
97
  end
88
98
 
99
+ # Page segmentation mode (passed to the tesseract command as -psm)
100
+ def psm
101
+ @psm.nil? ? "" : " -psm #{@psm} "
102
+ rescue
103
+ ""
104
+ end
105
+
89
106
  def config
90
107
  @options ||= {}
91
108
  @options.collect{|k,v| "#{k} #{v}" }.join("\n")
@@ -98,17 +115,23 @@ class RTesseract
98
115
  conf.path
99
116
  end
100
117
 
118
+ # TODO: Suppress console output on macOS and Windows as well
119
+ def clear_console_output
120
+ return "" unless @clear_console_output
121
+ return "2>/dev/null" if File.exist?("/dev/null") #Linux console clear
122
+ end
123
+
101
124
  #Convert image to string
102
125
  def convert
103
126
  generate_uid
104
127
  tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}")
105
128
  tmp_image = image_to_tiff
106
- `#{@command} #{tmp_image} #{tmp_file.to_s} #{lang} #{config_file}`
129
+ `#{@command} #{tmp_image} #{tmp_file.to_s} #{lang} #{psm} #{config_file} #{clear_console_output}`
107
130
  @value = File.read("#{tmp_file.to_s}.txt").to_s
108
131
  @uid = nil
109
132
  remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
110
133
  rescue
111
- raise "Error on conversion."
134
+ raise RTesseract::ConversionError
112
135
  end
113
136
 
114
137
  #Output value
@@ -118,7 +141,7 @@ class RTesseract
118
141
  convert
119
142
  @value
120
143
  else
121
- raise "Select a image file."
144
+ raise RTesseract::ImageNotSelectedError
122
145
  end
123
146
  end
124
147
 
data/rtesseract.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{rtesseract}
8
- s.version = "0.0.6"
8
+ s.version = "0.0.7"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Danilo Jeremias da Silva"]
12
- s.date = %q{2011-03-02}
12
+ s.date = %q{2011-03-16}
13
13
  s.description = %q{Ruby library for working with the Tesseract OCR.}
14
14
  s.email = %q{dannnylo@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -49,13 +49,16 @@ Gem::Specification.new do |s|
49
49
  s.specification_version = 3
50
50
 
51
51
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
52
+ s.add_development_dependency(%q<jeweler>, [">= 1.4.0"])
52
53
  s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
53
54
  s.add_runtime_dependency(%q<rmagick>, [">= 2.10.1"])
54
55
  else
56
+ s.add_dependency(%q<jeweler>, [">= 1.4.0"])
55
57
  s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
56
58
  s.add_dependency(%q<rmagick>, [">= 2.10.1"])
57
59
  end
58
60
  else
61
+ s.add_dependency(%q<jeweler>, [">= 1.4.0"])
59
62
  s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
60
63
  s.add_dependency(%q<rmagick>, [">= 2.10.1"])
61
64
  end
data/test/test_mixed.rb CHANGED
@@ -16,12 +16,20 @@ class TestMixed < Test::Unit::TestCase
16
16
 
17
17
  should "translate parts of the image to text" do
18
18
  mix_block = RTesseract::Mixed.new(@image_tiff) do |image|
19
- image.area(28, 19, 25, 25) #position of 4
20
- image.area(180, 22, 20, 28) # position of 3
21
- image.area(218, 22, 24, 28) # position of z
22
- image.area(248, 24, 22, 22) # position of z
19
+ image.area(28, 19, 25, 25) #position of 4
20
+ image.area(180, 22, 20, 28) # position of 3
21
+ image.area(218, 22, 24, 28) # position of z
22
+ image.area(248, 24, 22, 22) # position of z
23
23
  end
24
24
  assert_equal mix_block.to_s_without_spaces , "43ZZ"
25
+
26
+ mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [
27
+ {:x => 28, :y=>19, :width=>25, :height=>25 }, #position of 4
28
+ {:x => 180, :y=>22, :width=>20, :height=>28}, # position of 3
29
+ {:x => 218, :y=>22, :width=>24, :height=>28}, # position of z
30
+ {:x => 248, :y=>24, :width=>22, :height=>22} # position of z
31
+ ]})
32
+ assert_equal mix_block.to_s_without_spaces , "43ZZ"
25
33
  end
26
34
  end
27
35
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 6
10
- version: 0.0.6
9
+ - 7
10
+ version: 0.0.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - Danilo Jeremias da Silva
@@ -15,13 +15,29 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-03-02 00:00:00 -03:00
18
+ date: 2011-03-16 00:00:00 -03:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
- name: thoughtbot-shoulda
22
+ name: jeweler
23
23
  prerelease: false
24
24
  requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 7
30
+ segments:
31
+ - 1
32
+ - 4
33
+ - 0
34
+ version: 1.4.0
35
+ type: :development
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: thoughtbot-shoulda
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
25
41
  none: false
26
42
  requirements:
27
43
  - - ">="
@@ -31,11 +47,11 @@ dependencies:
31
47
  - 0
32
48
  version: "0"
33
49
  type: :development
34
- version_requirements: *id001
50
+ version_requirements: *id002
35
51
  - !ruby/object:Gem::Dependency
36
52
  name: rmagick
37
53
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
54
+ requirement: &id003 !ruby/object:Gem::Requirement
39
55
  none: false
40
56
  requirements:
41
57
  - - ">="
@@ -47,7 +63,7 @@ dependencies:
47
63
  - 1
48
64
  version: 2.10.1
49
65
  type: :runtime
50
- version_requirements: *id002
66
+ version_requirements: *id003
51
67
  description: Ruby library for working with the Tesseract OCR.
52
68
  email: dannnylo@gmail.com
53
69
  executables: []