rtesseract 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -24,6 +24,26 @@ It's very simple to use rtesseract:
24
24
  image.source = "new_image.png"
25
25
  image.to_s
26
26
 
27
+ === CONVERT PARTS OF IMAGE TO STRING
28
+
29
+ mix_block = RTesseract::Mixed.new("test.jpg") do |image|
30
+ image.area(28, 19, 25, 25)
31
+ image.area(180, 22, 20, 28)
32
+ image.area(218, 22, 24, 28)
33
+ image.area(248, 24, 22, 22)
34
+ end
35
+ mix_block.to_s
36
+
37
+ OR
38
+
39
+ mix_block = RTesseract::Mixed.new("test.jpg",{:areas => [
40
+ {:x => 28, :y=>19, :width=>25, :height=>25 },
41
+ {:x => 180, :y=>22, :width=>20, :height=>28},
42
+ {:x => 218, :y=>22, :width=>24, :height=>28},
43
+ {:x => 248, :y=>24, :width=>22, :height=>22}
44
+ ]})
45
+ mix_block.to_s
46
+
27
47
  == Note on Patches/Pull Requests
28
48
 
29
49
  * Fork the project.
data/Rakefile CHANGED
@@ -5,13 +5,15 @@ begin
5
5
  require 'jeweler'
6
6
  Jeweler::Tasks.new do |gem|
7
7
  gem.name = "rtesseract"
8
- gem.version = '0.0.6'
8
+ gem.version = '0.0.7'
9
9
  gem.summary = "Ruby library for working with the Tesseract OCR."
10
10
  gem.description = "Ruby library for working with the Tesseract OCR."
11
11
  gem.email = "dannnylo@gmail.com"
12
12
  gem.homepage = "http://github.com/dannnylo/rtesseract"
13
13
  gem.authors = ["Danilo Jeremias da Silva"]
14
+ gem.add_development_dependency "jeweler", ">=1.4.0"
14
15
  gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
16
+
15
17
  gem.add_runtime_dependency "rmagick", '>= 2.10.1'
16
18
  end
17
19
  Jeweler::GemcutterTasks.new
@@ -1,4 +1,6 @@
1
1
  class RTesseract
2
2
  class ConversionError < StandardError ;end
3
+ class ImageNotSelectedError < StandardError ;end
4
+ class TempFilesNotRemovedError < StandardError ;end
3
5
  end
4
6
 
@@ -40,7 +40,7 @@ class RTesseract
40
40
  convert
41
41
  @value
42
42
  else
43
- raise "Select a image file."
43
+ raise RTesseract::ImageNotSelectedError
44
44
  end
45
45
  end
46
46
 
data/lib/rtesseract.rb CHANGED
@@ -6,15 +6,19 @@ require "rtesseract/errors"
6
6
  require "rtesseract/mixed"
7
7
 
8
8
  class RTesseract
9
- VERSION = '0.0.6'
9
+ VERSION = '0.0.7'
10
10
  attr_accessor :options
11
11
  attr_writer :lang
12
+ attr_writer :psm
12
13
 
13
14
  def initialize(src="", options={})
14
15
  @uid = options.delete(:uid) || nil
15
16
  @source = Pathname.new src
16
17
  @command = options.delete(:command) || "tesseract"
17
18
  @lang = options.delete(:lang) || options.delete("lang") || ""
19
+ @psm = options.delete(:psm) || options.delete("psm") || nil
20
+ @clear_console_output = options.delete(:clear_console_output)
21
+ @clear_console_output = true if @clear_console_output.nil?
18
22
  @options = options
19
23
  @value = ""
20
24
  @x,@y,@w,@h = []
@@ -55,8 +59,8 @@ class RTesseract
55
59
  end
56
60
  end
57
61
  true
58
- rescue
59
- raise "Error on remove file."
62
+ rescue
63
+ raise RTesseract::TempFilesNotRemovedError
60
64
  end
61
65
 
62
66
  def generate_uid
@@ -77,15 +81,28 @@ class RTesseract
77
81
  ## * vie - Vietnamese
78
82
  ## Note: Make sure you have installed the language to tesseract
79
83
  def lang
80
- language = "#{@lang}".strip
81
- {"eng" => ["eng","en","en-us","english"], "deu" => ["deu"], "deu-f" => ["deu-f"] , "fra" => ["fra"], "ita" => ["ita","it"] , "nld" => ["nld"] , "por" => ["por","pt","pt-br","portuguese"] , "spa" => ["spa"] , "vie" => ["vie"]}.each do |value,names|
82
- return " -l #{value} " if names.include? language.downcase
84
+ language = "#{@lang}".strip.downcase
85
+ { #Aliases to languages names
86
+ "eng" => ["en","en-us","english"],
87
+ "ita" => ["it"],
88
+ "por" => ["pt","pt-br","portuguese"],
89
+ "spa" => ["sp"]
90
+ }.each do |value,names|
91
+ return " -l #{value} " if names.include? language
83
92
  end
93
+ return " -l #{language} " if language.size > 0
84
94
  ""
85
95
  rescue
86
96
  ""
87
97
  end
88
98
 
99
+ #Page Segment Mode
100
+ def psm
101
+ @psm.nil? ? "" : " -psm #{@psm} "
102
+ rescue
103
+ ""
104
+ end
105
+
89
106
  def config
90
107
  @options ||= {}
91
108
  @options.collect{|k,v| "#{k} #{v}" }.join("\n")
@@ -98,17 +115,23 @@ class RTesseract
98
115
  conf.path
99
116
  end
100
117
 
118
+ #TODO: Clear console for MacOS or Windows
119
+ def clear_console_output
120
+ return "" unless @clear_console_output
121
+ return "2>/dev/null" if File.exist?("/dev/null") #Linux console clear
122
+ end
123
+
101
124
  #Convert image to string
102
125
  def convert
103
126
  generate_uid
104
127
  tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}")
105
128
  tmp_image = image_to_tiff
106
- `#{@command} #{tmp_image} #{tmp_file.to_s} #{lang} #{config_file}`
129
+ `#{@command} #{tmp_image} #{tmp_file.to_s} #{lang} #{psm} #{config_file} #{clear_console_output}`
107
130
  @value = File.read("#{tmp_file.to_s}.txt").to_s
108
131
  @uid = nil
109
132
  remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
110
133
  rescue
111
- raise "Error on conversion."
134
+ raise RTesseract::ConversionError
112
135
  end
113
136
 
114
137
  #Output value
@@ -118,7 +141,7 @@ class RTesseract
118
141
  convert
119
142
  @value
120
143
  else
121
- raise "Select a image file."
144
+ raise RTesseract::ImageNotSelectedError
122
145
  end
123
146
  end
124
147
 
data/rtesseract.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{rtesseract}
8
- s.version = "0.0.6"
8
+ s.version = "0.0.7"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Danilo Jeremias da Silva"]
12
- s.date = %q{2011-03-02}
12
+ s.date = %q{2011-03-16}
13
13
  s.description = %q{Ruby library for working with the Tesseract OCR.}
14
14
  s.email = %q{dannnylo@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -49,13 +49,16 @@ Gem::Specification.new do |s|
49
49
  s.specification_version = 3
50
50
 
51
51
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
52
+ s.add_development_dependency(%q<jeweler>, [">= 1.4.0"])
52
53
  s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
53
54
  s.add_runtime_dependency(%q<rmagick>, [">= 2.10.1"])
54
55
  else
56
+ s.add_dependency(%q<jeweler>, [">= 1.4.0"])
55
57
  s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
56
58
  s.add_dependency(%q<rmagick>, [">= 2.10.1"])
57
59
  end
58
60
  else
61
+ s.add_dependency(%q<jeweler>, [">= 1.4.0"])
59
62
  s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
60
63
  s.add_dependency(%q<rmagick>, [">= 2.10.1"])
61
64
  end
data/test/test_mixed.rb CHANGED
@@ -16,12 +16,20 @@ class TestMixed < Test::Unit::TestCase
16
16
 
17
17
  should "translate parts of the image to text" do
18
18
  mix_block = RTesseract::Mixed.new(@image_tiff) do |image|
19
- image.area(28, 19, 25, 25) #position of 4
20
- image.area(180, 22, 20, 28) # position of 3
21
- image.area(218, 22, 24, 28) # position of z
22
- image.area(248, 24, 22, 22) # position of z
19
+ image.area(28, 19, 25, 25) #position of 4
20
+ image.area(180, 22, 20, 28) # position of 3
21
+ image.area(218, 22, 24, 28) # position of z
22
+ image.area(248, 24, 22, 22) # position of z
23
23
  end
24
24
  assert_equal mix_block.to_s_without_spaces , "43ZZ"
25
+
26
+ mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [
27
+ {:x => 28, :y=>19, :width=>25, :height=>25 }, #position of 4
28
+ {:x => 180, :y=>22, :width=>20, :height=>28}, # position of 3
29
+ {:x => 218, :y=>22, :width=>24, :height=>28}, # position of z
30
+ {:x => 248, :y=>24, :width=>22, :height=>22} # position of z
31
+ ]})
32
+ assert_equal mix_block.to_s_without_spaces , "43ZZ"
25
33
  end
26
34
  end
27
35
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 6
10
- version: 0.0.6
9
+ - 7
10
+ version: 0.0.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - Danilo Jeremias da Silva
@@ -15,13 +15,29 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-03-02 00:00:00 -03:00
18
+ date: 2011-03-16 00:00:00 -03:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
- name: thoughtbot-shoulda
22
+ name: jeweler
23
23
  prerelease: false
24
24
  requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 7
30
+ segments:
31
+ - 1
32
+ - 4
33
+ - 0
34
+ version: 1.4.0
35
+ type: :development
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: thoughtbot-shoulda
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
25
41
  none: false
26
42
  requirements:
27
43
  - - ">="
@@ -31,11 +47,11 @@ dependencies:
31
47
  - 0
32
48
  version: "0"
33
49
  type: :development
34
- version_requirements: *id001
50
+ version_requirements: *id002
35
51
  - !ruby/object:Gem::Dependency
36
52
  name: rmagick
37
53
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
54
+ requirement: &id003 !ruby/object:Gem::Requirement
39
55
  none: false
40
56
  requirements:
41
57
  - - ">="
@@ -47,7 +63,7 @@ dependencies:
47
63
  - 1
48
64
  version: 2.10.1
49
65
  type: :runtime
50
- version_requirements: *id002
66
+ version_requirements: *id003
51
67
  description: Ruby library for working with the Tesseract OCR.
52
68
  email: dannnylo@gmail.com
53
69
  executables: []