rtesseract 1.0.5 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 306d7190055e16449e318de2ab9646835ec1114c
4
- data.tar.gz: b4309c8765de4c99bb436e08ba5cb50bb2aae5e4
3
+ metadata.gz: 3b275cd0fced912da404a74ef1bf5805b9377706
4
+ data.tar.gz: 3572edcdfae3ee0d93c520290b78d94f18ffd4c3
5
5
  SHA512:
6
- metadata.gz: 1c0f98a7afa037b33dc5231194554cb428c0e05098a1d623556a601eb44b363c95fa91eaa98fd4086921108144f118d3526c5413790e3c7fd090bc671430109c
7
- data.tar.gz: 043e45b5c0079c4d56ee8b9f34c3a76fb6698eb5c993d87865774a0f1c80cebb7ada2c34580c9e22218be9a420a3c25d82cd94ee18ddfc986fd29c99e2aa006a
6
+ metadata.gz: 2806a98f9a07fcd3c26ebd86980c088fae3c8e08687f8f7ef43444bbc594188624ef6b2286c932a1874e1aa90053e5b33852f6a57ccb5602454a8ce8a0d8401a
7
+ data.tar.gz: 4ae0d5c34b189d4c72729fbfc7c004beff31695cb8079281211a5717abbf0e74698354999c76c2c6c8f045b3952799487f15b40247aef7e5effff1ebda7fcae4
data/.document CHANGED
File without changes
data/.rspec CHANGED
File without changes
data/.travis.sh ADDED
@@ -0,0 +1,2 @@
1
+ #!/bin/sh
2
+ sudo apt-get install tesseract-ocr tesseract-ocr-por
data/.travis.yml CHANGED
@@ -1,5 +1,7 @@
1
1
  language: ruby
2
+ before_script: "./.travis.sh"
2
3
  rvm:
3
4
  - 1.9.3
4
- - 1.9.2
5
- - 2.0.0
5
+ - 2.0.0
6
+ - 2.1.0
7
+
data/Gemfile CHANGED
@@ -2,15 +2,16 @@ source "http://rubygems.org"
2
2
  # Add dependencies to develop your gem here.
3
3
  # Include everything needed to run rake, tests, features, etc.
4
4
  group :development do
5
- gem "rspec", "~> 2.8.0"
6
- gem "rdoc", "~> 3.12"
7
- gem "bundler", "~> 1.0"
8
- gem "jeweler", "~> 1.8.7"
9
- gem "simplecov", ">= 0"
10
- gem 'shoulda-matchers'
5
+ gem "rspec"
6
+ gem "rdoc"
7
+ gem "bundler"
8
+ gem "jeweler", "~> 2.0.1"
9
+ gem "simplecov"
10
+ gem 'coveralls', require: false
11
11
  end
12
12
 
13
13
  group :test do
14
14
  gem "rmagick"
15
+ gem "mini_magick"
15
16
  end
16
17
 
data/Gemfile.lock CHANGED
@@ -1,59 +1,63 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- activesupport (4.0.1)
5
- i18n (~> 0.6, >= 0.6.4)
6
- minitest (~> 4.2)
7
- multi_json (~> 1.3)
8
- thread_safe (~> 0.1)
9
- tzinfo (~> 0.3.37)
10
4
  addressable (2.3.5)
11
- atomic (1.1.14)
12
5
  builder (3.2.2)
6
+ coveralls (0.7.0)
7
+ multi_json (~> 1.3)
8
+ rest-client
9
+ simplecov (>= 0.7)
10
+ term-ansicolor
11
+ thor
12
+ descendants_tracker (0.0.3)
13
13
  diff-lcs (1.1.3)
14
- docile (1.1.0)
15
- faraday (0.8.8)
16
- multipart-post (~> 1.2.0)
14
+ docile (1.1.3)
15
+ faraday (0.9.0)
16
+ multipart-post (>= 1.2, < 3)
17
17
  git (1.2.6)
18
- github_api (0.10.1)
19
- addressable
20
- faraday (~> 0.8.1)
18
+ github_api (0.11.2)
19
+ addressable (~> 2.3)
20
+ descendants_tracker (~> 0.0.1)
21
+ faraday (~> 0.8, < 0.10)
21
22
  hashie (>= 1.2)
22
- multi_json (~> 1.4)
23
- nokogiri (~> 1.5.2)
23
+ multi_json (>= 1.7.5, < 2.0)
24
+ nokogiri (~> 1.6.0)
24
25
  oauth2
25
26
  hashie (2.0.5)
26
27
  highline (1.6.20)
27
- httpauth (0.2.0)
28
- i18n (0.6.5)
29
- jeweler (1.8.8)
28
+ jeweler (2.0.1)
30
29
  builder
31
- bundler (~> 1.0)
30
+ bundler (>= 1.0)
32
31
  git (>= 1.2.5)
33
- github_api (= 0.10.1)
32
+ github_api
34
33
  highline (>= 1.6.15)
35
- nokogiri (= 1.5.10)
34
+ nokogiri (>= 1.5.10)
36
35
  rake
37
36
  rdoc
38
37
  json (1.8.1)
39
- jwt (0.1.8)
38
+ jwt (0.1.11)
40
39
  multi_json (>= 1.5)
41
- minitest (4.7.5)
42
- multi_json (1.8.2)
40
+ mime-types (2.1)
41
+ mini_magick (3.7.0)
42
+ subexec (~> 0.2.1)
43
+ mini_portile (0.5.2)
44
+ multi_json (1.8.4)
43
45
  multi_xml (0.5.5)
44
- multipart-post (1.2.0)
45
- nokogiri (1.5.10)
46
- oauth2 (0.9.2)
47
- faraday (~> 0.8)
48
- httpauth (~> 0.2)
49
- jwt (~> 0.1.4)
50
- multi_json (~> 1.0)
46
+ multipart-post (2.0.0)
47
+ nokogiri (1.6.1)
48
+ mini_portile (~> 0.5.0)
49
+ oauth2 (0.9.3)
50
+ faraday (>= 0.8, < 0.10)
51
+ jwt (~> 0.1.8)
52
+ multi_json (~> 1.3)
51
53
  multi_xml (~> 0.5)
52
54
  rack (~> 1.2)
53
55
  rack (1.5.2)
54
- rake (10.1.0)
56
+ rake (10.1.1)
55
57
  rdoc (3.12.2)
56
58
  json (~> 1.4)
59
+ rest-client (1.6.7)
60
+ mime-types (>= 1.16)
57
61
  rmagick (2.13.2)
58
62
  rspec (2.8.0)
59
63
  rspec-core (~> 2.8.0)
@@ -63,25 +67,26 @@ GEM
63
67
  rspec-expectations (2.8.0)
64
68
  diff-lcs (~> 1.1.2)
65
69
  rspec-mocks (2.8.0)
66
- shoulda-matchers (2.4.0)
67
- activesupport (>= 3.0.0)
68
70
  simplecov (0.8.2)
69
71
  docile (~> 1.1.0)
70
72
  multi_json
71
73
  simplecov-html (~> 0.8.0)
72
74
  simplecov-html (0.8.0)
73
- thread_safe (0.1.3)
74
- atomic
75
- tzinfo (0.3.38)
75
+ subexec (0.2.3)
76
+ term-ansicolor (1.2.2)
77
+ tins (~> 0.8)
78
+ thor (0.18.1)
79
+ tins (0.13.2)
76
80
 
77
81
  PLATFORMS
78
82
  ruby
79
83
 
80
84
  DEPENDENCIES
81
- bundler (~> 1.0)
82
- jeweler (~> 1.8.7)
83
- rdoc (~> 3.12)
85
+ bundler
86
+ coveralls
87
+ jeweler (~> 2.0.1)
88
+ mini_magick
89
+ rdoc
84
90
  rmagick
85
- rspec (~> 2.8.0)
86
- shoulda-matchers
91
+ rspec
87
92
  simplecov
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2013 Danilo Jeremias da Silva
1
+ Copyright (c) 2014 Danilo Jeremias da Silva
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.rdoc CHANGED
@@ -1,5 +1,8 @@
1
1
  = rtesseract
2
2
  {<img src="https://badge.fury.io/rb/rtesseract.png" alt="Gem Version" />}[http://badge.fury.io/rb/rtesseract]
3
+ {<img src="https://travis-ci.org/dannnylo/rtesseract.png?branch=master" alt="Build Status" />}[https://travis-ci.org/dannnylo/rtesseract]
4
+ {<img src="https://coveralls.io/repos/dannnylo/rtesseract/badge.png?branch=master" alt="Coverage Status" />}[https://coveralls.io/r/dannnylo/rtesseract?branch=master]
5
+ {<img src="https://codeclimate.com/github/dannnylo/rtesseract.png" />}[https://codeclimate.com/github/dannnylo/rtesseract]
3
6
 
4
7
  Ruby library for working with the Tesseract OCR.
5
8
 
@@ -55,11 +58,16 @@ It's very simple to use rtesseract:
55
58
  ]})
56
59
  mix_block.to_s
57
60
 
58
- == Note on Patches/Pull Requests
59
-
61
+ == Contributing to rtesseract
62
+
63
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
64
+ * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
60
65
  * Fork the project.
61
- * Make your feature addition or bug fix.
62
- * Send me a pull request. Bonus points for topic branches.
66
+ * Start a feature/bugfix branch.
67
+ * Commit and push until you are happy with your contribution.
68
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
69
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
70
+
63
71
 
64
72
  == Links
65
73
 
@@ -68,4 +76,5 @@ It's very simple to use rtesseract:
68
76
 
69
77
  == Copyright
70
78
 
71
- Copyright (c) 2013 Danilo Jeremias da Silva. See LICENSE for details.
79
+ Copyright (c) 2014 Danilo Jeremias da Silva. See LICENSE.txt for
80
+ further details.
data/Rakefile CHANGED
@@ -13,7 +13,7 @@ require 'rake'
13
13
 
14
14
  require 'jeweler'
15
15
  Jeweler::Tasks.new do |gem|
16
- # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
16
+ # gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
17
17
  gem.name = "rtesseract"
18
18
  gem.homepage = "http://github.com/dannnylo/rtesseract"
19
19
  gem.license = "MIT"
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.5
1
+ 1.1.0
@@ -1,24 +1,28 @@
1
1
  # encoding: UTF-8
2
- require 'mini_magick'
2
+ # Add to rtesseract a image manipulation with MiniMagick
3
3
  module MiniMagickProcessor
4
- extend self
5
- def image_to_tiff
6
- tmp_file = Tempfile.new(["",".tif"])
7
- cat = @instance || read_with_processor(@source.to_s)
8
- cat.format("tif") do |c|
9
- c.compress "None"
10
- end
11
- cat.crop("#{@w}x#{@h}+#{@x}+#{@y}") unless [@x, @y, @w, @h].compact == []
4
+ def self.setup
5
+ require 'mini_magick'
6
+ end
7
+
8
+ def self.a_name?(name)
9
+ %w(mini_magick MiniMagickProcessor).include?(name.to_s)
10
+ end
11
+
12
+ def self.image_to_tif(source, x = nil, y = nil, w = nil, h = nil)
13
+ tmp_file = Tempfile.new(['', '.tif'])
14
+ cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
15
+ cat.format('tif') { |c| c.compress 'None' }
16
+ cat.crop("#{w}x#{h}+#{x}+#{y}") unless [x, y, w, h].compact == []
12
17
  cat.write tmp_file.path.to_s
13
- return tmp_file
18
+ tmp_file
14
19
  end
15
20
 
16
- def read_with_processor(path)
21
+ def self.read_with_processor(path)
17
22
  MiniMagick::Image.open(path.to_s)
18
23
  end
19
24
 
20
- def is_a_instance?(object)
25
+ def self.image?(object)
21
26
  object.class == MiniMagick::Image
22
27
  end
23
28
  end
24
-
@@ -1,21 +1,27 @@
1
1
  # encoding: UTF-8
2
- require "RMagick"
2
+ # Add to rtesseract a image manipulation with RMagick
3
3
  module RMagickProcessor
4
- extend self
5
- def image_to_tiff
6
- tmp_file = Tempfile.new(["",".tif"])
7
- cat = @instance || Magick::Image.read(@source.to_s).first
8
- cat.crop!(@x, @y, @w, @h) unless [@x, @y, @w, @h].compact == []
9
- cat.write(tmp_file.path.to_s){self.compression = Magick::NoCompression}
10
- return tmp_file
4
+ def self.setup
5
+ require 'RMagick'
11
6
  end
12
7
 
13
- def read_with_processor(path)
8
+ def self.a_name?(name)
9
+ %w(rmagick RMagickProcessor).include?(name.to_s)
10
+ end
11
+
12
+ def self.image_to_tif(source, x = nil, y = nil, w = nil, h = nil)
13
+ tmp_file = Tempfile.new(['', '.tif'])
14
+ cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
15
+ cat.crop!(x, y, w, h) unless [x, y, w, h].compact == []
16
+ cat.write(tmp_file.path.to_s) { self.compression = Magick::NoCompression }
17
+ tmp_file
18
+ end
19
+
20
+ def self.read_with_processor(path)
14
21
  Magick::Image.read(path.to_s).first
15
22
  end
16
23
 
17
- def is_a_instance?(object)
24
+ def self.image?(object)
18
25
  object.class == Magick::Image
19
26
  end
20
27
  end
21
-
data/lib/rtesseract.rb CHANGED
@@ -1,89 +1,104 @@
1
1
  # encoding: UTF-8
2
- require "pathname"
3
- require "tempfile"
2
+ require 'pathname'
3
+ require 'tempfile'
4
4
 
5
- require "rtesseract/errors"
6
- require "rtesseract/mixed"
5
+ require 'rtesseract/errors'
6
+ require 'rtesseract/mixed'
7
7
 
8
+ # Processors
9
+ require 'processors/rmagick.rb'
10
+ require 'processors/mini_magick.rb'
11
+
12
+ # Ruby wrapper for Tesseract OCR
8
13
  class RTesseract
9
14
  attr_accessor :options
10
15
  attr_writer :lang
11
16
  attr_writer :psm
12
17
  attr_reader :processor
13
18
 
14
- def initialize(src = "", options = {})
15
- @command = options.delete(:command) || default_command
16
- @lang = options.delete(:lang) || options.delete("lang") || ""
17
- @psm = options.delete(:psm) || options.delete("psm") || nil
18
- @clear_console_output = options.delete(:clear_console_output)
19
- @clear_console_output = true if @clear_console_output.nil?
20
- @options = options
21
- @value = ""
22
- @x, @y, @w, @h = []
23
- @processor = options.delete(:processor) || options.delete("processor")
19
+ OPTIONS = %w(command lang psm processor debug clear_console_output)
20
+ # Aliases to languages names
21
+ LANGUAGES = {
22
+ 'eng' => %w(en en-us english),
23
+ 'ita' => %w(it),
24
+ 'por' => %w(pt pt-br portuguese),
25
+ 'spa' => %w(sp)
26
+ }
27
+
28
+ def initialize(src = '', options = {})
29
+ @options = command_line_options(options)
30
+ @value, @x, @y, @w, @h = ['']
24
31
  choose_processor!
25
- if is_a_instance?(src)
26
- @source = Pathname.new '.'
27
- @instance = src
28
- else
29
- @instance = nil
30
- @source = Pathname.new src
31
- end
32
+ @source = @processor.image?(src) ? src : Pathname.new(src)
33
+ end
34
+
35
+ def fetch_option(options, name, default)
36
+ options.fetch(name.to_s, options.fetch(name, default))
37
+ end
38
+
39
+ def command_line_options(options)
40
+ @command = fetch_option(options, :command, default_command)
41
+ @lang = fetch_option(options, :lang, '')
42
+ @psm = fetch_option(options, :psm, nil)
43
+ @processor = fetch_option(options, :processor, 'rmagick')
44
+ @debug = fetch_option(options, :debug, false)
45
+
46
+ # Disable clear console if debug mode
47
+ @clear_console_output = @debug ? false : fetch_option(options, :clear_console_output, true)
48
+
49
+ options.delete_if { |k, v| OPTIONS.include?(k.to_s) }
50
+ options
32
51
  end
33
52
 
34
53
  def default_command
35
54
  TesseractBin::Executables[:tesseract] || 'tesseract'
36
55
  rescue
37
- "tesseract"
56
+ 'tesseract'
38
57
  end
39
58
 
40
59
  def self.read(src = nil, options = {}, &block)
41
- raise RTesseract::ImageNotSelectedError if src == nil
42
- processor = options.delete(:processor) || options.delete("processor")
43
- if processor == "mini_magick"
60
+ fail RTesseract::ImageNotSelectedError if src.nil?
61
+ processor = options.delete(:processor) || options.delete('processor')
62
+ if processor == 'mini_magick'
44
63
  image = MiniMagickProcessor.read_with_processor(src.to_s)
45
64
  else
46
65
  image = RMagickProcessor.read_with_processor(src.to_s)
47
66
  end
48
67
  yield image
49
- object = RTesseract.new("", options)
68
+ object = RTesseract.new('', options)
50
69
  object.from_blob(image.to_blob)
51
70
  object
52
71
  end
53
72
 
54
- def source= src
55
- @value = ""
56
- @source = Pathname.new src
73
+ def source=(src)
74
+ @value = ''
75
+ @source = @processor.image?(src) ? src : Pathname.new(src)
57
76
  end
58
77
 
59
- def image_name
60
- @source.basename
61
- end
62
-
63
-
64
- #Crop image to convert
65
- def crop!(x,y,width,height)
66
- @x, @y, @w, @h = x, y, width, height
78
+ # Crop image to convert
79
+ def crop!(x, y, width, height)
80
+ @value = ''
81
+ @x, @y, @w, @h = x.to_i, y.to_i, width.to_i, height.to_i
67
82
  self
68
83
  end
69
84
 
70
- #Remove files
71
- def remove_file(files=[])
72
- files.each do |file|
73
- if file.is_a?(Tempfile)
74
- file.close
75
- file.unlink
76
- else
77
- File.unlink(file)
78
- end
85
+ # Remove files
86
+ def remove_file(files = [])
87
+ files.each do |file|
88
+ if file.is_a?(Tempfile)
89
+ file.close
90
+ file.unlink
91
+ else
92
+ File.unlink(file)
79
93
  end
94
+ end
80
95
  true
81
- rescue
82
- raise RTesseract::TempFilesNotRemovedError
96
+ rescue => error
97
+ raise RTesseract::TempFilesNotRemovedError.new(:error => error, :files => files)
83
98
  end
84
99
 
85
100
  # Select the language
86
- #===Languages
101
+ # ===Languages
87
102
  ## * eng - English
88
103
  ## * deu - German
89
104
  ## * deu-f - German fraktur
@@ -96,35 +111,30 @@ class RTesseract
96
111
  ## Note: Make sure you have installed the language to tesseract
97
112
  def lang
98
113
  language = "#{@lang}".strip.downcase
99
- { #Aliases to languages names
100
- "eng" => ["en","en-us","english"],
101
- "ita" => ["it"],
102
- "por" => ["pt","pt-br","portuguese"],
103
- "spa" => ["sp"]
104
- }.each do |value,names|
114
+ LANGUAGES.each do |value, names|
105
115
  return " -l #{value} " if names.include? language
106
116
  end
107
117
  return " -l #{language} " if language.size > 0
108
- ""
118
+ ''
109
119
  rescue
110
- ""
120
+ ''
111
121
  end
112
122
 
113
- #Page Segment Mode
123
+ # Page Segment Mode
114
124
  def psm
115
- @psm.nil? ? "" : " -psm #{@psm} "
125
+ @psm.nil? ? '' : " -psm #{@psm} "
116
126
  rescue
117
- ""
127
+ ''
118
128
  end
119
129
 
120
130
  def config
121
131
  @options ||= {}
122
- @options.collect{|k,v| "#{k} #{v}" }.join("\n")
132
+ @options.map { |k, v| "#{k} #{v}" }.join("\n")
123
133
  end
124
134
 
125
135
  def config_file
126
- return "" if @options == {}
127
- conf = Tempfile.new("config")
136
+ return '' if @options == {}
137
+ conf = Tempfile.new('config')
128
138
  conf.write(config)
129
139
  conf.flush
130
140
  conf.path
@@ -132,59 +142,65 @@ class RTesseract
132
142
 
133
143
  #TODO: Clear console for MacOS or Windows
134
144
  def clear_console_output
135
- return "" unless @clear_console_output
136
- return "2>/dev/null" if File.exist?("/dev/null") #Linux console clear
145
+ return '' unless @clear_console_output
146
+ return '2>/dev/null' if File.exist?('/dev/null') # Linux console clear
147
+ end
148
+
149
+ def image
150
+ (@image = @processor.image_to_tif(@source, @x, @y, @w, @h)).path
151
+ end
152
+
153
+ def text_file
154
+ @text_file = Pathname.new(Dir.tmpdir).join("#{Time.now.to_f}#{rand(1500)}.txt").to_s
137
155
  end
138
156
 
139
- #Convert image to string
157
+ # Convert image to string
140
158
  def convert
141
- path = Pathname.new(Dir::tmpdir).join("#{Time.now.to_f}#{rand(1500)}.txt").to_s
142
- tmp_image = image_to_tiff
143
- `#{@command} "#{tmp_image.path}" "#{path.gsub(".txt","")}" #{lang} #{psm} #{config_file} #{clear_console_output}`
144
- @value = File.read(path).to_s
145
- remove_file([tmp_image, path])
146
- rescue
147
- raise RTesseract::ConversionError
159
+ `#{@command} "#{image}" "#{text_file.gsub('.txt', '')}" #{lang} #{psm} #{config_file} #{clear_console_output}`
160
+ @value = File.read(@text_file).to_s
161
+ remove_file([@image, @text_file])
162
+ rescue => error
163
+ raise RTesseract::ConversionError.new(error)
148
164
  end
149
165
 
150
- #Read image from memory blob
166
+ # Read image from memory blob
151
167
  def from_blob(blob)
152
- blob_file = Tempfile.new("blob")
168
+ blob_file = Tempfile.new('blob')
153
169
  blob_file.write(blob)
154
170
  blob_file.rewind
155
171
  blob_file.flush
156
172
  self.source = blob_file.path
157
173
  convert
158
174
  remove_file([blob_file])
159
- rescue
160
- raise RTesseract::ConversionError
175
+ rescue => error
176
+ raise RTesseract::ConversionError.new(error)
161
177
  end
162
178
 
163
- #Output value
179
+ # Output value
164
180
  def to_s
165
- return @value if @value != ""
166
- if @source.file? || !@instance.nil?
181
+ return @value if @value != ''
182
+ if @processor.image?(@source) || @source.file?
167
183
  convert
168
184
  @value
169
185
  else
170
- raise RTesseract::ImageNotSelectedError
186
+ fail RTesseract::ImageNotSelectedError.new(@source)
171
187
  end
172
188
  end
173
189
 
174
- #Remove spaces and break-lines
190
+ # Remove spaces and break-lines
175
191
  def to_s_without_spaces
176
- to_s.gsub(" ","").gsub("\n","").gsub("\r","")
192
+ to_s.gsub(' ', '').gsub("\n", '').gsub("\r", '')
177
193
  end
178
194
 
179
195
  private
196
+
180
197
  def choose_processor!
181
- if @processor.to_s == "mini_magick"
182
- require File.expand_path(File.dirname(__FILE__) + "/processors/mini_magick.rb")
183
- self.class.send(:include, MiniMagickProcessor)
184
- else
185
- require File.expand_path(File.dirname(__FILE__) + "/processors/rmagick.rb")
186
- self.class.send(:include, RMagickProcessor)
187
- end
198
+ @processor = if MiniMagickProcessor.a_name?(@processor.to_s)
199
+ MiniMagickProcessor
200
+ else
201
+ RMagickProcessor
202
+ end
203
+ @processor.setup
188
204
  end
189
205
  end
190
206
 
@@ -1,6 +1,14 @@
1
1
  class RTesseract
2
- class ConversionError < StandardError ;end
3
- class ImageNotSelectedError < StandardError ;end
4
- class TempFilesNotRemovedError < StandardError ;end
5
- end
2
+ # Class of error with storage of normal errors
3
+ class ErrorWithMemory < StandardError
4
+ attr_accessor :old_error
5
+
6
+ def initialize(stored_error = nil)
7
+ @old_error = stored_error
8
+ end
9
+ end
6
10
 
11
+ class ConversionError < ErrorWithMemory; end
12
+ class ImageNotSelectedError < ErrorWithMemory; end
13
+ class TempFilesNotRemovedError < ErrorWithMemory; end
14
+ end
@@ -1,54 +1,51 @@
1
1
  # encoding: UTF-8
2
2
  class RTesseract
3
+ # Class to read an image from specified areas
3
4
  class Mixed
4
- def initialize(src="", options={})
5
+ attr_reader :areas
6
+
7
+ def initialize(src = '', options = {})
5
8
  @source = Pathname.new src
6
9
  @options = options
7
- @value = ""
10
+ @value = ''
8
11
  @areas = options.delete(:areas) || []
9
12
  yield self if block_given?
10
13
  end
11
14
 
12
15
  def area(x, y, width, height)
13
- @value = ""
14
- @areas << {:x => x, :y => y, :width => width, :height => height}
15
- end
16
-
17
- def areas
18
- @areas
16
+ @value = ''
17
+ @areas << { :x => x, :y => y, :width => width, :height => height }
19
18
  end
20
19
 
21
20
  def clear_areas
22
21
  @areas = []
23
22
  end
24
23
 
25
- #Convert parts of image to string
24
+ # Convert parts of image to string
26
25
  def convert
27
- @value = ""
28
- @areas.each do |area|
29
- image = RTesseract.new(@source.to_s,@options.dup)
30
- image.crop!(area[:x].to_i, area[:y].to_i, area[:width].to_i, area[:height].to_i)
26
+ @value = ''
27
+ @areas.each_with_object(RTesseract.new(@source.to_s, @options.dup)) do |area, image|
28
+ image.crop!(area[:x], area[:y], area[:width], area[:height])
31
29
  @value << image.to_s
32
30
  end
33
- rescue
34
- raise RTesseract::ConversionError
31
+ rescue => error
32
+ raise RTesseract::ConversionError.new(error)
35
33
  end
36
34
 
37
- #Output value
35
+ # Output value
38
36
  def to_s
39
- return @value if @value != ""
37
+ return @value if @value != ''
40
38
  if @source.file?
41
39
  convert
42
40
  @value
43
41
  else
44
- raise RTesseract::ImageNotSelectedError
42
+ fail RTesseract::ImageNotSelectedError.new(@source)
45
43
  end
46
44
  end
47
45
 
48
- #Remove spaces and break-lines
46
+ # Remove spaces and break-lines
49
47
  def to_s_without_spaces
50
- to_s.gsub(" ","").gsub("\n","").gsub("\r","")
48
+ to_s.gsub(' ', '').gsub("\n", '').gsub("\r", '')
51
49
  end
52
50
  end
53
51
  end
54
-
data/rtesseract.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: rtesseract 1.0.5 ruby lib
5
+ # stub: rtesseract 1.1.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "rtesseract"
9
- s.version = "1.0.5"
9
+ s.version = "1.1.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Danilo Jeremias da Silva"]
14
- s.date = "2014-01-21"
14
+ s.date = "2014-02-07"
15
15
  s.description = "Ruby library for working with the Tesseract OCR."
16
16
  s.email = "dannnylo@gmail.com"
17
17
  s.extra_rdoc_files = [
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
  s.files = [
22
22
  ".document",
23
23
  ".rspec",
24
+ ".travis.sh",
24
25
  ".travis.yml",
25
26
  "Gemfile",
26
27
  "Gemfile.lock",
@@ -54,27 +55,27 @@ Gem::Specification.new do |s|
54
55
  s.specification_version = 4
55
56
 
56
57
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
57
- s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
58
- s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
59
- s.add_development_dependency(%q<bundler>, ["~> 1.0"])
60
- s.add_development_dependency(%q<jeweler>, ["~> 1.8.7"])
58
+ s.add_development_dependency(%q<rspec>, [">= 0"])
59
+ s.add_development_dependency(%q<rdoc>, [">= 0"])
60
+ s.add_development_dependency(%q<bundler>, [">= 0"])
61
+ s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
61
62
  s.add_development_dependency(%q<simplecov>, [">= 0"])
62
- s.add_development_dependency(%q<shoulda-matchers>, [">= 0"])
63
+ s.add_development_dependency(%q<coveralls>, [">= 0"])
63
64
  else
64
- s.add_dependency(%q<rspec>, ["~> 2.8.0"])
65
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
66
- s.add_dependency(%q<bundler>, ["~> 1.0"])
67
- s.add_dependency(%q<jeweler>, ["~> 1.8.7"])
65
+ s.add_dependency(%q<rspec>, [">= 0"])
66
+ s.add_dependency(%q<rdoc>, [">= 0"])
67
+ s.add_dependency(%q<bundler>, [">= 0"])
68
+ s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
68
69
  s.add_dependency(%q<simplecov>, [">= 0"])
69
- s.add_dependency(%q<shoulda-matchers>, [">= 0"])
70
+ s.add_dependency(%q<coveralls>, [">= 0"])
70
71
  end
71
72
  else
72
- s.add_dependency(%q<rspec>, ["~> 2.8.0"])
73
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
74
- s.add_dependency(%q<bundler>, ["~> 1.0"])
75
- s.add_dependency(%q<jeweler>, ["~> 1.8.7"])
73
+ s.add_dependency(%q<rspec>, [">= 0"])
74
+ s.add_dependency(%q<rdoc>, [">= 0"])
75
+ s.add_dependency(%q<bundler>, [">= 0"])
76
+ s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
76
77
  s.add_dependency(%q<simplecov>, [">= 0"])
77
- s.add_dependency(%q<shoulda-matchers>, [">= 0"])
78
+ s.add_dependency(%q<coveralls>, [">= 0"])
78
79
  end
79
80
  end
80
81
 
@@ -21,6 +21,8 @@ describe "Rtesseract::Mixed" do
21
21
  image.area(248, 24, 22, 22) # position of z
22
22
  end
23
23
  mix_block.to_s_without_spaces.should eql("43ZZ")
24
+ mix_block.clear_areas
25
+ mix_block.areas.should == []
24
26
 
25
27
  mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [
26
28
  {:x => 28, :y=>19, :width=>25, :height=>25 }, #position of 4
@@ -30,4 +32,15 @@ describe "Rtesseract::Mixed" do
30
32
  ],:psm=>7})
31
33
  mix_block.to_s_without_spaces.should eql("43ZZ")
32
34
  end
35
+
36
+ it " get a error" do
37
+ mix_block = RTesseract::Mixed.new(@path.join("images","test_not_exists.png").to_s,{:areas => [{:x => 28, :y=>19, :width=>25, :height=>25 }
38
+ ],:psm=>7})
39
+ expect{ mix_block.to_s_without_spaces }.to raise_error(RTesseract::ImageNotSelectedError)
40
+
41
+
42
+ mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [{:x => 28, :y=>19, :width=>25, :height=>25 }
43
+ ],:psm=>7, :command => "tesseract_error"})
44
+ expect{ mix_block.to_s }.to raise_error(RTesseract::ConversionError)
45
+ end
33
46
  end
@@ -16,12 +16,11 @@ describe "Rtesseract" do
16
16
 
17
17
  it " translate image to text" do
18
18
  RTesseract.new(@image_tiff).to_s_without_spaces.should eql("43ZZ")
19
+ RTesseract.new(@image_tiff, {:processor => 'mini_magick'}).to_s_without_spaces.should eql("43ZZ")
19
20
  RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces.should eql("V2V4")
20
21
  RTesseract.new(@path.join("images","test with spaces.tif").to_s).to_s_without_spaces.should eql("V2V4")
21
22
  end
22
23
 
23
-
24
-
25
24
  it " translate images .png, .jpg, .bmp" do
26
25
  RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces.should eql("HW9W")
27
26
  RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces.should eql("3R8Z")
@@ -77,10 +76,20 @@ describe "Rtesseract" do
77
76
  test = RTesseract.new("", {:psm => 7})
78
77
  test.from_blob(blob)
79
78
  test.to_s_without_spaces.should eql("HW9W")
79
+
80
+ test = RTesseract.new("", {:psm => 7})
81
+ expect{test.from_blob('') }.to raise_error(RTesseract::ConversionError)
80
82
  end
81
83
 
82
84
  it " use a instance" do
83
85
  RTesseract.new(Magick::Image.read(@image_tiff.to_s).first).to_s_without_spaces.should eql("43ZZ")
86
+ RMagickProcessor.a_name?('teste').should == false
87
+ RMagickProcessor.a_name?('rmagick').should == true
88
+ RMagickProcessor.a_name?('RMagickProcessor').should == true
89
+
90
+ MiniMagickProcessor.a_name?('teste').should == false
91
+ MiniMagickProcessor.a_name?('mini_magick').should == true
92
+ MiniMagickProcessor.a_name?('MiniMagickProcessor').should == true
84
93
  end
85
94
 
86
95
  it " change image in a block" do
@@ -91,9 +100,28 @@ describe "Rtesseract" do
91
100
  test.to_s_without_spaces.should eql("HW9W")
92
101
 
93
102
  test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en'}) do |image|
94
- image = image.white_threshold(245).quantize(256,Magick::GRAYColorspace)
103
+ image = image.white_threshold(245).quantize(256, Magick::GRAYColorspace)
104
+ end
105
+ test.to_s_without_spaces.should eql("3R8Z")
106
+
107
+ require 'mini_magick'
108
+
109
+ test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en', :processor => 'mini_magick'}) do |image|
110
+ #image.white_threshold(245)
111
+ image.gravity "south"
95
112
  end
96
- test.to_s_without_spaces.should eql("3R8Z")
113
+ test.to_s_without_spaces.should eql("3R8Z")
114
+ end
115
+
116
+ it " get a error" do
117
+ expect{ RTesseract.new(@path.join("images","test.jpg").to_s, {:command => "tesseract_error"}).to_s }.to raise_error(RTesseract::ConversionError)
118
+ expect{ RTesseract.new(@path.join("images","test_not_exists.png").to_s).to_s }.to raise_error(RTesseract::ImageNotSelectedError)
97
119
  end
98
120
 
121
+ it "remove a file" do
122
+ rtesseract = RTesseract.new('.')
123
+ rtesseract.remove_file(Tempfile.new('config'))
124
+
125
+ expect{ rtesseract.remove_file(Pathname.new(Dir.tmpdir).join("test_not_exists")) }.to raise_error(RTesseract::TempFilesNotRemovedError)
126
+ end
99
127
  end
data/spec/spec_helper.rb CHANGED
@@ -2,10 +2,13 @@
2
2
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
3
  $LOAD_PATH.unshift(File.dirname(__FILE__))
4
4
  require 'rspec'
5
+ require 'coveralls'
5
6
  require "simplecov"
6
7
  SimpleCov.start do
7
8
  add_filter "/spec/"
8
9
  end
10
+ Coveralls.wear!
11
+
9
12
  require 'rtesseract'
10
13
  # Requires supporting files with custom matchers and macros, etc,
11
14
  # in ./support/ and its subdirectories.
metadata CHANGED
@@ -1,71 +1,71 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danilo Jeremias da Silva
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-21 00:00:00.000000000 Z
11
+ date: 2014-02-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - '>='
18
18
  - !ruby/object:Gem::Version
19
- version: 2.8.0
19
+ version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - '>='
25
25
  - !ruby/object:Gem::Version
26
- version: 2.8.0
26
+ version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rdoc
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ~>
31
+ - - '>='
32
32
  - !ruby/object:Gem::Version
33
- version: '3.12'
33
+ version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ~>
38
+ - - '>='
39
39
  - !ruby/object:Gem::Version
40
- version: '3.12'
40
+ version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: bundler
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ~>
45
+ - - '>='
46
46
  - !ruby/object:Gem::Version
47
- version: '1.0'
47
+ version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ~>
52
+ - - '>='
53
53
  - !ruby/object:Gem::Version
54
- version: '1.0'
54
+ version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: jeweler
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ~>
60
60
  - !ruby/object:Gem::Version
61
- version: 1.8.7
61
+ version: 2.0.1
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - ~>
67
67
  - !ruby/object:Gem::Version
68
- version: 1.8.7
68
+ version: 2.0.1
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: simplecov
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -81,7 +81,7 @@ dependencies:
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: shoulda-matchers
84
+ name: coveralls
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - '>='
@@ -104,6 +104,7 @@ extra_rdoc_files:
104
104
  files:
105
105
  - .document
106
106
  - .rspec
107
+ - .travis.sh
107
108
  - .travis.yml
108
109
  - Gemfile
109
110
  - Gemfile.lock