rtesseract 2.2.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +5 -5
  2. data/.document +1 -2
  3. data/.gitignore +12 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +13 -10
  6. data/CODE_OF_CONDUCT.md +74 -0
  7. data/Gemfile +4 -17
  8. data/Gemfile.lock +40 -85
  9. data/LICENSE.txt +18 -17
  10. data/README.md +137 -0
  11. data/Rakefile +4 -48
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/lib/rtesseract.rb +22 -220
  15. data/lib/rtesseract/box.rb +15 -60
  16. data/lib/rtesseract/check.rb +14 -0
  17. data/lib/rtesseract/command.rb +41 -0
  18. data/lib/rtesseract/configuration.rb +15 -64
  19. data/lib/rtesseract/pdf.rb +18 -0
  20. data/lib/rtesseract/text.rb +9 -0
  21. data/lib/rtesseract/tsv.rb +18 -0
  22. data/lib/rtesseract/version.rb +3 -0
  23. data/rtesseract.gemspec +27 -98
  24. metadata +36 -85
  25. data/README.rdoc +0 -156
  26. data/VERSION +0 -1
  27. data/lib/processors/mini_magick.rb +0 -43
  28. data/lib/processors/none.rb +0 -34
  29. data/lib/processors/rmagick.rb +0 -46
  30. data/lib/rtesseract/blob.rb +0 -34
  31. data/lib/rtesseract/box_char.rb +0 -31
  32. data/lib/rtesseract/errors.rb +0 -21
  33. data/lib/rtesseract/mixed.rb +0 -54
  34. data/lib/rtesseract/processor.rb +0 -19
  35. data/lib/rtesseract/utils.rb +0 -44
  36. data/lib/rtesseract/uzn.rb +0 -47
  37. data/spec/configs/eng.user-words.txt +0 -13
  38. data/spec/images/README.pdf +0 -0
  39. data/spec/images/blank.tif +0 -0
  40. data/spec/images/mixed.tif +0 -0
  41. data/spec/images/orientation_reverse.png +0 -0
  42. data/spec/images/test with spaces.tif +0 -0
  43. data/spec/images/test-pdf.png +0 -0
  44. data/spec/images/test.bmp +0 -0
  45. data/spec/images/test.jpg +0 -0
  46. data/spec/images/test.png +0 -0
  47. data/spec/images/test.tif +0 -0
  48. data/spec/images/test1.tif +0 -0
  49. data/spec/images/test_words.png +0 -0
  50. data/spec/rtesseract_box_char_spec.rb +0 -82
  51. data/spec/rtesseract_box_spec.rb +0 -36
  52. data/spec/rtesseract_mixed_spec.rb +0 -49
  53. data/spec/rtesseract_spec.rb +0 -282
  54. data/spec/rtesseract_uzn_spec.rb +0 -56
  55. data/spec/spec_helper.rb +0 -21
@@ -0,0 +1,41 @@
1
+ require 'tmpdir'
2
+
3
+ class RTesseract
4
+ class Command
5
+ FIXED = [:command, :psm, :oem, :lang, :tessdata_dir, :user_words, :user_patterns, :config_file]
6
+
7
+ attr_reader :options
8
+
9
+ def initialize(source, output, options)
10
+ @source = source
11
+ @output = output
12
+ @options = options
13
+ end
14
+
15
+ def configs
16
+ @options.to_h.map { |key, value| ['-c', "#{key}=#{value}"] unless FIXED.include?(key) }.compact
17
+ end
18
+
19
+ def full_command
20
+ command = [options.command, @source, @output]
21
+
22
+ command << ['--psm', options.psm.to_s] if options.psm
23
+ command << ['--oem', options.oem.to_s] if options.oem
24
+ command << ['-l', options.lang] if options.lang
25
+
26
+ command << ['--tessdata_dir', options.tessdata_dir] if options.tessdata_dir
27
+ command << ['--user_words', options.user_words] if options.user_words
28
+ command << ['--user_patterns', options.user_patterns] if options.user_patterns
29
+
30
+ command << configs
31
+
32
+ command << options.config_file.to_s if options.config_file
33
+
34
+ command.flatten
35
+ end
36
+
37
+ def run
38
+ Open3.capture2e(*full_command)
39
+ end
40
+ end
41
+ end
@@ -1,75 +1,26 @@
1
- # RTesseract
2
- class RTesseract
3
- # Aliases to languages names
4
- LANGUAGES = {
5
- 'en' => 'eng',
6
- 'en-us' => 'eng',
7
- 'english' => 'eng',
8
- 'pt' => 'por',
9
- 'pt-br' => 'por',
10
- 'portuguese' => 'por',
11
- 'it' => 'ita',
12
- 'sp' => 'spa'
13
- }.freeze
14
-
15
- # Configuration class
16
- class Configuration
17
- attr_accessor :processor, :lang, :psm, :oem, :tessdata_dir, :user_words, :user_patterns, :command, :debug, :options_cmd
18
-
19
- def initialize
20
- @processor = 'rmagick'
21
- end
22
-
23
- # Global configuration
24
- def parent
25
- @parent ||= RTesseract.configuration || RTesseract::Configuration.new
26
- end
27
-
28
- # Set value of option
29
- def option(options, name, default = nil)
30
- self.instance_variable_set("@#{name}", options.option(name, parent.send(name)) || default)
31
- end
1
+ require 'ostruct'
32
2
 
33
- # Return the values of options
34
- def load_options(options, names = [])
35
- names.each { |name| option(options, name, nil) }
3
+ class RTesseract
4
+ class Configuration < OpenStruct
5
+ def merge(options)
6
+ RTesseract::Configuration.new(self.to_h.merge(options))
36
7
  end
37
8
  end
38
9
 
39
10
  class << self
40
- attr_accessor :configuration
41
- end
42
-
43
- def self.configure
44
- self.configuration ||= Configuration.new
45
- yield(configuration)
46
- self.clear_pdf_option
47
- end
48
-
49
- # Clear pdf option
50
- def self.clear_pdf_option
51
- if self.configuration.options_cmd
52
- self.configuration.options_cmd.delete('pdf')
53
- self.configuration.options_cmd.delete(:pdf)
11
+ def config
12
+ @config ||= RTesseract::Configuration.new(
13
+ command: 'tesseract',
14
+ debug_file: '/dev/null'
15
+ )
54
16
  end
55
- end
56
17
 
57
- # Default command
58
- def self.default_command
59
- TesseractBin::Executables[:tesseract] || 'tesseract'
60
- rescue
61
- 'tesseract'
62
- end
18
+ def configure
19
+ yield(config) if block_given?
20
+ end
63
21
 
64
- # Local config to instance
65
- def self.local_config(options = {})
66
- RTesseract::Configuration.new.tap do |config|
67
- config.command = config.option(options, :command, RTesseract.default_command)
68
- config.processor = config.option(options, :processor, 'rmagick')
69
- config.load_options(options, [:lang, :psm, :oem, :tessdata_dir, :user_words, :user_patterns])
70
- config.debug = config.option(options, :debug, false)
71
- pdf_opts = lambda { |o| o == 'pdf' || o == :pdf }
72
- config.options_cmd = [options.option(:options, nil)].delete_if(&pdf_opts).flatten.compact
22
+ def reset_config!
23
+ @config = nil
73
24
  end
74
25
  end
75
26
  end
@@ -0,0 +1,18 @@
1
+ require 'tmpdir'
2
+
3
+ class RTesseract
4
+ module Pdf
5
+ def self.temp_dir
6
+ @file_path = Pathname.new(Dir.tmpdir)
7
+ end
8
+
9
+ def self.run(source, options)
10
+ name = "rtesseract_#{SecureRandom.uuid}"
11
+ options.tessedit_create_pdf = 1
12
+
13
+ RTesseract::Command.new(source, temp_dir.join(name).to_s, options).run
14
+
15
+ File.open(temp_dir.join("#{name}.pdf").to_s, 'r')
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,9 @@
1
+ require 'open3'
2
+
3
+ class RTesseract
4
+ module Text
5
+ def self.run(source, options)
6
+ RTesseract::Command.new(source, 'stdout', options).run.first
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,18 @@
1
+ require 'tmpdir'
2
+
3
+ class RTesseract
4
+ module Tsv
5
+ def self.temp_dir
6
+ @file_path = Pathname.new(Dir.tmpdir)
7
+ end
8
+
9
+ def self.run(source, options)
10
+ name = "rtesseract_#{SecureRandom.uuid}"
11
+ options.tessedit_create_tsv = 1
12
+
13
+ RTesseract::Command.new(source, temp_dir.join(name).to_s, options).run
14
+
15
+ File.open(temp_dir.join("#{name}.tsv").to_s, 'r')
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,3 @@
1
+ class RTesseract
2
+ VERSION = '3.0.0'.freeze
3
+ end
data/rtesseract.gemspec CHANGED
@@ -1,104 +1,33 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
- # -*- encoding: utf-8 -*-
5
- # stub: rtesseract 2.2.0 ruby lib
6
1
 
7
- Gem::Specification.new do |s|
8
- s.name = "rtesseract".freeze
9
- s.version = "2.2.0"
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "rtesseract/version"
10
5
 
11
- s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
- s.require_paths = ["lib".freeze]
13
- s.authors = ["Danilo Jeremias da Silva".freeze]
14
- s.date = "2018-01-05"
15
- s.description = "Ruby library for working with the Tesseract OCR.".freeze
16
- s.email = "dannnylo@gmail.com".freeze
17
- s.extra_rdoc_files = [
18
- "LICENSE.txt",
19
- "README.rdoc"
20
- ]
21
- s.files = [
22
- ".document",
23
- ".rspec",
24
- ".travis.yml",
25
- "CHANGELOG.md",
26
- "Gemfile",
27
- "Gemfile.lock",
28
- "LICENSE.txt",
29
- "README.rdoc",
30
- "Rakefile",
31
- "VERSION",
32
- "lib/processors/mini_magick.rb",
33
- "lib/processors/none.rb",
34
- "lib/processors/rmagick.rb",
35
- "lib/rtesseract.rb",
36
- "lib/rtesseract/blob.rb",
37
- "lib/rtesseract/box.rb",
38
- "lib/rtesseract/box_char.rb",
39
- "lib/rtesseract/configuration.rb",
40
- "lib/rtesseract/errors.rb",
41
- "lib/rtesseract/mixed.rb",
42
- "lib/rtesseract/processor.rb",
43
- "lib/rtesseract/utils.rb",
44
- "lib/rtesseract/uzn.rb",
45
- "rtesseract.gemspec",
46
- "spec/configs/eng.user-words.txt",
47
- "spec/images/README.pdf",
48
- "spec/images/blank.tif",
49
- "spec/images/mixed.tif",
50
- "spec/images/orientation_reverse.png",
51
- "spec/images/test with spaces.tif",
52
- "spec/images/test-pdf.png",
53
- "spec/images/test.bmp",
54
- "spec/images/test.jpg",
55
- "spec/images/test.png",
56
- "spec/images/test.tif",
57
- "spec/images/test1.tif",
58
- "spec/images/test_words.png",
59
- "spec/rtesseract_box_char_spec.rb",
60
- "spec/rtesseract_box_spec.rb",
61
- "spec/rtesseract_mixed_spec.rb",
62
- "spec/rtesseract_spec.rb",
63
- "spec/rtesseract_uzn_spec.rb",
64
- "spec/spec_helper.rb"
65
- ]
66
- s.homepage = "http://github.com/dannnylo/rtesseract".freeze
67
- s.licenses = ["MIT".freeze]
68
- s.rubygems_version = "2.6.14".freeze
69
- s.summary = "Ruby library for working with the Tesseract OCR.".freeze
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "rtesseract"
8
+ spec.version = RTesseract::VERSION
9
+ spec.authors = ["Danilo Jeremias da Silva"]
10
+ spec.email = ["dannnylo@gmail.com"]
70
11
 
71
- if s.respond_to? :specification_version then
72
- s.specification_version = 4
12
+ spec.summary = "Ruby library for working with the Tesseract OCR.".freeze
13
+ spec.description = "Ruby library for working with the Tesseract OCR.".freeze
14
+ spec.homepage = "http://github.com/dannnylo/rtesseract".freeze
15
+ spec.license = "MIT"
73
16
 
74
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
75
- s.add_runtime_dependency(%q<nokogiri>.freeze, [">= 0"])
76
- s.add_development_dependency(%q<rspec>.freeze, [">= 0"])
77
- s.add_development_dependency(%q<rdoc>.freeze, [">= 0"])
78
- s.add_development_dependency(%q<bundler>.freeze, [">= 0"])
79
- s.add_development_dependency(%q<jeweler>.freeze, [">= 0"])
80
- s.add_development_dependency(%q<simplecov>.freeze, [">= 0"])
81
- s.add_development_dependency(%q<json>.freeze, [">= 0"])
82
- s.add_development_dependency(%q<coveralls>.freeze, [">= 0"])
83
- else
84
- s.add_dependency(%q<nokogiri>.freeze, [">= 0"])
85
- s.add_dependency(%q<rspec>.freeze, [">= 0"])
86
- s.add_dependency(%q<rdoc>.freeze, [">= 0"])
87
- s.add_dependency(%q<bundler>.freeze, [">= 0"])
88
- s.add_dependency(%q<jeweler>.freeze, [">= 0"])
89
- s.add_dependency(%q<simplecov>.freeze, [">= 0"])
90
- s.add_dependency(%q<json>.freeze, [">= 0"])
91
- s.add_dependency(%q<coveralls>.freeze, [">= 0"])
92
- end
93
- else
94
- s.add_dependency(%q<nokogiri>.freeze, [">= 0"])
95
- s.add_dependency(%q<rspec>.freeze, [">= 0"])
96
- s.add_dependency(%q<rdoc>.freeze, [">= 0"])
97
- s.add_dependency(%q<bundler>.freeze, [">= 0"])
98
- s.add_dependency(%q<jeweler>.freeze, [">= 0"])
99
- s.add_dependency(%q<simplecov>.freeze, [">= 0"])
100
- s.add_dependency(%q<json>.freeze, [">= 0"])
101
- s.add_dependency(%q<coveralls>.freeze, [">= 0"])
17
+ # Specify which files should be added to the gem when it is released.
18
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
19
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
20
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
102
21
  end
103
- end
22
+ spec.bindir = "exe"
23
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
24
+ spec.require_paths = ["lib"]
25
+
26
+ spec.add_development_dependency "bundler", "~> 1.17"
27
+ spec.add_development_dependency "rake", "~> 10.0"
28
+ spec.add_development_dependency "rspec", "~> 3.0"
29
+ spec.add_development_dependency "simplecov"
30
+ spec.add_development_dependency "coveralls"
104
31
 
32
+ spec.add_dependency "nokogiri"
33
+ end
metadata CHANGED
@@ -1,85 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danilo Jeremias da Silva
8
8
  autorequire:
9
- bindir: bin
9
+ bindir: exe
10
10
  cert_chain: []
11
- date: 2018-01-05 00:00:00.000000000 Z
11
+ date: 2019-01-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: nokogiri
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
- - !ruby/object:Gem::Dependency
28
- name: rspec
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: rdoc
14
+ name: bundler
43
15
  requirement: !ruby/object:Gem::Requirement
44
16
  requirements:
45
- - - ">="
17
+ - - "~>"
46
18
  - !ruby/object:Gem::Version
47
- version: '0'
19
+ version: '1.17'
48
20
  type: :development
49
21
  prerelease: false
50
22
  version_requirements: !ruby/object:Gem::Requirement
51
23
  requirements:
52
- - - ">="
24
+ - - "~>"
53
25
  - !ruby/object:Gem::Version
54
- version: '0'
26
+ version: '1.17'
55
27
  - !ruby/object:Gem::Dependency
56
- name: bundler
28
+ name: rake
57
29
  requirement: !ruby/object:Gem::Requirement
58
30
  requirements:
59
- - - ">="
31
+ - - "~>"
60
32
  - !ruby/object:Gem::Version
61
- version: '0'
33
+ version: '10.0'
62
34
  type: :development
63
35
  prerelease: false
64
36
  version_requirements: !ruby/object:Gem::Requirement
65
37
  requirements:
66
- - - ">="
38
+ - - "~>"
67
39
  - !ruby/object:Gem::Version
68
- version: '0'
40
+ version: '10.0'
69
41
  - !ruby/object:Gem::Dependency
70
- name: jeweler
42
+ name: rspec
71
43
  requirement: !ruby/object:Gem::Requirement
72
44
  requirements:
73
- - - ">="
45
+ - - "~>"
74
46
  - !ruby/object:Gem::Version
75
- version: '0'
47
+ version: '3.0'
76
48
  type: :development
77
49
  prerelease: false
78
50
  version_requirements: !ruby/object:Gem::Requirement
79
51
  requirements:
80
- - - ">="
52
+ - - "~>"
81
53
  - !ruby/object:Gem::Version
82
- version: '0'
54
+ version: '3.0'
83
55
  - !ruby/object:Gem::Dependency
84
56
  name: simplecov
85
57
  requirement: !ruby/object:Gem::Requirement
@@ -95,7 +67,7 @@ dependencies:
95
67
  - !ruby/object:Gem::Version
96
68
  version: '0'
97
69
  - !ruby/object:Gem::Dependency
98
- name: json
70
+ name: coveralls
99
71
  requirement: !ruby/object:Gem::Requirement
100
72
  requirements:
101
73
  - - ">="
@@ -109,13 +81,13 @@ dependencies:
109
81
  - !ruby/object:Gem::Version
110
82
  version: '0'
111
83
  - !ruby/object:Gem::Dependency
112
- name: coveralls
84
+ name: nokogiri
113
85
  requirement: !ruby/object:Gem::Requirement
114
86
  requirements:
115
87
  - - ">="
116
88
  - !ruby/object:Gem::Version
117
89
  version: '0'
118
- type: :development
90
+ type: :runtime
119
91
  prerelease: false
120
92
  version_requirements: !ruby/object:Gem::Requirement
121
93
  requirements:
@@ -123,56 +95,35 @@ dependencies:
123
95
  - !ruby/object:Gem::Version
124
96
  version: '0'
125
97
  description: Ruby library for working with the Tesseract OCR.
126
- email: dannnylo@gmail.com
98
+ email:
99
+ - dannnylo@gmail.com
127
100
  executables: []
128
101
  extensions: []
129
- extra_rdoc_files:
130
- - LICENSE.txt
131
- - README.rdoc
102
+ extra_rdoc_files: []
132
103
  files:
133
104
  - ".document"
105
+ - ".gitignore"
134
106
  - ".rspec"
135
107
  - ".travis.yml"
136
108
  - CHANGELOG.md
109
+ - CODE_OF_CONDUCT.md
137
110
  - Gemfile
138
111
  - Gemfile.lock
139
112
  - LICENSE.txt
140
- - README.rdoc
113
+ - README.md
141
114
  - Rakefile
142
- - VERSION
143
- - lib/processors/mini_magick.rb
144
- - lib/processors/none.rb
145
- - lib/processors/rmagick.rb
115
+ - bin/console
116
+ - bin/setup
146
117
  - lib/rtesseract.rb
147
- - lib/rtesseract/blob.rb
148
118
  - lib/rtesseract/box.rb
149
- - lib/rtesseract/box_char.rb
119
+ - lib/rtesseract/check.rb
120
+ - lib/rtesseract/command.rb
150
121
  - lib/rtesseract/configuration.rb
151
- - lib/rtesseract/errors.rb
152
- - lib/rtesseract/mixed.rb
153
- - lib/rtesseract/processor.rb
154
- - lib/rtesseract/utils.rb
155
- - lib/rtesseract/uzn.rb
122
+ - lib/rtesseract/pdf.rb
123
+ - lib/rtesseract/text.rb
124
+ - lib/rtesseract/tsv.rb
125
+ - lib/rtesseract/version.rb
156
126
  - rtesseract.gemspec
157
- - spec/configs/eng.user-words.txt
158
- - spec/images/README.pdf
159
- - spec/images/blank.tif
160
- - spec/images/mixed.tif
161
- - spec/images/orientation_reverse.png
162
- - spec/images/test with spaces.tif
163
- - spec/images/test-pdf.png
164
- - spec/images/test.bmp
165
- - spec/images/test.jpg
166
- - spec/images/test.png
167
- - spec/images/test.tif
168
- - spec/images/test1.tif
169
- - spec/images/test_words.png
170
- - spec/rtesseract_box_char_spec.rb
171
- - spec/rtesseract_box_spec.rb
172
- - spec/rtesseract_mixed_spec.rb
173
- - spec/rtesseract_spec.rb
174
- - spec/rtesseract_uzn_spec.rb
175
- - spec/spec_helper.rb
176
127
  homepage: http://github.com/dannnylo/rtesseract
177
128
  licenses:
178
129
  - MIT
@@ -193,7 +144,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
193
144
  version: '0'
194
145
  requirements: []
195
146
  rubyforge_project:
196
- rubygems_version: 2.6.14
147
+ rubygems_version: 2.7.6
197
148
  signing_key:
198
149
  specification_version: 4
199
150
  summary: Ruby library for working with the Tesseract OCR.