rtesseract 3.0.2 → 3.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aa205feaeb6a538fbe70f6482e2ac8cd99c709b40f977d69addb38689045d9a9
4
- data.tar.gz: 9216bd8d9aae244e8e6dd527e050a0281f940a8aeebfcfa3f7d696c1c830a1b1
3
+ metadata.gz: 8eb694ea4b37475f756451795c145b8ba9618a0e5e94b0774a90301bb1aa97a2
4
+ data.tar.gz: fe725c774fc39720ff830e47e4580f7335def9ece76f67fad77fe9722f415d6c
5
5
  SHA512:
6
- metadata.gz: 47df8a451610fbbae458851a626fb48a97ecb77a7eb32c24fcc29cb38465e4ac0d170f447cf557ade0ff8776bfee7fe5e102cb1bb32a2eacfbbc6eaf38a89a7d
7
- data.tar.gz: 0540a33932743072a560a99ba5a0224ffd855df6dd8890fb2072582cc12af94f62c1aa6d2bf5b193e56bcf9e308f1daef85cd438da04aaffd9d083ba750d1c0a
6
+ metadata.gz: f40f8b53fc3c63e4968d9b1adab5153771730897fd681e354afea79f2007c28c4216dc61cad0d218dc7d76814360ae9169a0ffb8999ab6c8d15ef51ad712ec07
7
+ data.tar.gz: e2dfbf63b972c6e678d4bb79ec00064a958c9607f76cffe197bc26f555dff35bd7b0e1c263e376ede9598e2f429a344abb4b8d39b0e47ede6d1c09a32e6c44a4
@@ -0,0 +1,8 @@
1
+ Documentation:
2
+ Enabled: false
3
+
4
+ Metrics/LineLength:
5
+ Max: 150
6
+
7
+ Metrics/BlockLength:
8
+ Max: 50
@@ -1,3 +1,15 @@
1
+ ## v3.0.3
2
+
3
+ #### Changed
4
+
5
+ * Fix some problems with commanders gem
6
+
7
+ ## v3.0.0
8
+
9
+ #### Changed
10
+
11
+ * Refactor the whole gem to work with Tesseract version 4 or above
12
+
1
13
  ## v2.1.0
2
14
 
3
15
  #### Added
data/Gemfile CHANGED
@@ -1,6 +1,6 @@
1
- source "https://rubygems.org"
1
+ source 'https://rubygems.org'
2
2
 
3
- git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
3
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
4
4
 
5
5
  # Specify your gem's dependencies in rtesseract.gemspec
6
6
  gemspec
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rtesseract (3.0.2)
4
+ rtesseract (3.0.3)
5
5
  nokogiri
6
6
 
7
7
  GEM
@@ -17,7 +17,7 @@ GEM
17
17
  docile (1.3.1)
18
18
  json (2.1.0)
19
19
  mini_portile2 (2.4.0)
20
- nokogiri (1.9.1)
20
+ nokogiri (1.10.1)
21
21
  mini_portile2 (~> 2.4.0)
22
22
  rake (10.5.0)
23
23
  rspec (3.8.0)
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "bundler/setup"
4
- require "rtesseract"
3
+ require 'bundler/setup'
4
+ require 'rtesseract'
5
5
 
6
6
  # You can add fixtures and/or initialization code here to make experimenting
7
7
  # with your gem easier. You can also use a different console, if you like.
@@ -10,5 +10,5 @@ require "rtesseract"
10
10
  # require "pry"
11
11
  # Pry.start
12
12
 
13
- require "irb"
13
+ require 'irb'
14
14
  IRB.start(__FILE__)
@@ -1,11 +1,11 @@
1
- require "rtesseract/check"
2
- require "rtesseract/configuration"
3
- require "rtesseract/command"
4
- require "rtesseract/base"
5
- require "rtesseract/text"
6
- require "rtesseract/pdf"
7
- require "rtesseract/box"
8
- require "rtesseract/tsv"
1
+ require 'rtesseract/check'
2
+ require 'rtesseract/configuration'
3
+ require 'rtesseract/command'
4
+ require 'rtesseract/base'
5
+ require 'rtesseract/text'
6
+ require 'rtesseract/pdf'
7
+ require 'rtesseract/box'
8
+ require 'rtesseract/tsv'
9
9
 
10
10
  class RTesseract
11
11
  class Error < StandardError; end
@@ -15,10 +15,11 @@ class RTesseract
15
15
  def initialize(src = '', options = {})
16
16
  @source = src
17
17
  @config = RTesseract.config.merge(options)
18
+ @errors = []
18
19
  end
19
20
 
20
21
  def to_box
21
- Box.run(@source, config)
22
+ Box.run(@source, @errors, config)
22
23
  end
23
24
 
24
25
  def words
@@ -26,20 +27,22 @@ class RTesseract
26
27
  end
27
28
 
28
29
  def to_pdf
29
- Pdf.run(@source, config)
30
+ Pdf.run(@source, @errors, config)
30
31
  end
31
32
 
32
33
  def to_tsv
33
- Tsv.run(@source, config)
34
+ Tsv.run(@source, @errors, config)
34
35
  end
35
36
 
36
37
  # Output value
37
38
  def to_s
38
- Text.run(@source, config)
39
+ Text.run(@source, @errors, config)
39
40
  end
40
41
 
41
42
  # Remove spaces and break-lines
42
43
  def to_s_without_spaces
43
44
  to_s.gsub(/\s/, '')
44
45
  end
46
+
47
+ attr_reader :errors
45
48
  end
@@ -1,5 +1,6 @@
1
1
  require 'tmpdir'
2
2
  require 'securerandom'
3
+ require 'pathname'
3
4
 
4
5
  class RTesseract
5
6
  module Base
@@ -9,4 +10,4 @@ class RTesseract
9
10
  Pathname.new(Dir.tmpdir).join("#{@rand_file}#{ext}").to_s
10
11
  end
11
12
  end
12
- end
13
+ end
@@ -4,10 +4,10 @@ class RTesseract
4
4
  module Box
5
5
  extend RTesseract::Base
6
6
 
7
- def self.run(source, options)
7
+ def self.run(source, errors, options)
8
8
  options.tessedit_create_hocr = 1
9
9
 
10
- RTesseract::Command.new(source, temp_file, options).run
10
+ RTesseract::Command.new(source, temp_file, errors, options).run
11
11
 
12
12
  parse(File.read(temp_file('.hocr')))
13
13
  end
@@ -15,16 +15,19 @@ class RTesseract
15
15
  def self.parse(content)
16
16
  html = Nokogiri::HTML(content)
17
17
  html.css('span.ocrx_word, span.ocr_word').map do |word|
18
- @attributes = word.attributes['title'].value.to_s.gsub(';', '').split(' ')
19
-
20
- {
21
- word: word.text,
22
- x_start: @attributes[1].to_i,
23
- y_start: @attributes[2].to_i,
24
- x_end: @attributes[3].to_i,
25
- y_end: @attributes[4].to_i
26
- }
18
+ attributes = word.attributes['title'].value.to_s.delete(';').split(' ')
19
+ word_info(word, attributes)
27
20
  end
28
21
  end
22
+
23
+ def self.word_info(word, data)
24
+ {
25
+ word: word.text,
26
+ x_start: data[1].to_i,
27
+ y_start: data[2].to_i,
28
+ x_end: data[3].to_i,
29
+ y_end: data[4].to_i
30
+ }
31
+ end
29
32
  end
30
- end
33
+ end
@@ -1,14 +1,13 @@
1
-
2
1
  class RTesseract
3
2
  class << self
4
3
  def tesseract_version
5
- Open3.capture2e(RTesseract.config.command, "--version").first.to_s.match(/\d+.\d+/)[0].to_f
4
+ Open3.capture2e(RTesseract.config.command, '--version').first.to_s.match(/\d+.\d+/)[0].to_f
6
5
  rescue Errno::ENOENT
7
6
  0
8
7
  end
9
8
 
10
9
  def check_version!
11
- raise RTesseract::Error.new('Tesseract OCR 3.5 or later not installed') if RTesseract.tesseract_version < 3.05
10
+ raise RTesseract::Error, 'Tesseract OCR 3.5 or later not installed' if RTesseract.tesseract_version < 3.05
12
11
  end
13
12
  end
14
- end
13
+ end
@@ -1,14 +1,15 @@
1
1
  class RTesseract
2
2
  class Command
3
- FIXED = [:command, :psm, :oem, :lang, :tessdata_dir, :user_words, :user_patterns, :config_file]
3
+ FIXED = %i[command psm oem lang tessdata_dir user_words user_patterns config_file].freeze
4
4
 
5
5
  attr_reader :options
6
6
 
7
- def initialize(source, output, options)
7
+ def initialize(source, output, errors, options)
8
8
  @source = source
9
9
  @output = output
10
10
  @options = options
11
- @full_command = [ options.command, @source, @output]
11
+ @errors = errors
12
+ @full_command = [options.command, @source, @output]
12
13
  end
13
14
 
14
15
  def full_command
@@ -41,11 +42,13 @@ class RTesseract
41
42
  end
42
43
 
43
44
  def run
44
- output, status = Open3.capture2e(*full_command.flatten)
45
+ output, error, status = Open3.capture3(*full_command.flatten)
46
+
47
+ @errors.push(error)
45
48
 
46
49
  return output if status.success?
47
50
 
48
- raise RTesseract::Error.new(output)
51
+ raise RTesseract::Error, error
49
52
  end
50
53
  end
51
- end
54
+ end
@@ -3,16 +3,20 @@ require 'ostruct'
3
3
  class RTesseract
4
4
  class Configuration < OpenStruct
5
5
  def merge(options)
6
- RTesseract::Configuration.new(self.to_h.merge(options))
6
+ RTesseract::Configuration.new(to_h.merge(options))
7
+ end
8
+
9
+ def command
10
+ @table[:command]
7
11
  end
8
12
  end
9
13
 
10
14
  class << self
11
15
  def config
12
16
  @config ||= RTesseract::Configuration.new(
13
- command: 'tesseract',
14
- debug_file: '/dev/null'
15
- )
17
+ command: 'tesseract',
18
+ debug_file: '/dev/null'
19
+ )
16
20
  end
17
21
 
18
22
  def configure
@@ -2,12 +2,12 @@ class RTesseract
2
2
  module Pdf
3
3
  extend Base
4
4
 
5
- def self.run(source, options)
5
+ def self.run(source, errors, options)
6
6
  options.tessedit_create_pdf = 1
7
7
 
8
- RTesseract::Command.new(source, temp_file, options).run
8
+ RTesseract::Command.new(source, temp_file, errors, options).run
9
9
 
10
10
  File.open(temp_file('.pdf'), 'r')
11
11
  end
12
12
  end
13
- end
13
+ end
@@ -2,8 +2,8 @@ require 'open3'
2
2
 
3
3
  class RTesseract
4
4
  module Text
5
- def self.run(source, options)
6
- RTesseract::Command.new(source, 'stdout', options).run
5
+ def self.run(source, errors, options)
6
+ RTesseract::Command.new(source, 'stdout', errors, options).run
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -2,12 +2,12 @@ class RTesseract
2
2
  module Tsv
3
3
  extend Base
4
4
 
5
- def self.run(source, options)
5
+ def self.run(source, errors, options)
6
6
  options.tessedit_create_tsv = 1
7
7
 
8
- RTesseract::Command.new(source, temp_file, options).run
8
+ RTesseract::Command.new(source, temp_file, errors, options).run
9
9
 
10
10
  File.open(temp_file('.tsv'), 'r')
11
11
  end
12
12
  end
13
- end
13
+ end
@@ -1,3 +1,3 @@
1
1
  class RTesseract
2
- VERSION = '3.0.2'.freeze
2
+ VERSION = '3.0.3'.freeze
3
3
  end
@@ -1,33 +1,32 @@
1
-
2
- lib = File.expand_path("../lib", __FILE__)
1
+ lib = File.expand_path('lib', __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require "rtesseract/version"
3
+ require 'rtesseract/version'
5
4
 
6
5
  Gem::Specification.new do |spec|
7
- spec.name = "rtesseract"
6
+ spec.name = 'rtesseract'
8
7
  spec.version = RTesseract::VERSION
9
- spec.authors = ["Danilo Jeremias da Silva"]
10
- spec.email = ["dannnylo@gmail.com"]
8
+ spec.authors = ['Danilo Jeremias da Silva']
9
+ spec.email = ['dannnylo@gmail.com']
11
10
 
12
- spec.summary = "Ruby library for working with the Tesseract OCR.".freeze
13
- spec.description = "Ruby library for working with the Tesseract OCR.".freeze
14
- spec.homepage = "http://github.com/dannnylo/rtesseract".freeze
15
- spec.license = "MIT"
11
+ spec.summary = 'Ruby library for working with the Tesseract OCR.'.freeze
12
+ spec.description = 'Ruby library for working with the Tesseract OCR.'.freeze
13
+ spec.homepage = 'http://github.com/dannnylo/rtesseract'.freeze
14
+ spec.license = 'MIT'
16
15
 
17
16
  # Specify which files should be added to the gem when it is released.
18
17
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
19
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
18
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
20
19
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
21
20
  end
22
- spec.bindir = "exe"
21
+ spec.bindir = 'exe'
23
22
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
24
- spec.require_paths = ["lib"]
23
+ spec.require_paths = ['lib']
25
24
 
26
- spec.add_development_dependency "bundler", "~> 1.17"
27
- spec.add_development_dependency "rake", "~> 10.0"
28
- spec.add_development_dependency "rspec", "~> 3.0"
29
- spec.add_development_dependency "simplecov"
30
- spec.add_development_dependency "coveralls"
25
+ spec.add_development_dependency 'bundler', '~> 1.17'
26
+ spec.add_development_dependency 'coveralls'
27
+ spec.add_development_dependency 'rake', '~> 10.0'
28
+ spec.add_development_dependency 'rspec', '~> 3.0'
29
+ spec.add_development_dependency 'simplecov'
31
30
 
32
- spec.add_dependency "nokogiri"
31
+ spec.add_dependency 'nokogiri'
33
32
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.2
4
+ version: 3.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danilo Jeremias da Silva
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-01-04 00:00:00.000000000 Z
11
+ date: 2019-03-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -25,49 +25,49 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.17'
27
27
  - !ruby/object:Gem::Dependency
28
- name: rake
28
+ name: coveralls
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rspec
42
+ name: rake
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '3.0'
47
+ version: '10.0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '3.0'
54
+ version: '10.0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: simplecov
56
+ name: rspec
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ">="
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0'
61
+ version: '3.0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ">="
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '0'
68
+ version: '3.0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: coveralls
70
+ name: simplecov
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
@@ -104,6 +104,7 @@ files:
104
104
  - ".document"
105
105
  - ".gitignore"
106
106
  - ".rspec"
107
+ - ".rubocop.yml"
107
108
  - ".travis.yml"
108
109
  - CHANGELOG.md
109
110
  - CODE_OF_CONDUCT.md
@@ -145,7 +146,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
145
146
  version: '0'
146
147
  requirements: []
147
148
  rubyforge_project:
148
- rubygems_version: 2.7.6
149
+ rubygems_version: 2.7.8
149
150
  signing_key:
150
151
  specification_version: 4
151
152
  summary: Ruby library for working with the Tesseract OCR.