rtesseract 3.0.2 → 3.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aa205feaeb6a538fbe70f6482e2ac8cd99c709b40f977d69addb38689045d9a9
4
- data.tar.gz: 9216bd8d9aae244e8e6dd527e050a0281f940a8aeebfcfa3f7d696c1c830a1b1
3
+ metadata.gz: 8eb694ea4b37475f756451795c145b8ba9618a0e5e94b0774a90301bb1aa97a2
4
+ data.tar.gz: fe725c774fc39720ff830e47e4580f7335def9ece76f67fad77fe9722f415d6c
5
5
  SHA512:
6
- metadata.gz: 47df8a451610fbbae458851a626fb48a97ecb77a7eb32c24fcc29cb38465e4ac0d170f447cf557ade0ff8776bfee7fe5e102cb1bb32a2eacfbbc6eaf38a89a7d
7
- data.tar.gz: 0540a33932743072a560a99ba5a0224ffd855df6dd8890fb2072582cc12af94f62c1aa6d2bf5b193e56bcf9e308f1daef85cd438da04aaffd9d083ba750d1c0a
6
+ metadata.gz: f40f8b53fc3c63e4968d9b1adab5153771730897fd681e354afea79f2007c28c4216dc61cad0d218dc7d76814360ae9169a0ffb8999ab6c8d15ef51ad712ec07
7
+ data.tar.gz: e2dfbf63b972c6e678d4bb79ec00064a958c9607f76cffe197bc26f555dff35bd7b0e1c263e376ede9598e2f429a344abb4b8d39b0e47ede6d1c09a32e6c44a4
@@ -0,0 +1,8 @@
1
+ Documentation:
2
+ Enabled: false
3
+
4
+ Metrics/LineLength:
5
+ Max: 150
6
+
7
+ Metrics/BlockLength:
8
+ Max: 50
@@ -1,3 +1,15 @@
1
+ ## v3.0.3
2
+
3
+ #### Changed
4
+
5
+ * Fix some problems with the commander gem
6
+
7
+ ## v3.0.0
8
+
9
+ #### Changed
10
+
11
+ * Refactor the whole gem to work with Tesseract version 4 or above
12
+
1
13
  ## v2.1.0
2
14
 
3
15
  #### Added
data/Gemfile CHANGED
@@ -1,6 +1,6 @@
1
- source "https://rubygems.org"
1
+ source 'https://rubygems.org'
2
2
 
3
- git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
3
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
4
4
 
5
5
  # Specify your gem's dependencies in rtesseract.gemspec
6
6
  gemspec
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rtesseract (3.0.2)
4
+ rtesseract (3.0.3)
5
5
  nokogiri
6
6
 
7
7
  GEM
@@ -17,7 +17,7 @@ GEM
17
17
  docile (1.3.1)
18
18
  json (2.1.0)
19
19
  mini_portile2 (2.4.0)
20
- nokogiri (1.9.1)
20
+ nokogiri (1.10.1)
21
21
  mini_portile2 (~> 2.4.0)
22
22
  rake (10.5.0)
23
23
  rspec (3.8.0)
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "bundler/setup"
4
- require "rtesseract"
3
+ require 'bundler/setup'
4
+ require 'rtesseract'
5
5
 
6
6
  # You can add fixtures and/or initialization code here to make experimenting
7
7
  # with your gem easier. You can also use a different console, if you like.
@@ -10,5 +10,5 @@ require "rtesseract"
10
10
  # require "pry"
11
11
  # Pry.start
12
12
 
13
- require "irb"
13
+ require 'irb'
14
14
  IRB.start(__FILE__)
@@ -1,11 +1,11 @@
1
- require "rtesseract/check"
2
- require "rtesseract/configuration"
3
- require "rtesseract/command"
4
- require "rtesseract/base"
5
- require "rtesseract/text"
6
- require "rtesseract/pdf"
7
- require "rtesseract/box"
8
- require "rtesseract/tsv"
1
+ require 'rtesseract/check'
2
+ require 'rtesseract/configuration'
3
+ require 'rtesseract/command'
4
+ require 'rtesseract/base'
5
+ require 'rtesseract/text'
6
+ require 'rtesseract/pdf'
7
+ require 'rtesseract/box'
8
+ require 'rtesseract/tsv'
9
9
 
10
10
  class RTesseract
11
11
  class Error < StandardError; end
@@ -15,10 +15,11 @@ class RTesseract
15
15
  def initialize(src = '', options = {})
16
16
  @source = src
17
17
  @config = RTesseract.config.merge(options)
18
+ @errors = []
18
19
  end
19
20
 
20
21
  def to_box
21
- Box.run(@source, config)
22
+ Box.run(@source, @errors, config)
22
23
  end
23
24
 
24
25
  def words
@@ -26,20 +27,22 @@ class RTesseract
26
27
  end
27
28
 
28
29
  def to_pdf
29
- Pdf.run(@source, config)
30
+ Pdf.run(@source, @errors, config)
30
31
  end
31
32
 
32
33
  def to_tsv
33
- Tsv.run(@source, config)
34
+ Tsv.run(@source, @errors, config)
34
35
  end
35
36
 
36
37
  # Output value
37
38
  def to_s
38
- Text.run(@source, config)
39
+ Text.run(@source, @errors, config)
39
40
  end
40
41
 
41
42
  # Remove spaces and break-lines
42
43
  def to_s_without_spaces
43
44
  to_s.gsub(/\s/, '')
44
45
  end
46
+
47
+ attr_reader :errors
45
48
  end
@@ -1,5 +1,6 @@
1
1
  require 'tmpdir'
2
2
  require 'securerandom'
3
+ require 'pathname'
3
4
 
4
5
  class RTesseract
5
6
  module Base
@@ -9,4 +10,4 @@ class RTesseract
9
10
  Pathname.new(Dir.tmpdir).join("#{@rand_file}#{ext}").to_s
10
11
  end
11
12
  end
12
- end
13
+ end
@@ -4,10 +4,10 @@ class RTesseract
4
4
  module Box
5
5
  extend RTesseract::Base
6
6
 
7
- def self.run(source, options)
7
+ def self.run(source, errors, options)
8
8
  options.tessedit_create_hocr = 1
9
9
 
10
- RTesseract::Command.new(source, temp_file, options).run
10
+ RTesseract::Command.new(source, temp_file, errors, options).run
11
11
 
12
12
  parse(File.read(temp_file('.hocr')))
13
13
  end
@@ -15,16 +15,19 @@ class RTesseract
15
15
  def self.parse(content)
16
16
  html = Nokogiri::HTML(content)
17
17
  html.css('span.ocrx_word, span.ocr_word').map do |word|
18
- @attributes = word.attributes['title'].value.to_s.gsub(';', '').split(' ')
19
-
20
- {
21
- word: word.text,
22
- x_start: @attributes[1].to_i,
23
- y_start: @attributes[2].to_i,
24
- x_end: @attributes[3].to_i,
25
- y_end: @attributes[4].to_i
26
- }
18
+ attributes = word.attributes['title'].value.to_s.delete(';').split(' ')
19
+ word_info(word, attributes)
27
20
  end
28
21
  end
22
+
23
+ def self.word_info(word, data)
24
+ {
25
+ word: word.text,
26
+ x_start: data[1].to_i,
27
+ y_start: data[2].to_i,
28
+ x_end: data[3].to_i,
29
+ y_end: data[4].to_i
30
+ }
31
+ end
29
32
  end
30
- end
33
+ end
@@ -1,14 +1,13 @@
1
-
2
1
  class RTesseract
3
2
  class << self
4
3
  def tesseract_version
5
- Open3.capture2e(RTesseract.config.command, "--version").first.to_s.match(/\d+.\d+/)[0].to_f
4
+ Open3.capture2e(RTesseract.config.command, '--version').first.to_s.match(/\d+.\d+/)[0].to_f
6
5
  rescue Errno::ENOENT
7
6
  0
8
7
  end
9
8
 
10
9
  def check_version!
11
- raise RTesseract::Error.new('Tesseract OCR 3.5 or later not installed') if RTesseract.tesseract_version < 3.05
10
+ raise RTesseract::Error, 'Tesseract OCR 3.5 or later not installed' if RTesseract.tesseract_version < 3.05
12
11
  end
13
12
  end
14
- end
13
+ end
@@ -1,14 +1,15 @@
1
1
  class RTesseract
2
2
  class Command
3
- FIXED = [:command, :psm, :oem, :lang, :tessdata_dir, :user_words, :user_patterns, :config_file]
3
+ FIXED = %i[command psm oem lang tessdata_dir user_words user_patterns config_file].freeze
4
4
 
5
5
  attr_reader :options
6
6
 
7
- def initialize(source, output, options)
7
+ def initialize(source, output, errors, options)
8
8
  @source = source
9
9
  @output = output
10
10
  @options = options
11
- @full_command = [ options.command, @source, @output]
11
+ @errors = errors
12
+ @full_command = [options.command, @source, @output]
12
13
  end
13
14
 
14
15
  def full_command
@@ -41,11 +42,13 @@ class RTesseract
41
42
  end
42
43
 
43
44
  def run
44
- output, status = Open3.capture2e(*full_command.flatten)
45
+ output, error, status = Open3.capture3(*full_command.flatten)
46
+
47
+ @errors.push(error)
45
48
 
46
49
  return output if status.success?
47
50
 
48
- raise RTesseract::Error.new(output)
51
+ raise RTesseract::Error, error
49
52
  end
50
53
  end
51
- end
54
+ end
@@ -3,16 +3,20 @@ require 'ostruct'
3
3
  class RTesseract
4
4
  class Configuration < OpenStruct
5
5
  def merge(options)
6
- RTesseract::Configuration.new(self.to_h.merge(options))
6
+ RTesseract::Configuration.new(to_h.merge(options))
7
+ end
8
+
9
+ def command
10
+ @table[:command]
7
11
  end
8
12
  end
9
13
 
10
14
  class << self
11
15
  def config
12
16
  @config ||= RTesseract::Configuration.new(
13
- command: 'tesseract',
14
- debug_file: '/dev/null'
15
- )
17
+ command: 'tesseract',
18
+ debug_file: '/dev/null'
19
+ )
16
20
  end
17
21
 
18
22
  def configure
@@ -2,12 +2,12 @@ class RTesseract
2
2
  module Pdf
3
3
  extend Base
4
4
 
5
- def self.run(source, options)
5
+ def self.run(source, errors, options)
6
6
  options.tessedit_create_pdf = 1
7
7
 
8
- RTesseract::Command.new(source, temp_file, options).run
8
+ RTesseract::Command.new(source, temp_file, errors, options).run
9
9
 
10
10
  File.open(temp_file('.pdf'), 'r')
11
11
  end
12
12
  end
13
- end
13
+ end
@@ -2,8 +2,8 @@ require 'open3'
2
2
 
3
3
  class RTesseract
4
4
  module Text
5
- def self.run(source, options)
6
- RTesseract::Command.new(source, 'stdout', options).run
5
+ def self.run(source, errors, options)
6
+ RTesseract::Command.new(source, 'stdout', errors, options).run
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -2,12 +2,12 @@ class RTesseract
2
2
  module Tsv
3
3
  extend Base
4
4
 
5
- def self.run(source, options)
5
+ def self.run(source, errors, options)
6
6
  options.tessedit_create_tsv = 1
7
7
 
8
- RTesseract::Command.new(source, temp_file, options).run
8
+ RTesseract::Command.new(source, temp_file, errors, options).run
9
9
 
10
10
  File.open(temp_file('.tsv'), 'r')
11
11
  end
12
12
  end
13
- end
13
+ end
@@ -1,3 +1,3 @@
1
1
  class RTesseract
2
- VERSION = '3.0.2'.freeze
2
+ VERSION = '3.0.3'.freeze
3
3
  end
@@ -1,33 +1,32 @@
1
-
2
- lib = File.expand_path("../lib", __FILE__)
1
+ lib = File.expand_path('lib', __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require "rtesseract/version"
3
+ require 'rtesseract/version'
5
4
 
6
5
  Gem::Specification.new do |spec|
7
- spec.name = "rtesseract"
6
+ spec.name = 'rtesseract'
8
7
  spec.version = RTesseract::VERSION
9
- spec.authors = ["Danilo Jeremias da Silva"]
10
- spec.email = ["dannnylo@gmail.com"]
8
+ spec.authors = ['Danilo Jeremias da Silva']
9
+ spec.email = ['dannnylo@gmail.com']
11
10
 
12
- spec.summary = "Ruby library for working with the Tesseract OCR.".freeze
13
- spec.description = "Ruby library for working with the Tesseract OCR.".freeze
14
- spec.homepage = "http://github.com/dannnylo/rtesseract".freeze
15
- spec.license = "MIT"
11
+ spec.summary = 'Ruby library for working with the Tesseract OCR.'.freeze
12
+ spec.description = 'Ruby library for working with the Tesseract OCR.'.freeze
13
+ spec.homepage = 'http://github.com/dannnylo/rtesseract'.freeze
14
+ spec.license = 'MIT'
16
15
 
17
16
  # Specify which files should be added to the gem when it is released.
18
17
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
19
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
18
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
20
19
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
21
20
  end
22
- spec.bindir = "exe"
21
+ spec.bindir = 'exe'
23
22
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
24
- spec.require_paths = ["lib"]
23
+ spec.require_paths = ['lib']
25
24
 
26
- spec.add_development_dependency "bundler", "~> 1.17"
27
- spec.add_development_dependency "rake", "~> 10.0"
28
- spec.add_development_dependency "rspec", "~> 3.0"
29
- spec.add_development_dependency "simplecov"
30
- spec.add_development_dependency "coveralls"
25
+ spec.add_development_dependency 'bundler', '~> 1.17'
26
+ spec.add_development_dependency 'coveralls'
27
+ spec.add_development_dependency 'rake', '~> 10.0'
28
+ spec.add_development_dependency 'rspec', '~> 3.0'
29
+ spec.add_development_dependency 'simplecov'
31
30
 
32
- spec.add_dependency "nokogiri"
31
+ spec.add_dependency 'nokogiri'
33
32
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.2
4
+ version: 3.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danilo Jeremias da Silva
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-01-04 00:00:00.000000000 Z
11
+ date: 2019-03-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -25,49 +25,49 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.17'
27
27
  - !ruby/object:Gem::Dependency
28
- name: rake
28
+ name: coveralls
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rspec
42
+ name: rake
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '3.0'
47
+ version: '10.0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '3.0'
54
+ version: '10.0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: simplecov
56
+ name: rspec
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ">="
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0'
61
+ version: '3.0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ">="
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '0'
68
+ version: '3.0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: coveralls
70
+ name: simplecov
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
@@ -104,6 +104,7 @@ files:
104
104
  - ".document"
105
105
  - ".gitignore"
106
106
  - ".rspec"
107
+ - ".rubocop.yml"
107
108
  - ".travis.yml"
108
109
  - CHANGELOG.md
109
110
  - CODE_OF_CONDUCT.md
@@ -145,7 +146,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
145
146
  version: '0'
146
147
  requirements: []
147
148
  rubyforge_project:
148
- rubygems_version: 2.7.6
149
+ rubygems_version: 2.7.8
149
150
  signing_key:
150
151
  specification_version: 4
151
152
  summary: Ruby library for working with the Tesseract OCR.