rtesseract 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 00e35fcd674159d47c7252be586ba214c42f374d1e671cab2ae8a47672237d92
4
- data.tar.gz: e6601b67346092513af4399c49bb161b99afccb84f277395fe8f27417d2f61da
3
+ metadata.gz: c40e571fce623118c523c005a8d8404d99390ae90fa73d43a50ee873b103d431
4
+ data.tar.gz: a4d7325c79141f3bb9625def8b28b1ef808f6dc7e6ab59abefb2178a73b59277
5
5
  SHA512:
6
- metadata.gz: 92713fe105ad0b96ca28fc61f530c4dcfb8a6905aeb9d7206dfb951afab7f482e10b1ffc397f5d2e8cf86f9bd0a69be65e8da791ba0dd32655c29341bf0a67af
7
- data.tar.gz: 4601e51dc96489292cfd0b54653f149bfcde9a830b3c3071ed2bc55778ab393405c267172ea06aa1e2ad3f64c615b845ba0aaf9a5317288555ee50e913bc196f
6
+ metadata.gz: 39198fd34d327c75172433a3c811ced60b6cf4be19bb7071a922ea66a9336f3023f0d1edaa70e0f9cf695c95e9b9eb1928690449213f6740b0ad4f5779be3d61
7
+ data.tar.gz: 97684d198ce69b722e03c6f2bde257eac62f772d23e5fd2695eafe47a053c72731f0335c95e779ded290cc50d70c4fc21d3841ca27691c80856c02655630c6a4
data/.gitignore CHANGED
@@ -10,3 +10,4 @@
10
10
 
11
11
  # rspec failure tracking
12
12
  .rspec_status
13
+ *.gem
data/.travis.yml CHANGED
@@ -14,3 +14,4 @@ before_install:
14
14
  rvm:
15
15
  - 2.4.5
16
16
  - 2.5.3
17
+ - 2.6.0
data/README.md CHANGED
@@ -57,7 +57,7 @@ It's very simple to use rtesseract.
57
57
 
58
58
  ```ruby
59
59
  image = RTesseract.new("my_image.jpg")
60
- image.to_tsv # Getting open file of pdf
60
+ image.to_tsv # Getting open file of tsv
61
61
  ```
62
62
 
63
63
  This will preserve the image colors, pictures and structure in the generated pdf.
@@ -100,23 +100,24 @@ This will preserve the image colors, pictures and structure in the generated pdf
100
100
  RTesseract.new('test_words.png').to_box
101
101
  ```
102
102
 
103
- # => [
104
- # {:word => 'If', :x_start=>52, :y_start=>13, :x_end=>63, :y_end=>27},
105
- # {:word => 'you', :x_start=>69, :y_start=>17, :x_end=>100, :y_end=>31},
106
- # {:word => 'are', :x_start=>108, :y_start=>17, :x_end=>136, :y_end=>27},
107
- # {:word => 'a', :x_start=>143, :y_start=>17, :x_end=>151, :y_end=>27},
108
- # {:word => 'friend,', :x_start=>158, :y_start=>13, :x_end=>214, :y_end=>29},
109
- # {:word => 'you', :x_start=>51, :y_start=>39, :x_end=>82, :y_end=>53},
110
- # {:word => 'speak', :x_start=>90, :y_start=>35, :x_end=>140, :y_end=>53},
111
- # {:word => 'the', :x_start=>146, :y_start=>35, :x_end=>174, :y_end=>49},
112
- # {:word => 'password,', :x_start=>182, :y_start=>35, :x_end=>267, :y_end=>53},
113
- # {:word => 'and', :x_start=>51, :y_start=>57, :x_end=>81, :y_end=>71},
114
- # {:word => 'the', :x_start=>89, :y_start=>57, :x_end=>117, :y_end=>71},
115
- # {:word => 'doors', :x_start=>124, :y_start=>57, :x_end=>172, :y_end=>71},
116
- # {:word => 'will', :x_start=>180, :y_start=>57, :x_end=>208, :y_end=>71},
117
- # {:word => 'open.', :x_start=>216, :y_start=>61, :x_end=>263, :y_end=>75}
118
- # ]
119
-
103
+ ```ruby
104
+ => [
105
+ {:word => 'If', :x_start=>52, :y_start=>13, :x_end=>63, :y_end=>27},
106
+ {:word => 'you', :x_start=>69, :y_start=>17, :x_end=>100, :y_end=>31},
107
+ {:word => 'are', :x_start=>108, :y_start=>17, :x_end=>136, :y_end=>27},
108
+ {:word => 'a', :x_start=>143, :y_start=>17, :x_end=>151, :y_end=>27},
109
+ {:word => 'friend,', :x_start=>158, :y_start=>13, :x_end=>214, :y_end=>29},
110
+ {:word => 'you', :x_start=>51, :y_start=>39, :x_end=>82, :y_end=>53},
111
+ {:word => 'speak', :x_start=>90, :y_start=>35, :x_end=>140, :y_end=>53},
112
+ {:word => 'the', :x_start=>146, :y_start=>35, :x_end=>174, :y_end=>49},
113
+ {:word => 'password,', :x_start=>182, :y_start=>35, :x_end=>267, :y_end=>53},
114
+ {:word => 'and', :x_start=>51, :y_start=>57, :x_end=>81, :y_end=>71},
115
+ {:word => 'the', :x_start=>89, :y_start=>57, :x_end=>117, :y_end=>71},
116
+ {:word => 'doors', :x_start=>124, :y_start=>57, :x_end=>172, :y_end=>71},
117
+ {:word => 'will', :x_start=>180, :y_start=>57, :x_end=>208, :y_end=>71},
118
+ {:word => 'open.', :x_start=>216, :y_start=>61, :x_end=>263, :y_end=>75}
119
+ ]
120
+ ```
120
121
 
121
122
  ## Development
122
123
 
data/lib/rtesseract.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require "rtesseract/check"
2
2
  require "rtesseract/configuration"
3
3
  require "rtesseract/command"
4
+ require "rtesseract/base"
4
5
  require "rtesseract/text"
5
6
  require "rtesseract/pdf"
6
7
  require "rtesseract/box"
@@ -9,8 +10,6 @@ require "rtesseract/tsv"
9
10
  class RTesseract
10
11
  class Error < StandardError; end
11
12
 
12
- check_version!
13
-
14
13
  attr_reader :config, :source
15
14
 
16
15
  def initialize(src = '', options = {})
data/lib/rtesseract/base.rb ADDED
@@ -0,0 +1,12 @@
1
+ require 'tmpdir'
2
+ require 'securerandom'
3
+
4
+ class RTesseract
5
+ module Base
6
+ def temp_file(ext = '')
7
+ @rand_file ||= "rtesseract_#{SecureRandom.uuid}"
8
+
9
+ Pathname.new(Dir.tmpdir).join("#{@rand_file}#{ext}").to_s
10
+ end
11
+ end
12
+ end
data/lib/rtesseract/box.rb CHANGED
@@ -1,19 +1,15 @@
1
1
  require 'nokogiri'
2
- require 'tmpdir'
3
2
 
4
3
  class RTesseract
5
4
  module Box
6
- def self.temp_dir
7
- @file_path = Pathname.new(Dir.tmpdir)
8
- end
5
+ extend RTesseract::Base
9
6
 
10
7
  def self.run(source, options)
11
- name = "rtesseract_#{SecureRandom.uuid}"
12
8
  options.tessedit_create_hocr = 1
13
9
 
14
- RTesseract::Command.new(source, temp_dir.join(name).to_s, options).run
10
+ RTesseract::Command.new(source, temp_file, options).run
15
11
 
16
- parse(temp_dir.join("#{name}.hocr").read)
12
+ parse(File.read(temp_file('.hocr')))
17
13
  end
18
14
 
19
15
  def self.parse(content)
data/lib/rtesseract/command.rb CHANGED
@@ -1,5 +1,3 @@
1
- require 'tmpdir'
2
-
3
1
  class RTesseract
4
2
  class Command
5
3
  FIXED = [:command, :psm, :oem, :lang, :tessdata_dir, :user_words, :user_patterns, :config_file]
@@ -10,32 +8,44 @@ class RTesseract
10
8
  @source = source
11
9
  @output = output
12
10
  @options = options
13
- end
14
-
15
- def configs
16
- @options.to_h.map { |key, value| ['-c', "#{key}=#{value}"] unless FIXED.include?(key) }.compact
11
+ @full_command = [ options.command, @source, @output]
17
12
  end
18
13
 
19
14
  def full_command
20
- command = [options.command, @source, @output]
15
+ add_option('--psm', options.psm)
16
+ add_option('--oem', options.oem)
17
+ add_option('-l', options.lang)
18
+ add_option('--tessdata_dir', options.tessdata_dir)
19
+ add_option('--user_words', options.user_words)
20
+ add_option('--user_patterns', options.user_patterns)
21
21
 
22
- command << ['--psm', options.psm.to_s] if options.psm
23
- command << ['--oem', options.oem.to_s] if options.oem
24
- command << ['-l', options.lang] if options.lang
22
+ other_configs
25
23
 
26
- command << ['--tessdata_dir', options.tessdata_dir] if options.tessdata_dir
27
- command << ['--user_words', options.user_words] if options.user_words
28
- command << ['--user_patterns', options.user_patterns] if options.user_patterns
24
+ add_option(options.config_file)
29
25
 
30
- command << configs
26
+ @full_command
27
+ end
31
28
 
32
- command << options.config_file.to_s if options.config_file
29
+ def add_option(*args)
30
+ return unless args.last
33
31
 
34
- command.flatten
32
+ @full_command << args.map(&:to_s)
33
+ end
34
+
35
+ def other_configs
36
+ @options.to_h.map do |key, value|
37
+ next if FIXED.include?(key)
38
+
39
+ add_option('-c', "#{key}=#{value}")
40
+ end
35
41
  end
36
42
 
37
43
  def run
38
- Open3.capture2e(*full_command)
44
+ output, status = Open3.capture2e(*full_command.flatten)
45
+
46
+ return output if status.success?
47
+
48
+ raise RTesseract::Error.new(output)
39
49
  end
40
50
  end
41
51
  end
data/lib/rtesseract/pdf.rb CHANGED
@@ -1,18 +1,13 @@
1
- require 'tmpdir'
2
-
3
1
  class RTesseract
4
2
  module Pdf
5
- def self.temp_dir
6
- @file_path = Pathname.new(Dir.tmpdir)
7
- end
3
+ extend Base
8
4
 
9
5
  def self.run(source, options)
10
- name = "rtesseract_#{SecureRandom.uuid}"
11
6
  options.tessedit_create_pdf = 1
12
7
 
13
- RTesseract::Command.new(source, temp_dir.join(name).to_s, options).run
8
+ RTesseract::Command.new(source, temp_file, options).run
14
9
 
15
- File.open(temp_dir.join("#{name}.pdf").to_s, 'r')
10
+ File.open(temp_file('.pdf'), 'r')
16
11
  end
17
12
  end
18
13
  end
data/lib/rtesseract/text.rb CHANGED
@@ -3,7 +3,7 @@ require 'open3'
3
3
  class RTesseract
4
4
  module Text
5
5
  def self.run(source, options)
6
- RTesseract::Command.new(source, 'stdout', options).run.first
6
+ RTesseract::Command.new(source, 'stdout', options).run
7
7
  end
8
8
  end
9
9
  end
data/lib/rtesseract/tsv.rb CHANGED
@@ -1,18 +1,13 @@
1
- require 'tmpdir'
2
-
3
1
  class RTesseract
4
2
  module Tsv
5
- def self.temp_dir
6
- @file_path = Pathname.new(Dir.tmpdir)
7
- end
3
+ extend Base
8
4
 
9
5
  def self.run(source, options)
10
- name = "rtesseract_#{SecureRandom.uuid}"
11
6
  options.tessedit_create_tsv = 1
12
7
 
13
- RTesseract::Command.new(source, temp_dir.join(name).to_s, options).run
8
+ RTesseract::Command.new(source, temp_file, options).run
14
9
 
15
- File.open(temp_dir.join("#{name}.tsv").to_s, 'r')
10
+ File.open(temp_file('.tsv'), 'r')
16
11
  end
17
12
  end
18
13
  end
data/lib/rtesseract/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class RTesseract
2
- VERSION = '3.0.0'.freeze
2
+ VERSION = '3.0.1'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danilo Jeremias da Silva
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-01-01 00:00:00.000000000 Z
11
+ date: 2019-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -115,6 +115,7 @@ files:
115
115
  - bin/console
116
116
  - bin/setup
117
117
  - lib/rtesseract.rb
118
+ - lib/rtesseract/base.rb
118
119
  - lib/rtesseract/box.rb
119
120
  - lib/rtesseract/check.rb
120
121
  - lib/rtesseract/command.rb