rtesseract 3.0.0 → 3.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 00e35fcd674159d47c7252be586ba214c42f374d1e671cab2ae8a47672237d92
4
- data.tar.gz: e6601b67346092513af4399c49bb161b99afccb84f277395fe8f27417d2f61da
3
+ metadata.gz: c40e571fce623118c523c005a8d8404d99390ae90fa73d43a50ee873b103d431
4
+ data.tar.gz: a4d7325c79141f3bb9625def8b28b1ef808f6dc7e6ab59abefb2178a73b59277
5
5
  SHA512:
6
- metadata.gz: 92713fe105ad0b96ca28fc61f530c4dcfb8a6905aeb9d7206dfb951afab7f482e10b1ffc397f5d2e8cf86f9bd0a69be65e8da791ba0dd32655c29341bf0a67af
7
- data.tar.gz: 4601e51dc96489292cfd0b54653f149bfcde9a830b3c3071ed2bc55778ab393405c267172ea06aa1e2ad3f64c615b845ba0aaf9a5317288555ee50e913bc196f
6
+ metadata.gz: 39198fd34d327c75172433a3c811ced60b6cf4be19bb7071a922ea66a9336f3023f0d1edaa70e0f9cf695c95e9b9eb1928690449213f6740b0ad4f5779be3d61
7
+ data.tar.gz: 97684d198ce69b722e03c6f2bde257eac62f772d23e5fd2695eafe47a053c72731f0335c95e779ded290cc50d70c4fc21d3841ca27691c80856c02655630c6a4
data/.gitignore CHANGED
@@ -10,3 +10,4 @@
10
10
 
11
11
  # rspec failure tracking
12
12
  .rspec_status
13
+ *.gem
@@ -14,3 +14,4 @@ before_install:
14
14
  rvm:
15
15
  - 2.4.5
16
16
  - 2.5.3
17
+ - 2.6.0
data/README.md CHANGED
@@ -57,7 +57,7 @@ It's very simple to use rtesseract.
57
57
 
58
58
  ```ruby
59
59
  image = RTesseract.new("my_image.jpg")
60
- image.to_tsv # Getting open file of pdf
60
+ image.to_tsv # Getting open file of tsv
61
61
  ```
62
62
 
63
63
  This will preserve the image colors, pictures and structure in the generated pdf.
@@ -100,23 +100,24 @@ This will preserve the image colors, pictures and structure in the generated pdf
100
100
  RTesseract.new('test_words.png').to_box
101
101
  ```
102
102
 
103
- # => [
104
- # {:word => 'If', :x_start=>52, :y_start=>13, :x_end=>63, :y_end=>27},
105
- # {:word => 'you', :x_start=>69, :y_start=>17, :x_end=>100, :y_end=>31},
106
- # {:word => 'are', :x_start=>108, :y_start=>17, :x_end=>136, :y_end=>27},
107
- # {:word => 'a', :x_start=>143, :y_start=>17, :x_end=>151, :y_end=>27},
108
- # {:word => 'friend,', :x_start=>158, :y_start=>13, :x_end=>214, :y_end=>29},
109
- # {:word => 'you', :x_start=>51, :y_start=>39, :x_end=>82, :y_end=>53},
110
- # {:word => 'speak', :x_start=>90, :y_start=>35, :x_end=>140, :y_end=>53},
111
- # {:word => 'the', :x_start=>146, :y_start=>35, :x_end=>174, :y_end=>49},
112
- # {:word => 'password,', :x_start=>182, :y_start=>35, :x_end=>267, :y_end=>53},
113
- # {:word => 'and', :x_start=>51, :y_start=>57, :x_end=>81, :y_end=>71},
114
- # {:word => 'the', :x_start=>89, :y_start=>57, :x_end=>117, :y_end=>71},
115
- # {:word => 'doors', :x_start=>124, :y_start=>57, :x_end=>172, :y_end=>71},
116
- # {:word => 'will', :x_start=>180, :y_start=>57, :x_end=>208, :y_end=>71},
117
- # {:word => 'open.', :x_start=>216, :y_start=>61, :x_end=>263, :y_end=>75}
118
- # ]
119
-
103
+ ```ruby
104
+ => [
105
+ {:word => 'If', :x_start=>52, :y_start=>13, :x_end=>63, :y_end=>27},
106
+ {:word => 'you', :x_start=>69, :y_start=>17, :x_end=>100, :y_end=>31},
107
+ {:word => 'are', :x_start=>108, :y_start=>17, :x_end=>136, :y_end=>27},
108
+ {:word => 'a', :x_start=>143, :y_start=>17, :x_end=>151, :y_end=>27},
109
+ {:word => 'friend,', :x_start=>158, :y_start=>13, :x_end=>214, :y_end=>29},
110
+ {:word => 'you', :x_start=>51, :y_start=>39, :x_end=>82, :y_end=>53},
111
+ {:word => 'speak', :x_start=>90, :y_start=>35, :x_end=>140, :y_end=>53},
112
+ {:word => 'the', :x_start=>146, :y_start=>35, :x_end=>174, :y_end=>49},
113
+ {:word => 'password,', :x_start=>182, :y_start=>35, :x_end=>267, :y_end=>53},
114
+ {:word => 'and', :x_start=>51, :y_start=>57, :x_end=>81, :y_end=>71},
115
+ {:word => 'the', :x_start=>89, :y_start=>57, :x_end=>117, :y_end=>71},
116
+ {:word => 'doors', :x_start=>124, :y_start=>57, :x_end=>172, :y_end=>71},
117
+ {:word => 'will', :x_start=>180, :y_start=>57, :x_end=>208, :y_end=>71},
118
+ {:word => 'open.', :x_start=>216, :y_start=>61, :x_end=>263, :y_end=>75}
119
+ ]
120
+ ```
120
121
 
121
122
  ## Development
122
123
 
@@ -1,6 +1,7 @@
1
1
  require "rtesseract/check"
2
2
  require "rtesseract/configuration"
3
3
  require "rtesseract/command"
4
+ require "rtesseract/base"
4
5
  require "rtesseract/text"
5
6
  require "rtesseract/pdf"
6
7
  require "rtesseract/box"
@@ -9,8 +10,6 @@ require "rtesseract/tsv"
9
10
  class RTesseract
10
11
  class Error < StandardError; end
11
12
 
12
- check_version!
13
-
14
13
  attr_reader :config, :source
15
14
 
16
15
  def initialize(src = '', options = {})
@@ -0,0 +1,12 @@
1
+ require 'tmpdir'
2
+ require 'securerandom'
3
+
4
+ class RTesseract
5
+ module Base
6
+ def temp_file(ext = '')
7
+ @rand_file ||= "rtesseract_#{SecureRandom.uuid}"
8
+
9
+ Pathname.new(Dir.tmpdir).join("#{@rand_file}#{ext}").to_s
10
+ end
11
+ end
12
+ end
@@ -1,19 +1,15 @@
1
1
  require 'nokogiri'
2
- require 'tmpdir'
3
2
 
4
3
  class RTesseract
5
4
  module Box
6
- def self.temp_dir
7
- @file_path = Pathname.new(Dir.tmpdir)
8
- end
5
+ extend RTesseract::Base
9
6
 
10
7
  def self.run(source, options)
11
- name = "rtesseract_#{SecureRandom.uuid}"
12
8
  options.tessedit_create_hocr = 1
13
9
 
14
- RTesseract::Command.new(source, temp_dir.join(name).to_s, options).run
10
+ RTesseract::Command.new(source, temp_file, options).run
15
11
 
16
- parse(temp_dir.join("#{name}.hocr").read)
12
+ parse(File.read(temp_file('.hocr')))
17
13
  end
18
14
 
19
15
  def self.parse(content)
@@ -1,5 +1,3 @@
1
- require 'tmpdir'
2
-
3
1
  class RTesseract
4
2
  class Command
5
3
  FIXED = [:command, :psm, :oem, :lang, :tessdata_dir, :user_words, :user_patterns, :config_file]
@@ -10,32 +8,44 @@ class RTesseract
10
8
  @source = source
11
9
  @output = output
12
10
  @options = options
13
- end
14
-
15
- def configs
16
- @options.to_h.map { |key, value| ['-c', "#{key}=#{value}"] unless FIXED.include?(key) }.compact
11
+ @full_command = [ options.command, @source, @output]
17
12
  end
18
13
 
19
14
  def full_command
20
- command = [options.command, @source, @output]
15
+ add_option('--psm', options.psm)
16
+ add_option('--oem', options.oem)
17
+ add_option('-l', options.lang)
18
+ add_option('--tessdata_dir', options.tessdata_dir)
19
+ add_option('--user_words', options.user_words)
20
+ add_option('--user_patterns', options.user_patterns)
21
21
 
22
- command << ['--psm', options.psm.to_s] if options.psm
23
- command << ['--oem', options.oem.to_s] if options.oem
24
- command << ['-l', options.lang] if options.lang
22
+ other_configs
25
23
 
26
- command << ['--tessdata_dir', options.tessdata_dir] if options.tessdata_dir
27
- command << ['--user_words', options.user_words] if options.user_words
28
- command << ['--user_patterns', options.user_patterns] if options.user_patterns
24
+ add_option(options.config_file)
29
25
 
30
- command << configs
26
+ @full_command
27
+ end
31
28
 
32
- command << options.config_file.to_s if options.config_file
29
+ def add_option(*args)
30
+ return unless args.last
33
31
 
34
- command.flatten
32
+ @full_command << args.map(&:to_s)
33
+ end
34
+
35
+ def other_configs
36
+ @options.to_h.map do |key, value|
37
+ next if FIXED.include?(key)
38
+
39
+ add_option('-c', "#{key}=#{value}")
40
+ end
35
41
  end
36
42
 
37
43
  def run
38
- Open3.capture2e(*full_command)
44
+ output, status = Open3.capture2e(*full_command.flatten)
45
+
46
+ return output if status.success?
47
+
48
+ raise RTesseract::Error.new(output)
39
49
  end
40
50
  end
41
51
  end
@@ -1,18 +1,13 @@
1
- require 'tmpdir'
2
-
3
1
  class RTesseract
4
2
  module Pdf
5
- def self.temp_dir
6
- @file_path = Pathname.new(Dir.tmpdir)
7
- end
3
+ extend Base
8
4
 
9
5
  def self.run(source, options)
10
- name = "rtesseract_#{SecureRandom.uuid}"
11
6
  options.tessedit_create_pdf = 1
12
7
 
13
- RTesseract::Command.new(source, temp_dir.join(name).to_s, options).run
8
+ RTesseract::Command.new(source, temp_file, options).run
14
9
 
15
- File.open(temp_dir.join("#{name}.pdf").to_s, 'r')
10
+ File.open(temp_file('.pdf'), 'r')
16
11
  end
17
12
  end
18
13
  end
@@ -3,7 +3,7 @@ require 'open3'
3
3
  class RTesseract
4
4
  module Text
5
5
  def self.run(source, options)
6
- RTesseract::Command.new(source, 'stdout', options).run.first
6
+ RTesseract::Command.new(source, 'stdout', options).run
7
7
  end
8
8
  end
9
9
  end
@@ -1,18 +1,13 @@
1
- require 'tmpdir'
2
-
3
1
  class RTesseract
4
2
  module Tsv
5
- def self.temp_dir
6
- @file_path = Pathname.new(Dir.tmpdir)
7
- end
3
+ extend Base
8
4
 
9
5
  def self.run(source, options)
10
- name = "rtesseract_#{SecureRandom.uuid}"
11
6
  options.tessedit_create_tsv = 1
12
7
 
13
- RTesseract::Command.new(source, temp_dir.join(name).to_s, options).run
8
+ RTesseract::Command.new(source, temp_file, options).run
14
9
 
15
- File.open(temp_dir.join("#{name}.tsv").to_s, 'r')
10
+ File.open(temp_file('.tsv'), 'r')
16
11
  end
17
12
  end
18
13
  end
@@ -1,3 +1,3 @@
1
1
  class RTesseract
2
- VERSION = '3.0.0'.freeze
2
+ VERSION = '3.0.1'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danilo Jeremias da Silva
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-01-01 00:00:00.000000000 Z
11
+ date: 2019-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -115,6 +115,7 @@ files:
115
115
  - bin/console
116
116
  - bin/setup
117
117
  - lib/rtesseract.rb
118
+ - lib/rtesseract/base.rb
118
119
  - lib/rtesseract/box.rb
119
120
  - lib/rtesseract/check.rb
120
121
  - lib/rtesseract/command.rb