rtesseract 3.0.0 → 3.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +1 -0
- data/README.md +19 -18
- data/lib/rtesseract.rb +1 -2
- data/lib/rtesseract/base.rb +12 -0
- data/lib/rtesseract/box.rb +3 -7
- data/lib/rtesseract/command.rb +27 -17
- data/lib/rtesseract/pdf.rb +3 -8
- data/lib/rtesseract/text.rb +1 -1
- data/lib/rtesseract/tsv.rb +3 -8
- data/lib/rtesseract/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c40e571fce623118c523c005a8d8404d99390ae90fa73d43a50ee873b103d431
|
4
|
+
data.tar.gz: a4d7325c79141f3bb9625def8b28b1ef808f6dc7e6ab59abefb2178a73b59277
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 39198fd34d327c75172433a3c811ced60b6cf4be19bb7071a922ea66a9336f3023f0d1edaa70e0f9cf695c95e9b9eb1928690449213f6740b0ad4f5779be3d61
|
7
|
+
data.tar.gz: 97684d198ce69b722e03c6f2bde257eac62f772d23e5fd2695eafe47a053c72731f0335c95e779ded290cc50d70c4fc21d3841ca27691c80856c02655630c6a4
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -57,7 +57,7 @@ It's very simple to use rtesseract.
|
|
57
57
|
|
58
58
|
```ruby
|
59
59
|
image = RTesseract.new("my_image.jpg")
|
60
|
-
image.to_tsv # Getting open file of
|
60
|
+
image.to_tsv # Getting open file of tsv
|
61
61
|
```
|
62
62
|
|
63
63
|
This will preserve the image colors, pictures and structure in the generated pdf.
|
@@ -100,23 +100,24 @@ This will preserve the image colors, pictures and structure in the generated pdf
|
|
100
100
|
RTesseract.new('test_words.png').to_box
|
101
101
|
```
|
102
102
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
103
|
+
```ruby
|
104
|
+
=> [
|
105
|
+
{:word => 'If', :x_start=>52, :y_start=>13, :x_end=>63, :y_end=>27},
|
106
|
+
{:word => 'you', :x_start=>69, :y_start=>17, :x_end=>100, :y_end=>31},
|
107
|
+
{:word => 'are', :x_start=>108, :y_start=>17, :x_end=>136, :y_end=>27},
|
108
|
+
{:word => 'a', :x_start=>143, :y_start=>17, :x_end=>151, :y_end=>27},
|
109
|
+
{:word => 'friend,', :x_start=>158, :y_start=>13, :x_end=>214, :y_end=>29},
|
110
|
+
{:word => 'you', :x_start=>51, :y_start=>39, :x_end=>82, :y_end=>53},
|
111
|
+
{:word => 'speak', :x_start=>90, :y_start=>35, :x_end=>140, :y_end=>53},
|
112
|
+
{:word => 'the', :x_start=>146, :y_start=>35, :x_end=>174, :y_end=>49},
|
113
|
+
{:word => 'password,', :x_start=>182, :y_start=>35, :x_end=>267, :y_end=>53},
|
114
|
+
{:word => 'and', :x_start=>51, :y_start=>57, :x_end=>81, :y_end=>71},
|
115
|
+
{:word => 'the', :x_start=>89, :y_start=>57, :x_end=>117, :y_end=>71},
|
116
|
+
{:word => 'doors', :x_start=>124, :y_start=>57, :x_end=>172, :y_end=>71},
|
117
|
+
{:word => 'will', :x_start=>180, :y_start=>57, :x_end=>208, :y_end=>71},
|
118
|
+
{:word => 'open.', :x_start=>216, :y_start=>61, :x_end=>263, :y_end=>75}
|
119
|
+
]
|
120
|
+
```
|
120
121
|
|
121
122
|
## Development
|
122
123
|
|
data/lib/rtesseract.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "rtesseract/check"
|
2
2
|
require "rtesseract/configuration"
|
3
3
|
require "rtesseract/command"
|
4
|
+
require "rtesseract/base"
|
4
5
|
require "rtesseract/text"
|
5
6
|
require "rtesseract/pdf"
|
6
7
|
require "rtesseract/box"
|
@@ -9,8 +10,6 @@ require "rtesseract/tsv"
|
|
9
10
|
class RTesseract
|
10
11
|
class Error < StandardError; end
|
11
12
|
|
12
|
-
check_version!
|
13
|
-
|
14
13
|
attr_reader :config, :source
|
15
14
|
|
16
15
|
def initialize(src = '', options = {})
|
data/lib/rtesseract/box.rb
CHANGED
@@ -1,19 +1,15 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
-
require 'tmpdir'
|
3
2
|
|
4
3
|
class RTesseract
|
5
4
|
module Box
|
6
|
-
|
7
|
-
@file_path = Pathname.new(Dir.tmpdir)
|
8
|
-
end
|
5
|
+
extend RTesseract::Base
|
9
6
|
|
10
7
|
def self.run(source, options)
|
11
|
-
name = "rtesseract_#{SecureRandom.uuid}"
|
12
8
|
options.tessedit_create_hocr = 1
|
13
9
|
|
14
|
-
RTesseract::Command.new(source,
|
10
|
+
RTesseract::Command.new(source, temp_file, options).run
|
15
11
|
|
16
|
-
parse(
|
12
|
+
parse(File.read(temp_file('.hocr')))
|
17
13
|
end
|
18
14
|
|
19
15
|
def self.parse(content)
|
data/lib/rtesseract/command.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'tmpdir'
|
2
|
-
|
3
1
|
class RTesseract
|
4
2
|
class Command
|
5
3
|
FIXED = [:command, :psm, :oem, :lang, :tessdata_dir, :user_words, :user_patterns, :config_file]
|
@@ -10,32 +8,44 @@ class RTesseract
|
|
10
8
|
@source = source
|
11
9
|
@output = output
|
12
10
|
@options = options
|
13
|
-
|
14
|
-
|
15
|
-
def configs
|
16
|
-
@options.to_h.map { |key, value| ['-c', "#{key}=#{value}"] unless FIXED.include?(key) }.compact
|
11
|
+
@full_command = [ options.command, @source, @output]
|
17
12
|
end
|
18
13
|
|
19
14
|
def full_command
|
20
|
-
|
15
|
+
add_option('--psm', options.psm)
|
16
|
+
add_option('--oem', options.oem)
|
17
|
+
add_option('-l', options.lang)
|
18
|
+
add_option('--tessdata_dir', options.tessdata_dir)
|
19
|
+
add_option('--user_words', options.user_words)
|
20
|
+
add_option('--user_patterns', options.user_patterns)
|
21
21
|
|
22
|
-
|
23
|
-
command << ['--oem', options.oem.to_s] if options.oem
|
24
|
-
command << ['-l', options.lang] if options.lang
|
22
|
+
other_configs
|
25
23
|
|
26
|
-
|
27
|
-
command << ['--user_words', options.user_words] if options.user_words
|
28
|
-
command << ['--user_patterns', options.user_patterns] if options.user_patterns
|
24
|
+
add_option(options.config_file)
|
29
25
|
|
30
|
-
|
26
|
+
@full_command
|
27
|
+
end
|
31
28
|
|
32
|
-
|
29
|
+
def add_option(*args)
|
30
|
+
return unless args.last
|
33
31
|
|
34
|
-
|
32
|
+
@full_command << args.map(&:to_s)
|
33
|
+
end
|
34
|
+
|
35
|
+
def other_configs
|
36
|
+
@options.to_h.map do |key, value|
|
37
|
+
next if FIXED.include?(key)
|
38
|
+
|
39
|
+
add_option('-c', "#{key}=#{value}")
|
40
|
+
end
|
35
41
|
end
|
36
42
|
|
37
43
|
def run
|
38
|
-
Open3.capture2e(*full_command)
|
44
|
+
output, status = Open3.capture2e(*full_command.flatten)
|
45
|
+
|
46
|
+
return output if status.success?
|
47
|
+
|
48
|
+
raise RTesseract::Error.new(output)
|
39
49
|
end
|
40
50
|
end
|
41
51
|
end
|
data/lib/rtesseract/pdf.rb
CHANGED
@@ -1,18 +1,13 @@
|
|
1
|
-
require 'tmpdir'
|
2
|
-
|
3
1
|
class RTesseract
|
4
2
|
module Pdf
|
5
|
-
|
6
|
-
@file_path = Pathname.new(Dir.tmpdir)
|
7
|
-
end
|
3
|
+
extend Base
|
8
4
|
|
9
5
|
def self.run(source, options)
|
10
|
-
name = "rtesseract_#{SecureRandom.uuid}"
|
11
6
|
options.tessedit_create_pdf = 1
|
12
7
|
|
13
|
-
RTesseract::Command.new(source,
|
8
|
+
RTesseract::Command.new(source, temp_file, options).run
|
14
9
|
|
15
|
-
File.open(
|
10
|
+
File.open(temp_file('.pdf'), 'r')
|
16
11
|
end
|
17
12
|
end
|
18
13
|
end
|
data/lib/rtesseract/text.rb
CHANGED
data/lib/rtesseract/tsv.rb
CHANGED
@@ -1,18 +1,13 @@
|
|
1
|
-
require 'tmpdir'
|
2
|
-
|
3
1
|
class RTesseract
|
4
2
|
module Tsv
|
5
|
-
|
6
|
-
@file_path = Pathname.new(Dir.tmpdir)
|
7
|
-
end
|
3
|
+
extend Base
|
8
4
|
|
9
5
|
def self.run(source, options)
|
10
|
-
name = "rtesseract_#{SecureRandom.uuid}"
|
11
6
|
options.tessedit_create_tsv = 1
|
12
7
|
|
13
|
-
RTesseract::Command.new(source,
|
8
|
+
RTesseract::Command.new(source, temp_file, options).run
|
14
9
|
|
15
|
-
File.open(
|
10
|
+
File.open(temp_file('.tsv'), 'r')
|
16
11
|
end
|
17
12
|
end
|
18
13
|
end
|
data/lib/rtesseract/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 3.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-01-
|
11
|
+
date: 2019-01-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -115,6 +115,7 @@ files:
|
|
115
115
|
- bin/console
|
116
116
|
- bin/setup
|
117
117
|
- lib/rtesseract.rb
|
118
|
+
- lib/rtesseract/base.rb
|
118
119
|
- lib/rtesseract/box.rb
|
119
120
|
- lib/rtesseract/check.rb
|
120
121
|
- lib/rtesseract/command.rb
|