rtesseract 3.0.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +1 -0
- data/README.md +19 -18
- data/lib/rtesseract.rb +1 -2
- data/lib/rtesseract/base.rb +12 -0
- data/lib/rtesseract/box.rb +3 -7
- data/lib/rtesseract/command.rb +27 -17
- data/lib/rtesseract/pdf.rb +3 -8
- data/lib/rtesseract/text.rb +1 -1
- data/lib/rtesseract/tsv.rb +3 -8
- data/lib/rtesseract/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c40e571fce623118c523c005a8d8404d99390ae90fa73d43a50ee873b103d431
|
4
|
+
data.tar.gz: a4d7325c79141f3bb9625def8b28b1ef808f6dc7e6ab59abefb2178a73b59277
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 39198fd34d327c75172433a3c811ced60b6cf4be19bb7071a922ea66a9336f3023f0d1edaa70e0f9cf695c95e9b9eb1928690449213f6740b0ad4f5779be3d61
|
7
|
+
data.tar.gz: 97684d198ce69b722e03c6f2bde257eac62f772d23e5fd2695eafe47a053c72731f0335c95e779ded290cc50d70c4fc21d3841ca27691c80856c02655630c6a4
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -57,7 +57,7 @@ It's very simple to use rtesseract.
|
|
57
57
|
|
58
58
|
```ruby
|
59
59
|
image = RTesseract.new("my_image.jpg")
|
60
|
-
image.to_tsv # Getting open file of
|
60
|
+
image.to_tsv # Getting open file of tsv
|
61
61
|
```
|
62
62
|
|
63
63
|
This will preserve the image colors, pictures and structure in the generated pdf.
|
@@ -100,23 +100,24 @@ This will preserve the image colors, pictures and structure in the generated pdf
|
|
100
100
|
RTesseract.new('test_words.png').to_box
|
101
101
|
```
|
102
102
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
103
|
+
```ruby
|
104
|
+
=> [
|
105
|
+
{:word => 'If', :x_start=>52, :y_start=>13, :x_end=>63, :y_end=>27},
|
106
|
+
{:word => 'you', :x_start=>69, :y_start=>17, :x_end=>100, :y_end=>31},
|
107
|
+
{:word => 'are', :x_start=>108, :y_start=>17, :x_end=>136, :y_end=>27},
|
108
|
+
{:word => 'a', :x_start=>143, :y_start=>17, :x_end=>151, :y_end=>27},
|
109
|
+
{:word => 'friend,', :x_start=>158, :y_start=>13, :x_end=>214, :y_end=>29},
|
110
|
+
{:word => 'you', :x_start=>51, :y_start=>39, :x_end=>82, :y_end=>53},
|
111
|
+
{:word => 'speak', :x_start=>90, :y_start=>35, :x_end=>140, :y_end=>53},
|
112
|
+
{:word => 'the', :x_start=>146, :y_start=>35, :x_end=>174, :y_end=>49},
|
113
|
+
{:word => 'password,', :x_start=>182, :y_start=>35, :x_end=>267, :y_end=>53},
|
114
|
+
{:word => 'and', :x_start=>51, :y_start=>57, :x_end=>81, :y_end=>71},
|
115
|
+
{:word => 'the', :x_start=>89, :y_start=>57, :x_end=>117, :y_end=>71},
|
116
|
+
{:word => 'doors', :x_start=>124, :y_start=>57, :x_end=>172, :y_end=>71},
|
117
|
+
{:word => 'will', :x_start=>180, :y_start=>57, :x_end=>208, :y_end=>71},
|
118
|
+
{:word => 'open.', :x_start=>216, :y_start=>61, :x_end=>263, :y_end=>75}
|
119
|
+
]
|
120
|
+
```
|
120
121
|
|
121
122
|
## Development
|
122
123
|
|
data/lib/rtesseract.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "rtesseract/check"
|
2
2
|
require "rtesseract/configuration"
|
3
3
|
require "rtesseract/command"
|
4
|
+
require "rtesseract/base"
|
4
5
|
require "rtesseract/text"
|
5
6
|
require "rtesseract/pdf"
|
6
7
|
require "rtesseract/box"
|
@@ -9,8 +10,6 @@ require "rtesseract/tsv"
|
|
9
10
|
class RTesseract
|
10
11
|
class Error < StandardError; end
|
11
12
|
|
12
|
-
check_version!
|
13
|
-
|
14
13
|
attr_reader :config, :source
|
15
14
|
|
16
15
|
def initialize(src = '', options = {})
|
data/lib/rtesseract/box.rb
CHANGED
@@ -1,19 +1,15 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
-
require 'tmpdir'
|
3
2
|
|
4
3
|
class RTesseract
|
5
4
|
module Box
|
6
|
-
|
7
|
-
@file_path = Pathname.new(Dir.tmpdir)
|
8
|
-
end
|
5
|
+
extend RTesseract::Base
|
9
6
|
|
10
7
|
def self.run(source, options)
|
11
|
-
name = "rtesseract_#{SecureRandom.uuid}"
|
12
8
|
options.tessedit_create_hocr = 1
|
13
9
|
|
14
|
-
RTesseract::Command.new(source,
|
10
|
+
RTesseract::Command.new(source, temp_file, options).run
|
15
11
|
|
16
|
-
parse(
|
12
|
+
parse(File.read(temp_file('.hocr')))
|
17
13
|
end
|
18
14
|
|
19
15
|
def self.parse(content)
|
data/lib/rtesseract/command.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'tmpdir'
|
2
|
-
|
3
1
|
class RTesseract
|
4
2
|
class Command
|
5
3
|
FIXED = [:command, :psm, :oem, :lang, :tessdata_dir, :user_words, :user_patterns, :config_file]
|
@@ -10,32 +8,44 @@ class RTesseract
|
|
10
8
|
@source = source
|
11
9
|
@output = output
|
12
10
|
@options = options
|
13
|
-
|
14
|
-
|
15
|
-
def configs
|
16
|
-
@options.to_h.map { |key, value| ['-c', "#{key}=#{value}"] unless FIXED.include?(key) }.compact
|
11
|
+
@full_command = [ options.command, @source, @output]
|
17
12
|
end
|
18
13
|
|
19
14
|
def full_command
|
20
|
-
|
15
|
+
add_option('--psm', options.psm)
|
16
|
+
add_option('--oem', options.oem)
|
17
|
+
add_option('-l', options.lang)
|
18
|
+
add_option('--tessdata_dir', options.tessdata_dir)
|
19
|
+
add_option('--user_words', options.user_words)
|
20
|
+
add_option('--user_patterns', options.user_patterns)
|
21
21
|
|
22
|
-
|
23
|
-
command << ['--oem', options.oem.to_s] if options.oem
|
24
|
-
command << ['-l', options.lang] if options.lang
|
22
|
+
other_configs
|
25
23
|
|
26
|
-
|
27
|
-
command << ['--user_words', options.user_words] if options.user_words
|
28
|
-
command << ['--user_patterns', options.user_patterns] if options.user_patterns
|
24
|
+
add_option(options.config_file)
|
29
25
|
|
30
|
-
|
26
|
+
@full_command
|
27
|
+
end
|
31
28
|
|
32
|
-
|
29
|
+
def add_option(*args)
|
30
|
+
return unless args.last
|
33
31
|
|
34
|
-
|
32
|
+
@full_command << args.map(&:to_s)
|
33
|
+
end
|
34
|
+
|
35
|
+
def other_configs
|
36
|
+
@options.to_h.map do |key, value|
|
37
|
+
next if FIXED.include?(key)
|
38
|
+
|
39
|
+
add_option('-c', "#{key}=#{value}")
|
40
|
+
end
|
35
41
|
end
|
36
42
|
|
37
43
|
def run
|
38
|
-
Open3.capture2e(*full_command)
|
44
|
+
output, status = Open3.capture2e(*full_command.flatten)
|
45
|
+
|
46
|
+
return output if status.success?
|
47
|
+
|
48
|
+
raise RTesseract::Error.new(output)
|
39
49
|
end
|
40
50
|
end
|
41
51
|
end
|
data/lib/rtesseract/pdf.rb
CHANGED
@@ -1,18 +1,13 @@
|
|
1
|
-
require 'tmpdir'
|
2
|
-
|
3
1
|
class RTesseract
|
4
2
|
module Pdf
|
5
|
-
|
6
|
-
@file_path = Pathname.new(Dir.tmpdir)
|
7
|
-
end
|
3
|
+
extend Base
|
8
4
|
|
9
5
|
def self.run(source, options)
|
10
|
-
name = "rtesseract_#{SecureRandom.uuid}"
|
11
6
|
options.tessedit_create_pdf = 1
|
12
7
|
|
13
|
-
RTesseract::Command.new(source,
|
8
|
+
RTesseract::Command.new(source, temp_file, options).run
|
14
9
|
|
15
|
-
File.open(
|
10
|
+
File.open(temp_file('.pdf'), 'r')
|
16
11
|
end
|
17
12
|
end
|
18
13
|
end
|
data/lib/rtesseract/text.rb
CHANGED
data/lib/rtesseract/tsv.rb
CHANGED
@@ -1,18 +1,13 @@
|
|
1
|
-
require 'tmpdir'
|
2
|
-
|
3
1
|
class RTesseract
|
4
2
|
module Tsv
|
5
|
-
|
6
|
-
@file_path = Pathname.new(Dir.tmpdir)
|
7
|
-
end
|
3
|
+
extend Base
|
8
4
|
|
9
5
|
def self.run(source, options)
|
10
|
-
name = "rtesseract_#{SecureRandom.uuid}"
|
11
6
|
options.tessedit_create_tsv = 1
|
12
7
|
|
13
|
-
RTesseract::Command.new(source,
|
8
|
+
RTesseract::Command.new(source, temp_file, options).run
|
14
9
|
|
15
|
-
File.open(
|
10
|
+
File.open(temp_file('.tsv'), 'r')
|
16
11
|
end
|
17
12
|
end
|
18
13
|
end
|
data/lib/rtesseract/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 3.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-01-
|
11
|
+
date: 2019-01-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -115,6 +115,7 @@ files:
|
|
115
115
|
- bin/console
|
116
116
|
- bin/setup
|
117
117
|
- lib/rtesseract.rb
|
118
|
+
- lib/rtesseract/base.rb
|
118
119
|
- lib/rtesseract/box.rb
|
119
120
|
- lib/rtesseract/check.rb
|
120
121
|
- lib/rtesseract/command.rb
|