rtesseract 3.0.5 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.hound.yml +3 -0
- data/.rubocop.yml +15 -3
- data/Gemfile +2 -0
- data/Gemfile.lock +19 -26
- data/Rakefile +2 -0
- data/bin/console +1 -0
- data/lib/rtesseract.rb +2 -0
- data/lib/rtesseract/base.rb +2 -0
- data/lib/rtesseract/box.rb +32 -20
- data/lib/rtesseract/check.rb +2 -0
- data/lib/rtesseract/command.rb +2 -0
- data/lib/rtesseract/configuration.rb +2 -0
- data/lib/rtesseract/pdf.rb +2 -0
- data/lib/rtesseract/text.rb +2 -0
- data/lib/rtesseract/tsv.rb +2 -0
- data/lib/rtesseract/version.rb +3 -1
- data/rtesseract.gemspec +5 -6
- metadata +3 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 43dd931b6649c05a5cb484f3fc0548075d088e5682bbbea46d7ac3202a9b607c
|
4
|
+
data.tar.gz: 1b300fa5a83f458c986dabde158b3ff31dfeed6e7c7c3c98a17812fdf62c4bbb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 955b9c159f1a89d681e5add1151d79ad2f0f29fbcd65d5558cb08b4a856f5442b6dfd4ae157b57f5bad869dbf9690a13bd1784bb23a7068a7c8402c8abba2468
|
7
|
+
data.tar.gz: daa3a7aa5ba7549c30f0bb85019c6197acec22854122887a8fd5693b52828f8c1b1210806c6d307f1109ddb784f13f090192884f8ae820941b444055df79de7e
|
data/.hound.yml
ADDED
data/.rubocop.yml
CHANGED
@@ -1,8 +1,20 @@
|
|
1
|
-
Documentation:
|
2
|
-
Enabled: false
|
3
1
|
|
4
|
-
|
2
|
+
Layout/LineLength:
|
5
3
|
Max: 150
|
6
4
|
|
7
5
|
Metrics/BlockLength:
|
8
6
|
Max: 50
|
7
|
+
|
8
|
+
Metrics/AbcSize:
|
9
|
+
Max: 30
|
10
|
+
|
11
|
+
Style/Documentation:
|
12
|
+
Enabled: false
|
13
|
+
Style/HashEachMethods:
|
14
|
+
Enabled: true
|
15
|
+
|
16
|
+
Style/HashTransformKeys:
|
17
|
+
Enabled: true
|
18
|
+
|
19
|
+
Style/HashTransformValues:
|
20
|
+
Enabled: true
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,51 +1,45 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
rtesseract (3.0.5)
|
5
|
-
nokogiri
|
4
|
+
rtesseract (3.1.0)
|
6
5
|
|
7
6
|
GEM
|
8
7
|
remote: https://rubygems.org/
|
9
8
|
specs:
|
10
|
-
coveralls (0.
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
9
|
+
coveralls (0.8.23)
|
10
|
+
json (>= 1.8, < 3)
|
11
|
+
simplecov (~> 0.16.1)
|
12
|
+
term-ansicolor (~> 1.3)
|
13
|
+
thor (>= 0.19.4, < 2.0)
|
14
|
+
tins (~> 1.6)
|
16
15
|
diff-lcs (1.3)
|
17
16
|
docile (1.3.2)
|
18
|
-
|
19
|
-
mime-types-data (~> 3.2015)
|
20
|
-
mime-types-data (3.2019.1009)
|
21
|
-
mini_portile2 (2.4.0)
|
22
|
-
multi_json (1.14.1)
|
23
|
-
nokogiri (1.10.9)
|
24
|
-
mini_portile2 (~> 2.4.0)
|
17
|
+
json (2.3.0)
|
25
18
|
rake (13.0.1)
|
26
|
-
rest-client (1.6.7)
|
27
|
-
mime-types (>= 1.16)
|
28
19
|
rspec (3.9.0)
|
29
20
|
rspec-core (~> 3.9.0)
|
30
21
|
rspec-expectations (~> 3.9.0)
|
31
22
|
rspec-mocks (~> 3.9.0)
|
32
23
|
rspec-core (3.9.1)
|
33
24
|
rspec-support (~> 3.9.1)
|
34
|
-
rspec-expectations (3.9.
|
25
|
+
rspec-expectations (3.9.1)
|
35
26
|
diff-lcs (>= 1.2.0, < 2.0)
|
36
27
|
rspec-support (~> 3.9.0)
|
37
28
|
rspec-mocks (3.9.1)
|
38
29
|
diff-lcs (>= 1.2.0, < 2.0)
|
39
30
|
rspec-support (~> 3.9.0)
|
40
31
|
rspec-support (3.9.2)
|
41
|
-
simplecov (0.
|
32
|
+
simplecov (0.16.1)
|
42
33
|
docile (~> 1.1)
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
34
|
+
json (>= 1.8, < 3)
|
35
|
+
simplecov-html (~> 0.10.0)
|
36
|
+
simplecov-html (0.10.2)
|
37
|
+
sync (0.5.0)
|
38
|
+
term-ansicolor (1.7.1)
|
39
|
+
tins (~> 1.0)
|
40
|
+
thor (1.0.1)
|
41
|
+
tins (1.24.1)
|
42
|
+
sync
|
49
43
|
|
50
44
|
PLATFORMS
|
51
45
|
ruby
|
@@ -56,7 +50,6 @@ DEPENDENCIES
|
|
56
50
|
rake
|
57
51
|
rspec
|
58
52
|
rtesseract!
|
59
|
-
simplecov
|
60
53
|
|
61
54
|
BUNDLED WITH
|
62
55
|
2.1.4
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
data/lib/rtesseract.rb
CHANGED
data/lib/rtesseract/base.rb
CHANGED
data/lib/rtesseract/box.rb
CHANGED
@@ -1,33 +1,45 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
class RTesseract
|
4
4
|
module Box
|
5
5
|
extend RTesseract::Base
|
6
6
|
|
7
|
-
|
8
|
-
|
7
|
+
class << self
|
8
|
+
def run(source, errors, options)
|
9
|
+
options.tessedit_create_hocr = 1
|
9
10
|
|
10
|
-
|
11
|
+
RTesseract::Command.new(source, temp_file, errors, options).run
|
11
12
|
|
12
|
-
|
13
|
-
|
13
|
+
parse(File.read(temp_file('.hocr')))
|
14
|
+
end
|
14
15
|
|
15
|
-
|
16
|
-
|
17
|
-
html.css('span.ocrx_word, span.ocr_word').map do |word|
|
18
|
-
attributes = word.attributes['title'].value.to_s.delete(';').split(' ')
|
19
|
-
word_info(word, attributes)
|
16
|
+
def parse(content)
|
17
|
+
content.lines.map { |line| parse_line(line) }.compact
|
20
18
|
end
|
21
|
-
end
|
22
19
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
20
|
+
def parse_line(line)
|
21
|
+
return unless line.match?(/oc(rx|r)_word/)
|
22
|
+
|
23
|
+
word = line.match(/(?<=>)(.*?)(?=<)/).to_s
|
24
|
+
|
25
|
+
return if word.strip == ''
|
26
|
+
|
27
|
+
word_info(word, parse_position(line))
|
28
|
+
end
|
29
|
+
|
30
|
+
def word_info(word, positions)
|
31
|
+
{
|
32
|
+
word: word,
|
33
|
+
x_start: positions[1].to_i,
|
34
|
+
y_start: positions[2].to_i,
|
35
|
+
x_end: positions[3].to_i,
|
36
|
+
y_end: positions[4].to_i
|
37
|
+
}
|
38
|
+
end
|
39
|
+
|
40
|
+
def parse_position(line)
|
41
|
+
line.match(/(?<=title)(.*?)(?=;)/).to_s.split(' ')
|
42
|
+
end
|
31
43
|
end
|
32
44
|
end
|
33
45
|
end
|
data/lib/rtesseract/check.rb
CHANGED
data/lib/rtesseract/command.rb
CHANGED
data/lib/rtesseract/pdf.rb
CHANGED
data/lib/rtesseract/text.rb
CHANGED
data/lib/rtesseract/tsv.rb
CHANGED
data/lib/rtesseract/version.rb
CHANGED
data/rtesseract.gemspec
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
lib = File.expand_path('lib', __dir__)
|
2
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
5
|
require 'rtesseract/version'
|
@@ -8,9 +10,9 @@ Gem::Specification.new do |spec|
|
|
8
10
|
spec.authors = ['Danilo Jeremias da Silva']
|
9
11
|
spec.email = ['dannnylo@gmail.com']
|
10
12
|
|
11
|
-
spec.summary = 'Ruby library for working with the Tesseract OCR.'
|
12
|
-
spec.description = 'Ruby library for working with the Tesseract OCR.'
|
13
|
-
spec.homepage = 'http://github.com/dannnylo/rtesseract'
|
13
|
+
spec.summary = 'Ruby library for working with the Tesseract OCR.'
|
14
|
+
spec.description = 'Ruby library for working with the Tesseract OCR.'
|
15
|
+
spec.homepage = 'http://github.com/dannnylo/rtesseract'
|
14
16
|
spec.license = 'MIT'
|
15
17
|
|
16
18
|
# Specify which files should be added to the gem when it is released.
|
@@ -26,7 +28,4 @@ Gem::Specification.new do |spec|
|
|
26
28
|
spec.add_development_dependency 'coveralls'
|
27
29
|
spec.add_development_dependency 'rake'
|
28
30
|
spec.add_development_dependency 'rspec'
|
29
|
-
spec.add_development_dependency 'simplecov'
|
30
|
-
|
31
|
-
spec.add_dependency 'nokogiri'
|
32
31
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.5
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-03-
|
11
|
+
date: 2020-03-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -66,34 +66,6 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: simplecov
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: nokogiri
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :runtime
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
69
|
description: Ruby library for working with the Tesseract OCR.
|
98
70
|
email:
|
99
71
|
- dannnylo@gmail.com
|
@@ -103,6 +75,7 @@ extra_rdoc_files: []
|
|
103
75
|
files:
|
104
76
|
- ".document"
|
105
77
|
- ".gitignore"
|
78
|
+
- ".hound.yml"
|
106
79
|
- ".rspec"
|
107
80
|
- ".rubocop.yml"
|
108
81
|
- ".travis.yml"
|