rtesseract 3.1.2 → 3.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.deepsource.toml +9 -0
- data/.github/FUNDING.yml +1 -0
- data/.github/workflows/ci.yml +13 -7
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +27 -34
- data/README.md +14 -3
- data/lib/rtesseract/box.rb +4 -4
- data/lib/rtesseract/pdf.rb +1 -1
- data/lib/rtesseract/text.rb +3 -1
- data/lib/rtesseract/tsv.rb +7 -2
- data/lib/rtesseract/version.rb +1 -1
- data/lib/rtesseract.rb +5 -7
- data/rtesseract.gemspec +2 -4
- metadata +9 -62
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 120e26c05e889a9bbfa935c3d43bfcbd9432b7296aa9b144fc55fd3c43f6a6af
|
4
|
+
data.tar.gz: 147e882d0648febff070e7f422c79852be258f594ded86cbcde8b5677c49b354
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9fc275b3afb190d731e0d12356867ea3a359177141fabe876a12b2df7391b65964b3086af63156bce8cc79d5e2a8f8c6c901b9ffeeef514ad7eb5482039858dd
|
7
|
+
data.tar.gz: fd6ab267bb3edfbd52afa54615ffad0e624c611270691b9e0e48cdf4ddbdbccd1565d0d52386ac664cc978fe23abf8aeb134a275f5629b2005f3464249c6455e
|
data/.deepsource.toml
ADDED
data/.github/FUNDING.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
github: dannnylo
|
data/.github/workflows/ci.yml
CHANGED
@@ -6,26 +6,32 @@ jobs:
|
|
6
6
|
strategy:
|
7
7
|
matrix:
|
8
8
|
ruby:
|
9
|
-
- '2.
|
10
|
-
- '2.
|
11
|
-
|
9
|
+
- '2.7.4'
|
10
|
+
- '3.2.0'
|
11
|
+
repository:
|
12
|
+
- 'ppa:alex-p/tesseract-ocr5'
|
13
|
+
- 'ppa:alex-p/tesseract-ocr-devel'
|
12
14
|
steps:
|
13
15
|
- uses: actions/checkout@v2
|
14
16
|
- name: Install tesseract-ocr
|
15
17
|
run: |
|
16
|
-
sudo add-apt-repository
|
18
|
+
sudo add-apt-repository ${{ matrix.repository }} -y
|
17
19
|
sudo apt-get update -q
|
18
20
|
sudo apt-get install tesseract-ocr tesseract-ocr-eng ghostscript -y
|
21
|
+
tesseract --version
|
19
22
|
- name: Setup Ruby
|
20
|
-
uses:
|
23
|
+
uses: ruby/setup-ruby@v1
|
21
24
|
with:
|
22
25
|
ruby-version: ${{ matrix.ruby }}
|
23
26
|
- name: Bundle
|
24
|
-
env:
|
25
|
-
MTSR_RAILS_VERSION: ${{ matrix.rails }}
|
26
27
|
run: |
|
27
28
|
gem uninstall -aIx bundler
|
28
29
|
gem install bundler
|
29
30
|
bundle install --jobs 4 --retry 3
|
30
31
|
- name: Test
|
31
32
|
run: bundle exec rake
|
33
|
+
- name: Coverage
|
34
|
+
env:
|
35
|
+
CODACY_PROJECT_TOKEN: ${{ secrets.CODACY_PROJECT_TOKEN }}
|
36
|
+
run: bash <(curl -Ls https://coverage.codacy.com/get.sh) report -l Ruby -r coverage/lcov/*
|
37
|
+
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
@@ -6,3 +6,13 @@ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
|
6
6
|
|
7
7
|
# Specify your gem's dependencies in rtesseract.gemspec
|
8
8
|
gemspec
|
9
|
+
|
10
|
+
group :development, :test do
|
11
|
+
gem 'bundler', '~> 2'
|
12
|
+
gem 'rake'
|
13
|
+
gem 'rspec'
|
14
|
+
|
15
|
+
gem 'simplecov'
|
16
|
+
gem 'simplecov-cobertura'
|
17
|
+
gem 'simplecov-lcov'
|
18
|
+
end
|
data/Gemfile.lock
CHANGED
@@ -1,55 +1,48 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
rtesseract (3.1.
|
4
|
+
rtesseract (3.1.3)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
rspec (3.9.0)
|
20
|
-
rspec-core (~> 3.9.0)
|
21
|
-
rspec-expectations (~> 3.9.0)
|
22
|
-
rspec-mocks (~> 3.9.0)
|
23
|
-
rspec-core (3.9.1)
|
24
|
-
rspec-support (~> 3.9.1)
|
25
|
-
rspec-expectations (3.9.1)
|
9
|
+
diff-lcs (1.4.4)
|
10
|
+
docile (1.4.0)
|
11
|
+
rake (13.0.6)
|
12
|
+
rspec (3.10.0)
|
13
|
+
rspec-core (~> 3.10.0)
|
14
|
+
rspec-expectations (~> 3.10.0)
|
15
|
+
rspec-mocks (~> 3.10.0)
|
16
|
+
rspec-core (3.10.1)
|
17
|
+
rspec-support (~> 3.10.0)
|
18
|
+
rspec-expectations (3.10.1)
|
26
19
|
diff-lcs (>= 1.2.0, < 2.0)
|
27
|
-
rspec-support (~> 3.
|
28
|
-
rspec-mocks (3.
|
20
|
+
rspec-support (~> 3.10.0)
|
21
|
+
rspec-mocks (3.10.2)
|
29
22
|
diff-lcs (>= 1.2.0, < 2.0)
|
30
|
-
rspec-support (~> 3.
|
31
|
-
rspec-support (3.
|
32
|
-
simplecov (0.
|
23
|
+
rspec-support (~> 3.10.0)
|
24
|
+
rspec-support (3.10.2)
|
25
|
+
simplecov (0.21.2)
|
33
26
|
docile (~> 1.1)
|
34
|
-
|
35
|
-
|
36
|
-
simplecov-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
tins (1.24.1)
|
42
|
-
sync
|
27
|
+
simplecov-html (~> 0.11)
|
28
|
+
simplecov_json_formatter (~> 0.1)
|
29
|
+
simplecov-cobertura (1.4.2)
|
30
|
+
simplecov (~> 0.8)
|
31
|
+
simplecov-html (0.12.3)
|
32
|
+
simplecov-lcov (0.8.0)
|
33
|
+
simplecov_json_formatter (0.1.3)
|
43
34
|
|
44
35
|
PLATFORMS
|
45
36
|
ruby
|
46
37
|
|
47
38
|
DEPENDENCIES
|
48
39
|
bundler (~> 2)
|
49
|
-
coveralls
|
50
40
|
rake
|
51
41
|
rspec
|
52
42
|
rtesseract!
|
43
|
+
simplecov
|
44
|
+
simplecov-cobertura
|
45
|
+
simplecov-lcov
|
53
46
|
|
54
47
|
BUNDLED WITH
|
55
|
-
2.
|
48
|
+
2.4.20
|
data/README.md
CHANGED
@@ -6,8 +6,11 @@
|
|
6
6
|
<a href='https://github.com/dannnylo/rtesseract/workflows/CI/badge.svg'>
|
7
7
|
<img src="https://github.com/dannnylo/rtesseract/workflows/CI/badge.svg" alt="Build Status" />
|
8
8
|
</a>
|
9
|
-
<a href='https://
|
10
|
-
|
9
|
+
<a href='https://app.codacy.com/project/badge/Grade/316a48934db8415d84d2f9a318b0f837'>
|
10
|
+
<img src="https://app.codacy.com/project/badge/Grade/316a48934db8415d84d2f9a318b0f837" alt="Coverage Status" />
|
11
|
+
</a>
|
12
|
+
<a href='https://app.codacy.com/project/badge/Coverage/316a48934db8415d84d2f9a318b0f837'>
|
13
|
+
<img src="https://app.codacy.com/project/badge/Coverage/316a48934db8415d84d2f9a318b0f837" alt="Coverage" />
|
11
14
|
</a>
|
12
15
|
<a href='https://codeclimate.com/github/dannnylo/rtesseract'>
|
13
16
|
<img src="https://codeclimate.com/github/dannnylo/rtesseract.png" />
|
@@ -17,10 +20,18 @@ Ruby library for working with the Tesseract OCR.
|
|
17
20
|
|
18
21
|
## Installation
|
19
22
|
|
20
|
-
Check if tesseract ocr programs
|
23
|
+
Check if tesseract ocr programs are installed:
|
21
24
|
|
22
25
|
$ tesseract --version
|
23
26
|
|
27
|
+
If not, you can install them with a command like:
|
28
|
+
|
29
|
+
$ apt install tesseract-ocr
|
30
|
+
|
31
|
+
or
|
32
|
+
|
33
|
+
$ brew install tesseract
|
34
|
+
|
24
35
|
Add this line to your application's Gemfile:
|
25
36
|
|
26
37
|
```ruby
|
data/lib/rtesseract/box.rb
CHANGED
@@ -6,7 +6,7 @@ class RTesseract
|
|
6
6
|
|
7
7
|
class << self
|
8
8
|
def run(source, errors, options)
|
9
|
-
options.tessedit_create_hocr
|
9
|
+
options = options.merge({ tessedit_create_hocr: 1 })
|
10
10
|
|
11
11
|
RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
|
12
12
|
parse(File.read("#{output_path}.hocr"))
|
@@ -20,7 +20,7 @@ class RTesseract
|
|
20
20
|
def parse_line(line)
|
21
21
|
return unless line.match?(/oc(rx|r)_word/)
|
22
22
|
|
23
|
-
word = line.
|
23
|
+
word = line.to_s.scan(/>(.*)</).flatten.first.to_s
|
24
24
|
|
25
25
|
return if word.strip == ''
|
26
26
|
|
@@ -39,11 +39,11 @@ class RTesseract
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def parse_position(line)
|
42
|
-
line.match(/(?<=title)(.*?)(?=;)/).to_s.split
|
42
|
+
line.match(/(?<=title)(.*?)(?=;)/).to_s.split
|
43
43
|
end
|
44
44
|
|
45
45
|
def parse_confidence(line)
|
46
|
-
line.match(/(?<=;)(.*?)(?=')/).to_s.split
|
46
|
+
line.match(/(?<=;)(.*?)(?=')/).to_s.split
|
47
47
|
end
|
48
48
|
end
|
49
49
|
end
|
data/lib/rtesseract/pdf.rb
CHANGED
@@ -5,7 +5,7 @@ class RTesseract
|
|
5
5
|
extend Base
|
6
6
|
|
7
7
|
def self.run(source, errors, options)
|
8
|
-
options.tessedit_create_pdf
|
8
|
+
options = options.merge({ tessedit_create_pdf: 1 })
|
9
9
|
|
10
10
|
RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
|
11
11
|
File.open("#{output_path}.pdf", 'r')
|
data/lib/rtesseract/text.rb
CHANGED
@@ -5,7 +5,9 @@ require 'open3'
|
|
5
5
|
class RTesseract
|
6
6
|
module Text
|
7
7
|
def self.run(source, errors, options)
|
8
|
-
RTesseract::Command.new(source, 'stdout', errors, options).run
|
8
|
+
text = RTesseract::Command.new(source, 'stdout', errors, options).run
|
9
|
+
text = text.gsub("\f", '') if text.is_a?(String)
|
10
|
+
text
|
9
11
|
end
|
10
12
|
end
|
11
13
|
end
|
data/lib/rtesseract/tsv.rb
CHANGED
@@ -5,9 +5,14 @@ class RTesseract
|
|
5
5
|
extend Base
|
6
6
|
|
7
7
|
def self.run(source, errors, options)
|
8
|
-
options.tessedit_create_tsv
|
8
|
+
options = options.merge({ tessedit_create_tsv: 1 })
|
9
9
|
|
10
|
-
RTesseract::Command.new(
|
10
|
+
RTesseract::Command.new(
|
11
|
+
source,
|
12
|
+
temp_file_path,
|
13
|
+
errors,
|
14
|
+
options
|
15
|
+
).run do |output_path|
|
11
16
|
File.open("#{output_path}.tsv", 'r')
|
12
17
|
end
|
13
18
|
end
|
data/lib/rtesseract/version.rb
CHANGED
data/lib/rtesseract.rb
CHANGED
@@ -12,7 +12,7 @@ require 'rtesseract/tsv'
|
|
12
12
|
class RTesseract
|
13
13
|
class Error < StandardError; end
|
14
14
|
|
15
|
-
attr_reader :config, :source
|
15
|
+
attr_reader :config, :source, :errors
|
16
16
|
|
17
17
|
def initialize(src = '', options = {})
|
18
18
|
@source = src
|
@@ -21,7 +21,7 @@ class RTesseract
|
|
21
21
|
end
|
22
22
|
|
23
23
|
def to_box
|
24
|
-
Box.run(@source, @errors, config)
|
24
|
+
Box.run(@source, @errors, @config)
|
25
25
|
end
|
26
26
|
|
27
27
|
def words
|
@@ -29,22 +29,20 @@ class RTesseract
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def to_pdf
|
32
|
-
Pdf.run(@source, @errors, config)
|
32
|
+
Pdf.run(@source, @errors, @config)
|
33
33
|
end
|
34
34
|
|
35
35
|
def to_tsv
|
36
|
-
Tsv.run(@source, @errors, config)
|
36
|
+
Tsv.run(@source, @errors, @config)
|
37
37
|
end
|
38
38
|
|
39
39
|
# Output value
|
40
40
|
def to_s
|
41
|
-
Text.run(@source, @errors, config)
|
41
|
+
Text.run(@source, @errors, @config)
|
42
42
|
end
|
43
43
|
|
44
44
|
# Remove spaces and break-lines
|
45
45
|
def to_s_without_spaces
|
46
46
|
to_s.gsub(/\s/, '')
|
47
47
|
end
|
48
|
-
|
49
|
-
attr_reader :errors
|
50
48
|
end
|
data/rtesseract.gemspec
CHANGED
@@ -20,12 +20,10 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
21
21
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
22
22
|
end
|
23
|
+
spec.required_ruby_version = '>= 2.7'
|
23
24
|
spec.bindir = 'exe'
|
24
25
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
25
26
|
spec.require_paths = ['lib']
|
26
27
|
|
27
|
-
spec.
|
28
|
-
spec.add_development_dependency 'coveralls'
|
29
|
-
spec.add_development_dependency 'rake'
|
30
|
-
spec.add_development_dependency 'rspec'
|
28
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
31
29
|
end
|
metadata
CHANGED
@@ -1,71 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
4
|
+
version: 3.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '2'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '2'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: coveralls
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rake
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rspec
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
11
|
+
date: 2023-10-04 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
69
13
|
description: Ruby library for working with the Tesseract OCR.
|
70
14
|
email:
|
71
15
|
- dannnylo@gmail.com
|
@@ -73,7 +17,9 @@ executables: []
|
|
73
17
|
extensions: []
|
74
18
|
extra_rdoc_files: []
|
75
19
|
files:
|
20
|
+
- ".deepsource.toml"
|
76
21
|
- ".document"
|
22
|
+
- ".github/FUNDING.yml"
|
77
23
|
- ".github/workflows/ci.yml"
|
78
24
|
- ".gitignore"
|
79
25
|
- ".hound.yml"
|
@@ -102,7 +48,8 @@ files:
|
|
102
48
|
homepage: http://github.com/dannnylo/rtesseract
|
103
49
|
licenses:
|
104
50
|
- MIT
|
105
|
-
metadata:
|
51
|
+
metadata:
|
52
|
+
rubygems_mfa_required: 'true'
|
106
53
|
post_install_message:
|
107
54
|
rdoc_options: []
|
108
55
|
require_paths:
|
@@ -111,14 +58,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
111
58
|
requirements:
|
112
59
|
- - ">="
|
113
60
|
- !ruby/object:Gem::Version
|
114
|
-
version: '
|
61
|
+
version: '2.7'
|
115
62
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
116
63
|
requirements:
|
117
64
|
- - ">="
|
118
65
|
- !ruby/object:Gem::Version
|
119
66
|
version: '0'
|
120
67
|
requirements: []
|
121
|
-
rubygems_version: 3.
|
68
|
+
rubygems_version: 3.4.10
|
122
69
|
signing_key:
|
123
70
|
specification_version: 4
|
124
71
|
summary: Ruby library for working with the Tesseract OCR.
|