rtesseract 3.1.2 → 3.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.deepsource.toml +9 -0
- data/.github/FUNDING.yml +1 -0
- data/.github/workflows/ci.yml +13 -7
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +27 -34
- data/README.md +14 -3
- data/lib/rtesseract/box.rb +4 -4
- data/lib/rtesseract/pdf.rb +1 -1
- data/lib/rtesseract/text.rb +3 -1
- data/lib/rtesseract/tsv.rb +7 -2
- data/lib/rtesseract/version.rb +1 -1
- data/lib/rtesseract.rb +5 -7
- data/rtesseract.gemspec +2 -4
- metadata +9 -62
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 120e26c05e889a9bbfa935c3d43bfcbd9432b7296aa9b144fc55fd3c43f6a6af
|
4
|
+
data.tar.gz: 147e882d0648febff070e7f422c79852be258f594ded86cbcde8b5677c49b354
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9fc275b3afb190d731e0d12356867ea3a359177141fabe876a12b2df7391b65964b3086af63156bce8cc79d5e2a8f8c6c901b9ffeeef514ad7eb5482039858dd
|
7
|
+
data.tar.gz: fd6ab267bb3edfbd52afa54615ffad0e624c611270691b9e0e48cdf4ddbdbccd1565d0d52386ac664cc978fe23abf8aeb134a275f5629b2005f3464249c6455e
|
data/.deepsource.toml
ADDED
data/.github/FUNDING.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
github: dannnylo
|
data/.github/workflows/ci.yml
CHANGED
@@ -6,26 +6,32 @@ jobs:
|
|
6
6
|
strategy:
|
7
7
|
matrix:
|
8
8
|
ruby:
|
9
|
-
- '2.
|
10
|
-
- '2.
|
11
|
-
|
9
|
+
- '2.7.4'
|
10
|
+
- '3.2.0'
|
11
|
+
repository:
|
12
|
+
- 'ppa:alex-p/tesseract-ocr5'
|
13
|
+
- 'ppa:alex-p/tesseract-ocr-devel'
|
12
14
|
steps:
|
13
15
|
- uses: actions/checkout@v2
|
14
16
|
- name: Install tesseract-ocr
|
15
17
|
run: |
|
16
|
-
sudo add-apt-repository
|
18
|
+
sudo add-apt-repository ${{ matrix.repository }} -y
|
17
19
|
sudo apt-get update -q
|
18
20
|
sudo apt-get install tesseract-ocr tesseract-ocr-eng ghostscript -y
|
21
|
+
tesseract --version
|
19
22
|
- name: Setup Ruby
|
20
|
-
uses:
|
23
|
+
uses: ruby/setup-ruby@v1
|
21
24
|
with:
|
22
25
|
ruby-version: ${{ matrix.ruby }}
|
23
26
|
- name: Bundle
|
24
|
-
env:
|
25
|
-
MTSR_RAILS_VERSION: ${{ matrix.rails }}
|
26
27
|
run: |
|
27
28
|
gem uninstall -aIx bundler
|
28
29
|
gem install bundler
|
29
30
|
bundle install --jobs 4 --retry 3
|
30
31
|
- name: Test
|
31
32
|
run: bundle exec rake
|
33
|
+
- name: Coverage
|
34
|
+
env:
|
35
|
+
CODACY_PROJECT_TOKEN: ${{ secrets.CODACY_PROJECT_TOKEN }}
|
36
|
+
run: bash <(curl -Ls https://coverage.codacy.com/get.sh) report -l Ruby -r coverage/lcov/*
|
37
|
+
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
@@ -6,3 +6,13 @@ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
|
6
6
|
|
7
7
|
# Specify your gem's dependencies in rtesseract.gemspec
|
8
8
|
gemspec
|
9
|
+
|
10
|
+
group :development, :test do
|
11
|
+
gem 'bundler', '~> 2'
|
12
|
+
gem 'rake'
|
13
|
+
gem 'rspec'
|
14
|
+
|
15
|
+
gem 'simplecov'
|
16
|
+
gem 'simplecov-cobertura'
|
17
|
+
gem 'simplecov-lcov'
|
18
|
+
end
|
data/Gemfile.lock
CHANGED
@@ -1,55 +1,48 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
rtesseract (3.1.
|
4
|
+
rtesseract (3.1.3)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
rspec (3.9.0)
|
20
|
-
rspec-core (~> 3.9.0)
|
21
|
-
rspec-expectations (~> 3.9.0)
|
22
|
-
rspec-mocks (~> 3.9.0)
|
23
|
-
rspec-core (3.9.1)
|
24
|
-
rspec-support (~> 3.9.1)
|
25
|
-
rspec-expectations (3.9.1)
|
9
|
+
diff-lcs (1.4.4)
|
10
|
+
docile (1.4.0)
|
11
|
+
rake (13.0.6)
|
12
|
+
rspec (3.10.0)
|
13
|
+
rspec-core (~> 3.10.0)
|
14
|
+
rspec-expectations (~> 3.10.0)
|
15
|
+
rspec-mocks (~> 3.10.0)
|
16
|
+
rspec-core (3.10.1)
|
17
|
+
rspec-support (~> 3.10.0)
|
18
|
+
rspec-expectations (3.10.1)
|
26
19
|
diff-lcs (>= 1.2.0, < 2.0)
|
27
|
-
rspec-support (~> 3.
|
28
|
-
rspec-mocks (3.
|
20
|
+
rspec-support (~> 3.10.0)
|
21
|
+
rspec-mocks (3.10.2)
|
29
22
|
diff-lcs (>= 1.2.0, < 2.0)
|
30
|
-
rspec-support (~> 3.
|
31
|
-
rspec-support (3.
|
32
|
-
simplecov (0.
|
23
|
+
rspec-support (~> 3.10.0)
|
24
|
+
rspec-support (3.10.2)
|
25
|
+
simplecov (0.21.2)
|
33
26
|
docile (~> 1.1)
|
34
|
-
|
35
|
-
|
36
|
-
simplecov-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
tins (1.24.1)
|
42
|
-
sync
|
27
|
+
simplecov-html (~> 0.11)
|
28
|
+
simplecov_json_formatter (~> 0.1)
|
29
|
+
simplecov-cobertura (1.4.2)
|
30
|
+
simplecov (~> 0.8)
|
31
|
+
simplecov-html (0.12.3)
|
32
|
+
simplecov-lcov (0.8.0)
|
33
|
+
simplecov_json_formatter (0.1.3)
|
43
34
|
|
44
35
|
PLATFORMS
|
45
36
|
ruby
|
46
37
|
|
47
38
|
DEPENDENCIES
|
48
39
|
bundler (~> 2)
|
49
|
-
coveralls
|
50
40
|
rake
|
51
41
|
rspec
|
52
42
|
rtesseract!
|
43
|
+
simplecov
|
44
|
+
simplecov-cobertura
|
45
|
+
simplecov-lcov
|
53
46
|
|
54
47
|
BUNDLED WITH
|
55
|
-
2.
|
48
|
+
2.4.20
|
data/README.md
CHANGED
@@ -6,8 +6,11 @@
|
|
6
6
|
<a href='https://github.com/dannnylo/rtesseract/workflows/CI/badge.svg'>
|
7
7
|
<img src="https://github.com/dannnylo/rtesseract/workflows/CI/badge.svg" alt="Build Status" />
|
8
8
|
</a>
|
9
|
-
<a href='https://
|
10
|
-
|
9
|
+
<a href='https://app.codacy.com/project/badge/Grade/316a48934db8415d84d2f9a318b0f837'>
|
10
|
+
<img src="https://app.codacy.com/project/badge/Grade/316a48934db8415d84d2f9a318b0f837" alt="Coverage Status" />
|
11
|
+
</a>
|
12
|
+
<a href='https://app.codacy.com/project/badge/Coverage/316a48934db8415d84d2f9a318b0f837'>
|
13
|
+
<img src="https://app.codacy.com/project/badge/Coverage/316a48934db8415d84d2f9a318b0f837" alt="Coverage" />
|
11
14
|
</a>
|
12
15
|
<a href='https://codeclimate.com/github/dannnylo/rtesseract'>
|
13
16
|
<img src="https://codeclimate.com/github/dannnylo/rtesseract.png" />
|
@@ -17,10 +20,18 @@ Ruby library for working with the Tesseract OCR.
|
|
17
20
|
|
18
21
|
## Installation
|
19
22
|
|
20
|
-
Check if tesseract ocr programs
|
23
|
+
Check if tesseract ocr programs are installed:
|
21
24
|
|
22
25
|
$ tesseract --version
|
23
26
|
|
27
|
+
If not, you can install them with a command like:
|
28
|
+
|
29
|
+
$ apt install tesseract-ocr
|
30
|
+
|
31
|
+
or
|
32
|
+
|
33
|
+
$ brew install tesseract
|
34
|
+
|
24
35
|
Add this line to your application's Gemfile:
|
25
36
|
|
26
37
|
```ruby
|
data/lib/rtesseract/box.rb
CHANGED
@@ -6,7 +6,7 @@ class RTesseract
|
|
6
6
|
|
7
7
|
class << self
|
8
8
|
def run(source, errors, options)
|
9
|
-
options.tessedit_create_hocr
|
9
|
+
options = options.merge({ tessedit_create_hocr: 1 })
|
10
10
|
|
11
11
|
RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
|
12
12
|
parse(File.read("#{output_path}.hocr"))
|
@@ -20,7 +20,7 @@ class RTesseract
|
|
20
20
|
def parse_line(line)
|
21
21
|
return unless line.match?(/oc(rx|r)_word/)
|
22
22
|
|
23
|
-
word = line.
|
23
|
+
word = line.to_s.scan(/>(.*)</).flatten.first.to_s
|
24
24
|
|
25
25
|
return if word.strip == ''
|
26
26
|
|
@@ -39,11 +39,11 @@ class RTesseract
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def parse_position(line)
|
42
|
-
line.match(/(?<=title)(.*?)(?=;)/).to_s.split
|
42
|
+
line.match(/(?<=title)(.*?)(?=;)/).to_s.split
|
43
43
|
end
|
44
44
|
|
45
45
|
def parse_confidence(line)
|
46
|
-
line.match(/(?<=;)(.*?)(?=')/).to_s.split
|
46
|
+
line.match(/(?<=;)(.*?)(?=')/).to_s.split
|
47
47
|
end
|
48
48
|
end
|
49
49
|
end
|
data/lib/rtesseract/pdf.rb
CHANGED
@@ -5,7 +5,7 @@ class RTesseract
|
|
5
5
|
extend Base
|
6
6
|
|
7
7
|
def self.run(source, errors, options)
|
8
|
-
options.tessedit_create_pdf
|
8
|
+
options = options.merge({ tessedit_create_pdf: 1 })
|
9
9
|
|
10
10
|
RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
|
11
11
|
File.open("#{output_path}.pdf", 'r')
|
data/lib/rtesseract/text.rb
CHANGED
@@ -5,7 +5,9 @@ require 'open3'
|
|
5
5
|
class RTesseract
|
6
6
|
module Text
|
7
7
|
def self.run(source, errors, options)
|
8
|
-
RTesseract::Command.new(source, 'stdout', errors, options).run
|
8
|
+
text = RTesseract::Command.new(source, 'stdout', errors, options).run
|
9
|
+
text = text.gsub("\f", '') if text.is_a?(String)
|
10
|
+
text
|
9
11
|
end
|
10
12
|
end
|
11
13
|
end
|
data/lib/rtesseract/tsv.rb
CHANGED
@@ -5,9 +5,14 @@ class RTesseract
|
|
5
5
|
extend Base
|
6
6
|
|
7
7
|
def self.run(source, errors, options)
|
8
|
-
options.tessedit_create_tsv
|
8
|
+
options = options.merge({ tessedit_create_tsv: 1 })
|
9
9
|
|
10
|
-
RTesseract::Command.new(
|
10
|
+
RTesseract::Command.new(
|
11
|
+
source,
|
12
|
+
temp_file_path,
|
13
|
+
errors,
|
14
|
+
options
|
15
|
+
).run do |output_path|
|
11
16
|
File.open("#{output_path}.tsv", 'r')
|
12
17
|
end
|
13
18
|
end
|
data/lib/rtesseract/version.rb
CHANGED
data/lib/rtesseract.rb
CHANGED
@@ -12,7 +12,7 @@ require 'rtesseract/tsv'
|
|
12
12
|
class RTesseract
|
13
13
|
class Error < StandardError; end
|
14
14
|
|
15
|
-
attr_reader :config, :source
|
15
|
+
attr_reader :config, :source, :errors
|
16
16
|
|
17
17
|
def initialize(src = '', options = {})
|
18
18
|
@source = src
|
@@ -21,7 +21,7 @@ class RTesseract
|
|
21
21
|
end
|
22
22
|
|
23
23
|
def to_box
|
24
|
-
Box.run(@source, @errors, config)
|
24
|
+
Box.run(@source, @errors, @config)
|
25
25
|
end
|
26
26
|
|
27
27
|
def words
|
@@ -29,22 +29,20 @@ class RTesseract
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def to_pdf
|
32
|
-
Pdf.run(@source, @errors, config)
|
32
|
+
Pdf.run(@source, @errors, @config)
|
33
33
|
end
|
34
34
|
|
35
35
|
def to_tsv
|
36
|
-
Tsv.run(@source, @errors, config)
|
36
|
+
Tsv.run(@source, @errors, @config)
|
37
37
|
end
|
38
38
|
|
39
39
|
# Output value
|
40
40
|
def to_s
|
41
|
-
Text.run(@source, @errors, config)
|
41
|
+
Text.run(@source, @errors, @config)
|
42
42
|
end
|
43
43
|
|
44
44
|
# Remove spaces and break-lines
|
45
45
|
def to_s_without_spaces
|
46
46
|
to_s.gsub(/\s/, '')
|
47
47
|
end
|
48
|
-
|
49
|
-
attr_reader :errors
|
50
48
|
end
|
data/rtesseract.gemspec
CHANGED
@@ -20,12 +20,10 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
21
21
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
22
22
|
end
|
23
|
+
spec.required_ruby_version = '>= 2.7'
|
23
24
|
spec.bindir = 'exe'
|
24
25
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
25
26
|
spec.require_paths = ['lib']
|
26
27
|
|
27
|
-
spec.
|
28
|
-
spec.add_development_dependency 'coveralls'
|
29
|
-
spec.add_development_dependency 'rake'
|
30
|
-
spec.add_development_dependency 'rspec'
|
28
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
31
29
|
end
|
metadata
CHANGED
@@ -1,71 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
4
|
+
version: 3.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '2'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '2'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: coveralls
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rake
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rspec
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
11
|
+
date: 2023-10-04 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
69
13
|
description: Ruby library for working with the Tesseract OCR.
|
70
14
|
email:
|
71
15
|
- dannnylo@gmail.com
|
@@ -73,7 +17,9 @@ executables: []
|
|
73
17
|
extensions: []
|
74
18
|
extra_rdoc_files: []
|
75
19
|
files:
|
20
|
+
- ".deepsource.toml"
|
76
21
|
- ".document"
|
22
|
+
- ".github/FUNDING.yml"
|
77
23
|
- ".github/workflows/ci.yml"
|
78
24
|
- ".gitignore"
|
79
25
|
- ".hound.yml"
|
@@ -102,7 +48,8 @@ files:
|
|
102
48
|
homepage: http://github.com/dannnylo/rtesseract
|
103
49
|
licenses:
|
104
50
|
- MIT
|
105
|
-
metadata:
|
51
|
+
metadata:
|
52
|
+
rubygems_mfa_required: 'true'
|
106
53
|
post_install_message:
|
107
54
|
rdoc_options: []
|
108
55
|
require_paths:
|
@@ -111,14 +58,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
111
58
|
requirements:
|
112
59
|
- - ">="
|
113
60
|
- !ruby/object:Gem::Version
|
114
|
-
version: '
|
61
|
+
version: '2.7'
|
115
62
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
116
63
|
requirements:
|
117
64
|
- - ">="
|
118
65
|
- !ruby/object:Gem::Version
|
119
66
|
version: '0'
|
120
67
|
requirements: []
|
121
|
-
rubygems_version: 3.
|
68
|
+
rubygems_version: 3.4.10
|
122
69
|
signing_key:
|
123
70
|
specification_version: 4
|
124
71
|
summary: Ruby library for working with the Tesseract OCR.
|