rtesseract 3.1.2 → 3.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 87f85a70ab24a03a719e7726d95debeb469d822c447acc39a8adc1579c43e6d1
4
- data.tar.gz: a18cd83cd4632ed5adc096f9384659f0e47a4ba33e2f8a76e804cf8140e8798b
3
+ metadata.gz: 120e26c05e889a9bbfa935c3d43bfcbd9432b7296aa9b144fc55fd3c43f6a6af
4
+ data.tar.gz: 147e882d0648febff070e7f422c79852be258f594ded86cbcde8b5677c49b354
5
5
  SHA512:
6
- metadata.gz: 4f6b37e1645d5f82c759e5feea081cfcf5384e2ad91423d2d04030a89ef007eb9469ecf398ba4cd2364bf2baeea4bad885866c4c41b19fa4da2c078b79cef4a6
7
- data.tar.gz: bea642e1d7d2576dbdaeeb68ff730627104894d914a405a4ec2a37b8c65755f2f9c33a5091e16eaced51162725b6ea62f647b764ec838eefe3bfe4f58f461986
6
+ metadata.gz: 9fc275b3afb190d731e0d12356867ea3a359177141fabe876a12b2df7391b65964b3086af63156bce8cc79d5e2a8f8c6c901b9ffeeef514ad7eb5482039858dd
7
+ data.tar.gz: fd6ab267bb3edfbd52afa54615ffad0e624c611270691b9e0e48cdf4ddbdbccd1565d0d52386ac664cc978fe23abf8aeb134a275f5629b2005f3464249c6455e
data/.deepsource.toml ADDED
@@ -0,0 +1,9 @@
1
+ version = 1
2
+
3
+ [[analyzers]]
4
+ name = "shell"
5
+ enabled = true
6
+
7
+ [[analyzers]]
8
+ name = "ruby"
9
+ enabled = true
@@ -0,0 +1 @@
1
+ github: dannnylo
@@ -6,26 +6,32 @@ jobs:
6
6
  strategy:
7
7
  matrix:
8
8
  ruby:
9
- - '2.5.x'
10
- - '2.6.x'
11
- - '2.7.x'
9
+ - '2.7.4'
10
+ - '3.2.0'
11
+ repository:
12
+ - 'ppa:alex-p/tesseract-ocr5'
13
+ - 'ppa:alex-p/tesseract-ocr-devel'
12
14
  steps:
13
15
  - uses: actions/checkout@v2
14
16
  - name: Install tesseract-ocr
15
17
  run: |
16
- sudo add-apt-repository ppa:alex-p/tesseract-ocr -y
18
+ sudo add-apt-repository ${{ matrix.repository }} -y
17
19
  sudo apt-get update -q
18
20
  sudo apt-get install tesseract-ocr tesseract-ocr-eng ghostscript -y
21
+ tesseract --version
19
22
  - name: Setup Ruby
20
- uses: actions/setup-ruby@v1
23
+ uses: ruby/setup-ruby@v1
21
24
  with:
22
25
  ruby-version: ${{ matrix.ruby }}
23
26
  - name: Bundle
24
- env:
25
- MTSR_RAILS_VERSION: ${{ matrix.rails }}
26
27
  run: |
27
28
  gem uninstall -aIx bundler
28
29
  gem install bundler
29
30
  bundle install --jobs 4 --retry 3
30
31
  - name: Test
31
32
  run: bundle exec rake
33
+ - name: Coverage
34
+ env:
35
+ CODACY_PROJECT_TOKEN: ${{ secrets.CODACY_PROJECT_TOKEN }}
36
+ run: bash <(curl -Ls https://coverage.codacy.com/get.sh) report -l Ruby -r coverage/lcov/*
37
+
data/.rubocop.yml CHANGED
@@ -1,3 +1,6 @@
1
+ AllCops:
2
+ NewCops: enable
3
+ SuggestExtensions: false
1
4
 
2
5
  Layout/LineLength:
3
6
  Max: 150
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ # Changes
2
+ ## v3.1.3
3
+
4
+ * Fixed a configuration error that wouldn't allow you to do different kinds of calls on the same object, for example calling .to_box and then .to_s would result in unexpected behavior.
5
+
1
6
  ## v3.1.2
2
7
 
3
8
  #### Added
data/Gemfile CHANGED
@@ -6,3 +6,13 @@ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
6
 
7
7
  # Specify your gem's dependencies in rtesseract.gemspec
8
8
  gemspec
9
+
10
+ group :development, :test do
11
+ gem 'bundler', '~> 2'
12
+ gem 'rake'
13
+ gem 'rspec'
14
+
15
+ gem 'simplecov'
16
+ gem 'simplecov-cobertura'
17
+ gem 'simplecov-lcov'
18
+ end
data/Gemfile.lock CHANGED
@@ -1,55 +1,48 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rtesseract (3.1.2)
4
+ rtesseract (3.1.3)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
- coveralls (0.8.23)
10
- json (>= 1.8, < 3)
11
- simplecov (~> 0.16.1)
12
- term-ansicolor (~> 1.3)
13
- thor (>= 0.19.4, < 2.0)
14
- tins (~> 1.6)
15
- diff-lcs (1.3)
16
- docile (1.3.2)
17
- json (2.3.0)
18
- rake (13.0.1)
19
- rspec (3.9.0)
20
- rspec-core (~> 3.9.0)
21
- rspec-expectations (~> 3.9.0)
22
- rspec-mocks (~> 3.9.0)
23
- rspec-core (3.9.1)
24
- rspec-support (~> 3.9.1)
25
- rspec-expectations (3.9.1)
9
+ diff-lcs (1.4.4)
10
+ docile (1.4.0)
11
+ rake (13.0.6)
12
+ rspec (3.10.0)
13
+ rspec-core (~> 3.10.0)
14
+ rspec-expectations (~> 3.10.0)
15
+ rspec-mocks (~> 3.10.0)
16
+ rspec-core (3.10.1)
17
+ rspec-support (~> 3.10.0)
18
+ rspec-expectations (3.10.1)
26
19
  diff-lcs (>= 1.2.0, < 2.0)
27
- rspec-support (~> 3.9.0)
28
- rspec-mocks (3.9.1)
20
+ rspec-support (~> 3.10.0)
21
+ rspec-mocks (3.10.2)
29
22
  diff-lcs (>= 1.2.0, < 2.0)
30
- rspec-support (~> 3.9.0)
31
- rspec-support (3.9.2)
32
- simplecov (0.16.1)
23
+ rspec-support (~> 3.10.0)
24
+ rspec-support (3.10.2)
25
+ simplecov (0.21.2)
33
26
  docile (~> 1.1)
34
- json (>= 1.8, < 3)
35
- simplecov-html (~> 0.10.0)
36
- simplecov-html (0.10.2)
37
- sync (0.5.0)
38
- term-ansicolor (1.7.1)
39
- tins (~> 1.0)
40
- thor (1.0.1)
41
- tins (1.24.1)
42
- sync
27
+ simplecov-html (~> 0.11)
28
+ simplecov_json_formatter (~> 0.1)
29
+ simplecov-cobertura (1.4.2)
30
+ simplecov (~> 0.8)
31
+ simplecov-html (0.12.3)
32
+ simplecov-lcov (0.8.0)
33
+ simplecov_json_formatter (0.1.3)
43
34
 
44
35
  PLATFORMS
45
36
  ruby
46
37
 
47
38
  DEPENDENCIES
48
39
  bundler (~> 2)
49
- coveralls
50
40
  rake
51
41
  rspec
52
42
  rtesseract!
43
+ simplecov
44
+ simplecov-cobertura
45
+ simplecov-lcov
53
46
 
54
47
  BUNDLED WITH
55
- 2.1.4
48
+ 2.4.20
data/README.md CHANGED
@@ -6,8 +6,11 @@
6
6
  <a href='https://github.com/dannnylo/rtesseract/workflows/CI/badge.svg'>
7
7
  <img src="https://github.com/dannnylo/rtesseract/workflows/CI/badge.svg" alt="Build Status" />
8
8
  </a>
9
- <a href='https://coveralls.io/r/dannnylo/rtesseract?branch=master'>
10
- <img src="https://coveralls.io/repos/dannnylo/rtesseract/badge.png?branch=master" alt="Coverage Status" />
9
+ <a href='https://app.codacy.com/project/badge/Grade/316a48934db8415d84d2f9a318b0f837'>
10
+ <img src="https://app.codacy.com/project/badge/Grade/316a48934db8415d84d2f9a318b0f837" alt="Coverage Status" />
11
+ </a>
12
+ <a href='https://app.codacy.com/project/badge/Coverage/316a48934db8415d84d2f9a318b0f837'>
13
+ <img src="https://app.codacy.com/project/badge/Coverage/316a48934db8415d84d2f9a318b0f837" alt="Coverage" />
11
14
  </a>
12
15
  <a href='https://codeclimate.com/github/dannnylo/rtesseract'>
13
16
  <img src="https://codeclimate.com/github/dannnylo/rtesseract.png" />
@@ -17,10 +20,18 @@ Ruby library for working with the Tesseract OCR.
17
20
 
18
21
  ## Installation
19
22
 
20
- Check if tesseract ocr programs is installed:
23
+ Check if tesseract ocr programs are installed:
21
24
 
22
25
  $ tesseract --version
23
26
 
27
+ If not, you can install them with a command like:
28
+
29
+ $ apt install tesseract-ocr
30
+
31
+ or
32
+
33
+ $ brew install tesseract
34
+
24
35
  Add this line to your application's Gemfile:
25
36
 
26
37
  ```ruby
@@ -6,7 +6,7 @@ class RTesseract
6
6
 
7
7
  class << self
8
8
  def run(source, errors, options)
9
- options.tessedit_create_hocr = 1
9
+ options = options.merge({ tessedit_create_hocr: 1 })
10
10
 
11
11
  RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
12
12
  parse(File.read("#{output_path}.hocr"))
@@ -20,7 +20,7 @@ class RTesseract
20
20
  def parse_line(line)
21
21
  return unless line.match?(/oc(rx|r)_word/)
22
22
 
23
- word = line.match(/(?<=>)(.*?)(?=<)/).to_s
23
+ word = line.to_s.scan(/>(.*)</).flatten.first.to_s
24
24
 
25
25
  return if word.strip == ''
26
26
 
@@ -39,11 +39,11 @@ class RTesseract
39
39
  end
40
40
 
41
41
  def parse_position(line)
42
- line.match(/(?<=title)(.*?)(?=;)/).to_s.split(' ')
42
+ line.match(/(?<=title)(.*?)(?=;)/).to_s.split
43
43
  end
44
44
 
45
45
  def parse_confidence(line)
46
- line.match(/(?<=;)(.*?)(?=')/).to_s.split(' ')
46
+ line.match(/(?<=;)(.*?)(?=')/).to_s.split
47
47
  end
48
48
  end
49
49
  end
@@ -5,7 +5,7 @@ class RTesseract
5
5
  extend Base
6
6
 
7
7
  def self.run(source, errors, options)
8
- options.tessedit_create_pdf = 1
8
+ options = options.merge({ tessedit_create_pdf: 1 })
9
9
 
10
10
  RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
11
11
  File.open("#{output_path}.pdf", 'r')
@@ -5,7 +5,9 @@ require 'open3'
5
5
  class RTesseract
6
6
  module Text
7
7
  def self.run(source, errors, options)
8
- RTesseract::Command.new(source, 'stdout', errors, options).run
8
+ text = RTesseract::Command.new(source, 'stdout', errors, options).run
9
+ text = text.gsub("\f", '') if text.is_a?(String)
10
+ text
9
11
  end
10
12
  end
11
13
  end
@@ -5,9 +5,14 @@ class RTesseract
5
5
  extend Base
6
6
 
7
7
  def self.run(source, errors, options)
8
- options.tessedit_create_tsv = 1
8
+ options = options.merge({ tessedit_create_tsv: 1 })
9
9
 
10
- RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
10
+ RTesseract::Command.new(
11
+ source,
12
+ temp_file_path,
13
+ errors,
14
+ options
15
+ ).run do |output_path|
11
16
  File.open("#{output_path}.tsv", 'r')
12
17
  end
13
18
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class RTesseract
4
- VERSION = '3.1.2'
4
+ VERSION = '3.1.3'
5
5
  end
data/lib/rtesseract.rb CHANGED
@@ -12,7 +12,7 @@ require 'rtesseract/tsv'
12
12
  class RTesseract
13
13
  class Error < StandardError; end
14
14
 
15
- attr_reader :config, :source
15
+ attr_reader :config, :source, :errors
16
16
 
17
17
  def initialize(src = '', options = {})
18
18
  @source = src
@@ -21,7 +21,7 @@ class RTesseract
21
21
  end
22
22
 
23
23
  def to_box
24
- Box.run(@source, @errors, config)
24
+ Box.run(@source, @errors, @config)
25
25
  end
26
26
 
27
27
  def words
@@ -29,22 +29,20 @@ class RTesseract
29
29
  end
30
30
 
31
31
  def to_pdf
32
- Pdf.run(@source, @errors, config)
32
+ Pdf.run(@source, @errors, @config)
33
33
  end
34
34
 
35
35
  def to_tsv
36
- Tsv.run(@source, @errors, config)
36
+ Tsv.run(@source, @errors, @config)
37
37
  end
38
38
 
39
39
  # Output value
40
40
  def to_s
41
- Text.run(@source, @errors, config)
41
+ Text.run(@source, @errors, @config)
42
42
  end
43
43
 
44
44
  # Remove spaces and break-lines
45
45
  def to_s_without_spaces
46
46
  to_s.gsub(/\s/, '')
47
47
  end
48
-
49
- attr_reader :errors
50
48
  end
data/rtesseract.gemspec CHANGED
@@ -20,12 +20,10 @@ Gem::Specification.new do |spec|
20
20
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
21
21
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
22
  end
23
+ spec.required_ruby_version = '>= 2.7'
23
24
  spec.bindir = 'exe'
24
25
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
25
26
  spec.require_paths = ['lib']
26
27
 
27
- spec.add_development_dependency 'bundler', '~> 2'
28
- spec.add_development_dependency 'coveralls'
29
- spec.add_development_dependency 'rake'
30
- spec.add_development_dependency 'rspec'
28
+ spec.metadata['rubygems_mfa_required'] = 'true'
31
29
  end
metadata CHANGED
@@ -1,71 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.2
4
+ version: 3.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danilo Jeremias da Silva
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-23 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: bundler
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '2'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '2'
27
- - !ruby/object:Gem::Dependency
28
- name: coveralls
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: rake
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: rspec
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '0'
11
+ date: 2023-10-04 00:00:00.000000000 Z
12
+ dependencies: []
69
13
  description: Ruby library for working with the Tesseract OCR.
70
14
  email:
71
15
  - dannnylo@gmail.com
@@ -73,7 +17,9 @@ executables: []
73
17
  extensions: []
74
18
  extra_rdoc_files: []
75
19
  files:
20
+ - ".deepsource.toml"
76
21
  - ".document"
22
+ - ".github/FUNDING.yml"
77
23
  - ".github/workflows/ci.yml"
78
24
  - ".gitignore"
79
25
  - ".hound.yml"
@@ -102,7 +48,8 @@ files:
102
48
  homepage: http://github.com/dannnylo/rtesseract
103
49
  licenses:
104
50
  - MIT
105
- metadata: {}
51
+ metadata:
52
+ rubygems_mfa_required: 'true'
106
53
  post_install_message:
107
54
  rdoc_options: []
108
55
  require_paths:
@@ -111,14 +58,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
111
58
  requirements:
112
59
  - - ">="
113
60
  - !ruby/object:Gem::Version
114
- version: '0'
61
+ version: '2.7'
115
62
  required_rubygems_version: !ruby/object:Gem::Requirement
116
63
  requirements:
117
64
  - - ">="
118
65
  - !ruby/object:Gem::Version
119
66
  version: '0'
120
67
  requirements: []
121
- rubygems_version: 3.1.2
68
+ rubygems_version: 3.4.10
122
69
  signing_key:
123
70
  specification_version: 4
124
71
  summary: Ruby library for working with the Tesseract OCR.