rtesseract 3.1.2 → 3.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 87f85a70ab24a03a719e7726d95debeb469d822c447acc39a8adc1579c43e6d1
4
- data.tar.gz: a18cd83cd4632ed5adc096f9384659f0e47a4ba33e2f8a76e804cf8140e8798b
3
+ metadata.gz: 120e26c05e889a9bbfa935c3d43bfcbd9432b7296aa9b144fc55fd3c43f6a6af
4
+ data.tar.gz: 147e882d0648febff070e7f422c79852be258f594ded86cbcde8b5677c49b354
5
5
  SHA512:
6
- metadata.gz: 4f6b37e1645d5f82c759e5feea081cfcf5384e2ad91423d2d04030a89ef007eb9469ecf398ba4cd2364bf2baeea4bad885866c4c41b19fa4da2c078b79cef4a6
7
- data.tar.gz: bea642e1d7d2576dbdaeeb68ff730627104894d914a405a4ec2a37b8c65755f2f9c33a5091e16eaced51162725b6ea62f647b764ec838eefe3bfe4f58f461986
6
+ metadata.gz: 9fc275b3afb190d731e0d12356867ea3a359177141fabe876a12b2df7391b65964b3086af63156bce8cc79d5e2a8f8c6c901b9ffeeef514ad7eb5482039858dd
7
+ data.tar.gz: fd6ab267bb3edfbd52afa54615ffad0e624c611270691b9e0e48cdf4ddbdbccd1565d0d52386ac664cc978fe23abf8aeb134a275f5629b2005f3464249c6455e
data/.deepsource.toml ADDED
@@ -0,0 +1,9 @@
1
+ version = 1
2
+
3
+ [[analyzers]]
4
+ name = "shell"
5
+ enabled = true
6
+
7
+ [[analyzers]]
8
+ name = "ruby"
9
+ enabled = true
data/.github/FUNDING.yml ADDED
@@ -0,0 +1 @@
1
+ github: dannnylo
data/.github/workflows/ci.yml CHANGED
@@ -6,26 +6,32 @@ jobs:
6
6
  strategy:
7
7
  matrix:
8
8
  ruby:
9
- - '2.5.x'
10
- - '2.6.x'
11
- - '2.7.x'
9
+ - '2.7.4'
10
+ - '3.2.0'
11
+ repository:
12
+ - 'ppa:alex-p/tesseract-ocr5'
13
+ - 'ppa:alex-p/tesseract-ocr-devel'
12
14
  steps:
13
15
  - uses: actions/checkout@v2
14
16
  - name: Install tesseract-ocr
15
17
  run: |
16
- sudo add-apt-repository ppa:alex-p/tesseract-ocr -y
18
+ sudo add-apt-repository ${{ matrix.repository }} -y
17
19
  sudo apt-get update -q
18
20
  sudo apt-get install tesseract-ocr tesseract-ocr-eng ghostscript -y
21
+ tesseract --version
19
22
  - name: Setup Ruby
20
- uses: actions/setup-ruby@v1
23
+ uses: ruby/setup-ruby@v1
21
24
  with:
22
25
  ruby-version: ${{ matrix.ruby }}
23
26
  - name: Bundle
24
- env:
25
- MTSR_RAILS_VERSION: ${{ matrix.rails }}
26
27
  run: |
27
28
  gem uninstall -aIx bundler
28
29
  gem install bundler
29
30
  bundle install --jobs 4 --retry 3
30
31
  - name: Test
31
32
  run: bundle exec rake
33
+ - name: Coverage
34
+ env:
35
+ CODACY_PROJECT_TOKEN: ${{ secrets.CODACY_PROJECT_TOKEN }}
36
+ run: bash <(curl -Ls https://coverage.codacy.com/get.sh) report -l Ruby -r coverage/lcov/*
37
+
data/.rubocop.yml CHANGED
@@ -1,3 +1,6 @@
1
+ AllCops:
2
+ NewCops: enable
3
+ SuggestExtensions: false
1
4
 
2
5
  Layout/LineLength:
3
6
  Max: 150
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ # Changes
2
+ ## v3.1.3
3
+
4
+ * Fixed a configuration error that wouldn't allow you to do different kinds of calls on the same object, for example calling .to_box and then .to_s would result in unexpected behavior.
5
+
1
6
  ## v3.1.2
2
7
 
3
8
  #### Added
data/Gemfile CHANGED
@@ -6,3 +6,13 @@ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
6
 
7
7
  # Specify your gem's dependencies in rtesseract.gemspec
8
8
  gemspec
9
+
10
+ group :development, :test do
11
+ gem 'bundler', '~> 2'
12
+ gem 'rake'
13
+ gem 'rspec'
14
+
15
+ gem 'simplecov'
16
+ gem 'simplecov-cobertura'
17
+ gem 'simplecov-lcov'
18
+ end
data/Gemfile.lock CHANGED
@@ -1,55 +1,48 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rtesseract (3.1.2)
4
+ rtesseract (3.1.3)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
- coveralls (0.8.23)
10
- json (>= 1.8, < 3)
11
- simplecov (~> 0.16.1)
12
- term-ansicolor (~> 1.3)
13
- thor (>= 0.19.4, < 2.0)
14
- tins (~> 1.6)
15
- diff-lcs (1.3)
16
- docile (1.3.2)
17
- json (2.3.0)
18
- rake (13.0.1)
19
- rspec (3.9.0)
20
- rspec-core (~> 3.9.0)
21
- rspec-expectations (~> 3.9.0)
22
- rspec-mocks (~> 3.9.0)
23
- rspec-core (3.9.1)
24
- rspec-support (~> 3.9.1)
25
- rspec-expectations (3.9.1)
9
+ diff-lcs (1.4.4)
10
+ docile (1.4.0)
11
+ rake (13.0.6)
12
+ rspec (3.10.0)
13
+ rspec-core (~> 3.10.0)
14
+ rspec-expectations (~> 3.10.0)
15
+ rspec-mocks (~> 3.10.0)
16
+ rspec-core (3.10.1)
17
+ rspec-support (~> 3.10.0)
18
+ rspec-expectations (3.10.1)
26
19
  diff-lcs (>= 1.2.0, < 2.0)
27
- rspec-support (~> 3.9.0)
28
- rspec-mocks (3.9.1)
20
+ rspec-support (~> 3.10.0)
21
+ rspec-mocks (3.10.2)
29
22
  diff-lcs (>= 1.2.0, < 2.0)
30
- rspec-support (~> 3.9.0)
31
- rspec-support (3.9.2)
32
- simplecov (0.16.1)
23
+ rspec-support (~> 3.10.0)
24
+ rspec-support (3.10.2)
25
+ simplecov (0.21.2)
33
26
  docile (~> 1.1)
34
- json (>= 1.8, < 3)
35
- simplecov-html (~> 0.10.0)
36
- simplecov-html (0.10.2)
37
- sync (0.5.0)
38
- term-ansicolor (1.7.1)
39
- tins (~> 1.0)
40
- thor (1.0.1)
41
- tins (1.24.1)
42
- sync
27
+ simplecov-html (~> 0.11)
28
+ simplecov_json_formatter (~> 0.1)
29
+ simplecov-cobertura (1.4.2)
30
+ simplecov (~> 0.8)
31
+ simplecov-html (0.12.3)
32
+ simplecov-lcov (0.8.0)
33
+ simplecov_json_formatter (0.1.3)
43
34
 
44
35
  PLATFORMS
45
36
  ruby
46
37
 
47
38
  DEPENDENCIES
48
39
  bundler (~> 2)
49
- coveralls
50
40
  rake
51
41
  rspec
52
42
  rtesseract!
43
+ simplecov
44
+ simplecov-cobertura
45
+ simplecov-lcov
53
46
 
54
47
  BUNDLED WITH
55
- 2.1.4
48
+ 2.4.20
data/README.md CHANGED
@@ -6,8 +6,11 @@
6
6
  <a href='https://github.com/dannnylo/rtesseract/workflows/CI/badge.svg'>
7
7
  <img src="https://github.com/dannnylo/rtesseract/workflows/CI/badge.svg" alt="Build Status" />
8
8
  </a>
9
- <a href='https://coveralls.io/r/dannnylo/rtesseract?branch=master'>
10
- <img src="https://coveralls.io/repos/dannnylo/rtesseract/badge.png?branch=master" alt="Coverage Status" />
9
+ <a href='https://app.codacy.com/project/badge/Grade/316a48934db8415d84d2f9a318b0f837'>
10
+ <img src="https://app.codacy.com/project/badge/Grade/316a48934db8415d84d2f9a318b0f837" alt="Coverage Status" />
11
+ </a>
12
+ <a href='https://app.codacy.com/project/badge/Coverage/316a48934db8415d84d2f9a318b0f837'>
13
+ <img src="https://app.codacy.com/project/badge/Coverage/316a48934db8415d84d2f9a318b0f837" alt="Coverage" />
11
14
  </a>
12
15
  <a href='https://codeclimate.com/github/dannnylo/rtesseract'>
13
16
  <img src="https://codeclimate.com/github/dannnylo/rtesseract.png" />
@@ -17,10 +20,18 @@ Ruby library for working with the Tesseract OCR.
17
20
 
18
21
  ## Installation
19
22
 
20
- Check if tesseract ocr programs is installed:
23
+ Check if tesseract ocr programs are installed:
21
24
 
22
25
  $ tesseract --version
23
26
 
27
+ If not, you can install them with a command like:
28
+
29
+ $ apt install tesseract-ocr
30
+
31
+ or
32
+
33
+ $ brew install tesseract
34
+
24
35
  Add this line to your application's Gemfile:
25
36
 
26
37
  ```ruby
@@ -6,7 +6,7 @@ class RTesseract
6
6
 
7
7
  class << self
8
8
  def run(source, errors, options)
9
- options.tessedit_create_hocr = 1
9
+ options = options.merge({ tessedit_create_hocr: 1 })
10
10
 
11
11
  RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
12
12
  parse(File.read("#{output_path}.hocr"))
@@ -20,7 +20,7 @@ class RTesseract
20
20
  def parse_line(line)
21
21
  return unless line.match?(/oc(rx|r)_word/)
22
22
 
23
- word = line.match(/(?<=>)(.*?)(?=<)/).to_s
23
+ word = line.to_s.scan(/>(.*)</).flatten.first.to_s
24
24
 
25
25
  return if word.strip == ''
26
26
 
@@ -39,11 +39,11 @@ class RTesseract
39
39
  end
40
40
 
41
41
  def parse_position(line)
42
- line.match(/(?<=title)(.*?)(?=;)/).to_s.split(' ')
42
+ line.match(/(?<=title)(.*?)(?=;)/).to_s.split
43
43
  end
44
44
 
45
45
  def parse_confidence(line)
46
- line.match(/(?<=;)(.*?)(?=')/).to_s.split(' ')
46
+ line.match(/(?<=;)(.*?)(?=')/).to_s.split
47
47
  end
48
48
  end
49
49
  end
@@ -5,7 +5,7 @@ class RTesseract
5
5
  extend Base
6
6
 
7
7
  def self.run(source, errors, options)
8
- options.tessedit_create_pdf = 1
8
+ options = options.merge({ tessedit_create_pdf: 1 })
9
9
 
10
10
  RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
11
11
  File.open("#{output_path}.pdf", 'r')
@@ -5,7 +5,9 @@ require 'open3'
5
5
  class RTesseract
6
6
  module Text
7
7
  def self.run(source, errors, options)
8
- RTesseract::Command.new(source, 'stdout', errors, options).run
8
+ text = RTesseract::Command.new(source, 'stdout', errors, options).run
9
+ text = text.gsub("\f", '') if text.is_a?(String)
10
+ text
9
11
  end
10
12
  end
11
13
  end
@@ -5,9 +5,14 @@ class RTesseract
5
5
  extend Base
6
6
 
7
7
  def self.run(source, errors, options)
8
- options.tessedit_create_tsv = 1
8
+ options = options.merge({ tessedit_create_tsv: 1 })
9
9
 
10
- RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
10
+ RTesseract::Command.new(
11
+ source,
12
+ temp_file_path,
13
+ errors,
14
+ options
15
+ ).run do |output_path|
11
16
  File.open("#{output_path}.tsv", 'r')
12
17
  end
13
18
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class RTesseract
4
- VERSION = '3.1.2'
4
+ VERSION = '3.1.3'
5
5
  end
data/lib/rtesseract.rb CHANGED
@@ -12,7 +12,7 @@ require 'rtesseract/tsv'
12
12
  class RTesseract
13
13
  class Error < StandardError; end
14
14
 
15
- attr_reader :config, :source
15
+ attr_reader :config, :source, :errors
16
16
 
17
17
  def initialize(src = '', options = {})
18
18
  @source = src
@@ -21,7 +21,7 @@ class RTesseract
21
21
  end
22
22
 
23
23
  def to_box
24
- Box.run(@source, @errors, config)
24
+ Box.run(@source, @errors, @config)
25
25
  end
26
26
 
27
27
  def words
@@ -29,22 +29,20 @@ class RTesseract
29
29
  end
30
30
 
31
31
  def to_pdf
32
- Pdf.run(@source, @errors, config)
32
+ Pdf.run(@source, @errors, @config)
33
33
  end
34
34
 
35
35
  def to_tsv
36
- Tsv.run(@source, @errors, config)
36
+ Tsv.run(@source, @errors, @config)
37
37
  end
38
38
 
39
39
  # Output value
40
40
  def to_s
41
- Text.run(@source, @errors, config)
41
+ Text.run(@source, @errors, @config)
42
42
  end
43
43
 
44
44
  # Remove spaces and break-lines
45
45
  def to_s_without_spaces
46
46
  to_s.gsub(/\s/, '')
47
47
  end
48
-
49
- attr_reader :errors
50
48
  end
data/rtesseract.gemspec CHANGED
@@ -20,12 +20,10 @@ Gem::Specification.new do |spec|
20
20
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
21
21
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
22
  end
23
+ spec.required_ruby_version = '>= 2.7'
23
24
  spec.bindir = 'exe'
24
25
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
25
26
  spec.require_paths = ['lib']
26
27
 
27
- spec.add_development_dependency 'bundler', '~> 2'
28
- spec.add_development_dependency 'coveralls'
29
- spec.add_development_dependency 'rake'
30
- spec.add_development_dependency 'rspec'
28
+ spec.metadata['rubygems_mfa_required'] = 'true'
31
29
  end
metadata CHANGED
@@ -1,71 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.2
4
+ version: 3.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danilo Jeremias da Silva
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-23 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: bundler
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '2'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '2'
27
- - !ruby/object:Gem::Dependency
28
- name: coveralls
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: rake
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: rspec
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '0'
11
+ date: 2023-10-04 00:00:00.000000000 Z
12
+ dependencies: []
69
13
  description: Ruby library for working with the Tesseract OCR.
70
14
  email:
71
15
  - dannnylo@gmail.com
@@ -73,7 +17,9 @@ executables: []
73
17
  extensions: []
74
18
  extra_rdoc_files: []
75
19
  files:
20
+ - ".deepsource.toml"
76
21
  - ".document"
22
+ - ".github/FUNDING.yml"
77
23
  - ".github/workflows/ci.yml"
78
24
  - ".gitignore"
79
25
  - ".hound.yml"
@@ -102,7 +48,8 @@ files:
102
48
  homepage: http://github.com/dannnylo/rtesseract
103
49
  licenses:
104
50
  - MIT
105
- metadata: {}
51
+ metadata:
52
+ rubygems_mfa_required: 'true'
106
53
  post_install_message:
107
54
  rdoc_options: []
108
55
  require_paths:
@@ -111,14 +58,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
111
58
  requirements:
112
59
  - - ">="
113
60
  - !ruby/object:Gem::Version
114
- version: '0'
61
+ version: '2.7'
115
62
  required_rubygems_version: !ruby/object:Gem::Requirement
116
63
  requirements:
117
64
  - - ">="
118
65
  - !ruby/object:Gem::Version
119
66
  version: '0'
120
67
  requirements: []
121
- rubygems_version: 3.1.2
68
+ rubygems_version: 3.4.10
122
69
  signing_key:
123
70
  specification_version: 4
124
71
  summary: Ruby library for working with the Tesseract OCR.