rtesseract 3.1.2 → 3.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 87f85a70ab24a03a719e7726d95debeb469d822c447acc39a8adc1579c43e6d1
4
- data.tar.gz: a18cd83cd4632ed5adc096f9384659f0e47a4ba33e2f8a76e804cf8140e8798b
3
+ metadata.gz: 0a6663ca2e040b7f9ea75dba9efd078c4d4e8e9ddd51f5e6891049328fc39492
4
+ data.tar.gz: 0f0cb1ccfbaf0fde1a04da091f55ee536992504f2fcf698ab56fbd354292b2a3
5
5
  SHA512:
6
- metadata.gz: 4f6b37e1645d5f82c759e5feea081cfcf5384e2ad91423d2d04030a89ef007eb9469ecf398ba4cd2364bf2baeea4bad885866c4c41b19fa4da2c078b79cef4a6
7
- data.tar.gz: bea642e1d7d2576dbdaeeb68ff730627104894d914a405a4ec2a37b8c65755f2f9c33a5091e16eaced51162725b6ea62f647b764ec838eefe3bfe4f58f461986
6
+ metadata.gz: e5a661e73977518acc17627a850d1cdc22df44af6ed4118b974af8b9a487cab1bf487583cf3a196c4b23b3a708d1fed9521ecbd5aa1e0d5d0284bdc671739fe3
7
+ data.tar.gz: b575ccd5b3fe076d0565857a558ca454b0e919e1b421ca071b6eaa8636b20ffed8f899280d666ba2735f0cf73e72552535944aa55f72a8c985116008f7059457
data/.deepsource.toml ADDED
@@ -0,0 +1,9 @@
1
+ version = 1
2
+
3
+ [[analyzers]]
4
+ name = "shell"
5
+ enabled = true
6
+
7
+ [[analyzers]]
8
+ name = "ruby"
9
+ enabled = true
data/.github/FUNDING.yml ADDED
@@ -0,0 +1 @@
1
+ github: dannnylo
data/.github/workflows/ci.yml CHANGED
@@ -6,26 +6,31 @@ jobs:
6
6
  strategy:
7
7
  matrix:
8
8
  ruby:
9
- - '2.5.x'
10
- - '2.6.x'
11
- - '2.7.x'
9
+ - '3.2.0'
10
+ - '3.4.2'
11
+ repository:
12
+ - 'ppa:alex-p/tesseract-ocr5'
13
+ - 'ppa:alex-p/tesseract-ocr-devel'
12
14
  steps:
13
15
  - uses: actions/checkout@v2
14
16
  - name: Install tesseract-ocr
15
17
  run: |
16
- sudo add-apt-repository ppa:alex-p/tesseract-ocr -y
18
+ sudo add-apt-repository ${{ matrix.repository }} -y
17
19
  sudo apt-get update -q
18
20
  sudo apt-get install tesseract-ocr tesseract-ocr-eng ghostscript -y
21
+ tesseract --version
19
22
  - name: Setup Ruby
20
- uses: actions/setup-ruby@v1
23
+ uses: ruby/setup-ruby@v1
21
24
  with:
22
25
  ruby-version: ${{ matrix.ruby }}
23
26
  - name: Bundle
24
- env:
25
- MTSR_RAILS_VERSION: ${{ matrix.rails }}
26
27
  run: |
27
28
  gem uninstall -aIx bundler
28
29
  gem install bundler
29
30
  bundle install --jobs 4 --retry 3
30
31
  - name: Test
31
32
  run: bundle exec rake
33
+ - name: Coverage
34
+ env:
35
+ CODACY_PROJECT_TOKEN: ${{ secrets.CODACY_PROJECT_TOKEN }}
36
+ run: bash <(curl -Ls https://coverage.codacy.com/get.sh) report -l Ruby -r coverage/lcov/*
data/.rubocop.yml CHANGED
@@ -1,3 +1,6 @@
1
+ AllCops:
2
+ NewCops: enable
3
+ SuggestExtensions: false
1
4
 
2
5
  Layout/LineLength:
3
6
  Max: 150
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ # Changes
2
+
3
+ ## v3.1.4
4
+ # Changed
5
+ * Temporary hocr file is deleted after the file is processed.
6
+
7
+ ## v3.1.3
8
+ * Fixed a configuration error that wouldn't allow you to do different kinds of calls on the same object, for example calling .to_box and then .to_s would result in unexpected behavior.
9
+
1
10
  ## v3.1.2
2
11
 
3
12
  #### Added
data/Gemfile CHANGED
@@ -6,3 +6,13 @@ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
6
 
7
7
  # Specify your gem's dependencies in rtesseract.gemspec
8
8
  gemspec
9
+
10
+ group :development, :test do
11
+ gem 'bundler', '~> 2'
12
+ gem 'rake'
13
+ gem 'rspec'
14
+
15
+ gem 'simplecov'
16
+ gem 'simplecov-cobertura'
17
+ gem 'simplecov-lcov'
18
+ end
data/Gemfile.lock CHANGED
@@ -1,55 +1,48 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rtesseract (3.1.2)
4
+ rtesseract (3.1.4)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
- coveralls (0.8.23)
10
- json (>= 1.8, < 3)
11
- simplecov (~> 0.16.1)
12
- term-ansicolor (~> 1.3)
13
- thor (>= 0.19.4, < 2.0)
14
- tins (~> 1.6)
15
- diff-lcs (1.3)
16
- docile (1.3.2)
17
- json (2.3.0)
18
- rake (13.0.1)
19
- rspec (3.9.0)
20
- rspec-core (~> 3.9.0)
21
- rspec-expectations (~> 3.9.0)
22
- rspec-mocks (~> 3.9.0)
23
- rspec-core (3.9.1)
24
- rspec-support (~> 3.9.1)
25
- rspec-expectations (3.9.1)
9
+ diff-lcs (1.4.4)
10
+ docile (1.4.0)
11
+ rake (13.0.6)
12
+ rspec (3.10.0)
13
+ rspec-core (~> 3.10.0)
14
+ rspec-expectations (~> 3.10.0)
15
+ rspec-mocks (~> 3.10.0)
16
+ rspec-core (3.10.1)
17
+ rspec-support (~> 3.10.0)
18
+ rspec-expectations (3.10.1)
26
19
  diff-lcs (>= 1.2.0, < 2.0)
27
- rspec-support (~> 3.9.0)
28
- rspec-mocks (3.9.1)
20
+ rspec-support (~> 3.10.0)
21
+ rspec-mocks (3.10.2)
29
22
  diff-lcs (>= 1.2.0, < 2.0)
30
- rspec-support (~> 3.9.0)
31
- rspec-support (3.9.2)
32
- simplecov (0.16.1)
23
+ rspec-support (~> 3.10.0)
24
+ rspec-support (3.10.2)
25
+ simplecov (0.21.2)
33
26
  docile (~> 1.1)
34
- json (>= 1.8, < 3)
35
- simplecov-html (~> 0.10.0)
36
- simplecov-html (0.10.2)
37
- sync (0.5.0)
38
- term-ansicolor (1.7.1)
39
- tins (~> 1.0)
40
- thor (1.0.1)
41
- tins (1.24.1)
42
- sync
27
+ simplecov-html (~> 0.11)
28
+ simplecov_json_formatter (~> 0.1)
29
+ simplecov-cobertura (1.4.2)
30
+ simplecov (~> 0.8)
31
+ simplecov-html (0.12.3)
32
+ simplecov-lcov (0.8.0)
33
+ simplecov_json_formatter (0.1.3)
43
34
 
44
35
  PLATFORMS
45
36
  ruby
46
37
 
47
38
  DEPENDENCIES
48
39
  bundler (~> 2)
49
- coveralls
50
40
  rake
51
41
  rspec
52
42
  rtesseract!
43
+ simplecov
44
+ simplecov-cobertura
45
+ simplecov-lcov
53
46
 
54
47
  BUNDLED WITH
55
- 2.1.4
48
+ 2.4.20
data/README.md CHANGED
@@ -6,8 +6,11 @@
6
6
  <a href='https://github.com/dannnylo/rtesseract/workflows/CI/badge.svg'>
7
7
  <img src="https://github.com/dannnylo/rtesseract/workflows/CI/badge.svg" alt="Build Status" />
8
8
  </a>
9
- <a href='https://coveralls.io/r/dannnylo/rtesseract?branch=master'>
10
- <img src="https://coveralls.io/repos/dannnylo/rtesseract/badge.png?branch=master" alt="Coverage Status" />
9
+ <a href='https://app.codacy.com/project/badge/Grade/316a48934db8415d84d2f9a318b0f837'>
10
+ <img src="https://app.codacy.com/project/badge/Grade/316a48934db8415d84d2f9a318b0f837" alt="Coverage Status" />
11
+ </a>
12
+ <a href='https://app.codacy.com/project/badge/Coverage/316a48934db8415d84d2f9a318b0f837'>
13
+ <img src="https://app.codacy.com/project/badge/Coverage/316a48934db8415d84d2f9a318b0f837" alt="Coverage" />
11
14
  </a>
12
15
  <a href='https://codeclimate.com/github/dannnylo/rtesseract'>
13
16
  <img src="https://codeclimate.com/github/dannnylo/rtesseract.png" />
@@ -17,10 +20,21 @@ Ruby library for working with the Tesseract OCR.
17
20
 
18
21
  ## Installation
19
22
 
20
- Check if tesseract ocr programs is installed:
23
+ Check if tesseract ocr programs are installed:
21
24
 
22
25
  $ tesseract --version
23
26
 
27
+ If not, you can install them with a command like:
28
+
29
+ $ apt install tesseract-ocr
30
+
31
+ or
32
+
33
+ $ brew install tesseract
34
+
35
+ or for Heroku 22 to add the buildpack https://github.com/pathwaysmedical/heroku-buildpack-tesseract
36
+
37
+
24
38
  Add this line to your application's Gemfile:
25
39
 
26
40
  ```ruby
@@ -9,5 +9,9 @@ class RTesseract
9
9
  def temp_file_path
10
10
  Pathname.new(Dir.tmpdir).join("rtesseract_#{SecureRandom.uuid}").to_s
11
11
  end
12
+
13
+ def remove_tmp_file(absolute_file_path)
14
+ File.delete(absolute_file_path) if File.file?(absolute_file_path)
15
+ end
12
16
  end
13
17
  end
@@ -6,10 +6,13 @@ class RTesseract
6
6
 
7
7
  class << self
8
8
  def run(source, errors, options)
9
- options.tessedit_create_hocr = 1
9
+ options = options.merge({ tessedit_create_hocr: 1 })
10
10
 
11
11
  RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
12
- parse(File.read("#{output_path}.hocr"))
12
+ filename = "#{output_path}.hocr"
13
+ content = File.read(filename)
14
+ remove_tmp_file(filename)
15
+ parse(content)
13
16
  end
14
17
  end
15
18
 
@@ -20,7 +23,7 @@ class RTesseract
20
23
  def parse_line(line)
21
24
  return unless line.match?(/oc(rx|r)_word/)
22
25
 
23
- word = line.match(/(?<=>)(.*?)(?=<)/).to_s
26
+ word = line.to_s.scan(/>(.*)</).flatten.first.to_s
24
27
 
25
28
  return if word.strip == ''
26
29
 
@@ -39,11 +42,11 @@ class RTesseract
39
42
  end
40
43
 
41
44
  def parse_position(line)
42
- line.match(/(?<=title)(.*?)(?=;)/).to_s.split(' ')
45
+ line.match(/(?<=title)(.*?)(?=;)/).to_s.split
43
46
  end
44
47
 
45
48
  def parse_confidence(line)
46
- line.match(/(?<=;)(.*?)(?=')/).to_s.split(' ')
49
+ line.match(/(?<=;)(.*?)(?=')/).to_s.split
47
50
  end
48
51
  end
49
52
  end
@@ -5,7 +5,7 @@ class RTesseract
5
5
  extend Base
6
6
 
7
7
  def self.run(source, errors, options)
8
- options.tessedit_create_pdf = 1
8
+ options = options.merge({ tessedit_create_pdf: 1 })
9
9
 
10
10
  RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
11
11
  File.open("#{output_path}.pdf", 'r')
@@ -5,7 +5,9 @@ require 'open3'
5
5
  class RTesseract
6
6
  module Text
7
7
  def self.run(source, errors, options)
8
- RTesseract::Command.new(source, 'stdout', errors, options).run
8
+ text = RTesseract::Command.new(source, 'stdout', errors, options).run
9
+ text = text.gsub("\f", '') if text.is_a?(String)
10
+ text
9
11
  end
10
12
  end
11
13
  end
@@ -5,9 +5,14 @@ class RTesseract
5
5
  extend Base
6
6
 
7
7
  def self.run(source, errors, options)
8
- options.tessedit_create_tsv = 1
8
+ options = options.merge({ tessedit_create_tsv: 1 })
9
9
 
10
- RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
10
+ RTesseract::Command.new(
11
+ source,
12
+ temp_file_path,
13
+ errors,
14
+ options
15
+ ).run do |output_path|
11
16
  File.open("#{output_path}.tsv", 'r')
12
17
  end
13
18
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class RTesseract
4
- VERSION = '3.1.2'
4
+ VERSION = '3.1.4'
5
5
  end
data/lib/rtesseract.rb CHANGED
@@ -12,7 +12,7 @@ require 'rtesseract/tsv'
12
12
  class RTesseract
13
13
  class Error < StandardError; end
14
14
 
15
- attr_reader :config, :source
15
+ attr_reader :config, :source, :errors
16
16
 
17
17
  def initialize(src = '', options = {})
18
18
  @source = src
@@ -21,7 +21,7 @@ class RTesseract
21
21
  end
22
22
 
23
23
  def to_box
24
- Box.run(@source, @errors, config)
24
+ Box.run(@source, @errors, @config)
25
25
  end
26
26
 
27
27
  def words
@@ -29,22 +29,20 @@ class RTesseract
29
29
  end
30
30
 
31
31
  def to_pdf
32
- Pdf.run(@source, @errors, config)
32
+ Pdf.run(@source, @errors, @config)
33
33
  end
34
34
 
35
35
  def to_tsv
36
- Tsv.run(@source, @errors, config)
36
+ Tsv.run(@source, @errors, @config)
37
37
  end
38
38
 
39
39
  # Output value
40
40
  def to_s
41
- Text.run(@source, @errors, config)
41
+ Text.run(@source, @errors, @config)
42
42
  end
43
43
 
44
44
  # Remove spaces and break-lines
45
45
  def to_s_without_spaces
46
46
  to_s.gsub(/\s/, '')
47
47
  end
48
-
49
- attr_reader :errors
50
48
  end
data/rtesseract.gemspec CHANGED
@@ -20,12 +20,10 @@ Gem::Specification.new do |spec|
20
20
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
21
21
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
22
  end
23
+ spec.required_ruby_version = '>= 2.7'
23
24
  spec.bindir = 'exe'
24
25
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
25
26
  spec.require_paths = ['lib']
26
27
 
27
- spec.add_development_dependency 'bundler', '~> 2'
28
- spec.add_development_dependency 'coveralls'
29
- spec.add_development_dependency 'rake'
30
- spec.add_development_dependency 'rspec'
28
+ spec.metadata['rubygems_mfa_required'] = 'true'
31
29
  end
metadata CHANGED
@@ -1,71 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.2
4
+ version: 3.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danilo Jeremias da Silva
8
- autorequire:
9
8
  bindir: exe
10
9
  cert_chain: []
11
- date: 2020-08-23 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: bundler
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '2'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '2'
27
- - !ruby/object:Gem::Dependency
28
- name: coveralls
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: rake
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: rspec
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '0'
10
+ date: 2025-04-30 00:00:00.000000000 Z
11
+ dependencies: []
69
12
  description: Ruby library for working with the Tesseract OCR.
70
13
  email:
71
14
  - dannnylo@gmail.com
@@ -73,7 +16,9 @@ executables: []
73
16
  extensions: []
74
17
  extra_rdoc_files: []
75
18
  files:
19
+ - ".deepsource.toml"
76
20
  - ".document"
21
+ - ".github/FUNDING.yml"
77
22
  - ".github/workflows/ci.yml"
78
23
  - ".gitignore"
79
24
  - ".hound.yml"
@@ -102,8 +47,8 @@ files:
102
47
  homepage: http://github.com/dannnylo/rtesseract
103
48
  licenses:
104
49
  - MIT
105
- metadata: {}
106
- post_install_message:
50
+ metadata:
51
+ rubygems_mfa_required: 'true'
107
52
  rdoc_options: []
108
53
  require_paths:
109
54
  - lib
@@ -111,15 +56,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
111
56
  requirements:
112
57
  - - ">="
113
58
  - !ruby/object:Gem::Version
114
- version: '0'
59
+ version: '2.7'
115
60
  required_rubygems_version: !ruby/object:Gem::Requirement
116
61
  requirements:
117
62
  - - ">="
118
63
  - !ruby/object:Gem::Version
119
64
  version: '0'
120
65
  requirements: []
121
- rubygems_version: 3.1.2
122
- signing_key:
66
+ rubygems_version: 3.6.3
123
67
  specification_version: 4
124
68
  summary: Ruby library for working with the Tesseract OCR.
125
69
  test_files: []