rtesseract 3.1.1 → 3.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e5a1940f2ac4c4429afaedc85c0d958159466285ebdd1fd25bd9942ef152a36
4
- data.tar.gz: 3971293ecf3ff95addc2f67560c9214d82bdd42d53a836c05fb3e343f7110741
3
+ metadata.gz: 120e26c05e889a9bbfa935c3d43bfcbd9432b7296aa9b144fc55fd3c43f6a6af
4
+ data.tar.gz: 147e882d0648febff070e7f422c79852be258f594ded86cbcde8b5677c49b354
5
5
  SHA512:
6
- metadata.gz: 810b41ddbd021094f5d41d5fde2a59aff0deb3c1d343e319f04e47a86f048e4030294af277017a0d694c89b5dd262431e78ed673228784a924602a1b00871f27
7
- data.tar.gz: 5122bb30c21077fbf8d9dfee754cdce7a57d99572b391ade83d1f904661888a237cf097ec5fbaee81af58015e99c7b5e7ea20b2f30b235af5e679edea443685e
6
+ metadata.gz: 9fc275b3afb190d731e0d12356867ea3a359177141fabe876a12b2df7391b65964b3086af63156bce8cc79d5e2a8f8c6c901b9ffeeef514ad7eb5482039858dd
7
+ data.tar.gz: fd6ab267bb3edfbd52afa54615ffad0e624c611270691b9e0e48cdf4ddbdbccd1565d0d52386ac664cc978fe23abf8aeb134a275f5629b2005f3464249c6455e
data/.deepsource.toml ADDED
@@ -0,0 +1,9 @@
1
+ version = 1
2
+
3
+ [[analyzers]]
4
+ name = "shell"
5
+ enabled = true
6
+
7
+ [[analyzers]]
8
+ name = "ruby"
9
+ enabled = true
@@ -0,0 +1 @@
1
+ github: dannnylo
@@ -6,26 +6,32 @@ jobs:
6
6
  strategy:
7
7
  matrix:
8
8
  ruby:
9
- - '2.5.x'
10
- - '2.6.x'
11
- - '2.7.x'
9
+ - '2.7.4'
10
+ - '3.2.0'
11
+ repository:
12
+ - 'ppa:alex-p/tesseract-ocr5'
13
+ - 'ppa:alex-p/tesseract-ocr-devel'
12
14
  steps:
13
15
  - uses: actions/checkout@v2
14
16
  - name: Install tesseract-ocr
15
17
  run: |
16
- sudo add-apt-repository ppa:alex-p/tesseract-ocr -y
18
+ sudo add-apt-repository ${{ matrix.repository }} -y
17
19
  sudo apt-get update -q
18
20
  sudo apt-get install tesseract-ocr tesseract-ocr-eng ghostscript -y
21
+ tesseract --version
19
22
  - name: Setup Ruby
20
- uses: actions/setup-ruby@v1
23
+ uses: ruby/setup-ruby@v1
21
24
  with:
22
25
  ruby-version: ${{ matrix.ruby }}
23
26
  - name: Bundle
24
- env:
25
- MTSR_RAILS_VERSION: ${{ matrix.rails }}
26
27
  run: |
27
28
  gem uninstall -aIx bundler
28
29
  gem install bundler
29
30
  bundle install --jobs 4 --retry 3
30
31
  - name: Test
31
32
  run: bundle exec rake
33
+ - name: Coverage
34
+ env:
35
+ CODACY_PROJECT_TOKEN: ${{ secrets.CODACY_PROJECT_TOKEN }}
36
+ run: bash <(curl -Ls https://coverage.codacy.com/get.sh) report -l Ruby -r coverage/lcov/*
37
+
data/.rubocop.yml CHANGED
@@ -1,3 +1,6 @@
1
+ AllCops:
2
+ NewCops: enable
3
+ SuggestExtensions: false
1
4
 
2
5
  Layout/LineLength:
3
6
  Max: 150
data/CHANGELOG.md CHANGED
@@ -1,3 +1,44 @@
1
+ # Changes
2
+ ## v3.1.3
3
+
4
+ * Fixed a configuration error that prevented different kinds of calls on the same object; for example, calling .to_box and then .to_s would result in unexpected behavior.
5
+
6
+ ## v3.1.2
7
+
8
+ #### Added
9
+
10
+ * Added confidence for each word in box mode
11
+
12
+ ## v3.1.1
13
+
14
+ #### Changed
15
+
16
+ * Changed RTesseract::Command to accept a block that is called on a successful run
17
+
18
+ ## v3.1.0
19
+
20
+ #### Changed
21
+
22
+ * Removed nokogiri dependency.
23
+
24
+ ## v3.0.4
25
+
26
+ #### Changed
27
+
28
+ * Updated dependencies in response to security alerts.
29
+
30
+ ## v3.0.5
31
+
32
+ #### Changed
33
+
34
+ * Updated dependencies in response to security alerts.
35
+
36
+ ## v3.0.4
37
+
38
+ #### Changed
39
+
40
+ * Updated dependencies in response to security alerts.
41
+
1
42
  ## v3.0.3
2
43
 
3
44
  #### Changed
data/Gemfile CHANGED
@@ -6,3 +6,13 @@ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
6
 
7
7
  # Specify your gem's dependencies in rtesseract.gemspec
8
8
  gemspec
9
+
10
+ group :development, :test do
11
+ gem 'bundler', '~> 2'
12
+ gem 'rake'
13
+ gem 'rspec'
14
+
15
+ gem 'simplecov'
16
+ gem 'simplecov-cobertura'
17
+ gem 'simplecov-lcov'
18
+ end
data/Gemfile.lock CHANGED
@@ -1,55 +1,48 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rtesseract (3.1.1)
4
+ rtesseract (3.1.3)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
- coveralls (0.8.23)
10
- json (>= 1.8, < 3)
11
- simplecov (~> 0.16.1)
12
- term-ansicolor (~> 1.3)
13
- thor (>= 0.19.4, < 2.0)
14
- tins (~> 1.6)
15
- diff-lcs (1.3)
16
- docile (1.3.2)
17
- json (2.3.0)
18
- rake (13.0.1)
19
- rspec (3.9.0)
20
- rspec-core (~> 3.9.0)
21
- rspec-expectations (~> 3.9.0)
22
- rspec-mocks (~> 3.9.0)
23
- rspec-core (3.9.1)
24
- rspec-support (~> 3.9.1)
25
- rspec-expectations (3.9.1)
9
+ diff-lcs (1.4.4)
10
+ docile (1.4.0)
11
+ rake (13.0.6)
12
+ rspec (3.10.0)
13
+ rspec-core (~> 3.10.0)
14
+ rspec-expectations (~> 3.10.0)
15
+ rspec-mocks (~> 3.10.0)
16
+ rspec-core (3.10.1)
17
+ rspec-support (~> 3.10.0)
18
+ rspec-expectations (3.10.1)
26
19
  diff-lcs (>= 1.2.0, < 2.0)
27
- rspec-support (~> 3.9.0)
28
- rspec-mocks (3.9.1)
20
+ rspec-support (~> 3.10.0)
21
+ rspec-mocks (3.10.2)
29
22
  diff-lcs (>= 1.2.0, < 2.0)
30
- rspec-support (~> 3.9.0)
31
- rspec-support (3.9.2)
32
- simplecov (0.16.1)
23
+ rspec-support (~> 3.10.0)
24
+ rspec-support (3.10.2)
25
+ simplecov (0.21.2)
33
26
  docile (~> 1.1)
34
- json (>= 1.8, < 3)
35
- simplecov-html (~> 0.10.0)
36
- simplecov-html (0.10.2)
37
- sync (0.5.0)
38
- term-ansicolor (1.7.1)
39
- tins (~> 1.0)
40
- thor (1.0.1)
41
- tins (1.24.1)
42
- sync
27
+ simplecov-html (~> 0.11)
28
+ simplecov_json_formatter (~> 0.1)
29
+ simplecov-cobertura (1.4.2)
30
+ simplecov (~> 0.8)
31
+ simplecov-html (0.12.3)
32
+ simplecov-lcov (0.8.0)
33
+ simplecov_json_formatter (0.1.3)
43
34
 
44
35
  PLATFORMS
45
36
  ruby
46
37
 
47
38
  DEPENDENCIES
48
39
  bundler (~> 2)
49
- coveralls
50
40
  rake
51
41
  rspec
52
42
  rtesseract!
43
+ simplecov
44
+ simplecov-cobertura
45
+ simplecov-lcov
53
46
 
54
47
  BUNDLED WITH
55
- 2.1.4
48
+ 2.4.20
data/README.md CHANGED
@@ -6,8 +6,11 @@
6
6
  <a href='https://github.com/dannnylo/rtesseract/workflows/CI/badge.svg'>
7
7
  <img src="https://github.com/dannnylo/rtesseract/workflows/CI/badge.svg" alt="Build Status" />
8
8
  </a>
9
- <a href='https://coveralls.io/r/dannnylo/rtesseract?branch=master'>
10
- <img src="https://coveralls.io/repos/dannnylo/rtesseract/badge.png?branch=master" alt="Coverage Status" />
9
+ <a href='https://app.codacy.com/project/badge/Grade/316a48934db8415d84d2f9a318b0f837'>
10
+ <img src="https://app.codacy.com/project/badge/Grade/316a48934db8415d84d2f9a318b0f837" alt="Code Quality Grade" />
11
+ </a>
12
+ <a href='https://app.codacy.com/project/badge/Coverage/316a48934db8415d84d2f9a318b0f837'>
13
+ <img src="https://app.codacy.com/project/badge/Coverage/316a48934db8415d84d2f9a318b0f837" alt="Coverage" />
11
14
  </a>
12
15
  <a href='https://codeclimate.com/github/dannnylo/rtesseract'>
13
16
  <img src="https://codeclimate.com/github/dannnylo/rtesseract.png" />
@@ -17,10 +20,18 @@ Ruby library for working with the Tesseract OCR.
17
20
 
18
21
  ## Installation
19
22
 
20
- Check if tesseract ocr programs is installed:
23
+ Check if tesseract ocr programs are installed:
21
24
 
22
25
  $ tesseract --version
23
26
 
27
+ If not, you can install them with a command like:
28
+
29
+ $ apt install tesseract-ocr
30
+
31
+ or
32
+
33
+ $ brew install tesseract
34
+
24
35
  Add this line to your application's Gemfile:
25
36
 
26
37
  ```ruby
@@ -100,20 +111,20 @@ This will preserve the image colors, pictures and structure in the generated pdf
100
111
  ```ruby
101
112
  RTesseract.new('test_words.png').to_box
102
113
  => [
103
- {:word => 'If', :x_start=>52, :y_start=>13, :x_end=>63, :y_end=>27},
104
- {:word => 'you', :x_start=>69, :y_start=>17, :x_end=>100, :y_end=>31},
105
- {:word => 'are', :x_start=>108, :y_start=>17, :x_end=>136, :y_end=>27},
106
- {:word => 'a', :x_start=>143, :y_start=>17, :x_end=>151, :y_end=>27},
107
- {:word => 'friend,', :x_start=>158, :y_start=>13, :x_end=>214, :y_end=>29},
108
- {:word => 'you', :x_start=>51, :y_start=>39, :x_end=>82, :y_end=>53},
109
- {:word => 'speak', :x_start=>90, :y_start=>35, :x_end=>140, :y_end=>53},
110
- {:word => 'the', :x_start=>146, :y_start=>35, :x_end=>174, :y_end=>49},
111
- {:word => 'password,', :x_start=>182, :y_start=>35, :x_end=>267, :y_end=>53},
112
- {:word => 'and', :x_start=>51, :y_start=>57, :x_end=>81, :y_end=>71},
113
- {:word => 'the', :x_start=>89, :y_start=>57, :x_end=>117, :y_end=>71},
114
- {:word => 'doors', :x_start=>124, :y_start=>57, :x_end=>172, :y_end=>71},
115
- {:word => 'will', :x_start=>180, :y_start=>57, :x_end=>208, :y_end=>71},
116
- {:word => 'open.', :x_start=>216, :y_start=>61, :x_end=>263, :y_end=>75}
114
+ { :word => 'If', :confidence=>89, :x_start=>52, :y_start=>13, :x_end=>63, :y_end=>27},
115
+ { :word => 'you', :confidence=>96, :x_start=>69, :y_start=>17, :x_end=>100, :y_end=>31},
116
+ { :word => 'are', :confidence=>92, :x_start=>108, :y_start=>17, :x_end=>136, :y_end=>27},
117
+ { :word => 'a', :confidence=>92, :x_start=>133, :y_start=>8, :x_end=>147, :y_end=>35},
118
+ { :word => 'friend,', :confidence=>95, :x_start=>158, :y_start=>13, :x_end=>214, :y_end=>29},
119
+ { :word => 'you', :confidence=>96, :x_start=>51, :y_start=>39, :x_end=>82, :y_end=>53},
120
+ { :word => 'speak', :confidence=>96, :x_start=>90, :y_start=>35, :x_end=>140, :y_end=>53},
121
+ { :word => 'the', :confidence=>96, :x_start=>146, :y_start=>35, :x_end=>174, :y_end=>49},
122
+ { :word => 'password,', :confidence=>96, :x_start=>182, :y_start=>35, :x_end=>267, :y_end=>53},
123
+ { :word => 'and', :confidence=>96, :x_start=>51, :y_start=>57, :x_end=>81, :y_end=>71},
124
+ { :word => 'the', :confidence=>96, :x_start=>89, :y_start=>57, :x_end=>117, :y_end=>71},
125
+ { :word => 'doors', :confidence=>96, :x_start=>124, :y_start=>57, :x_end=>172, :y_end=>71},
126
+ { :word => 'will', :confidence=>96, :x_start=>180, :y_start=>57, :x_end=>208, :y_end=>71},
127
+ { :word => 'open.', :confidence=>96, :x_start=>216, :y_start=>61, :x_end=>263, :y_end=>75}
117
128
  ]
118
129
  ```
119
130
 
@@ -6,7 +6,7 @@ class RTesseract
6
6
 
7
7
  class << self
8
8
  def run(source, errors, options)
9
- options.tessedit_create_hocr = 1
9
+ options = options.merge({ tessedit_create_hocr: 1 })
10
10
 
11
11
  RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
12
12
  parse(File.read("#{output_path}.hocr"))
@@ -20,16 +20,17 @@ class RTesseract
20
20
  def parse_line(line)
21
21
  return unless line.match?(/oc(rx|r)_word/)
22
22
 
23
- word = line.match(/(?<=>)(.*?)(?=<)/).to_s
23
+ word = line.to_s.scan(/>(.*)</).flatten.first.to_s
24
24
 
25
25
  return if word.strip == ''
26
26
 
27
- word_info(word, parse_position(line))
27
+ word_info(word, parse_position(line), parse_confidence(line))
28
28
  end
29
29
 
30
- def word_info(word, positions)
30
+ def word_info(word, positions, confidence)
31
31
  {
32
32
  word: word,
33
+ confidence: confidence[-1].to_i,
33
34
  x_start: positions[1].to_i,
34
35
  y_start: positions[2].to_i,
35
36
  x_end: positions[3].to_i,
@@ -38,7 +39,11 @@ class RTesseract
38
39
  end
39
40
 
40
41
  def parse_position(line)
41
- line.match(/(?<=title)(.*?)(?=;)/).to_s.split(' ')
42
+ line.match(/(?<=title)(.*?)(?=;)/).to_s.split
43
+ end
44
+
45
+ def parse_confidence(line)
46
+ line.match(/(?<=;)(.*?)(?=')/).to_s.split
42
47
  end
43
48
  end
44
49
  end
@@ -5,7 +5,7 @@ class RTesseract
5
5
  extend Base
6
6
 
7
7
  def self.run(source, errors, options)
8
- options.tessedit_create_pdf = 1
8
+ options = options.merge({ tessedit_create_pdf: 1 })
9
9
 
10
10
  RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
11
11
  File.open("#{output_path}.pdf", 'r')
@@ -5,7 +5,9 @@ require 'open3'
5
5
  class RTesseract
6
6
  module Text
7
7
  def self.run(source, errors, options)
8
- RTesseract::Command.new(source, 'stdout', errors, options).run
8
+ text = RTesseract::Command.new(source, 'stdout', errors, options).run
9
+ text = text.gsub("\f", '') if text.is_a?(String)
10
+ text
9
11
  end
10
12
  end
11
13
  end
@@ -5,9 +5,14 @@ class RTesseract
5
5
  extend Base
6
6
 
7
7
  def self.run(source, errors, options)
8
- options.tessedit_create_tsv = 1
8
+ options = options.merge({ tessedit_create_tsv: 1 })
9
9
 
10
- RTesseract::Command.new(source, temp_file_path, errors, options).run do |output_path|
10
+ RTesseract::Command.new(
11
+ source,
12
+ temp_file_path,
13
+ errors,
14
+ options
15
+ ).run do |output_path|
11
16
  File.open("#{output_path}.tsv", 'r')
12
17
  end
13
18
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class RTesseract
4
- VERSION = '3.1.1'
4
+ VERSION = '3.1.3'
5
5
  end
data/lib/rtesseract.rb CHANGED
@@ -12,7 +12,7 @@ require 'rtesseract/tsv'
12
12
  class RTesseract
13
13
  class Error < StandardError; end
14
14
 
15
- attr_reader :config, :source
15
+ attr_reader :config, :source, :errors
16
16
 
17
17
  def initialize(src = '', options = {})
18
18
  @source = src
@@ -21,7 +21,7 @@ class RTesseract
21
21
  end
22
22
 
23
23
  def to_box
24
- Box.run(@source, @errors, config)
24
+ Box.run(@source, @errors, @config)
25
25
  end
26
26
 
27
27
  def words
@@ -29,22 +29,20 @@ class RTesseract
29
29
  end
30
30
 
31
31
  def to_pdf
32
- Pdf.run(@source, @errors, config)
32
+ Pdf.run(@source, @errors, @config)
33
33
  end
34
34
 
35
35
  def to_tsv
36
- Tsv.run(@source, @errors, config)
36
+ Tsv.run(@source, @errors, @config)
37
37
  end
38
38
 
39
39
  # Output value
40
40
  def to_s
41
- Text.run(@source, @errors, config)
41
+ Text.run(@source, @errors, @config)
42
42
  end
43
43
 
44
44
  # Remove spaces and break-lines
45
45
  def to_s_without_spaces
46
46
  to_s.gsub(/\s/, '')
47
47
  end
48
-
49
- attr_reader :errors
50
48
  end
data/rtesseract.gemspec CHANGED
@@ -20,12 +20,10 @@ Gem::Specification.new do |spec|
20
20
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
21
21
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
22
  end
23
+ spec.required_ruby_version = '>= 2.7'
23
24
  spec.bindir = 'exe'
24
25
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
25
26
  spec.require_paths = ['lib']
26
27
 
27
- spec.add_development_dependency 'bundler', '~> 2'
28
- spec.add_development_dependency 'coveralls'
29
- spec.add_development_dependency 'rake'
30
- spec.add_development_dependency 'rspec'
28
+ spec.metadata['rubygems_mfa_required'] = 'true'
31
29
  end
metadata CHANGED
@@ -1,71 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.1
4
+ version: 3.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danilo Jeremias da Silva
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-04-24 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: bundler
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '2'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '2'
27
- - !ruby/object:Gem::Dependency
28
- name: coveralls
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: rake
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: rspec
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '0'
11
+ date: 2023-10-04 00:00:00.000000000 Z
12
+ dependencies: []
69
13
  description: Ruby library for working with the Tesseract OCR.
70
14
  email:
71
15
  - dannnylo@gmail.com
@@ -73,7 +17,9 @@ executables: []
73
17
  extensions: []
74
18
  extra_rdoc_files: []
75
19
  files:
20
+ - ".deepsource.toml"
76
21
  - ".document"
22
+ - ".github/FUNDING.yml"
77
23
  - ".github/workflows/ci.yml"
78
24
  - ".gitignore"
79
25
  - ".hound.yml"
@@ -102,7 +48,8 @@ files:
102
48
  homepage: http://github.com/dannnylo/rtesseract
103
49
  licenses:
104
50
  - MIT
105
- metadata: {}
51
+ metadata:
52
+ rubygems_mfa_required: 'true'
106
53
  post_install_message:
107
54
  rdoc_options: []
108
55
  require_paths:
@@ -111,14 +58,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
111
58
  requirements:
112
59
  - - ">="
113
60
  - !ruby/object:Gem::Version
114
- version: '0'
61
+ version: '2.7'
115
62
  required_rubygems_version: !ruby/object:Gem::Requirement
116
63
  requirements:
117
64
  - - ">="
118
65
  - !ruby/object:Gem::Version
119
66
  version: '0'
120
67
  requirements: []
121
- rubygems_version: 3.0.8
68
+ rubygems_version: 3.4.10
122
69
  signing_key:
123
70
  specification_version: 4
124
71
  summary: Ruby library for working with the Tesseract OCR.