despeck 0.1.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/.rspec +3 -0
- data/.rubocop.yml +17 -0
- data/.ruby-version +1 -0
- data/.travis.yml +58 -0
- data/Gemfile +2 -3
- data/OCR.md +36 -0
- data/README.md +96 -0
- data/{README.adoc → ROADMAP.adoc} +0 -0
- data/Rakefile +5 -3
- data/bin/console +5 -3
- data/bin/despeck +8 -0
- data/despeck.gemspec +29 -15
- data/install-vips.sh +26 -0
- data/lib/commands/ocr.rb +19 -0
- data/lib/commands/remove.rb +77 -0
- data/lib/despeck.rb +25 -4
- data/lib/despeck/cli.rb +14 -0
- data/lib/despeck/colour_checker.rb +24 -0
- data/lib/despeck/dominant_color.rb +57 -0
- data/lib/despeck/dominant_color_v2.rb +39 -0
- data/lib/despeck/logger.rb +26 -0
- data/lib/despeck/ocr.rb +16 -0
- data/lib/despeck/pdf_tools.rb +62 -0
- data/lib/despeck/version.rb +3 -1
- data/lib/despeck/watermark_mask.rb +49 -0
- data/lib/despeck/watermark_remover.rb +143 -0
- data/sensitivities.txt +12 -0
- metadata +144 -21
- data/samples/red-circle/10-06.pdf +0 -0
- data/samples/red-circle/10-08.pdf +0 -0
- data/samples/red-circle/10-09.pdf +0 -0
- data/samples/red-circle/10-11.pdf +0 -0
- data/samples/red-circle/a-despecked.jpg +0 -0
- data/samples/red-circle/a.jpg +0 -0
- data/samples/red-circle/b.jpg +0 -0
- data/samples/red-circle/c.jpg +0 -0
- data/samples/red-circle/despecked-portion-parameters.png +0 -0
- data/samples/red-circle/despecked-portion.png +0 -0
- data/samples/red-circle/despecked-watermark-parameters.png +0 -0
- data/samples/red-circle/despecked-watermark.png +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 39b755a474307d4ace2641ee3d6b7202eda4c8d5
|
4
|
+
data.tar.gz: 5cf5487d284c2e113b7b46cb866dfeb318495a37
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b1e9a14709e077d5607445f4c46cb66323e783ac80134f12e20cc4d2e67ea3bd8882009bba5bed20e23a843b90d789cc5f8fb5f51d60cc9d5fbb582b7ab57df2
|
7
|
+
data.tar.gz: 33912c0cc1999c1a652be4404819d3ae09c89677ddbdc64cf77fb993746cbc0dc82490587b8ee48959b263d7e7019d30189b901aacbea7ade497f633f52b51e3
|
data/.gitignore
CHANGED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.3.3
|
data/.travis.yml
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
sudo: false
|
2
|
+
|
3
|
+
env:
|
4
|
+
global:
|
5
|
+
- NOKOGIRI_USE_SYSTEM_LIBRARIES=true
|
6
|
+
- VIPS_VERSION_MAJOR=8
|
7
|
+
- VIPS_VERSION_MINOR=5
|
8
|
+
- VIPS_VERSION_MICRO=7
|
9
|
+
- PATH=$HOME/vips/bin:$PATH
|
10
|
+
- LD_LIBRARY_PATH=$HOME/vips/lib:$LD_LIBRARY_PATH
|
11
|
+
- PKG_CONFIG_PATH=$HOME/vips/lib/pkgconfig:$PKG_CONFIG_PATH
|
12
|
+
|
13
|
+
dist: trusty
|
14
|
+
|
15
|
+
addons:
|
16
|
+
apt:
|
17
|
+
packages:
|
18
|
+
- libexpat1-dev
|
19
|
+
- gettext
|
20
|
+
- liblcms2-dev
|
21
|
+
- libmagickwand-dev
|
22
|
+
- libopenexr-dev
|
23
|
+
- libcfitsio3-dev
|
24
|
+
- libgif-dev
|
25
|
+
- libgs-dev
|
26
|
+
- libgsf-1-dev
|
27
|
+
- libmatio-dev
|
28
|
+
- libopenslide-dev
|
29
|
+
- liborc-0.4-dev
|
30
|
+
- libpango1.0-dev
|
31
|
+
- libpoppler-glib-dev
|
32
|
+
- librsvg2-dev
|
33
|
+
- libwebp-dev
|
34
|
+
# missing on trusty, unfortunately
|
35
|
+
# - libwebpmux2
|
36
|
+
- libfftw3-dev
|
37
|
+
- libglib2.0-dev
|
38
|
+
|
39
|
+
cache:
|
40
|
+
directories:
|
41
|
+
- $HOME/vips
|
42
|
+
|
43
|
+
language: ruby
|
44
|
+
rvm:
|
45
|
+
- 2.3
|
46
|
+
- 2.4
|
47
|
+
- 2.5
|
48
|
+
|
49
|
+
script:
|
50
|
+
- bundle exec rspec spec
|
51
|
+
- bundle exec rubocop
|
52
|
+
|
53
|
+
gemfile:
|
54
|
+
- Gemfile
|
55
|
+
|
56
|
+
before_install:
|
57
|
+
- uname -a
|
58
|
+
- bash install-vips.sh --without-python
|
data/Gemfile
CHANGED
data/OCR.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# OCR with Despeck
|
2
|
+
|
3
|
+
To make OCR work, you need to install the following tools:
|
4
|
+
|
5
|
+
* Tesseract (version 3.x)
|
6
|
+
* ImageMagick (version 6.x)
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
### MacOS
|
11
|
+
|
12
|
+
To install tesseract itself:
|
13
|
+
|
14
|
+
```sh
|
15
|
+
$ brew install tesseract --all-languages
|
16
|
+
$ brew install imagemagick
|
17
|
+
```
|
18
|
+
|
19
|
+
Or you can install tesseract with some languages manually:
|
20
|
+
|
21
|
+
```sh
|
22
|
+
$ brew install tesseract wget imagemagick
|
23
|
+
$ mkdir -p ~/Downloads/tessdata
|
24
|
+
$ cd ~/Downloads/tessdata
|
25
|
+
$ wget https://github.com/tesseract-ocr/tessdata/raw/3.04.00/chi_sim.traineddata
|
26
|
+
```
|
27
|
+
|
28
|
+
The full list of trained language data files can be found here (note that they differ between Tesseract versions):
|
29
|
+
|
30
|
+
https://github.com/tesseract-ocr/tesseract/wiki/Data-Files#data-files-for-version-304305
|
31
|
+
|
32
|
+
### Ubuntu/Debian
|
33
|
+
|
34
|
+
```sh
|
35
|
+
$ apt-get install tesseract-ocr tesseract-ocr-chi-sim imagemagick
|
36
|
+
```
|
data/README.md
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/despeck.svg)](https://badge.fury.io/rb/despeck)
|
2
|
+
[![Build Status](https://travis-ci.org/riboseinc/despeck.svg?branch=master)](https://travis-ci.org/riboseinc/despeck)
|
3
|
+
|
4
|
+
# Despeck
|
5
|
+
|
6
|
+
Remove unwanted stamps or watermarks from scanned images
|
7
|
+
|
8
|
+
`despeck` is a Ruby gem that helps you remove unwanted stamps or watermarks from
|
9
|
+
scanned images/PDFs, primarily prior to OCR.
|
10
|
+
|
11
|
+
Its image processing operations are based on libvips via the
|
12
|
+
[ruby-vips](https://github.com/jcupitt/ruby-vips) Ruby bindings.
|
13
|
+
|
14
|
+
It can be used to:
|
15
|
+
|
16
|
+
* detect uniform watermarks from a series of images,
|
17
|
+
* output a watermark pattern file (image, mask) that describes a watermark pattern, and
|
18
|
+
* remove a specified watermark pattern from input images regardless of the
|
19
|
+
location of the watermark on these images.
|
20
|
+
|
21
|
+
Assumptions on input:
|
22
|
+
|
23
|
+
* The input may be a single image, or a PDF of multiple pages of images
|
24
|
+
* In the case of multiple pages, not all pages may have the watermark
|
25
|
+
* The input images are assumed to be purely monochrome text-based.
|
26
|
+
* The watermarks are colored. For example, if the watermark is a GREEN SQUARE PATTERN, for all
|
27
|
+
the pages that contain this mark, despeck will attempt to detect this pattern
|
28
|
+
and remove it.
|
29
|
+
|
30
|
+
## Installation
|
31
|
+
|
32
|
+
Install gem manually
|
33
|
+
|
34
|
+
```
|
35
|
+
$ gem install despeck
|
36
|
+
```
|
37
|
+
|
38
|
+
Or add it to your `Gemfile`
|
39
|
+
|
40
|
+
```
|
41
|
+
gem 'despeck'
|
42
|
+
```
|
43
|
+
|
44
|
+
and then run `bundle install`
|
45
|
+
|
46
|
+
## OCR
|
47
|
+
|
48
|
+
To be able to extract text via `despeck ocr` command, you'll need to install:
|
49
|
+
|
50
|
+
* Tesseract (3.x)
|
51
|
+
* ImageMagick (6.x)
|
52
|
+
* Desired languages
|
53
|
+
|
54
|
+
Installation instructions can be found here: [OCR tools installation guide](./OCR.md)
|
55
|
+
|
56
|
+
## Usage (Command Line)
|
57
|
+
|
58
|
+
Getting actual help:
|
59
|
+
|
60
|
+
```sh
|
61
|
+
# To show general help
|
62
|
+
despeck -h
|
63
|
+
despeck remove -h
|
64
|
+
```
|
65
|
+
|
66
|
+
To remove watermark:
|
67
|
+
|
68
|
+
```sh
|
69
|
+
$ despeck remove /path/to/input.jpg /path/to/output.jpg
|
70
|
+
```
|
71
|
+
|
72
|
+
With the command above, Despeck will try to detect the watermark colour and apply the best filter settings to remove the watermark. The detection may be wrong, so you can pass several parameters to help Despeck:
|
73
|
+
|
74
|
+
```sh
|
75
|
+
$ despeck remove --color 00FF00 --sensitivity 120 --black-const -60 --add-contrast /path/to/input.pdf /path/to/output.pdf
|
76
|
+
```
|
77
|
+
|
78
|
+
* `--color 00FF00` - to say watermark is ~ green.
|
79
|
+
* `--sensitivity 120` - increases sensitivity (if with default 100 watermark is still visible).
|
80
|
+
* `--black-const -60` - by default, Despeck tries to improve text quality by increasing black by -110. This may be too much for you, so you can reduce that number.
|
81
|
+
* `--add-contrast` - disabled by default, increases output image's contrast.
|
82
|
+
* `--accurate` - disabled by default. Applies filters to the area with watermark only, preserving the rest of the image untouched.
|
83
|
+
|
84
|
+
## Usage
|
85
|
+
|
86
|
+
*(still under development)*
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
wr = Despeck::WatermarkRemover.new(black_const: -90, resize: 0.01)
|
90
|
+
# => #<Despeck::WatermarkRemover:0x007f935b5a1a68 @add_contrast=true, @black_const=-90, @watermark_color=nil, @resize=0.01, @sensitivity=160, @accurate=false>
|
91
|
+
image = Vips::Image.new_from_file("/path/to/image.jpg")
|
92
|
+
# => #<Image 4816x6900 uchar, 3 bands, srgb>
|
93
|
+
output_image = wr.remove_watermark(image)
|
94
|
+
# => #<Image 4816x6900 float, 3 bands, b-w>
|
95
|
+
output_image.write_to_file('/path/to/output.jpg')
|
96
|
+
```
|
File without changes
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
#!/usr/bin/env ruby
|
2
4
|
|
3
|
-
require
|
4
|
-
require
|
5
|
+
require 'bundler/setup'
|
6
|
+
require 'despeck'
|
5
7
|
|
6
8
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
9
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -10,5 +12,5 @@ require "despeck"
|
|
10
12
|
# require "pry"
|
11
13
|
# Pry.start
|
12
14
|
|
13
|
-
require
|
15
|
+
require 'irb'
|
14
16
|
IRB.start(__FILE__)
|
data/bin/despeck
ADDED
data/despeck.gemspec
CHANGED
@@ -1,28 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
|
2
|
-
lib = File.expand_path(
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
5
|
+
require 'despeck/version'
|
5
6
|
|
6
7
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
8
|
+
spec.name = 'despeck'
|
8
9
|
spec.version = Despeck::VERSION
|
9
|
-
spec.authors = [
|
10
|
-
spec.email = [
|
10
|
+
spec.authors = ['Ribose Inc.']
|
11
|
+
spec.email = ['open.source@ribose.com']
|
11
12
|
|
12
|
-
spec.summary = 'Removes stamps and watermarks
|
13
|
-
|
13
|
+
spec.summary = 'Removes stamps and watermarks '\
|
14
|
+
"from scanned images for OCR, 'removes specks'"
|
15
|
+
spec.description = 'Removes stamps and watermarks '\
|
16
|
+
"from scanned images for OCR, 'removes specks'"
|
14
17
|
|
15
|
-
spec.homepage =
|
16
|
-
spec.license =
|
18
|
+
spec.homepage = 'https://github.com/riboseinc/despeck'
|
19
|
+
spec.license = 'MIT'
|
17
20
|
|
18
21
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
19
22
|
f.match(%r{^(test|spec|features)/})
|
20
23
|
end
|
21
|
-
spec.bindir =
|
22
|
-
spec.executables = spec.files.grep(%r{^
|
23
|
-
spec.require_paths = [
|
24
|
+
spec.bindir = 'bin'
|
25
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
26
|
+
spec.require_paths = ['lib']
|
24
27
|
|
25
|
-
spec.
|
26
|
-
|
27
|
-
spec.
|
28
|
+
spec.required_ruby_version = '>= 2.3'
|
29
|
+
|
30
|
+
spec.add_dependency 'clamp', '~> 1.2'
|
31
|
+
spec.add_dependency 'pdf-reader', '~> 2.1'
|
32
|
+
spec.add_dependency 'prawn', '~> 2.2'
|
33
|
+
spec.add_dependency 'rmagick', '~> 2'
|
34
|
+
spec.add_dependency 'rtesseract', '~> 2.2'
|
35
|
+
spec.add_dependency 'ruby-vips', '~> 2.0'
|
36
|
+
|
37
|
+
spec.add_development_dependency 'bundler', '~> 1.16'
|
38
|
+
spec.add_development_dependency 'pry'
|
39
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
40
|
+
spec.add_development_dependency 'rspec', '~> 3.0'
|
41
|
+
spec.add_development_dependency 'rubocop', '~> 0.52'
|
28
42
|
end
|
data/install-vips.sh
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
vips_site=https://github.com/jcupitt/libvips/releases/download
|
4
|
+
version=$VIPS_VERSION_MAJOR.$VIPS_VERSION_MINOR.$VIPS_VERSION_MICRO
|
5
|
+
|
6
|
+
set -e
|
7
|
+
|
8
|
+
# do we already have the correct vips built? early exit if yes
|
9
|
+
# we could check the configure params as well I guess
|
10
|
+
if [ -d "$HOME/vips/bin" ]; then
|
11
|
+
installed_version=$($HOME/vips/bin/vips --version)
|
12
|
+
escaped_version="$VIPS_VERSION_MAJOR\.$VIPS_VERSION_MINOR\.$VIPS_VERSION_MICRO"
|
13
|
+
echo "Need vips-$version"
|
14
|
+
echo "Found $installed_version"
|
15
|
+
if [[ "$installed_version" =~ ^vips-$escaped_version ]]; then
|
16
|
+
echo "Using cached directory"
|
17
|
+
exit 0
|
18
|
+
fi
|
19
|
+
fi
|
20
|
+
|
21
|
+
rm -rf $HOME/vips
|
22
|
+
wget $vips_site/v$version/vips-$version.tar.gz
|
23
|
+
tar xf vips-$version.tar.gz
|
24
|
+
cd vips-$version
|
25
|
+
CXXFLAGS=-D_GLIBCXX_USE_CXX11_ABI=0 ./configure --prefix=$HOME/vips $*
|
26
|
+
make && make install
|
data/lib/commands/ocr.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Despeck
|
4
|
+
module Commands
|
5
|
+
# Subcommand that extracts text from the input file via OCR
|
6
|
+
class Ocr < Clamp::Command
|
7
|
+
parameter 'input_file', 'Input file - either PDF or image',
|
8
|
+
attribute_name: :input_file
|
9
|
+
option ['-l', '--lang'],
|
10
|
+
'LANGUAGE',
|
11
|
+
'One of supported Tesseract languages (`eng` by default)',
|
12
|
+
default: :eng
|
13
|
+
|
14
|
+
def execute
|
15
|
+
puts Despeck::Ocr.new(input_file).text(lang: lang)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Despeck
|
4
|
+
module Commands
|
5
|
+
# Subcommand that removes watermarks from images & PDFs
|
6
|
+
class Remove < Clamp::Command
|
7
|
+
option(['-s', '--sensitivity'],
|
8
|
+
'SENSITIVITY',
|
9
|
+
'Sensitivity of algorithm, defaults to 160',
|
10
|
+
default: 160) do |s|
|
11
|
+
Integer(s)
|
12
|
+
end
|
13
|
+
|
14
|
+
option ['--add-contrast'],
|
15
|
+
:flag,
|
16
|
+
'Improve contrast of the output image'
|
17
|
+
|
18
|
+
option(['--black-const'],
|
19
|
+
'BLACK_CONSTANT',
|
20
|
+
'Constant to improve black (-100) or white (100). '\
|
21
|
+
'0 - to do nothing.',
|
22
|
+
default: -100) { |s| Integer(s) }
|
23
|
+
|
24
|
+
option ['--debug'], :flag, 'Show debug information'
|
25
|
+
|
26
|
+
option ['-c', '--color'],
|
27
|
+
'COLOR',
|
28
|
+
'Watermark primary HEX colour (example: FEFE7E)',
|
29
|
+
required: false
|
30
|
+
|
31
|
+
option ['--accurate'],
|
32
|
+
:flag,
|
33
|
+
'Change only the area with watermark, '\
|
34
|
+
'preserving the rest of the image untouched'
|
35
|
+
|
36
|
+
parameter 'input_file', 'Input file - either PDF or image',
|
37
|
+
attribute_name: :input_file
|
38
|
+
parameter 'output_file', 'Output file (same format as input)',
|
39
|
+
attribute_name: :output_file
|
40
|
+
|
41
|
+
def execute
|
42
|
+
Despeck.apply_logger_level(debug?)
|
43
|
+
|
44
|
+
if input_file.end_with?('.pdf')
|
45
|
+
images =
|
46
|
+
PdfTools.pdf_to_images(input_file).map do |image|
|
47
|
+
remove_watermark_from_image(image, nil)
|
48
|
+
end
|
49
|
+
PdfTools.images_to_pdf(images, output_file)
|
50
|
+
else
|
51
|
+
remove_watermark_from_image(input_file, output_file)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
def remove_watermark_from_image(input, output)
|
58
|
+
wr =
|
59
|
+
WatermarkRemover.new(
|
60
|
+
add_contrast: add_contrast?,
|
61
|
+
accurate: accurate?,
|
62
|
+
black_const: black_const,
|
63
|
+
sensitivity: sensitivity,
|
64
|
+
watermark_color: color
|
65
|
+
)
|
66
|
+
|
67
|
+
input_image =
|
68
|
+
input.is_a?(String) ? Vips::Image.new_from_file(input) : input
|
69
|
+
|
70
|
+
output_image = wr.remove_watermark(input_image)
|
71
|
+
return output_image unless output
|
72
|
+
|
73
|
+
output_image&.write_to_file(output)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
data/lib/despeck.rb
CHANGED
@@ -1,5 +1,26 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
require 'clamp'
|
4
|
+
require 'benchmark'
|
5
|
+
require 'pdf-reader'
|
6
|
+
require 'prawn'
|
7
|
+
require 'pry'
|
8
|
+
require 'vips'
|
9
|
+
require 'rmagick'
|
10
|
+
require 'rtesseract'
|
11
|
+
|
12
|
+
require_relative 'commands/remove'
|
13
|
+
require_relative 'commands/ocr'
|
14
|
+
|
15
|
+
require_relative 'despeck/logger'
|
16
|
+
require_relative 'despeck/dominant_color'
|
17
|
+
require_relative 'despeck/dominant_color_v2'
|
18
|
+
require_relative 'despeck/watermark_mask'
|
19
|
+
require_relative 'despeck/colour_checker'
|
20
|
+
require_relative 'despeck/watermark_remover'
|
21
|
+
require_relative 'despeck/pdf_tools'
|
22
|
+
require_relative 'despeck/ocr'
|
23
|
+
require_relative 'despeck/cli'
|
24
|
+
|
25
|
+
# Prawn helper methods are needed to calculate the proper page size
|
26
|
+
include Prawn::Measurements
|
data/lib/despeck/cli.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Despeck
|
4
|
+
# Command line interface
|
5
|
+
class CLI < Clamp::Command
|
6
|
+
option %w[--version -v], :flag, 'Show version' do
|
7
|
+
puts "Version #{Despeck::VERSION}"
|
8
|
+
exit(0)
|
9
|
+
end
|
10
|
+
|
11
|
+
subcommand('remove', 'Remove watermark', Despeck::Commands::Remove)
|
12
|
+
subcommand('ocr', 'Extract text from the image', Despeck::Commands::Ocr)
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Despeck
|
4
|
+
# Checks if image is black and white or colourized
|
5
|
+
class ColourChecker
|
6
|
+
attr_reader :image, :percent_threshold, :de_threshold
|
7
|
+
|
8
|
+
PERCENT_THRESHOLD = 99
|
9
|
+
DE_THRESHOLD = 20
|
10
|
+
|
11
|
+
def initialize(image:, **options)
|
12
|
+
@image = image
|
13
|
+
@image = @image.resize(options.fetch(:resize, 1.0))
|
14
|
+
@percent_threshold = options.fetch(:percent, PERCENT_THRESHOLD)
|
15
|
+
@de_threshold = options.fetch(:de, DE_THRESHOLD)
|
16
|
+
end
|
17
|
+
|
18
|
+
def black_and_white?
|
19
|
+
euclidean_distance =
|
20
|
+
image.colourspace('lch')[1].cast('uchar').percent(percent_threshold)
|
21
|
+
euclidean_distance <= de_threshold
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Despeck
|
4
|
+
# Finds primary watermark colour (red, green, or blue)
|
5
|
+
module DominantColor
|
6
|
+
class << self
|
7
|
+
def dominant_color(image)
|
8
|
+
color_pixels = non_black_colors(image)
|
9
|
+
primary_color(color_pixels)
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def non_black_colors(image)
|
15
|
+
pixels = image.resize(0.02).to_a
|
16
|
+
pixels.flatten(1).reject { |p| black_and_white?(p) }
|
17
|
+
end
|
18
|
+
|
19
|
+
def primary_color(colors)
|
20
|
+
red, green, blue = calculate_channels_total(colors)
|
21
|
+
|
22
|
+
case [red, green, blue].max
|
23
|
+
when red
|
24
|
+
'FF0000'
|
25
|
+
when green
|
26
|
+
'00FF00'
|
27
|
+
when blue
|
28
|
+
'0000FF'
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def calculate_channels_total(colors)
|
33
|
+
red, green, blue = 0, 0, 0
|
34
|
+
colors.each do |pixel|
|
35
|
+
r, g, b = pixel
|
36
|
+
case pixel.max
|
37
|
+
when r
|
38
|
+
red += 1
|
39
|
+
when g
|
40
|
+
green += 1
|
41
|
+
when b
|
42
|
+
blue += 1
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
[red, green, blue]
|
47
|
+
end
|
48
|
+
|
49
|
+
def black_and_white?(pixel)
|
50
|
+
average = pixel.reduce(:+) / pixel.count
|
51
|
+
min = average - 30
|
52
|
+
max = average + 30
|
53
|
+
pixel.all? { |i| min <= i && i <= max }
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Despeck
|
4
|
+
# Finds average (?) 'colourful' colour
|
5
|
+
module DominantColorV2
|
6
|
+
class << self
|
7
|
+
def dominant_color(image)
|
8
|
+
image = image.resize(0.05)
|
9
|
+
image_pixels = image.colourspace('srgb').to_a
|
10
|
+
colors = []
|
11
|
+
mask(image).to_a.each_with_index do |row, i|
|
12
|
+
row.each_with_index do |pixel, j|
|
13
|
+
next unless white_pixel?(pixel)
|
14
|
+
|
15
|
+
colors << image_pixels[i][j]
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
[average(colors, 0), average(colors, 1), average(colors, 2)]
|
20
|
+
end
|
21
|
+
|
22
|
+
private
|
23
|
+
|
24
|
+
def mask(image, sens = 5)
|
25
|
+
image = image.colourspace 'lch'
|
26
|
+
(image[1] > sens)
|
27
|
+
end
|
28
|
+
|
29
|
+
def white_pixel?(pixel)
|
30
|
+
pixel.all? { |c| c >= 245 }
|
31
|
+
end
|
32
|
+
|
33
|
+
def average(pixels, channel)
|
34
|
+
total = pixels.map { |i| i[channel] }.reduce(:+).to_f
|
35
|
+
(total / pixels.count.to_f).to_i
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Everything related to logging and output for the gem
|
4
|
+
module Despeck
|
5
|
+
def self.logger
|
6
|
+
@logger ||=
|
7
|
+
begin
|
8
|
+
l = Logger.new($stdout)
|
9
|
+
l.level = Logger::ERROR
|
10
|
+
l
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.apply_logger_level(debug = false)
|
15
|
+
logger.level = debug ? Logger::DEBUG : Logger::ERROR
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.with_level(level = Logger::ERROR)
|
19
|
+
prev_level = logger.level
|
20
|
+
logger.level = level
|
21
|
+
|
22
|
+
yield
|
23
|
+
|
24
|
+
logger.level = prev_level
|
25
|
+
end
|
26
|
+
end
|
data/lib/despeck/ocr.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Despeck
|
4
|
+
# Extracts text of desired language from the image
|
5
|
+
class Ocr
|
6
|
+
attr_reader :lang, :image_path
|
7
|
+
|
8
|
+
def initialize(image)
|
9
|
+
@image_path = image
|
10
|
+
end
|
11
|
+
|
12
|
+
def text(lang: :eng)
|
13
|
+
RTesseract.new(image_path, lang: lang).to_s
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Despeck
|
4
|
+
# Read/Write PDF files
|
5
|
+
module PdfTools
|
6
|
+
class << self
|
7
|
+
# Increase to improve image quality, decrease to improve performance
|
8
|
+
DEFAULT_DPI = 300
|
9
|
+
|
10
|
+
def pdf_to_images(pdf_path, dpi: DEFAULT_DPI)
|
11
|
+
images = []
|
12
|
+
for_each_page(pdf_path) do |page_no|
|
13
|
+
images << Vips::Image.pdfload(pdf_path, page: page_no, dpi: dpi)
|
14
|
+
end
|
15
|
+
images
|
16
|
+
end
|
17
|
+
|
18
|
+
def images_to_pdf(images, pdf_path)
|
19
|
+
doc = nil
|
20
|
+
|
21
|
+
for_each_image_file(images) do |path, page_size, pic_size, layout|
|
22
|
+
if doc
|
23
|
+
doc.start_new_page(size: page_size, layout: layout)
|
24
|
+
else
|
25
|
+
doc = Prawn::Document.new(page_size: page_size, page_layout: layout)
|
26
|
+
end
|
27
|
+
|
28
|
+
doc.image(path, position: :left, vposition: :top, fit: pic_size)
|
29
|
+
end
|
30
|
+
|
31
|
+
doc.render_file(pdf_path)
|
32
|
+
end
|
33
|
+
|
34
|
+
def pages_count(pdf_path)
|
35
|
+
PDF::Reader.new(pdf_path).pages.count
|
36
|
+
end
|
37
|
+
|
38
|
+
def for_each_page(pdf_path)
|
39
|
+
pages_count(pdf_path).times do |page_no|
|
40
|
+
yield page_no
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
|
46
|
+
def for_each_image_file(images)
|
47
|
+
images.each do |pic|
|
48
|
+
tempfile = Tempfile.new(['despeck', '.jpg'])
|
49
|
+
pic.write_to_file(tempfile.path)
|
50
|
+
|
51
|
+
page_size = pdf_size(pic)
|
52
|
+
layout = page_size.max == page_size.first ? :landscape : :portrait
|
53
|
+
yield tempfile.path, page_size, pic.size, layout
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def pdf_size(image)
|
58
|
+
image.size.map { |p| p + in2pt(1) }
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
data/lib/despeck/version.rb
CHANGED
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Despeck
|
4
|
+
# Creates B&W mask for the watermark
|
5
|
+
class WatermarkMask
|
6
|
+
attr_reader :image,
|
7
|
+
:watermark,
|
8
|
+
:no_watermark,
|
9
|
+
:sensitivity,
|
10
|
+
:mask
|
11
|
+
|
12
|
+
def initialize(image, sensitivity: 20)
|
13
|
+
@image = image
|
14
|
+
@sensitivity = sensitivity
|
15
|
+
end
|
16
|
+
|
17
|
+
def find_masks!
|
18
|
+
@mask = adjusted_chroma_mask(image)
|
19
|
+
|
20
|
+
@watermark = (image + @mask.invert)
|
21
|
+
@no_watermark = (image + @mask)
|
22
|
+
|
23
|
+
[watermark, no_watermark, mask]
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def adjusted_chroma_mask(image)
|
29
|
+
smaller_image = image.resize(0.2)
|
30
|
+
closing(chroma_mask(smaller_image))
|
31
|
+
.dilate(dilate_mask)
|
32
|
+
.resize(5)
|
33
|
+
end
|
34
|
+
|
35
|
+
def chroma_mask(img)
|
36
|
+
img = img.colourspace 'lch'
|
37
|
+
(img[1] > sensitivity)
|
38
|
+
end
|
39
|
+
|
40
|
+
def dilate_mask
|
41
|
+
@dilate_mask ||=
|
42
|
+
Vips::Image.new_from_array Array.new(3, Array.new(3, 255))
|
43
|
+
end
|
44
|
+
|
45
|
+
def closing(img)
|
46
|
+
img.dilate(dilate_mask).erode(dilate_mask)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Despeck
|
4
|
+
# Takes an image and removes watermark
|
5
|
+
class WatermarkRemover
|
6
|
+
attr_reader :add_contrast, :black_const,
|
7
|
+
:watermark_color, :resize, :sensitivity, :accurate
|
8
|
+
|
9
|
+
def initialize(options = {})
|
10
|
+
apply_options!(options)
|
11
|
+
|
12
|
+
Despeck.logger.debug "Sensitivity: #{sensitivity}"
|
13
|
+
Despeck.logger.debug "Contrast improvement: #{add_contrast}"
|
14
|
+
Despeck.logger.debug "Black level improvement: #{black_const}"
|
15
|
+
end
|
16
|
+
|
17
|
+
def remove_watermark(image)
|
18
|
+
output_image = nil
|
19
|
+
time =
|
20
|
+
Benchmark.realtime do
|
21
|
+
output_image =
|
22
|
+
if accurate
|
23
|
+
__remove_watermark_only__(image)
|
24
|
+
else
|
25
|
+
__remove_watermark__(image)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
Despeck.logger.debug "Time taken: #{time} seconds"
|
29
|
+
|
30
|
+
output_image
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def apply_options!(options)
|
36
|
+
@add_contrast = options.fetch(:add_contrast, true)
|
37
|
+
@black_const = options.fetch(:black_const, -110)
|
38
|
+
@watermark_color = options.fetch(:watermark_color, nil)
|
39
|
+
@resize = options.fetch(:resize, 0.1)
|
40
|
+
@sensitivity = options.fetch(:sensitivity, 160)
|
41
|
+
@accurate = options.fetch(:accurate, false)
|
42
|
+
end
|
43
|
+
|
44
|
+
# keep the rest of the image untouched
|
45
|
+
def __remove_watermark_only__(image)
|
46
|
+
watermark, no_watermark, mask =
|
47
|
+
WatermarkMask.new(image).find_masks!
|
48
|
+
output_image = __remove_watermark__(watermark)
|
49
|
+
return unless output_image
|
50
|
+
|
51
|
+
no_watermark = no_watermark.colourspace('b-w').bandjoin(mask.invert)
|
52
|
+
|
53
|
+
output_image
|
54
|
+
.bandjoin(mask)
|
55
|
+
.composite(no_watermark, 'over')
|
56
|
+
end
|
57
|
+
|
58
|
+
def __remove_watermark__(image)
|
59
|
+
return if no_watermark?(image)
|
60
|
+
|
61
|
+
wm_color = watermark_color || detect_watermark_color(image)
|
62
|
+
Despeck.logger.debug "Watermark colour channel detected: #{wm_color}"
|
63
|
+
output_image = grayscale_algorithm(image, wm_color)
|
64
|
+
output_image = increase_contrast(output_image) if add_contrast
|
65
|
+
output_image = apply_black_improvement(output_image)
|
66
|
+
output_image = apply_grey_to_black(output_image) if wm_color != 'FF0000'
|
67
|
+
output_image
|
68
|
+
end
|
69
|
+
|
70
|
+
def no_watermark?(image)
|
71
|
+
return false if watermark_color
|
72
|
+
|
73
|
+
if ColourChecker.new(image: image, resize: resize).black_and_white?
|
74
|
+
Despeck.logger.error "Can't find watermark, skipping."
|
75
|
+
return true
|
76
|
+
end
|
77
|
+
|
78
|
+
false
|
79
|
+
end
|
80
|
+
|
81
|
+
def detect_watermark_color(image)
|
82
|
+
Despeck::DominantColor.dominant_color(image)
|
83
|
+
# Despeck::DominantColorV2.dominant_color(image)
|
84
|
+
end
|
85
|
+
|
86
|
+
def grayscale_algorithm(image, pr_color)
|
87
|
+
rgb_params = greyscale_params(pr_color)
|
88
|
+
rgb_params << 0 if image.bands == 4
|
89
|
+
image.recomb(rgb_params)
|
90
|
+
end
|
91
|
+
|
92
|
+
def greyscale_params(pr_color)
|
93
|
+
r, g, b = hex_to_rgb(pr_color)
|
94
|
+
defaults =
|
95
|
+
case [r, g, b].max
|
96
|
+
when r
|
97
|
+
[1.2, 0.03, 0.03]
|
98
|
+
when g
|
99
|
+
[0.03, 1.4, 0.03]
|
100
|
+
when b
|
101
|
+
[0.03, 0.03, 1.4]
|
102
|
+
end
|
103
|
+
|
104
|
+
apply_sentivity(defaults)
|
105
|
+
end
|
106
|
+
|
107
|
+
# rubocop:disable Metrics/AbcSize
|
108
|
+
def apply_sentivity(rgb)
|
109
|
+
max = rgb.max
|
110
|
+
res = rgb.map do |value|
|
111
|
+
if value == max
|
112
|
+
value * (sensitivity.to_f / 100)
|
113
|
+
else
|
114
|
+
value / (sensitivity.to_f / 100)
|
115
|
+
end
|
116
|
+
end
|
117
|
+
Despeck.logger.debug "Remove channel value: #{res.max}"
|
118
|
+
Despeck.logger.debug "Untouched channels value: #{res.min}"
|
119
|
+
|
120
|
+
res
|
121
|
+
end
|
122
|
+
# rubocop:enable Metrics/AbcSize
|
123
|
+
|
124
|
+
def increase_contrast(bw_image)
|
125
|
+
bw_image.colourspace('lch') * [1, 100, 100] + [0, 0, 500]
|
126
|
+
end
|
127
|
+
|
128
|
+
def apply_black_improvement(image)
|
129
|
+
image.colourspace('b-w').linear(1, black_const)
|
130
|
+
end
|
131
|
+
|
132
|
+
def apply_grey_to_black(image)
|
133
|
+
match = [0, 0, 0]
|
134
|
+
distance = image.dE76(image.new_from_image(match))
|
135
|
+
(distance < 80).ifthenelse([0, 0, 0], image)
|
136
|
+
end
|
137
|
+
|
138
|
+
def hex_to_rgb(hex)
|
139
|
+
hex = hex.gsub(/^#/, '')
|
140
|
+
hex.chars.each_slice(2).map { |p| Integer("0x#{p.join}") }
|
141
|
+
end
|
142
|
+
end
|
143
|
+
end
|
data/sensitivities.txt
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# TODO: Add more examples to build a correct function
|
2
|
+
|
3
|
+
red - [219, 94, 85] - 140 | 219 / 140 = 1.564 (red)
|
4
|
+
light red - ? - 115 | ? / 115 = (?)
|
5
|
+
purple - [174, 140, 172] - 160 | 174 / 160 = 1.085 (red)
|
6
|
+
|
7
|
+
violet - [196, 189, 246] - 100 | 246 / 100 = 2.46 (blue)
|
8
|
+
blue - [137, 179, 217] - 130 | 217 / 130 = 1.669 (blue)
|
9
|
+
|
10
|
+
green - [154, 196, 154] - 140 | 196 / 140 = 1.207 (green)
|
11
|
+
yellow - [243, 244, 123] - 100 | 244 / 100 = 2.440 (green)
|
12
|
+
|
metadata
CHANGED
@@ -1,15 +1,99 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: despeck
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
|
-
bindir:
|
9
|
+
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-03-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: clamp
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.2'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.2'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pdf-reader
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.1'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.1'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: prawn
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '2.2'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.2'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rmagick
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '2'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rtesseract
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '2.2'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '2.2'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: ruby-vips
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '2.0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '2.0'
|
13
97
|
- !ruby/object:Gem::Dependency
|
14
98
|
name: bundler
|
15
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -24,6 +108,20 @@ dependencies:
|
|
24
108
|
- - "~>"
|
25
109
|
- !ruby/object:Gem::Version
|
26
110
|
version: '1.16'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: pry
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
27
125
|
- !ruby/object:Gem::Dependency
|
28
126
|
name: rake
|
29
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,35 +150,60 @@ dependencies:
|
|
52
150
|
- - "~>"
|
53
151
|
- !ruby/object:Gem::Version
|
54
152
|
version: '3.0'
|
55
|
-
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: rubocop
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0.52'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0.52'
|
167
|
+
description: Removes stamps and watermarks from scanned images for OCR, 'removes specks'
|
56
168
|
email:
|
57
169
|
- open.source@ribose.com
|
58
|
-
executables:
|
170
|
+
executables:
|
171
|
+
- console
|
172
|
+
- despeck
|
173
|
+
- setup
|
59
174
|
extensions: []
|
60
175
|
extra_rdoc_files: []
|
61
176
|
files:
|
62
177
|
- ".gitignore"
|
178
|
+
- ".rspec"
|
179
|
+
- ".rubocop.yml"
|
180
|
+
- ".ruby-version"
|
181
|
+
- ".travis.yml"
|
63
182
|
- CODE_OF_CONDUCT.md
|
64
183
|
- Gemfile
|
65
|
-
-
|
184
|
+
- OCR.md
|
185
|
+
- README.md
|
186
|
+
- ROADMAP.adoc
|
66
187
|
- Rakefile
|
67
188
|
- bin/console
|
189
|
+
- bin/despeck
|
68
190
|
- bin/setup
|
69
191
|
- despeck.gemspec
|
192
|
+
- install-vips.sh
|
193
|
+
- lib/commands/ocr.rb
|
194
|
+
- lib/commands/remove.rb
|
70
195
|
- lib/despeck.rb
|
196
|
+
- lib/despeck/cli.rb
|
197
|
+
- lib/despeck/colour_checker.rb
|
198
|
+
- lib/despeck/dominant_color.rb
|
199
|
+
- lib/despeck/dominant_color_v2.rb
|
200
|
+
- lib/despeck/logger.rb
|
201
|
+
- lib/despeck/ocr.rb
|
202
|
+
- lib/despeck/pdf_tools.rb
|
71
203
|
- lib/despeck/version.rb
|
72
|
-
-
|
73
|
-
-
|
74
|
-
-
|
75
|
-
- samples/red-circle/10-11.pdf
|
76
|
-
- samples/red-circle/a-despecked.jpg
|
77
|
-
- samples/red-circle/a.jpg
|
78
|
-
- samples/red-circle/b.jpg
|
79
|
-
- samples/red-circle/c.jpg
|
80
|
-
- samples/red-circle/despecked-portion-parameters.png
|
81
|
-
- samples/red-circle/despecked-portion.png
|
82
|
-
- samples/red-circle/despecked-watermark-parameters.png
|
83
|
-
- samples/red-circle/despecked-watermark.png
|
204
|
+
- lib/despeck/watermark_mask.rb
|
205
|
+
- lib/despeck/watermark_remover.rb
|
206
|
+
- sensitivities.txt
|
84
207
|
homepage: https://github.com/riboseinc/despeck
|
85
208
|
licenses:
|
86
209
|
- MIT
|
@@ -93,7 +216,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
93
216
|
requirements:
|
94
217
|
- - ">="
|
95
218
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
219
|
+
version: '2.3'
|
97
220
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
221
|
requirements:
|
99
222
|
- - ">="
|
@@ -101,8 +224,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
101
224
|
version: '0'
|
102
225
|
requirements: []
|
103
226
|
rubyforge_project:
|
104
|
-
rubygems_version: 2.
|
227
|
+
rubygems_version: 2.5.2
|
105
228
|
signing_key:
|
106
229
|
specification_version: 4
|
107
|
-
summary: Removes stamps and watermarks from scanned images for OCR,
|
230
|
+
summary: Removes stamps and watermarks from scanned images for OCR, 'removes specks'
|
108
231
|
test_files: []
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/samples/red-circle/a.jpg
DELETED
Binary file
|
data/samples/red-circle/b.jpg
DELETED
Binary file
|
data/samples/red-circle/c.jpg
DELETED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|