simple-ocr 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/simple-ocr.rb +8 -0
- data/lib/simple-ocr/image.rb +20 -0
- data/lib/simple-ocr/path.rb +52 -0
- data/lib/simple-ocr/scan.rb +54 -0
- data/lib/simple-ocr/zonal_ocr.rb +5 -0
- metadata +50 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1f3b37c06436ef5f307e64c4033e5116b531f55c
|
4
|
+
data.tar.gz: 55a206753bfaec92711f8e42f2e5516dcde68ed6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8336d1620e4982e00961862ce62f11db12344cb671712988c8258a34ddd2a64b021e36116fe541d9d81c26211e154cbff41e30c7d2f2f07d168a8f4b43ea3f54
|
7
|
+
data.tar.gz: 23957867d8bba43086ee2cb9f76a331f9a9da4b68c7811cbc37bd227ca78a2b6e9acce9dab58404242b4e9da41df6eb4e3aa9813912d3d743a6c5b4623079039
|
data/lib/simple-ocr.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module OCR
  # Public entry point of the gem: remembers the path of an input file and
  # hands it to OCR::Scan when a scan is requested.
  class Image
    # Remember the input file to be scanned.
    #
    # @param path [String] path to the input file
    def initialize(path)
      @image = path
    end

    # Run OCR on the input file (main function).
    #
    # All arguments are forwarded unchanged to OCR::Scan.
    #
    # @param output_file [String] path to the output file
    # @param options [String] conversion options (e.g. language)
    # @param type [String] output format of the file
    # @return whatever OCR::Scan#scan_img returns
    def scan(output_file, options, type)
      scanner = Scan.new(@image, output_file, options, type)
      scanner.scan_img
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module OCR
  # Derives the file names used while processing one input file: the name
  # and extension, the PNG counterpart of the path, and the path of the
  # "cleaned" intermediate image.
  class Path
    # File extensions (without the leading dot) known to this class.
    EXTENS = { :pdf => "pdf", :png => "png" }.freeze

    # Remember the input file.
    #
    # @param input_file [String] path to the input file
    def initialize(input_file)
      @input_file = input_file
    end

    # Split the file name into its name and extension.
    #
    # Only the LAST dot separates the extension, so "my.scan.pdf" yields
    # ["my.scan", "pdf"] rather than treating "scan" as the extension.
    #
    # @return [Array<String>] [name, extension]; just [name] when the file
    #   name has no extension
    def name_exten
      filename = get_filename
      dotted = File.extname(filename)
      stem = File.basename(filename, dotted)
      extension = dotted.sub(/\A\./, "")
      extension.empty? ? [stem] : [stem, extension]
    end

    # Duplicate the input file path so callers can modify the copy freely.
    #
    # @return [String] a copy of the input file path
    def duplicate_path
      @input_file.dup
    end

    # Path of the image a PDF input is converted to: the input path with its
    # extension replaced by "png" ("png" is appended when there is none).
    #
    # @return [String] converted image path
    def image_path
      converted = duplicate_path
      extension = name_exten[1]
      return "#{converted}.#{EXTENS[:png]}" unless extension

      # Replace the LAST occurrence only; String#[]= with a substring would
      # rewrite the first match, e.g. a directory that is itself named "pdf".
      converted[converted.rindex(extension), extension.length] = EXTENS[:png]
      converted
    end

    # Path of the cleaned image: same directory as the input, file name
    # "cleaned_<name>.png".
    #
    # @return [String] cleaned image path
    def clean_image_path
      filename = get_filename
      cleaned = duplicate_path
      # Anchor on the last occurrence so an identical directory name earlier
      # in the path is left untouched.
      cleaned[cleaned.rindex(filename), filename.length] = "cleaned_#{name_exten[0]}.png"
      cleaned
    end

    # Get the file name (last path component) of the input file.
    #
    # @return [String] file name
    def get_filename
      File.basename(@input_file)
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'fileutils'
require 'open3'
require 'shellwords'
|
2
|
+
|
3
|
+
module OCR
  # Runs the OCR pipeline for one input file: optional PDF-to-PNG conversion
  # (gs), image clean-up (textcleaner script), text recognition (tesseract)
  # and removal of the intermediate files.
  #
  # External programs are started with argument vectors instead of
  # interpolated shell strings, so paths containing quotes, spaces or other
  # shell metacharacters are passed through literally.
  class Scan
    EXTENS = %w{pdf}.freeze

    # Store the job parameters and, when the input is a PDF, convert it to an
    # image right away.
    #
    # @param input_file [String] path to the input file
    # @param output_file [String] path to the output file
    # @param options [String] conversion options (e.g. language); split into
    #   separate arguments using shell word rules
    # @param type [String] output format of the file; split the same way
    def initialize(input_file, output_file, options, type)
      @output_file = output_file
      @options = options
      @type = type
      @input_file = input_file

      path = OCR::Path.new(input_file)
      if pdf_input?
        @image = path.image_path
        convert_to_img
      else
        @image = input_file
      end
      @clean_image = path.clean_image_path
    end

    # Convert the PDF input to a PNG image with Ghostscript.
    # OCR::MIN_DENSITY is defined outside this file and is used as the
    # rendering resolution (-r).
    #
    # @return [String] captured standard output of gs
    def convert_to_img
      run("gs", "-sDEVICE=png16m", "-r#{OCR::MIN_DENSITY}", "-o", @image, @input_file)
    end

    # OCR of the input: clean the image, run tesseract on the cleaned image,
    # then delete the intermediate files. The intermediate files are deleted
    # even when one of the steps raises.
    #
    # @return the result of #delete_files
    def scan_img
      begin
        clean_img
        run("tesseract", @clean_image, *split_words(@options), @output_file, *split_words(@type))
      ensure
        removed = delete_files
      end
      removed
    end

    # Execute a command through Open3.popen3.
    #
    # Without a block the caller receives the raw streams and wait thread and
    # is responsible for closing them. With a block, popen3 closes the
    # streams itself and the block's value is returned.
    #
    # @param command [String] command line to run
    # @return [Array, Object] [stdin, stdout, stderr, wait_thread] without a
    #   block, otherwise the block's value
    def exec_command(command, &block)
      Open3.popen3(command, &block)
    end

    # Run the textcleaner shell script on the image, writing the cleaned
    # image. The script is looked up as ./textcleaner, i.e. relative to the
    # current working directory.
    # NOTE(review): the script is not among the files listed for this gem -
    # confirm where it is expected to come from.
    #
    # @return [String] captured standard output of the script
    def clean_img
      run("sh", "./textcleaner", "-g", "-e", "stretch", "-f", "25", "-o", "20",
          "-t", "30", "-u", "-s", "1", "-T", "-p", "20", @image, @clean_image)
    end

    # Delete the files that are no longer needed after processing: the
    # cleaned image and, for PDF input, the converted image. rm_f is used
    # because both are single files; nothing is ever removed recursively.
    def delete_files
      FileUtils.rm_f(@clean_image)
      FileUtils.rm_f(@image) if pdf_input?
    end

    private

    # True when the input file's extension is the PDF extension known to
    # OCR::Path.
    def pdf_input?
      OCR::Path.new(@input_file).name_exten[1] == OCR::Path::EXTENS[:pdf]
    end

    # Run a program without a shell and return its captured standard output.
    def run(*argv)
      Open3.capture2(*argv).first
    end

    # Break a caller-supplied option string into separate arguments.
    def split_words(value)
      Shellwords.split(value.to_s)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: simple-ocr
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Swaathi Kakarla
|
8
|
+
- Shilpi Agrawal
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2015-10-28 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Provides smart conversion of all scanned Images.
|
15
|
+
email: shilpi@skcript.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/simple-ocr.rb
|
21
|
+
- lib/simple-ocr/image.rb
|
22
|
+
- lib/simple-ocr/path.rb
|
23
|
+
- lib/simple-ocr/scan.rb
|
24
|
+
- lib/simple-ocr/zonal_ocr.rb
|
25
|
+
homepage: http://www.skcript.com
|
26
|
+
licenses:
|
27
|
+
- Closed
|
28
|
+
metadata: {}
|
29
|
+
post_install_message:
|
30
|
+
rdoc_options: []
|
31
|
+
require_paths:
|
32
|
+
- lib
|
33
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '0'
|
43
|
+
requirements: []
|
44
|
+
rubyforge_project:
|
45
|
+
rubygems_version: 2.4.5
|
46
|
+
signing_key:
|
47
|
+
specification_version: 4
|
48
|
+
summary: OCR Engine by Skcript
|
49
|
+
test_files: []
|
50
|
+
has_rdoc:
|