simple-ocr 1.0.0 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/simple-ocr/image.rb +2 -3
- data/lib/simple-ocr/path.rb +5 -5
- data/lib/simple-ocr/scan.rb +23 -7
- metadata +2 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9709934460b8582dd2a2ddf24c9c46a6ea1fed9b
|
4
|
+
data.tar.gz: da7c6d2f038b5dade09dc8e18a51d1a83b2be5bb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a8b518651bc8d31ad658199302917d0b313d753d39128b4ceb67404ffbaed31bbc5343a03b90d43a50be60e529c4b2cce157f7e56bfce0007ea8d0a298e87401
|
7
|
+
data.tar.gz: 68a05698d7de9d6738d0a4f68aebfc2455ad75a2a6f7930a2e5db9fa9b8f2aac3885a0f01f83dec916c2c0217e1d31f4ec8963f13bad796c9fce299e071b7811
|
data/lib/simple-ocr/image.rb
CHANGED
@@ -13,8 +13,7 @@ module OCR
|
|
13
13
|
#
|
14
14
|
# @params [String, String, String] path to output file, options of conversion (e.g. Language), output format of file.
|
15
15
|
def scan(output_file, options, type)
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
Scan.new(@image, output_file, options, type).scan_img
|
17
|
+
end
|
19
18
|
end
|
20
19
|
end
|
data/lib/simple-ocr/path.rb
CHANGED
@@ -21,7 +21,7 @@ module OCR
|
|
21
21
|
#
|
22
22
|
# @return [String] input file path
|
23
23
|
def duplicate_path
|
24
|
-
|
24
|
+
@input_file.dup
|
25
25
|
end
|
26
26
|
|
27
27
|
# From PDF to Image conversion
|
@@ -30,7 +30,7 @@ module OCR
|
|
30
30
|
def image_path
|
31
31
|
duppath = duplicate_path
|
32
32
|
duppath[name_exten[1]] = Path::EXTENS[:png]
|
33
|
-
|
33
|
+
duppath
|
34
34
|
end
|
35
35
|
|
36
36
|
# Clean your Input File
|
@@ -38,15 +38,15 @@ module OCR
|
|
38
38
|
# @return [String] Cleaned Image Path
|
39
39
|
def clean_image_path
|
40
40
|
duppath = duplicate_path
|
41
|
-
duppath[get_filename] = "cleaned_"+
|
42
|
-
|
41
|
+
duppath[get_filename] = "cleaned_"+get_filename+".png"
|
42
|
+
duppath
|
43
43
|
end
|
44
44
|
|
45
45
|
# Get the FileName
|
46
46
|
#
|
47
47
|
# @return [String] Filename
|
48
48
|
def get_filename
|
49
|
-
File.basename(@input_file)
|
49
|
+
File.basename(@input_file)
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
data/lib/simple-ocr/scan.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'open3'
|
2
|
+
require 'fileutils'
|
2
3
|
|
3
4
|
module OCR
|
4
5
|
class Scan
|
@@ -11,15 +12,25 @@ module OCR
|
|
11
12
|
def initialize(input_file, output_file, options, type)
|
12
13
|
@output_file = output_file
|
13
14
|
@options = options
|
14
|
-
@type = type
|
15
|
+
@type = handle_output_type(type)
|
15
16
|
@input_file = input_file
|
16
|
-
if
|
17
|
+
if pdf?(input_file)
|
17
18
|
@image = OCR::Path.new(input_file).image_path
|
18
19
|
convert_to_img
|
19
20
|
else
|
20
21
|
@image = input_file
|
21
22
|
end
|
22
|
-
@clean_image = OCR::Path.new(
|
23
|
+
@clean_image = OCR::Path.new(output_file).clean_image_path
|
24
|
+
end
|
25
|
+
|
26
|
+
def handle_output_type(type)
|
27
|
+
if type == :pdf
|
28
|
+
'pdf'
|
29
|
+
elsif type == :hocr
|
30
|
+
'hocr'
|
31
|
+
else
|
32
|
+
nil.to_s
|
33
|
+
end
|
23
34
|
end
|
24
35
|
|
25
36
|
# Conversion of PDF to Image
|
@@ -41,14 +52,19 @@ module OCR
|
|
41
52
|
|
42
53
|
# Shell Script for cleaning the Image.
|
43
54
|
def clean_img
|
44
|
-
|
55
|
+
name = 'simple-ocr'
|
56
|
+
g = Gem::Specification.find_by_name(name)
|
57
|
+
`sh #{File.join(g.full_gem_path, 'lib/textcleaner')} -g -e stretch -f 25 -o 20 -t 30 -u -s 1 -T -p 20 '#{@image}' '#{@clean_image}'`
|
45
58
|
end
|
46
59
|
|
47
|
-
# Deleting unnecessary
|
60
|
+
# Deleting unnecessary files after processing.
|
48
61
|
def delete_files
|
49
62
|
FileUtils.rm_rf(@clean_image)
|
50
|
-
FileUtils.rm_rf(@image) if
|
63
|
+
FileUtils.rm_rf(@image) if pdf?
|
51
64
|
end
|
52
65
|
|
66
|
+
def pdf?(input_file = @input_file)
|
67
|
+
OCR::Path.new(input_file).name_exten[1] == OCR::Path::EXTENS[:pdf]
|
68
|
+
end
|
53
69
|
end
|
54
|
-
end
|
70
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple-ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Swaathi Kakarla
|
@@ -42,9 +42,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
42
42
|
version: '0'
|
43
43
|
requirements: []
|
44
44
|
rubyforge_project:
|
45
|
-
rubygems_version: 2.4.
|
45
|
+
rubygems_version: 2.4.8
|
46
46
|
signing_key:
|
47
47
|
specification_version: 4
|
48
48
|
summary: OCR Engine by Skcript
|
49
49
|
test_files: []
|
50
|
-
has_rdoc:
|