simple-ocr 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/simple-ocr/image.rb +2 -3
- data/lib/simple-ocr/path.rb +5 -5
- data/lib/simple-ocr/scan.rb +23 -7
- metadata +2 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9709934460b8582dd2a2ddf24c9c46a6ea1fed9b
|
4
|
+
data.tar.gz: da7c6d2f038b5dade09dc8e18a51d1a83b2be5bb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a8b518651bc8d31ad658199302917d0b313d753d39128b4ceb67404ffbaed31bbc5343a03b90d43a50be60e529c4b2cce157f7e56bfce0007ea8d0a298e87401
|
7
|
+
data.tar.gz: 68a05698d7de9d6738d0a4f68aebfc2455ad75a2a6f7930a2e5db9fa9b8f2aac3885a0f01f83dec916c2c0217e1d31f4ec8963f13bad796c9fce299e071b7811
|
data/lib/simple-ocr/image.rb
CHANGED
@@ -13,8 +13,7 @@ module OCR
|
|
13
13
|
#
|
14
14
|
# @params [String, String, String] path to output file, options of conversion (e.g. Language), output format of file.
|
15
15
|
def scan(output_file, options, type)
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
Scan.new(@image, output_file, options, type).scan_img
|
17
|
+
end
|
19
18
|
end
|
20
19
|
end
|
data/lib/simple-ocr/path.rb
CHANGED
@@ -21,7 +21,7 @@ module OCR
|
|
21
21
|
#
|
22
22
|
# @return [String] input file path
|
23
23
|
def duplicate_path
|
24
|
-
|
24
|
+
@input_file.dup
|
25
25
|
end
|
26
26
|
|
27
27
|
# From PDF to Image conversion
|
@@ -30,7 +30,7 @@ module OCR
|
|
30
30
|
def image_path
|
31
31
|
duppath = duplicate_path
|
32
32
|
duppath[name_exten[1]] = Path::EXTENS[:png]
|
33
|
-
|
33
|
+
duppath
|
34
34
|
end
|
35
35
|
|
36
36
|
# Clean your Input File
|
@@ -38,15 +38,15 @@ module OCR
|
|
38
38
|
# @return [String] Cleaned Image Path
|
39
39
|
def clean_image_path
|
40
40
|
duppath = duplicate_path
|
41
|
-
duppath[get_filename] = "cleaned_"+
|
42
|
-
|
41
|
+
duppath[get_filename] = "cleaned_"+get_filename+".png"
|
42
|
+
duppath
|
43
43
|
end
|
44
44
|
|
45
45
|
# Get the FileName
|
46
46
|
#
|
47
47
|
# @return [String] Filename
|
48
48
|
def get_filename
|
49
|
-
File.basename(@input_file)
|
49
|
+
File.basename(@input_file)
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
data/lib/simple-ocr/scan.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'open3'
|
2
|
+
require 'fileutils'
|
2
3
|
|
3
4
|
module OCR
|
4
5
|
class Scan
|
@@ -11,15 +12,25 @@ module OCR
|
|
11
12
|
def initialize(input_file, output_file, options, type)
|
12
13
|
@output_file = output_file
|
13
14
|
@options = options
|
14
|
-
@type = type
|
15
|
+
@type = handle_output_type(type)
|
15
16
|
@input_file = input_file
|
16
|
-
if
|
17
|
+
if pdf?(input_file)
|
17
18
|
@image = OCR::Path.new(input_file).image_path
|
18
19
|
convert_to_img
|
19
20
|
else
|
20
21
|
@image = input_file
|
21
22
|
end
|
22
|
-
@clean_image = OCR::Path.new(
|
23
|
+
@clean_image = OCR::Path.new(output_file).clean_image_path
|
24
|
+
end
|
25
|
+
|
26
|
+
def handle_output_type(type)
|
27
|
+
if type == :pdf
|
28
|
+
'pdf'
|
29
|
+
elsif type == :hocr
|
30
|
+
'hocr'
|
31
|
+
else
|
32
|
+
nil.to_s
|
33
|
+
end
|
23
34
|
end
|
24
35
|
|
25
36
|
# Conversion of PDF to Image
|
@@ -41,14 +52,19 @@ module OCR
|
|
41
52
|
|
42
53
|
# Shell Script for cleaning the Image.
|
43
54
|
def clean_img
|
44
|
-
|
55
|
+
name = 'simple-ocr'
|
56
|
+
g = Gem::Specification.find_by_name(name)
|
57
|
+
`sh #{File.join(g.full_gem_path, 'lib/textcleaner')} -g -e stretch -f 25 -o 20 -t 30 -u -s 1 -T -p 20 '#{@image}' '#{@clean_image}'`
|
45
58
|
end
|
46
59
|
|
47
|
-
# Deleting unnecessary
|
60
|
+
# Deleting unnecessary files after processing.
|
48
61
|
def delete_files
|
49
62
|
FileUtils.rm_rf(@clean_image)
|
50
|
-
FileUtils.rm_rf(@image) if
|
63
|
+
FileUtils.rm_rf(@image) if pdf?
|
51
64
|
end
|
52
65
|
|
66
|
+
def pdf?(input_file = @input_file)
|
67
|
+
OCR::Path.new(input_file).name_exten[1] == OCR::Path::EXTENS[:pdf]
|
68
|
+
end
|
53
69
|
end
|
54
|
-
end
|
70
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple-ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Swaathi Kakarla
|
@@ -42,9 +42,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
42
42
|
version: '0'
|
43
43
|
requirements: []
|
44
44
|
rubyforge_project:
|
45
|
-
rubygems_version: 2.4.
|
45
|
+
rubygems_version: 2.4.8
|
46
46
|
signing_key:
|
47
47
|
specification_version: 4
|
48
48
|
summary: OCR Engine by Skcript
|
49
49
|
test_files: []
|
50
|
-
has_rdoc:
|