simple-ocr 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/simple-ocr/image.rb +2 -3
- data/lib/simple-ocr/path.rb +5 -5
- data/lib/simple-ocr/scan.rb +9 -5
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d048983e1a692eb7aaa4d35c412c735a122bf8e4
|
4
|
+
data.tar.gz: 5edad1d9e5ddb4182a93357852510c9d4d0328f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e4474f30cf9fb1c352b9b93b0e92082e45d51eff876bad4b1d7c7c9e4938d863c2eccbfc93fff57b96c279d516b22cfe27d05a4e13375cfc87da72ca9037ee6
|
7
|
+
data.tar.gz: a9b91887c5802e422fb40eaec0543d8dd754c2528055dd7ff2ec8e8b5ea296746c0fbed0697e67c008e27b1330df6da95699d86812e4247c25399f3a309ae830
|
data/lib/simple-ocr/image.rb
CHANGED
@@ -13,8 +13,7 @@ module OCR
|
|
13
13
|
#
|
14
14
|
# @params [String, String, String] path to output file, options of conversion (e.g. Language), output format of file.
|
15
15
|
def scan(output_file, options, type)
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
Scan.new(@image, output_file, options, type).scan_img
|
17
|
+
end
|
19
18
|
end
|
20
19
|
end
|
data/lib/simple-ocr/path.rb
CHANGED
@@ -21,7 +21,7 @@ module OCR
|
|
21
21
|
#
|
22
22
|
# @return [String] input file path
|
23
23
|
def duplicate_path
|
24
|
-
|
24
|
+
@input_file.dup
|
25
25
|
end
|
26
26
|
|
27
27
|
# From PDF to Image conversion
|
@@ -30,7 +30,7 @@ module OCR
|
|
30
30
|
def image_path
|
31
31
|
duppath = duplicate_path
|
32
32
|
duppath[name_exten[1]] = Path::EXTENS[:png]
|
33
|
-
|
33
|
+
duppath
|
34
34
|
end
|
35
35
|
|
36
36
|
# Clean your Input File
|
@@ -38,15 +38,15 @@ module OCR
|
|
38
38
|
# @return [String] Cleaned Image Path
|
39
39
|
def clean_image_path
|
40
40
|
duppath = duplicate_path
|
41
|
-
duppath[get_filename] = "cleaned_"+
|
42
|
-
|
41
|
+
duppath[get_filename] = "cleaned_"+get_filename+".png"
|
42
|
+
duppath
|
43
43
|
end
|
44
44
|
|
45
45
|
# Get the FileName
|
46
46
|
#
|
47
47
|
# @return [String] Filename
|
48
48
|
def get_filename
|
49
|
-
File.basename(@input_file)
|
49
|
+
File.basename(@input_file)
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
data/lib/simple-ocr/scan.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'open3'
|
2
|
+
require 'fileutils'
|
2
3
|
|
3
4
|
module OCR
|
4
5
|
class Scan
|
@@ -13,20 +14,20 @@ module OCR
|
|
13
14
|
@options = options
|
14
15
|
@type = handle_output_type(type)
|
15
16
|
@input_file = input_file
|
16
|
-
if
|
17
|
+
if pdf?(input_file)
|
17
18
|
@image = OCR::Path.new(input_file).image_path
|
18
19
|
convert_to_img
|
19
20
|
else
|
20
21
|
@image = input_file
|
21
22
|
end
|
22
|
-
@clean_image = OCR::Path.new(
|
23
|
+
@clean_image = OCR::Path.new(output_file).clean_image_path
|
23
24
|
end
|
24
25
|
|
25
26
|
def handle_output_type(type)
|
26
27
|
if type == :pdf
|
27
|
-
|
28
|
+
'pdf'
|
28
29
|
elsif type == :hocr
|
29
|
-
|
30
|
+
'hocr'
|
30
31
|
else
|
31
32
|
nil.to_s
|
32
33
|
end
|
@@ -59,8 +60,11 @@ module OCR
|
|
59
60
|
# Deleting unnecessary files after processing.
|
60
61
|
def delete_files
|
61
62
|
FileUtils.rm_rf(@clean_image)
|
62
|
-
FileUtils.rm_rf(@image) if
|
63
|
+
FileUtils.rm_rf(@image) if pdf?
|
63
64
|
end
|
64
65
|
|
66
|
+
def pdf?(input_file = @input_file)
|
67
|
+
OCR::Path.new(input_file).name_exten[1] == OCR::Path::EXTENS[:pdf]
|
68
|
+
end
|
65
69
|
end
|
66
70
|
end
|