simple-ocr 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1f3b37c06436ef5f307e64c4033e5116b531f55c
4
- data.tar.gz: 55a206753bfaec92711f8e42f2e5516dcde68ed6
3
+ metadata.gz: 9709934460b8582dd2a2ddf24c9c46a6ea1fed9b
4
+ data.tar.gz: da7c6d2f038b5dade09dc8e18a51d1a83b2be5bb
5
5
  SHA512:
6
- metadata.gz: 8336d1620e4982e00961862ce62f11db12344cb671712988c8258a34ddd2a64b021e36116fe541d9d81c26211e154cbff41e30c7d2f2f07d168a8f4b43ea3f54
7
- data.tar.gz: 23957867d8bba43086ee2cb9f76a331f9a9da4b68c7811cbc37bd227ca78a2b6e9acce9dab58404242b4e9da41df6eb4e3aa9813912d3d743a6c5b4623079039
6
+ metadata.gz: a8b518651bc8d31ad658199302917d0b313d753d39128b4ceb67404ffbaed31bbc5343a03b90d43a50be60e529c4b2cce157f7e56bfce0007ea8d0a298e87401
7
+ data.tar.gz: 68a05698d7de9d6738d0a4f68aebfc2455ad75a2a6f7930a2e5db9fa9b8f2aac3885a0f01f83dec916c2c0217e1d31f4ec8963f13bad796c9fce299e071b7811
@@ -13,8 +13,7 @@ module OCR
13
13
  #
14
14
  # @params [String, String, String] path to output file, options of conversion (e.g. Language), output format of file.
15
15
  def scan(output_file, options, type)
16
- Scan.new(@image, output_file, options, type).scan_img
17
- end
18
-
16
+ Scan.new(@image, output_file, options, type).scan_img
17
+ end
19
18
  end
20
19
  end
@@ -21,7 +21,7 @@ module OCR
21
21
  #
22
22
  # @return [String] input file path
23
23
  def duplicate_path
24
- return @input_file.dup
24
+ @input_file.dup
25
25
  end
26
26
 
27
27
  # From PDF to Image conversion
@@ -30,7 +30,7 @@ module OCR
30
30
  def image_path
31
31
  duppath = duplicate_path
32
32
  duppath[name_exten[1]] = Path::EXTENS[:png]
33
- return duppath
33
+ duppath
34
34
  end
35
35
 
36
36
  # Clean your Input File
@@ -38,15 +38,15 @@ module OCR
38
38
  # @return [String] Cleaned Image Path
39
39
  def clean_image_path
40
40
  duppath = duplicate_path
41
- duppath[get_filename] = "cleaned_"+name_exten[0]+".png"
42
- return duppath
41
+ duppath[get_filename] = "cleaned_"+get_filename+".png"
42
+ duppath
43
43
  end
44
44
 
45
45
  # Get the FileName
46
46
  #
47
47
  # @return [String] Filename
48
48
  def get_filename
49
- File.basename(@input_file).split("/")[0]
49
+ File.basename(@input_file)
50
50
  end
51
51
  end
52
52
  end
@@ -1,4 +1,5 @@
1
1
  require 'open3'
2
+ require 'fileutils'
2
3
 
3
4
  module OCR
4
5
  class Scan
@@ -11,15 +12,25 @@ module OCR
11
12
  def initialize(input_file, output_file, options, type)
12
13
  @output_file = output_file
13
14
  @options = options
14
- @type = type
15
+ @type = handle_output_type(type)
15
16
  @input_file = input_file
16
- if OCR::Path.new(input_file).name_exten[1] == OCR::Path::EXTENS[:pdf]
17
+ if pdf?(input_file)
17
18
  @image = OCR::Path.new(input_file).image_path
18
19
  convert_to_img
19
20
  else
20
21
  @image = input_file
21
22
  end
22
- @clean_image = OCR::Path.new(input_file).clean_image_path
23
+ @clean_image = OCR::Path.new(output_file).clean_image_path
24
+ end
25
+
26
+ def handle_output_type(type)
27
+ if type == :pdf
28
+ 'pdf'
29
+ elsif type == :hocr
30
+ 'hocr'
31
+ else
32
+ nil.to_s
33
+ end
23
34
  end
24
35
 
25
36
  # Conversion of PDF to Image
@@ -41,14 +52,19 @@ module OCR
41
52
 
42
53
  # Shell Script for cleaning the Image.
43
54
  def clean_img
44
- `sh ./textcleaner -g -e stretch -f 25 -o 20 -t 30 -u -s 1 -T -p 20 '#{@image}' '#{@clean_image}'`
55
+ name = 'simple-ocr'
56
+ g = Gem::Specification.find_by_name(name)
57
+ `sh #{File.join(g.full_gem_path, 'lib/textcleaner')} -g -e stretch -f 25 -o 20 -t 30 -u -s 1 -T -p 20 '#{@image}' '#{@clean_image}'`
45
58
  end
46
59
 
47
- # Deleting unnecessary file after processing.
60
+ # Deleting unnecessary files after processing.
48
61
  def delete_files
49
62
  FileUtils.rm_rf(@clean_image)
50
- FileUtils.rm_rf(@image) if OCR::Path.new(@input_file).name_exten[1] == "pdf"
63
+ FileUtils.rm_rf(@image) if pdf?
51
64
  end
52
65
 
66
+ def pdf?(input_file = @input_file)
67
+ OCR::Path.new(input_file).name_exten[1] == OCR::Path::EXTENS[:pdf]
68
+ end
53
69
  end
54
- end
70
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple-ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Swaathi Kakarla
@@ -42,9 +42,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
42
  version: '0'
43
43
  requirements: []
44
44
  rubyforge_project:
45
- rubygems_version: 2.4.5
45
+ rubygems_version: 2.4.8
46
46
  signing_key:
47
47
  specification_version: 4
48
48
  summary: OCR Engine by Skcript
49
49
  test_files: []
50
- has_rdoc: