simple-ocr 1.0.0 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1f3b37c06436ef5f307e64c4033e5116b531f55c
4
- data.tar.gz: 55a206753bfaec92711f8e42f2e5516dcde68ed6
3
+ metadata.gz: 9709934460b8582dd2a2ddf24c9c46a6ea1fed9b
4
+ data.tar.gz: da7c6d2f038b5dade09dc8e18a51d1a83b2be5bb
5
5
  SHA512:
6
- metadata.gz: 8336d1620e4982e00961862ce62f11db12344cb671712988c8258a34ddd2a64b021e36116fe541d9d81c26211e154cbff41e30c7d2f2f07d168a8f4b43ea3f54
7
- data.tar.gz: 23957867d8bba43086ee2cb9f76a331f9a9da4b68c7811cbc37bd227ca78a2b6e9acce9dab58404242b4e9da41df6eb4e3aa9813912d3d743a6c5b4623079039
6
+ metadata.gz: a8b518651bc8d31ad658199302917d0b313d753d39128b4ceb67404ffbaed31bbc5343a03b90d43a50be60e529c4b2cce157f7e56bfce0007ea8d0a298e87401
7
+ data.tar.gz: 68a05698d7de9d6738d0a4f68aebfc2455ad75a2a6f7930a2e5db9fa9b8f2aac3885a0f01f83dec916c2c0217e1d31f4ec8963f13bad796c9fce299e071b7811
@@ -13,8 +13,7 @@ module OCR
13
13
  #
14
14
  # @params [String, String, String] path to output file, options of conversion (e.g. Language), output format of file.
15
15
  def scan(output_file, options, type)
16
- Scan.new(@image, output_file, options, type).scan_img
17
- end
18
-
16
+ Scan.new(@image, output_file, options, type).scan_img
17
+ end
19
18
  end
20
19
  end
@@ -21,7 +21,7 @@ module OCR
21
21
  #
22
22
  # @return [String] input file path
23
23
  def duplicate_path
24
- return @input_file.dup
24
+ @input_file.dup
25
25
  end
26
26
 
27
27
  # From PDF to Image conversion
@@ -30,7 +30,7 @@ module OCR
30
30
  def image_path
31
31
  duppath = duplicate_path
32
32
  duppath[name_exten[1]] = Path::EXTENS[:png]
33
- return duppath
33
+ duppath
34
34
  end
35
35
 
36
36
  # Clean your Input File
@@ -38,15 +38,15 @@ module OCR
38
38
  # @return [String] Cleaned Image Path
39
39
  def clean_image_path
40
40
  duppath = duplicate_path
41
- duppath[get_filename] = "cleaned_"+name_exten[0]+".png"
42
- return duppath
41
+ duppath[get_filename] = "cleaned_"+get_filename+".png"
42
+ duppath
43
43
  end
44
44
 
45
45
  # Get the FileName
46
46
  #
47
47
  # @return [String] Filename
48
48
  def get_filename
49
- File.basename(@input_file).split("/")[0]
49
+ File.basename(@input_file)
50
50
  end
51
51
  end
52
52
  end
@@ -1,4 +1,5 @@
1
1
  require 'open3'
2
+ require 'fileutils'
2
3
 
3
4
  module OCR
4
5
  class Scan
@@ -11,15 +12,25 @@ module OCR
11
12
  def initialize(input_file, output_file, options, type)
12
13
  @output_file = output_file
13
14
  @options = options
14
- @type = type
15
+ @type = handle_output_type(type)
15
16
  @input_file = input_file
16
- if OCR::Path.new(input_file).name_exten[1] == OCR::Path::EXTENS[:pdf]
17
+ if pdf?(input_file)
17
18
  @image = OCR::Path.new(input_file).image_path
18
19
  convert_to_img
19
20
  else
20
21
  @image = input_file
21
22
  end
22
- @clean_image = OCR::Path.new(input_file).clean_image_path
23
+ @clean_image = OCR::Path.new(output_file).clean_image_path
24
+ end
25
+
26
+ def handle_output_type(type)
27
+ if type == :pdf
28
+ 'pdf'
29
+ elsif type == :hocr
30
+ 'hocr'
31
+ else
32
+ nil.to_s
33
+ end
23
34
  end
24
35
 
25
36
  # Conversion of PDF to Image
@@ -41,14 +52,19 @@ module OCR
41
52
 
42
53
  # Shell Script for cleaning the Image.
43
54
  def clean_img
44
- `sh ./textcleaner -g -e stretch -f 25 -o 20 -t 30 -u -s 1 -T -p 20 '#{@image}' '#{@clean_image}'`
55
+ name = 'simple-ocr'
56
+ g = Gem::Specification.find_by_name(name)
57
+ `sh #{File.join(g.full_gem_path, 'lib/textcleaner')} -g -e stretch -f 25 -o 20 -t 30 -u -s 1 -T -p 20 '#{@image}' '#{@clean_image}'`
45
58
  end
46
59
 
47
- # Deleting unnecessary file after processing.
60
+ # Deleting unnecessary files after processing.
48
61
  def delete_files
49
62
  FileUtils.rm_rf(@clean_image)
50
- FileUtils.rm_rf(@image) if OCR::Path.new(@input_file).name_exten[1] == "pdf"
63
+ FileUtils.rm_rf(@image) if pdf?
51
64
  end
52
65
 
66
+ def pdf?(input_file = @input_file)
67
+ OCR::Path.new(input_file).name_exten[1] == OCR::Path::EXTENS[:pdf]
68
+ end
53
69
  end
54
- end
70
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple-ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Swaathi Kakarla
@@ -42,9 +42,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
42
  version: '0'
43
43
  requirements: []
44
44
  rubyforge_project:
45
- rubygems_version: 2.4.5
45
+ rubygems_version: 2.4.8
46
46
  signing_key:
47
47
  specification_version: 4
48
48
  summary: OCR Engine by Skcript
49
49
  test_files: []
50
- has_rdoc: