simple-ocr 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1f3b37c06436ef5f307e64c4033e5116b531f55c
4
+ data.tar.gz: 55a206753bfaec92711f8e42f2e5516dcde68ed6
5
+ SHA512:
6
+ metadata.gz: 8336d1620e4982e00961862ce62f11db12344cb671712988c8258a34ddd2a64b021e36116fe541d9d81c26211e154cbff41e30c7d2f2f07d168a8f4b43ea3f54
7
+ data.tar.gz: 23957867d8bba43086ee2cb9f76a331f9a9da4b68c7811cbc37bd227ca78a2b6e9acce9dab58404242b4e9da41df6eb4e3aa9813912d3d743a6c5b4623079039
@@ -0,0 +1,8 @@
1
+ require 'simple-ocr/scan'
2
+ require 'simple-ocr/zonal_ocr'
3
+ require 'simple-ocr/image'
4
+ require 'simple-ocr/path'
5
+
6
# Top-level namespace of the simple-ocr gem.
module OCR
  # Resolution string interpolated into Ghostscript's `-r` flag when a PDF is
  # rasterised by OCR::Scan#convert_to_img.
  # Frozen so that the shared constant cannot be mutated in place by a caller.
  MIN_DENSITY = "300x300".freeze
end
@@ -0,0 +1,20 @@
1
module OCR

  # Entry point for running OCR on a single input file.
  class Image

    # Initialize your Input File.
    #
    # @param path [String] path to input file.
    def initialize(path)
      @image = path
    end

    # OCR of input file (Main Function)
    #
    # Builds an OCR::Scan for the stored input path and returns whatever
    # Scan#scan_img returns.
    #
    # `options` and `type` are optional: Scan only passes them through to the
    # tesseract command line, where nil contributes nothing.
    #
    # @param output_file [String] path to output file.
    # @param options [String, nil] options of conversion (e.g. Language).
    # @param type [String, nil] output format of file.
    def scan(output_file, options = nil, type = nil)
      Scan.new(@image, output_file, options, type).scan_img
    end

  end
end
@@ -0,0 +1,52 @@
1
module OCR
  # Derives the file names a scan works with from the path of the input file.
  #
  # All derived paths keep the directory part of the input untouched; only the
  # final path component (the file name) is ever rewritten.
  class Path

    # Extensions (without the leading dot) looked up by this class and by OCR::Scan.
    EXTENS = {:pdf => "pdf", :png => "png"}.freeze

    # Initialize your Input File.
    #
    # @param input_file [String] path to input file.
    def initialize(input_file)
      @input_file = input_file
    end

    # Split the file name into stem and extension.
    #
    # Only the LAST dot separates the extension, so "my.scan.pdf" yields
    # ["my.scan", "pdf"] rather than treating "scan" as the extension.
    #
    # @return [Array<String>] `[stem, extension]`, or the one-element `[stem]`
    #   when the name has no extension (no dot, a leading dot only, or a
    #   trailing dot).
    def name_exten
      base = get_filename
      stem, dot, extension = base.rpartition(".")
      # No dot at all, or a dot-file such as ".bashrc": the whole name is the stem.
      return [base] if dot.empty? || stem.empty?
      extension.empty? ? [stem] : [stem, extension]
    end

    # Duplicate the input file path
    #
    # @return [String] an independent copy of the input file path
    def duplicate_path
      @input_file.dup
    end

    # Path of the PNG produced when the input is converted from PDF to image.
    #
    # The extension of the file name is replaced by "png" (or ".png" is
    # appended when the name has no extension). Directory names are never
    # modified, even if they happen to contain the extension text.
    #
    # @return [String] Converted Image Path
    def image_path
      "#{directory_prefix}#{name_exten.first}.#{EXTENS[:png]}"
    end

    # Path of the cleaned copy of the input: same directory, file name
    # "cleaned_<stem>.png".
    #
    # @return [String] Cleaned Image Path
    def clean_image_path
      "#{directory_prefix}cleaned_#{name_exten.first}.#{EXTENS[:png]}"
    end

    # Get the FileName
    #
    # @return [String] last component of the input path (no directory part)
    def get_filename
      File.basename(@input_file)
    end

    private

    # Everything in the input path that precedes the file name, including the
    # trailing separator ("" for a bare file name).
    #
    # rindex is used so that only the final occurrence -- the real file name --
    # is located, never an identical substring inside a directory name.
    def directory_prefix
      cut = @input_file.rindex(get_filename)
      cut ? @input_file[0, cut] : ""
    end
  end
end
@@ -0,0 +1,54 @@
1
+ require 'open3'
2
+
3
# FileUtils is used by #delete_files and Shellwords by #scan_img; neither is
# loaded by the file's existing `require 'open3'`.
require 'fileutils'
require 'shellwords'

module OCR
  # Drives the external tools of one scan: Ghostscript (`gs`) to rasterise a
  # PDF input, the `textcleaner` script to clean the image, and `tesseract`
  # to perform the OCR itself.
  class Scan

    # NOTE(review): not referenced anywhere in this class; kept for callers
    # that may read it.
    EXTENS = %w{pdf}.freeze

    # Fixed argument list handed to the textcleaner script before the
    # input and output image paths.
    TEXTCLEANER_ARGS = %w{-g -e stretch -f 25 -o 20 -t 30 -u -s 1 -T -p 20}.freeze

    # Initialize your Input File, Output File, Options, Type.
    #
    # When the input is a PDF it is converted to a PNG right here, in the
    # constructor; any other input is used as the image directly.
    #
    # @param input_file [String] path to input file.
    # @param output_file [String] path to output file.
    # @param options [String, nil] options of conversion (e.g. Language).
    # @param type [String, nil] output format of file.
    def initialize(input_file, output_file, options, type)
      @output_file = output_file
      @options = options
      @type = type
      @input_file = input_file

      path = OCR::Path.new(input_file)
      # Remembered so that #delete_files applies the very same PDF test.
      @pdf_input = path.name_exten[1] == OCR::Path::EXTENS[:pdf]
      if @pdf_input
        @image = path.image_path
        convert_to_img
      else
        @image = input_file
      end
      @clean_image = path.clean_image_path
    end

    # Conversion of PDF to Image
    #
    # @return [String] standard output of the `gs` run
    def convert_to_img
      run_tool("gs", "-sDEVICE=png16m", "-r#{OCR::MIN_DENSITY}", "-o", @image, @input_file)
    end

    # OCR of Input
    #
    # Cleans the image, runs tesseract on the cleaned copy and removes the
    # temporary files afterwards.
    #
    # @return [Object] whatever #delete_files returns
    # @raise [ArgumentError] if options or type contain an unmatched quote
    def scan_img
      # Split first: malformed options then fail before any file is created.
      option_args = Shellwords.split(@options.to_s)
      type_args = Shellwords.split(@type.to_s)
      clean_img
      run_tool("tesseract", @clean_image, *option_args, @output_file, *type_args)
      delete_files
    end

    # Execute Command
    #
    # Thin wrapper around Open3.popen3 called without a block, so the caller
    # receives the stdin/stdout/stderr streams and the wait thread and is
    # responsible for closing them. Not used by the other methods of this class.
    def exec_command(command)
      Open3.popen3(command)
    end

    # Shell Script for cleaning the Image.
    #
    # NOTE(review): "./textcleaner" is resolved against the current working
    # directory of the process, and the script is not among the files listed
    # in the gem metadata -- confirm where it is expected to live.
    #
    # @return [String] standard output of the script run
    def clean_img
      run_tool("sh", "./textcleaner", *TEXTCLEANER_ARGS, @image, @clean_image)
    end

    # Deleting unnecessary file after processing.
    #
    # The cleaned image is always removed; the intermediate PNG only when it
    # was generated from a PDF (otherwise @image is the caller's own input).
    def delete_files
      FileUtils.rm_rf(@clean_image)
      FileUtils.rm_rf(@image) if @pdf_input
    end

    private

    # Run an external program with an explicit argument vector and return its
    # standard output.
    #
    # No shell is involved, so quotes, spaces or other metacharacters in file
    # names reach the program verbatim instead of being interpreted.
    # A missing executable is reported on stderr and yields "", matching what
    # the shell-based invocation produced in that situation.
    def run_tool(*argv)
      IO.popen(argv.map(&:to_s), &:read)
    rescue Errno::ENOENT => e
      warn "simple-ocr: #{e.message}"
      ""
    end

  end
end
@@ -0,0 +1,5 @@
1
+ module OCR
2
+ class ZonalOcr
3
+ # Placeholder: the class body is empty in this version; it defines the OCR::ZonalOcr constant and nothing else.
4
+ end
5
+ end
metadata ADDED
@@ -0,0 +1,50 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simple-ocr
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Swaathi Kakarla
8
+ - Shilpi Agrawal
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-10-28 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Provides smart conversion of all scanned Images.
15
+ email: shilpi@skcript.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/simple-ocr.rb
21
+ - lib/simple-ocr/image.rb
22
+ - lib/simple-ocr/path.rb
23
+ - lib/simple-ocr/scan.rb
24
+ - lib/simple-ocr/zonal_ocr.rb
25
+ homepage: http://www.skcript.com
26
+ licenses:
27
+ - Closed
28
+ metadata: {}
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ required_rubygems_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ requirements: []
44
+ rubyforge_project:
45
+ rubygems_version: 2.4.5
46
+ signing_key:
47
+ specification_version: 4
48
+ summary: OCR Engine by Skcript
49
+ test_files: []
50
+ has_rdoc: