simple-ocr 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1f3b37c06436ef5f307e64c4033e5116b531f55c
4
+ data.tar.gz: 55a206753bfaec92711f8e42f2e5516dcde68ed6
5
+ SHA512:
6
+ metadata.gz: 8336d1620e4982e00961862ce62f11db12344cb671712988c8258a34ddd2a64b021e36116fe541d9d81c26211e154cbff41e30c7d2f2f07d168a8f4b43ea3f54
7
+ data.tar.gz: 23957867d8bba43086ee2cb9f76a331f9a9da4b68c7811cbc37bd227ca78a2b6e9acce9dab58404242b4e9da41df6eb4e3aa9813912d3d743a6c5b4623079039
@@ -0,0 +1,8 @@
1
+ require 'simple-ocr/scan'
2
+ require 'simple-ocr/zonal_ocr'
3
+ require 'simple-ocr/image'
4
+ require 'simple-ocr/path'
5
+
6
# Top-level namespace of the simple-ocr gem.
module OCR
  # Rendering resolution, written as "<horizontal>x<vertical>". It is
  # interpolated into Ghostscript's -r flag when a PDF is rasterised.
  # Frozen so the shared constant cannot be mutated in place by callers.
  MIN_DENSITY = "300x300".freeze
end
@@ -0,0 +1,20 @@
1
module OCR
  # Public entry point: wraps one input file and runs OCR on it on demand.
  class Image
    # Remember which file is going to be processed.
    #
    # @param path [String] path to the input file
    def initialize(path)
      @image = path
    end

    # Run OCR on the stored input file (main function).
    #
    # Builds a Scan for the stored path and immediately executes it.
    #
    # @param output_file [String] path to the output file
    # @param options [String] conversion options (e.g. language)
    # @param type [String] output format of the file
    # @return [Object] whatever Scan#scan_img returns
    def scan(output_file, options, type)
      scanner = Scan.new(@image, output_file, options, type)
      scanner.scan_img
    end
  end
end
@@ -0,0 +1,52 @@
1
module OCR
  # Derives the file names used while processing one input file: the PNG a
  # PDF is rendered to and the "cleaned_" PNG produced by the cleaning step.
  class Path
    # Extensions (without the dot) the pipeline refers to by name.
    EXTENS = { :pdf => "pdf", :png => "png" }.freeze

    # Initialize with the input file.
    #
    # @param input_file [String] path to the input file
    def initialize(input_file)
      @input_file = input_file
    end

    # Split the file name into its stem and its extension.
    #
    # Only the LAST dot separates the extension, so "my.scan.pdf" yields
    # ["my.scan", "pdf"]. A name without an extension yields a one-element
    # array, so index 1 is nil.
    #
    # @return [Array<String>] name and extension (extension without the dot)
    def name_exten
      base = get_filename
      ext = File.extname(base)
      # Some Ruby versions report "." for a name that merely ends in a dot;
      # treat that like "no extension".
      return [base] if ext.empty? || ext == "."

      [base[0, base.length - ext.length], ext[1..-1]]
    end

    # Duplicate the input file path.
    #
    # @return [String] a fresh copy of the input file path
    def duplicate_path
      @input_file.dup
    end

    # Path of the PNG image a PDF input is converted to: same directory and
    # stem as the input, with a "png" extension.
    #
    # @return [String] converted image path
    def image_path
      swap_basename("#{name_exten.first}.#{EXTENS[:png]}")
    end

    # Path of the cleaned image: same directory as the input, named
    # "cleaned_<stem>.png".
    #
    # @return [String] cleaned image path
    def clean_image_path
      swap_basename("cleaned_#{name_exten.first}.png")
    end

    # Get the file name (last path component) of the input file.
    #
    # @return [String] file name
    def get_filename
      File.basename(@input_file)
    end

    private

    # Replace the last path component with +new_name+, leaving the directory
    # prefix byte-for-byte untouched. Substituting by substring would hit the
    # first match anywhere in the path (e.g. a directory called "pdfs").
    def swap_basename(new_name)
      start = @input_file.rindex(get_filename) || 0
      @input_file[0, start] + new_name
    end
  end
end
@@ -0,0 +1,54 @@
1
+ require 'open3'
2
+
3
# FileUtils and Shellwords are needed by Scan itself; requiring them here keeps
# the class loadable regardless of what the host application has loaded.
require 'fileutils'
require 'open3'
require 'shellwords'

module OCR
  # Runs the OCR pipeline for one input file: optional PDF-to-PNG conversion
  # (Ghostscript), image cleaning (textcleaner script), recognition
  # (tesseract) and removal of the intermediate files.
  #
  # External programs are started with explicit argument arrays, never
  # through a shell string, so file names containing quotes, spaces or other
  # shell metacharacters are passed through verbatim.
  class Scan
    EXTENS = %w{pdf}.freeze

    # Initialize the input file, output file, options and type. A PDF input
    # (extension compared case-insensitively) is converted to a PNG right
    # away.
    #
    # @param input_file [String] path to the input file
    # @param output_file [String] path to the output file
    # @param options [String, nil] conversion options (e.g. language)
    # @param type [String, nil] output format of the file
    def initialize(input_file, output_file, options, type)
      @output_file = output_file
      @options = options
      @type = type
      @input_file = input_file

      path = OCR::Path.new(input_file)
      # Decided once and reused by delete_files so both places always agree.
      @pdf_input = path.name_exten[1].to_s.downcase == OCR::Path::EXTENS[:pdf]
      if @pdf_input
        @image = path.image_path
        convert_to_img
      else
        @image = input_file
      end
      @clean_image = path.clean_image_path
    end

    # Conversion of PDF to image via Ghostscript at OCR::MIN_DENSITY.
    #
    # @return [Boolean] true when the command exited successfully
    def convert_to_img
      run_command("gs", "-sDEVICE=png16m", "-r#{OCR::MIN_DENSITY}", "-o", @image, @input_file)
    end

    # OCR of the input: clean the image, run tesseract on the cleaned image,
    # then remove the intermediate files (also when a step raises).
    #
    # @return [Boolean] true when tesseract exited successfully
    def scan_img
      clean_img
      run_command("tesseract", @clean_image, *split_words(@options), @output_file, *split_words(@type))
    ensure
      delete_files
    end

    # Execute a command through Open3.popen3.
    #
    # Without a block the caller receives the raw streams and wait thread and
    # is responsible for closing them; with a block Open3 closes them when
    # the block returns.
    #
    # @param command [String] command line to run
    # @return [Array, Object] popen3's return value, or the block's value
    def exec_command(command, &block)
      Open3.popen3(command, &block)
    end

    # Shell script for cleaning the image. The script is looked up as
    # ./textcleaner, i.e. relative to the current working directory.
    #
    # @return [Boolean] true when the script exited successfully
    def clean_img
      run_command("sh", "./textcleaner",
                  "-g", "-e", "stretch", "-f", "25", "-o", "20", "-t", "30",
                  "-u", "-s", "1", "-T", "-p", "20",
                  @image, @clean_image)
    end

    # Deleting unnecessary files after processing: always the cleaned image,
    # and the converted PNG only when the input was a PDF (otherwise @image
    # is the caller's own input file).
    def delete_files
      FileUtils.rm_f(@clean_image)
      FileUtils.rm_f(@image) if @pdf_input
    end

    private

    # Split a user-supplied option string into separate arguments the way a
    # POSIX shell would; nil becomes an empty argument list.
    def split_words(value)
      Shellwords.split(value.to_s)
    end

    # Run an external program without a shell, discarding its output.
    # A program that cannot be started is reported on stderr and counted as
    # a failure instead of aborting the whole pipeline.
    def run_command(*argv)
      _output, status = Open3.capture2e(*argv)
      status.success?
    rescue SystemCallError => e
      warn "simple-ocr: could not run #{argv.first}: #{e.message}"
      false
    end
  end
end
@@ -0,0 +1,5 @@
1
module OCR
  # Empty placeholder class: it declares no constants and no methods.
  class ZonalOcr; end
end
metadata ADDED
@@ -0,0 +1,50 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simple-ocr
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Swaathi Kakarla
8
+ - Shilpi Agrawal
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-10-28 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Provides smart conversion of all scanned Images.
15
+ email: shilpi@skcript.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/simple-ocr.rb
21
+ - lib/simple-ocr/image.rb
22
+ - lib/simple-ocr/path.rb
23
+ - lib/simple-ocr/scan.rb
24
+ - lib/simple-ocr/zonal_ocr.rb
25
+ homepage: http://www.skcript.com
26
+ licenses:
27
+ - Closed
28
+ metadata: {}
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ required_rubygems_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ requirements: []
44
+ rubyforge_project:
45
+ rubygems_version: 2.4.5
46
+ signing_key:
47
+ specification_version: 4
48
+ summary: OCR Engine by Skcript
49
+ test_files: []
50
+ has_rdoc: