RubyGems - simple-ocr - Versions diffs - 1.0.0 - Mend

simple-ocr 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 1f3b37c06436ef5f307e64c4033e5116b531f55c
+  data.tar.gz: 55a206753bfaec92711f8e42f2e5516dcde68ed6
+SHA512:
+  metadata.gz: 8336d1620e4982e00961862ce62f11db12344cb671712988c8258a34ddd2a64b021e36116fe541d9d81c26211e154cbff41e30c7d2f2f07d168a8f4b43ea3f54
+  data.tar.gz: 23957867d8bba43086ee2cb9f76a331f9a9da4b68c7811cbc37bd227ca78a2b6e9acce9dab58404242b4e9da41df6eb4e3aa9813912d3d743a6c5b4623079039

data/lib/simple-ocr.rb ADDED

@@ -0,0 +1,8 @@
+require 'simple-ocr/scan'
+require 'simple-ocr/zonal_ocr'
+require 'simple-ocr/image'
+require 'simple-ocr/path'
# Top-level namespace of the simple-ocr gem.
module OCR
  # Resolution handed to Ghostscript's -r flag when OCR::Scan converts a PDF
  # into a PNG. Frozen so that no caller can mutate the shared value in place.
  MIN_DENSITY = "300x300".freeze
end

data/lib/simple-ocr/image.rb ADDED

@@ -0,0 +1,20 @@
module OCR
  # Public entry point for running OCR on a single input file.
  class Image
    # Remember the input file that will be processed.
    #
    # @param path [String] path to the input file
    def initialize(path)
      @image = path
    end

    # OCR of the input file (main function).
    #
    # Builds an OCR::Scan for the stored input file and calls #scan_img on it.
    # `options` and `type` are optional so that callers without extra
    # conversion settings do not have to pass placeholder arguments.
    #
    # @param output_file [String] path to the output file
    # @param options [String] options of conversion (e.g. language)
    # @param type [String] output format of the file
    # @return [Object] whatever Scan#scan_img returns
    def scan(output_file, options = "", type = "")
      Scan.new(@image, output_file, options, type).scan_img
    end
  end
end

data/lib/simple-ocr/path.rb ADDED

@@ -0,0 +1,52 @@
module OCR
  # Derives the auxiliary file paths (converted image, cleaned image) that
  # belong to one input file path.
  class Path
    # Extensions used to recognise PDF input and to name generated images.
    EXTENS = { :pdf => "pdf", :png => "png" }.freeze

    # Initialize with your input file.
    #
    # @param input_file [String] path to the input file
    def initialize(input_file)
      @input_file = input_file
    end

    # Split the file name into its name and extension.
    #
    # The split happens at the LAST dot, so a name with several dots
    # ("my.report.pdf") still reports its real extension in slot 1 instead of
    # the second dot-separated fragment.
    #
    # @return [Array<String>] [name, extension], or [name] when the file name
    #   contains no dot
    def name_exten
      filename = get_filename
      stem, dot, exten = filename.rpartition(".")
      dot.empty? ? [filename] : [stem, exten]
    end

    # Duplicate the input file path.
    #
    # @return [String] a copy of the input file path
    def duplicate_path
      @input_file.dup
    end

    # Path of the image produced when the input is converted from PDF.
    #
    # Only the trailing file name is rewritten; a directory whose name happens
    # to contain the extension (e.g. "/pdfs/doc.pdf") is left untouched.
    #
    # @return [String] input path with the file name's extension replaced by "png"
    def image_path
      replace_filename("#{name_exten[0]}.#{EXTENS[:png]}")
    end

    # Path of the cleaned copy of the input image.
    #
    # @return [String] input path with the file name replaced by
    #   "cleaned_<name>.png"
    def clean_image_path
      replace_filename("cleaned_#{name_exten[0]}.png")
    end

    # Get the file name (last component of the input path).
    #
    # @return [String] file name
    def get_filename
      File.basename(@input_file)
    end

    private

    # Return a copy of the input path whose LAST occurrence of the file name
    # is swapped for +new_name+; everything before and after it is preserved.
    def replace_filename(new_name)
      filename = get_filename
      start = @input_file.rindex(filename)
      return new_name if start.nil?

      @input_file[0, start] + new_name + @input_file[(start + filename.length)..-1]
    end
  end
end

data/lib/simple-ocr/scan.rb ADDED

@@ -0,0 +1,54 @@
require 'fileutils'
require 'open3'
require 'shellwords'

module OCR
  # Runs the external tools that turn an input file into OCR output:
  # Ghostscript (`gs`) for PDF-to-PNG conversion, the `./textcleaner` shell
  # script for image clean-up, and `tesseract` for the recognition itself.
  class Scan
    EXTENS = %w{pdf}.freeze

    # Initialize your input file, output file, options and type.
    #
    # When the input's extension (as reported by OCR::Path#name_exten) is
    # "pdf", the PDF is converted to a PNG right away, as a side effect of
    # construction.
    #
    # @param input_file [String] path to the input file
    # @param output_file [String] path to the output file
    # @param options [String] options of conversion (e.g. language); inserted
    #   into the tesseract command line verbatim so it may hold several flags
    # @param type [String] output format of the file; inserted verbatim as well
    def initialize(input_file, output_file, options, type)
      @output_file = output_file
      @options = options
      @type = type
      @input_file = input_file

      path = OCR::Path.new(input_file)
      # Remembered so delete_files removes the converted image only when this
      # object created it.
      @pdf = path.name_exten[1] == OCR::Path::EXTENS[:pdf]
      if @pdf
        @image = path.image_path
        convert_to_img
      else
        @image = input_file
      end
      @clean_image = path.clean_image_path
    end

    # Conversion of PDF to image (Ghostscript, png16m device, OCR::MIN_DENSITY).
    #
    # Paths are shell-escaped so that spaces or quotes in a file name can
    # neither break the command nor inject extra shell syntax.
    #
    # @return [String] standard output of the command
    def convert_to_img
      `gs -sDEVICE=png16m -r#{OCR::MIN_DENSITY} -o #{Shellwords.escape(@image)} #{Shellwords.escape(@input_file)}`
    end

    # OCR of the input: clean the image, run tesseract on the cleaned copy,
    # then remove the intermediate files.
    #
    # @return [Object] result of #delete_files
    def scan_img
      clean_img
      # @options and @type stay unescaped on purpose: they carry raw
      # command-line fragments supplied by the caller.
      `tesseract #{Shellwords.escape(@clean_image)} #{@options} #{Shellwords.escape(@output_file)} #{@type}`
      delete_files
    end

    # Execute a command through Open3.popen3.
    #
    # No block is given, so the popen3 result is returned as-is and the caller
    # is responsible for closing the returned streams.
    #
    # @param command [String] command line to run
    def exec_command(command)
      Open3.popen3(command)
    end

    # Shell script for cleaning the image.
    #
    # NOTE(review): `./textcleaner` is resolved against the current working
    # directory, so the script has to be present there; confirm this is the
    # intended deployment layout.
    #
    # @return [String] standard output of the command
    def clean_img
      `sh ./textcleaner -g -e stretch -f 25 -o 20 -t 30 -u -s 1 -T -p 20 #{Shellwords.escape(@image)} #{Shellwords.escape(@clean_image)}`
    end

    # Deleting unnecessary files after processing: always the cleaned image,
    # and the converted image only when the input was a PDF (otherwise @image
    # is the caller's own input file and must be kept).
    def delete_files
      FileUtils.rm_rf(@clean_image)
      FileUtils.rm_rf(@image) if @pdf
    end
  end
end

data/lib/simple-ocr/zonal_ocr.rb ADDED

@@ -0,0 +1,5 @@
module OCR
  # Empty class that reserves the OCR::ZonalOcr name; it defines no methods.
  class ZonalOcr; end
end

metadata ADDED

@@ -0,0 +1,50 @@
+--- !ruby/object:Gem::Specification
+name: simple-ocr
+version: !ruby/object:Gem::Version
+  version: 1.0.0
+platform: ruby
+authors:
+- Swaathi Kakarla
+- Shilpi Agrawal
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2015-10-28 00:00:00.000000000 Z
+dependencies: []
+description: Provides smart conversion of all scanned Images.
+email: shilpi@skcript.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/simple-ocr.rb
+- lib/simple-ocr/image.rb
+- lib/simple-ocr/path.rb
+- lib/simple-ocr/scan.rb
+- lib/simple-ocr/zonal_ocr.rb
+homepage: http://www.skcript.com
+licenses:
+- Closed
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.4.5
+signing_key:
+specification_version: 4
+summary: OCR Engine by Skcript
+test_files: []
+has_rdoc: