ocr 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,10 +1,14 @@
1
- #Ocr
1
+ # OCR
2
2
  OCR web services gateway for Ruby.
3
3
 
4
4
  #Description
5
5
  Recognize text and characters from image files using web services.
6
6
 
7
- ##Web services
7
+ ## Web services supported
8
+ - [OCR Web Service](http://www.ocrwebservice.com/)([Online OCR](http://www.onlineocr.net/))
9
+ * identify: Username and license code as password
10
+ * languages: :brazilian, :byelorussian, :bulgarian, :catalan, :croatian, :czech, :danish, :dutch, :english, :estonian, :finnish, :french, :german, :greek, :hungarian, :indonesian, :italian, :latin, :latvian, :lithuanian, :moldavian, :polish, :portuguese, :romanian, :russian, :serbian, :slovakian, :slovenian, :spanish, :swedish, :turkish, :ukrainian
11
+ * output formats: :doc, :pdf, :excel, :html, :txt, :rtf
8
12
 
9
13
  #Installation
10
14
  ##From the command line
@@ -27,5 +31,21 @@ gem 'ocr'
27
31
  bundle install
28
32
  ```
29
33
 
34
+ ## Usage
35
+
36
+ ### OCR Web Service
37
+ [OCR Web Service](http://www.ocrwebservice.com/)
38
+
39
+ ```ruby
40
+ ocr = OCR.use :onlineocr
41
+
42
+ ocr.login <YOUR_USER>, <LICENSE_CODE>
43
+ ocr.file= 'pkg/text_test2.jpg'
44
+ ocr.recognize
45
+
46
+ puts "ERROR: #{ocr.error}" if ocr.error?
47
+ puts "RESULT: #{ocr.text}" unless ocr.error?
48
+ ```
49
+
30
50
  # License
31
51
  Released under the MIT license: [http://www.opensource.org/licenses/MIT](http://www.opensource.org/licenses/MIT)
data/lib/ocr.rb CHANGED
@@ -1,4 +1,13 @@
1
- module Ocr
2
- class Ocr
1
+ require File.expand_path('../ocr/factory', __FILE__)
2
+ require File.expand_path('../ocr/ocr', __FILE__)
3
+ require File.expand_path('../ocrs/dummy', __FILE__)
4
+ require File.expand_path('../ocrs/onlineocr', __FILE__)
5
+ require File.expand_path('../ocrs/onlineocr2', __FILE__)
6
+ require File.expand_path('../ocrs/free_ocr', __FILE__)
7
+
8
+
9
# Top-level namespace of the gem and entry point for obtaining an engine.
module OCR
  # Builds the OCR engine whose class is named after +name+.
  #
  # The name is converted with +to_s.capitalize+ and looked up as a constant
  # starting from this module, so +:onlineocr+ resolves to +OCR::Onlineocr+
  # and +:free_ocr+ to +OCR::Free_ocr+.
  #
  # @param name [Symbol, String] engine name, e.g. +:onlineocr+
  # @return [Object] whatever +Factory.create+ returns for the resolved class
  # @raise [NameError] when no constant matches the capitalized name
  def self.use(name)
    # const_get only resolves constants; the previous eval would execute any
    # Ruby expression smuggled in through +name+.
    Factory.create const_get(name.to_s.capitalize)
  end
end
@@ -0,0 +1,23 @@
1
module OCR
  # Minimal factory base class.
  #
  # +new+ is made protected on the class, so callers outside the hierarchy
  # cannot instantiate directly and have to go through +Factory.create+.
  class Factory
    # NOTE(review): no code in this class assigns @args, so this reader
    # returns nil unless a subclass sets it.
    attr_reader :args

    class << self
      protected :new
    end

    # Forwards the constructor arguments to a public +init+ hook when the
    # instance defines one (+respond_to?+ ignores private methods).
    def initialize(*args)
      init(*args) if respond_to?(:init)
    end

    # Instantiates +type+, which must be a strict descendant of the receiver.
    #
    # The default for +type+ evaluates to +Class+, which is never a
    # descendant, so a type must effectively always be supplied.
    #
    # @param type [Class] class to instantiate
    # @param args [Array] arguments handed to +type.new+
    # @return [Object] the new instance of +type+
    # @raise [ArgumentError] when +type+ is the receiver itself, nil, not a
    #   class, or not a descendant of the receiver
    def self.create(type = self.class, *args)
      raise ArgumentError, "Cannot create instance of #{type} from #{name}" if type == self
      raise ArgumentError, "Type cannot be nil" if type.nil?
      # Symbols, strings and other non-class values used to blow up with
      # NoMethodError on #ancestors; report them like any other bad type.
      raise ArgumentError, "#{type.inspect} is not a class" unless type.is_a?(Class)
      raise ArgumentError, "#{type.name} is not a descendant of #{name}" unless type <= self

      type.new(*args)
    end
  end
end
@@ -0,0 +1,74 @@
1
module OCR
  # Abstract base class for OCR engines.
  #
  # Holds credentials, proxy settings, the input file and the outcome of the
  # last recognition (+text+ / +error+). Concrete engines implement the
  # private +ocr_recognize+ hook.
  class Ocr < OCR::Factory
    class << self
      # Hide the inherited factory method on engine classes.
      private :create
    end

    attr_reader :text, :error
    attr_accessor :username, :password, :extra_login_data
    attr_accessor :proxy_addr, :proxy_port, :proxy_user, :proxy_pass
    attr_accessor :file, :language, :format
    attr_accessor :debug

    # Constructor arguments are accepted but not used; state is set by +init+.
    def initialize(*args)
      init
    end

    # Resets the engine to its initial state.
    #
    # The input file, language and format start out as nil: +recognize+
    # relies on a missing file being falsy, and the former +file= false+
    # style lines only assigned throw-away local variables.
    def init
      clear_error
      clear_text
      login false, false
      proxy false
      @file = nil
      @language = nil
      @format = nil
      @debug = false
    end

    # Stores the credentials used by the engine.
    #
    # @param username [String, false]
    # @param password [String, false]
    # @param extra_login_data [Object] engine-specific extra data
    def login(username, password, extra_login_data = false)
      @username = username
      @password = password
      @extra_login_data = extra_login_data
    end

    # Stores the proxy settings.
    #
    # @param p_addr [String, false] proxy address
    # @param p_port [Integer, nil]
    # @param p_user [String, nil]
    # @param p_pass [String, nil]
    def proxy(p_addr, p_port = nil, p_user = nil, p_pass = nil)
      @proxy_addr = p_addr
      @proxy_port = p_port
      @proxy_user = p_user
      @proxy_pass = p_pass
    end

    # @return [Boolean] true when the last operation recorded an error
    def error?
      @error != false
    end

    # Runs recognition on the configured file.
    #
    # Clears the previous text and error, validates the input file and then
    # delegates to the engine's +ocr_recognize+.
    #
    # @return [false] when no file is set or the file does not exist; the
    #   reason is then available through +error+
    # @return [Object] otherwise, whatever +ocr_recognize+ returns
    def recognize
      clear_error
      clear_text
      # The messages must be recorded before returning; `false && set_error`
      # short-circuited and never stored them.
      return fail_with("No file") unless @file
      return fail_with("File not exists '#{@file}'") unless File.exist?(@file)

      ocr_recognize
    end

    private

    def clear_error
      @error = false
    end

    def set_error(msg)
      @error = msg
    end

    # Records +msg+ as the current error and returns false.
    def fail_with(msg)
      set_error msg
      false
    end

    def clear_text
      @text = nil
    end

    def set_text(msg)
      @text = msg
    end

    # Engine hook; concrete subclasses must override it.
    def ocr_recognize
      raise NotImplementedError, 'You should implement this method'
    end
  end
end
@@ -1,3 +1,3 @@
1
- module Ocr
2
- VERSION = "0.0.1"
1
module OCR
  # Release number of the ocr gem.
  VERSION = '0.1.0'
end
@@ -0,0 +1,4 @@
1
module OCR
  # Placeholder engine: inherits from Ocr and adds no behaviour of its own.
  class Dummy < OCR::Ocr; end
end
@@ -0,0 +1,58 @@
1
+ require 'soap/wsdlDriver'
2
+ require 'base64'
3
+
4
+
5
module OCR
  # Engine that sends the image to the free-ocr.co.uk SOAP service through
  # the soap4r WSDL driver.
  class Free_ocr < OCR::Ocr
    WSDL_URL = 'http://www.free-ocr.co.uk/ocr.asmx?WSDL'

    attr_accessor :convert_to_bw

    private

    def init
      super()
    end

    # Uploads the Base64-encoded file and stores the recognised text.
    #
    # @return [String] the text stored through +set_text+
    # @raise [ArgumentError] when no file has been set
    def ocr_recognize
      raise ArgumentError, 'You should set image file' unless @file

      image = File.open(@file, 'rb') { |f| [f.read].pack('m*') }
      client = SOAP::WSDLDriverFactory.new(WSDL_URL).create_rpc_driver
      result = client.analyze(:image => image)
      puts result if debug

      # The earlier code read `response.analyzeResult`, but only `result`
      # exists in this method, so every call ended in a NameError.
      # NOTE(review): assumes the driver result exposes #analyzeResult --
      # confirm against the service WSDL.
      set_text result.analyzeResult.to_s
    end

    # Inspects a response hash for an error message and records it.
    #
    # NOTE(review): the keys mirror the ocrwebservice.com response layout and
    # nothing in this class calls the method -- confirm whether it is needed.
    #
    # @param response [Hash]
    # @return [Boolean] true when an error was recorded
    def have_error?(response)
      envelope = response[:ocr_web_service_recognize_response] if response.has_key?(:ocr_web_service_recognize_response)
      unless envelope && envelope.has_key?(:ocrws_response)
        set_error "No response"
        return true
      end

      message = envelope[:ocrws_response][:error_message]
      return false if message.nil?

      set_error message
      true
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require 'savon'
2
+ require 'base64'
3
+
4
+
5
module OCR
  # Engine for the ocrwebservice.com SOAP service, driven through Savon.
  class Onlineocr < OCR::Ocr
    WSDL_URL = 'http://www.ocrwebservice.com/services/OCRWebService.asmx?WSDL'

    attr_accessor :convert_to_bw

    private

    # Engine defaults: English, plain-text output, no black/white conversion.
    def init
      super()
      self.language = :english
      self.format = :txt
      self.convert_to_bw = false
    end

    # Sends the file to the service and stores the recognised text.
    #
    # @return [false] when the service reported an error (see +error+)
    # @return [Object] otherwise the text stored through +set_text+
    # @raise [ArgumentError] when username or license code is missing
    def ocr_recognize
      raise ArgumentError, 'You should set username and license as password' unless @username && @password

      # Built outside the request block: only local variables are reliably
      # visible inside it.
      payload = build_request
      silence_soap_logging unless debug

      client = Savon::Client.new(WSDL_URL)
      response = client.request(:ocr_web_service_recognize) do
        soap.body = payload
      end

      body = response.body
      return false if have_error? body

      set_text extract_text(body)
    end

    # Builds the SOAP body: credentials, the Base64-encoded image and the
    # recognition settings.
    def build_request
      {
        'user_name' => @username,
        'license_code' => @password,
        'OCRWSInputImage' => {
          'fileName' => File.basename(@file),
          'fileData' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
        },
        'OCRWSSetting' => {
          'ocrLanguages' => language.to_s.upcase,
          'outputDocumentFormat' => format.to_s.upcase,
          'convertToBW' => convert_to_bw.to_s,
          'getOCRText' => true.to_s,
          'createOutputDocument' => false.to_s,
          'multiPageDoc' => false.to_s,
          'ocrWords' => false.to_s
        }
      }
    end

    # Turns off Savon and HTTPI logging (global settings).
    def silence_soap_logging
      Savon.configure do |config|
        config.log = false
        config.log_level = :error
        HTTPI.log = false
      end
    end

    # Walks ocr_text -> array_of_string -> string inside the service
    # response, returning '' when any level is missing instead of raising
    # NoMethodError on nil.
    def extract_text(body)
      node = body[:ocr_web_service_recognize_response][:ocrws_response]
      [:ocr_text, :array_of_string, :string].each do |key|
        return '' unless node.respond_to?(:has_key?) && node.has_key?(key)

        node = node[key]
      end
      node.nil? ? '' : node
    end

    # Records the service error message, if the response carries one.
    #
    # @param response [Hash] parsed SOAP body
    # @return [Boolean] true when an error was recorded
    def have_error?(response)
      envelope = response[:ocr_web_service_recognize_response] if response.has_key?(:ocr_web_service_recognize_response)
      unless envelope && envelope.has_key?(:ocrws_response)
        set_error "No response"
        return true
      end

      ocr_response = envelope[:ocrws_response]
      return false unless ocr_response.has_key?(:error_message)

      message = ocr_response[:error_message]
      return false if message.nil?

      set_error message
      true
    end
  end
end
@@ -0,0 +1,109 @@
1
+ require 'net/http'
2
+ require 'base64'
3
+
4
+
5
module OCR
  # Experimental engine for ocrwebservice.com that posts a hand-built SOAP
  # envelope over Net::HTTP.
  class Onlineocr2 < OCR::Ocr
    SERVICE_HOST = 'www.ocrwebservice.com'
    SERVICE_PATH = '/services/OCRWebService.asmx'
    SOAP_ACTION = '"http://stockservice.contoso.com/wse/samples/2005/10/OCRWebServiceRecognize"'
    XML_ESCAPES = {
      '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;', "'" => '&apos;'
    }.freeze

    attr_accessor :convert_to_bw

    private

    # Engine defaults: English, plain-text output, no black/white conversion.
    def init
      super()
      self.language = :english
      self.format = :txt
      self.convert_to_bw = false
    end

    # Posts the recognition request to the service.
    #
    # The request and response are echoed to stdout only when +debug+ is
    # set, because the envelope contains the license code.
    #
    # @return [false] when the HTTP call did not succeed (see +error+)
    # @return [nil] on HTTP success; the response is not parsed yet
    # @raise [ArgumentError] when username or license code is missing
    def ocr_recognize
      raise ArgumentError, 'You should set username and license as password' unless @username && @password

      data = soap_envelope
      headers = {
        'Host' => SERVICE_HOST,
        # The envelope uses the SOAP 1.1 namespace and a SOAPAction header,
        # which go with text/xml rather than application/soap+xml.
        'Content-Type' => 'text/xml; charset=utf-8',
        # Byte size, not character count, in case of multi-byte content.
        'Content-Length' => data.bytesize.to_s,
        'SOAPAction' => SOAP_ACTION
      }

      if debug
        puts data
        puts headers
      end

      http = Net::HTTP.new(SERVICE_HOST, 80)
      http.use_ssl = false
      resp = http.post(SERVICE_PATH, data, headers)

      if debug
        puts resp
        puts 'Code = ' + resp.code
        puts 'Message = ' + resp.message
      end

      unless resp.is_a?(Net::HTTPSuccess)
        set_error "HTTP #{resp.code} #{resp.message}"
        return false
      end

      # TODO: parse resp.body and store the recognised text via set_text;
      # the former code after the bare `return` was unreachable and referred
      # to an undefined `response`.
      nil
    end

    # Builds the SOAP envelope with the real file name and Base64 file data
    # (previously the literal placeholders "string" and "base64Binary").
    def soap_envelope
      file_name = File.basename(@file)
      file_data = File.open(@file, 'rb') { |f| Base64.encode64(f.read) }
      <<-EOT
<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
  <soap:Body>
    <OCRWebServiceRecognize xmlns="http://stockservice.contoso.com/wse/samples/2005/10">
      <user_name>#{xml_escape(@username)}</user_name>
      <license_code>#{xml_escape(@password)}</license_code>
      <OCRWSInputImage>
        <fileName>#{xml_escape(file_name)}</fileName>
        <fileData>#{file_data}</fileData>
      </OCRWSInputImage>
      <OCRWSSetting>
        <ocrLanguages>#{xml_escape(language.to_s.upcase)}</ocrLanguages>
        <outputDocumentFormat>#{xml_escape(format.to_s.upcase)}</outputDocumentFormat>
        <convertToBW>#{xml_escape(convert_to_bw)}</convertToBW>
        <getOCRText>true</getOCRText>
        <createOutputDocument>false</createOutputDocument>
        <multiPageDoc>false</multiPageDoc>
        <ocrWords>true</ocrWords>
      </OCRWSSetting>
    </OCRWebServiceRecognize>
  </soap:Body>
</soap:Envelope>
      EOT
    end

    # Escapes the five XML special characters so values such as passwords
    # cannot break the envelope.
    def xml_escape(value)
      value.to_s.gsub(/[&<>"']/) { |ch| XML_ESCAPES[ch] }
    end

    # Records the service error message, if the response hash carries one.
    #
    # NOTE(review): nothing in this class calls it until response parsing is
    # implemented.
    #
    # @param response [Hash]
    # @return [Boolean] true when an error was recorded
    def have_error?(response)
      envelope = response[:ocr_web_service_recognize_response] if response.has_key?(:ocr_web_service_recognize_response)
      unless envelope && envelope.has_key?(:ocrws_response)
        set_error "No response"
        return true
      end

      message = envelope[:ocrws_response][:error_message]
      return false if message.nil?

      set_error message
      true
    end
  end
end
@@ -1,10 +1,8 @@
1
- # encoding: utf-8
2
-
3
- root = File.expand_path('../..', __FILE__)
4
- require File.join(root, %w[lib ocr])
5
-
6
- describe Ocr::Ocr do
7
- it "should be correct size" do
8
- Ocr::Ocr.new
9
- end
10
- end
1
# Loads the library straight from the source tree, relative to this file.
root = File.expand_path('../..', __FILE__)
require File.join(root, %w[lib ocr])

describe OCR do
  # The example used to call OCR.use without asserting anything, under a
  # description unrelated to what it exercised.
  it "builds the engine registered under the given name" do
    OCR.use(:dummy).should be_kind_of(OCR::Dummy)
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,22 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-24 00:00:00.000000000Z
12
+ date: 2012-02-27 00:00:00.000000000Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: savon
16
+ requirement: &19566960 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.9.9
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *19566960
14
25
  - !ruby/object:Gem::Dependency
15
26
  name: rspec
16
- requirement: &16343120 !ruby/object:Gem::Requirement
27
+ requirement: &19558440 !ruby/object:Gem::Requirement
17
28
  none: false
18
29
  requirements:
19
30
  - - ~>
@@ -21,7 +32,7 @@ dependencies:
21
32
  version: 2.7.0
22
33
  type: :development
23
34
  prerelease: false
24
- version_requirements: *16343120
35
+ version_requirements: *19558440
25
36
  description: Recognize text and characters from image files using web services.
26
37
  email:
27
38
  - mabarroso@mabarroso.com
@@ -30,7 +41,13 @@ extensions: []
30
41
  extra_rdoc_files: []
31
42
  files:
32
43
  - lib/ocr.rb
44
+ - lib/ocrs/free_ocr.rb
45
+ - lib/ocrs/onlineocr.rb
46
+ - lib/ocrs/dummy.rb
47
+ - lib/ocrs/onlineocr2.rb
33
48
  - lib/ocr/version.rb
49
+ - lib/ocr/ocr.rb
50
+ - lib/ocr/factory.rb
34
51
  - MIT-LICENSE
35
52
  - Rakefile
36
53
  - README.md