ocr 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,10 +1,14 @@
1
- #Ocr
1
+ #OCR
2
2
  OCR web services gateway for Ruby.
3
3
 
4
4
  #Description
5
5
  Recognize text and characters from image files using web services.
6
6
 
7
- ##Web services
7
+ ##Web services supported
8
+ - [OCR Web Service](http://www.ocrwebservice.com/) ([Online OCR](http://www.onlineocr.net/))
9
+ * authentication: username, with the license code as the password
10
+ * languages: :brazilian, :byelorussian, :bulgarian, :catalan, :croatian, :czech, :danish, :dutch, :english, :estonian, :finnish, :french, :german, :greek, :hungarian, :indonesian, :italian, :latin, :latvian, :lithuanian, :moldavian, :polish, :portuguese, :romanian, :russian, :serbian, :slovakian, :slovenian, :spanish, :swedish, :turkish, :ukrainian
11
+ * output formats: :doc, :pdf, :excel, :html, :txt, :rtf
8
12
 
9
13
  #Installation
10
14
  ##From the command line
@@ -27,5 +31,21 @@ gem 'ocr'
27
31
  bundle install
28
32
  ```
29
33
 
34
+ ## Usage
35
+
36
+ ### OCR Web Service
37
+ [OCR Web Service](http://www.ocrwebservice.com/)
38
+
39
+ ```ruby
40
+ ocr = OCR.use :onlineocr
41
+
42
+ ocr.login <YOUR_USER>, <LICENSE_CODE>
43
+ ocr.file= 'pkg/text_test2.jpg'
44
+ ocr.recognize
45
+
46
+ puts "ERROR: #{ocr.error}" if ocr.error?
47
+ puts "RESULT: #{ocr.text}" unless ocr.error?
48
+ ```
49
+
30
50
  # License
31
51
  Released under the MIT license: [http://www.opensource.org/licenses/MIT](http://www.opensource.org/licenses/MIT)
data/lib/ocr.rb CHANGED
@@ -1,4 +1,13 @@
1
- module Ocr
2
- class Ocr
1
+ require File.expand_path('../ocr/factory', __FILE__)
2
+ require File.expand_path('../ocr/ocr', __FILE__)
3
+ require File.expand_path('../ocrs/dummy', __FILE__)
4
+ require File.expand_path('../ocrs/onlineocr', __FILE__)
5
+ require File.expand_path('../ocrs/onlineocr2', __FILE__)
6
+ require File.expand_path('../ocrs/free_ocr', __FILE__)
7
+
8
+
9
+ module OCR
10
+ def self.use name
11
+ Factory.create eval name.to_s.capitalize
3
12
  end
4
13
  end
@@ -0,0 +1,23 @@
1
+ module OCR
2
+ class Factory
3
+ attr_reader :args
4
+
5
+ class << self
6
+ protected :new
7
+ end
8
+
9
+ def initialize(*args)
10
+ self.init(*args) if self.respond_to?(:init)
11
+ end
12
+
13
+ def self.create(type = self.class, *args)
14
+ raise ArgumentError, "Cannot create instance of #{type} from #{self.name}" if type == self
15
+ raise ArgumentError, "Type cannot be nil" if type.nil?
16
+
17
+ if !type.ancestors.include?(self)
18
+ raise ArgumentError, "#{type.name} is not a descendant of #{self.name}"
19
+ end
20
+ type.new(*args)
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,74 @@
1
+ module OCR
2
+ class Ocr < OCR::Factory
3
+ class << self
4
+ private :create
5
+ end
6
+
7
+ attr_reader :text, :error
8
+ attr_accessor :username, :password, :extra_login_data
9
+ attr_accessor :proxy_addr, :proxy_port, :proxy_user, :proxy_pass
10
+ attr_accessor :file, :language, :format
11
+ attr_accessor :debug
12
+
13
+ def initialize(*args)
14
+ init
15
+ end
16
+
17
+ def init
18
+ clear_error
19
+ clear_text
20
+ login false, false
21
+ proxy false
22
+ file= false
23
+ lang= false
24
+ format= false
25
+ debug= false
26
+ end
27
+
28
+ def login username, password, extra_login_data = false
29
+ @username = username
30
+ @password = password
31
+ @extra_login_data = extra_login_data
32
+ end
33
+
34
+ def proxy p_addr, p_port = nil, p_user = nil, p_pass = nil
35
+ @proxy_addr = p_addr
36
+ @proxy_port = p_port
37
+ @proxy_user = p_user
38
+ @proxy_pass = p_pass
39
+ end
40
+
41
+ def error?
42
+ @error != false
43
+ end
44
+
45
+ def recognize
46
+ clear_error
47
+ clear_text
48
+ return false && set_error("No file") if @file.nil?
49
+ return false && set_error("File not exists '#{@file}'") unless File.exist?(@file)
50
+ ocr_recognize
51
+ end
52
+
53
+ private
54
+ def clear_error
55
+ @error = false
56
+ end
57
+
58
+ def set_error msg
59
+ @error = msg
60
+ end
61
+
62
+ def clear_text
63
+ @text = nil
64
+ end
65
+
66
+ def set_text msg
67
+ @text = msg
68
+ end
69
+
70
+ def ocr_recognize
71
+ raise NotImplementedError, 'You should implement this method'
72
+ end
73
+ end
74
+ end
@@ -1,3 +1,3 @@
1
- module Ocr
2
- VERSION = "0.0.1"
1
+ module OCR
2
+ VERSION = "0.1.0"
3
3
  end
@@ -0,0 +1,4 @@
1
+ module OCR
2
+ class Dummy < Ocr
3
+ end
4
+ end
@@ -0,0 +1,58 @@
1
+ require 'soap/wsdlDriver'
2
+ require 'base64'
3
+
4
+
5
+ module OCR
6
+ class Free_ocr < OCR::Ocr
7
+
8
+ attr_accessor :convert_to_bw
9
+
10
+ private
11
+ def init
12
+ super()
13
+ end
14
+
15
+ def ocr_recognize
16
+ raise Exception, 'You should set image file' unless @file
17
+ # request = {
18
+ # 'image' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
19
+ # }
20
+ #puts request
21
+ #return
22
+ #client = Savon::Client.new('http://www.free-ocr.co.uk/ocr.asmx?WSDL')
23
+ #puts client.wsdl.soap_actions
24
+ #return
25
+ #begin
26
+ # response = client.request(:analyze) do
27
+ # soap.body = request
28
+ # end
29
+ #rescue
30
+ # puts "EX"
31
+ #end
32
+
33
+ client = SOAP::WSDLDriverFactory.new( 'http://www.free-ocr.co.uk/ocr.asmx?WSDL' ).create_rpc_driver
34
+ result = client.analyze(:image => File.open(@file, 'rb') { |f| [f.read].pack('m*') })
35
+
36
+ puts response. analyzeResult
37
+
38
+ puts result
39
+ return
40
+
41
+ puts
42
+ puts "BODY: #{response.body}"
43
+
44
+ return false if have_error? response.body
45
+ puts 'sigue'
46
+ set_text !response[:ocr_text].nil? ? response[:ocr_text] : ''
47
+ end
48
+
49
+ def have_error? response
50
+ return true && set_error("No response") unless response.has_key?(:ocr_web_service_recognize_response)
51
+ return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
52
+ return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
53
+ return false if response[:ocr_web_service_recognize_response][:ocrws_response][:error_message].nil?
54
+ set_error response[:ocr_web_service_recognize_response][:ocrws_response][:error_message]
55
+ true
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,65 @@
1
+ require 'savon'
2
+ require 'base64'
3
+
4
+
5
+ module OCR
6
+ class Onlineocr < OCR::Ocr
7
+
8
+ attr_accessor :convert_to_bw
9
+
10
+ private
11
+ def init
12
+ super()
13
+ self.language= :english
14
+ self.format= :txt
15
+ self.convert_to_bw= false
16
+ end
17
+
18
+ def ocr_recognize
19
+ raise Exception, 'You should set username and license as password' unless @username && @password
20
+ request = {
21
+ 'user_name' => @username,
22
+ 'license_code' => @password,
23
+ 'OCRWSInputImage' => {
24
+ 'fileName' => File.basename(@file),
25
+ 'fileData' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
26
+ },
27
+ 'OCRWSSetting' => {
28
+ 'ocrLanguages' => self.language.to_s.upcase,
29
+ 'outputDocumentFormat' => self.format.to_s.upcase,
30
+ 'convertToBW' => self.convert_to_bw.to_s,
31
+ 'getOCRText' => true.to_s,
32
+ 'createOutputDocument' => false.to_s,
33
+ 'multiPageDoc' => false.to_s,
34
+ 'ocrWords' => false.to_s
35
+ },
36
+ }
37
+
38
+ unless debug
39
+ Savon.configure do |config|
40
+ #config.log = true # enable logging
41
+ config.log = false # disable logging
42
+ config.log_level = :error # changing the log level
43
+ HTTPI.log = false # to total silent the logging.
44
+ end
45
+ end
46
+ client = Savon::Client.new('http://www.ocrwebservice.com/services/OCRWebService.asmx?WSDL')
47
+ response = client.request(:ocr_web_service_recognize) do
48
+ soap.body = request
49
+ end
50
+
51
+ return false if have_error? response.body
52
+
53
+ set_text response[:ocr_web_service_recognize_response][:ocrws_response][:ocr_text][:array_of_string][:string]
54
+ end
55
+
56
+ def have_error? response
57
+ return true && set_error("No response") unless response.has_key?(:ocr_web_service_recognize_response)
58
+ return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
59
+ return false unless response[:ocr_web_service_recognize_response][:ocrws_response].has_key?(:error_message)
60
+ return false if response[:ocr_web_service_recognize_response][:ocrws_response][:error_message].nil?
61
+ set_error response[:ocr_web_service_recognize_response][:ocrws_response][:error_message]
62
+ true
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,109 @@
1
+ require 'net/http'
2
+ require 'base64'
3
+
4
+
5
+ module OCR
6
+ class Onlineocr2 < OCR::Ocr
7
+
8
+ attr_accessor :convert_to_bw
9
+
10
+ private
11
+ def init
12
+ super()
13
+ self.language= :english
14
+ self.format= :txt
15
+ self.convert_to_bw= false
16
+ end
17
+
18
+ def ocr_recognize
19
+ raise Exception, 'You should set username and license as password' unless @username && @password
20
+ request = {
21
+ 'user_name' => @username,
22
+ 'license_code' => @password,
23
+ 'OCRWSSettings' => {
24
+ # 'ocrLanguages' => [self.language.to_s.upcase],
25
+ 'outputDocumentFormat' => self.format.to_s.upcase,
26
+ 'convertToBW' => self.convert_to_bw,
27
+ 'getOCRText' => true,
28
+ 'createOutputDocument' => false,
29
+ 'multiPageDoc' => false,
30
+ 'ocrWords' => true
31
+ },
32
+ 'OCRWSInputImage' => {
33
+ 'fileName' => File.basename(@file),
34
+ # 'fileData' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
35
+ },
36
+ }
37
+ #puts request
38
+ #return
39
+ data =<<EOT
40
+ <?xml version="1.0" encoding="utf-8"?>
41
+ <soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
42
+ <soap:Body>
43
+ <OCRWebServiceRecognize xmlns="http://stockservice.contoso.com/wse/samples/2005/10">
44
+ <user_name>#{@username}</user_name>
45
+ <license_code>#{@password}</license_code>
46
+ <OCRWSInputImage>
47
+ <fileName>string</fileName>
48
+ <fileData>base64Binary</fileData>
49
+ </OCRWSInputImage>
50
+ <OCRWSSetting>
51
+ <ocrLanguages>#{self.language.to_s.upcase}</ocrLanguages>
52
+ <outputDocumentFormat>#{self.format.to_s.upcase}</outputDocumentFormat>
53
+ <convertToBW>#{self.convert_to_bw}</convertToBW>
54
+ <getOCRText>true</getOCRText>
55
+ <createOutputDocument>false</createOutputDocument>
56
+ <multiPageDoc>false</multiPageDoc>
57
+ <ocrWords>true</ocrWords>
58
+ </OCRWSSetting>
59
+ </OCRWebServiceRecognize>
60
+ </soap:Body>
61
+ </soap:Envelope>
62
+ EOT
63
+
64
+ headers = {
65
+ 'Host' => 'www.ocrwebservice.com',
66
+ 'Content-Type' => 'application/soap+xml; charset=utf-8',
67
+ # 'Content-Type' => 'text/xml',
68
+ 'Content-Length' => "#{data.length}",
69
+ 'SOAPAction' => '"http://stockservice.contoso.com/wse/samples/2005/10/OCRWebServiceRecognize"'
70
+ }
71
+
72
+ host = 'www.ocrwebservice.com'
73
+ url = '/services/OCRWebService.asmx'
74
+
75
+ #data = data.squeeze.tr "\n", ''
76
+ puts data
77
+ puts headers
78
+
79
+ #return
80
+ http = Net::HTTP.new(host, 80)
81
+ http.use_ssl = false
82
+ resp = http.post(url, data, headers)
83
+
84
+ puts resp
85
+ puts 'Code = ' + resp.code
86
+ puts 'Message = ' + resp.message
87
+
88
+
89
+
90
+ return
91
+
92
+ puts
93
+ puts "BODY: #{response.body}"
94
+
95
+ return false if have_error? response.body
96
+ puts 'sigue'
97
+ set_text !response[:ocr_text].nil? ? response[:ocr_text] : ''
98
+ end
99
+
100
+ def have_error? response
101
+ return true && set_error("No response") unless response.has_key?(:ocr_web_service_recognize_response)
102
+ return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
103
+ return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
104
+ return false if response[:ocr_web_service_recognize_response][:ocrws_response][:error_message].nil?
105
+ set_error response[:ocr_web_service_recognize_response][:ocrws_response][:error_message]
106
+ true
107
+ end
108
+ end
109
+ end
@@ -1,10 +1,8 @@
1
- # encoding: utf-8
2
-
3
- root = File.expand_path('../..', __FILE__)
4
- require File.join(root, %w[lib ocr])
5
-
6
- describe Ocr::Ocr do
7
- it "should be correct size" do
8
- Ocr::Ocr.new
9
- end
10
- end
1
+ root = File.expand_path('../..', __FILE__)
2
+ require File.join(root, %w[lib ocr])
3
+
4
+ describe OCR do
5
+ it "should be correct size" do
6
+ OCR.use :dummy
7
+ end
8
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,22 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-24 00:00:00.000000000Z
12
+ date: 2012-02-27 00:00:00.000000000Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: savon
16
+ requirement: &19566960 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.9.9
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *19566960
14
25
  - !ruby/object:Gem::Dependency
15
26
  name: rspec
16
- requirement: &16343120 !ruby/object:Gem::Requirement
27
+ requirement: &19558440 !ruby/object:Gem::Requirement
17
28
  none: false
18
29
  requirements:
19
30
  - - ~>
@@ -21,7 +32,7 @@ dependencies:
21
32
  version: 2.7.0
22
33
  type: :development
23
34
  prerelease: false
24
- version_requirements: *16343120
35
+ version_requirements: *19558440
25
36
  description: Recognize text and characters from image files using web services.
26
37
  email:
27
38
  - mabarroso@mabarroso.com
@@ -30,7 +41,13 @@ extensions: []
30
41
  extra_rdoc_files: []
31
42
  files:
32
43
  - lib/ocr.rb
44
+ - lib/ocrs/free_ocr.rb
45
+ - lib/ocrs/onlineocr.rb
46
+ - lib/ocrs/dummy.rb
47
+ - lib/ocrs/onlineocr2.rb
33
48
  - lib/ocr/version.rb
49
+ - lib/ocr/ocr.rb
50
+ - lib/ocr/factory.rb
34
51
  - MIT-LICENSE
35
52
  - Rakefile
36
53
  - README.md