ocr 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +22 -2
- data/lib/ocr.rb +11 -2
- data/lib/ocr/factory.rb +23 -0
- data/lib/ocr/ocr.rb +74 -0
- data/lib/ocr/version.rb +2 -2
- data/lib/ocrs/dummy.rb +4 -0
- data/lib/ocrs/free_ocr.rb +58 -0
- data/lib/ocrs/onlineocr.rb +65 -0
- data/lib/ocrs/onlineocr2.rb +109 -0
- data/spec/ocr_spec.rb +8 -10
- metadata +21 -4
data/README.md
CHANGED
@@ -1,10 +1,14 @@
|
|
1
|
-
#
|
1
|
+
#OCR
|
2
2
|
OCR web services gateway for Ruby.
|
3
3
|
|
4
4
|
#Description
|
5
5
|
Recognize text and characters from image files using web services.
|
6
6
|
|
7
|
-
##Web services
|
7
|
+
## Supported web services
|
8
|
+
- [OCR Web Service](http://www.ocrwebservice.com/)([Online OCR](http://www.onlineocr.net/))
|
9
|
+
* credentials: username, with the license code as the password
|
10
|
+
* languages: :brazilian, :byelorussian, :bulgarian, :catalan, :croatian, :czech, :danish, :dutch, :english, :estonian, :finnish, :french, :german, :greek, :hungarian, :indonesian, :italian, :latin, :latvian, :lithuanian, :moldavian, :polish, :portuguese, :romanian, :russian, :serbian, :slovakian, :slovenian, :spanish, :swedish, :turkish, :ukrainian
|
11
|
+
* output formats: :doc, :pdf, :excel, :html, :txt, :rtf
|
8
12
|
|
9
13
|
#Installation
|
10
14
|
##From the command line
|
@@ -27,5 +31,21 @@ gem 'ocr'
|
|
27
31
|
bundle install
|
28
32
|
```
|
29
33
|
|
34
|
+
## Using
|
35
|
+
|
36
|
+
### OCR Web Service
|
37
|
+
[OCR Web Service](http://www.ocrwebservice.com/)
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
ocr = OCR.use :onlineocr
|
41
|
+
|
42
|
+
ocr.login <YOUR_USER>, <LICENSE_CODE>
|
43
|
+
ocr.file= 'pkg/text_test2.jpg'
|
44
|
+
ocr.recognize
|
45
|
+
|
46
|
+
puts "ERROR: #{ocr.error}" if ocr.error?
|
47
|
+
puts "RESULT: #{ocr.text}" unless ocr.error?
|
48
|
+
```
|
49
|
+
|
30
50
|
# License
|
31
51
|
Released under the MIT license: [http://www.opensource.org/licenses/MIT](http://www.opensource.org/licenses/MIT)
|
data/lib/ocr.rb
CHANGED
@@ -1,4 +1,13 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require File.expand_path('../ocr/factory', __FILE__)
|
2
|
+
require File.expand_path('../ocr/ocr', __FILE__)
|
3
|
+
require File.expand_path('../ocrs/dummy', __FILE__)
|
4
|
+
require File.expand_path('../ocrs/onlineocr', __FILE__)
|
5
|
+
require File.expand_path('../ocrs/onlineocr2', __FILE__)
|
6
|
+
require File.expand_path('../ocrs/free_ocr', __FILE__)
|
7
|
+
|
8
|
+
|
9
|
+
# Entry point of the gem: resolves an OCR engine by name and instantiates it.
module OCR
  # Build the engine registered under +name+.
  #
  # The name is capitalized and looked up as a constant from the OCR namespace
  # (+:onlineocr+ -> +Onlineocr+, +:free_ocr+ -> +Free_ocr+); the resulting
  # class is handed to Factory.create, which instantiates it.
  #
  # The lookup uses +const_get+ rather than +eval+, so a name that is not a
  # plain constant name is rejected instead of being executed as Ruby code.
  #
  # @param name [Symbol, String] engine name, e.g. +:onlineocr+
  # @return [Object] whatever Factory.create returns for the resolved class
  # @raise [NameError] when no constant matches the capitalized name
  def self.use(name)
    Factory.create const_get(name.to_s.capitalize)
  end
end
|
data/lib/ocr/factory.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module OCR
  # Minimal factory base class: subclasses are instantiated through
  # Factory.create, and +new+ is made protected on the class object.
  class Factory
    # NOTE(review): nothing in this class assigns @args, so this reader
    # returns nil unless a subclass sets it.
    attr_reader :args

    class << self
      protected :new
    end

    # Forwards the constructor arguments to +init+ when the instance exposes
    # a public +init+ method (+respond_to?+ does not see private methods).
    def initialize(*args)
      init(*args) if respond_to?(:init)
    end

    # Instantiate +type+, which must be a strict descendant of the receiver.
    #
    # The default for +type+ is kept for interface compatibility; it evaluates
    # to +Class+, which is never a descendant, so callers are expected to pass
    # the type explicitly.
    #
    # @param type [Class] class to instantiate
    # @param args [Array] arguments forwarded to +type.new+
    # @return [Object] the new instance
    # @raise [ArgumentError] when +type+ is nil, is the receiver itself, is
    #   not a class/module, or does not descend from the receiver
    def self.create(type = self.class, *args)
      raise ArgumentError, "Type cannot be nil" if type.nil?
      raise ArgumentError, "Cannot create instance of #{type} from #{name}" if type == self
      # Strings, symbols, etc. have no ancestry to inspect; reject them with
      # the same exception class as every other invalid type.
      raise ArgumentError, "#{type.inspect} is not a class" unless type.is_a?(Module)
      raise ArgumentError, "#{type.name} is not a descendant of #{name}" unless type < self

      type.new(*args)
    end
  end
end
|
data/lib/ocr/ocr.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
module OCR
  # Common base class for the OCR engines.
  #
  # It stores the shared settings (credentials, proxy, input file, language,
  # output format, debug flag) and runs a recognition through #recognize.
  # Concrete engines override the private #ocr_recognize hook.
  class Ocr < OCR::Factory
    class << self
      # The factory method inherited from OCR::Factory is private on engines.
      private :create
    end

    # text:  recognized text, nil until an engine stores one.
    # error: last error message, false when there is none.
    attr_reader :text, :error
    attr_accessor :username, :password, :extra_login_data
    attr_accessor :proxy_addr, :proxy_port, :proxy_user, :proxy_pass
    attr_accessor :file, :language, :format
    attr_accessor :debug

    # Constructor arguments are accepted but not used; state comes from #init.
    def initialize(*args)
      init
    end

    # Reset the instance to its initial state: no error, no text, no
    # credentials, no proxy, no file/language/format and debug off.
    def init
      clear_error
      clear_text
      login false, false
      proxy false
      # A bare `file = false` inside a method only creates a local variable,
      # so the settings are reset through their instance variables. They are
      # set to nil, which is what their readers return on a fresh instance.
      @file = nil
      @language = nil
      @format = nil
      @debug = nil
    end

    # Store the credentials used by the engine.
    def login username, password, extra_login_data = false
      @username = username
      @password = password
      @extra_login_data = extra_login_data
    end

    # Store the proxy settings.
    def proxy p_addr, p_port = nil, p_user = nil, p_pass = nil
      @proxy_addr = p_addr
      @proxy_port = p_port
      @proxy_user = p_user
      @proxy_pass = p_pass
    end

    # @return [Boolean] true when the last operation recorded an error
    def error?
      @error != false
    end

    # Run the engine on the configured file.
    #
    # Clears any previous error and text first. When no file is configured or
    # the file does not exist, the reason is recorded (see #error) and false
    # is returned without calling the engine.
    #
    # @return [false, Object] false on a missing file, otherwise the value
    #   returned by the engine's #ocr_recognize
    def recognize
      clear_error
      clear_text
      return fail_with("No file") unless @file
      return fail_with("File not exists '#{@file}'") unless File.exist?(@file)

      ocr_recognize
    end

    private

    def clear_error
      @error = false
    end

    def set_error msg
      @error = msg
    end

    def clear_text
      @text = nil
    end

    def set_text msg
      @text = msg
    end

    # Record +msg+ as the current error and return false, so a guard clause
    # can both report the failure and stop in a single expression.
    def fail_with msg
      set_error msg
      false
    end

    # Hook implemented by each concrete engine.
    def ocr_recognize
      raise NotImplementedError, 'You should implement this method'
    end
  end
end
|
data/lib/ocr/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
module
|
2
|
-
VERSION = "0.0
|
1
|
+
module OCR
  # Version of the ocr gem; frozen so the constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
|
data/lib/ocrs/dummy.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'soap/wsdlDriver'
|
2
|
+
require 'base64'
|
3
|
+
|
4
|
+
|
5
|
+
module OCR
  # Engine for the free-ocr.co.uk SOAP service, called through the
  # SOAP::WSDLDriverFactory RPC driver.
  class Free_ocr < OCR::Ocr

    attr_accessor :convert_to_bw

    private

    # No engine-specific defaults; redefined here so +init+ is private.
    def init
      super()
    end

    # Send the Base64-encoded image to the service's +analyze+ operation and
    # store the returned text.
    #
    # The previous implementation printed +response.analyzeResult+, but no
    # +response+ local exists in this method (the driver's return value is
    # +result+), so every call ended in a NameError.
    #
    # NOTE(review): assumes the driver's return value responds to
    # +analyzeResult+, as the original code expected — confirm against the
    # service WSDL.
    #
    # @return [String] the stored text ('' when the service returned none)
    # @raise [Exception] when no image file is set
    def ocr_recognize
      raise Exception, 'You should set image file' unless @file

      client = SOAP::WSDLDriverFactory.new('http://www.free-ocr.co.uk/ocr.asmx?WSDL').create_rpc_driver
      result = client.analyze(:image => File.open(@file, 'rb') { |f| [f.read].pack('m*') })

      # Raw service output is only echoed when debugging is switched on.
      puts result if debug

      set_text result.analyzeResult.to_s
    end

    # Inspect a response hash for an error message and record it.
    #
    # NOTE(review): the keys checked here are the OCR Web Service ones
    # (+:ocr_web_service_recognize_response+); nothing in this class calls the
    # method any more — confirm whether it is still needed.
    #
    # @return [Boolean] true when an error was recorded
    def have_error? response
      root = :ocr_web_service_recognize_response
      unless response.has_key?(root) && response[root].has_key?(:ocrws_response)
        set_error("No response")
        return true
      end

      message = response[root][:ocrws_response][:error_message]
      return false if message.nil?

      set_error message
      true
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'savon'
|
2
|
+
require 'base64'
|
3
|
+
|
4
|
+
|
5
|
+
module OCR
  # Engine for OCR Web Service (http://www.ocrwebservice.com/), called over
  # SOAP through a Savon client.
  class Onlineocr < OCR::Ocr

    attr_accessor :convert_to_bw

    private

    # Reset the shared state, then apply this engine's defaults:
    # English, plain-text output, no black-and-white conversion.
    def init
      super()
      self.language = :english
      self.format = :txt
      self.convert_to_bw = false
    end

    # Call the +ocr_web_service_recognize+ operation with the configured file
    # and store the recognized text.
    #
    # Returns false when the response carries an error (recorded by
    # #have_error?), otherwise the stored text.
    # Raises Exception when the username or the license code is missing.
    def ocr_recognize
      unless @username && @password
        raise Exception, 'You should set username and license as password'
      end

      payload = recognize_request
      silence_savon unless debug

      service = Savon::Client.new('http://www.ocrwebservice.com/services/OCRWebService.asmx?WSDL')
      response = service.request(:ocr_web_service_recognize) do
        soap.body = payload
      end

      return false if have_error? response.body

      ocrws = response[:ocr_web_service_recognize_response][:ocrws_response]
      set_text ocrws[:ocr_text][:array_of_string][:string]
    end

    # Body of the recognize request: credentials, the Base64-encoded image
    # and the recognition settings.
    def recognize_request
      image = {
        'fileName' => File.basename(@file),
        'fileData' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
      }
      settings = {
        'ocrLanguages' => self.language.to_s.upcase,
        'outputDocumentFormat' => self.format.to_s.upcase,
        'convertToBW' => self.convert_to_bw.to_s,
        'getOCRText' => 'true',
        'createOutputDocument' => 'false',
        'multiPageDoc' => 'false',
        'ocrWords' => 'false'
      }

      {
        'user_name' => @username,
        'license_code' => @password,
        'OCRWSInputImage' => image,
        'OCRWSSetting' => settings
      }
    end

    # Turn off Savon and HTTPI logging (used when debug is off).
    def silence_savon
      Savon.configure do |config|
        config.log = false
        config.log_level = :error
        HTTPI.log = false
      end
    end

    # Look for an error in the response hash and record it.
    # Returns a truthy value when an error was recorded, false otherwise.
    def have_error? response
      root = :ocr_web_service_recognize_response
      return set_error("No response") unless response.has_key?(root)
      return set_error("No response") unless response[root].has_key?(:ocrws_response)

      ocrws = response[root][:ocrws_response]
      return false unless ocrws.has_key?(:error_message)

      message = ocrws[:error_message]
      return false if message.nil?

      set_error message
      true
    end
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'base64'
|
3
|
+
|
4
|
+
|
5
|
+
module OCR
  # Experimental engine that posts a hand-built SOAP envelope to OCR Web
  # Service with Net::HTTP.
  #
  # NOTE(review): work in progress. The envelope still sends the placeholder
  # values "string" / "base64Binary" instead of the file name and contents,
  # and the HTTP response is not parsed, so no text is ever stored.
  class Onlineocr2 < OCR::Ocr

    attr_accessor :convert_to_bw

    private

    # Reset the shared state, then apply this engine's defaults.
    def init
      super()
      self.language = :english
      self.format = :txt
      self.convert_to_bw = false
    end

    # Post the recognize request and return nil.
    #
    # The envelope, headers and response are printed only when +debug+ is set;
    # they contain the license code, which previously went to stdout on every
    # call.
    #
    # @return [nil]
    # @raise [Exception] when the username or the license code is missing
    def ocr_recognize
      raise Exception, 'You should set username and license as password' unless @username && @password

      data = soap_envelope
      # NOTE(review): this Content-Type is the SOAP 1.2 media type while the
      # envelope namespace and the SOAPAction header look like SOAP 1.1 —
      # confirm which one the service expects.
      headers = {
        'Host' => 'www.ocrwebservice.com',
        'Content-Type' => 'application/soap+xml; charset=utf-8',
        # Content-Length counts bytes, not characters.
        'Content-Length' => data.bytesize.to_s,
        'SOAPAction' => '"http://stockservice.contoso.com/wse/samples/2005/10/OCRWebServiceRecognize"'
      }

      if debug
        puts data
        puts headers
      end

      http = Net::HTTP.new('www.ocrwebservice.com', 80)
      http.use_ssl = false
      resp = http.post('/services/OCRWebService.asmx', data, headers)

      if debug
        puts resp
        puts 'Code = ' + resp.code
        puts 'Message = ' + resp.message
      end

      nil
    end

    # SOAP envelope for the OCRWebServiceRecognize operation.
    # FIXME: fileName and fileData are still the template placeholders.
    def soap_envelope
      <<EOT
<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<OCRWebServiceRecognize xmlns="http://stockservice.contoso.com/wse/samples/2005/10">
<user_name>#{xml_escape(@username)}</user_name>
<license_code>#{xml_escape(@password)}</license_code>
<OCRWSInputImage>
<fileName>string</fileName>
<fileData>base64Binary</fileData>
</OCRWSInputImage>
<OCRWSSetting>
<ocrLanguages>#{self.language.to_s.upcase}</ocrLanguages>
<outputDocumentFormat>#{self.format.to_s.upcase}</outputDocumentFormat>
<convertToBW>#{self.convert_to_bw}</convertToBW>
<getOCRText>true</getOCRText>
<createOutputDocument>false</createOutputDocument>
<multiPageDoc>false</multiPageDoc>
<ocrWords>true</ocrWords>
</OCRWSSetting>
</OCRWebServiceRecognize>
</soap:Body>
</soap:Envelope>
EOT
    end

    # Escape the characters that would break XML element content
    # ('&' first, so the entities produced here are not escaped again).
    def xml_escape(value)
      value.to_s.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
    end

    # Inspect a response hash for an error message and record it.
    #
    # NOTE(review): nothing in this class calls this method, because the HTTP
    # response is not parsed yet.
    #
    # @return [Boolean] true when an error was recorded
    def have_error? response
      root = :ocr_web_service_recognize_response
      unless response.has_key?(root) && response[root].has_key?(:ocrws_response)
        set_error("No response")
        return true
      end

      message = response[root][:ocrws_response][:error_message]
      return false if message.nil?

      set_error message
      true
    end
  end
end
|
data/spec/ocr_spec.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
end
|
10
|
-
end
|
1
|
+
root = File.expand_path('../..', __FILE__)
|
2
|
+
require File.join(root, %w[lib ocr])
|
3
|
+
|
4
|
+
describe OCR do
  # Smoke test: OCR.use must resolve the :dummy engine and return an instance.
  it "should create the dummy engine" do
    OCR.use(:dummy).should_not be_nil
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,22 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-27 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: savon
|
16
|
+
requirement: &19566960 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.9.9
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *19566960
|
14
25
|
- !ruby/object:Gem::Dependency
|
15
26
|
name: rspec
|
16
|
-
requirement: &
|
27
|
+
requirement: &19558440 !ruby/object:Gem::Requirement
|
17
28
|
none: false
|
18
29
|
requirements:
|
19
30
|
- - ~>
|
@@ -21,7 +32,7 @@ dependencies:
|
|
21
32
|
version: 2.7.0
|
22
33
|
type: :development
|
23
34
|
prerelease: false
|
24
|
-
version_requirements: *
|
35
|
+
version_requirements: *19558440
|
25
36
|
description: Recognize text and characters from image files using web services.
|
26
37
|
email:
|
27
38
|
- mabarroso@mabarroso.com
|
@@ -30,7 +41,13 @@ extensions: []
|
|
30
41
|
extra_rdoc_files: []
|
31
42
|
files:
|
32
43
|
- lib/ocr.rb
|
44
|
+
- lib/ocrs/free_ocr.rb
|
45
|
+
- lib/ocrs/onlineocr.rb
|
46
|
+
- lib/ocrs/dummy.rb
|
47
|
+
- lib/ocrs/onlineocr2.rb
|
33
48
|
- lib/ocr/version.rb
|
49
|
+
- lib/ocr/ocr.rb
|
50
|
+
- lib/ocr/factory.rb
|
34
51
|
- MIT-LICENSE
|
35
52
|
- Rakefile
|
36
53
|
- README.md
|