ocr 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +22 -2
- data/lib/ocr.rb +11 -2
- data/lib/ocr/factory.rb +23 -0
- data/lib/ocr/ocr.rb +74 -0
- data/lib/ocr/version.rb +2 -2
- data/lib/ocrs/dummy.rb +4 -0
- data/lib/ocrs/free_ocr.rb +58 -0
- data/lib/ocrs/onlineocr.rb +65 -0
- data/lib/ocrs/onlineocr2.rb +109 -0
- data/spec/ocr_spec.rb +8 -10
- metadata +21 -4
data/README.md
CHANGED
@@ -1,10 +1,14 @@
|
|
1
|
-
#
|
1
|
+
#OCR
|
2
2
|
OCR web services gateway for Ruby.
|
3
3
|
|
4
4
|
#Description
|
5
5
|
Recognize text and characters from image files using web services.
|
6
6
|
|
7
|
-
##Web services
|
7
|
+
##Web services supported
|
8
|
+
- [OCR Web Service](http://www.ocrwebservice.com/)([Online OCR](http://www.onlineocr.net/))
|
9
|
+
* credentials: username, with the license code as the password
|
10
|
+
* languages: :brazilian, :byelorussian, :bulgarian, :catalan, :croatian, :czech, :danish, :dutch, :english, :estonian, :finnish, :french, :german, :greek, :hungarian, :indonesian, :italian, :latin, :latvian, :lithuanian, :moldavian, :polish, :portuguese, :romanian, :russian, :serbian, :slovakian, :slovenian, :spanish, :swedish, :turkish, :ukrainian
|
11
|
+
* output formats: :doc, :pdf, :excel, :html, :txt, :rtf
|
8
12
|
|
9
13
|
#Installation
|
10
14
|
##From the command line
|
@@ -27,5 +31,21 @@ gem 'ocr'
|
|
27
31
|
bundle install
|
28
32
|
```
|
29
33
|
|
34
|
+
## Usage
|
35
|
+
|
36
|
+
### OCR Web Service
|
37
|
+
[OCR Web Service](http://www.ocrwebservice.com/)
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
ocr = OCR.use :onlineocr
|
41
|
+
|
42
|
+
ocr.login <YOUR_USER>, <LICENSE_CODE>
|
43
|
+
ocr.file= 'pkg/text_test2.jpg'
|
44
|
+
ocr.recognize
|
45
|
+
|
46
|
+
puts "ERROR: #{ocr.error}" if ocr.error?
|
47
|
+
puts "RESULT: #{ocr.text}" unless ocr.error?
|
48
|
+
```
|
49
|
+
|
30
50
|
# License
|
31
51
|
Released under the MIT license: [http://www.opensource.org/licenses/MIT](http://www.opensource.org/licenses/MIT)
|
data/lib/ocr.rb
CHANGED
@@ -1,4 +1,13 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require File.expand_path('../ocr/factory', __FILE__)
|
2
|
+
require File.expand_path('../ocr/ocr', __FILE__)
|
3
|
+
require File.expand_path('../ocrs/dummy', __FILE__)
|
4
|
+
require File.expand_path('../ocrs/onlineocr', __FILE__)
|
5
|
+
require File.expand_path('../ocrs/onlineocr2', __FILE__)
|
6
|
+
require File.expand_path('../ocrs/free_ocr', __FILE__)
|
7
|
+
|
8
|
+
|
9
|
+
module OCR
  # Builds an OCR engine instance from its short name.
  #
  # The name is capitalized and looked up as a constant starting from the
  # OCR module (e.g. :onlineocr -> OCR::Onlineocr, :free_ocr -> OCR::Free_ocr).
  # The lookup uses const_get rather than eval, so a caller-supplied name can
  # never be executed as Ruby code.
  #
  # name - Symbol or String naming the engine class.
  #
  # Returns whatever Factory.create returns for the resolved class.
  # Raises NameError when the name does not resolve to a constant.
  def self.use(name)
    Factory.create(const_get(name.to_s.capitalize))
  end
end
|
data/lib/ocr/factory.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module OCR
  # Base class whose descendants are instantiated through Factory.create
  # instead of .new (which is made protected on the class side).
  class Factory
    # Arguments the instance was constructed with.
    attr_reader :args

    class << self
      protected :new
    end

    # Remembers the constructor arguments and forwards them to a public
    # #init hook when the object defines one.
    def initialize(*args)
      @args = args
      init(*args) if respond_to?(:init)
    end

    # Instantiates +type+ with +args+.
    #
    # type - a class descending from the receiver. The default, self.class,
    #        evaluates to Class here and is therefore always rejected below,
    #        so callers are expected to pass a type explicitly.
    # args - forwarded unchanged to type.new.
    #
    # Returns the new instance.
    # Raises ArgumentError when type is the receiver itself, nil, or anything
    # that is not a descendant of the receiver.
    def self.create(type = self.class, *args)
      raise ArgumentError, "Cannot create instance of #{type} from #{self.name}" if type == self
      raise ArgumentError, "Type cannot be nil" if type.nil?

      # Non-module values have no #ancestors; reject them with the same
      # ArgumentError rather than letting a NoMethodError escape.
      unless type.is_a?(Module) && type.ancestors.include?(self)
        label = type.is_a?(Module) ? type.name : type.inspect
        raise ArgumentError, "#{label} is not a descendant of #{self.name}"
      end

      type.new(*args)
    end
  end
end
|
data/lib/ocr/ocr.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
module OCR
  # Common state and workflow shared by the OCR engines. Subclasses supply
  # the private #ocr_recognize hook; callers use #login, #file= and
  # #recognize, then read #text / #error.
  class Ocr < OCR::Factory
    class << self
      # Hide the inherited factory entry point on Ocr and its subclasses.
      private :create
    end

    attr_reader :text, :error
    attr_accessor :username, :password, :extra_login_data
    attr_accessor :proxy_addr, :proxy_port, :proxy_user, :proxy_pass
    attr_accessor :file, :language, :format
    attr_accessor :debug

    # Constructor arguments are accepted but not used; state comes from #init.
    def initialize(*args)
      init
    end

    # Resets the instance: no error, no text, credentials and proxy address
    # set to false, no file / language / format, debug off.
    #
    # The attribute writers are called with an explicit receiver; a bare
    # `file = ...` would only assign a local variable.
    def init
      clear_error
      clear_text
      login false, false
      proxy false
      self.file = nil
      self.language = nil
      self.format = nil
      self.debug = false
    end

    # Stores the credentials used by the engine.
    #
    # username         - account name.
    # password         - password (or license code, depending on the engine).
    # extra_login_data - optional extra value, stored as given.
    def login(username, password, extra_login_data = false)
      @username = username
      @password = password
      @extra_login_data = extra_login_data
    end

    # Stores proxy settings: address, then optional port, user and password.
    def proxy(p_addr, p_port = nil, p_user = nil, p_pass = nil)
      @proxy_addr = p_addr
      @proxy_port = p_port
      @proxy_user = p_user
      @proxy_pass = p_pass
    end

    # True once an error message has been recorded since the last reset.
    def error?
      @error != false
    end

    # Clears the previous result, validates the input file and delegates to
    # the engine-specific #ocr_recognize.
    #
    # Returns false (with #error set) when no file is configured or the file
    # does not exist; otherwise returns whatever #ocr_recognize returns.
    def recognize
      clear_error
      clear_text

      # The message must be recorded before returning; an expression such as
      # `false && set_error(...)` short-circuits and never records it.
      unless @file
        set_error("No file")
        return false
      end

      unless File.exist?(@file)
        set_error("File not exists '#{@file}'")
        return false
      end

      ocr_recognize
    end

    private

    def clear_error
      @error = false
    end

    def set_error(msg)
      @error = msg
    end

    def clear_text
      @text = nil
    end

    def set_text(msg)
      @text = msg
    end

    # Engine hook: subclasses perform the actual recognition here.
    def ocr_recognize
      raise NotImplementedError, 'You should implement this method'
    end
  end
end
|
data/lib/ocr/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
module
|
2
|
-
VERSION = "0.0
|
1
|
+
module OCR
  # Gem release version. Frozen so the constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
|
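data/lib/ocrs/dummy.rb
ADDED
@@ -0,0 +1,4 @@
(the 4 added lines of this file are missing from this extract)
data/lib/ocrs/free_ocr.rb
ADDED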
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'soap/wsdlDriver'
|
2
|
+
require 'base64'
|
3
|
+
|
4
|
+
|
5
|
+
module OCR
  # Engine that sends the image to the free-ocr.co.uk SOAP service through
  # the soap4r WSDL driver.
  class Free_ocr < OCR::Ocr

    attr_accessor :convert_to_bw

    private

    # No engine-specific defaults; only the base reset is applied.
    def init
      super()
    end

    # Sends the base64-encoded file to the service's analyze operation and
    # stores the recognized text.
    #
    # Returns the stored text ('' when the service returned none).
    # Raises Exception when no file is set.
    def ocr_recognize
      raise Exception, 'You should set image file' unless @file

      client = SOAP::WSDLDriverFactory.new('http://www.free-ocr.co.uk/ocr.asmx?WSDL').create_rpc_driver
      result = client.analyze(:image => File.open(@file, 'rb') { |f| [f.read].pack('m*') })

      # NOTE(review): the result is assumed to expose #analyzeResult, as the
      # previous code tried to read (through an undefined variable) - confirm
      # against the service's WSDL.
      recognized = result.analyzeResult
      set_text recognized.nil? ? '' : recognized
    end

    # Inspects a response hash for a service error message.
    # Not called by #ocr_recognize at present.
    #
    # Returns true (with #error set) when the expected envelope keys are
    # missing or an error message is present, false otherwise.
    def have_error?(response)
      unless response.has_key?(:ocr_web_service_recognize_response) &&
             response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
        set_error("No response")
        return true
      end

      message = response[:ocr_web_service_recognize_response][:ocrws_response][:error_message]
      return false if message.nil?

      set_error message
      true
    end
  end
end
|
data/lib/ocrs/onlineocr.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'savon'
|
2
|
+
require 'base64'
|
3
|
+
|
4
|
+
|
5
|
+
module OCR
  # Engine for the OCR Web Service SOAP API (ocrwebservice.com), using Savon.
  class Onlineocr < OCR::Ocr

    # Its string form is sent as the service's convertToBW setting.
    attr_accessor :convert_to_bw

    private

    # Base reset plus engine defaults: english, txt output, convert_to_bw off.
    def init
      super()
      self.language = :english
      self.format = :txt
      self.convert_to_bw = false
    end

    # Uploads the file and stores the recognized text.
    #
    # Returns false (with #error set) when the service reports a problem,
    # otherwise the stored text ('' when the response carries none).
    # Raises Exception when username or password is missing.
    def ocr_recognize
      raise Exception, 'You should set username and license as password' unless @username && @password

      request = {
        'user_name' => @username,
        'license_code' => @password,
        'OCRWSInputImage' => {
          'fileName' => File.basename(@file),
          'fileData' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
        },
        'OCRWSSetting' => {
          'ocrLanguages' => self.language.to_s.upcase,
          'outputDocumentFormat' => self.format.to_s.upcase,
          'convertToBW' => self.convert_to_bw.to_s,
          'getOCRText' => true.to_s,
          'createOutputDocument' => false.to_s,
          'multiPageDoc' => false.to_s,
          'ocrWords' => false.to_s
        }
      }

      # Outside debug mode, silence Savon and HTTPI logging.
      unless debug
        Savon.configure do |config|
          config.log = false
          config.log_level = :error
          HTTPI.log = false
        end
      end

      client = Savon::Client.new('http://www.ocrwebservice.com/services/OCRWebService.asmx?WSDL')
      response = client.request(:ocr_web_service_recognize) do
        soap.body = request
      end

      return false if have_error? response.body

      # Walk ocr_text -> array_of_string -> string without assuming every
      # level is present; a missing level yields an empty text instead of a
      # NoMethodError on nil.
      ocrws = response[:ocr_web_service_recognize_response][:ocrws_response]
      recognized = [:ocr_text, :array_of_string, :string].inject(ocrws) do |node, key|
        node.is_a?(Hash) ? node[key] : nil
      end

      set_text recognized.nil? ? '' : recognized
    end

    # Checks the response body hash for a service-side failure.
    #
    # Returns true (with #error set) when the expected envelope keys are
    # missing or an error message is present, false otherwise.
    def have_error?(response)
      unless response.has_key?(:ocr_web_service_recognize_response) &&
             response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
        set_error("No response")
        return true
      end

      ocrws = response[:ocr_web_service_recognize_response][:ocrws_response]
      return false unless ocrws.has_key?(:error_message)
      return false if ocrws[:error_message].nil?

      set_error ocrws[:error_message]
      true
    end
  end
end
|
data/lib/ocrs/onlineocr2.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'base64'
|
3
|
+
|
4
|
+
|
5
|
+
module OCR
  # Alternative engine for OCR Web Service that posts a hand-built SOAP
  # envelope with Net::HTTP instead of going through Savon.
  class Onlineocr2 < OCR::Ocr

    # Interpolated into the envelope's convertToBW element.
    attr_accessor :convert_to_bw

    private

    # Base reset plus engine defaults: english, txt output, convert_to_bw off.
    def init
      super()
      self.language = :english
      self.format = :txt
      self.convert_to_bw = false
    end

    # Posts the recognition request.
    #
    # Returns false (with #error set to "<code> <message>") when the HTTP
    # status is not a success, otherwise the stored response body.
    # Raises Exception when username or password is missing.
    def ocr_recognize
      raise Exception, 'You should set username and license as password' unless @username && @password

      data = soap_envelope

      headers = {
        'Host' => 'www.ocrwebservice.com',
        'Content-Type' => 'application/soap+xml; charset=utf-8',
        # Content-Length counts bytes, not characters.
        'Content-Length' => data.bytesize.to_s,
        'SOAPAction' => '"http://stockservice.contoso.com/wse/samples/2005/10/OCRWebServiceRecognize"'
      }

      host = 'www.ocrwebservice.com'
      url = '/services/OCRWebService.asmx'

      # The envelope contains the license code, so it is only printed when
      # debugging was explicitly requested.
      if debug
        puts data
        puts headers
      end

      http = Net::HTTP.new(host, 80)
      http.use_ssl = false
      resp = http.post(url, data, headers)

      if debug
        puts resp
        puts 'Code = ' + resp.code
        puts 'Message = ' + resp.message
      end

      unless resp.is_a?(Net::HTTPSuccess)
        set_error "#{resp.code} #{resp.message}"
        return false
      end

      # NOTE(review): the body stored here is the raw SOAP response; pulling
      # the recognized text out of the envelope is not implemented yet.
      set_text resp.body.to_s
    end

    # Builds the SOAP request, embedding the real file name and its
    # base64-encoded content. The XML declaration must stay at the very
    # start of the document, hence the unindented heredoc.
    def soap_envelope
      file_name = xml_escape(File.basename(@file))
      file_data = File.open(@file, 'rb') { |f| [f.read].pack('m*') }

      <<EOT
<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<OCRWebServiceRecognize xmlns="http://stockservice.contoso.com/wse/samples/2005/10">
<user_name>#{xml_escape(@username)}</user_name>
<license_code>#{xml_escape(@password)}</license_code>
<OCRWSInputImage>
<fileName>#{file_name}</fileName>
<fileData>#{file_data}</fileData>
</OCRWSInputImage>
<OCRWSSetting>
<ocrLanguages>#{xml_escape(self.language.to_s.upcase)}</ocrLanguages>
<outputDocumentFormat>#{xml_escape(self.format.to_s.upcase)}</outputDocumentFormat>
<convertToBW>#{xml_escape(self.convert_to_bw)}</convertToBW>
<getOCRText>true</getOCRText>
<createOutputDocument>false</createOutputDocument>
<multiPageDoc>false</multiPageDoc>
<ocrWords>true</ocrWords>
</OCRWSSetting>
</OCRWebServiceRecognize>
</soap:Body>
</soap:Envelope>
EOT
    end

    # Escapes the characters that would break XML element content.
    def xml_escape(value)
      value.to_s.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
    end

    # Inspects a parsed response hash for a service error message.
    # Not called by #ocr_recognize at present.
    #
    # Returns true (with #error set) when the expected envelope keys are
    # missing or an error message is present, false otherwise.
    def have_error?(response)
      unless response.has_key?(:ocr_web_service_recognize_response) &&
             response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
        set_error("No response")
        return true
      end

      message = response[:ocr_web_service_recognize_response][:ocrws_response][:error_message]
      return false if message.nil?

      set_error message
      true
    end
  end
end
|
data/spec/ocr_spec.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
end
|
10
|
-
end
|
1
|
+
root = File.expand_path('../..', __FILE__)
require File.join(root, %w[lib ocr])

describe OCR do
  # OCR.use resolves the engine by name and instantiates it through
  # OCR::Factory.create, which only accepts Factory descendants.
  it "should build an engine through the factory" do
    OCR.use(:dummy).should be_a(OCR::Factory)
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,22 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-27 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: savon
|
16
|
+
requirement: &19566960 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.9.9
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *19566960
|
14
25
|
- !ruby/object:Gem::Dependency
|
15
26
|
name: rspec
|
16
|
-
requirement: &
|
27
|
+
requirement: &19558440 !ruby/object:Gem::Requirement
|
17
28
|
none: false
|
18
29
|
requirements:
|
19
30
|
- - ~>
|
@@ -21,7 +32,7 @@ dependencies:
|
|
21
32
|
version: 2.7.0
|
22
33
|
type: :development
|
23
34
|
prerelease: false
|
24
|
-
version_requirements: *
|
35
|
+
version_requirements: *19558440
|
25
36
|
description: Recognize text and characters from image files using web services.
|
26
37
|
email:
|
27
38
|
- mabarroso@mabarroso.com
|
@@ -30,7 +41,13 @@ extensions: []
|
|
30
41
|
extra_rdoc_files: []
|
31
42
|
files:
|
32
43
|
- lib/ocr.rb
|
44
|
+
- lib/ocrs/free_ocr.rb
|
45
|
+
- lib/ocrs/onlineocr.rb
|
46
|
+
- lib/ocrs/dummy.rb
|
47
|
+
- lib/ocrs/onlineocr2.rb
|
33
48
|
- lib/ocr/version.rb
|
49
|
+
- lib/ocr/ocr.rb
|
50
|
+
- lib/ocr/factory.rb
|
34
51
|
- MIT-LICENSE
|
35
52
|
- Rakefile
|
36
53
|
- README.md
|