ocr 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +32 -3
- data/lib/ocr.rb +0 -1
- data/lib/ocr/ocr.rb +6 -1
- data/lib/ocr/version.rb +1 -1
- data/lib/ocrs/free_ocr.rb +14 -37
- data/lib/ocrs/onlineocr.rb +10 -5
- metadata +6 -7
- data/lib/ocrs/onlineocr2.rb +0 -109
data/README.md
CHANGED
@@ -9,6 +9,8 @@
|
|
9
9
|
* identify: Username and license code as password
|
10
10
|
* languages: :brazilian, :byelorussian, :bulgarian, :catalan, :croatian, :czech, :danish, :dutch, :english, :estonian, :finnish, :french, :german, :greek, :hungarian, :indonesian, :italian, :latin, :latvian, :lithuanian, :moldavian, :polish, :portuguese, :romanian, :russian, :serbian, :slovakian, :slovenian, :spanish, :swedish, :turkish, :ukrainian
|
11
11
|
* output formats: :doc, :pdf, :excel, :html, :txt, :rtf
|
12
|
+
- [Free OCR online webservice](http://www.free-ocr.co.uk/)
|
13
|
+
* identify: Username
|
12
14
|
|
13
15
|
#Installation
|
14
16
|
##From the command line
|
@@ -32,15 +34,42 @@ gem 'ocr'
|
|
32
34
|
```
|
33
35
|
|
34
36
|
## Using
|
37
|
+
- Get an OCR: ocr = OCR.use <OCR_NAME>
|
38
|
+
- Set the login credentials: ocr.login <YOUR_USER>, <YOUR_PASSWORD>, [<EXTRA_LOGIN_DATA>]
|
39
|
+
- Set proxy configuration: ocr.proxy p_addr, p_port = nil, p_user = nil, p_pass = nil
|
40
|
+
- Set the image to process: ocr.file= <FILE_NAME_AND_PATH>
|
41
|
+
- Set language: ocr.language= <LANGUAGE_NAME>
|
42
|
+
- Set output format: ocr.format= <FORMAT_NAME>
|
43
|
+
- Set output file: ocr.outputfile= <FILE_NAME_AND_PATH>
|
44
|
+
- Test error: error = ocr.error if ocr.error?
|
45
|
+
- Results: text = ocr.text unless ocr.error?
|
35
46
|
|
36
47
|
### OCR Web Service
|
37
|
-
[OCR Web Service](http://www.ocrwebservice.com/)
|
48
|
+
More info at [OCR Web Service](http://www.ocrwebservice.com/).
|
49
|
+
|
50
|
+
Extra properties: convert_to_bw=<BOOLEAN>, multi_page_doc=<BOOLEAN>.
|
38
51
|
|
39
52
|
```ruby
|
40
53
|
ocr = OCR.use :onlineocr
|
41
54
|
|
42
|
-
ocr.login <YOUR_USER>, <
|
43
|
-
ocr.file= '
|
55
|
+
ocr.login <YOUR_USER>, <YOUR_LICENSE_CODE>
|
56
|
+
ocr.file= 'text_image.jpg'
|
57
|
+
ocr.format= :pdf
|
58
|
+
ocr.outputfile= 'text_doc.pdf'
|
59
|
+
ocr.recognize
|
60
|
+
|
61
|
+
puts "ERROR: #{ocr.error}" if ocr.error?
|
62
|
+
puts "RESULT: #{ocr.text}" unless ocr.error?
|
63
|
+
```
|
64
|
+
|
65
|
+
### Free OCR online webservice
|
66
|
+
More info at [Free OCR online webservice](http://www.free-ocr.co.uk/).
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
ocr = OCR.use :free_ocr
|
70
|
+
|
71
|
+
ocr.login <YOUR_USER_NAME>
|
72
|
+
ocr.file= 'text_image.jpg'
|
44
73
|
ocr.recognize
|
45
74
|
|
46
75
|
puts "ERROR: #{ocr.error}" if ocr.error?
|
data/lib/ocr.rb
CHANGED
@@ -2,7 +2,6 @@ require File.expand_path('../ocr/factory', __FILE__)
|
|
2
2
|
require File.expand_path('../ocr/ocr', __FILE__)
|
3
3
|
require File.expand_path('../ocrs/dummy', __FILE__)
|
4
4
|
require File.expand_path('../ocrs/onlineocr', __FILE__)
|
5
|
-
require File.expand_path('../ocrs/onlineocr2', __FILE__)
|
6
5
|
require File.expand_path('../ocrs/free_ocr', __FILE__)
|
7
6
|
|
8
7
|
|
data/lib/ocr/ocr.rb
CHANGED
@@ -7,7 +7,7 @@ module OCR
|
|
7
7
|
attr_reader :text, :error
|
8
8
|
attr_accessor :username, :password, :extra_login_data
|
9
9
|
attr_accessor :proxy_addr, :proxy_port, :proxy_user, :proxy_pass
|
10
|
-
attr_accessor :file, :language, :format
|
10
|
+
attr_accessor :file, :outputfile, :language, :format
|
11
11
|
attr_accessor :debug
|
12
12
|
|
13
13
|
def initialize(*args)
|
@@ -22,6 +22,7 @@ module OCR
|
|
22
22
|
file= false
|
23
23
|
lang= false
|
24
24
|
format= false
|
25
|
+
outputfile= false
|
25
26
|
debug= false
|
26
27
|
end
|
27
28
|
|
@@ -42,6 +43,10 @@ module OCR
|
|
42
43
|
@error != false
|
43
44
|
end
|
44
45
|
|
46
|
+
def outputfile?
|
47
|
+
outputfile != false
|
48
|
+
end
|
49
|
+
|
45
50
|
def recognize
|
46
51
|
clear_error
|
47
52
|
clear_text
|
data/lib/ocr/version.rb
CHANGED
data/lib/ocrs/free_ocr.rb
CHANGED
@@ -1,6 +1,4 @@
|
|
1
|
-
require '
|
2
|
-
require 'base64'
|
3
|
-
|
1
|
+
require 'savon'
|
4
2
|
|
5
3
|
module OCR
|
6
4
|
class Free_ocr < OCR::Ocr
|
@@ -14,45 +12,24 @@ module OCR
|
|
14
12
|
|
15
13
|
def ocr_recognize
|
16
14
|
raise Exception, 'You should set image file' unless @file
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
# soap.body = request
|
28
|
-
# end
|
29
|
-
#rescue
|
30
|
-
# puts "EX"
|
31
|
-
#end
|
32
|
-
|
33
|
-
client = SOAP::WSDLDriverFactory.new( 'http://www.free-ocr.co.uk/ocr.asmx?WSDL' ).create_rpc_driver
|
34
|
-
result = client.analyze(:image => File.open(@file, 'rb') { |f| [f.read].pack('m*') })
|
35
|
-
|
36
|
-
puts response. analyzeResult
|
37
|
-
|
38
|
-
puts result
|
39
|
-
return
|
40
|
-
|
41
|
-
puts
|
42
|
-
puts "BODY: #{response.body}"
|
15
|
+
request = {
|
16
|
+
'image' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
|
17
|
+
}
|
18
|
+
request[:username] = @username if @username
|
19
|
+
|
20
|
+
client = Savon::Client.new('http://www.free-ocr.co.uk/ocr.asmx?WSDL')
|
21
|
+
|
22
|
+
response = client.request(:analyze) do
|
23
|
+
soap.body = request
|
24
|
+
end
|
43
25
|
|
44
26
|
return false if have_error? response.body
|
45
|
-
|
46
|
-
set_text !response[:ocr_text].nil? ? response[:ocr_text] : ''
|
27
|
+
set_text response.body[:analyze_response][:analyze_result]
|
47
28
|
end
|
48
29
|
|
49
30
|
def have_error? response
|
50
|
-
return true && set_error("No response") unless response.has_key?(:
|
51
|
-
return true && set_error("No response") unless response[:
|
52
|
-
return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
|
53
|
-
return false if response[:ocr_web_service_recognize_response][:ocrws_response][:error_message].nil?
|
54
|
-
set_error response[:ocr_web_service_recognize_response][:ocrws_response][:error_message]
|
55
|
-
true
|
31
|
+
return true && set_error("No response") unless response.has_key?(:analyze_response)
|
32
|
+
return true && set_error("No response") unless response[:analyze_response].has_key?(:analyze_result)
|
56
33
|
end
|
57
34
|
end
|
58
35
|
end
|
data/lib/ocrs/onlineocr.rb
CHANGED
@@ -1,11 +1,9 @@
|
|
1
1
|
require 'savon'
|
2
|
-
require 'base64'
|
3
|
-
|
4
2
|
|
5
3
|
module OCR
|
6
4
|
class Onlineocr < OCR::Ocr
|
7
5
|
|
8
|
-
attr_accessor :convert_to_bw
|
6
|
+
attr_accessor :convert_to_bw, :multi_page_doc
|
9
7
|
|
10
8
|
private
|
11
9
|
def init
|
@@ -13,6 +11,7 @@ module OCR
|
|
13
11
|
self.language= :english
|
14
12
|
self.format= :txt
|
15
13
|
self.convert_to_bw= false
|
14
|
+
self.multi_page_doc= false
|
16
15
|
end
|
17
16
|
|
18
17
|
def ocr_recognize
|
@@ -29,8 +28,8 @@ module OCR
|
|
29
28
|
'outputDocumentFormat' => self.format.to_s.upcase,
|
30
29
|
'convertToBW' => self.convert_to_bw.to_s,
|
31
30
|
'getOCRText' => true.to_s,
|
32
|
-
'createOutputDocument' =>
|
33
|
-
'multiPageDoc' =>
|
31
|
+
'createOutputDocument' => outputfile?.to_s,
|
32
|
+
'multiPageDoc' => self.multi_page_doc.to_s,
|
34
33
|
'ocrWords' => false.to_s
|
35
34
|
},
|
36
35
|
}
|
@@ -50,6 +49,12 @@ module OCR
|
|
50
49
|
|
51
50
|
return false if have_error? response.body
|
52
51
|
|
52
|
+
if outputfile?
|
53
|
+
File.open(outputfile, 'w+') {|f|
|
54
|
+
f.puts Base64.decode64(response[:ocr_web_service_recognize_response][:ocrws_response][:file_data])
|
55
|
+
}
|
56
|
+
end
|
57
|
+
|
53
58
|
set_text response[:ocr_web_service_recognize_response][:ocrws_response][:ocr_text][:array_of_string][:string]
|
54
59
|
end
|
55
60
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-28 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: savon
|
16
|
-
requirement: &
|
16
|
+
requirement: &20600860 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.9.9
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *20600860
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &20588220 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: 2.7.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *20588220
|
36
36
|
description: Recognize text and characters from image files using web services.
|
37
37
|
email:
|
38
38
|
- mabarroso@mabarroso.com
|
@@ -44,7 +44,6 @@ files:
|
|
44
44
|
- lib/ocrs/free_ocr.rb
|
45
45
|
- lib/ocrs/onlineocr.rb
|
46
46
|
- lib/ocrs/dummy.rb
|
47
|
-
- lib/ocrs/onlineocr2.rb
|
48
47
|
- lib/ocr/version.rb
|
49
48
|
- lib/ocr/ocr.rb
|
50
49
|
- lib/ocr/factory.rb
|
data/lib/ocrs/onlineocr2.rb
DELETED
@@ -1,109 +0,0 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
require 'base64'
|
3
|
-
|
4
|
-
|
5
|
-
module OCR
|
6
|
-
class Onlineocr2 < OCR::Ocr
|
7
|
-
|
8
|
-
attr_accessor :convert_to_bw
|
9
|
-
|
10
|
-
private
|
11
|
-
def init
|
12
|
-
super()
|
13
|
-
self.language= :english
|
14
|
-
self.format= :txt
|
15
|
-
self.convert_to_bw= false
|
16
|
-
end
|
17
|
-
|
18
|
-
def ocr_recognize
|
19
|
-
raise Exception, 'You should set username and license as password' unless @username && @password
|
20
|
-
request = {
|
21
|
-
'user_name' => @username,
|
22
|
-
'license_code' => @password,
|
23
|
-
'OCRWSSettings' => {
|
24
|
-
# 'ocrLanguages' => [self.language.to_s.upcase],
|
25
|
-
'outputDocumentFormat' => self.format.to_s.upcase,
|
26
|
-
'convertToBW' => self.convert_to_bw,
|
27
|
-
'getOCRText' => true,
|
28
|
-
'createOutputDocument' => false,
|
29
|
-
'multiPageDoc' => false,
|
30
|
-
'ocrWords' => true
|
31
|
-
},
|
32
|
-
'OCRWSInputImage' => {
|
33
|
-
'fileName' => File.basename(@file),
|
34
|
-
# 'fileData' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
|
35
|
-
},
|
36
|
-
}
|
37
|
-
#puts request
|
38
|
-
#return
|
39
|
-
data =<<EOT
|
40
|
-
<?xml version="1.0" encoding="utf-8"?>
|
41
|
-
<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
|
42
|
-
<soap:Body>
|
43
|
-
<OCRWebServiceRecognize xmlns="http://stockservice.contoso.com/wse/samples/2005/10">
|
44
|
-
<user_name>#{@username}</user_name>
|
45
|
-
<license_code>#{@password}</license_code>
|
46
|
-
<OCRWSInputImage>
|
47
|
-
<fileName>string</fileName>
|
48
|
-
<fileData>base64Binary</fileData>
|
49
|
-
</OCRWSInputImage>
|
50
|
-
<OCRWSSetting>
|
51
|
-
<ocrLanguages>#{self.language.to_s.upcase}</ocrLanguages>
|
52
|
-
<outputDocumentFormat>#{self.format.to_s.upcase}</outputDocumentFormat>
|
53
|
-
<convertToBW>#{self.convert_to_bw}</convertToBW>
|
54
|
-
<getOCRText>true</getOCRText>
|
55
|
-
<createOutputDocument>false</createOutputDocument>
|
56
|
-
<multiPageDoc>false</multiPageDoc>
|
57
|
-
<ocrWords>true</ocrWords>
|
58
|
-
</OCRWSSetting>
|
59
|
-
</OCRWebServiceRecognize>
|
60
|
-
</soap:Body>
|
61
|
-
</soap:Envelope>
|
62
|
-
EOT
|
63
|
-
|
64
|
-
headers = {
|
65
|
-
'Host' => 'www.ocrwebservice.com',
|
66
|
-
'Content-Type' => 'application/soap+xml; charset=utf-8',
|
67
|
-
# 'Content-Type' => 'text/xml',
|
68
|
-
'Content-Length' => "#{data.length}",
|
69
|
-
'SOAPAction' => '"http://stockservice.contoso.com/wse/samples/2005/10/OCRWebServiceRecognize"'
|
70
|
-
}
|
71
|
-
|
72
|
-
host = 'www.ocrwebservice.com'
|
73
|
-
url = '/services/OCRWebService.asmx'
|
74
|
-
|
75
|
-
#data = data.squeeze.tr "\n", ''
|
76
|
-
puts data
|
77
|
-
puts headers
|
78
|
-
|
79
|
-
#return
|
80
|
-
http = Net::HTTP.new(host, 80)
|
81
|
-
http.use_ssl = false
|
82
|
-
resp = http.post(url, data, headers)
|
83
|
-
|
84
|
-
puts resp
|
85
|
-
puts 'Code = ' + resp.code
|
86
|
-
puts 'Message = ' + resp.message
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
return
|
91
|
-
|
92
|
-
puts
|
93
|
-
puts "BODY: #{response.body}"
|
94
|
-
|
95
|
-
return false if have_error? response.body
|
96
|
-
puts 'sigue'
|
97
|
-
set_text !response[:ocr_text].nil? ? response[:ocr_text] : ''
|
98
|
-
end
|
99
|
-
|
100
|
-
def have_error? response
|
101
|
-
return true && set_error("No response") unless response.has_key?(:ocr_web_service_recognize_response)
|
102
|
-
return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
|
103
|
-
return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
|
104
|
-
return false if response[:ocr_web_service_recognize_response][:ocrws_response][:error_message].nil?
|
105
|
-
set_error response[:ocr_web_service_recognize_response][:ocrws_response][:error_message]
|
106
|
-
true
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|