ocr 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +32 -3
- data/lib/ocr.rb +0 -1
- data/lib/ocr/ocr.rb +6 -1
- data/lib/ocr/version.rb +1 -1
- data/lib/ocrs/free_ocr.rb +14 -37
- data/lib/ocrs/onlineocr.rb +10 -5
- metadata +6 -7
- data/lib/ocrs/onlineocr2.rb +0 -109
data/README.md
CHANGED
@@ -9,6 +9,8 @@
|
|
9
9
|
* identify: Username and license code as password
|
10
10
|
* languages: :brazilian, :byelorussian, :bulgarian, :catalan, :croatian, :czech, :danish, :dutch, :english, :estonian, :finnish, :french, :german, :greek, :hungarian, :indonesian, :italian, :latin, :latvian, :lithuanian, :moldavian, :polish, :portuguese, :romanian, :russian, :serbian, :slovakian, :slovenian, :spanish, :swedish, :turkish, :ukrainian
|
11
11
|
* output formats: :doc, :pdf, :excel, :html, :txt, :rtf
|
12
|
+
- [Free OCR online webservice](http://www.free-ocr.co.uk/)
|
13
|
+
* identify: Username
|
12
14
|
|
13
15
|
#Installation
|
14
16
|
##From the command line
|
@@ -32,15 +34,42 @@ gem 'ocr'
|
|
32
34
|
```
|
33
35
|
|
34
36
|
## Using
|
37
|
+
- Get an OCR: ocr = OCR.use <OCR_NAME>
|
38
|
+
- Set the login Credentials: ocr.login <YOUR_USER>, <YOUR_PASSWORD>, [<EXTRA_LOGIN_DATA>]
|
39
|
+
- Set proxy configuration: ocr.proxy p_addr, p_port = nil, p_user = nil, p_pass = nil
|
40
|
+
- Set the image to process: ocr.file= <FILE_NAME_AND_PATH>
|
41
|
+
- Set language: ocr.language= <LANGUAGE_NAME>
|
42
|
+
- Set output format: ocr.format= <FORMAT_NAME>
|
43
|
+
- Set output file: ocr.outputfile= <FILE_NAME_AND_PATH>
|
44
|
+
- Test error: error = ocr.error if ocr.error?
|
45
|
+
- Results: text = ocr.text unless ocr.error?
|
35
46
|
|
36
47
|
### OCR Web Service
|
37
|
-
[OCR Web Service](http://www.ocrwebservice.com/)
|
48
|
+
More info at [OCR Web Service](http://www.ocrwebservice.com/).
|
49
|
+
|
50
|
+
Extra properties: convert_to_bw=<BOOLEAN>, multi_page_doc=<BOOLEAN>.
|
38
51
|
|
39
52
|
```ruby
|
40
53
|
ocr = OCR.use :onlineocr
|
41
54
|
|
42
|
-
ocr.login <YOUR_USER>, <
|
43
|
-
ocr.file= '
|
55
|
+
ocr.login <YOUR_USER>, <YOUR_LICENSE_CODE>
|
56
|
+
ocr.file= 'text_image.jpg'
|
57
|
+
ocr.format= :pdf
|
58
|
+
ocr.outputfile= 'text_doc.pdf'
|
59
|
+
ocr.recognize
|
60
|
+
|
61
|
+
puts "ERROR: #{ocr.error}" if ocr.error?
|
62
|
+
puts "RESULT: #{ocr.text}" unless ocr.error?
|
63
|
+
```
|
64
|
+
|
65
|
+
### Free OCR online webservice
|
66
|
+
More info at [Free OCR online webservice](http://www.free-ocr.co.uk/).
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
ocr = OCR.use :free_ocr
|
70
|
+
|
71
|
+
ocr.login <YOUR_USER_NAME>
|
72
|
+
ocr.file= 'text_image.jpg'
|
44
73
|
ocr.recognize
|
45
74
|
|
46
75
|
puts "ERROR: #{ocr.error}" if ocr.error?
|
data/lib/ocr.rb
CHANGED
@@ -2,7 +2,6 @@ require File.expand_path('../ocr/factory', __FILE__)
|
|
2
2
|
require File.expand_path('../ocr/ocr', __FILE__)
|
3
3
|
require File.expand_path('../ocrs/dummy', __FILE__)
|
4
4
|
require File.expand_path('../ocrs/onlineocr', __FILE__)
|
5
|
-
require File.expand_path('../ocrs/onlineocr2', __FILE__)
|
6
5
|
require File.expand_path('../ocrs/free_ocr', __FILE__)
|
7
6
|
|
8
7
|
|
data/lib/ocr/ocr.rb
CHANGED
@@ -7,7 +7,7 @@ module OCR
|
|
7
7
|
attr_reader :text, :error
|
8
8
|
attr_accessor :username, :password, :extra_login_data
|
9
9
|
attr_accessor :proxy_addr, :proxy_port, :proxy_user, :proxy_pass
|
10
|
-
attr_accessor :file, :language, :format
|
10
|
+
attr_accessor :file, :outputfile, :language, :format
|
11
11
|
attr_accessor :debug
|
12
12
|
|
13
13
|
def initialize(*args)
|
@@ -22,6 +22,7 @@ module OCR
|
|
22
22
|
file= false
|
23
23
|
lang= false
|
24
24
|
format= false
|
25
|
+
outputfile= false
|
25
26
|
debug= false
|
26
27
|
end
|
27
28
|
|
@@ -42,6 +43,10 @@ module OCR
|
|
42
43
|
@error != false
|
43
44
|
end
|
44
45
|
|
46
|
+
def outputfile?
|
47
|
+
outputfile != false
|
48
|
+
end
|
49
|
+
|
45
50
|
def recognize
|
46
51
|
clear_error
|
47
52
|
clear_text
|
data/lib/ocr/version.rb
CHANGED
data/lib/ocrs/free_ocr.rb
CHANGED
@@ -1,6 +1,4 @@
|
|
1
|
-
require '
|
2
|
-
require 'base64'
|
3
|
-
|
1
|
+
require 'savon'
|
4
2
|
|
5
3
|
module OCR
|
6
4
|
class Free_ocr < OCR::Ocr
|
@@ -14,45 +12,24 @@ module OCR
|
|
14
12
|
|
15
13
|
def ocr_recognize
|
16
14
|
raise Exception, 'You should set image file' unless @file
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
# soap.body = request
|
28
|
-
# end
|
29
|
-
#rescue
|
30
|
-
# puts "EX"
|
31
|
-
#end
|
32
|
-
|
33
|
-
client = SOAP::WSDLDriverFactory.new( 'http://www.free-ocr.co.uk/ocr.asmx?WSDL' ).create_rpc_driver
|
34
|
-
result = client.analyze(:image => File.open(@file, 'rb') { |f| [f.read].pack('m*') })
|
35
|
-
|
36
|
-
puts response. analyzeResult
|
37
|
-
|
38
|
-
puts result
|
39
|
-
return
|
40
|
-
|
41
|
-
puts
|
42
|
-
puts "BODY: #{response.body}"
|
15
|
+
request = {
|
16
|
+
'image' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
|
17
|
+
}
|
18
|
+
request[:username] = @username if @username
|
19
|
+
|
20
|
+
client = Savon::Client.new('http://www.free-ocr.co.uk/ocr.asmx?WSDL')
|
21
|
+
|
22
|
+
response = client.request(:analyze) do
|
23
|
+
soap.body = request
|
24
|
+
end
|
43
25
|
|
44
26
|
return false if have_error? response.body
|
45
|
-
|
46
|
-
set_text !response[:ocr_text].nil? ? response[:ocr_text] : ''
|
27
|
+
set_text response.body[:analyze_response][:analyze_result]
|
47
28
|
end
|
48
29
|
|
49
30
|
def have_error? response
|
50
|
-
return true && set_error("No response") unless response.has_key?(:
|
51
|
-
return true && set_error("No response") unless response[:
|
52
|
-
return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
|
53
|
-
return false if response[:ocr_web_service_recognize_response][:ocrws_response][:error_message].nil?
|
54
|
-
set_error response[:ocr_web_service_recognize_response][:ocrws_response][:error_message]
|
55
|
-
true
|
31
|
+
return true && set_error("No response") unless response.has_key?(:analyze_response)
|
32
|
+
return true && set_error("No response") unless response[:analyze_response].has_key?(:analyze_result)
|
56
33
|
end
|
57
34
|
end
|
58
35
|
end
|
data/lib/ocrs/onlineocr.rb
CHANGED
@@ -1,11 +1,9 @@
|
|
1
1
|
require 'savon'
|
2
|
-
require 'base64'
|
3
|
-
|
4
2
|
|
5
3
|
module OCR
|
6
4
|
class Onlineocr < OCR::Ocr
|
7
5
|
|
8
|
-
attr_accessor :convert_to_bw
|
6
|
+
attr_accessor :convert_to_bw, :multi_page_doc
|
9
7
|
|
10
8
|
private
|
11
9
|
def init
|
@@ -13,6 +11,7 @@ module OCR
|
|
13
11
|
self.language= :english
|
14
12
|
self.format= :txt
|
15
13
|
self.convert_to_bw= false
|
14
|
+
self.multi_page_doc= false
|
16
15
|
end
|
17
16
|
|
18
17
|
def ocr_recognize
|
@@ -29,8 +28,8 @@ module OCR
|
|
29
28
|
'outputDocumentFormat' => self.format.to_s.upcase,
|
30
29
|
'convertToBW' => self.convert_to_bw.to_s,
|
31
30
|
'getOCRText' => true.to_s,
|
32
|
-
'createOutputDocument' =>
|
33
|
-
'multiPageDoc' =>
|
31
|
+
'createOutputDocument' => outputfile?.to_s,
|
32
|
+
'multiPageDoc' => self.multi_page_doc.to_s,
|
34
33
|
'ocrWords' => false.to_s
|
35
34
|
},
|
36
35
|
}
|
@@ -50,6 +49,12 @@ module OCR
|
|
50
49
|
|
51
50
|
return false if have_error? response.body
|
52
51
|
|
52
|
+
if outputfile?
|
53
|
+
File.open(outputfile, 'w+') {|f|
|
54
|
+
f.puts Base64.decode64(response[:ocr_web_service_recognize_response][:ocrws_response][:file_data])
|
55
|
+
}
|
56
|
+
end
|
57
|
+
|
53
58
|
set_text response[:ocr_web_service_recognize_response][:ocrws_response][:ocr_text][:array_of_string][:string]
|
54
59
|
end
|
55
60
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-28 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: savon
|
16
|
-
requirement: &
|
16
|
+
requirement: &20600860 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.9.9
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *20600860
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &20588220 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: 2.7.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *20588220
|
36
36
|
description: Recognize text and characters from image files using web services.
|
37
37
|
email:
|
38
38
|
- mabarroso@mabarroso.com
|
@@ -44,7 +44,6 @@ files:
|
|
44
44
|
- lib/ocrs/free_ocr.rb
|
45
45
|
- lib/ocrs/onlineocr.rb
|
46
46
|
- lib/ocrs/dummy.rb
|
47
|
-
- lib/ocrs/onlineocr2.rb
|
48
47
|
- lib/ocr/version.rb
|
49
48
|
- lib/ocr/ocr.rb
|
50
49
|
- lib/ocr/factory.rb
|
data/lib/ocrs/onlineocr2.rb
DELETED
@@ -1,109 +0,0 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
require 'base64'
|
3
|
-
|
4
|
-
|
5
|
-
module OCR
|
6
|
-
class Onlineocr2 < OCR::Ocr
|
7
|
-
|
8
|
-
attr_accessor :convert_to_bw
|
9
|
-
|
10
|
-
private
|
11
|
-
def init
|
12
|
-
super()
|
13
|
-
self.language= :english
|
14
|
-
self.format= :txt
|
15
|
-
self.convert_to_bw= false
|
16
|
-
end
|
17
|
-
|
18
|
-
def ocr_recognize
|
19
|
-
raise Exception, 'You should set username and license as password' unless @username && @password
|
20
|
-
request = {
|
21
|
-
'user_name' => @username,
|
22
|
-
'license_code' => @password,
|
23
|
-
'OCRWSSettings' => {
|
24
|
-
# 'ocrLanguages' => [self.language.to_s.upcase],
|
25
|
-
'outputDocumentFormat' => self.format.to_s.upcase,
|
26
|
-
'convertToBW' => self.convert_to_bw,
|
27
|
-
'getOCRText' => true,
|
28
|
-
'createOutputDocument' => false,
|
29
|
-
'multiPageDoc' => false,
|
30
|
-
'ocrWords' => true
|
31
|
-
},
|
32
|
-
'OCRWSInputImage' => {
|
33
|
-
'fileName' => File.basename(@file),
|
34
|
-
# 'fileData' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
|
35
|
-
},
|
36
|
-
}
|
37
|
-
#puts request
|
38
|
-
#return
|
39
|
-
data =<<EOT
|
40
|
-
<?xml version="1.0" encoding="utf-8"?>
|
41
|
-
<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
|
42
|
-
<soap:Body>
|
43
|
-
<OCRWebServiceRecognize xmlns="http://stockservice.contoso.com/wse/samples/2005/10">
|
44
|
-
<user_name>#{@username}</user_name>
|
45
|
-
<license_code>#{@password}</license_code>
|
46
|
-
<OCRWSInputImage>
|
47
|
-
<fileName>string</fileName>
|
48
|
-
<fileData>base64Binary</fileData>
|
49
|
-
</OCRWSInputImage>
|
50
|
-
<OCRWSSetting>
|
51
|
-
<ocrLanguages>#{self.language.to_s.upcase}</ocrLanguages>
|
52
|
-
<outputDocumentFormat>#{self.format.to_s.upcase}</outputDocumentFormat>
|
53
|
-
<convertToBW>#{self.convert_to_bw}</convertToBW>
|
54
|
-
<getOCRText>true</getOCRText>
|
55
|
-
<createOutputDocument>false</createOutputDocument>
|
56
|
-
<multiPageDoc>false</multiPageDoc>
|
57
|
-
<ocrWords>true</ocrWords>
|
58
|
-
</OCRWSSetting>
|
59
|
-
</OCRWebServiceRecognize>
|
60
|
-
</soap:Body>
|
61
|
-
</soap:Envelope>
|
62
|
-
EOT
|
63
|
-
|
64
|
-
headers = {
|
65
|
-
'Host' => 'www.ocrwebservice.com',
|
66
|
-
'Content-Type' => 'application/soap+xml; charset=utf-8',
|
67
|
-
# 'Content-Type' => 'text/xml',
|
68
|
-
'Content-Length' => "#{data.length}",
|
69
|
-
'SOAPAction' => '"http://stockservice.contoso.com/wse/samples/2005/10/OCRWebServiceRecognize"'
|
70
|
-
}
|
71
|
-
|
72
|
-
host = 'www.ocrwebservice.com'
|
73
|
-
url = '/services/OCRWebService.asmx'
|
74
|
-
|
75
|
-
#data = data.squeeze.tr "\n", ''
|
76
|
-
puts data
|
77
|
-
puts headers
|
78
|
-
|
79
|
-
#return
|
80
|
-
http = Net::HTTP.new(host, 80)
|
81
|
-
http.use_ssl = false
|
82
|
-
resp = http.post(url, data, headers)
|
83
|
-
|
84
|
-
puts resp
|
85
|
-
puts 'Code = ' + resp.code
|
86
|
-
puts 'Message = ' + resp.message
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
return
|
91
|
-
|
92
|
-
puts
|
93
|
-
puts "BODY: #{response.body}"
|
94
|
-
|
95
|
-
return false if have_error? response.body
|
96
|
-
puts 'sigue'
|
97
|
-
set_text !response[:ocr_text].nil? ? response[:ocr_text] : ''
|
98
|
-
end
|
99
|
-
|
100
|
-
def have_error? response
|
101
|
-
return true && set_error("No response") unless response.has_key?(:ocr_web_service_recognize_response)
|
102
|
-
return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
|
103
|
-
return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
|
104
|
-
return false if response[:ocr_web_service_recognize_response][:ocrws_response][:error_message].nil?
|
105
|
-
set_error response[:ocr_web_service_recognize_response][:ocrws_response][:error_message]
|
106
|
-
true
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|