ocr 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -9,6 +9,8 @@
9
9
  * identify: Username and license code as password
10
10
  * languages: :brazilian, :byelorussian, :bulgarian, :catalan, :croatian, :czech, :danish, :dutch, :english, :estonian, :finnish, :french, :german, :greek, :hungarian, :indonesian, :italian, :latin, :latvian, :lithuanian, :moldavian, :polish, :portuguese, :romanian, :russian, :serbian, :slovakian, :slovenian, :spanish, :swedish, :turkish, :ukrainian
11
11
  * output formats: :doc, :pdf, :excel, :html, :txt, :rtf
12
+ - [Free OCR online webservice](http://www.free-ocr.co.uk/)
13
+ * identify: Username
12
14
 
13
15
  #Installation
14
16
  ##From the command line
@@ -32,15 +34,42 @@ gem 'ocr'
32
34
  ```
33
35
 
34
36
  ## Using
37
+ - Get an OCR: ocr = OCR.use <OCR_NAME>
38
+ - Set the login Credentials: ocr.login <YOUR_USER>, <YOUR_PASSWORD>, [<EXTRA_LOGIN_DATA>]
39
+ - Set proxy configuration: ocr.proxy p_addr, p_port = nil, p_user = nil, p_pass = nil
40
+ - Set the image to process: ocr.file= <FILE_NAME_AND_PATH>
41
+ - Set language: ocr.language= <LANGUAGE_NAME>
42
+ - Set output format: ocr.format= <FORMAT_NAME>
43
+ - Set output file: ocr.outputfile= <FILE_NAME_AND_PATH>
44
+ - Check for errors: error = ocr.error if ocr.error?
45
+ - Results: text = ocr.text unless ocr.error?
35
46
 
36
47
  ### OCR Web Service
37
- [OCR Web Service](http://www.ocrwebservice.com/)
48
+ More info at [OCR Web Service](http://www.ocrwebservice.com/).
49
+
50
+ Extra properties: convert_to_bw=<BOOLEAN>, multi_page_doc=<BOOLEAN>.
38
51
 
39
52
  ```ruby
40
53
  ocr = OCR.use :onlineocr
41
54
 
42
- ocr.login <YOUR_USER>, <LICENSE_CODE>
43
- ocr.file= 'pkg/text_test2.jpg'
55
+ ocr.login <YOUR_USER>, <YOUR_LICENSE_CODE>
56
+ ocr.file= 'text_image.jpg'
57
+ ocr.format= :pdf
58
+ ocr.outputfile= 'text_doc.pdf'
59
+ ocr.recognize
60
+
61
+ puts "ERROR: #{ocr.error}" if ocr.error?
62
+ puts "RESULT: #{ocr.text}" unless ocr.error?
63
+ ```
64
+
65
+ ### Free OCR online webservice
66
+ More info at [Free OCR online webservice](http://www.free-ocr.co.uk/).
67
+
68
+ ```ruby
69
+ ocr = OCR.use :free_ocr
70
+
71
+ ocr.login <YOUR_USER_NAME>
72
+ ocr.file= 'text_image.jpg'
44
73
  ocr.recognize
45
74
 
46
75
  puts "ERROR: #{ocr.error}" if ocr.error?
data/lib/ocr.rb CHANGED
@@ -2,7 +2,6 @@ require File.expand_path('../ocr/factory', __FILE__)
2
2
  require File.expand_path('../ocr/ocr', __FILE__)
3
3
  require File.expand_path('../ocrs/dummy', __FILE__)
4
4
  require File.expand_path('../ocrs/onlineocr', __FILE__)
5
- require File.expand_path('../ocrs/onlineocr2', __FILE__)
6
5
  require File.expand_path('../ocrs/free_ocr', __FILE__)
7
6
 
8
7
 
@@ -7,7 +7,7 @@ module OCR
7
7
  attr_reader :text, :error
8
8
  attr_accessor :username, :password, :extra_login_data
9
9
  attr_accessor :proxy_addr, :proxy_port, :proxy_user, :proxy_pass
10
- attr_accessor :file, :language, :format
10
+ attr_accessor :file, :outputfile, :language, :format
11
11
  attr_accessor :debug
12
12
 
13
13
  def initialize(*args)
@@ -22,6 +22,7 @@ module OCR
22
22
  file= false
23
23
  lang= false
24
24
  format= false
25
+ outputfile= false
25
26
  debug= false
26
27
  end
27
28
 
@@ -42,6 +43,10 @@ module OCR
42
43
  @error != false
43
44
  end
44
45
 
46
+ def outputfile?
47
+ outputfile != false
48
+ end
49
+
45
50
  def recognize
46
51
  clear_error
47
52
  clear_text
@@ -1,3 +1,3 @@
1
1
  module OCR
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -1,6 +1,4 @@
1
- require 'soap/wsdlDriver'
2
- require 'base64'
3
-
1
+ require 'savon'
4
2
 
5
3
  module OCR
6
4
  class Free_ocr < OCR::Ocr
@@ -14,45 +12,24 @@ module OCR
14
12
 
15
13
  def ocr_recognize
16
14
  raise Exception, 'You should set image file' unless @file
17
- # request = {
18
- # 'image' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
19
- # }
20
- #puts request
21
- #return
22
- #client = Savon::Client.new('http://www.free-ocr.co.uk/ocr.asmx?WSDL')
23
- #puts client.wsdl.soap_actions
24
- #return
25
- #begin
26
- # response = client.request(:analyze) do
27
- # soap.body = request
28
- # end
29
- #rescue
30
- # puts "EX"
31
- #end
32
-
33
- client = SOAP::WSDLDriverFactory.new( 'http://www.free-ocr.co.uk/ocr.asmx?WSDL' ).create_rpc_driver
34
- result = client.analyze(:image => File.open(@file, 'rb') { |f| [f.read].pack('m*') })
35
-
36
- puts response. analyzeResult
37
-
38
- puts result
39
- return
40
-
41
- puts
42
- puts "BODY: #{response.body}"
15
+ request = {
16
+ 'image' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
17
+ }
18
+ request[:username] = @username if @username
19
+
20
+ client = Savon::Client.new('http://www.free-ocr.co.uk/ocr.asmx?WSDL')
21
+
22
+ response = client.request(:analyze) do
23
+ soap.body = request
24
+ end
43
25
 
44
26
  return false if have_error? response.body
45
- puts 'sigue'
46
- set_text !response[:ocr_text].nil? ? response[:ocr_text] : ''
27
+ set_text response.body[:analyze_response][:analyze_result]
47
28
  end
48
29
 
49
30
  def have_error? response
50
- return true && set_error("No response") unless response.has_key?(:ocr_web_service_recognize_response)
51
- return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
52
- return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
53
- return false if response[:ocr_web_service_recognize_response][:ocrws_response][:error_message].nil?
54
- set_error response[:ocr_web_service_recognize_response][:ocrws_response][:error_message]
55
- true
31
+ return true && set_error("No response") unless response.has_key?(:analyze_response)
32
+ return true && set_error("No response") unless response[:analyze_response].has_key?(:analyze_result)
56
33
  end
57
34
  end
58
35
  end
@@ -1,11 +1,9 @@
1
1
  require 'savon'
2
- require 'base64'
3
-
4
2
 
5
3
  module OCR
6
4
  class Onlineocr < OCR::Ocr
7
5
 
8
- attr_accessor :convert_to_bw
6
+ attr_accessor :convert_to_bw, :multi_page_doc
9
7
 
10
8
  private
11
9
  def init
@@ -13,6 +11,7 @@ module OCR
13
11
  self.language= :english
14
12
  self.format= :txt
15
13
  self.convert_to_bw= false
14
+ self.multi_page_doc= false
16
15
  end
17
16
 
18
17
  def ocr_recognize
@@ -29,8 +28,8 @@ module OCR
29
28
  'outputDocumentFormat' => self.format.to_s.upcase,
30
29
  'convertToBW' => self.convert_to_bw.to_s,
31
30
  'getOCRText' => true.to_s,
32
- 'createOutputDocument' => false.to_s,
33
- 'multiPageDoc' => false.to_s,
31
+ 'createOutputDocument' => outputfile?.to_s,
32
+ 'multiPageDoc' => self.multi_page_doc.to_s,
34
33
  'ocrWords' => false.to_s
35
34
  },
36
35
  }
@@ -50,6 +49,12 @@ module OCR
50
49
 
51
50
  return false if have_error? response.body
52
51
 
52
+ if outputfile?
53
+ File.open(outputfile, 'w+') {|f|
54
+ f.puts Base64.decode64(response[:ocr_web_service_recognize_response][:ocrws_response][:file_data])
55
+ }
56
+ end
57
+
53
58
  set_text response[:ocr_web_service_recognize_response][:ocrws_response][:ocr_text][:array_of_string][:string]
54
59
  end
55
60
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-27 00:00:00.000000000Z
12
+ date: 2012-02-28 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: savon
16
- requirement: &19566960 !ruby/object:Gem::Requirement
16
+ requirement: &20600860 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 0.9.9
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *19566960
24
+ version_requirements: *20600860
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &19558440 !ruby/object:Gem::Requirement
27
+ requirement: &20588220 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: 2.7.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *19558440
35
+ version_requirements: *20588220
36
36
  description: Recognize text and characters from image files using web services.
37
37
  email:
38
38
  - mabarroso@mabarroso.com
@@ -44,7 +44,6 @@ files:
44
44
  - lib/ocrs/free_ocr.rb
45
45
  - lib/ocrs/onlineocr.rb
46
46
  - lib/ocrs/dummy.rb
47
- - lib/ocrs/onlineocr2.rb
48
47
  - lib/ocr/version.rb
49
48
  - lib/ocr/ocr.rb
50
49
  - lib/ocr/factory.rb
@@ -1,109 +0,0 @@
1
- require 'net/http'
2
- require 'base64'
3
-
4
-
5
- module OCR
6
- class Onlineocr2 < OCR::Ocr
7
-
8
- attr_accessor :convert_to_bw
9
-
10
- private
11
- def init
12
- super()
13
- self.language= :english
14
- self.format= :txt
15
- self.convert_to_bw= false
16
- end
17
-
18
- def ocr_recognize
19
- raise Exception, 'You should set username and license as password' unless @username && @password
20
- request = {
21
- 'user_name' => @username,
22
- 'license_code' => @password,
23
- 'OCRWSSettings' => {
24
- # 'ocrLanguages' => [self.language.to_s.upcase],
25
- 'outputDocumentFormat' => self.format.to_s.upcase,
26
- 'convertToBW' => self.convert_to_bw,
27
- 'getOCRText' => true,
28
- 'createOutputDocument' => false,
29
- 'multiPageDoc' => false,
30
- 'ocrWords' => true
31
- },
32
- 'OCRWSInputImage' => {
33
- 'fileName' => File.basename(@file),
34
- # 'fileData' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
35
- },
36
- }
37
- #puts request
38
- #return
39
- data =<<EOT
40
- <?xml version="1.0" encoding="utf-8"?>
41
- <soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
42
- <soap:Body>
43
- <OCRWebServiceRecognize xmlns="http://stockservice.contoso.com/wse/samples/2005/10">
44
- <user_name>#{@username}</user_name>
45
- <license_code>#{@password}</license_code>
46
- <OCRWSInputImage>
47
- <fileName>string</fileName>
48
- <fileData>base64Binary</fileData>
49
- </OCRWSInputImage>
50
- <OCRWSSetting>
51
- <ocrLanguages>#{self.language.to_s.upcase}</ocrLanguages>
52
- <outputDocumentFormat>#{self.format.to_s.upcase}</outputDocumentFormat>
53
- <convertToBW>#{self.convert_to_bw}</convertToBW>
54
- <getOCRText>true</getOCRText>
55
- <createOutputDocument>false</createOutputDocument>
56
- <multiPageDoc>false</multiPageDoc>
57
- <ocrWords>true</ocrWords>
58
- </OCRWSSetting>
59
- </OCRWebServiceRecognize>
60
- </soap:Body>
61
- </soap:Envelope>
62
- EOT
63
-
64
- headers = {
65
- 'Host' => 'www.ocrwebservice.com',
66
- 'Content-Type' => 'application/soap+xml; charset=utf-8',
67
- # 'Content-Type' => 'text/xml',
68
- 'Content-Length' => "#{data.length}",
69
- 'SOAPAction' => '"http://stockservice.contoso.com/wse/samples/2005/10/OCRWebServiceRecognize"'
70
- }
71
-
72
- host = 'www.ocrwebservice.com'
73
- url = '/services/OCRWebService.asmx'
74
-
75
- #data = data.squeeze.tr "\n", ''
76
- puts data
77
- puts headers
78
-
79
- #return
80
- http = Net::HTTP.new(host, 80)
81
- http.use_ssl = false
82
- resp = http.post(url, data, headers)
83
-
84
- puts resp
85
- puts 'Code = ' + resp.code
86
- puts 'Message = ' + resp.message
87
-
88
-
89
-
90
- return
91
-
92
- puts
93
- puts "BODY: #{response.body}"
94
-
95
- return false if have_error? response.body
96
- puts 'sigue'
97
- set_text !response[:ocr_text].nil? ? response[:ocr_text] : ''
98
- end
99
-
100
- def have_error? response
101
- return true && set_error("No response") unless response.has_key?(:ocr_web_service_recognize_response)
102
- return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
103
- return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
104
- return false if response[:ocr_web_service_recognize_response][:ocrws_response][:error_message].nil?
105
- set_error response[:ocr_web_service_recognize_response][:ocrws_response][:error_message]
106
- true
107
- end
108
- end
109
- end