ocr 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -9,6 +9,8 @@
9
9
  * identify: Username and license code as password
10
10
  * languages: :brazilian, :byelorussian, :bulgarian, :catalan, :croatian, :czech, :danish, :dutch, :english, :estonian, :finnish, :french, :german, :greek, :hungarian, :indonesian, :italian, :latin, :latvian, :lithuanian, :moldavian, :polish, :portuguese, :romanian, :russian, :serbian, :slovakian, :slovenian, :spanish, :swedish, :turkish, :ukrainian
11
11
  * output formats: :doc, :pdf, :excel, :html, :txt, :rtf
12
+ - [Free OCR online webservice](http://www.free-ocr.co.uk/)
13
+ * identify: Username
12
14
 
13
15
  #Installation
14
16
  ##From the command line
@@ -32,15 +34,42 @@ gem 'ocr'
32
34
  ```
33
35
 
34
36
  ## Using
37
+ - Get an OCR: ocr = OCR.use <OCR_NAME>
38
+ - Set the login Credentials: ocr.login <YOUR_USER>, <YOUR_PASSWORD>, [<EXTRA_LOGIN_DATA>]
39
+ - Set proxy configuration: ocr.proxy p_addr, p_port = nil, p_user = nil, p_pass = nil
40
+ - Set image to work: ocr.file= <FILE_NAME_AND_PATH>
41
+ - Set language: ocr.language= <LANGUAGE_NAME>
42
+ - Set output format: ocr.format= <FORMAT_NAME>
43
+ - Set output file: ocr.outputfile= <FILE_NAME_AND_PATH>
44
+ - Test error: error = ocr.error if ocr.error?
45
+ - Results: text = ocr.text unless ocr.error?
35
46
 
36
47
  ### OCR Web Service
37
- [OCR Web Service](http://www.ocrwebservice.com/)
48
+ More info at [OCR Web Service](http://www.ocrwebservice.com/).
49
+
50
+ Extra properties: convert_to_bw=<BOOLEAN>, multi_page_doc=<BOOLEAN>.
38
51
 
39
52
  ```ruby
40
53
  ocr = OCR.use :onlineocr
41
54
 
42
- ocr.login <YOUR_USER>, <LICENSE_CODE>
43
- ocr.file= 'pkg/text_test2.jpg'
55
+ ocr.login <YOUR_USER>, <YOUR_LICENSE_CODE>
56
+ ocr.file= 'text_image.jpg'
57
+ ocr.format= :pdf
58
+ ocr.outputfile= 'text_doc.pdf'
59
+ ocr.recognize
60
+
61
+ puts "ERROR: #{ocr.error}" if ocr.error?
62
+ puts "RESULT: #{ocr.text}" unless ocr.error?
63
+ ```
64
+
65
+ ### Free OCR online webservice
66
+ More info at [Free OCR online webservice](http://www.free-ocr.co.uk/).
67
+
68
+ ```ruby
69
+ ocr = OCR.use :free_ocr
70
+
71
+ ocr.login <YOUR_USER_NAME>
72
+ ocr.file= 'text_image.jpg'
44
73
  ocr.recognize
45
74
 
46
75
  puts "ERROR: #{ocr.error}" if ocr.error?
data/lib/ocr.rb CHANGED
@@ -2,7 +2,6 @@ require File.expand_path('../ocr/factory', __FILE__)
2
2
  require File.expand_path('../ocr/ocr', __FILE__)
3
3
  require File.expand_path('../ocrs/dummy', __FILE__)
4
4
  require File.expand_path('../ocrs/onlineocr', __FILE__)
5
- require File.expand_path('../ocrs/onlineocr2', __FILE__)
6
5
  require File.expand_path('../ocrs/free_ocr', __FILE__)
7
6
 
8
7
 
@@ -7,7 +7,7 @@ module OCR
7
7
  attr_reader :text, :error
8
8
  attr_accessor :username, :password, :extra_login_data
9
9
  attr_accessor :proxy_addr, :proxy_port, :proxy_user, :proxy_pass
10
- attr_accessor :file, :language, :format
10
+ attr_accessor :file, :outputfile, :language, :format
11
11
  attr_accessor :debug
12
12
 
13
13
  def initialize(*args)
@@ -22,6 +22,7 @@ module OCR
22
22
  file= false
23
23
  lang= false
24
24
  format= false
25
+ outputfile= false
25
26
  debug= false
26
27
  end
27
28
 
@@ -42,6 +43,10 @@ module OCR
42
43
  @error != false
43
44
  end
44
45
 
46
+ def outputfile?
47
+ outputfile != false
48
+ end
49
+
45
50
  def recognize
46
51
  clear_error
47
52
  clear_text
@@ -1,3 +1,3 @@
1
1
  module OCR
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -1,6 +1,4 @@
1
- require 'soap/wsdlDriver'
2
- require 'base64'
3
-
1
+ require 'savon'
4
2
 
5
3
  module OCR
6
4
  class Free_ocr < OCR::Ocr
@@ -14,45 +12,24 @@ module OCR
14
12
 
15
13
  def ocr_recognize
16
14
  raise Exception, 'You should set image file' unless @file
17
- # request = {
18
- # 'image' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
19
- # }
20
- #puts request
21
- #return
22
- #client = Savon::Client.new('http://www.free-ocr.co.uk/ocr.asmx?WSDL')
23
- #puts client.wsdl.soap_actions
24
- #return
25
- #begin
26
- # response = client.request(:analyze) do
27
- # soap.body = request
28
- # end
29
- #rescue
30
- # puts "EX"
31
- #end
32
-
33
- client = SOAP::WSDLDriverFactory.new( 'http://www.free-ocr.co.uk/ocr.asmx?WSDL' ).create_rpc_driver
34
- result = client.analyze(:image => File.open(@file, 'rb') { |f| [f.read].pack('m*') })
35
-
36
- puts response. analyzeResult
37
-
38
- puts result
39
- return
40
-
41
- puts
42
- puts "BODY: #{response.body}"
15
+ request = {
16
+ 'image' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
17
+ }
18
+ request[:username] = @username if @username
19
+
20
+ client = Savon::Client.new('http://www.free-ocr.co.uk/ocr.asmx?WSDL')
21
+
22
+ response = client.request(:analyze) do
23
+ soap.body = request
24
+ end
43
25
 
44
26
  return false if have_error? response.body
45
- puts 'sigue'
46
- set_text !response[:ocr_text].nil? ? response[:ocr_text] : ''
27
+ set_text response.body[:analyze_response][:analyze_result]
47
28
  end
48
29
 
49
30
  def have_error? response
50
- return true && set_error("No response") unless response.has_key?(:ocr_web_service_recognize_response)
51
- return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
52
- return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
53
- return false if response[:ocr_web_service_recognize_response][:ocrws_response][:error_message].nil?
54
- set_error response[:ocr_web_service_recognize_response][:ocrws_response][:error_message]
55
- true
31
+ return true && set_error("No response") unless response.has_key?(:analyze_response)
32
+ return true && set_error("No response") unless response[:analyze_response].has_key?(:analyze_result)
56
33
  end
57
34
  end
58
35
  end
@@ -1,11 +1,9 @@
1
1
  require 'savon'
2
- require 'base64'
3
-
4
2
 
5
3
  module OCR
6
4
  class Onlineocr < OCR::Ocr
7
5
 
8
- attr_accessor :convert_to_bw
6
+ attr_accessor :convert_to_bw, :multi_page_doc
9
7
 
10
8
  private
11
9
  def init
@@ -13,6 +11,7 @@ module OCR
13
11
  self.language= :english
14
12
  self.format= :txt
15
13
  self.convert_to_bw= false
14
+ self.multi_page_doc= false
16
15
  end
17
16
 
18
17
  def ocr_recognize
@@ -29,8 +28,8 @@ module OCR
29
28
  'outputDocumentFormat' => self.format.to_s.upcase,
30
29
  'convertToBW' => self.convert_to_bw.to_s,
31
30
  'getOCRText' => true.to_s,
32
- 'createOutputDocument' => false.to_s,
33
- 'multiPageDoc' => false.to_s,
31
+ 'createOutputDocument' => outputfile?.to_s,
32
+ 'multiPageDoc' => self.multi_page_doc.to_s,
34
33
  'ocrWords' => false.to_s
35
34
  },
36
35
  }
@@ -50,6 +49,12 @@ module OCR
50
49
 
51
50
  return false if have_error? response.body
52
51
 
52
+ if outputfile?
53
+ File.open(outputfile, 'w+') {|f|
54
+ f.puts Base64.decode64(response[:ocr_web_service_recognize_response][:ocrws_response][:file_data])
55
+ }
56
+ end
57
+
53
58
  set_text response[:ocr_web_service_recognize_response][:ocrws_response][:ocr_text][:array_of_string][:string]
54
59
  end
55
60
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-27 00:00:00.000000000Z
12
+ date: 2012-02-28 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: savon
16
- requirement: &19566960 !ruby/object:Gem::Requirement
16
+ requirement: &20600860 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 0.9.9
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *19566960
24
+ version_requirements: *20600860
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &19558440 !ruby/object:Gem::Requirement
27
+ requirement: &20588220 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: 2.7.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *19558440
35
+ version_requirements: *20588220
36
36
  description: Recognize text and characters from image files using web services.
37
37
  email:
38
38
  - mabarroso@mabarroso.com
@@ -44,7 +44,6 @@ files:
44
44
  - lib/ocrs/free_ocr.rb
45
45
  - lib/ocrs/onlineocr.rb
46
46
  - lib/ocrs/dummy.rb
47
- - lib/ocrs/onlineocr2.rb
48
47
  - lib/ocr/version.rb
49
48
  - lib/ocr/ocr.rb
50
49
  - lib/ocr/factory.rb
@@ -1,109 +0,0 @@
1
- require 'net/http'
2
- require 'base64'
3
-
4
-
5
- module OCR
6
- class Onlineocr2 < OCR::Ocr
7
-
8
- attr_accessor :convert_to_bw
9
-
10
- private
11
- def init
12
- super()
13
- self.language= :english
14
- self.format= :txt
15
- self.convert_to_bw= false
16
- end
17
-
18
- def ocr_recognize
19
- raise Exception, 'You should set username and license as password' unless @username && @password
20
- request = {
21
- 'user_name' => @username,
22
- 'license_code' => @password,
23
- 'OCRWSSettings' => {
24
- # 'ocrLanguages' => [self.language.to_s.upcase],
25
- 'outputDocumentFormat' => self.format.to_s.upcase,
26
- 'convertToBW' => self.convert_to_bw,
27
- 'getOCRText' => true,
28
- 'createOutputDocument' => false,
29
- 'multiPageDoc' => false,
30
- 'ocrWords' => true
31
- },
32
- 'OCRWSInputImage' => {
33
- 'fileName' => File.basename(@file),
34
- # 'fileData' => File.open(@file, 'rb') { |f| [f.read].pack('m*') }
35
- },
36
- }
37
- #puts request
38
- #return
39
- data =<<EOT
40
- <?xml version="1.0" encoding="utf-8"?>
41
- <soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
42
- <soap:Body>
43
- <OCRWebServiceRecognize xmlns="http://stockservice.contoso.com/wse/samples/2005/10">
44
- <user_name>#{@username}</user_name>
45
- <license_code>#{@password}</license_code>
46
- <OCRWSInputImage>
47
- <fileName>string</fileName>
48
- <fileData>base64Binary</fileData>
49
- </OCRWSInputImage>
50
- <OCRWSSetting>
51
- <ocrLanguages>#{self.language.to_s.upcase}</ocrLanguages>
52
- <outputDocumentFormat>#{self.format.to_s.upcase}</outputDocumentFormat>
53
- <convertToBW>#{self.convert_to_bw}</convertToBW>
54
- <getOCRText>true</getOCRText>
55
- <createOutputDocument>false</createOutputDocument>
56
- <multiPageDoc>false</multiPageDoc>
57
- <ocrWords>true</ocrWords>
58
- </OCRWSSetting>
59
- </OCRWebServiceRecognize>
60
- </soap:Body>
61
- </soap:Envelope>
62
- EOT
63
-
64
- headers = {
65
- 'Host' => 'www.ocrwebservice.com',
66
- 'Content-Type' => 'application/soap+xml; charset=utf-8',
67
- # 'Content-Type' => 'text/xml',
68
- 'Content-Length' => "#{data.length}",
69
- 'SOAPAction' => '"http://stockservice.contoso.com/wse/samples/2005/10/OCRWebServiceRecognize"'
70
- }
71
-
72
- host = 'www.ocrwebservice.com'
73
- url = '/services/OCRWebService.asmx'
74
-
75
- #data = data.squeeze.tr "\n", ''
76
- puts data
77
- puts headers
78
-
79
- #return
80
- http = Net::HTTP.new(host, 80)
81
- http.use_ssl = false
82
- resp = http.post(url, data, headers)
83
-
84
- puts resp
85
- puts 'Code = ' + resp.code
86
- puts 'Message = ' + resp.message
87
-
88
-
89
-
90
- return
91
-
92
- puts
93
- puts "BODY: #{response.body}"
94
-
95
- return false if have_error? response.body
96
- puts 'sigue'
97
- set_text !response[:ocr_text].nil? ? response[:ocr_text] : ''
98
- end
99
-
100
- def have_error? response
101
- return true && set_error("No response") unless response.has_key?(:ocr_web_service_recognize_response)
102
- return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
103
- return true && set_error("No response") unless response[:ocr_web_service_recognize_response].has_key?(:ocrws_response)
104
- return false if response[:ocr_web_service_recognize_response][:ocrws_response][:error_message].nil?
105
- set_error response[:ocr_web_service_recognize_response][:ocrws_response][:error_message]
106
- true
107
- end
108
- end
109
- end