ocr 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -5,12 +5,15 @@
5
5
  Recognize text and characters from image files using web services.
6
6
 
7
7
  ##Web services supported
8
+ - [WeOCR project](http://weocr.ocrgrid.org/)
8
9
  - [OCR Web Service](http://www.ocrwebservice.com/)([Online OCR](http://www.onlineocr.net/))
9
10
  * identify: Username and license code as password
10
11
  * languages: :brazilian, :byelorussian, :bulgarian, :catalan, :croatian, :czech, :danish, :dutch, :english, :estonian, :finnish, :french, :german, :greek, :hungarian, :indonesian, :italian, :latin, :latvian, :lithuanian, :moldavian, :polish, :portuguese, :romanian, :russian, :serbian, :slovakian, :slovenian, :spanish, :swedish, :turkish, :ukrainian
11
12
  * output formats: :doc, :pdf, :excel, :html, :txt, :rtf
12
13
  - [Free OCR online webservice](http://www.free-ocr.co.uk/)
13
14
  * identify: Username
15
+ * Not tested with images larger than 100x100px.
16
+ * Free service is limited to 100x100px images.
14
17
 
15
18
  #Installation
16
19
  ##From the command line
@@ -44,10 +47,27 @@ gem 'ocr'
44
47
  - Test error: error = ocr.error if ocr.error?
45
48
  - Results: text = ocr.text unless ocr.error?
46
49
 
50
+ ### WeOCR project
51
+ More info at [WeOCR project](http://weocr.ocrgrid.org/).
52
+
53
+ Extra property: outputencoding=NAME (for example "utf-8").
54
+
55
+ ```ruby
56
+ ocr = OCR.use :weocr
57
+
58
+ ocr.file= 'text_image.jpg'
59
+ ocr.format= :txt
60
+ ocr.outputencoding="utf-8"
61
+ ocr.recognize
62
+
63
+ puts "ERROR: #{ocr.error}" if ocr.error?
64
+ puts "RESULT: #{ocr.text}" unless ocr.error?
65
+ ```
66
+
47
67
  ### OCR Web Service
48
68
  More info at [OCR Web Service](http://www.ocrwebservice.com/).
49
69
 
50
- Extra properties convert_to_bw=<BOOLEAN>, multi_page_doc=<BOOLEAN>.
70
+ Extra properties convert_to_bw=BOOLEAN, multi_page_doc=BOOLEAN.
51
71
 
52
72
  ```ruby
53
73
  ocr = OCR.use :onlineocr
@@ -63,7 +83,7 @@ gem 'ocr'
63
83
  ```
64
84
 
65
85
  ### Free OCR online webservice
66
- More info at [Free OCR online webservice](http://www.free-ocr.co.uk/).
86
+ More info at [Free OCR online webservice](http://www.free-ocr.co.uk/). Not tested with images larger than 100x100px. The free service is limited to 100x100px images.
67
87
 
68
88
  ```ruby
69
89
  ocr = OCR.use :free_ocr
data/lib/ocr.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require File.expand_path('../ocr/factory', __FILE__)
2
2
  require File.expand_path('../ocr/ocr', __FILE__)
3
3
  require File.expand_path('../ocrs/dummy', __FILE__)
4
+ require File.expand_path('../ocrs/weocr', __FILE__)
4
5
  require File.expand_path('../ocrs/onlineocr', __FILE__)
5
6
  require File.expand_path('../ocrs/free_ocr', __FILE__)
6
7
 
data/lib/ocr/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OCR
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/ocrs/weocr.rb ADDED
@@ -0,0 +1,55 @@
1
+ require 'net/http'
2
+ require 'rexml/document'
3
+ require 'nokogiri'
4
+
5
+ module OCR
6
+ class Weocr < OCR::Ocr
7
+
8
+ attr_accessor :outputencoding, :servers, :servers_info, :server_cgi
9
+
10
+ def ocr_servers
11
+ @servers = []
12
+ @servers_info = {}
13
+ # Get OCR servers
14
+ url = 'http://weocr.ocrgrid.org/cgi-bin/weocr/search.cgi?lang=&fmt=xml'
15
+ xml_data = Net::HTTP.get(URI.parse(url))
16
+ doc = REXML::Document.new(xml_data)
17
+ doc.elements.each('weocrlist/server/url') do |ele|
18
+ @servers << ele.text
19
+ end
20
+
21
+ return unless @servers.count > 0
22
+
23
+ xml_data = Net::HTTP.get(URI.parse("#{@servers[0]}srvspec.xml"))
24
+ doc = REXML::Document.new(xml_data)
25
+ doc.elements.each('ocrserver/svinfo/cgi') do |ele|
26
+ @server_cgi = ele.text
27
+ end
28
+ end
29
+
30
+ private
31
+ def init
32
+ super()
33
+ self.outputencoding= 'utf-8'
34
+ self.server_cgi= false
35
+ self.ocr_servers
36
+ end
37
+
38
+ def ocr_recognize
39
+ raise Exception, 'No available OCR server' unless @server_cgi
40
+ res = `curl -F userfile=@#{@file} \
41
+ -F outputencoding="#{outputencoding}" \
42
+ -F outputformat="#{format.to_s}" \
43
+ #{@server_cgi} 2>/dev/null`
44
+
45
+ doc = Nokogiri::HTML.parse(res)
46
+ err = doc.search('h2').first
47
+ return false if have_error? err.content if err
48
+ set_text doc.search('pre').first.content
49
+ end
50
+
51
+ def have_error? response
52
+ return true && set_error(response) if response
53
+ end
54
+ end
55
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-28 00:00:00.000000000Z
12
+ date: 2012-03-02 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: savon
16
- requirement: &20600860 !ruby/object:Gem::Requirement
16
+ requirement: &11878720 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,21 @@ dependencies:
21
21
  version: 0.9.9
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *20600860
24
+ version_requirements: *11878720
25
+ - !ruby/object:Gem::Dependency
26
+ name: nokogiri
27
+ requirement: &11875660 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: 1.5.0
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *11875660
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: rspec
27
- requirement: &20588220 !ruby/object:Gem::Requirement
38
+ requirement: &11873900 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
@@ -32,7 +43,7 @@ dependencies:
32
43
  version: 2.7.0
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *20588220
46
+ version_requirements: *11873900
36
47
  description: Recognize text and characters from image files using web services.
37
48
  email:
38
49
  - mabarroso@mabarroso.com
@@ -42,6 +53,7 @@ extra_rdoc_files: []
42
53
  files:
43
54
  - lib/ocr.rb
44
55
  - lib/ocrs/free_ocr.rb
56
+ - lib/ocrs/weocr.rb
45
57
  - lib/ocrs/onlineocr.rb
46
58
  - lib/ocrs/dummy.rb
47
59
  - lib/ocr/version.rb