ocr 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +22 -2
- data/lib/ocr.rb +1 -0
- data/lib/ocr/version.rb +1 -1
- data/lib/ocrs/weocr.rb +55 -0
- metadata +18 -6
data/README.md
CHANGED
@@ -5,12 +5,15 @@
|
|
5
5
|
Recognize text and characters from image files using web services.
|
6
6
|
|
7
7
|
##Web services supported
|
8
|
+
- [WeOCR project](http://weocr.ocrgrid.org/)
|
8
9
|
- [OCR Web Service](http://www.ocrwebservice.com/)([Online OCR](http://www.onlineocr.net/))
|
9
10
|
* identify: Username and license code as password
|
10
11
|
* languages: :brazilian, :byelorussian, :bulgarian, :catalan, :croatian, :czech, :danish, :dutch, :english, :estonian, :finnish, :french, :german, :greek, :hungarian, :indonesian, :italian, :latin, :latvian, :lithuanian, :moldavian, :polish, :portuguese, :romanian, :russian, :serbian, :slovakian, :slovenian, :spanish, :swedish, :turkish, :ukrainian
|
11
12
|
* output formats: :doc, :pdf, :excel, :html, :txt, :rtf
|
12
13
|
- [Free OCR online webservice](http://www.free-ocr.co.uk/)
|
13
14
|
* identify: Username
|
15
|
+
* Not tested with images larger than 100x100px.
|
16
|
+
* Free service is limited to 100x100px images.
|
14
17
|
|
15
18
|
#Installation
|
16
19
|
##From the command line
|
@@ -44,10 +47,27 @@ gem 'ocr'
|
|
44
47
|
- Test error: error = ocr.error if ocr.error?
|
45
48
|
- Results: text = ocr.text unless ocr.error?
|
46
49
|
|
50
|
+
### WeOCR project
|
51
|
+
More info at [WeOCR project](http://weocr.ocrgrid.org/).
|
52
|
+
|
53
|
+
Extra properties outputencoding=NAME.
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
ocr = OCR.use :weocr
|
57
|
+
|
58
|
+
ocr.file= 'text_image.jpg'
|
59
|
+
ocr.format= :txt
|
60
|
+
ocr.outputencoding="utf-8"
|
61
|
+
ocr.recognize
|
62
|
+
|
63
|
+
puts "ERROR: #{ocr.error}" if ocr.error?
|
64
|
+
puts "RESULT: #{ocr.text}" unless ocr.error?
|
65
|
+
```
|
66
|
+
|
47
67
|
### OCR Web Service
|
48
68
|
More info at [OCR Web Service](http://www.ocrwebservice.com/).
|
49
69
|
|
50
|
-
Extra properties convert_to_bw
|
70
|
+
Extra properties convert_to_bw=BOOLEAN, multi_page_doc=BOOLEAN.
|
51
71
|
|
52
72
|
```ruby
|
53
73
|
ocr = OCR.use :onlineocr
|
@@ -63,7 +83,7 @@ gem 'ocr'
|
|
63
83
|
```
|
64
84
|
|
65
85
|
### Free OCR online webservice
|
66
|
-
More info at [Free OCR online webservice](http://www.free-ocr.co.uk/).
|
86
|
+
More info at [Free OCR online webservice](http://www.free-ocr.co.uk/). Not tested with images larger than 100x100px. The free service is limited to 100x100px images.
|
67
87
|
|
68
88
|
```ruby
|
69
89
|
ocr = OCR.use :free_ocr
|
data/lib/ocr.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require File.expand_path('../ocr/factory', __FILE__)
|
2
2
|
require File.expand_path('../ocr/ocr', __FILE__)
|
3
3
|
require File.expand_path('../ocrs/dummy', __FILE__)
|
4
|
+
require File.expand_path('../ocrs/weocr', __FILE__)
|
4
5
|
require File.expand_path('../ocrs/onlineocr', __FILE__)
|
5
6
|
require File.expand_path('../ocrs/free_ocr', __FILE__)
|
6
7
|
|
data/lib/ocr/version.rb
CHANGED
data/lib/ocrs/weocr.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'rexml/document'
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
module OCR
  # OCR adapter for the WeOCR project (http://weocr.ocrgrid.org/).
  #
  # During initialization the adapter asks the WeOCR directory for its list of
  # servers and reads the recognition CGI URL from the first server's
  # srvspec.xml. Recognition uploads the image with the `curl` command-line
  # tool (which must be installed) and parses the HTML page sent back.
  class Weocr < OCR::Ocr
    attr_accessor :outputencoding, :servers, :servers_info, :server_cgi

    # Refreshes the list of WeOCR servers and the CGI endpoint to use.
    #
    # Resets +servers+ and +servers_info+, fills +servers+ with every URL
    # returned by the WeOCR directory search and, when at least one server is
    # listed, stores the text of the last <tt>ocrserver/svinfo/cgi</tt>
    # element of the first server's srvspec.xml in +server_cgi+.
    # +servers_info+ is only reset here; nothing in this class populates it.
    #
    # Performs up to two HTTP GET requests. Network and XML parsing errors
    # are not rescued and propagate to the caller.
    def ocr_servers
      @servers = []
      @servers_info = {}
      # Get OCR servers
      url = 'http://weocr.ocrgrid.org/cgi-bin/weocr/search.cgi?lang=&fmt=xml'
      xml_data = Net::HTTP.get(URI.parse(url))
      doc = REXML::Document.new(xml_data)
      doc.elements.each('weocrlist/server/url') do |ele|
        @servers << ele.text
      end

      return if @servers.empty?

      # NOTE(review): assumes the server URL ends with '/' so that appending
      # 'srvspec.xml' yields a valid URL - confirm against the directory data.
      xml_data = Net::HTTP.get(URI.parse("#{@servers.first}srvspec.xml"))
      doc = REXML::Document.new(xml_data)
      doc.elements.each('ocrserver/svinfo/cgi') do |ele|
        @server_cgi = ele.text
      end
    end

    private

    # Sets the adapter defaults and looks up an OCR server right away.
    # +server_cgi+ stays +false+ when the lookup finds no server.
    def init
      super()
      self.outputencoding = 'utf-8'
      self.server_cgi = false
      ocr_servers
    end

    # Uploads the image to the selected server and stores the recognized text.
    #
    # Returns +false+ after recording an error when the response contains an
    # <tt>h2</tt> element (treated as the server's error message) or contains
    # no <tt>pre</tt> element; otherwise returns the result of +set_text+.
    #
    # Raises RuntimeError when no OCR server is available. The original code
    # raised a bare Exception; RuntimeError is still caught by any
    # <tt>rescue Exception</tt> but no longer slips past a plain +rescue+.
    def ocr_recognize
      raise 'No available OCR server' unless @server_cgi

      # Pass the arguments as an array so no shell is involved: the file name,
      # the encoding and the server URL (which comes from remote XML) can no
      # longer inject shell commands or break on spaces. '--url' keeps a
      # value starting with '-' from being read as a curl option.
      command = [
        'curl',
        '-F', "userfile=@#{@file}",
        '-F', "outputencoding=#{outputencoding}",
        '-F', "outputformat=#{format}",
        '--url', @server_cgi.to_s
      ]
      # Discard stderr (curl's progress meter), as the original 2>/dev/null did.
      res = IO.popen(command, :err => File::NULL) { |io| io.read }

      doc = Nokogiri::HTML.parse(res)
      err = doc.search('h2').first
      return false if err && have_error?(err.content)

      # An empty or unexpected response has no <pre>; report it as an error
      # instead of failing with NoMethodError on nil.
      recognized = doc.search('pre').first
      unless recognized
        set_error 'No text found in OCR server response'
        return false
      end

      set_text recognized.content
    end

    # Records +response+ as the current error.
    #
    # Returns +true+ when an error message was given and recorded, +false+
    # otherwise. The result no longer depends on what +set_error+ returns.
    def have_error?(response)
      return false unless response

      set_error(response)
      true
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02
|
12
|
+
date: 2012-03-02 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: savon
|
16
|
-
requirement: &
|
16
|
+
requirement: &11878720 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,21 @@ dependencies:
|
|
21
21
|
version: 0.9.9
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *11878720
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: nokogiri
|
27
|
+
requirement: &11875660 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.5.0
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *11875660
|
25
36
|
- !ruby/object:Gem::Dependency
|
26
37
|
name: rspec
|
27
|
-
requirement: &
|
38
|
+
requirement: &11873900 !ruby/object:Gem::Requirement
|
28
39
|
none: false
|
29
40
|
requirements:
|
30
41
|
- - ~>
|
@@ -32,7 +43,7 @@ dependencies:
|
|
32
43
|
version: 2.7.0
|
33
44
|
type: :development
|
34
45
|
prerelease: false
|
35
|
-
version_requirements: *
|
46
|
+
version_requirements: *11873900
|
36
47
|
description: Recognize text and characters from image files using web services.
|
37
48
|
email:
|
38
49
|
- mabarroso@mabarroso.com
|
@@ -42,6 +53,7 @@ extra_rdoc_files: []
|
|
42
53
|
files:
|
43
54
|
- lib/ocr.rb
|
44
55
|
- lib/ocrs/free_ocr.rb
|
56
|
+
- lib/ocrs/weocr.rb
|
45
57
|
- lib/ocrs/onlineocr.rb
|
46
58
|
- lib/ocrs/dummy.rb
|
47
59
|
- lib/ocr/version.rb
|