ocr 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +22 -2
- data/lib/ocr.rb +1 -0
- data/lib/ocr/version.rb +1 -1
- data/lib/ocrs/weocr.rb +55 -0
- metadata +18 -6
data/README.md
CHANGED
@@ -5,12 +5,15 @@
|
|
5
5
|
Recognize text and characters from image files using web services.
|
6
6
|
|
7
7
|
##Web services supported
|
8
|
+
- [WeOCR project](http://weocr.ocrgrid.org/)
|
8
9
|
- [OCR Web Service](http://www.ocrwebservice.com/)([Online OCR](http://www.onlineocr.net/))
|
9
10
|
* identify: Username and license code as password
|
10
11
|
* languages: :brazilian, :byelorussian, :bulgarian, :catalan, :croatian, :czech, :danish, :dutch, :english, :estonian, :finnish, :french, :german, :greek, :hungarian, :indonesian, :italian, :latin, :latvian, :lithuanian, :moldavian, :polish, :portuguese, :romanian, :russian, :serbian, :slovakian, :slovenian, :spanish, :swedish, :turkish, :ukrainian
|
11
12
|
* output formats: :doc, :pdf, :excel, :html, :txt, :rtf
|
12
13
|
- [Free OCR online webservice](http://www.free-ocr.co.uk/)
|
13
14
|
* identify: Username
|
15
|
+
* Not tested with images larger than 100x100px.
|
16
|
+
* Free service is limited to 100x100px images.
|
14
17
|
|
15
18
|
#Installation
|
16
19
|
##From the command line
|
@@ -44,10 +47,27 @@ gem 'ocr'
|
|
44
47
|
- Test error: error = ocr.error if ocr.error?
|
45
48
|
- Results: text = ocr.text unless ocr.error?
|
46
49
|
|
50
|
+
### WeOCR project
|
51
|
+
More info at [WeOCR project](http://weocr.ocrgrid.org/).
|
52
|
+
|
53
|
+
Extra properties outputencoding=NAME.
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
ocr = OCR.use :weocr
|
57
|
+
|
58
|
+
ocr.file= 'text_image.jpg'
|
59
|
+
ocr.format= :txt
|
60
|
+
ocr.outputencoding="utf-8"
|
61
|
+
ocr.recognize
|
62
|
+
|
63
|
+
puts "ERROR: #{ocr.error}" if ocr.error?
|
64
|
+
puts "RESULT: #{ocr.text}" unless ocr.error?
|
65
|
+
```
|
66
|
+
|
47
67
|
### OCR Web Service
|
48
68
|
More info at [OCR Web Service](http://www.ocrwebservice.com/).
|
49
69
|
|
50
|
-
Extra properties convert_to_bw
|
70
|
+
Extra properties convert_to_bw=BOOLEAN, multi_page_doc=BOOLEAN.
|
51
71
|
|
52
72
|
```ruby
|
53
73
|
ocr = OCR.use :onlineocr
|
@@ -63,7 +83,7 @@ gem 'ocr'
|
|
63
83
|
```
|
64
84
|
|
65
85
|
### Free OCR online webservice
|
66
|
-
More info at [Free OCR online webservice](http://www.free-ocr.co.uk/).
|
86
|
+
More info at [Free OCR online webservice](http://www.free-ocr.co.uk/). Not tested with images larger than 100x100px. The free service is limited to 100x100px images.
|
67
87
|
|
68
88
|
```ruby
|
69
89
|
ocr = OCR.use :free_ocr
|
data/lib/ocr.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require File.expand_path('../ocr/factory', __FILE__)
|
2
2
|
require File.expand_path('../ocr/ocr', __FILE__)
|
3
3
|
require File.expand_path('../ocrs/dummy', __FILE__)
|
4
|
+
require File.expand_path('../ocrs/weocr', __FILE__)
|
4
5
|
require File.expand_path('../ocrs/onlineocr', __FILE__)
|
5
6
|
require File.expand_path('../ocrs/free_ocr', __FILE__)
|
6
7
|
|
data/lib/ocr/version.rb
CHANGED
data/lib/ocrs/weocr.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'rexml/document'
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
module OCR
  # OCR backend for the WeOCR project (http://weocr.ocrgrid.org/).
  #
  # During +init+ the public WeOCR directory is queried for servers and the
  # CGI endpoint advertised by the first listed server is stored in
  # +server_cgi+. Recognition uploads the image to that endpoint using the
  # +curl+ command-line tool and parses the HTML answer with Nokogiri.
  class Weocr < OCR::Ocr
    # outputencoding:: encoding name sent to the server ('utf-8' after init).
    # servers::        server base URLs returned by the directory search.
    # servers_info::   reset to an empty Hash by ocr_servers; not filled in here.
    # server_cgi::     recognition CGI URL, or false when none has been found.
    attr_accessor :outputencoding, :servers, :servers_info, :server_cgi

    # Refreshes +servers+ from the WeOCR directory and resolves +server_cgi+
    # from the first server's srvspec.xml.
    #
    # Issues up to two HTTP GET requests; errors raised by Net::HTTP or REXML
    # are not rescued here. +server_cgi+ is left untouched when the directory
    # lists no servers.
    def ocr_servers
      @servers = []
      @servers_info = {}
      # Get OCR servers
      url = 'http://weocr.ocrgrid.org/cgi-bin/weocr/search.cgi?lang=&fmt=xml'
      directory = REXML::Document.new(Net::HTTP.get(URI.parse(url)))
      directory.elements.each('weocrlist/server/url') { |ele| @servers << ele.text }

      return if @servers.empty?

      # Only the first listed server is consulted.
      # NOTE(review): plain concatenation assumes the server URL ends with '/'.
      spec_xml = Net::HTTP.get(URI.parse("#{@servers.first}srvspec.xml"))
      spec = REXML::Document.new(spec_xml)
      spec.elements.each('ocrserver/svinfo/cgi') { |ele| @server_cgi = ele.text }
    end

    private

    # Sets the defaults for this backend and looks up an OCR server.
    # NOTE(review): presumably called by the OCR::Ocr constructor - confirm in
    # the base class.
    def init
      super()
      self.outputencoding = 'utf-8'
      self.server_cgi = false
      ocr_servers
    end

    # Uploads @file to the selected server and stores the recognised text.
    #
    # Returns false when an error was recorded, otherwise the result of
    # set_text. Raises Exception when no server CGI is known.
    def ocr_recognize
      # NOTE(review): raising Exception (not a StandardError subclass) is kept
      # on purpose so that callers already rescuing it are unaffected.
      raise Exception, 'No available OCR server' unless @server_cgi

      # The command is passed as an argument vector, so no shell is involved:
      # the file name, the encoding, the format and the server URL (which
      # comes from remote XML) cannot inject shell syntax.
      command = [
        'curl',
        '-F', "userfile=@#{@file}",
        '-F', "outputencoding=#{outputencoding}",
        '-F', "outputformat=#{format}",
        # A shell would have dropped surrounding whitespace; do the same here.
        @server_cgi.to_s.strip
      ]
      # stderr is discarded, matching the former `2>/dev/null`.
      res = IO.popen(command, err: File::NULL, &:read)

      doc = Nokogiri::HTML.parse(res)
      # The content of the first <h2>, when present, is treated as an error message.
      err = doc.search('h2').first
      return false if err && have_error?(err.content)

      # The recognised text is read from the first <pre>. An empty or
      # unexpected response has none; report that instead of calling
      # #content on nil.
      recognized = doc.search('pre').first
      unless recognized
        set_error 'No recognized text in OCR server response'
        return false
      end

      set_text recognized.content
    end

    # Records +response+ as the current error through set_error.
    #
    # Returns whatever set_error returns when +response+ is truthy, and nil
    # otherwise.
    def have_error?(response)
      set_error(response) if response
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02
|
12
|
+
date: 2012-03-02 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: savon
|
16
|
-
requirement: &
|
16
|
+
requirement: &11878720 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,21 @@ dependencies:
|
|
21
21
|
version: 0.9.9
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *11878720
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: nokogiri
|
27
|
+
requirement: &11875660 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.5.0
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *11875660
|
25
36
|
- !ruby/object:Gem::Dependency
|
26
37
|
name: rspec
|
27
|
-
requirement: &
|
38
|
+
requirement: &11873900 !ruby/object:Gem::Requirement
|
28
39
|
none: false
|
29
40
|
requirements:
|
30
41
|
- - ~>
|
@@ -32,7 +43,7 @@ dependencies:
|
|
32
43
|
version: 2.7.0
|
33
44
|
type: :development
|
34
45
|
prerelease: false
|
35
|
-
version_requirements: *
|
46
|
+
version_requirements: *11873900
|
36
47
|
description: Recognize text and characters from image files using web services.
|
37
48
|
email:
|
38
49
|
- mabarroso@mabarroso.com
|
@@ -42,6 +53,7 @@ extra_rdoc_files: []
|
|
42
53
|
files:
|
43
54
|
- lib/ocr.rb
|
44
55
|
- lib/ocrs/free_ocr.rb
|
56
|
+
- lib/ocrs/weocr.rb
|
45
57
|
- lib/ocrs/onlineocr.rb
|
46
58
|
- lib/ocrs/dummy.rb
|
47
59
|
- lib/ocr/version.rb
|