ocr 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +22 -2
 - data/lib/ocr.rb +1 -0
 - data/lib/ocr/version.rb +1 -1
 - data/lib/ocrs/weocr.rb +55 -0
 - metadata +18 -6
 
    
        data/README.md
    CHANGED
    
    | 
         @@ -5,12 +5,15 @@ 
     | 
|
| 
       5 
5 
     | 
    
         
             
              Recognize text and characters from image files using web services.
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
       7 
7 
     | 
    
         
             
            ##Web services supported
         
     | 
| 
      
 8 
     | 
    
         
            +
              - [WeOCR project](http://weocr.ocrgrid.org/)
         
     | 
| 
       8 
9 
     | 
    
         
             
              - [OCR Web Service](http://www.ocrwebservice.com/)([Online OCR](http://www.onlineocr.net/))
         
     | 
| 
       9 
10 
     | 
    
         
             
                * identify: Username and license code as password
         
     | 
| 
       10 
11 
     | 
    
         
             
                * languages: :brazilian, :byelorussian, :bulgarian, :catalan, :croatian, :czech, :danish, :dutch, :english, :estonian, :finnish, :french, :german, :greek, :hungarian, :indonesian, :italian, :latin, :latvian, :lithuanian, :moldavian, :polish, :portuguese, :romanian, :russian, :serbian, :slovakian, :slovenian, :spanish, :swedish, :turkish, :ukrainian
         
     | 
| 
       11 
12 
     | 
    
         
             
                * output formats: :doc, :pdf, :excel, :html, :txt, :rtf
         
     | 
| 
       12 
13 
     | 
    
         
             
              - [Free OCR online webservice](http://www.free-ocr.co.uk/)
         
     | 
| 
       13 
14 
     | 
    
         
             
                * identify: Username
         
     | 
| 
      
 15 
     | 
    
         
            +
                * Not tested with images larger than 100x100px.
         
     | 
| 
      
 16 
     | 
    
         
            +
                * Free service is limited to 100x100px images.
         
     | 
| 
       14 
17 
     | 
    
         | 
| 
       15 
18 
     | 
    
         
             
            #Installation
         
     | 
| 
       16 
19 
     | 
    
         
             
            ##From the command line
         
     | 
| 
         @@ -44,10 +47,27 @@ gem 'ocr' 
     | 
|
| 
       44 
47 
     | 
    
         
             
              - Test error: error = ocr.error if ocr.error?
         
     | 
| 
       45 
48 
     | 
    
         
             
              - Results: text = ocr.text unless ocr.error?
         
     | 
| 
       46 
49 
     | 
    
         | 
| 
      
 50 
     | 
    
         
            +
            ### WeOCR project
         
     | 
| 
      
 51 
     | 
    
         
            +
              More info at [WeOCR project](http://weocr.ocrgrid.org/).
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
              Extra properties outputencoding=NAME.
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
      
 55 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 56 
     | 
    
         
            +
              ocr = OCR.use :weocr
         
     | 
| 
      
 57 
     | 
    
         
            +
             
     | 
| 
      
 58 
     | 
    
         
            +
              ocr.file= 'text_image.jpg'
         
     | 
| 
      
 59 
     | 
    
         
            +
              ocr.format= :txt
         
     | 
| 
      
 60 
     | 
    
         
            +
              ocr.outputencoding="utf-8"
         
     | 
| 
      
 61 
     | 
    
         
            +
              ocr.recognize
         
     | 
| 
      
 62 
     | 
    
         
            +
             
     | 
| 
      
 63 
     | 
    
         
            +
              puts "ERROR: #{ocr.error}" if ocr.error?
         
     | 
| 
      
 64 
     | 
    
         
            +
              puts "RESULT: #{ocr.text}" unless ocr.error?
         
     | 
| 
      
 65 
     | 
    
         
            +
            ```
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
       47 
67 
     | 
    
         
             
            ### OCR Web Service
         
     | 
| 
       48 
68 
     | 
    
         
             
              More info at [OCR Web Service](http://www.ocrwebservice.com/).
         
     | 
| 
       49 
69 
     | 
    
         | 
| 
       50 
     | 
    
         
            -
              Extra properties convert_to_bw 
     | 
| 
      
 70 
     | 
    
         
            +
              Extra properties convert_to_bw=BOOLEAN, multi_page_doc=BOOLEAN.
         
     | 
| 
       51 
71 
     | 
    
         | 
| 
       52 
72 
     | 
    
         
             
            ```ruby
         
     | 
| 
       53 
73 
     | 
    
         
             
              ocr = OCR.use :onlineocr
         
     | 
| 
         @@ -63,7 +83,7 @@ gem 'ocr' 
     | 
|
| 
       63 
83 
     | 
    
         
             
            ```
         
     | 
| 
       64 
84 
     | 
    
         | 
| 
       65 
85 
     | 
    
         
             
            ### Free OCR online webservice
         
     | 
| 
       66 
     | 
    
         
            -
              More info at [Free OCR online webservice](http://www.free-ocr.co.uk/).
         
     | 
| 
      
 86 
     | 
    
         
            +
              More info at [Free OCR online webservice](http://www.free-ocr.co.uk/). Not tested with images larger than 100x100px. The free service is limited to 100x100px images.
         
     | 
| 
       67 
87 
     | 
    
         | 
| 
       68 
88 
     | 
    
         
             
            ```ruby
         
     | 
| 
       69 
89 
     | 
    
         
             
              ocr = OCR.use :free_ocr
         
     | 
    
        data/lib/ocr.rb
    CHANGED
    
    | 
         @@ -1,6 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require File.expand_path('../ocr/factory', __FILE__)
         
     | 
| 
       2 
2 
     | 
    
         
             
            require File.expand_path('../ocr/ocr', __FILE__)
         
     | 
| 
       3 
3 
     | 
    
         
             
            require File.expand_path('../ocrs/dummy', __FILE__)
         
     | 
| 
      
 4 
     | 
    
         
            +
            require File.expand_path('../ocrs/weocr', __FILE__)
         
     | 
| 
       4 
5 
     | 
    
         
             
            require File.expand_path('../ocrs/onlineocr', __FILE__)
         
     | 
| 
       5 
6 
     | 
    
         
             
            require File.expand_path('../ocrs/free_ocr', __FILE__)
         
     | 
| 
       6 
7 
     | 
    
         | 
    
        data/lib/ocr/version.rb
    CHANGED
    
    
    
        data/lib/ocrs/weocr.rb
    ADDED
    
    | 
         @@ -0,0 +1,55 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'net/http'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'rexml/document'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'nokogiri'
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module OCR
              # OCR engine that submits an image to a server listed by the WeOCR
              # project (http://weocr.ocrgrid.org/).
              #
              # On initialization the server directory is queried and the CGI endpoint
              # of the first listed server is stored in +server_cgi+. Recognition
              # uploads the file with the external +curl+ program and extracts the
              # text from the returned HTML page.
              #
              # Extra property: +outputencoding+ (defaults to 'utf-8').
              class Weocr < OCR::Ocr

                attr_accessor :outputencoding, :servers, :servers_info, :server_cgi

                # Fetches the list of WeOCR servers and resolves the CGI endpoint of
                # the first one.
                #
                # Fills +@servers+ with the server URLs found in the directory XML and
                # sets +@server_cgi+ from the first server's +srvspec.xml+. When the
                # directory lists no usable server, +@server_cgi+ is left untouched.
                # Network errors raised by Net::HTTP are not rescued here.
                def ocr_servers
                  @servers = []
                  @servers_info = {}
                  # Get OCR servers
                  url = 'http://weocr.ocrgrid.org/cgi-bin/weocr/search.cgi?lang=&fmt=xml'
                  xml_data = Net::HTTP.get(URI.parse(url))
                  doc = REXML::Document.new(xml_data)
                  doc.elements.each('weocrlist/server/url') do |ele|
                    # An empty <url/> element yields nil; skip it instead of storing
                    # an entry that cannot be turned into a valid URI later on.
                    server_url = ele.text.to_s.strip
                    @servers << server_url unless server_url.empty?
                  end

                  return if @servers.empty?

                  # The spec file lives below the server URL; make sure the path
                  # separator is present so the file name is not glued to the last
                  # path segment.
                  base = @servers.first
                  base += '/' unless base.end_with?('/')

                  xml_data = Net::HTTP.get(URI.parse("#{base}srvspec.xml"))
                  doc = REXML::Document.new(xml_data)
                  doc.elements.each('ocrserver/svinfo/cgi') do |ele|
                    @server_cgi = ele.text
                  end
                end

                private

                # Sets the defaults for this engine and looks up an OCR server.
                def init
                  super()
                  self.outputencoding = 'utf-8'
                  self.server_cgi = false
                  self.ocr_servers
                end

                # Uploads +@file+ to the selected server and stores the recognized
                # text.
                #
                # Returns false (after recording an error through +set_error+) when
                # curl cannot be started, when the response contains an <h2> element
                # (treated as the server's error message), or when the response has
                # no <pre> element. Otherwise returns the result of +set_text+.
                #
                # Raises Exception when no server CGI endpoint is known.
                def ocr_recognize
                  # NOTE(review): Exception (not a StandardError subclass) is kept so
                  # existing callers keep seeing the same exception class.
                  raise Exception, 'No available OCR server' unless @server_cgi

                  # Argument-array form: no shell is involved, so neither the file
                  # name nor the endpoint read from the remote XML can inject shell
                  # syntax. --form-string sends the text fields literally (no special
                  # meaning for a leading '@' or '<').
                  command = [
                    'curl',
                    '-F', "userfile=@#{@file}",
                    '--form-string', "outputencoding=#{outputencoding}",
                    '--form-string', "outputformat=#{format.to_s}",
                    @server_cgi.to_s
                  ]

                  begin
                    # curl's stderr is discarded, as the original command did.
                    res = IO.popen(command, :err => File::NULL) { |io| io.read }
                  rescue SystemCallError => e
                    # e.g. curl is not installed
                    set_error "Unable to run curl: #{e.message}"
                    return false
                  end

                  doc = Nokogiri::HTML.parse(res.to_s)
                  err = doc.search('h2').first
                  return false if err && have_error?(err.content)

                  pre = doc.search('pre').first
                  unless pre
                    # Empty or unexpected response (curl failure, changed page layout)
                    set_error 'No recognized text found in the OCR server response'
                    return false
                  end

                  set_text pre.content
                end

                # Records +response+ as the current error.
                #
                # Returns true when an error was recorded, false when +response+ is
                # nil or false.
                def have_error? response
                  return false unless response

                  set_error(response)
                  true
                end
              end
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: ocr
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.2.0
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.3.0
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       7 
7 
     | 
    
         
             
            authors:
         
     | 
| 
         @@ -9,11 +9,11 @@ authors: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       10 
10 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       11 
11 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       12 
     | 
    
         
            -
            date: 2012-02 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2012-03-02 00:00:00.000000000Z
         
     | 
| 
       13 
13 
     | 
    
         
             
            dependencies:
         
     | 
| 
       14 
14 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       15 
15 
     | 
    
         
             
              name: savon
         
     | 
| 
       16 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 16 
     | 
    
         
            +
              requirement: &11878720 !ruby/object:Gem::Requirement
         
     | 
| 
       17 
17 
     | 
    
         
             
                none: false
         
     | 
| 
       18 
18 
     | 
    
         
             
                requirements:
         
     | 
| 
       19 
19 
     | 
    
         
             
                - - ! '>='
         
     | 
| 
         @@ -21,10 +21,21 @@ dependencies: 
     | 
|
| 
       21 
21 
     | 
    
         
             
                    version: 0.9.9
         
     | 
| 
       22 
22 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       23 
23 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       24 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 24 
     | 
    
         
            +
              version_requirements: *11878720
         
     | 
| 
      
 25 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 26 
     | 
    
         
            +
              name: nokogiri
         
     | 
| 
      
 27 
     | 
    
         
            +
              requirement: &11875660 !ruby/object:Gem::Requirement
         
     | 
| 
      
 28 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 29 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 30 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 31 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 32 
     | 
    
         
            +
                    version: 1.5.0
         
     | 
| 
      
 33 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 34 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 35 
     | 
    
         
            +
              version_requirements: *11875660
         
     | 
| 
       25 
36 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       26 
37 
     | 
    
         
             
              name: rspec
         
     | 
| 
       27 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 38 
     | 
    
         
            +
              requirement: &11873900 !ruby/object:Gem::Requirement
         
     | 
| 
       28 
39 
     | 
    
         
             
                none: false
         
     | 
| 
       29 
40 
     | 
    
         
             
                requirements:
         
     | 
| 
       30 
41 
     | 
    
         
             
                - - ~>
         
     | 
| 
         @@ -32,7 +43,7 @@ dependencies: 
     | 
|
| 
       32 
43 
     | 
    
         
             
                    version: 2.7.0
         
     | 
| 
       33 
44 
     | 
    
         
             
              type: :development
         
     | 
| 
       34 
45 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       35 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 46 
     | 
    
         
            +
              version_requirements: *11873900
         
     | 
| 
       36 
47 
     | 
    
         
             
            description: Recognize text and characters from image files using web services.
         
     | 
| 
       37 
48 
     | 
    
         
             
            email:
         
     | 
| 
       38 
49 
     | 
    
         
             
            - mabarroso@mabarroso.com
         
     | 
| 
         @@ -42,6 +53,7 @@ extra_rdoc_files: [] 
     | 
|
| 
       42 
53 
     | 
    
         
             
            files:
         
     | 
| 
       43 
54 
     | 
    
         
             
            - lib/ocr.rb
         
     | 
| 
       44 
55 
     | 
    
         
             
            - lib/ocrs/free_ocr.rb
         
     | 
| 
      
 56 
     | 
    
         
            +
            - lib/ocrs/weocr.rb
         
     | 
| 
       45 
57 
     | 
    
         
             
            - lib/ocrs/onlineocr.rb
         
     | 
| 
       46 
58 
     | 
    
         
             
            - lib/ocrs/dummy.rb
         
     | 
| 
       47 
59 
     | 
    
         
             
            - lib/ocr/version.rb
         
     |