ppe-dbpediafinder 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +23 -0
- data/dbpediafinder.gemspec +13 -0
- data/lib/dbpedia_finder/finder.rb +72 -0
- data/lib/dbpediafinder.rb +1 -0
- metadata +58 -0
    
        data/README
    ADDED
    
    | @@ -0,0 +1,23 @@ | |
| 1 | 
            +
            dbpediafinder by Yves Raimond
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            A very small Ruby gem for finding URIs in DBpedia, given a label and an optional disambiguation string.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            Works in two steps:
         | 
| 6 | 
            +
             | 
| 7 | 
            +
             1) Uses the Google AJAX Search API to find possible candidates
         | 
| 8 | 
            +
             2) Uses the DBpedia SPARQL endpoint to validate one of those candidates
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            Example use:
         | 
| 11 | 
            +
             | 
| 12 | 
            +
              require 'rubygems'
         | 
| 13 | 
            +
              require 'dbpediafinder'
         | 
| 14 | 
            +
             | 
| 15 | 
            +
              finder = DbpediaFinder::Finder.new
         | 
| 16 | 
            +
              finder.find('j.s. bach', 'composer')
         | 
| 17 | 
            +
            => ["Johann Sebastian Bach", "http://dbpedia.org/resource/Johann_Sebastian_Bach"]
         | 
| 18 | 
            +
             | 
| 19 | 
            +
              finder.find('david aaronovitch', 'bbc radio presenter')
         | 
| 20 | 
            +
            => ["David Aaronovitch", "http://dbpedia.org/resource/David_Aaronovitch"]
         | 
| 21 | 
            +
             | 
| 22 | 
            +
              finder.find('isle of wight')
         | 
| 23 | 
            +
            => ["Isle of Wight", "http://dbpedia.org/resource/Isle_of_Wight"]
         | 
| @@ -0,0 +1,13 @@ | |
| 1 | 
            +
            Gem::Specification.new do |s|
         | 
| 2 | 
            +
              s.name = "ppe-dbpediafinder"
         | 
| 3 | 
            +
              s.version = "0.0.1"
         | 
| 4 | 
            +
              s.date = "2009-11-19"
         | 
| 5 | 
            +
              s.summary = "DBpedia finder"
         | 
| 6 | 
            +
              s.email = "yves@dbtune.org"
         | 
| 7 | 
            +
              s.homepage = "http://github.com/moustaki/dbpediafinder"
         | 
| 8 | 
            +
              s.description = "A small rubygem for dbpedia lookups"
         | 
| 9 | 
            +
              s.has_rdoc = true
         | 
| 10 | 
            +
              s.authors = ['Yves Raimond']
         | 
| 11 | 
            +
              s.files = ["README", "dbpediafinder.gemspec", "lib", "lib/dbpediafinder.rb", "lib/dbpedia_finder/finder.rb"]
         | 
| 12 | 
            +
            end
         | 
| 13 | 
            +
             | 
| @@ -0,0 +1,72 @@ | |
| 1 | 
            +
            require 'rubygems'
         | 
| 2 | 
            +
            require '4store-ruby'
         | 
| 3 | 
            +
            require 'uri'
         | 
| 4 | 
            +
            require 'json'
         | 
| 5 | 
            +
            require 'net/http'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            module DbpediaFinder
         | 
| 8 | 
            +
             | 
| 9 | 
            +
              class Finder
         | 
| 10 | 
            +
             | 
| 11 | 
            +
                def initialize
         | 
| 12 | 
            +
                  @store = FourStore::Store.new 'http://dbpedia.org/sparql'
         | 
| 13 | 
            +
                  @proxy = URI.parse(ENV['HTTP_PROXY']) if ENV['HTTP_PROXY']
         | 
| 14 | 
            +
                end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                def find(label, disambiguation = nil)
         | 
| 17 | 
            +
                  results = google_search(label, disambiguation)
         | 
| 18 | 
            +
                  results.each do |uri|
         | 
| 19 | 
            +
                    dbpedia = wikipedia_to_dbpedia(uri)
         | 
| 20 | 
            +
                    next if dbpedia.split('Category:').size > 1
         | 
| 21 | 
            +
                    query = "
         | 
| 22 | 
            +
                      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
         | 
| 23 | 
            +
                      SELECT DISTINCT ?label WHERE {
         | 
| 24 | 
            +
                        <#{dbpedia}> rdfs:label ?label
         | 
| 25 | 
            +
                        FILTER (
         | 
| 26 | 
            +
                          regex(?label, '#{clean_label(label)}', 'i')
         | 
| 27 | 
            +
                        )
         | 
| 28 | 
            +
                      }
         | 
| 29 | 
            +
                    "
         | 
| 30 | 
            +
                    match = @store.select query
         | 
| 31 | 
            +
                    return [match[0]['label'], dbpedia] if match.size > 0
         | 
| 32 | 
            +
                  end
         | 
| 33 | 
            +
                  return nil
         | 
| 34 | 
            +
                end
         | 
| 35 | 
            +
             | 
| 36 | 
            +
                def google_search(label, disambiguation)
         | 
| 37 | 
            +
                  if disambiguation
         | 
| 38 | 
            +
                    query = "\"#{label}\" #{disambiguation} site:en.wikipedia.org"
         | 
| 39 | 
            +
                  else
         | 
| 40 | 
            +
                    query = "\"#{label}\" site:en.wikipedia.org"
         | 
| 41 | 
            +
                  end
         | 
| 42 | 
            +
                  query = URI.encode(query)
         | 
| 43 | 
            +
                  google_url_s = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=#{query}"
         | 
| 44 | 
            +
                  url = URI.parse(google_url_s)
         | 
| 45 | 
            +
                  if @proxy
         | 
| 46 | 
            +
                    h = Net::HTTP::Proxy(@proxy.host, @proxy.port).new(url.host, url.port)
         | 
| 47 | 
            +
                  else
         | 
| 48 | 
            +
                    h = Net::HTTP.new(url.host, url.port)
         | 
| 49 | 
            +
                  end
         | 
| 50 | 
            +
                  h.start do |h|
         | 
| 51 | 
            +
                    res = h.get(url.path + "?" + url.query)
         | 
| 52 | 
            +
                    json = JSON.parse(res.body)
         | 
| 53 | 
            +
                    results = json["responseData"]["results"].map { |result| result["url"] }
         | 
| 54 | 
            +
                    return results
         | 
| 55 | 
            +
                  end
         | 
| 56 | 
            +
                end
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                def wikipedia_to_dbpedia(wikipedia)
         | 
| 59 | 
            +
                  url_key = wikipedia.split('/').last
         | 
| 60 | 
            +
                  return "http://dbpedia.org/resource/" + url_key
         | 
| 61 | 
            +
                end
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                def clean_label(label)
         | 
| 64 | 
            +
                  # Remove initials (as they are expanded in Wikipedia/DBpedia labels)
         | 
| 65 | 
            +
                  cleaned_label = label.split(' ').select { |l| l.split('.').size == 1 }.join(' ')
         | 
| 66 | 
            +
                  cleaned_label = cleaned_label.split("'").join("\\'")
         | 
| 67 | 
            +
                  cleaned_label
         | 
| 68 | 
            +
                end
         | 
| 69 | 
            +
             | 
| 70 | 
            +
              end
         | 
| 71 | 
            +
             | 
| 72 | 
            +
            end
         | 
| @@ -0,0 +1 @@ | |
| 1 | 
            +
            require File.join(File.expand_path(File.dirname(__FILE__)), 'dbpedia_finder', 'finder.rb')
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,58 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification 
         | 
| 2 | 
            +
            name: ppe-dbpediafinder
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            +
              version: 0.0.1
         | 
| 5 | 
            +
            platform: ruby
         | 
| 6 | 
            +
            authors: 
         | 
| 7 | 
            +
            - Yves Raimond
         | 
| 8 | 
            +
            autorequire: 
         | 
| 9 | 
            +
            bindir: bin
         | 
| 10 | 
            +
            cert_chain: []
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            date: 2009-11-19 00:00:00 +00:00
         | 
| 13 | 
            +
            default_executable: 
         | 
| 14 | 
            +
            dependencies: []
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            description: A small rubygem for dbpedia lookups
         | 
| 17 | 
            +
            email: yves@dbtune.org
         | 
| 18 | 
            +
            executables: []
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            extensions: []
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            extra_rdoc_files: []
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            files: 
         | 
| 25 | 
            +
            - README
         | 
| 26 | 
            +
            - dbpediafinder.gemspec
         | 
| 27 | 
            +
            - lib/dbpediafinder.rb
         | 
| 28 | 
            +
            - lib/dbpedia_finder/finder.rb
         | 
| 29 | 
            +
            has_rdoc: true
         | 
| 30 | 
            +
            homepage: http://github.com/moustaki/dbpediafinder
         | 
| 31 | 
            +
            licenses: []
         | 
| 32 | 
            +
             | 
| 33 | 
            +
            post_install_message: 
         | 
| 34 | 
            +
            rdoc_options: []
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            require_paths: 
         | 
| 37 | 
            +
            - lib
         | 
| 38 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement 
         | 
| 39 | 
            +
              requirements: 
         | 
| 40 | 
            +
              - - ">="
         | 
| 41 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 42 | 
            +
                  version: "0"
         | 
| 43 | 
            +
              version: 
         | 
| 44 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement 
         | 
| 45 | 
            +
              requirements: 
         | 
| 46 | 
            +
              - - ">="
         | 
| 47 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 48 | 
            +
                  version: "0"
         | 
| 49 | 
            +
              version: 
         | 
| 50 | 
            +
            requirements: []
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            rubyforge_project: 
         | 
| 53 | 
            +
            rubygems_version: 1.3.5
         | 
| 54 | 
            +
            signing_key: 
         | 
| 55 | 
            +
            specification_version: 3
         | 
| 56 | 
            +
            summary: DBpedia finder
         | 
| 57 | 
            +
            test_files: []
         | 
| 58 | 
            +
             |