ppe-dbpediafinder 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,23 @@
1
+ dbpediafinder by Yves Raimond
2
+
3
+ A very small Ruby gem for finding URIs in DBpedia, given a label and an optional disambiguation string.
4
+
5
+ Works in two steps:
6
+
7
+ 1) Uses the Google AJAX Search API to find possible candidates
8
+ 2) Uses the DBpedia SPARQL endpoint to validate one of those candidates
9
+
10
+ Example use:
11
+
12
+ require 'rubygems'
13
+ require 'dbpediafinder'
14
+
15
+ finder = DbpediaFinder::Finder.new
16
+ finder.find('j.s. bach', 'composer')
17
+ => ["Johann Sebastian Bach", "http://dbpedia.org/resource/Johann_Sebastian_Bach"]
18
+
19
+ finder.find('david aaronovitch', 'bbc radio presenter')
20
+ => ["David Aaronovitch", "http://dbpedia.org/resource/David_Aaronovitch"]
21
+
22
+ finder.find('isle of wight')
23
+ => ["Isle of Wight", "http://dbpedia.org/resource/Isle_of_Wight"]
@@ -0,0 +1,13 @@
1
# Gem specification for ppe-dbpediafinder.
#
# NOTE(review): lib/dbpedia_finder/finder.rb requires '4store-ruby' and 'json'
# at load time, but no runtime dependencies are declared here. Confirm the
# published gem names before adding matching `s.add_dependency` lines.
Gem::Specification.new do |s|
  s.name        = "ppe-dbpediafinder"
  s.version     = "0.0.1"
  s.date        = "2009-11-19"
  s.summary     = "DBpedia finder"
  s.email       = "yves@dbtune.org"
  s.homepage    = "http://github.com/moustaki/dbpediafinder"
  s.description = "A small rubygem for dbpedia lookups"
  s.authors     = ['Yves Raimond']
  # Only regular files belong in this list; the bare "lib" directory entry was
  # dropped because RubyGems rejects non-file entries when building the gem.
  s.files       = [
    "README",
    "dbpediafinder.gemspec",
    "lib/dbpediafinder.rb",
    "lib/dbpedia_finder/finder.rb"
  ]
end
13
+
@@ -0,0 +1,72 @@
1
+ require 'rubygems'
2
+ require '4store-ruby'
3
+ require 'uri'
4
+ require 'json'
5
+ require 'net/http'
6
+
7
# Namespace for the DBpedia lookup helpers.
module DbpediaFinder
  # Finds the DBpedia resource matching a free-text label.
  #
  # Works in two steps:
  #
  # 1. asks the Google AJAX search endpoint for English Wikipedia pages
  #    matching the label (plus an optional disambiguation string);
  # 2. maps each candidate page to a DBpedia resource URI and keeps the first
  #    one whose rdfs:label matches the label, according to the SPARQL
  #    endpoint at http://dbpedia.org/sparql.
  class Finder
    # Base URL of the web search service used to collect candidates.
    GOOGLE_SEARCH_URL = 'http://ajax.googleapis.com/ajax/services/search/web'.freeze

    # Path prefix that precedes the page title in a Wikipedia URL.
    WIKI_PATH_MARKER = '/wiki/'.freeze

    # Characters that carry a special meaning inside a regular expression and
    # must be neutralised before a label is used as a regex pattern.
    REGEX_METACHARACTERS = /[\\|.\-^?*+{}()\[\]$]/

    # Sets up the SPARQL client and, when the HTTP_PROXY environment variable
    # is set, the proxy used for the search requests.
    def initialize
      @store = FourStore::Store.new 'http://dbpedia.org/sparql'
      @proxy = URI.parse(ENV['HTTP_PROXY']) if ENV['HTTP_PROXY']
    end

    # Looks up the DBpedia resource for +label+.
    #
    # @param label [String] the name to look up, e.g. 'j.s. bach'
    # @param disambiguation [String, nil] extra search terms, e.g. 'composer'
    # @return [Array(String, String), nil] the matched label and the DBpedia
    #   URI, or nil when no candidate could be validated
    def find(label, disambiguation = nil)
      google_search(label, disambiguation).each do |uri|
        dbpedia = wikipedia_to_dbpedia(uri)
        # Category pages are never the resource we are looking for.
        next if dbpedia.include?('Category:')

        match = @store.select(label_query(dbpedia, label))
        return [match[0]['label'], dbpedia] if match.size > 0
      end
      nil
    end

    # Searches English Wikipedia through the Google AJAX search endpoint.
    #
    # @param label [String] searched as an exact (quoted) phrase
    # @param disambiguation [String, nil] additional unquoted search terms
    # @return [Array<String>] result URLs in the order returned by the
    #   service; empty when the response carries no result data
    # @raise [JSON::ParserError] when the response body is not valid JSON
    def google_search(label, disambiguation)
      url = URI.parse(google_search_url(label, disambiguation))
      http_class = @proxy ? Net::HTTP::Proxy(@proxy.host, @proxy.port) : Net::HTTP
      response = http_class.start(url.host, url.port) do |http|
        http.get(url.request_uri)
      end
      parse_google_results(response.body)
    end

    # Converts a Wikipedia page URL into the corresponding DBpedia URI.
    #
    # Everything after "/wiki/" is kept, so titles that contain a slash
    # (e.g. "AC/DC") are not truncated. URLs without that marker fall back to
    # their last path segment.
    #
    # @param wikipedia [String] a Wikipedia page URL
    # @return [String] the DBpedia resource URI
    def wikipedia_to_dbpedia(wikipedia)
      marker_at = wikipedia.index(WIKI_PATH_MARKER)
      url_key = if marker_at
                  wikipedia[(marker_at + WIKI_PATH_MARKER.size)..-1]
                else
                  wikipedia.split('/').last
                end
      "http://dbpedia.org/resource/#{url_key}"
    end

    # Prepares +label+ for use as the pattern of a SPARQL regex() filter that
    # is written inside a single-quoted SPARQL string literal.
    #
    # @param label [String] the raw label
    # @return [String] the label without initials, with regex metacharacters
    #   escaped, and with backslashes and single quotes escaped for the
    #   enclosing string literal
    def clean_label(label)
      # Remove initials (as they are expanded in Wikipedia/DBpedia labels)
      words = label.split(' ').select { |word| word.split('.').size == 1 }
      # First make every character match literally in the regex ...
      pattern = words.join(' ').gsub(REGEX_METACHARACTERS) { |char| "\\#{char}" }
      # ... then escape for the single-quoted SPARQL literal around it.
      pattern.gsub(/[\\']/) { |char| "\\#{char}" }
    end

    private

    # Builds the search request URL; the query is form-encoded so characters
    # such as '&' or '#' in a label cannot break the q= parameter.
    def google_search_url(label, disambiguation)
      terms = ["\"#{label}\""]
      terms << disambiguation if disambiguation
      terms << 'site:en.wikipedia.org'
      "#{GOOGLE_SEARCH_URL}?v=1.0&q=#{URI.encode_www_form_component(terms.join(' '))}"
    end

    # Extracts the result URLs from a search response body. A response whose
    # "responseData" is null or lacks "results" yields an empty list.
    def parse_google_results(body)
      data = JSON.parse(body)['responseData']
      return [] unless data

      (data['results'] || []).map { |result| result['url'] }
    end

    # Builds the SPARQL query returning the labels of +dbpedia+ that match
    # +label+ case-insensitively.
    def label_query(dbpedia, label)
      <<-SPARQL
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT DISTINCT ?label WHERE {
          <#{dbpedia}> rdfs:label ?label
          FILTER (
            regex(?label, '#{clean_label(label)}', 'i')
          )
        }
      SPARQL
    end
  end
end
@@ -0,0 +1 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), 'dbpedia_finder', 'finder.rb')
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ppe-dbpediafinder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Yves Raimond
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-19 00:00:00 +00:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: A small rubygem for dbpedia lookups
17
+ email: yves@dbtune.org
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - README
26
+ - dbpediafinder.gemspec
27
+ - lib/dbpediafinder.rb
28
+ - lib/dbpedia_finder/finder.rb
29
+ has_rdoc: true
30
+ homepage: http://github.com/moustaki/dbpediafinder
31
+ licenses: []
32
+
33
+ post_install_message:
34
+ rdoc_options: []
35
+
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: "0"
43
+ version:
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ requirements: []
51
+
52
+ rubyforge_project:
53
+ rubygems_version: 1.3.5
54
+ signing_key:
55
+ specification_version: 3
56
+ summary: DBpedia finder
57
+ test_files: []
58
+