ppe-dbpediafinder 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,23 @@
1
+ dbpediafinder by Yves Raimond
2
+
3
+ A very small Ruby gem for finding URIs in DBpedia, given a label and an optional disambiguation string.
4
+
5
+ Works in two steps:
6
+
7
+ 1) Uses the Google AJAX Search API to find possible candidates
8
+ 2) Uses the DBpedia SPARQL endpoint to validate one of those candidates
9
+
10
+ Example use:
11
+
12
+ require 'rubygems'
13
+ require 'dbpediafinder'
14
+
15
+ finder = DbpediaFinder::Finder.new
16
+ finder.find('j.s. bach', 'composer')
17
+ => ["Johann Sebastian Bach", "http://dbpedia.org/resource/Johann_Sebastian_Bach"]
18
+
19
+ finder.find('david aaronovitch', 'bbc radio presenter')
20
+ => ["David Aaronovitch", "http://dbpedia.org/resource/David_Aaronovitch"]
21
+
22
+ finder.find('isle of wight')
23
+ => ["Isle of Wight", "http://dbpedia.org/resource/Isle_of_Wight"]
@@ -0,0 +1,13 @@
1
# Gem specification for ppe-dbpediafinder.
#
# NOTE(review): lib/dbpedia_finder/finder.rb requires '4store-ruby' and 'json',
# but no runtime dependencies are declared here. Confirm the published gem
# names and declare them with s.add_dependency.
Gem::Specification.new do |s|
  s.name = "ppe-dbpediafinder"
  s.version = "0.0.1"
  s.date = "2009-11-19"
  s.summary = "DBpedia finder"
  s.email = "yves@dbtune.org"
  s.homepage = "http://github.com/moustaki/dbpediafinder"
  s.description = "A small rubygem for dbpedia lookups"
  # `has_rdoc` is intentionally not set: RubyGems deprecated the attribute
  # with no replacement.
  s.authors = ['Yves Raimond']
  # Only real files are listed; the bare "lib" directory entry was not a file.
  s.files = ["README", "dbpediafinder.gemspec", "lib/dbpediafinder.rb", "lib/dbpedia_finder/finder.rb"]
end
13
+
@@ -0,0 +1,72 @@
1
+ require 'rubygems'
2
+ require '4store-ruby'
3
+ require 'uri'
4
+ require 'json'
5
+ require 'net/http'
6
+
7
module DbpediaFinder

  # Finds the DBpedia resource that corresponds to a free-text label.
  #
  # Works in two steps: a Google web search restricted to en.wikipedia.org
  # proposes candidate pages, then the DBpedia SPARQL endpoint is asked whether
  # the rdfs:label of a candidate matches the (cleaned) label.
  class Finder

    # Characters that may not appear inside a SPARQL <...> IRI reference.
    INVALID_IRI_CHARS = /[<>"{}|\\^`\s]/

    # Regular-expression metacharacters that must be matched literally when a
    # label is used as a regex() pattern.
    REGEX_METACHARS = /[\\.?*+()\[\]{}|^$]/

    # Sets up the SPARQL store and, when the HTTP_PROXY environment variable is
    # set to a non-empty value, the proxy used for the Google request.
    def initialize
      @store = FourStore::Store.new 'http://dbpedia.org/sparql'
      proxy = ENV['HTTP_PROXY']
      @proxy = URI.parse(proxy) if proxy && !proxy.empty?
    end

    # Returns [label, dbpedia_uri] for the first candidate whose rdfs:label
    # matches +label+ (case-insensitively), or nil when no candidate matches.
    #
    # label          - the text to look up, e.g. 'j.s. bach'
    # disambiguation - optional extra search terms, e.g. 'composer'
    def find(label, disambiguation = nil)
      pattern = clean_label(label)
      google_search(label, disambiguation).each do |uri|
        dbpedia = wikipedia_to_dbpedia(uri)
        # Category pages are not entities we want to return.
        next if dbpedia.include?('Category:')
        # The URI is interpolated into <...> below; skip anything that would
        # produce a malformed (or injected) query.
        next if dbpedia =~ INVALID_IRI_CHARS
        query = "
          PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
          SELECT DISTINCT ?label WHERE {
            <#{dbpedia}> rdfs:label ?label
            FILTER (
              regex(?label, '#{pattern}', 'i')
            )
          }
        "
        match = @store.select query
        return [match[0]['label'], dbpedia] if match && match.size > 0
      end
      nil
    end

    # Queries the Google AJAX web search API for English Wikipedia pages about
    # +label+ (quoted) plus the optional +disambiguation+ terms and returns the
    # result URLs as an array of strings. Returns [] when the response carries
    # no result data (the API reports errors with a null "responseData").
    def google_search(label, disambiguation)
      query = "\"#{label}\""
      query += " #{disambiguation}" if disambiguation
      query += ' site:en.wikipedia.org'
      # URI.encode is gone from current Ruby and left '&', '#' and '+' alone;
      # form-component encoding escapes every reserved character.
      encoded = URI.encode_www_form_component(query)
      url = URI.parse("http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=#{encoded}")
      http_class = @proxy ? Net::HTTP::Proxy(@proxy.host, @proxy.port) : Net::HTTP
      response = http_class.new(url.host, url.port).start do |http|
        http.get(url.request_uri)
      end
      data = JSON.parse(response.body)
      results = data['responseData'] && data['responseData']['results']
      (results || []).map { |result| result['url'] }
    end

    # Maps a Wikipedia page URL to the DBpedia resource URI with the same page
    # key. Everything after "/wiki/" is kept so that titles containing a slash
    # (e.g. "AC/DC") survive; a trailing "#section" fragment is dropped. URLs
    # without "/wiki/" fall back to their last path segment.
    def wikipedia_to_dbpedia(wikipedia)
      page = wikipedia.sub(/#.*\z/m, '')
      url_key = page.include?('/wiki/') ? page.split('/wiki/', 2).last : page.split('/').last
      "http://dbpedia.org/resource/#{url_key}"
    end

    # Turns +label+ into text that is safe to place inside the single-quoted
    # pattern argument of a SPARQL regex() call.
    def clean_label(label)
      # Remove initials (as they are expanded in Wikipedia/DBpedia labels)
      words = label.split(' ').select { |word| word.split('.').size == 1 }
      # Match the remaining text literally rather than as a regex.
      literal = words.join(' ').gsub(REGEX_METACHARS) { |char| "\\#{char}" }
      # Escape for the enclosing SPARQL string literal.
      literal.gsub(/[\\']/) { |char| "\\#{char}" }
    end

  end

end
@@ -0,0 +1 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), 'dbpedia_finder', 'finder.rb')
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ppe-dbpediafinder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Yves Raimond
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-19 00:00:00 +00:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: A small rubygem for dbpedia lookups
17
+ email: yves@dbtune.org
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - README
26
+ - dbpediafinder.gemspec
27
+ - lib/dbpediafinder.rb
28
+ - lib/dbpedia_finder/finder.rb
29
+ has_rdoc: true
30
+ homepage: http://github.com/moustaki/dbpediafinder
31
+ licenses: []
32
+
33
+ post_install_message:
34
+ rdoc_options: []
35
+
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: "0"
43
+ version:
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ requirements: []
51
+
52
+ rubyforge_project:
53
+ rubygems_version: 1.3.5
54
+ signing_key:
55
+ specification_version: 3
56
+ summary: DBpedia finder
57
+ test_files: []
58
+