ppe-dbpediafinder 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +23 -0
- data/dbpediafinder.gemspec +13 -0
- data/lib/dbpedia_finder/finder.rb +72 -0
- data/lib/dbpediafinder.rb +1 -0
- metadata +58 -0
data/README
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
dbpediafinder by Yves Raimond
|
2
|
+
|
3
|
+
A very small Ruby gem for finding URIs in DBpedia, given a label and an optional disambiguation string.
|
4
|
+
|
5
|
+
Works in two steps:
|
6
|
+
|
7
|
+
1) Uses the Google AJAX Search API to find possible candidates
|
8
|
+
2) Uses the DBpedia SPARQL endpoint to validate one of those candidates
|
9
|
+
|
10
|
+
Example use:
|
11
|
+
|
12
|
+
require 'rubygems'
|
13
|
+
require 'dbpediafinder'
|
14
|
+
|
15
|
+
finder = DbpediaFinder::Finder.new
|
16
|
+
finder.find('j.s. bach', 'composer')
|
17
|
+
=> ["Johann Sebastian Bach", "http://dbpedia.org/resource/Johann_Sebastian_Bach"]
|
18
|
+
|
19
|
+
finder.find('david aaronovitch', 'bbc radio presenter')
|
20
|
+
=> ["David Aaronovitch", "http://dbpedia.org/resource/David_Aaronovitch"]
|
21
|
+
|
22
|
+
finder.find('isle of wight')
|
23
|
+
=> ["Isle of Wight", "http://dbpedia.org/resource/Isle_of_Wight"]
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Gem specification for ppe-dbpediafinder: package identity, the files that are
# shipped, and the runtime dependencies needed to load the library.
Gem::Specification.new do |s|
  s.name = "ppe-dbpediafinder"
  s.version = "0.0.1"
  s.date = "2009-11-19"
  s.summary = "DBpedia finder"
  s.email = "yves@dbtune.org"
  s.homepage = "http://github.com/moustaki/dbpediafinder"
  s.description = "A small rubygem for dbpedia lookups"
  s.authors = ['Yves Raimond']
  # List regular files only; a bare directory entry such as "lib" is not a
  # packageable file.
  s.files = ["README", "dbpediafinder.gemspec", "lib/dbpediafinder.rb", "lib/dbpedia_finder/finder.rb"]
  # lib/dbpedia_finder/finder.rb requires '4store-ruby' when it is loaded, so
  # the gem must be installed alongside this one.
  s.add_dependency "4store-ruby"
  # NOTE: the deprecated `has_rdoc=` setter (no replacement in RubyGems) is
  # intentionally not called.
end
|
13
|
+
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require '4store-ruby'
|
3
|
+
require 'uri'
|
4
|
+
require 'json'
|
5
|
+
require 'net/http'
|
6
|
+
|
7
|
+
# Namespace for the DBpedia lookup helpers.
module DbpediaFinder

  # Resolves a free-text label to a DBpedia resource in two steps:
  # a Google web search restricted to en.wikipedia.org proposes candidate
  # pages, then the DBpedia SPARQL endpoint confirms the first candidate whose
  # rdfs:label matches the (cleaned) label.
  class Finder

    # Creates the SPARQL client for http://dbpedia.org/sparql and, when the
    # HTTP_PROXY environment variable is set, remembers that proxy for the
    # Google search request.
    def initialize
      @store = FourStore::Store.new 'http://dbpedia.org/sparql'
      @proxy = URI.parse(ENV['HTTP_PROXY']) if ENV['HTTP_PROXY']
    end

    # Finds the DBpedia resource for +label+.
    #
    # label          - text to look up (e.g. 'j.s. bach').
    # disambiguation - optional extra search terms (e.g. 'composer').
    #
    # Returns [matched rdfs:label, DBpedia URI] for the first validated
    # candidate, or nil when no candidate validates.
    def find(label, disambiguation = nil)
      google_search(label, disambiguation).each do |uri|
        dbpedia = wikipedia_to_dbpedia(uri)
        # Category pages are not resources we want to return.
        next if dbpedia.include?('Category:')
        query = "
          PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
          SELECT DISTINCT ?label WHERE {
            <#{dbpedia}> rdfs:label ?label
            FILTER (
              regex(?label, '#{clean_label(label)}', 'i')
            )
          }
        "
        match = @store.select query
        return [match[0]['label'], dbpedia] if match.size > 0
      end
      nil
    end

    # Asks the Google AJAX web search for English Wikipedia pages about
    # +label+ (quoted), optionally narrowed by +disambiguation+.
    #
    # Returns an Array of result URLs; empty when the response carries no
    # result data.
    def google_search(label, disambiguation)
      terms = ["\"#{label}\"", disambiguation, 'site:en.wikipedia.org'].compact
      # URI.encode was removed in Ruby 3.0 and left '&', '=', '+' and '#'
      # unescaped, so such characters in a label corrupted the query string.
      query = URI.encode_www_form_component(terms.join(' '))
      url = URI.parse("http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=#{query}")
      http_class = @proxy ? Net::HTTP::Proxy(@proxy.host, @proxy.port) : Net::HTTP
      response = http_class.new(url.host, url.port).start do |http|
        http.get(url.request_uri)
      end
      data = JSON.parse(response.body)['responseData']
      # A null/missing responseData means there are no candidates; return an
      # empty list instead of failing with NoMethodError on nil.
      return [] unless data
      (data['results'] || []).map { |result| result['url'] }
    end

    # Maps a Wikipedia page URL to the DBpedia resource URI that shares its
    # last path segment.
    def wikipedia_to_dbpedia(wikipedia)
      "http://dbpedia.org/resource/#{wikipedia.split('/').last}"
    end

    # Prepares +label+ for use inside the single-quoted SPARQL regex literal.
    #
    # Returns the label without dotted words and with backslashes and single
    # quotes backslash-escaped.
    def clean_label(label)
      # Remove initials (as they are expanded in Wikipedia/DBpedia labels)
      words = label.split(' ').select { |word| word.split('.').size == 1 }
      # Escape every backslash and quote so the label cannot terminate the
      # literal early. The block form is required: in a replacement *string*
      # "\\'" would mean "text after the match".
      words.join(' ').gsub(/[\\']/) { |char| "\\#{char}" }
    end

  end

end
|
@@ -0,0 +1 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), 'dbpedia_finder', 'finder.rb')
|
metadata
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ppe-dbpediafinder
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Yves Raimond
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-11-19 00:00:00 +00:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: A small rubygem for dbpedia lookups
|
17
|
+
email: yves@dbtune.org
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- README
|
26
|
+
- dbpediafinder.gemspec
|
27
|
+
- lib/dbpediafinder.rb
|
28
|
+
- lib/dbpedia_finder/finder.rb
|
29
|
+
has_rdoc: true
|
30
|
+
homepage: http://github.com/moustaki/dbpediafinder
|
31
|
+
licenses: []
|
32
|
+
|
33
|
+
post_install_message:
|
34
|
+
rdoc_options: []
|
35
|
+
|
36
|
+
require_paths:
|
37
|
+
- lib
|
38
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: "0"
|
43
|
+
version:
|
44
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: "0"
|
49
|
+
version:
|
50
|
+
requirements: []
|
51
|
+
|
52
|
+
rubyforge_project:
|
53
|
+
rubygems_version: 1.3.5
|
54
|
+
signing_key:
|
55
|
+
specification_version: 3
|
56
|
+
summary: DBpedia finder
|
57
|
+
test_files: []
|
58
|
+
|