dblp 0.2.0 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -3
- data/lib/dblp/grabber.rb +42 -9
- data/lib/dblp/parser.rb +6 -3
- data/lib/dblp/version.rb +2 -2
- data/lib/dblp.rb +3 -3
- data/test/test.aux +3 -0
- data/test/test_dblp.rb +22 -3
- metadata +2 -2
data/History.txt
CHANGED
@@ -1,9 +1,12 @@
|
|
1
|
-
== 0.
|
1
|
+
== 0.3.2 2008-06-29
|
2
|
+
* refactored interface for grabbing data from different sources
|
3
|
+
* added citeseer for finding the right bibtex entry
|
2
4
|
|
3
|
-
|
5
|
+
== 0.2.0 2008-05-29
|
4
6
|
|
7
|
+
* added documentation and more command line options
|
5
8
|
|
6
|
-
== 0.0
|
9
|
+
== 0.1.0 2008-05-28
|
7
10
|
|
8
11
|
* 1 major enhancement:
|
9
12
|
* Initial release
|
data/lib/dblp/grabber.rb
CHANGED
@@ -2,28 +2,61 @@ require 'open-uri'
|
|
2
2
|
|
3
3
|
module Dblp
|
4
4
|
|
5
|
-
# Const url to fetch from
|
6
|
-
DBLP_URL = "http://dblp.uni-trier.de/rec/bibtex/"
|
7
|
-
|
8
5
|
class Grabber
|
9
6
|
|
10
|
-
|
7
|
+
# Const url to fetch from
|
8
|
+
DBLP_URL = "http://dblp.uni-trier.de/rec/bibtex/"
|
9
|
+
|
10
|
+
def read_html(url)
|
11
11
|
content = ""
|
12
|
-
open(
|
12
|
+
open(url) do |f|
|
13
13
|
content = f.read
|
14
14
|
end
|
15
|
+
content
|
16
|
+
end
|
15
17
|
|
18
|
+
def extract_pre(content)
|
16
19
|
# extract the bibtex code, that is in pre tags
|
17
20
|
result = content.scan(/<pre>(.*?)<.pre>/mix)
|
18
21
|
if result
|
19
|
-
result.inject(
|
20
|
-
m[k[0].match(/@.*\{(.*?),/)[1].gsub(/(<.*?>)/, "")] = k[0].gsub(/(<.*?>)/, "")
|
22
|
+
result.inject([]) do |m, k|
|
23
|
+
#m[k[0].match(/@.*\{(.*?),/)[1].gsub(/(<.*?>)/, "")] = k[0].gsub(/(<.*?>)/, "")
|
24
|
+
m << k[0].gsub(/(<.*?>)/, "")
|
21
25
|
m
|
22
26
|
end
|
23
27
|
else
|
24
|
-
|
28
|
+
[]
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
|
33
|
+
def grab(key)
|
34
|
+
begin
|
35
|
+
# Check the key
|
36
|
+
if key =~ /DBLP:/
|
37
|
+
content = read_html(DBLP_URL + key.gsub("DBLP:", ""))
|
38
|
+
extract_pre(content)
|
39
|
+
else
|
40
|
+
CiteseerGrabber.new.grab(key)
|
41
|
+
end
|
42
|
+
rescue
|
43
|
+
[]
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
class CiteseerGrabber < Grabber
|
50
|
+
|
51
|
+
CITESEE_URL = "http://citeseer.ist.psu.edu/"
|
52
|
+
|
53
|
+
def grab(key)
|
54
|
+
begin
|
55
|
+
content = read_html(CITESEE_URL + key + ".html")
|
56
|
+
extract_pre(content)
|
57
|
+
rescue
|
58
|
+
[]
|
25
59
|
end
|
26
|
-
|
27
60
|
end
|
28
61
|
|
29
62
|
end
|
data/lib/dblp/parser.rb
CHANGED
@@ -9,9 +9,12 @@ module Dblp
|
|
9
9
|
|
10
10
|
def parse
|
11
11
|
File.readlines(@filename).inject([]) {|m, l|
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
cnt = l.match(/\\citation\{(.*?)\}/)
|
13
|
+
|
14
|
+
if cnt
|
15
|
+
cnt[1].split(",").each do |t|
|
16
|
+
m << t
|
17
|
+
end
|
15
18
|
end
|
16
19
|
m
|
17
20
|
}.uniq
|
data/lib/dblp/version.rb
CHANGED
data/lib/dblp.rb
CHANGED
@@ -17,11 +17,11 @@ module Dblp
|
|
17
17
|
parser = Dblp::Parser.new(file)
|
18
18
|
grabber = Dblp::Grabber.new
|
19
19
|
File.open(options.output, "w+") do |f|
|
20
|
-
f.puts parser.parse.inject(
|
21
|
-
m
|
20
|
+
f.puts parser.parse.inject([]) {|m, l|
|
21
|
+
m << grabber.grab(l)
|
22
22
|
overall_size = m.size
|
23
23
|
m
|
24
|
-
}.
|
24
|
+
}.uniq.join("\n")
|
25
25
|
end
|
26
26
|
|
27
27
|
if options.bibtex
|
data/test/test.aux
CHANGED
@@ -13,6 +13,9 @@
|
|
13
13
|
|
14
14
|
\citation{conf/btw/JacobsA07}
|
15
15
|
\citation{DBLP:conf/cidr/StonebrakerBCCGHHLRZ07}
|
16
|
+
|
17
|
+
\citation{Martin,Grund}
|
18
|
+
|
16
19
|
\citation{Stonebraker:2007fk}
|
17
20
|
\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}{section.1}}
|
18
21
|
\newlabel{sec:introduction}{{I}{1}{Introduction\relax }{section.1}{}}
|
data/test/test_dblp.rb
CHANGED
@@ -9,16 +9,35 @@ class TestDblp < Test::Unit::TestCase
|
|
9
9
|
|
10
10
|
p = Dblp::Parser.new(File.dirname(__FILE__) + "/test.aux")
|
11
11
|
result = p.parse
|
12
|
-
|
12
|
+
|
13
|
+
assert_equal 5, result.size
|
13
14
|
assert_equal "conf/btw/JacobsA07", result.first
|
14
|
-
assert_equal "conf/cidr/StonebrakerBCCGHHLRZ07", result[1]
|
15
|
+
assert_equal "DBLP:conf/cidr/StonebrakerBCCGHHLRZ07", result[1]
|
15
16
|
|
16
17
|
end
|
17
18
|
|
18
19
|
def test_grabber
|
19
20
|
|
20
21
|
g = Dblp::Grabber.new
|
21
|
-
|
22
|
+
|
23
|
+
res = g.grab("DBLP:conf/btw/JacobsA07")
|
24
|
+
assert res.size == 2
|
25
|
+
|
26
|
+
res = g.grab("DBLP:conf/icde/ZukowskiHNB06")
|
27
|
+
|
28
|
+
res = g.grab("DBLP:conf/btw/JacobsAss07")
|
29
|
+
assert res.size == 0
|
30
|
+
end
|
31
|
+
|
32
|
+
|
33
|
+
def test_citeseer
|
34
|
+
|
35
|
+
g = Dblp::CiteseerGrabber.new
|
36
|
+
res = g.grab("graefe91data")
|
37
|
+
assert res.size == 1
|
38
|
+
|
39
|
+
res = g.grab("nixnurnix")
|
40
|
+
assert res.size == 0
|
22
41
|
|
23
42
|
end
|
24
43
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dblp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin Grund
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-
|
12
|
+
date: 2008-06-12 00:00:00 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|