dblp 0.2.0 → 0.3.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
- data/History.txt +6 -3
- data/lib/dblp/grabber.rb +42 -9
- data/lib/dblp/parser.rb +6 -3
- data/lib/dblp/version.rb +2 -2
- data/lib/dblp.rb +3 -3
- data/test/test.aux +3 -0
- data/test/test_dblp.rb +22 -3
- metadata +2 -2
data/History.txt
CHANGED
@@ -1,9 +1,12 @@
|
|
1
|
-
== 0.
|
1
|
+
== 0.3.2 2008-06-29
|
2
|
+
* refactored interface for grabbing data from different sources
|
3
|
+
* added citeseer for finding the right bibtex entry
|
2
4
|
|
3
|
-
|
5
|
+
== 0.2.0 2008-05-29
|
4
6
|
|
7
|
+
* added documentation and more command line options
|
5
8
|
|
6
|
-
== 0.0
|
9
|
+
== 0.1.0 2008-05-28
|
7
10
|
|
8
11
|
* 1 major enhancement:
|
9
12
|
* Initial release
|
data/lib/dblp/grabber.rb
CHANGED
@@ -2,28 +2,61 @@ require 'open-uri'
|
|
2
2
|
|
3
3
|
module Dblp
|
4
4
|
|
5
|
-
# Const url to fetch from
|
6
|
-
DBLP_URL = "http://dblp.uni-trier.de/rec/bibtex/"
|
7
|
-
|
8
5
|
class Grabber
|
9
6
|
|
10
|
-
|
7
|
+
# Const url to fetch from
|
8
|
+
DBLP_URL = "http://dblp.uni-trier.de/rec/bibtex/"
|
9
|
+
|
10
|
+
def read_html(url)
|
11
11
|
content = ""
|
12
|
-
open(
|
12
|
+
open(url) do |f|
|
13
13
|
content = f.read
|
14
14
|
end
|
15
|
+
content
|
16
|
+
end
|
15
17
|
|
18
|
+
def extract_pre(content)
|
16
19
|
# extract the bibtex code, that is in pre tags
|
17
20
|
result = content.scan(/<pre>(.*?)<.pre>/mix)
|
18
21
|
if result
|
19
|
-
result.inject(
|
20
|
-
m[k[0].match(/@.*\{(.*?),/)[1].gsub(/(<.*?>)/, "")] = k[0].gsub(/(<.*?>)/, "")
|
22
|
+
result.inject([]) do |m, k|
|
23
|
+
#m[k[0].match(/@.*\{(.*?),/)[1].gsub(/(<.*?>)/, "")] = k[0].gsub(/(<.*?>)/, "")
|
24
|
+
m << k[0].gsub(/(<.*?>)/, "")
|
21
25
|
m
|
22
26
|
end
|
23
27
|
else
|
24
|
-
|
28
|
+
[]
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
|
33
|
+
def grab(key)
|
34
|
+
begin
|
35
|
+
# Check the key
|
36
|
+
if key =~ /DBLP:/
|
37
|
+
content = read_html(DBLP_URL + key.gsub("DBLP:", ""))
|
38
|
+
extract_pre(content)
|
39
|
+
else
|
40
|
+
CiteseerGrabber.new.grab(key)
|
41
|
+
end
|
42
|
+
rescue
|
43
|
+
[]
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
class CiteseerGrabber < Grabber
|
50
|
+
|
51
|
+
CITESEE_URL = "http://citeseer.ist.psu.edu/"
|
52
|
+
|
53
|
+
def grab(key)
|
54
|
+
begin
|
55
|
+
content = read_html(CITESEE_URL + key + ".html")
|
56
|
+
extract_pre(content)
|
57
|
+
rescue
|
58
|
+
[]
|
25
59
|
end
|
26
|
-
|
27
60
|
end
|
28
61
|
|
29
62
|
end
|
data/lib/dblp/parser.rb
CHANGED
@@ -9,9 +9,12 @@ module Dblp
|
|
9
9
|
|
10
10
|
def parse
|
11
11
|
File.readlines(@filename).inject([]) {|m, l|
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
cnt = l.match(/\\citation\{(.*?)\}/)
|
13
|
+
|
14
|
+
if cnt
|
15
|
+
cnt[1].split(",").each do |t|
|
16
|
+
m << t
|
17
|
+
end
|
15
18
|
end
|
16
19
|
m
|
17
20
|
}.uniq
|
data/lib/dblp/version.rb
CHANGED
data/lib/dblp.rb
CHANGED
@@ -17,11 +17,11 @@ module Dblp
|
|
17
17
|
parser = Dblp::Parser.new(file)
|
18
18
|
grabber = Dblp::Grabber.new
|
19
19
|
File.open(options.output, "w+") do |f|
|
20
|
-
f.puts parser.parse.inject(
|
21
|
-
m
|
20
|
+
f.puts parser.parse.inject([]) {|m, l|
|
21
|
+
m << grabber.grab(l)
|
22
22
|
overall_size = m.size
|
23
23
|
m
|
24
|
-
}.
|
24
|
+
}.uniq.join("\n")
|
25
25
|
end
|
26
26
|
|
27
27
|
if options.bibtex
|
data/test/test.aux
CHANGED
@@ -13,6 +13,9 @@
|
|
13
13
|
|
14
14
|
\citation{conf/btw/JacobsA07}
|
15
15
|
\citation{DBLP:conf/cidr/StonebrakerBCCGHHLRZ07}
|
16
|
+
|
17
|
+
\citation{Martin,Grund}
|
18
|
+
|
16
19
|
\citation{Stonebraker:2007fk}
|
17
20
|
\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}{section.1}}
|
18
21
|
\newlabel{sec:introduction}{{I}{1}{Introduction\relax }{section.1}{}}
|
data/test/test_dblp.rb
CHANGED
@@ -9,16 +9,35 @@ class TestDblp < Test::Unit::TestCase
|
|
9
9
|
|
10
10
|
p = Dblp::Parser.new(File.dirname(__FILE__) + "/test.aux")
|
11
11
|
result = p.parse
|
12
|
-
|
12
|
+
|
13
|
+
assert_equal 5, result.size
|
13
14
|
assert_equal "conf/btw/JacobsA07", result.first
|
14
|
-
assert_equal "conf/cidr/StonebrakerBCCGHHLRZ07", result[1]
|
15
|
+
assert_equal "DBLP:conf/cidr/StonebrakerBCCGHHLRZ07", result[1]
|
15
16
|
|
16
17
|
end
|
17
18
|
|
18
19
|
def test_grabber
|
19
20
|
|
20
21
|
g = Dblp::Grabber.new
|
21
|
-
|
22
|
+
|
23
|
+
res = g.grab("DBLP:conf/btw/JacobsA07")
|
24
|
+
assert res.size == 2
|
25
|
+
|
26
|
+
res = g.grab("DBLP:conf/icde/ZukowskiHNB06")
|
27
|
+
|
28
|
+
res = g.grab("DBLP:conf/btw/JacobsAss07")
|
29
|
+
assert res.size == 0
|
30
|
+
end
|
31
|
+
|
32
|
+
|
33
|
+
def test_citeseer
|
34
|
+
|
35
|
+
g = Dblp::CiteseerGrabber.new
|
36
|
+
res = g.grab("graefe91data")
|
37
|
+
assert res.size == 1
|
38
|
+
|
39
|
+
res = g.grab("nixnurnix")
|
40
|
+
assert res.size == 0
|
22
41
|
|
23
42
|
end
|
24
43
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dblp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin Grund
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-
|
12
|
+
date: 2008-06-12 00:00:00 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|