dblp 0.2.0 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,9 +1,12 @@
1
- == 0.0.1 2008-05-29
1
+ == 0.3.2 2008-06-29
2
+ * refactored interface for grabbing data from different sources
3
+ * added citeseer for finding the right bibtex entry
2
4
 
3
- * added documentation and more command line options
5
+ == 0.2.0 2008-05-29
4
6
 
7
+ * added documentation and more command line options
5
8
 
6
- == 0.0.1 2008-05-28
9
+ == 0.1.0 2008-05-28
7
10
 
8
11
  * 1 major enhancement:
9
12
  * Initial release
data/lib/dblp/grabber.rb CHANGED
@@ -2,28 +2,61 @@ require 'open-uri'
2
2
 
3
3
  module Dblp
4
4
 
5
- # Const url to fetch from
6
- DBLP_URL = "http://dblp.uni-trier.de/rec/bibtex/"
7
-
8
5
  class Grabber
9
6
 
10
- def grab(key)
7
+ # Const url to fetch from
8
+ DBLP_URL = "http://dblp.uni-trier.de/rec/bibtex/"
9
+
10
+ def read_html(url)
11
11
  content = ""
12
- open(DBLP_URL + key) do |f|
12
+ open(url) do |f|
13
13
  content = f.read
14
14
  end
15
+ content
16
+ end
15
17
 
18
+ def extract_pre(content)
16
19
  # extract the BibTeX code that is enclosed in <pre> tags
17
20
  result = content.scan(/<pre>(.*?)<.pre>/mix)
18
21
  if result
19
- result.inject({}) do |m, k|
20
- m[k[0].match(/@.*\{(.*?),/)[1].gsub(/(<.*?>)/, "")] = k[0].gsub(/(<.*?>)/, "")
22
+ result.inject([]) do |m, k|
23
+ #m[k[0].match(/@.*\{(.*?),/)[1].gsub(/(<.*?>)/, "")] = k[0].gsub(/(<.*?>)/, "")
24
+ m << k[0].gsub(/(<.*?>)/, "")
21
25
  m
22
26
  end
23
27
  else
24
- {}
28
+ []
29
+ end
30
+ end
31
+
32
+
33
+ def grab(key)
34
+ begin
35
+ # Check the key
36
+ if key =~ /DBLP:/
37
+ content = read_html(DBLP_URL + key.gsub("DBLP:", ""))
38
+ extract_pre(content)
39
+ else
40
+ CiteseerGrabber.new.grab(key)
41
+ end
42
+ rescue
43
+ []
44
+ end
45
+ end
46
+
47
+ end
48
+
49
+ class CiteseerGrabber < Grabber
50
+
51
+ CITESEE_URL = "http://citeseer.ist.psu.edu/"
52
+
53
+ def grab(key)
54
+ begin
55
+ content = read_html(CITESEE_URL + key + ".html")
56
+ extract_pre(content)
57
+ rescue
58
+ []
25
59
  end
26
-
27
60
  end
28
61
 
29
62
  end
data/lib/dblp/parser.rb CHANGED
@@ -9,9 +9,12 @@ module Dblp
9
9
 
10
10
  def parse
11
11
  File.readlines(@filename).inject([]) {|m, l|
12
- result = l.match(/\\citation\{(.*:)?(.*?)\}/)
13
- if result
14
- m << result[2]
12
+ cnt = l.match(/\\citation\{(.*?)\}/)
13
+
14
+ if cnt
15
+ cnt[1].split(",").each do |t|
16
+ m << t
17
+ end
15
18
  end
16
19
  m
17
20
  }.uniq
data/lib/dblp/version.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  module Dblp #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
- MINOR = 2
5
- TINY = 0
4
+ MINOR = 3
5
+ TINY = 2
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/lib/dblp.rb CHANGED
@@ -17,11 +17,11 @@ module Dblp
17
17
  parser = Dblp::Parser.new(file)
18
18
  grabber = Dblp::Grabber.new
19
19
  File.open(options.output, "w+") do |f|
20
- f.puts parser.parse.inject({}) {|m, l|
21
- m.merge!(grabber.grab(l))
20
+ f.puts parser.parse.inject([]) {|m, l|
21
+ m << grabber.grab(l)
22
22
  overall_size = m.size
23
23
  m
24
- }.values.join("\n")
24
+ }.uniq.join("\n")
25
25
  end
26
26
 
27
27
  if options.bibtex
data/test/test.aux CHANGED
@@ -13,6 +13,9 @@
13
13
 
14
14
  \citation{conf/btw/JacobsA07}
15
15
  \citation{DBLP:conf/cidr/StonebrakerBCCGHHLRZ07}
16
+
17
+ \citation{Martin,Grund}
18
+
16
19
  \citation{Stonebraker:2007fk}
17
20
  \@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}{section.1}}
18
21
  \newlabel{sec:introduction}{{I}{1}{Introduction\relax }{section.1}{}}
data/test/test_dblp.rb CHANGED
@@ -9,16 +9,35 @@ class TestDblp < Test::Unit::TestCase
9
9
 
10
10
  p = Dblp::Parser.new(File.dirname(__FILE__) + "/test.aux")
11
11
  result = p.parse
12
- assert result.size == 3
12
+
13
+ assert_equal 5, result.size
13
14
  assert_equal "conf/btw/JacobsA07", result.first
14
- assert_equal "conf/cidr/StonebrakerBCCGHHLRZ07", result[1]
15
+ assert_equal "DBLP:conf/cidr/StonebrakerBCCGHHLRZ07", result[1]
15
16
 
16
17
  end
17
18
 
18
19
  def test_grabber
19
20
 
20
21
  g = Dblp::Grabber.new
21
- assert g.grab("conf/btw/JacobsA07").values.size == 2
22
+
23
+ res = g.grab("DBLP:conf/btw/JacobsA07")
24
+ assert res.size == 2
25
+
26
+ res = g.grab("DBLP:conf/icde/ZukowskiHNB06")
27
+
28
+ res = g.grab("DBLP:conf/btw/JacobsAss07")
29
+ assert res.size == 0
30
+ end
31
+
32
+
33
+ def test_citeseer
34
+
35
+ g = Dblp::CiteseerGrabber.new
36
+ res = g.grab("graefe91data")
37
+ assert res.size == 1
38
+
39
+ res = g.grab("nixnurnix")
40
+ assert res.size == 0
22
41
 
23
42
  end
24
43
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dblp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Grund
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-05-29 00:00:00 +02:00
12
+ date: 2008-06-12 00:00:00 +02:00
13
13
  default_executable:
14
14
  dependencies: []
15
15