dblp 0.2.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,9 +1,12 @@
1
- == 0.0.1 2008-05-29
1
+ == 0.3.2 2008-06-29
2
+ * refactored interface for grabbing data from different sources
3
+ * added citeseer for finding the right bibtex entry
2
4
 
3
- * added documentation and more command line options
5
+ == 0.2.0 2008-05-29
4
6
 
7
+ * added documentation and more command line options
5
8
 
6
- == 0.0.1 2008-05-28
9
+ == 0.1.0 2008-05-28
7
10
 
8
11
  * 1 major enhancement:
9
12
  * Initial release
data/lib/dblp/grabber.rb CHANGED
@@ -2,28 +2,61 @@ require 'open-uri'
2
2
 
3
3
  module Dblp
4
4
 
5
- # Const url to fetch from
6
- DBLP_URL = "http://dblp.uni-trier.de/rec/bibtex/"
7
-
8
5
  class Grabber
9
6
 
10
- def grab(key)
7
+ # Const url to fetch from
8
+ DBLP_URL = "http://dblp.uni-trier.de/rec/bibtex/"
9
+
10
+ def read_html(url)
11
11
  content = ""
12
- open(DBLP_URL + key) do |f|
12
+ open(url) do |f|
13
13
  content = f.read
14
14
  end
15
+ content
16
+ end
15
17
 
18
+ def extract_pre(content)
16
19
  # extract the bibtex code, that is in pre tags
17
20
  result = content.scan(/<pre>(.*?)<.pre>/mix)
18
21
  if result
19
- result.inject({}) do |m, k|
20
- m[k[0].match(/@.*\{(.*?),/)[1].gsub(/(<.*?>)/, "")] = k[0].gsub(/(<.*?>)/, "")
22
+ result.inject([]) do |m, k|
23
+ #m[k[0].match(/@.*\{(.*?),/)[1].gsub(/(<.*?>)/, "")] = k[0].gsub(/(<.*?>)/, "")
24
+ m << k[0].gsub(/(<.*?>)/, "")
21
25
  m
22
26
  end
23
27
  else
24
- {}
28
+ []
29
+ end
30
+ end
31
+
32
+
33
+ def grab(key)
34
+ begin
35
+ # Check the key
36
+ if key =~ /DBLP:/
37
+ content = read_html(DBLP_URL + key.gsub("DBLP:", ""))
38
+ extract_pre(content)
39
+ else
40
+ CiteseerGrabber.new.grab(key)
41
+ end
42
+ rescue
43
+ []
44
+ end
45
+ end
46
+
47
+ end
48
+
49
+ class CiteseerGrabber < Grabber
50
+
51
+ CITESEE_URL = "http://citeseer.ist.psu.edu/"
52
+
53
+ def grab(key)
54
+ begin
55
+ content = read_html(CITESEE_URL + key + ".html")
56
+ extract_pre(content)
57
+ rescue
58
+ []
25
59
  end
26
-
27
60
  end
28
61
 
29
62
  end
data/lib/dblp/parser.rb CHANGED
@@ -9,9 +9,12 @@ module Dblp
9
9
 
10
10
  def parse
11
11
  File.readlines(@filename).inject([]) {|m, l|
12
- result = l.match(/\\citation\{(.*:)?(.*?)\}/)
13
- if result
14
- m << result[2]
12
+ cnt = l.match(/\\citation\{(.*?)\}/)
13
+
14
+ if cnt
15
+ cnt[1].split(",").each do |t|
16
+ m << t
17
+ end
15
18
  end
16
19
  m
17
20
  }.uniq
data/lib/dblp/version.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  module Dblp #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
- MINOR = 2
5
- TINY = 0
4
+ MINOR = 3
5
+ TINY = 2
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/lib/dblp.rb CHANGED
@@ -17,11 +17,11 @@ module Dblp
17
17
  parser = Dblp::Parser.new(file)
18
18
  grabber = Dblp::Grabber.new
19
19
  File.open(options.output, "w+") do |f|
20
- f.puts parser.parse.inject({}) {|m, l|
21
- m.merge!(grabber.grab(l))
20
+ f.puts parser.parse.inject([]) {|m, l|
21
+ m << grabber.grab(l)
22
22
  overall_size = m.size
23
23
  m
24
- }.values.join("\n")
24
+ }.uniq.join("\n")
25
25
  end
26
26
 
27
27
  if options.bibtex
data/test/test.aux CHANGED
@@ -13,6 +13,9 @@
13
13
 
14
14
  \citation{conf/btw/JacobsA07}
15
15
  \citation{DBLP:conf/cidr/StonebrakerBCCGHHLRZ07}
16
+
17
+ \citation{Martin,Grund}
18
+
16
19
  \citation{Stonebraker:2007fk}
17
20
  \@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}{section.1}}
18
21
  \newlabel{sec:introduction}{{I}{1}{Introduction\relax }{section.1}{}}
data/test/test_dblp.rb CHANGED
@@ -9,16 +9,35 @@ class TestDblp < Test::Unit::TestCase
9
9
 
10
10
  p = Dblp::Parser.new(File.dirname(__FILE__) + "/test.aux")
11
11
  result = p.parse
12
- assert result.size == 3
12
+
13
+ assert_equal 5, result.size
13
14
  assert_equal "conf/btw/JacobsA07", result.first
14
- assert_equal "conf/cidr/StonebrakerBCCGHHLRZ07", result[1]
15
+ assert_equal "DBLP:conf/cidr/StonebrakerBCCGHHLRZ07", result[1]
15
16
 
16
17
  end
17
18
 
18
19
  def test_grabber
19
20
 
20
21
  g = Dblp::Grabber.new
21
- assert g.grab("conf/btw/JacobsA07").values.size == 2
22
+
23
+ res = g.grab("DBLP:conf/btw/JacobsA07")
24
+ assert res.size == 2
25
+
26
+ res = g.grab("DBLP:conf/icde/ZukowskiHNB06")
27
+
28
+ res = g.grab("DBLP:conf/btw/JacobsAss07")
29
+ assert res.size == 0
30
+ end
31
+
32
+
33
+ def test_citeseer
34
+
35
+ g = Dblp::CiteseerGrabber.new
36
+ res = g.grab("graefe91data")
37
+ assert res.size == 1
38
+
39
+ res = g.grab("nixnurnix")
40
+ assert res.size == 0
22
41
 
23
42
  end
24
43
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dblp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Grund
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-05-29 00:00:00 +02:00
12
+ date: 2008-06-12 00:00:00 +02:00
13
13
  default_executable:
14
14
  dependencies: []
15
15