aurels-rbib 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +4 -0
- data/bibtex/bibliography.rb +45 -0
- data/bibtex/entry.rb +70 -0
- data/bibtex/field.rb +17 -0
- data/bibtex/lexer.rb +123 -0
- data/bibtex/parser.rb +116 -0
- data/bibtex/test_bibliography.rb +76 -0
- data/bibtex/test_entry.rb +70 -0
- data/bibtex/test_field.rb +17 -0
- data/bibtex/test_lexer.rb +116 -0
- data/bibtex/test_parser.rb +27 -0
- data/example.bib +753 -0
- data/glom_citeulike.rb +21 -0
- data/run_unit_tests.rb +12 -0
- data/tara_no_url.rb +14 -0
- metadata +67 -0
data/README
ADDED
data/bibtex/bibliography.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'bibtex/entry'
|
2
|
+
|
3
|
+
module BibTeX

  # A collection of bibliography entries indexed by citation key.
  #
  # Entries live in a Hash keyed by Entry#key, so adding an entry whose key
  # is already present replaces the earlier entry.
  class Bibliography
    # The underlying Hash of key => Entry.
    attr_reader :entries

    def initialize
      @entries = {}
    end

    # Add an entry to the bibliography.
    #
    # e:: the Entry to store under +e.key+
    #
    # Returns the entry that was added.
    # Raises RuntimeError if +e+ is not an Entry.
    def <<(e)
      raise 'Cannot add non-entries to bibliography' unless e.kind_of? Entry

      @entries[e.key] = e
    end

    # Look up an entry by its citation key.
    #
    # Raises RuntimeError if no entry is stored under +key+.
    def [](key)
      @entries[key] or raise "No entry #{key}"
    end

    # Transform the entries in some way and return a
    # new bibliography. The block receives each entry and must return
    # an Entry, which is added to the result.
    def map
      r = Bibliography.new
      @entries.each_value { |e| r << yield(e) }
      r
    end

    # Write the textual form of the bibliography (see #to_s) to +filename+,
    # replacing any existing content.
    #
    # The block form of File.open guarantees the handle is closed even when
    # rendering or writing raises part-way through.
    def save(filename)
      File.open(filename, 'w') { |f| f.puts to_s }
    end

    # Concatenate the textual form of every entry, ordered by key.
    def to_s
      @entries.keys.sort.map { |k| @entries[k].to_s }.join
    end
  end

end
|
data/bibtex/entry.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
module BibTeX

  # A single entry in a bibliography: an entry type, a citation key and a
  # set of fields indexed by field key.
  class Entry
    attr_reader :type, :key

    def initialize(type, key)
      @type = type
      @key = key
      @fields = {}
    end

    # Attach a field to the entry, replacing any field with the same key.
    #
    # obj::   either a ready-made Field, or the key for a new field
    # value:: the value for the new field; only used when +obj+ is a key
    #
    # Returns the stored Field.
    def add_field(obj, value = nil)
      if obj.kind_of? Field
        @fields[obj.key] = obj
      else
        @fields[obj] = Field.new(obj, value)
      end
    end

    # Return the value of the field stored under +key+.
    #
    # Raises RuntimeError if the entry has no such field.
    def [](key)
      f = @fields[key]
      raise "No field with key #{key}" unless f

      f.value
    end

    # Render the entry as text: a header line with type and key, then the
    # rendered fields sorted as strings and separated by commas.
    def to_s
      fs = @fields.values.map { |f| " #{f}" }.sort.join(",\n")
      "@#{@type}{#{@key},\n#{fs}\n}\n\n"
    end

    # Return a new entry with the same type and key but without the fields
    # whose keys are listed in +keys+. The Field objects are shared with the
    # receiver, not copied.
    def reject_fields(keys)
      copy_with_fields { |k| !keys.include?(k) }
    end

    # Return a new entry with the same type and key holding only the fields
    # whose keys are listed in +keys+. The Field objects are shared with the
    # receiver, not copied.
    def select_fields(keys)
      copy_with_fields { |k| keys.include?(k) }
    end

    private

    # Build a same-type, same-key entry containing the fields for which the
    # block returns a true value when given the field key.
    def copy_with_fields
      r = Entry.new(@type, @key)
      @fields.each { |k, f| r.add_field(f) if yield(k) }
      r
    end
  end

  # Different types of entries. The strings are frozen so that an entry
  # holding one of them cannot modify the shared constant in place.
  module EntryType
    Book = 'book'.freeze
    Article = 'article'.freeze
    Booklet = 'booklet'.freeze
    Conference = 'conference'.freeze
    InBook = 'inbook'.freeze
    InCollection = 'incollection'.freeze
    InProceedings = 'inproceedings'.freeze
    Manual = 'manual'.freeze
    MastersThesis = 'mastersthesis'.freeze
    Misc = 'misc'.freeze
    PhDThesis = 'phdthesis'.freeze
    Proceedings = 'proceedings'.freeze
    TechReport = 'techreport'.freeze
    Unpublished = 'unpublished'.freeze
  end

end
|
data/bibtex/field.rb
ADDED
data/bibtex/lexer.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
module BibTeX
  # A position in the lexer's input: line, column and the name of the file
  # the input was read from.
  class SourcePos
    attr_reader :line, :column, :file

    def initialize(line, column, file)
      @line = line
      @column = column
      @file = file
    end

    # Rendered as "file:line"; the column is not included.
    def to_s
      "#{file}:#{line}"
    end
  end

  # An ordered list of (regexp, result) pairs. The lexer tries the rules in
  # the order they were added and uses the first one that matches.
  class RuleSet
    def initialize
      @rules = []
    end

    # Add a rule: when +regexp+ matches at the scan position, the lexer
    # returns +result+ as the token.
    def match(regexp, result)
      @rules << [regexp, result]
    end

    # Add one rule per word, using the word itself as the token. Each word
    # is matched literally: regexp metacharacters such as '+' or '(' are
    # escaped rather than interpreted.
    def literals(words)
      words.each do |w|
        match(/#{Regexp.escape(w.to_s)}/, w)
      end
    end

    # Yield each rule as (regexp, result), in insertion order.
    def each
      @rules.each do |regexp, result|
        yield regexp, result
      end
    end
  end

  # Raised when no rule matches the input at the current position.
  # +src_pos+ is the SourcePos at which lexing failed.
  class LexerError < RuntimeError
    attr_reader :src_pos

    def initialize(mess, src_pos)
      super(mess)
      @src_pos = src_pos
    end
  end

  # A rule-driven tokenizer on top of StringScanner. Rules are registered
  # through the RuleSet yielded by the constructor; input is supplied with
  # #feed and consumed with #next_token!.
  class Lexer
    attr_reader :lval, :ignore_whitespace
    attr_accessor :ignore_newlines, :file_name

    # ignore_whitespace:: initial value for both whitespace and newline
    #                     skipping
    #
    # Yields the RuleSet so the caller can register rules; a block is
    # required.
    def initialize(ignore_whitespace = false)
      @scanner = StringScanner.new('')
      @rules = RuleSet.new
      @ignore_whitespace = ignore_whitespace
      @ignore_newlines = ignore_whitespace
      @file_name = '<unknown>'
      yield @rules
    end

    # ignore_whitespace turns on ignore_newlines too
    def ignore_whitespace=(b)
      @ignore_whitespace = b
      @ignore_newlines = b
    end

    # Replace the input with +str+ and restart scanning from its beginning.
    def feed(str)
      @scanner = StringScanner.new(str)
    end

    # The current position as a SourcePos.
    #
    # The position is derived from the text consumed so far rather than from
    # counters updated while scanning, so it stays correct across blank
    # lines, across whitespace skipped by #more_tokens?, after a new #feed,
    # and before any input has been fed. Lines are 1-based; the column is the
    # number of characters consumed on the current line.
    def src_pos
      consumed = @scanner.string.byteslice(0, @scanner.pos)
      line = consumed.count("\n") + 1
      last_newline = consumed.rindex("\n")
      column = last_newline ? consumed.length - last_newline - 1 : consumed.length
      SourcePos.new(line, column, @file_name)
    end

    # Consume the next token and return the result of the first matching
    # rule. The matched text is available afterwards through #lval.
    #
    # Raises LexerError when no rule matches (including at end of input);
    # the message quotes at most the first 10 characters of what is left.
    def next_token!
      skip_whitespace
      @rules.each do |regexp, result|
        @lval = @scanner.scan(regexp)
        return result if @lval
      end

      rest = @scanner.rest
      unexpect = rest.length < 10 ? rest : "#{rest[0, 10]}..."
      raise LexerError.new("Unexpected input #{unexpect}", src_pos)
    end

    # Return the next token without consuming it. Leading whitespace that
    # is being ignored stays skipped, and #lval is left holding the peeked
    # token's text.
    def peek_token
      tok = next_token!
      @scanner.unscan
      tok
    end

    # The text of the next token, without consuming it.
    def peek_lval
      peek_token
      @lval
    end

    # True when input remains after skipping ignorable whitespace.
    def more_tokens?
      skip_whitespace
      !@scanner.eos?
    end

    private

    # Advance past whatever the current ignore_* settings say to ignore.
    def skip_whitespace
      if @ignore_newlines && @ignore_whitespace
        @scanner.skip(/\s+/)
      elsif @ignore_whitespace
        @scanner.skip(/[ \t\r]+/)
      elsif @ignore_newlines
        @scanner.skip(/[\r\n]+/)
      end
    end
  end
end
|
data/bibtex/parser.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'bibtex/bibliography'
|
2
|
+
require 'bibtex/entry'
|
3
|
+
require 'bibtex/field'
|
4
|
+
require 'bibtex/lexer'
|
5
|
+
|
6
|
+
module BibTeX

  # Parser that turns BibTeX source text into a Bibliography.
  #
  # All methods are class methods and share one class-level Lexer, so a
  # parse mutates state that is common to every caller of this class.
  class Parser
    # Read +filename+ and parse its content. Returns a Bibliography.
    def self.parse(filename)
      parse_string File.read(filename)
    end

    # Parse +data+ (BibTeX source text) and return a Bibliography holding
    # every entry found in it.
    def self.parse_string(data)
      @lexer.feed data

      b = Bibliography.new
      b << parse_entry while @lexer.more_tokens?
      b
    end

    # NOTE: the helpers below are internal. The original file placed a bare
    # `private` here, which has no effect on `def self.` methods, so they
    # have always been publicly callable and are left that way.

    # Parse one "@type{key, field = value, ...}" entry and return it as an
    # Entry.
    def self.parse_entry
      expect :at, '@'
      type = expect :id
      expect :lbrace, '{'
      key = expect :id

      e = Entry.new(type, key)
      while @lexer.peek_token != :rbrace
        expect :comma, ','
        e.add_field parse_field
      end

      expect :rbrace, '}'
      e
    end

    # Parse one "key = value" pair and return it as a Field whose key is a
    # Symbol.
    def self.parse_field
      key = expect :id
      expect :equals, '='
      value = parse_value
      Field.new(key.intern, value)
    end

    # Parse a field value and return its text without the outer delimiters.
    #
    # The value may be wrapped in double quotes, wrapped in braces, or bare.
    # A bare value ends at the next comma or closing brace, which is left
    # unconsumed. Nested braces inside a delimited value are kept in the
    # returned text.
    #
    # Raises RuntimeError if the input ends before the value is complete.
    def self.parse_value
      close = :rbrace
      brace_count = 1
      if @lexer.peek_token == :dquote
        expect :dquote
        close = :dquote
      elsif @lexer.peek_token == :lbrace
        expect :lbrace, '{'
      else
        # Not surrounded by quotes or braces
        brace_count = 0
      end

      str = ''
      # Spaces are part of the value; line breaks inside it are dropped.
      @lexer.ignore_whitespace = false
      @lexer.ignore_newlines = true
      begin
        loop do
          raise 'Unexpected end of input' unless @lexer.more_tokens?

          # A field not delimited by "" or {}
          if brace_count == 0 && [:comma, :rbrace].include?(@lexer.peek_token)
            return str
          end

          case @lexer.next_token!
          when :lbrace
            str << '{'
            brace_count += 1
          when :rbrace
            brace_count -= 1
            return str if brace_count == 0

            str << '}'
          when :dquote
            # A quote only terminates a quote-delimited value at the outer
            # nesting level; inside braces (as in "G{\"o}del") or inside a
            # brace-delimited value it is ordinary text.
            return str if close == :dquote && brace_count == 1

            str << @lexer.lval
          else
            str << @lexer.lval
          end
        end
      ensure
        # The lexer is shared by every parse, so whitespace skipping must be
        # switched back on even when the value is abandoned by an exception.
        @lexer.ignore_whitespace = true
      end
    end

    # Consume the next token and check that it is +token+.
    #
    # token::  the expected token
    # pretty:: how to name the expected token in the error message;
    #          defaults to +token.to_s+
    #
    # Returns the matched text. Raises RuntimeError, prefixed with the
    # source position, when a different token is found.
    def self.expect(token, pretty = nil)
      pretty ||= token.to_s
      got = @lexer.next_token!
      unless got == token
        raise "#{@lexer.src_pos}: Expected '#{pretty}' but found '#{got}' (text='#{@lexer.lval}')"
      end

      @lexer.lval
    end

    # Rules are tried in order, so the catch-all :cdata rule must stay last.
    @lexer = Lexer.new(true) do |rules|
      rules.match(/@/, :at)
      rules.match(/\{/, :lbrace)
      rules.match(/\}/, :rbrace)
      rules.match(/\"/, :dquote)
      rules.match(/\=/, :equals)
      rules.match(/\,/, :comma)
      rules.match(/[\w\-_:&]+/, :id)
      rules.match(/.+?/, :cdata)
    end
  end

end
|
data/bibtex/test_bibliography.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'test/unit'
require 'tmpdir'

require 'bibtex/bibliography'
require 'bibtex/field'
|
3
|
+
|
4
|
+
# Unit tests for BibTeX::Bibliography: adding and looking up entries,
# mapping over them, rendering to text and saving to a file.
class TestBibliography < Test::Unit::TestCase
  include BibTeX

  # Build a bibliography with one book and one article, each carrying
  # author, year and url fields.
  def setup
    @b = Bibliography.new

    @foo01 = Entry.new(EntryType::Book, 'foo01')
    @foo01.add_field :author, 'C. Doof'
    @foo01.add_field :year, 2007
    @foo01.add_field Field.new(:url, 'www.doof.me.uk')

    @bar99 = Entry.new(EntryType::Article, 'bar99')
    @bar99.add_field :author, 'N. Cakesniffer'
    @bar99.add_field :year, 1999
    @bar99.add_field Field.new(:url, 'www.cakesniffer.co.uk')

    @b << @foo01
    @b << @bar99
  end

  def test_basic
    assert_equal 2, @b.entries.length
    assert_equal @foo01, @b['foo01']
  end

  # Mapping every entry through reject_fields must yield a bibliography
  # whose text no longer contains the url fields.
  #
  # NOTE(review): the field lines in the expected text carry no leading
  # indentation here; confirm against the output of Entry#to_s and
  # Field#to_s, as whitespace may have been lost from this listing.
  def test_map
    expect = <<END
@article{bar99,
author = {N. Cakesniffer},
year = {1999}
}

@book{foo01,
author = {C. Doof},
year = {2007}
}

END
    urlless = @b.map do |e|
      e.reject_fields [:url]
    end
    assert_equal expect, urlless.to_s
  end

  # Entries are rendered in key order, fields in sorted order.
  #
  # NOTE(review): same caveat about leading indentation as in test_map.
  def test_to_s
    expect = <<END
@article{bar99,
author = {N. Cakesniffer},
url = {www.cakesniffer.co.uk},
year = {1999}
}

@book{foo01,
author = {C. Doof},
url = {www.doof.me.uk},
year = {2007}
}

END
    assert_equal expect, @b.to_s
  end

  # Saving must write exactly the text produced by to_s.
  #
  # The file goes in the platform's temporary directory under a per-process
  # name, and is removed even when the assertion fails.
  def test_save
    fname = File.join(Dir.tmpdir, "_test_#{Process.pid}.bib")
    begin
      @b.save fname
      assert_equal @b.to_s, File.read(fname)
    ensure
      File.delete fname if File.exist? fname
    end
  end

end
|