aurels-rbib 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +4 -0
- data/bibtex/bibliography.rb +45 -0
- data/bibtex/entry.rb +70 -0
- data/bibtex/field.rb +17 -0
- data/bibtex/lexer.rb +123 -0
- data/bibtex/parser.rb +116 -0
- data/bibtex/test_bibliography.rb +76 -0
- data/bibtex/test_entry.rb +70 -0
- data/bibtex/test_field.rb +17 -0
- data/bibtex/test_lexer.rb +116 -0
- data/bibtex/test_parser.rb +27 -0
- data/example.bib +753 -0
- data/glom_citeulike.rb +21 -0
- data/run_unit_tests.rb +12 -0
- data/tara_no_url.rb +14 -0
- metadata +67 -0
data/README
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'bibtex/entry'
|
2
|
+
|
3
|
+
module BibTeX
|
4
|
+
|
5
|
+
# A collection of bibliography entries indexed by their citation key.
class Bibliography
  # Hash mapping each entry's key to the entry itself.
  attr_reader :entries

  def initialize
    @entries = {}
  end

  # Adds the entry +e+, replacing any entry already stored under the same
  # key. Returns the bibliography itself so additions can be chained
  # (bib << a << b).
  #
  # Raises RuntimeError if +e+ is not an Entry.
  def <<(e)
    raise 'Cannot add non-entries to bibliography' unless e.kind_of? Entry

    @entries[e.key] = e
    self
  end

  # Returns the entry stored under +key+.
  #
  # Raises RuntimeError if there is no such entry.
  def [](key)
    @entries[key] || raise("No entry #{key}")
  end

  # Transform the entries in some way and return a
  # new bibliography. The block receives each entry and must return
  # an Entry; the receiver is left unchanged.
  def map
    result = Bibliography.new
    @entries.each_value { |entry| result << yield(entry) }
    result
  end

  # Writes the textual form of the bibliography to +filename+,
  # overwriting any existing file.
  def save(filename)
    # The block form closes the handle even if writing raises.
    File.open(filename, 'w') { |f| f.puts to_s }
  end

  # Concatenates the string form of every entry, ordered by key.
  def to_s
    @entries.keys.sort.map { |k| @entries[k].to_s }.join
  end
end
|
44
|
+
|
45
|
+
end
|
data/bibtex/entry.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
module BibTeX
|
2
|
+
|
3
|
+
# A single entry in a bibliography: a type (e.g. 'book'), a citation key
# and a set of fields indexed by field key.
class Entry
  attr_reader :type, :key

  def initialize(type, key)
    @type = type
    @key = key
    @fields = {}
  end

  # Adds a field, replacing any field already stored under the same key.
  # Accepts either a ready-made Field, or a field key plus its value
  # (in which case a new Field is built from the two).
  def add_field(obj, value = nil)
    if obj.kind_of? Field
      @fields[obj.key] = obj
    else
      @fields[obj] = Field.new(obj, value)
    end
  end

  # Returns the value of the field stored under +key+.
  #
  # Raises RuntimeError if the entry has no such field.
  def [](key)
    field = @fields[key]
    raise "No field with key #{key}" unless field

    field.value
  end

  # Renders the entry as "@type{key, ...}" with the rendered fields
  # sorted alphabetically and followed by a blank line.
  def to_s
    fs = @fields.values.map { |f| " #{f}" }.sort.join(",\n")
    "@#{@type}{#{@key},\n#{fs}\n}\n\n"
  end

  # Returns a copy of this entry without the fields whose key is in
  # +keys+. +keys+ may be any collection responding to include?
  # (Array, Set, ...).
  def reject_fields(keys)
    copy_with_fields { |k| !keys.include?(k) }
  end

  # Returns a copy of this entry holding only the fields whose key is in
  # +keys+. +keys+ may be any collection responding to include?
  # (Array, Set, ...).
  def select_fields(keys)
    copy_with_fields { |k| keys.include?(k) }
  end

  private

  # Builds a new Entry with the same type and key, carrying over each
  # field for which the block returns true when given the field key.
  def copy_with_fields
    copy = Entry.new(@type, @key)
    @fields.each { |k, f| copy.add_field f if yield(k) }
    copy
  end
end
|
51
|
+
|
52
|
+
# Different types of entries.
#
# The values are frozen: the same String object is handed to every Entry
# created from one of these constants, so an in-place mutation through
# one entry would otherwise silently change the constant for all users.
module EntryType
  Book = 'book'.freeze
  Article = 'article'.freeze
  Booklet = 'booklet'.freeze
  Conference = 'conference'.freeze
  InBook = 'inbook'.freeze
  InCollection = 'incollection'.freeze
  InProceedings = 'inproceedings'.freeze
  Manual = 'manual'.freeze
  MastersThesis = 'mastersthesis'.freeze
  Misc = 'misc'.freeze
  PhDThesis = 'phdthesis'.freeze
  Proceedings = 'proceedings'.freeze
  TechReport = 'techreport'.freeze
  Unpublished = 'unpublished'.freeze
end
|
69
|
+
|
70
|
+
end
|
data/bibtex/field.rb
ADDED
data/bibtex/lexer.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
module BibTeX
|
4
|
+
# A position in lexer input: line, column and the name of the file.
class SourcePos
  def initialize(line, column, file)
    @line, @column, @file = line, column, file
  end

  attr_reader :line, :column, :file

  # Renders the position as "file:line" (the column is not included).
  def to_s
    format('%s:%s', file, line)
  end
end
|
17
|
+
|
18
|
+
# An ordered list of lexing rules. Each rule pairs a regular expression
# with the result (token) reported when that expression matches; rules
# are yielded in the order they were added.
class RuleSet
  def initialize
    @rules = []
  end

  # Appends a rule: input matching +regexp+ produces +result+.
  def match(regexp, result)
    @rules << [regexp, result]
  end

  # Appends one rule per word. Each word is matched literally and is
  # reported as its own result.
  def literals(words)
    words.each do |w|
      # Escape the word so regexp metacharacters ('+', '.', '(' ...) are
      # matched as plain text instead of being interpreted as a pattern.
      match Regexp.new(Regexp.escape(w.to_s)), w
    end
  end

  # Yields the regexp and result of every rule, in insertion order.
  def each
    @rules.each { |regexp, result| yield regexp, result }
  end
end
|
39
|
+
|
40
|
+
# Error raised by the lexer; carries the source position at which the
# problem was detected alongside the usual message.
class LexerError < RuntimeError
  def initialize(mess, src_pos)
    @src_pos = src_pos
    super(mess)
  end

  # Source position supplied when the error was created.
  attr_reader :src_pos
end
|
48
|
+
|
49
|
+
# A rule-driven tokenizer built on StringScanner.
#
# Rules are registered on the RuleSet yielded by the constructor and are
# tried in order; the first rule whose regexp matches at the current
# position decides the token. Whitespace and newlines can optionally be
# skipped between tokens.
class Lexer
  # lval: text matched by the most recent token (nil if nothing matched).
  attr_reader :lval, :ignore_whitespace
  attr_accessor :ignore_newlines, :file_name

  # Yields the (initially empty) RuleSet so the caller can register rules.
  # +ignore_whitespace+ sets both the whitespace and the newline skipping
  # flags.
  def initialize(ignore_whitespace = false)
    @scanner = StringScanner.new('')
    @rules = RuleSet.new
    @ignore_whitespace = ignore_whitespace
    @ignore_newlines = ignore_whitespace
    @file_name = '<unknown>'
    yield @rules
  end

  # ignore_whitespace turns on ignore_newlines too
  def ignore_whitespace=(b)
    @ignore_whitespace = b
    @ignore_newlines = b
  end

  # Starts lexing +str+ from its beginning, discarding any previous input.
  def feed(str)
    @scanner = StringScanner.new(str)
    self
  end

  # Returns the SourcePos of the current scan position.
  #
  # Line and column are derived from the input consumed so far rather
  # than tracked incrementally, so they stay correct across runs of blank
  # lines, whitespace skipped by more_tokens?, and peeks. The column is
  # the number of bytes consumed on the current line.
  def src_pos
    consumed = @scanner.string.byteslice(0, @scanner.pos)
    newline_at = consumed.rindex("\n")
    current_line = newline_at ? consumed[(newline_at + 1)..-1] : consumed
    SourcePos.new(consumed.count("\n") + 1, current_line.bytesize, @file_name)
  end

  # Consumes the next token and returns the result of the rule that
  # matched it; the matched text is then available through lval.
  #
  # Raises LexerError if no rule matches at the current position.
  def next_token!
    skip_whitespace
    @rules.each do |regexp, result|
      @lval = @scanner.scan(regexp)
      return result if @lval
    end

    rest = @scanner.rest
    # Show at most the first 10 characters of the offending input.
    unexpect = rest.length < 10 ? rest : "#{rest[0, 10]}..."
    raise LexerError.new("Unexpected input #{unexpect}", src_pos)
  end

  # Returns the next token without consuming it. Skippable whitespace in
  # front of the token stays consumed, and lval is set to the peeked text.
  def peek_token
    tok = next_token!
    @scanner.unscan
    tok
  end

  # Returns the text of the next token without consuming it.
  def peek_lval
    peek_token
    @lval
  end

  # True if any input remains after skipping ignorable whitespace.
  def more_tokens?
    skip_whitespace
    !@scanner.eos?
  end

  private

  # Advances past whatever the current flags say should be ignored.
  def skip_whitespace
    if @ignore_newlines && @ignore_whitespace
      @scanner.skip(/\s+/)
    elsif @ignore_whitespace
      @scanner.skip(/[ \t\r]+/)
    elsif @ignore_newlines
      @scanner.skip(/[\r\n]+/)
    end
  end
end
|
123
|
+
end
|
data/bibtex/parser.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'bibtex/bibliography'
|
2
|
+
require 'bibtex/entry'
|
3
|
+
require 'bibtex/field'
|
4
|
+
require 'bibtex/lexer'
|
5
|
+
|
6
|
+
module BibTeX
|
7
|
+
|
8
|
+
# Recursive-descent parser turning BibTeX text into a Bibliography.
#
# All methods are class methods. The lexer for the parse in progress is
# kept in a class-level instance variable, so two parses must not run at
# the same time.
class Parser
  # Reads the file +filename+ and parses its contents. Error messages
  # produced while parsing carry the file name.
  def self.parse(filename)
    parse_string File.read(filename), filename
  end

  # Parses +data+ (BibTeX source text) and returns a Bibliography.
  # +file_name+, when given, is the name reported in error positions.
  #
  # Raises RuntimeError (or the lexer's LexerError) on malformed input.
  def self.parse_string(data, file_name = nil)
    # A fresh lexer per parse: a failure in an earlier parse cannot leave
    # stale whitespace settings or position state behind for this one.
    @lexer = build_lexer
    @lexer.file_name = file_name if file_name
    @lexer.feed data

    b = Bibliography.new
    b << parse_entry while @lexer.more_tokens?
    b
  end

  # Creates a lexer with the BibTeX token rules. Order matters: the
  # single-character tokens are tried before :id, and :cdata (any one
  # character) is the fallback.
  def self.build_lexer
    Lexer.new(true) do |rules|
      rules.match(/@/, :at)
      rules.match(/\{/, :lbrace)
      rules.match(/\}/, :rbrace)
      rules.match(/\"/, :dquote)
      rules.match(/\=/, :equals)
      rules.match(/\,/, :comma)
      rules.match(/[\w\-_:&]+/, :id)
      rules.match(/.+?/, :cdata)
    end
  end

  # Parses one "@type{key, field = value, ...}" entry.
  def self.parse_entry
    expect :at, '@'
    type = expect :id
    expect :lbrace, '{'
    key = expect :id

    e = Entry.new(type, key)
    while @lexer.peek_token != :rbrace
      expect :comma, ','
      # Accept a trailing comma after the last field.
      break if @lexer.peek_token == :rbrace

      e.add_field parse_field
    end

    expect :rbrace, '}'
    e
  end

  # Parses "key = value" and returns it as a Field whose key is a Symbol.
  def self.parse_field
    key = expect :id
    expect :equals, '='
    value = parse_value
    Field.new(key.intern, value)
  end

  # Parses a field value and returns its text without the outer
  # delimiters. A value is delimited by braces or double quotes, or is
  # bare, in which case it runs up to the next ',' or '}'.
  def self.parse_value
    close = :rbrace
    depth = 1
    case @lexer.peek_token
    when :dquote
      expect :dquote
      close = :dquote
    when :lbrace
      expect :lbrace, '{'
    else
      # Not surrounded by quotes or braces
      depth = 0
    end

    str = ''.dup
    # Inside a value spaces are significant; line breaks are still skipped.
    @lexer.ignore_whitespace = false
    @lexer.ignore_newlines = true
    loop do
      raise 'Unexpected end of input' unless @lexer.more_tokens?

      # A field not delimited by "" or {}
      return str if depth == 0 && [:comma, :rbrace].include?(@lexer.peek_token)

      token = @lexer.next_token!
      if token == :lbrace
        str << '{'
        depth += 1
      elsif token == :rbrace || (token == close && depth == 1)
        # A double quote only terminates the value at the outermost
        # level; inside nested braces it is ordinary text (else branch).
        depth -= 1
        return str if depth == 0

        str << '}'
      else
        str << @lexer.lval
      end
    end
  ensure
    # Restore whitespace skipping however the value parse ended.
    @lexer.ignore_whitespace = true
  end

  # Consumes the next token and returns its text if it is +token+;
  # otherwise raises RuntimeError naming +pretty+ (defaults to the token
  # name) as what was expected.
  def self.expect(token, pretty = nil)
    pretty ||= token.to_s
    got = @lexer.next_token!
    return @lexer.lval if got == token

    raise "#{@lexer.src_pos}: Expected '#{pretty}' but found '#{got}' (text='#{@lexer.lval}')"
  end

  # A bare `private` has no effect on methods defined with `def self.`,
  # so the helpers are hidden explicitly.
  private_class_method :build_lexer, :parse_entry, :parse_field,
                       :parse_value, :expect
end
|
115
|
+
|
116
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'bibtex/bibliography'
require 'test/unit'
require 'tmpdir'
|
3
|
+
|
4
|
+
# Unit tests for Bibliography: adding and looking up entries, mapping
# over entries, rendering to text and saving to a file.
class TestBibliography < Test::Unit::TestCase
  include BibTeX

  # Builds a bibliography holding one book and one article, each with
  # author, year and url fields.
  def setup
    @b = Bibliography.new

    @foo01 = Entry.new(EntryType::Book, 'foo01')
    @foo01.add_field :author, 'C. Doof'
    @foo01.add_field :year, 2007
    @foo01.add_field Field.new(:url, 'www.doof.me.uk')

    @bar99 = Entry.new(EntryType::Article, 'bar99')
    @bar99.add_field :author, 'N. Cakesniffer'
    @bar99.add_field :year, 1999
    @bar99.add_field Field.new(:url, 'www.cakesniffer.co.uk')

    @b << @foo01
    @b << @bar99
  end

  # Entries are counted and can be fetched by key.
  def test_basic
    assert_equal 2, @b.entries.length
    assert_equal @foo01, @b['foo01']
  end

  # map returns a new bibliography built from the transformed entries;
  # here every entry loses its url field.
  def test_map
    expect = <<END
@article{bar99,
author = {N. Cakesniffer},
year = {1999}
}

@book{foo01,
author = {C. Doof},
year = {2007}
}

END
    urlless = @b.map do |e|
      e.reject_fields [:url]
    end
    assert_equal expect, urlless.to_s
  end

  # to_s lists entries ordered by key, with fields sorted in each entry.
  def test_to_s
    expect = <<END
@article{bar99,
author = {N. Cakesniffer},
url = {www.cakesniffer.co.uk},
year = {1999}
}

@book{foo01,
author = {C. Doof},
url = {www.doof.me.uk},
year = {2007}
}

END
    assert_equal expect, @b.to_s
  end

  # save writes exactly the text returned by to_s.
  def test_save
    # Use the platform's temp directory and a per-process name instead of
    # a fixed path under /tmp.
    fname = File.join(Dir.tmpdir, "_test_#{Process.pid}.bib")
    begin
      @b.save fname
      assert_equal @b.to_s, File.read(fname)
    ensure
      # Remove the file even when the assertion above fails.
      File.delete fname if File.exist?(fname)
    end
  end
end
|