rbib 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest +19 -0
- data/README.rdoc +19 -0
- data/Rakefile +14 -0
- data/examples/example.bib +753 -0
- data/examples/glom_citeulike.rb +23 -0
- data/examples/tara_no_url.rb +15 -0
- data/lib/bibtex.rb +9 -0
- data/lib/bibtex/bibliography.rb +46 -0
- data/lib/bibtex/entry.rb +89 -0
- data/lib/bibtex/field.rb +17 -0
- data/lib/bibtex/lexer.rb +123 -0
- data/lib/bibtex/parser.rb +116 -0
- data/rbib.gemspec +32 -0
- data/test/run_unit_tests.rb +15 -0
- data/test/test_bibliography.rb +76 -0
- data/test/test_entry.rb +70 -0
- data/test/test_field.rb +17 -0
- data/test/test_lexer.rb +116 -0
- data/test/test_parser.rb +27 -0
- metadata +102 -0
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby

#
# Download your CiteULike bibliography and strip
# unnecessary fields.
#

##### YOUR SETTINGS HERE #####
User = 'NickGasson'
BadFields = [:url]
##### NO NEED TO EDIT BELOW HERE #####

require 'net/http'
require 'uri'

# Make the library in ../lib loadable when running from a source checkout.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require "bibtex"

# Fetch the user's library as a single BibTeX document.
bibtex = Net::HTTP.get URI.parse("http://www.citeulike.org/bibtex/user/#{User}")

# The library namespace is spelled `Bibtex`; constants are case-sensitive,
# so `BibTeX::Parser` would raise NameError.
Bibtex::Parser.parse_string(bibtex).map do |entry|
  entry.reject_fields BadFields
end.save("#{User}.bib")
|
23
|
+
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby

#
# Strip URL fields from each BibTeX file on the command line.
# Write the output to filename.stripped.bib
#

# Make the library in ../lib loadable when running from a source checkout.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require "bibtex"

ARGV.each do |file|
  # The library namespace is spelled `Bibtex`; constants are case-sensitive,
  # so `BibTeX::Parser` would raise NameError.
  Bibtex::Parser.parse(file).map do |entry|
    entry.reject_fields [:url]
  end.save(file.sub(/\.bib$/, '.stripped.bib'))
end
|
data/lib/bibtex.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'bibtex/entry'
|
2
|
+
|
3
|
+
module Bibtex
|
4
|
+
|
5
|
+
# A collection of Entry objects indexed by their citation key.
class Bibliography
  # Hash mapping each citation key to its Entry.
  attr_reader :entries

  def initialize
    @entries = {}
  end

  # Add an entry. An entry whose key is already present replaces the
  # existing one after a warning is written to stderr.
  #
  # Raises RuntimeError if +e+ is not an Entry.
  def <<(e)
    raise 'Cannot add non-entries to bibliography' unless e.kind_of? Entry

    $stderr.print "Warning: Bibtex duplicate entry <#{e.key}>\n" if @entries[e.key]
    @entries[e.key] = e
  end

  # Look up an entry by key.
  #
  # Raises RuntimeError if no entry has that key.
  def [](key)
    @entries[key] or raise "No entry #{key}"
  end

  # Transform the entries in some way and return a
  # new bibliography. The block receives each entry and must
  # return an Entry.
  def map
    r = Bibliography.new
    @entries.each_value { |e| r << yield(e) }
    r
  end

  # Write the bibliography to +filename+, replacing any existing file.
  # The block form of File.open closes the handle even if writing raises.
  def save(filename)
    File.open(filename, 'w') { |f| f.puts to_s }
  end

  # Concatenation of every entry's string form, in sorted key order.
  def to_s
    @entries.keys.sort.map { |k| @entries[k].to_s }.join
  end
end
|
45
|
+
|
46
|
+
end
|
data/lib/bibtex/entry.rb
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
module Bibtex
|
2
|
+
|
3
|
+
# A single entry in a bibliography
class Entry
  attr_reader :type, :key

  # type - the entry type string (for example 'book')
  # key  - the citation key
  def initialize(type, key)
    @type = type
    @key = key
    @fields = {}
    $stderr.print key, "\n" if $DEBUG
  end

  # Add a field. Accepts either a Field object, stored under its own key,
  # or a key plus value from which a Field is built.
  def add_field(obj, value = nil)
    if obj.kind_of? Field
      @fields[obj.key] = obj
    else
      @fields[obj] = Field.new(obj, value)
    end
  end

  # Value of the field stored under +key+. If there is no exact match the
  # lookup is retried with the key lower-cased and converted to a symbol.
  # Returns an empty string when the field does not exist.
  def [](key)
    f = @fields[key] || @fields[key.to_s.downcase.to_sym]
    f ? f.value : ''
  end

  # True when the field exists and its value is not blank.
  def has?(key)
    field = self[key]
    !field.nil? && field.strip != ''
  end

  # Make sure the field exists and has meaningful data, then return its
  # value. On failure the entry is dumped to stderr and RuntimeError is
  # raised.
  def required(key)
    unless has?(key)
      $stderr.print self
      raise "Key #{key} does not exist"
    end
    self[key]
  end

  # BibTeX source form of the entry, with fields in sorted order.
  def to_s
    fs = @fields.map { |_k, f| " #{f.to_s}" }.sort.join ",\n"
    "@#{@type}{#{@key},\n#{fs}\n}\n\n"
  end

  # New entry with the same type and key, without the fields named in +keys+.
  def reject_fields(keys)
    r = Entry.new(@type, @key)
    @fields.each do |k, f|
      r.add_field f unless keys.include?(k)
    end
    r
  end

  # New entry with the same type and key, keeping only the fields named
  # in +keys+.
  def select_fields(keys)
    r = Entry.new(@type, @key)
    @fields.each do |k, f|
      r.add_field f if keys.include?(k)
    end
    r
  end
end
|
70
|
+
|
71
|
+
# Different types of entries
#
# The strings are frozen so that the shared constants cannot be mutated
# in place by code that receives them.
module EntryType
  Book = 'book'.freeze
  Article = 'article'.freeze
  Booklet = 'booklet'.freeze
  Conference = 'conference'.freeze
  InBook = 'inbook'.freeze
  InCollection = 'incollection'.freeze
  InProceedings = 'inproceedings'.freeze
  Manual = 'manual'.freeze
  MastersThesis = 'mastersthesis'.freeze
  Misc = 'misc'.freeze
  PhDThesis = 'phdthesis'.freeze
  Proceedings = 'proceedings'.freeze
  TechReport = 'techreport'.freeze
  Unpublished = 'unpublished'.freeze
end
|
88
|
+
|
89
|
+
end
|
data/lib/bibtex/field.rb
ADDED
data/lib/bibtex/lexer.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
module Bibtex
|
4
|
+
# A position in an input source: line, column and file name.
class SourcePos
  attr_reader :line, :column, :file

  def initialize(line, column, file)
    @line = line
    @column = column
    @file = file
  end

  # "file:line" form; the column is not included.
  def to_s
    "#{file}:#{line}"
  end
end
|
17
|
+
|
18
|
+
# An ordered list of [regexp, result] lexer rules.
class RuleSet
  def initialize
    @rules = []
  end

  # Append a rule: input matching +regexp+ produces +result+.
  def match(regexp, result)
    @rules << [regexp, result]
  end

  # Append one rule per word, where the word is both the text to match and
  # the result. Words are escaped so that regexp metacharacters such as
  # "+" or "{" match themselves.
  def literals(words)
    words.each do |w|
      match(Regexp.new(Regexp.escape(w.to_s)), w)
    end
  end

  # Yield each rule as (regexp, result), in the order the rules were added.
  def each
    @rules.each do |regexp, result|
      yield regexp, result
    end
  end
end
|
39
|
+
|
40
|
+
# Raised by the lexer on input it cannot tokenize. Carries the source
# position at which the error was detected.
class LexerError < RuntimeError
  attr_reader :src_pos

  # mess    - the error message
  # src_pos - where the error occurred
  def initialize(mess, src_pos)
    super(mess)
    @src_pos = src_pos
  end
end
|
48
|
+
|
49
|
+
# A rule-driven tokenizer built on StringScanner. Rules are tried in the
# order they were registered; the first one that matches at the current
# position wins.
class Lexer
  # lval is the text of the most recently scanned token.
  attr_reader :lval, :ignore_whitespace
  attr_accessor :ignore_newlines, :file_name

  # Yields a new RuleSet so the caller can register the token rules.
  # A block is required.
  def initialize(ignore_whitespace = false)
    @scanner = StringScanner.new('')
    @rules = RuleSet.new
    @ignore_whitespace = ignore_whitespace
    @ignore_newlines = ignore_whitespace
    @lineno = 1
    # Initialised here so src_pos works before the first feed.
    @cols_prev = 0
    @file_name = '<unknown>'
    yield @rules
  end

  # ignore_whitespace turns on ignore_newlines too
  def ignore_whitespace=(b)
    @ignore_whitespace = b
    @ignore_newlines = b
  end

  # Replace the input with +str+ and restart position tracking, so a lexer
  # that is reused for several inputs reports line numbers relative to the
  # current input.
  def feed(str)
    @scanner = StringScanner.new(str)
    @cols_prev = 0
    @lineno = 1
  end

  # Current position. The column is the scanner offset minus the offset
  # recorded at the last counted line break.
  def src_pos
    SourcePos.new(@lineno, @scanner.pos - @cols_prev, @file_name)
  end

  # Consume and return the next token, leaving its text in lval.
  #
  # Raises LexerError if no rule matches at the current position.
  def next_token!
    # Count a line break before any whitespace skipping consumes it.
    if @scanner.check(/^\s*\n/)
      @lineno += 1
      @cols_prev = @scanner.pos + 1
    end
    skip_whitespace
    @rules.each do |regexp, result|
      @lval = @scanner.scan(regexp)
      return result if @lval
    end
    rest = @scanner.rest
    # Show at most ten characters of the offending input.
    unexpect = rest.length < 10 ? rest : "#{rest[0, 10]}..."
    raise LexerError.new("Unexpected input #{unexpect}", src_pos)
  end

  # Return the next token without consuming it. Whitespace skipped on the
  # way to the token stays skipped; only the token match itself is undone.
  def peek_token
    tok = next_token!
    @scanner.unscan
    tok
  end

  # Text of the next token, without consuming it.
  def peek_lval
    peek_token
    @lval
  end

  # True if input remains after skipping ignorable whitespace.
  def more_tokens?
    skip_whitespace
    !@scanner.eos?
  end

  private

  # Skip whatever the current ignore_whitespace / ignore_newlines settings
  # say is ignorable.
  def skip_whitespace
    if @ignore_newlines && @ignore_whitespace
      @scanner.skip(/\s+/)
    elsif @ignore_whitespace
      @scanner.skip(/[ \t\r]+/)
    elsif @ignore_newlines
      @scanner.skip(/[\r\n]+/)
    end
  end
end
|
123
|
+
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'bibtex/bibliography'
|
2
|
+
require 'bibtex/entry'
|
3
|
+
require 'bibtex/field'
|
4
|
+
require 'bibtex/lexer'
|
5
|
+
|
6
|
+
module Bibtex
|
7
|
+
|
8
|
+
# Recursive-descent parser that turns BibTeX source into a Bibliography.
# All state lives in a single class-level lexer, so every method is a
# class method.
class Parser
  # Parse the BibTeX file at +filename+ and return a Bibliography.
  def self.parse(filename)
    parse_string File.read(filename)
  end

  # Parse BibTeX source held in +data+ and return a Bibliography.
  def self.parse_string(data)
    # A previous parse that aborted inside a value leaves the shared lexer
    # in whitespace-preserving mode; start every parse from the default.
    @lexer.ignore_whitespace = true
    @lexer.feed data

    b = Bibliography.new
    b << parse_entry while @lexer.more_tokens?
    b
  end

  # The methods below are internal helpers. A bare `private` does not
  # apply to `def self.` methods, so they are, and remain, publicly
  # callable.

  # Parse one `@type{key, field = value, ...}` entry.
  def self.parse_entry
    expect :at, '@'
    type = expect :id
    expect :lbrace, '{'
    key = expect :id

    e = Entry.new(type, key)
    while @lexer.peek_token != :rbrace
      expect :comma, ','
      e.add_field parse_field
    end

    expect :rbrace, '}'
    e
  end

  # Parse one `name = value` pair into a Field keyed by a symbol.
  def self.parse_field
    key = expect :id
    expect :equals, '='
    value = parse_value
    Field.new(key.intern, value)
  end

  # Parse a field value delimited by "...", {...} or nothing, and return
  # its text without the outer delimiters. Nested braces are kept in the
  # text.
  #
  # Raises RuntimeError if the input ends inside the value.
  def self.parse_value
    close = :rbrace
    brace_count = 1
    if @lexer.peek_token == :dquote
      expect :dquote
      close = :dquote
    elsif @lexer.peek_token == :lbrace
      expect :lbrace, '{'
    else
      # Not surrounded by quotes or braces
      brace_count = 0
    end

    str = ''.dup
    # Inside a value spaces are significant; line breaks are still dropped.
    @lexer.ignore_whitespace = false
    @lexer.ignore_newlines = true
    loop do
      raise 'Unexpected end of input' unless @lexer.more_tokens?

      if (@lexer.peek_token == :comma || @lexer.peek_token == :rbrace) && brace_count == 0
        # A field not delimited by "" or {}
        @lexer.ignore_whitespace = true
        return str
      end

      token = @lexer.next_token!

      # Within a quoted value, a double quote nested inside braces is
      # literal text, not the closing delimiter.
      if token == :dquote && close == :dquote && brace_count > 1
        str << @lexer.lval
        next
      end

      case token
      when :rbrace, close
        brace_count -= 1
        if brace_count == 0
          @lexer.ignore_whitespace = true
          return str
        end
        str << '}'
      when :lbrace
        str << '{'
        brace_count += 1
      else
        str << @lexer.lval
      end
    end
  end

  # Consume the next token and return its text.
  #
  # Raises RuntimeError, prefixed with the source position, if the token
  # is not +token+. +pretty+ is how the expected token is shown in that
  # message.
  def self.expect(token, pretty = nil)
    pretty ||= token.to_s
    got = @lexer.next_token!
    unless got == token
      raise "#{@lexer.src_pos}: Expected '#{pretty}' but found token '#{got}' (text='#{@lexer.lval}')"
    end
    @lexer.lval
  end

  # Token rules, tried in order; :cdata is the single-character catch-all.
  @lexer = Lexer.new(true) do |rules|
    rules.match(/@/, :at)
    rules.match(/\{/, :lbrace)
    rules.match(/\}/, :rbrace)
    rules.match(/\"/, :dquote)
    rules.match(/\=/, :equals)
    rules.match(/\,/, :comma)
    rules.match(/[\w\-:&]+/, :id)
    rules.match(/.+?/, :cdata)
  end
end
|
115
|
+
|
116
|
+
end
|