rbib 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest +19 -0
- data/README.rdoc +19 -0
- data/Rakefile +14 -0
- data/examples/example.bib +753 -0
- data/examples/glom_citeulike.rb +23 -0
- data/examples/tara_no_url.rb +15 -0
- data/lib/bibtex.rb +9 -0
- data/lib/bibtex/bibliography.rb +46 -0
- data/lib/bibtex/entry.rb +89 -0
- data/lib/bibtex/field.rb +17 -0
- data/lib/bibtex/lexer.rb +123 -0
- data/lib/bibtex/parser.rb +116 -0
- data/rbib.gemspec +32 -0
- data/test/run_unit_tests.rb +15 -0
- data/test/test_bibliography.rb +76 -0
- data/test/test_entry.rb +70 -0
- data/test/test_field.rb +17 -0
- data/test/test_lexer.rb +116 -0
- data/test/test_parser.rb +27 -0
- metadata +102 -0
data/examples/glom_citeulike.rb ADDED
@@ -0,0 +1,23 @@
#!/usr/bin/env ruby

#
# Download your CiteULike bibliography and strip
# unnecessary fields.
#

##### YOUR SETTINGS HERE #####
User = 'NickGasson'
BadFields = [:url]
##### NO NEED TO EDIT BELOW HERE #####

require 'net/http'
require 'uri'

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require "bibtex"

bibtex = Net::HTTP.get URI.parse("http://www.citeulike.org/bibtex/user/#{User}")
BibTeX::Parser.parse_string(bibtex).map do |entry|
  entry.reject_fields BadFields
end.save("#{User}.bib")
data/examples/tara_no_url.rb ADDED
@@ -0,0 +1,15 @@
#!/usr/bin/env ruby

#
# Strip URL fields from each BibTeX file on the command line.
# Write the output to filename.stripped.bib
#

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require "bibtex"

ARGV.each do |file|
  BibTeX::Parser.parse(file).map do |entry|
    entry.reject_fields [:url]
  end.save(file.sub(/\.bib$/, '.stripped.bib'))
end
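For reference, the script above processes every .bib file named on the command line and writes each result next to its input with a .stripped.bib suffix. A hypothetical invocation (the file name refs.bib is illustrative):

    ruby data/examples/tara_no_url.rb refs.bib   # writes refs.stripped.bib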
data/lib/bibtex.rb ADDED
data/lib/bibtex/bibliography.rb ADDED
@@ -0,0 +1,46 @@
require 'bibtex/entry'

module Bibtex

  class Bibliography
    attr_reader :entries

    def initialize
      @entries = {}
    end

    def <<(e)
      if e.kind_of? Entry then
        $stderr.print "Warning: Bibtex duplicate entry <#{e.key}>\n" if @entries[e.key]
        @entries[e.key] = e
      else
        raise 'Cannot add non-entries to bibliography'
      end
    end

    def [](key)
      @entries[key] or raise "No entry #{key}"
    end

    # Transform the entries in some way and return a
    # new bibliography
    def map
      r = Bibliography.new
      @entries.each do |k, e|
        r << yield(e)
      end
      return r
    end

    def save(filename)
      f = File.new(filename, 'w')
      f.puts self.to_s
      f.close
    end

    def to_s
      @entries.keys.sort.collect { |k| @entries[k].to_s }.join
    end
  end

end
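The Bibliography class above can also be driven directly, without going through the parser. A minimal sketch, assuming the Entry and Field classes from the other files in this diff (field.rb's contents are not expanded here); the load path and citation key are illustrative:

    $LOAD_PATH.unshift 'data/lib'        # hypothetical; point this at the gem's lib directory
    require 'bibtex/bibliography'
    require 'bibtex/entry'
    require 'bibtex/field'

    # Build one entry by hand and collect it into a bibliography.
    entry = Bibtex::Entry.new(Bibtex::EntryType::Article, 'gasson2009')   # key is illustrative
    entry.add_field :author, 'Nick Gasson'
    entry.add_field :title,  'An example article'

    bib = Bibtex::Bibliography.new
    bib << entry

    puts bib.to_s              # BibTeX text, entries sorted by key
    bib.save('example.bib')    # the same text written to a file

Per the << implementation above, a duplicate key only prints a warning to stderr and the later entry wins.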
data/lib/bibtex/entry.rb ADDED
@@ -0,0 +1,89 @@
module Bibtex

  # A single entry in a bibliography
  class Entry
    attr_reader :type, :key

    def initialize(type, key)
      @type = type
      @key = key
      @fields = {}
      $stderr.print key,"\n" if $DEBUG
    end

    def add_field(obj, value = nil)
      if obj.kind_of? Field then
        @fields[obj.key] = obj
      else
        @fields[obj] = Field.new(obj, value)
      end
    end

    def [](key)
      f = @fields[key]
      f = @fields[key.to_s.downcase.to_sym] if !f
      if f then
        f.value
      else
        # raise "No field with key #{key}"
        ""
      end
    end

    def has? key
      field = self[key]
      field != nil and field.strip != ''
    end

    # Make sure the field exists and has meaningful data
    def required key
      if !has?(key)
        $stderr.print self
        raise "Key #{key} does not exist"
      end
      self[key]
    end


    def to_s
      fs = @fields.collect { |k, f| " #{f.to_s}" }.sort.join ",\n"
      "@#{@type}{#{@key},\n#{fs}\n}\n\n"
    end

    def reject_fields(keys)
      r = Entry.new(@type, @key)
      @fields.each do |k, f|
        r.add_field f unless keys.index k
      end
      return r
    end

    def select_fields(keys)
      r = Entry.new(@type, @key)
      @fields.each do |k, f|
        r.add_field f if keys.index k
      end
      return r
    end

  end

  # Different types of entries
  module EntryType
    Book = 'book'
    Article = 'article'
    Booklet = 'booklet'
    Conference = 'conference'
    InBook = 'inbook'
    InCollection = 'incollection'
    InProceedings = 'inproceedings'
    Manual = 'manual'
    MastersThesis = 'mastersthesis'
    Misc = 'misc'
    PhDThesis = 'phdthesis'
    Proceedings = 'proceedings'
    TechReport = 'techreport'
    Unpublished = 'unpublished'
  end

end
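reject_fields and select_fields are what the two example scripts lean on; both return a new Entry rather than mutating the receiver. A small sketch under the same assumptions as the previous one (the key and field values are illustrative):

    entry = Bibtex::Entry.new(Bibtex::EntryType::Misc, 'demo')   # key is illustrative
    entry.add_field :author, 'A. Author'
    entry.add_field :title,  'A demo entry'
    entry.add_field :url,    'http://example.com/demo'

    slim   = entry.select_fields [:author, :title]   # keep only these keys
    no_url = entry.reject_fields [:url]              # keep everything except :url

    puts entry[:url]    # => http://example.com/demo
    puts no_url[:url]   # missing fields read back as "" rather than raising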
data/lib/bibtex/field.rb ADDED
data/lib/bibtex/lexer.rb ADDED
@@ -0,0 +1,123 @@
require 'strscan'

module Bibtex
  class SourcePos
    attr_reader :line, :column, :file

    def initialize(line, column, file)
      @line = line
      @column = column
      @file = file
    end

    def to_s
      "#{file}:#{line}"
    end
  end

  class RuleSet
    def initialize
      @rules = []
    end

    def match(regexp, result)
      @rules << [regexp, result]
    end

    def literals(words)
      words.each do |w|
        match(/#{w}/, w)
      end
    end

    def each
      @rules.each do |pair|
        yield pair[0], pair[1]
      end
    end
  end

  class LexerError < RuntimeError
    attr_reader :src_pos

    def initialize(mess, src_pos)
      super(mess)
      @src_pos = src_pos
    end
  end

  class Lexer
    attr_reader :lval, :ignore_whitespace
    attr_accessor :ignore_newlines, :file_name

    def initialize(ignore_whitespace = false)
      @scanner = StringScanner.new('')
      @rules = RuleSet.new
      @ignore_whitespace = ignore_whitespace
      @ignore_newlines = ignore_whitespace
      @lineno = 1
      @file_name = '<unknown>'
      yield @rules
    end

    # ignore_whitespace turns on ignore_newlines too
    def ignore_whitespace=(b)
      @ignore_whitespace = b
      @ignore_newlines = b
    end

    def feed(str)
      @scanner = StringScanner.new(str)
      @cols_prev = 0
    end

    def src_pos
      SourcePos.new(@lineno, @scanner.pos - @cols_prev, @file_name)
    end

    def next_token!
      if @scanner.check(/^\s*\n/) then
        @lineno += 1
        @cols_prev = @scanner.pos + 1
      end
      skip_whitespace
      @rules.each do |regexp, result|
        return result if @lval = @scanner.scan(regexp)
      end
      unexpect = if @scanner.rest.length < 10 then
                   @scanner.rest
                 else
                   "#{@scanner.rest.first 10}..."
                 end
      raise LexerError.new("Unexpected input #{unexpect}", src_pos)
    end

    def peek_token
      tok = self.next_token!
      @scanner.unscan
      return tok
    end

    def peek_lval
      peek_token
      @lval
    end

    def more_tokens?
      skip_whitespace
      not @scanner.eos?
    end

    private

    def skip_whitespace
      if @ignore_newlines and @ignore_whitespace then
        @scanner.skip(/\s+/)
      elsif @ignore_whitespace then
        @scanner.skip(/[ \t\r]+/)
      elsif @ignore_newlines then
        @scanner.skip(/[\r\n]+/)
      end
    end
  end
end
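The lexer is generic rather than BibTeX-specific: a block supplies regexp-to-token rules, next_token! returns the first token whose pattern matches at the current position, and the matched text is left in lval. A minimal sketch with a made-up rule set:

    require 'bibtex/lexer'

    # Recognise words and equals signs, skipping whitespace between tokens.
    lexer = Bibtex::Lexer.new(true) do |rules|
      rules.match(/=/, :equals)
      rules.match(/\w+/, :word)
    end

    lexer.feed 'year = 2009'
    while lexer.more_tokens?
      tok = lexer.next_token!
      puts "#{tok} #{lexer.lval}"   # word year, equals =, word 2009
    end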
data/lib/bibtex/parser.rb ADDED
@@ -0,0 +1,116 @@
require 'bibtex/bibliography'
require 'bibtex/entry'
require 'bibtex/field'
require 'bibtex/lexer'

module Bibtex

  class Parser
    def self.parse(filename)
      parse_string File.read(filename)
    end

    def self.parse_string(data)
      @lexer.feed data

      b = Bibliography.new
      while @lexer.more_tokens?
        b << parse_entry
      end
      return b
    end

    private

    def self.parse_entry
      expect :at, '@'
      type = expect :id
      expect :lbrace, '{'
      key = expect :id

      e = Entry.new(type, key)
      while @lexer.peek_token != :rbrace
        expect :comma, ','
        e.add_field parse_field
      end

      expect :rbrace, '}'
      return e
    end

    def self.parse_field
      key = expect :id
      expect :equals, '='
      value = parse_value
      Field.new(key.intern, value)
    end

    def self.parse_value
      close = :rbrace
      brace_count = 1
      if @lexer.peek_token == :dquote then
        expect :dquote
        close = :dquote
      elsif @lexer.peek_token == :lbrace then
        expect :lbrace, '{'
      else
        # Not surrounded by quotes or braces
        brace_count = 0
      end

      str = ''
      @lexer.ignore_whitespace = false
      @lexer.ignore_newlines = true
      loop do
        unless @lexer.more_tokens?
          raise 'Unexpected end of input'
        end

        if (@lexer.peek_token == :comma \
            or @lexer.peek_token == :rbrace) and brace_count == 0 then
          # A field not delimited by "" or {}
          @lexer.ignore_whitespace = true
          return str
        end

        case @lexer.next_token!
        when :rbrace, close
          brace_count -= 1
          if brace_count == 0 then
            @lexer.ignore_whitespace = true
            return str
          else
            str += '}'
          end
        when :lbrace
          str += '{'
          brace_count += 1
        else
          str += @lexer.lval
        end
      end
    end

    def self.expect(token, pretty = nil)
      pretty ||= token.to_s
      got = @lexer.next_token!
      unless got == token then
        raise "#{@lexer.src_pos}: Expected '#{pretty}' but found token '#{got}' (text='#{@lexer.lval}')"
      else
        @lexer.lval
      end
    end

    @lexer = Lexer.new(true) do |rules|
      rules.match(/@/,:at)
      rules.match(/\{/,:lbrace)
      rules.match(/\}/,:rbrace)
      rules.match(/\"/,:dquote)
      rules.match(/\=/,:equals)
      rules.match(/\,/,:comma)
      rules.match(/[\w\-:&]+/,:id)
      rules.match(/.+?/,:cdata)
    end
  end

end