rbib 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # Download your CiteULike bibliography and strip
5
+ # unnecessary fields.
6
+ #
7
+
8
+ ##### YOUR SETTINGS HERE #####
9
User = 'NickGasson'
BadFields = [:url]
##### NO NEED TO EDIT BELOW HERE #####

require 'net/http'
require 'uri'

# Make the bundled library loadable when the script is run from a checkout.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require "bibtex"

# Download the user's BibTeX export, drop every field listed in BadFields
# from each entry, and write the result to "<User>.bib" in the current
# directory.
#
# The library's namespace is spelled `Bibtex`; the previous spelling
# `BibTeX` names a constant the library never defines.
bibtex = Net::HTTP.get URI.parse("http://www.citeulike.org/bibtex/user/#{User}")
cleaned = Bibtex::Parser.parse_string(bibtex).map do |entry|
  entry.reject_fields BadFields
end
cleaned.save("#{User}.bib")
23
+
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # Strip URL fields from each BibTeX file on the command line.
5
+ # Write the output to filename.stripped.bib
6
+ #
7
+
8
# Make the bundled library loadable when the script is run from a checkout.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require "bibtex"

ARGV.each do |file|
  # Remove a trailing ".bib" (if any) and then append the new suffix, so the
  # output name always differs from the input name. A plain substitution
  # leaves names without a ".bib" suffix unchanged, which would make the
  # script overwrite its own input.
  output = "#{file.sub(/\.bib\z/, '')}.stripped.bib"

  # The library's namespace is spelled `Bibtex`, not `BibTeX`.
  stripped = Bibtex::Parser.parse(file).map do |entry|
    entry.reject_fields [:url]
  end
  stripped.save(output)
end
@@ -0,0 +1,9 @@
1
# Namespace for the library. It is declared empty here; the files required
# after it reopen the module to add their classes.
module Bibtex; end
4
+
5
+ require 'bibtex/parser'
6
+ require 'bibtex/bibliography'
7
+ require 'bibtex/entry'
8
+ require 'bibtex/field'
9
+ require 'bibtex/lexer'
@@ -0,0 +1,46 @@
1
+ require 'bibtex/entry'
2
+
3
+ module Bibtex
4
+
5
# A collection of entries indexed by their citation key.
class Bibliography
  # The underlying Hash of citation key => entry.
  attr_reader :entries

  def initialize
    @entries = {}
  end

  # Add an entry, replacing any existing entry with the same key (a warning
  # is written to stderr in that case).
  #
  # Returns the bibliography itself so that appends can be chained.
  # Raises RuntimeError if +e+ is not an Entry.
  def <<(e)
    raise 'Cannot add non-entries to bibliography' unless e.kind_of? Entry

    $stderr.puts "Warning: Bibtex duplicate entry <#{e.key}>" if @entries.key?(e.key)
    @entries[e.key] = e
    self
  end

  # Look up an entry by citation key.
  # Raises RuntimeError if no entry has that key.
  def [](key)
    @entries.fetch(key) { raise "No entry #{key}" }
  end

  # Transform the entries in some way and return a
  # new bibliography. The block receives each entry and must return an
  # Entry; the receiver is left untouched.
  def map
    result = Bibliography.new
    @entries.each_value { |entry| result << yield(entry) }
    result
  end

  # Write the bibliography to +filename+, truncating any existing file.
  # The block form of File.open closes the handle even if writing raises.
  def save(filename)
    File.open(filename, 'w') { |f| f.puts to_s }
  end

  # Concatenation of every entry's text, ordered by citation key.
  def to_s
    @entries.sort_by { |key, _entry| key }.map { |_key, entry| entry.to_s }.join
  end
end
45
+
46
+ end
@@ -0,0 +1,89 @@
1
+ module Bibtex
2
+
3
# A single entry in a bibliography: an entry type, a citation key and a set
# of fields. Field names are matched without regard to case, so a field
# stored as :URL is found, selected or rejected by :url as well.
class Entry
  attr_reader :type, :key

  def initialize(type, key)
    @type = type
    @key = key
    @fields = {}
    $stderr.print key, "\n" if $DEBUG
  end

  # Add a field. Accepts either a Field object, or a field name plus value
  # from which a Field is built. A field with the same name is replaced.
  def add_field(obj, value = nil)
    field = obj.kind_of?(Field) ? obj : Field.new(obj, value)
    @fields[field.key] = field
  end

  # Value of the named field, or an empty string when there is no such
  # field.
  def [](key)
    field = find_field(key)
    field ? field.value : ''
  end

  # True when the field exists and holds something other than whitespace.
  # The value is converted with to_s first so non-String values (for
  # example an Integer year) do not raise.
  def has? key
    value = self[key]
    !value.nil? && !value.to_s.strip.empty?
  end

  # Make sure the field exists and has meaningful data, then return its
  # value. Otherwise the entry is printed to stderr and RuntimeError is
  # raised.
  def required key
    unless has?(key)
      $stderr.print self
      raise "Key #{key} does not exist"
    end
    self[key]
  end

  # BibTeX text of the entry, with the fields in sorted order.
  def to_s
    fs = @fields.collect { |_name, f| " #{f.to_s}" }.sort.join ",\n"
    "@#{@type}{#{@key},\n#{fs}\n}\n\n"
  end

  # New entry with the same type and key, without the fields named in
  # +keys+.
  def reject_fields(keys)
    copy_with_fields { |name| !listed?(keys, name) }
  end

  # New entry with the same type and key, keeping only the fields named in
  # +keys+.
  def select_fields(keys)
    copy_with_fields { |name| listed?(keys, name) }
  end

  private

  # Find a field by name: exact key first, then the lower-cased symbol,
  # then any stored name that differs only in case.
  def find_field(key)
    exact = @fields[key]
    return exact if exact

    wanted = key.to_s.downcase
    by_symbol = @fields[wanted.to_sym]
    return by_symbol if by_symbol

    pair = @fields.find { |name, _field| name.to_s.downcase == wanted }
    pair && pair.last
  end

  # Build a copy containing the fields for whose name the block is true.
  def copy_with_fields
    copy = Entry.new(@type, @key)
    @fields.each { |name, field| copy.add_field(field) if yield(name) }
    copy
  end

  # Whether +name+ appears in +keys+, ignoring case.
  def listed?(keys, name)
    wanted = name.to_s.downcase
    keys.any? { |k| k.to_s.downcase == wanted }
  end
end
70
+
71
# Different types of entries, as the lower-case names used after the "@"
# in BibTeX source. The strings are frozen so these shared constants cannot
# be modified in place by accident.
module EntryType
  Book = 'book'.freeze
  Article = 'article'.freeze
  Booklet = 'booklet'.freeze
  Conference = 'conference'.freeze
  InBook = 'inbook'.freeze
  InCollection = 'incollection'.freeze
  InProceedings = 'inproceedings'.freeze
  Manual = 'manual'.freeze
  MastersThesis = 'mastersthesis'.freeze
  Misc = 'misc'.freeze
  PhDThesis = 'phdthesis'.freeze
  Proceedings = 'proceedings'.freeze
  TechReport = 'techreport'.freeze
  Unpublished = 'unpublished'.freeze
end
88
+
89
+ end
@@ -0,0 +1,17 @@
1
+ module Bibtex
2
+
3
# One "name = {value}" pair belonging to an entry, e.g. author = {Foo}.
class Field
  attr_reader :key, :value

  def initialize(key, value)
    @key, @value = key, value
  end

  # The field as it is written in BibTeX source, value wrapped in braces.
  def to_s
    format('%s = {%s}', key, value)
  end
end
16
+
17
+ end
@@ -0,0 +1,123 @@
1
+ require 'strscan'
2
+
3
+ module Bibtex
4
# A location in some input: line, column and the name of the file.
class SourcePos
  attr_reader :line, :column, :file

  def initialize(line, column, file)
    @line, @column, @file = line, column, file
  end

  # Rendered as "file:line"; the column is not part of the text.
  def to_s
    format('%s:%s', file, line)
  end
end
17
+
18
# An ordered list of (regexp, result) pairs describing the tokens a lexer
# recognises. Rules are yielded in the order they were added.
class RuleSet
  def initialize
    @rules = []
  end

  # Append a rule: input matching +regexp+ produces +result+.
  def match(regexp, result)
    @rules << [regexp, result]
  end

  # Append one rule per word that matches exactly that text and produces the
  # word itself. Each word is escaped, so characters that are special in a
  # regexp ("+", ".", "(" ...) are matched literally instead of raising or
  # acting as operators.
  def literals(words)
    words.each do |w|
      match(Regexp.new(Regexp.escape(w.to_s)), w)
    end
  end

  # Yield the regexp and result of every rule, in insertion order.
  # Without a block an Enumerator over the same pairs is returned.
  def each
    return enum_for(:each) unless block_given?

    @rules.each { |regexp, result| yield regexp, result }
  end
end
39
+
40
# Error raised for input that cannot be tokenised. In addition to the
# message it carries the source position that was passed in.
class LexerError < RuntimeError
  attr_reader :src_pos

  def initialize(mess, src_pos)
    @src_pos = src_pos
    super(mess)
  end
end
48
+
49
# Rule-driven tokeniser on top of StringScanner. The constructor yields a
# RuleSet to the caller's block; at each step the rules are tried in order
# and the first one that matches at the current position decides the token.
class Lexer
  # lval is the text matched by the most recent token.
  attr_reader :lval, :ignore_whitespace
  attr_accessor :ignore_newlines, :file_name

  # ignore_whitespace - initial value for both whitespace and newline
  #                     skipping.
  # The block receives the RuleSet to be filled with token rules.
  def initialize(ignore_whitespace = false)
    @scanner = StringScanner.new('')
    @rules = RuleSet.new
    @ignore_whitespace = ignore_whitespace
    @ignore_newlines = ignore_whitespace
    @file_name = '<unknown>'
    yield @rules
  end

  # ignore_whitespace turns on ignore_newlines too (and turns it off again
  # when set to false); assign ignore_newlines afterwards to differ.
  def ignore_whitespace=(b)
    @ignore_whitespace = b
    @ignore_newlines = b
  end

  # Start scanning a new string from its beginning.
  def feed(str)
    @scanner = StringScanner.new(str)
  end

  # Position of the scan pointer as a SourcePos. Line (1-based) and column
  # (0-based, counted from the start of the line) are derived from the text
  # consumed so far, so they are unaffected by peeking, by runs of blank
  # lines, and by earlier strings that were fed to this lexer.
  def src_pos
    consumed = @scanner.string.byteslice(0, @scanner.pos)
    line_start = (consumed.rindex("\n") || -1) + 1
    SourcePos.new(consumed.count("\n") + 1, consumed.length - line_start, @file_name)
  end

  # Consume and return the next token, leaving its text in lval.
  # Raises LexerError, quoting at most the first 10 characters of the
  # remaining input, when no rule matches.
  def next_token!
    skip_whitespace
    @rules.each do |regexp, result|
      return result if (@lval = @scanner.scan(regexp))
    end
    rest = @scanner.rest
    unexpect = rest.length < 10 ? rest : "#{rest[0, 10]}..."
    raise LexerError.new("Unexpected input #{unexpect}", src_pos)
  end

  # Return the next token without consuming it. Only the token itself is
  # un-scanned; whitespace skipped on the way to it stays skipped.
  def peek_token
    tok = next_token!
    @scanner.unscan
    tok
  end

  # Text of the next token, without consuming it.
  def peek_lval
    peek_token
    @lval
  end

  # True while input remains after skipping whatever the current mode
  # ignores.
  def more_tokens?
    skip_whitespace
    !@scanner.eos?
  end

  private

  # Advance past the characters the current mode says to ignore.
  def skip_whitespace
    if @ignore_newlines && @ignore_whitespace
      @scanner.skip(/\s+/)
    elsif @ignore_whitespace
      @scanner.skip(/[ \t\r]+/)
    elsif @ignore_newlines
      @scanner.skip(/[\r\n]+/)
    end
  end
end
123
+ end
@@ -0,0 +1,116 @@
1
+ require 'bibtex/bibliography'
2
+ require 'bibtex/entry'
3
+ require 'bibtex/field'
4
+ require 'bibtex/lexer'
5
+
6
+ module Bibtex
7
+
8
# Parser turning BibTeX source text into a Bibliography of Entry objects.
#
# All methods are class methods and share one class-level lexer, so parsing
# is not reentrant. Only +parse+ and +parse_string+ are intended entry
# points; the remaining methods are helpers. (A bare `private` does not
# apply to `def self.` methods, so the helpers have always been publicly
# callable and are left that way.)
class Parser
  # Parse the file at +filename+ and return a Bibliography.
  def self.parse(filename)
    parse_string File.read(filename)
  end

  # Parse BibTeX source held in a string and return a Bibliography.
  # Raises RuntimeError (LexerError for untokenisable input) on malformed
  # source.
  def self.parse_string(data)
    # Put the shared lexer back into its between-values mode in case an
    # earlier parse was abandoned part-way through.
    @lexer.ignore_whitespace = true
    @lexer.feed data

    b = Bibliography.new
    b << parse_entry while @lexer.more_tokens?
    b
  end

  # Parse one "@type{key, field = value, ...}" entry.
  def self.parse_entry
    expect :at, '@'
    type = expect :id
    expect :lbrace, '{'
    key = expect :id

    e = Entry.new(type, key)
    while @lexer.peek_token != :rbrace
      expect :comma, ','
      # Accept a trailing comma after the last field.
      break if @lexer.peek_token == :rbrace

      e.add_field parse_field
    end

    expect :rbrace, '}'
    e
  end

  # Parse one "name = value" pair into a Field whose key is a Symbol.
  def self.parse_field
    key = expect :id
    expect :equals, '='
    value = parse_value
    Field.new(key.intern, value)
  end

  # Parse a field value and return its text without the outer delimiters.
  #
  # Three forms are handled: "quoted", {braced} and bare (for example a
  # number), which runs up to the next top-level comma or closing brace and
  # has trailing whitespace removed. Nested braces are kept in the text,
  # and a double quote nested inside braces is kept as a double quote.
  #
  # NOTE(review): while a value is scanned the lexer skips line breaks, so a
  # newline inside a value is dropped rather than turned into a space.
  def self.parse_value
    quoted = false
    depth = 1 # the opening delimiter counts as one level
    case @lexer.peek_token
    when :dquote
      expect :dquote
      quoted = true
    when :lbrace
      expect :lbrace, '{'
    else
      # Not surrounded by quotes or braces
      depth = 0
    end
    bare = depth.zero?

    str = ''.dup
    # Inside a value spaces are significant; only line breaks are skipped.
    @lexer.ignore_whitespace = false
    @lexer.ignore_newlines = true
    loop do
      raise 'Unexpected end of input' unless @lexer.more_tokens?

      # A bare value ends just before a comma or closing brace that is not
      # nested inside braces.
      if depth.zero? && [:comma, :rbrace].include?(@lexer.peek_token)
        return str.rstrip
      end

      case @lexer.next_token!
      when :lbrace
        depth += 1
        str << '{'
      when :rbrace
        depth -= 1
        return str if depth.zero? && !bare

        str << '}'
      when :dquote
        # Only a quote at the outermost level closes a quoted value.
        return str if quoted && depth == 1

        str << '"'
      else
        str << @lexer.lval
      end
    end
  ensure
    # Always hand the shared lexer back in whitespace-skipping mode, also
    # when an error interrupts the value.
    @lexer.ignore_whitespace = true
  end

  # Consume the next token and return its text.
  # Raises RuntimeError naming the source position when the token is not
  # +token+; +pretty+ is the name shown in that message.
  def self.expect(token, pretty = nil)
    pretty ||= token.to_s
    got = @lexer.next_token!
    unless got == token
      raise "#{@lexer.src_pos}: Expected '#{pretty}' but found token '#{got}' (text='#{@lexer.lval}')"
    end

    @lexer.lval
  end

  # Token rules, tried in this order. The last rule matches any single
  # character other than a newline.
  @lexer = Lexer.new(true) do |rules|
    rules.match(/@/,:at)
    rules.match(/\{/,:lbrace)
    rules.match(/\}/,:rbrace)
    rules.match(/\"/,:dquote)
    rules.match(/\=/,:equals)
    rules.match(/\,/,:comma)
    rules.match(/[\w\-:&]+/,:id)
    rules.match(/.+?/,:cdata)
  end
end
115
+
116
+ end