rbib 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,23 @@
1
#!/usr/bin/env ruby

#
# Download your CiteULike bibliography and strip
# unnecessary fields.
#

##### YOUR SETTINGS HERE #####
User = 'NickGasson'
BadFields = [:url]
##### NO NEED TO EDIT BELOW HERE #####

require 'net/http'
require 'uri'

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require "bibtex"

# Fetch the exported BibTeX for the configured user. The status is checked
# explicitly so that an HTTP error page is never handed to the parser.
uri = URI.parse("http://www.citeulike.org/bibtex/user/#{User}")
response = Net::HTTP.get_response(uri)
unless response.is_a?(Net::HTTPSuccess)
  abort "Could not download bibliography for #{User}: HTTP #{response.code} #{response.message}"
end

# The library namespace is `Bibtex` (not `BibTeX`). Each entry is copied
# without the fields listed in BadFields and the result is written to
# "<User>.bib" in the current directory.
Bibtex::Parser.parse_string(response.body).map do |entry|
  entry.reject_fields BadFields
end.save("#{User}.bib")
23
+
@@ -0,0 +1,15 @@
1
#!/usr/bin/env ruby

#
# Strip URL fields from each BibTeX file on the command line.
# Write the output to filename.stripped.bib
#

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require "bibtex"

ARGV.each do |file|
  # Drop a trailing ".bib" (if any) and always append ".stripped.bib", so the
  # output path can never be the same as the input path, even when the input
  # file has no ".bib" extension.
  output = "#{file.sub(/\.bib\z/, '')}.stripped.bib"

  # The library namespace is `Bibtex` (not `BibTeX`).
  Bibtex::Parser.parse(file).map do |entry|
    entry.reject_fields [:url]
  end.save(output)
end
@@ -0,0 +1,9 @@
1
# Top-level namespace of the library. It is intentionally empty here: the
# classes that live in it are defined by the files required after this
# declaration.
module Bibtex
end
4
+
5
+ require 'bibtex/parser'
6
+ require 'bibtex/bibliography'
7
+ require 'bibtex/entry'
8
+ require 'bibtex/field'
9
+ require 'bibtex/lexer'
@@ -0,0 +1,46 @@
1
+ require 'bibtex/entry'
2
+
3
module Bibtex

  # An in-memory collection of Entry objects, stored in a hash keyed by
  # each entry's key.
  class Bibliography
    # The underlying hash of entry key => Entry.
    attr_reader :entries

    def initialize
      @entries = {}
    end

    # Add an entry. An existing entry with the same key is replaced after a
    # warning has been written to $stderr.
    #
    # Raises RuntimeError when +e+ is not an Entry.
    # Returns the entry that was added.
    def <<(e)
      raise 'Cannot add non-entries to bibliography' unless e.kind_of? Entry

      $stderr.print "Warning: Bibtex duplicate entry <#{e.key}>\n" if @entries[e.key]
      @entries[e.key] = e
    end

    # Look up an entry by key. Raises RuntimeError when the key is unknown.
    def [](key)
      @entries[key] || raise("No entry #{key}")
    end

    # Transform the entries in some way and return a
    # new bibliography. The block receives each entry and must return
    # an Entry.
    def map
      result = Bibliography.new
      @entries.each_value { |entry| result << yield(entry) }
      result
    end

    # Write the bibliography to +filename+, replacing any existing file.
    # The block form of File.open closes the handle even if writing fails.
    def save(filename)
      File.open(filename, 'w') { |f| f.puts to_s }
    end

    # Concatenation of every entry's text, ordered by entry key.
    def to_s
      @entries.keys.sort.map { |key| @entries[key].to_s }.join
    end
  end

end
@@ -0,0 +1,89 @@
1
module Bibtex

  # A single entry in a bibliography
  class Entry
    attr_reader :type, :key

    def initialize(type, key)
      @type = type
      @key = key
      @fields = {}
      $stderr.print key, "\n" if $DEBUG
    end

    # Add a field, given either a Field object or a key/value pair.
    # The field is stored under its own key, replacing any field already
    # stored under exactly that key. Returns the stored Field.
    def add_field(obj, value = nil)
      field = obj.kind_of?(Field) ? obj : Field.new(obj, value)
      @fields[field.key] = field
    end

    # Value of the field named +key+, or "" when there is no such field.
    # Field names are matched exactly first and then without regard to
    # case, so :url also finds a field stored as :URL.
    def [](key)
      field = find_field(key)
      field ? field.value : ''
    end

    # True when the field exists and its value is not blank. The value is
    # converted with to_s first, so non-String values (e.g. an Integer
    # year) are handled as well.
    def has?(key)
      !self[key].to_s.strip.empty?
    end

    # Make sure the field exists and has meaningful data.
    # Returns the value; otherwise prints the entry to $stderr and raises
    # RuntimeError.
    def required(key)
      unless has?(key)
        $stderr.print self
        raise "Key #{key} does not exist"
      end
      self[key]
    end

    # BibTeX source text of the entry, with the fields sorted by their
    # rendered text.
    def to_s
      fs = @fields.values.map { |f| " #{f}" }.sort.join(",\n")
      "@#{@type}{#{@key},\n#{fs}\n}\n\n"
    end

    # New entry with the same type and key but without the fields named in
    # +keys+ (compared case-insensitively).
    def reject_fields(keys)
      unwanted = keys.map { |k| normalize(k) }
      copy_with_fields { |k| !unwanted.include?(normalize(k)) }
    end

    # New entry with the same type and key containing only the fields named
    # in +keys+ (compared case-insensitively).
    def select_fields(keys)
      wanted = keys.map { |k| normalize(k) }
      copy_with_fields { |k| wanted.include?(normalize(k)) }
    end

    private

    # Canonical form used to compare field names regardless of case or of
    # being a String or a Symbol.
    def normalize(key)
      key.to_s.downcase
    end

    # Find a stored field: exact key, then the down-cased symbol, then any
    # stored key that is equal when case is ignored.
    def find_field(key)
      exact = @fields[key] || @fields[key.to_s.downcase.to_sym]
      return exact if exact

      wanted = normalize(key)
      pair = @fields.find { |k, _| normalize(k) == wanted }
      pair && pair.last
    end

    # Build a copy of this entry holding the fields for which the block,
    # called with the stored field key, returns true.
    def copy_with_fields
      copy = Entry.new(@type, @key)
      @fields.each { |k, f| copy.add_field(f) if yield(k) }
      copy
    end

  end

  # Different types of entries
  module EntryType
    Book = 'book'.freeze
    Article = 'article'.freeze
    Booklet = 'booklet'.freeze
    Conference = 'conference'.freeze
    InBook = 'inbook'.freeze
    InCollection = 'incollection'.freeze
    InProceedings = 'inproceedings'.freeze
    Manual = 'manual'.freeze
    MastersThesis = 'mastersthesis'.freeze
    Misc = 'misc'.freeze
    PhDThesis = 'phdthesis'.freeze
    Proceedings = 'proceedings'.freeze
    TechReport = 'techreport'.freeze
    Unpublished = 'unpublished'.freeze
  end

end
@@ -0,0 +1,17 @@
1
module Bibtex

  # A field within an entry E.g. author = {Foo}
  class Field
    # The field name and its value, exactly as given to the constructor.
    attr_reader :key, :value

    def initialize(key, value)
      @key = key
      @value = value
    end

    # BibTeX source text of the field, with the value wrapped in braces.
    def to_s
      "#{@key} = {#{@value}}"
    end
  end

end
@@ -0,0 +1,123 @@
1
+ require 'strscan'
2
+
3
module Bibtex
  # A position in the input, used when reporting errors.
  class SourcePos
    attr_reader :line, :column, :file

    def initialize(line, column, file)
      @line = line
      @column = column
      @file = file
    end

    # "file:line" (the column is not included).
    def to_s
      "#{file}:#{line}"
    end
  end

  # An ordered list of [regexp, result] pairs; the lexer tries them in the
  # order in which they were added.
  class RuleSet
    def initialize
      @rules = []
    end

    # Register +regexp+; +result+ is what the lexer returns when it matches.
    def match(regexp, result)
      @rules << [regexp, result]
    end

    # Register each word as a rule that matches the word literally and
    # yields the word itself as the result. The text is escaped so words
    # containing regexp metacharacters (e.g. "+") match themselves.
    def literals(words)
      words.each do |w|
        match(/#{Regexp.escape(w.to_s)}/, w)
      end
    end

    # Yield every rule as (regexp, result), in insertion order.
    def each
      @rules.each do |regexp, result|
        yield regexp, result
      end
    end
  end

  # Raised when no rule matches the input; carries the SourcePos at which
  # lexing stopped.
  class LexerError < RuntimeError
    attr_reader :src_pos

    def initialize(mess, src_pos)
      super(mess)
      @src_pos = src_pos
    end
  end

  # A rule-driven tokenizer built on StringScanner.
  class Lexer
    attr_reader :lval, :ignore_whitespace
    attr_accessor :ignore_newlines, :file_name

    # The optional block receives the RuleSet so the caller can register
    # the token rules.
    def initialize(ignore_whitespace = false)
      @scanner = StringScanner.new('')
      @rules = RuleSet.new
      @ignore_whitespace = ignore_whitespace
      @ignore_newlines = ignore_whitespace
      @lineno = 1
      @cols_prev = 0 # scanner offset at which the current line starts
      @file_name = '<unknown>'
      yield @rules if block_given?
    end

    # ignore_whitespace turns on ignore_newlines too
    def ignore_whitespace=(b)
      @ignore_whitespace = b
      @ignore_newlines = b
    end

    # Start lexing +str+ from the beginning; position tracking restarts at
    # line 1 so a reused lexer reports positions relative to the new input.
    def feed(str)
      @scanner = StringScanner.new(str)
      @lineno = 1
      @cols_prev = 0
    end

    # Current position: line number, offset within the line, file name.
    def src_pos
      SourcePos.new(@lineno, @scanner.pos - @cols_prev, @file_name)
    end

    # Consume the next token and return the result of the first matching
    # rule; the matched text is then available through #lval.
    # Raises LexerError when no rule matches.
    def next_token!
      skip_whitespace
      @rules.each do |regexp, result|
        @lval = @scanner.scan(regexp)
        next unless @lval

        track_newlines(@lval)
        return result
      end

      rest = @scanner.rest
      unexpect = rest.length < 10 ? rest : "#{rest[0, 10]}..."
      raise LexerError.new("Unexpected input #{unexpect}", src_pos)
    end

    # Like next_token!, but the token is not consumed (ignorable whitespace
    # in front of it is). #lval is set to the peeked text.
    def peek_token
      # Skip first so that only the token itself has to be undone below.
      skip_whitespace
      line = @lineno
      cols = @cols_prev
      tok = next_token!
      @scanner.unscan
      @lineno = line
      @cols_prev = cols
      tok
    end

    # Text of the next token, without consuming it.
    def peek_lval
      peek_token
      @lval
    end

    # True when input remains after skipping ignorable whitespace.
    def more_tokens?
      skip_whitespace
      !@scanner.eos?
    end

    private

    # Consume the whitespace selected by the ignore_* flags, counting any
    # newlines that are skipped.
    def skip_whitespace
      pattern = if @ignore_newlines && @ignore_whitespace
                  /\s+/
                elsif @ignore_whitespace
                  /[ \t\r]+/
                elsif @ignore_newlines
                  /[\r\n]+/
                end
      return unless pattern

      skipped = @scanner.scan(pattern)
      track_newlines(skipped) if skipped
    end

    # Update the line counter and line-start offset for +text+, which must
    # be the text the scanner has just consumed.
    def track_newlines(text)
      newlines = text.count("\n")
      return if newlines.zero?

      @lineno += newlines
      tail = text[(text.rindex("\n") + 1)..-1]
      @cols_prev = @scanner.pos - tail.bytesize
    end
  end
end
@@ -0,0 +1,116 @@
1
+ require 'bibtex/bibliography'
2
+ require 'bibtex/entry'
3
+ require 'bibtex/field'
4
+ require 'bibtex/lexer'
5
+
6
module Bibtex

  # Parser for BibTeX input of the form
  #   @type{key, field = value, ...}
  # built on a single class-level Lexer that is shared by all calls.
  class Parser
    # Parse the file +filename+ and return a Bibliography.
    # While the file is being parsed its name is given to the lexer, so
    # that error positions name the file instead of '<unknown>'.
    def self.parse(filename)
      @lexer.file_name = filename
      parse_string File.read(filename)
    ensure
      @lexer.file_name = '<unknown>'
    end

    # Parse BibTeX text and return a Bibliography holding its entries.
    def self.parse_string(data)
      @lexer.feed data
      # Entries are lexed with whitespace ignored; set it explicitly so the
      # shared lexer always starts from a known state.
      @lexer.ignore_whitespace = true

      b = Bibliography.new
      b << parse_entry while @lexer.more_tokens?
      b
    end

    # Parse one "@type{key, field = value, ...}" entry.
    def self.parse_entry
      expect :at, '@'
      type = expect :id
      expect :lbrace, '{'
      key = expect :id

      e = Entry.new(type, key)
      while @lexer.peek_token != :rbrace
        expect :comma, ','
        # Accept a trailing comma after the last field.
        break if @lexer.peek_token == :rbrace

        e.add_field parse_field
      end

      expect :rbrace, '}'
      e
    end

    # Parse "name = value" and return it as a Field; the name is interned
    # as written.
    def self.parse_field
      key = expect :id
      expect :equals, '='
      value = parse_value
      Field.new(key.intern, value)
    end

    # Parse a field value and return its text without the outer delimiters.
    # The value may be wrapped in {...} or "..." or be bare, in which case
    # it ends at the next ',' or '}'. Braces nested inside the value are
    # kept in the returned text.
    #
    # Raises RuntimeError when the input ends inside the value.
    def self.parse_value
      close = :rbrace
      brace_count = 1
      if @lexer.peek_token == :dquote
        expect :dquote
        close = :dquote
      elsif @lexer.peek_token == :lbrace
        expect :lbrace, '{'
      else
        # Not surrounded by quotes or braces
        brace_count = 0
      end

      str = ''.dup
      # Inside a value spaces are significant; only line breaks are skipped.
      @lexer.ignore_whitespace = false
      @lexer.ignore_newlines = true
      loop do
        raise 'Unexpected end of input' unless @lexer.more_tokens?

        # A field not delimited by "" or {} ends before ',' or '}'
        return str if brace_count == 0 && [:comma, :rbrace].include?(@lexer.peek_token)

        case @lexer.next_token!
        when :rbrace
          brace_count -= 1
          return str if brace_count == 0

          str << '}'
        when :lbrace
          str << '{'
          brace_count += 1
        when :dquote
          # A quote ends only a quote-delimited value, and only outside any
          # nested braces; everywhere else it is ordinary text.
          return str if close == :dquote && brace_count == 1

          str << @lexer.lval
        else
          str << @lexer.lval
        end
      end
    ensure
      # Restore whitespace skipping on every exit, including errors, so the
      # shared lexer is never left in value mode.
      @lexer.ignore_whitespace = true
    end

    # Consume the next token and return its text when it is +token+;
    # otherwise raise RuntimeError naming the position, the expected token
    # (+pretty+, defaulting to the token name) and what was found.
    def self.expect(token, pretty = nil)
      pretty ||= token.to_s
      got = @lexer.next_token!
      return @lexer.lval if got == token

      raise "#{@lexer.src_pos}: Expected '#{pretty}' but found token '#{got}' (text='#{@lexer.lval}')"
    end

    # A bare `private` has no effect on methods defined with `def self.`,
    # so the helpers are hidden explicitly.
    private_class_method :parse_entry, :parse_field, :parse_value, :expect

    # Rules are tried in order; the final rule matches any single character
    # other than a newline, so it collects the free text of field values.
    @lexer = Lexer.new(true) do |rules|
      rules.match(/@/, :at)
      rules.match(/\{/, :lbrace)
      rules.match(/\}/, :rbrace)
      rules.match(/"/, :dquote)
      rules.match(/=/, :equals)
      rules.match(/,/, :comma)
      rules.match(/[\w\-:&]+/, :id)
      rules.match(/.+?/, :cdata)
    end
  end

end