bibtex_parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/LICENSE +20 -0
- data/README +107 -0
- data/Rakefile +46 -0
- data/lib/bibtex_author.rb +50 -0
- data/lib/bibtex_entry.rb +184 -0
- data/lib/bibtex_parser.rb +189 -0
- metadata +70 -0
data/CHANGELOG
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010, Jeffrey Shantz
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
= BibTeXParser - Simple BibTeX parsing
|
2
|
+
|
3
|
+
BibTeXParser is a simple parser for the BibTeX format, allowing a file
|
4
|
+
containing BibTeX citations to be parsed into individual entries consisting of
|
5
|
+
key-value pairs.
|
6
|
+
|
7
|
+
This is a new library, and so there are likely to be bugs and strange citation
|
8
|
+
formats that I have not yet encountered. If you happen to find a citation
|
9
|
+
which is not parsed properly by this library, please see the Reporting Bugs
|
10
|
+
section below.
|
11
|
+
|
12
|
+
== Author
|
13
|
+
|
14
|
+
Jeff Shantz <x@y, where x is equal to jshantz4, y = csd.uwo.ca>
|
15
|
+
|
16
|
+
== License
|
17
|
+
|
18
|
+
BibTeXParser is Copyright (C) 2010, Jeff Shantz, and is licensed under the MIT
|
19
|
+
license. Please see LICENSE for more details.
|
20
|
+
|
21
|
+
== Parsing a BibTeX File
|
22
|
+
|
23
|
+
A BibTeX file can be parsed using the BibTeXParser class:
|
24
|
+
|
25
|
+
require 'rubygems'
|
26
|
+
require 'bibtex_parser'
|
27
|
+
|
28
|
+
entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
29
|
+
|
30
|
+
entries.each do |entry|
|
31
|
+
puts entry.title
|
32
|
+
puts entry.year
|
33
|
+
end
|
34
|
+
|
35
|
+
== Accessing entry data
|
36
|
+
|
37
|
+
To access entry data, you can either index into the entry, specifying the key
|
38
|
+
name to get or set, or simply use "dot" notation, as shown below:
|
39
|
+
|
40
|
+
require 'rubygems'
|
41
|
+
require 'bibtex_parser'
|
42
|
+
|
43
|
+
entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
44
|
+
entry = entries[0]
|
45
|
+
|
46
|
+
entry.year = 2010
|
47
|
+
entry[:year] = 2010 # Equivalent
|
48
|
+
|
49
|
+
puts entry.title
|
50
|
+
puts entry[:title] # Equivalent
|
51
|
+
|
52
|
+
== Getting author data
|
53
|
+
|
54
|
+
Authors (or editors) are available through the 'authors' attribute as an array of
|
55
|
+
BibTeXAuthor objects. A BibTeXAuthor contains an author's name parsed into
|
56
|
+
First, Middle, and Last parts. Unlike other BibTeX parsers currently in
|
57
|
+
existence, this parser does not extract "von" and "jr" parts of names. This
|
58
|
+
may be implemented in the future, should demand arise.
|
59
|
+
|
60
|
+
require 'rubygems'
|
61
|
+
require 'bibtex_parser'
|
62
|
+
|
63
|
+
entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
64
|
+
entry = entries[0]
|
65
|
+
first_author = entry.authors[0]
|
66
|
+
|
67
|
+
puts first_author.first_name
|
68
|
+
puts first_author.middle_name
|
69
|
+
puts first_author.last_name
|
70
|
+
|
71
|
+
== Checking whether or not a citation is valid
|
72
|
+
|
73
|
+
The 'valid?' method and the 'errors' array will be of assistance in determining
|
74
|
+
whether or not a citation is valid and, if not, why it is failing.
|
75
|
+
|
76
|
+
require 'rubygems'
|
77
|
+
require 'bibtex_parser'
|
78
|
+
|
79
|
+
entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
80
|
+
entry = entries[0]
|
81
|
+
|
82
|
+
unless (entry.valid?)
|
83
|
+
entry.errors.each do |error|
|
84
|
+
puts error
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
== Special entry attributes
|
89
|
+
|
90
|
+
The following attributes of the BibTeXEntry class may be of interest to you:
|
91
|
+
|
92
|
+
* raw_bibtex - Returns the raw BibTeX citation from which the entry was
|
93
|
+
created
|
94
|
+
* valid - Whether or not the citation was properly parsed
|
95
|
+
* type - The citation type (e.g. "inproceedings" or "techreport")
|
96
|
+
* key - The entry's citation key
|
97
|
+
* errors - Array of parsing errors, if the citation is invalid
|
98
|
+
|
99
|
+
== Reporting Bugs
|
100
|
+
|
101
|
+
Should you find a bug with the parser, please send an email to the address below
|
102
|
+
with the following:
|
103
|
+
|
104
|
+
* The version of the parser you are using
|
105
|
+
* The failing BibTeX citation attached
|
106
|
+
|
107
|
+
Send bug reports to: x@y, where x is equal to jshantz4, and y = csd.uwo.ca
|
data/Rakefile
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# Build script for the bibtex_parser gem: defines the gem specification and
# the packaging, RDoc, test and spec tasks.
#
# NOTE(review): 'rake/gempackagetask', 'rake/rdoctask' and
# 'spec/rake/spectask' (RSpec 1.x) are legacy task libraries; they are
# presumably unavailable on current Rake/RSpec releases -- confirm before
# running this file on a modern toolchain.
require 'rubygems'
require 'rake'
require 'rake/clean'
require 'rake/gempackagetask'
require 'rake/rdoctask'
require 'rake/testtask'
require 'spec/rake/spectask'

# Gem metadata; the description simply reuses the summary.
spec = Gem::Specification.new do |s|
  s.name = 'bibtex_parser'
  s.version = '1.0.0'
  s.has_rdoc = true
  s.extra_rdoc_files = ['README', 'LICENSE', 'CHANGELOG']
  s.summary = 'A parser for the BibTeX citation format'
  s.description = s.summary
  s.author = 'Jeff Shantz'
  s.email = 'x@y, where x = jshantz4, y = csd.uwo.ca'
  # s.executables = ['your_executable_here']
  s.files = %w(LICENSE README CHANGELOG Rakefile) + Dir.glob("{bin,lib,spec}/**/*")
  s.require_path = "lib"
  s.bindir = "bin"
end

# Packages the gem, and additionally produces tar and zip archives.
Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = true
  p.need_zip = true
end

# Generates RDoc for the top-level documents and everything under lib/.
Rake::RDocTask.new do |rdoc|
  files = ['README', 'LICENSE', 'CHANGELOG', 'lib/**/*.rb']
  rdoc.rdoc_files.add(files)
  rdoc.main = "README" # page to start on
  rdoc.title = "BibTeXParser Docs"
  rdoc.rdoc_dir = 'doc/rdoc' # rdoc output folder
  rdoc.options << '--line-numbers'
end

# Runs every Ruby file under test/.
Rake::TestTask.new do |t|
  t.test_files = FileList['test/**/*.rb']
end

# Runs every Ruby file under spec/ with lib/ on the load path.
Spec::Rake::SpecTask.new do |t|
  t.spec_files = FileList['spec/**/*.rb']
  # Append the directory name itself; Dir["lib"] would push a nested Array
  # (and an empty one when lib/ is missing) into the list of load paths.
  t.libs << 'lib'
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# = BibTeXParser - Simple BibTeX parsing
|
2
|
+
#
|
3
|
+
# Author: Jeff Shantz <x@y, where x is equal to jshantz4, y = csd.uwo.ca>
|
4
|
+
#
|
5
|
+
# Licensed under the MIT license -- see LICENSE for more details
|
6
|
+
|
7
|
+
# A single author (or editor) name split into first, middle and last parts.
#
# Supported shapes, tried in this order:
#
#   "First M. Last"   - optional one-letter middle initial
#   "Last, First M."  - optional one-letter middle initial
#   "De Last, First"  - anything else containing a comma
#   "First De Last"   - anything else; the first word is the first name
#
# A name consisting of a single word is treated as a last name, following the
# BibTeX convention. "von" and "jr" parts are not extracted.
class BibTeXAuthor

  attr_accessor :first_name, :last_name, :middle_name, :raw_name

  # "First M. Last". Whitespace between the parts is mandatory so that a
  # single word is never split in the middle, and \A/\z anchor the whole
  # string rather than one line of it.
  FIRST_LAST_PATTERN = /\A([\w.-]+)\s+(?:(\w\.)\s*)?([\w.-]+)\z/

  # "Last, First M."
  LAST_FIRST_PATTERN = /\A([\w.-]+)\s*,\s*([\w.-]+)\s*(\w\.)?\z/

  # name - the raw author name as written in the citation. nil is treated as
  #        an empty name.
  def initialize(name)
    @raw_name = name.to_s.strip
    parse
  end

  # Splits raw_name into first_name, middle_name and last_name. Parts that
  # cannot be determined are left as nil. All parts are reset first, so the
  # method may be called again after raw_name has been changed.
  def parse
    name = @raw_name.to_s
    @first_name = @middle_name = @last_name = nil

    if (md = FIRST_LAST_PATTERN.match(name))
      @first_name, @middle_name, @last_name = md[1], md[2], md[3]
    elsif (md = LAST_FIRST_PATTERN.match(name))
      @last_name, @first_name, @middle_name = md[1], md[2], md[3]
    elsif name.index(",") # De Last, First
      parts = name.split(/\s*,\s*/)
      @last_name = parts[0]
      @first_name = parts[1]
    else # First De Last
      parts = name.split(/\s+/)
      if parts.length < 2
        # A lone word (or an empty name) has no first name.
        @last_name = parts.first.to_s
      else
        @first_name = parts.shift
        @last_name = parts.join(" ")
      end
    end
  end

end
|
data/lib/bibtex_entry.rb
ADDED
@@ -0,0 +1,184 @@
|
|
1
|
+
require 'bibtex_author'
|
2
|
+
|
3
|
+
# = BibTeXParser - Simple BibTeX parsing
|
4
|
+
#
|
5
|
+
# Author: Jeff Shantz <x@y, where x is equal to jshantz4, y = csd.uwo.ca>
|
6
|
+
#
|
7
|
+
# Licensed under the MIT license -- see LICENSE for more details
|
8
|
+
|
9
|
+
# A single BibTeX citation parsed into its type, citation key, fields and
# authors.
#
# Fields are stored under lower-cased symbol keys and can be read or written
# with entry[:title] / entry["title"] or with "dot" notation (entry.title,
# entry.title = "..."). Parsing problems are collected in +errors+ and
# reflected by +valid?+.
class BibTeXEntry

  attr_accessor :raw_bibtex, :valid, :type, :key, :errors, :authors

  # Start of a "name = value" pair inside the entry body.
  FIELD_START = /([[:alpha:]]+)\s*=\s*/

  # block - the raw text of one citation, e.g. "@article{key, title = {..}}".
  def initialize(block)
    @raw_bibtex = block
    @valid = true
    @fields = {}
    @authors = []
    @errors = []
    parse
  end

  # Whether the citation parsed without errors.
  def valid?
    @valid
  end

  # Returns the value stored for +key+ (String or Symbol), or nil.
  def [](key)
    @fields[normalize_key(key)]
  end

  # Stores +value+ under +key+ (String or Symbol).
  def []=(key, value)
    @fields[normalize_key(key)] = value
  end

  # Whether a field named +key+ (String or Symbol) is present.
  def has_key?(key)
    @fields.has_key?(normalize_key(key))
  end

  # (Re)parses raw_bibtex. All previously parsed state -- fields, authors and
  # errors -- is discarded first, so calling this again does not duplicate
  # authors or keep stale fields.
  def parse
    @valid = true
    @errors = []
    @fields = {}
    @authors = []
    # Reassign instead of strip!: the caller's string is left untouched and
    # frozen strings are accepted.
    @raw_bibtex = @raw_bibtex.to_s.strip
    @parsed = @raw_bibtex

    return unless parse_type
    return unless parse_key
    parse_fields
    parse_authors

    if missing_field?(:year)
      @errors << "Missing year field"
      @valid = false
    end

    if missing_field?(:title)
      @errors << "Missing title field"
      @valid = false
    end
  end

  # Implements "dot" notation: entry.name = value stores a field, any other
  # unknown method name reads the field of that name (nil when absent).
  def method_missing(sym, *arguments)
    name = sym.to_s

    if name =~ /=\z/
      self.send(:[]=, name.chop, *arguments)
    else
      self[sym]
    end
  end

  # Keeps respond_to? in step with method_missing: field setters and fields
  # that are currently present are reported as available.
  def respond_to_missing?(sym, include_private = false)
    return true if sym.to_s =~ /\A\w+=\z/
    has_key?(sym) || super
  end

  private

  # Field names given as Strings are stripped, lower-cased and symbolized;
  # anything else is used as the key unchanged.
  def normalize_key(key)
    key.is_a?(String) ? key.strip.downcase.to_sym : key
  end

  # True when the field is absent or is the empty string.
  def missing_field?(name)
    !has_key?(name) || self[name].eql?("")
  end

  # Consumes "@type{" and records the citation type. Returns a true value on
  # success; otherwise records an error and returns false.
  def parse_type
    md = /^\s*@([[:alpha:]]+)\s*\{/.match(@parsed)

    if md
      @type = md[1]
      # Continue after the match itself, wherever in the text it was found.
      @parsed = md.post_match
      @valid = true
    else
      @errors << "Missing citation type"
      @valid = false
    end
  end

  # Consumes the citation key (everything up to the first comma). Returns a
  # true value on success; otherwise records an error and returns false.
  def parse_key
    idx = @parsed.index(",")

    if idx.nil?
      @errors << "Missing citation key"
      return @valid = false
    end

    key = @parsed[0...idx].strip

    # Quick sanity check to ensure the user specified a key -- don't want to
    # treat a "KEY = VALUE," pair as the key simply because there's a comma
    # after the pair.
    if key =~ /=/
      @errors << "Missing citation key"
      return @valid = false
    end

    @key = key
    @parsed = @parsed[(idx + 1)..-1]
  end

  # Scans the entry body for "name = value" pairs and stores each one.
  # Values may be brace-delimited (nested braces and line breaks allowed),
  # double-quoted, or bare tokens such as 2010 or jan.
  def parse_fields
    pos = 0

    while @parsed.index(FIELD_START, pos)
      md = Regexp.last_match
      value, pos = read_value(@parsed, md.end(0))
      self[md[1]] = value unless value.nil?
    end
  end

  # Reads the value starting at +start+ in +text+. Returns [value, next_pos];
  # value is nil when nothing usable starts there. next_pos is never smaller
  # than +start+, which keeps the scanning loop moving forward.
  def read_value(text, start)
    opener = text[start, 1]

    if opener == "{" || opener == '"'
      stop = (opener == "{") ? closing_brace_index(text, start) : closing_quote_index(text, start)
      return [nil, start + 1] if stop.nil?
      [clean_value(text[(start + 1)...stop]), stop + 1]
    else
      bare = text[start..-1].to_s[/\A[^\s,{}"=#]+/]
      return [nil, start] if bare.nil?
      [bare, start + bare.length]
    end
  end

  # Index of the brace that closes the one at +open_idx+, or nil when the
  # braces never balance.
  def closing_brace_index(text, open_idx)
    depth = 0
    idx = open_idx

    while idx < text.length
      c = text[idx, 1]
      if c == "{"
        depth += 1
      elsif c == "}"
        depth -= 1
        return idx if depth == 0
      end
      idx += 1
    end

    nil
  end

  # Index of the double quote that closes the one at +open_idx+, or nil.
  # Quotes nested inside braces do not end the value.
  def closing_quote_index(text, open_idx)
    depth = 0
    idx = open_idx + 1

    while idx < text.length
      c = text[idx, 1]
      if c == "{"
        depth += 1
      elsif c == "}"
        depth -= 1 if depth > 0
      elsif c == '"'
        return idx if depth == 0
      end
      idx += 1
    end

    nil
  end

  # Joins a value that was wrapped over several lines and trims it.
  def clean_value(text)
    text.gsub(/\s*\n\s*/, " ").strip
  end

  # Builds the authors list from the author field or, when that is absent or
  # empty, from the editor field. Names are separated by the word "and".
  def parse_authors
    names = [self[:author], self[:editor]].find { |v| !v.to_s.empty? }

    if names.nil?
      @errors << "Missing author or editor field"
      @valid = false
      return
    end

    names.split(/\s+and\s+/).each do |author_name|
      @authors << BibTeXAuthor.new(author_name)
    end
  end

end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
require 'bibtex_entry'
|
2
|
+
|
3
|
+
# = BibTeXParser - Simple BibTeX parsing
|
4
|
+
#
|
5
|
+
# BibTeXParser is a simple parser for the BibTeX format, allowing a file
|
6
|
+
# containing BibTeX citations to be parsed into individual entries consisting of
|
7
|
+
# key-value pairs.
|
8
|
+
#
|
9
|
+
# This is a new library, and so there are likely to be bugs and strange citation
|
10
|
+
# formats that I have not yet encountered. If you happen to find a citation
|
11
|
+
# which is not parsed properly by this library, please see the Reporting Bugs
|
12
|
+
# section below.
|
13
|
+
#
|
14
|
+
# == Author
|
15
|
+
#
|
16
|
+
# Jeff Shantz <x@y, where x is equal to jshantz4, y = csd.uwo.ca>
|
17
|
+
#
|
18
|
+
# == License
|
19
|
+
#
|
20
|
+
# BibTeXParser is Copyright (C) 2010, Jeff Shantz, and is licensed under the MIT
|
21
|
+
# license. Please see LICENSE for more details.
|
22
|
+
#
|
23
|
+
# == Parsing a BibTeX File
|
24
|
+
#
|
25
|
+
# A BibTeX file can be parsed using the BibTeXParser class:
|
26
|
+
#
|
27
|
+
# require 'rubygems'
|
28
|
+
# require 'bibtex_parser'
|
29
|
+
#
|
30
|
+
# entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
31
|
+
#
|
32
|
+
# entries.each do |entry|
|
33
|
+
# puts entry.title
|
34
|
+
# puts entry.year
|
35
|
+
# end
|
36
|
+
#
|
37
|
+
# == Accessing entry data
|
38
|
+
#
|
39
|
+
# To access entry data, you can either index into the entry, specifying the key
|
40
|
+
# name to get or set, or simply use "dot" notation, as shown below:
|
41
|
+
#
|
42
|
+
# require 'rubygems'
|
43
|
+
# require 'bibtex_parser'
|
44
|
+
#
|
45
|
+
# entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
46
|
+
# entry = entries[0]
|
47
|
+
#
|
48
|
+
# entry.year = 2010
|
49
|
+
# entry[:year] = 2010 # Equivalent
|
50
|
+
#
|
51
|
+
# puts entry.title
|
52
|
+
# puts entry[:title] # Equivalent
|
53
|
+
#
|
54
|
+
# == Getting author data
|
55
|
+
#
|
56
|
+
# Authors (or editors) are available through the 'authors' attribute as an array of
|
57
|
+
# BibTeXAuthor objects. A BibTeXAuthor contains an author's name parsed into
|
58
|
+
# First, Middle, and Last parts. Unlike other BibTeX parsers currently in
|
59
|
+
# existence, this parser does not extract "von" and "jr" parts of names. This
|
60
|
+
# may be implemented in the future, should demand arise.
|
61
|
+
#
|
62
|
+
# require 'rubygems'
|
63
|
+
# require 'bibtex_parser'
|
64
|
+
#
|
65
|
+
# entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
66
|
+
# entry = entries[0]
|
67
|
+
# first_author = entry.authors[0]
|
68
|
+
#
|
69
|
+
# puts first_author.first_name
|
70
|
+
# puts first_author.middle_name
|
71
|
+
# puts first_author.last_name
|
72
|
+
#
|
73
|
+
# == Checking whether or not a citation is valid
|
74
|
+
#
|
75
|
+
# The 'valid?' method and the 'errors' array will be of assistance in
|
76
|
+
# determining whether or not a citation is valid and, if not, why it is failing.
|
77
|
+
#
|
78
|
+
# require 'rubygems'
|
79
|
+
# require 'bibtex_parser'
|
80
|
+
#
|
81
|
+
# entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
82
|
+
# entry = entries[0]
|
83
|
+
#
|
84
|
+
# unless (entry.valid?)
|
85
|
+
# entry.errors.each do |error|
|
86
|
+
# puts error
|
87
|
+
# end
|
88
|
+
# end
|
89
|
+
#
|
90
|
+
# == Special entry attributes
|
91
|
+
#
|
92
|
+
# The following attributes of the BibTeXEntry class may be of interest to you:
|
93
|
+
#
|
94
|
+
# * raw_bibtex - Returns the raw BibTeX citation from which the entry was
|
95
|
+
# created
|
96
|
+
# * valid - Whether or not the citation was properly parsed
|
97
|
+
# * type - The citation type (e.g. "inproceedings" or "techreport")
|
98
|
+
# * key - The entry's citation key
|
99
|
+
# * errors - Array of parsing errors, if the citation is invalid
|
100
|
+
#
|
101
|
+
# == Reporting Bugs
|
102
|
+
#
|
103
|
+
# Should you find a bug with the parser, please send an email to the address
|
104
|
+
# below with the following:
|
105
|
+
#
|
106
|
+
# * The version of the parser you are using
|
107
|
+
# * The failing BibTeX citation attached
|
108
|
+
#
|
109
|
+
# Send bug reports to: x@y, where x is equal to jshantz4, and y = csd.uwo.ca
|
110
|
+
|
111
|
+
# Reads a BibTeX file and turns every citation in it into a BibTeXEntry.
class BibTeXParser

  # A line that begins a "@type{...}" block.
  BLOCK_START = /^\s*@[[:alpha:]]+/

  # Blocks that are not citations and are therefore skipped. The pattern is
  # anchored to the start of the line so that a citation merely mentioning
  # one of these words on its first line is still parsed.
  NON_CITATION = /^\s*@(string|preamble|comment)\b/i

  # Parses the file at +filename+ and returns an Array with one BibTeXEntry
  # per citation, in file order. A citation must begin on a line of its own.
  #
  # Errors raised while opening or reading the file (e.g. Errno::ENOENT)
  # propagate to the caller; the file handle is closed either way.
  def self.parse_bibtex_file(filename)
    blocks = []

    File.open(filename) do |f|
      while (line = f.gets)
        next unless (line =~ BLOCK_START) && (line !~ NON_CITATION)
        blocks << extract_block(f, line)
      end
    end

    blocks.map { |block| BibTeXEntry.new(block) }
  end

  # Collects the text of one citation, from +start_line+ up to and including
  # the brace that closes its first opening brace, reading further lines from
  # +f+ as needed. Text following the closing brace on its line is dropped.
  # When the input ends before the braces balance, everything read so far is
  # returned.
  #
  # This is an internal helper. It is nevertheless callable from outside the
  # class, as it always has been: a bare `private` does not apply to
  # singleton methods.
  def self.extract_block(f, start_line)
    block = "".dup # dup keeps the buffer mutable under frozen string literals
    depth = 0
    opened = false
    line = start_line

    while line
      line.each_char do |c|
        block << c

        if c == "{"
          depth += 1
          opened = true
        elsif c == "}"
          depth -= 1 if depth > 0
        end

        return block if opened && depth == 0
      end

      line = f.gets
    end

    block
  end

end
|
184
|
+
|
185
|
+
#entries = BibTeXParser.parse_bibtex_file('cs9544a_project.bib')
|
186
|
+
#entries = BibTeXParser.parse_bibtex_file('test.bib')
|
187
|
+
#entries.each do |entry|
|
188
|
+
# puts "NEW ENTRY:\n==================================================\n#{entry.inspect}"
|
189
|
+
#end
|
metadata
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bibtex_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 1
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
version: 1.0.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Jeff Shantz
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-03-02 00:00:00 -05:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: A parser for the BibTeX citation format
|
22
|
+
email: x@y, where x = jshantz4, y = csd.uwo.ca
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files:
|
28
|
+
- README
|
29
|
+
- LICENSE
|
30
|
+
- CHANGELOG
|
31
|
+
files:
|
32
|
+
- LICENSE
|
33
|
+
- README
|
34
|
+
- CHANGELOG
|
35
|
+
- Rakefile
|
36
|
+
- lib/bibtex_author.rb
|
37
|
+
- lib/bibtex_entry.rb
|
38
|
+
- lib/bibtex_parser.rb
|
39
|
+
has_rdoc: true
|
40
|
+
homepage:
|
41
|
+
licenses: []
|
42
|
+
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
segments:
|
53
|
+
- 0
|
54
|
+
version: "0"
|
55
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
segments:
|
60
|
+
- 0
|
61
|
+
version: "0"
|
62
|
+
requirements: []
|
63
|
+
|
64
|
+
rubyforge_project:
|
65
|
+
rubygems_version: 1.3.6
|
66
|
+
signing_key:
|
67
|
+
specification_version: 3
|
68
|
+
summary: A parser for the BibTeX citation format
|
69
|
+
test_files: []
|
70
|
+
|