bibtex_parser 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +3 -0
- data/LICENSE +20 -0
- data/README +107 -0
- data/Rakefile +46 -0
- data/lib/bibtex_author.rb +50 -0
- data/lib/bibtex_entry.rb +184 -0
- data/lib/bibtex_parser.rb +189 -0
- metadata +70 -0
data/CHANGELOG
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010, Jeffrey Shantz
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
= BibTeXParser - Simple BibTeX parsing
|
2
|
+
|
3
|
+
BibTeXParser is a simple parser for the BibTeX format, allowing a file
|
4
|
+
containing BibTeX citations to be parsed into individual entries consisting of
|
5
|
+
key-value pairs.
|
6
|
+
|
7
|
+
This is a new library, and so there are likely to be bugs and strange citation
|
8
|
+
formats that I have not yet encountered. If you happen to find a citation
|
9
|
+
which is not parsed properly by this library, please see the Bug Reporting
|
10
|
+
section below.
|
11
|
+
|
12
|
+
== Author
|
13
|
+
|
14
|
+
Jeff Shantz <x@y, where x is equal to jshantz4, y = csd.uwo.ca>
|
15
|
+
|
16
|
+
== License
|
17
|
+
|
18
|
+
BibTeXParser is Copyright (C) 2010, Jeff Shantz, and is licensed under the MIT
|
19
|
+
license. Please see LICENSE for more details.
|
20
|
+
|
21
|
+
== Parsing a BibTeX File
|
22
|
+
|
23
|
+
A BibTeX file can be parsed using the BibTeXParser class:
|
24
|
+
|
25
|
+
require 'rubygems'
|
26
|
+
require 'bibtex_parser'
|
27
|
+
|
28
|
+
entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
29
|
+
|
30
|
+
entries.each do |entry|
|
31
|
+
puts entry.title
|
32
|
+
puts entry.year
|
33
|
+
end
|
34
|
+
|
35
|
+
== Accessing entry data
|
36
|
+
|
37
|
+
To access entry data, you can either index into the entry, specifying the key
|
38
|
+
name to get or set, or simply use "dot" notation, as shown below:
|
39
|
+
|
40
|
+
require 'rubygems'
|
41
|
+
require 'bibtex_parser'
|
42
|
+
|
43
|
+
entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
44
|
+
entry = entries[0]
|
45
|
+
|
46
|
+
entry.year = 2010
|
47
|
+
entry[:year] = 2010 # Equivalent
|
48
|
+
|
49
|
+
puts entry.title
|
50
|
+
puts entry[:title] # Equivalent
|
51
|
+
|
52
|
+
== Getting author data
|
53
|
+
|
54
|
+
Authors (or editors) are available through the 'authors' attribute as an array of
|
55
|
+
BibTeXAuthor objects. A BibTeXAuthor contains an author's name parsed into
|
56
|
+
First, Middle, and Last parts. Unlike other BibTeX parsers currently in
|
57
|
+
existence, this parser does not extract "von" and "jr" parts of names. This
|
58
|
+
may be implemented in the future, should demand arise.
|
59
|
+
|
60
|
+
require 'rubygems'
|
61
|
+
require 'bibtex_parser'
|
62
|
+
|
63
|
+
entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
64
|
+
entry = entries[0]
|
65
|
+
first_author = entry.authors[0]
|
66
|
+
|
67
|
+
puts first_author.first_name
|
68
|
+
puts first_author.middle_name
|
69
|
+
puts first_author.last_name
|
70
|
+
|
71
|
+
== Checking whether or not a citation is valid
|
72
|
+
|
73
|
+
The 'valid?' method and the 'errors' array will be of assistance in determining
|
74
|
+
whether or not a citation is valid and, if not, why it is failing.
|
75
|
+
|
76
|
+
require 'rubygems'
|
77
|
+
require 'bibtex_parser'
|
78
|
+
|
79
|
+
entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
80
|
+
entry = entries[0]
|
81
|
+
|
82
|
+
unless (entry.valid?)
|
83
|
+
entry.errors.each do |error|
|
84
|
+
puts error
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
== Special entry attributes
|
89
|
+
|
90
|
+
The following attributes of the BibTeXEntry class may be of interest to you:
|
91
|
+
|
92
|
+
* raw_bibtex - Returns the raw BibTeX citation from which the entry was
|
93
|
+
created
|
94
|
+
* valid - Whether or not the citation was properly parsed
|
95
|
+
* type - The citation type (e.g. "inproceedings" or "techreport")
|
96
|
+
* key - The entry's citation key
|
97
|
+
* errors - Array of parsing errors, if the citation is invalid
|
98
|
+
|
99
|
+
== Reporting Bugs
|
100
|
+
|
101
|
+
Should you find a bug with the parser, please send an email to the address below
|
102
|
+
with the following:
|
103
|
+
|
104
|
+
* The version of the parser you are using
|
105
|
+
* The failing BibTeX citation attached
|
106
|
+
|
107
|
+
Send bug reports to: x@y, where x is equal to jshantz4, and y = csd.uwo.ca
|
data/Rakefile
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# Build script for the bibtex_parser gem: defines the gem specification and
# the packaging, RDoc, test and spec tasks.
require 'rubygems'
require 'rake'
require 'rake/clean'
require 'rake/gempackagetask'
require 'rake/rdoctask'
require 'rake/testtask'
require 'spec/rake/spectask'

# Gem specification shared by the packaging task below.
spec = Gem::Specification.new do |s|
  s.name = 'bibtex_parser'
  s.version = '1.0.0'
  s.has_rdoc = true
  s.extra_rdoc_files = ['README', 'LICENSE','CHANGELOG']
  s.summary = 'A parser for the BibTeX citation format'
  s.description = s.summary
  s.author = 'Jeff Shantz'
  s.email = 'x@y, where x = jshantz4, y = csd.uwo.ca'
  # s.executables = ['your_executable_here']
  s.files = %w(LICENSE README CHANGELOG Rakefile) + Dir.glob("{bin,lib,spec}/**/*")
  s.require_path = "lib"
  s.bindir = "bin"
end

# Packages the gem, additionally producing tar and zip archives.
Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = true
  p.need_zip = true
end

# Generates the RDoc documentation into doc/rdoc, starting at the README.
Rake::RDocTask.new do |rdoc|
  files =['README', 'LICENSE', 'CHANGELOG', 'lib/**/*.rb']
  rdoc.rdoc_files.add(files)
  rdoc.main = "README" # page to start on
  rdoc.title = "BibTeXParser Docs"
  rdoc.rdoc_dir = 'doc/rdoc' # rdoc output folder
  rdoc.options << '--line-numbers'
end

# Runs every Ruby file under test/.
Rake::TestTask.new do |t|
  t.test_files = FileList['test/**/*.rb']
end

# Runs every Ruby file under spec/ with lib/ on the load path.
Spec::Rake::SpecTask.new do |t|
  t.spec_files = FileList['spec/**/*.rb']
  # Append the path itself: Dir["lib"] would push a nested array (and an empty
  # one when the directory does not exist) rather than a load-path entry.
  t.libs << 'lib'
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# = BibTeXParser - Simple BibTeX parsing
|
2
|
+
#
|
3
|
+
# Author: Jeff Shantz <x@y, where x is equal to jshantz4, y = csd.uwo.ca>
|
4
|
+
#
|
5
|
+
# Licensed under the MIT license -- see LICENSE for more details
|
6
|
+
|
7
|
+
# A single author (or editor) name taken from a BibTeX entry, split into
# first, middle and last parts. "von" and "jr" parts are not extracted; a
# multi-word surname is kept whole in last_name.
class BibTeXAuthor

  attr_accessor :first_name, :last_name, :middle_name, :raw_name

  # "First Last" or "First M. Last". At least one whitespace character is
  # required after the first name so that a single-word name is not split in
  # the middle of the word.
  FIRST_LAST = /\A([\w.-]+)\s+(?:(\w\.)\s*)?([\w.-]+)\z/

  # "Last, First" or "Last, First M."
  LAST_FIRST = /\A([\w.-]+)\s*,\s*([\w.-]+)\s*(\w\.)?\z/

  # name - the raw author name as it appears in the BibTeX field; surrounding
  #        whitespace is removed before parsing.
  def initialize(name)
    @raw_name = name.strip
    parse
  end

  # Splits raw_name into first_name, middle_name and last_name. All three are
  # reset first, so calling parse again after changing raw_name never leaves a
  # stale part behind.
  def parse

    @first_name = nil
    @middle_name = nil
    @last_name = nil

    if (match = FIRST_LAST.match(@raw_name)) # First M. Last
      @first_name = match[1]
      @middle_name = match[2]
      @last_name = match[3]
    elsif (match = LAST_FIRST.match(@raw_name)) # Last, First M.
      @last_name = match[1]
      @first_name = match[2]
      @middle_name = match[3]
    elsif @raw_name.index(",") # De Last, First
      parts = @raw_name.split(/\s*,\s*/)
      @last_name = parts[0]
      @first_name = parts[1]
    else # First De Last
      parts = @raw_name.split(/\s+/)
      # BibTeX treats a name made of a single token as the last name, so only
      # peel off a first name when more than one token is present.
      @first_name = parts.shift if parts.length > 1
      @last_name = parts.join(" ")
    end

  end

  # def inspect
  #
  #   s = ""
  #   s += "\t\tFIRST : #{@first_name}\n"
  #   s += "\t\tMIDDLE : #{@middle_name}\n"
  #   s += "\t\tLAST : #{@last_name}\n"
  #
  #   return s
  #
  # end
end
|
data/lib/bibtex_entry.rb
ADDED
@@ -0,0 +1,184 @@
|
|
1
|
+
require 'bibtex_author'
|
2
|
+
|
3
|
+
# = BibTeXParser - Simple BibTeX parsing
|
4
|
+
#
|
5
|
+
# Author: Jeff Shantz <x@y, where x is equal to jshantz4, y = csd.uwo.ca>
|
6
|
+
#
|
7
|
+
# Licensed under the MIT license -- see LICENSE for more details
|
8
|
+
|
9
|
+
# A single BibTeX citation. The raw text of the citation is parsed into a
# type, a citation key, a hash of fields (reachable through [] or "dot"
# notation) and a list of BibTeXAuthor objects. Problems found while parsing
# are collected in errors and reflected by valid?.
class BibTeXEntry

  attr_accessor :raw_bibtex, :valid, :type, :key, :errors, :authors

  # Start of a "name = value" pair: the field name followed by the equals
  # sign. The value itself is read by hand so that nested braces, quotes and
  # "=" characters inside the value are handled correctly.
  FIELD_START = /([[:alpha:]][\w-]*)\s*=\s*/

  # block - the raw text of one citation, e.g. "@article{key, title = {T}}".
  def initialize(block)

    @raw_bibtex = block
    @valid = true
    @fields = {}
    @authors = []
    @errors = []
    parse

  end

  # Whether the citation was parsed without errors.
  def valid?
    @valid
  end

  # Returns the value of a field. String keys are stripped, downcased and
  # converted to symbols; any other key is used as given.
  def [](key)
    @fields[normalize_key(key)]
  end

  # Sets the value of a field, normalizing the key like [].
  def []=(key, value)
    @fields[normalize_key(key)] = value
  end

  # Whether a field with the given key is present, normalizing the key like [].
  def has_key?(key)
    @fields.has_key?(normalize_key(key))
  end

  # (Re)parses raw_bibtex, refreshing type, key, fields, authors, errors and
  # the valid flag. Fields set by hand are kept unless the citation text
  # defines the same field.
  def parse
    @valid = true
    @errors = []
    @authors = [] # reset so that parsing twice does not duplicate the authors
    # Non-destructive strip: the string handed in by the caller is left
    # untouched, and frozen strings are accepted.
    @raw_bibtex = @raw_bibtex.strip
    @parsed = @raw_bibtex

    return unless parse_type
    return unless parse_key
    parse_fields
    parse_authors

    require_field(:year)
    require_field(:title)

  end

  # def inspect
  #
  #   s = "TYPE: #{@type}\n"
  #   s += "KEY: #{@key}\n"
  #   s += "FIELDS [#{@fields.size}]: \n"
  #   s += "VALID: #{@valid}\n"
  #
  #   @fields.each do |k,v|
  #     s += "\t#{k}\t=> #{v}\n"
  #   end
  #
  #   s += "AUTHORS [#{@authors.size}]: \n"
  #   @authors.each do |author|
  #     s+= author.inspect
  #   end
  #   s += "\n\n#{@parsed}"
  #
  #   s += "ERRORS [#{@errors.size}]: \n"
  #   @errors.each do |error|
  #     s+= "\t\t#{error}\n"
  #   end
  #
  #   return s
  #
  # end

  # Implements "dot" notation: entry.title reads the :title field and
  # entry.title = x writes it. Unknown fields read as nil.
  def method_missing(sym, *arguments)

    if (sym.to_s =~ /=$/)
      self.send(:[]=, sym.to_s.chop, *arguments)
    else
      self.send(:[], sym)
    end

  end

  # Keeps respond_to? in line with method_missing: setters are always
  # available, getters are reported only for fields that are present.
  def respond_to_missing?(sym, include_private = false)
    name = sym.to_s
    name.end_with?("=") || has_key?(name) || super
  end


  private

  # Strings become stripped, downcased symbols; other keys pass through.
  def normalize_key(key)
    key.is_a?(String) ? key.strip.downcase.to_sym : key
  end

  # Records an error unless the named field is present and not empty.
  def require_field(name)
    return if has_key?(name) && !self[name].eql?("")

    @errors << "Missing #{name} field"
    @valid = false
  end

  # Consumes the leading "@type{" and stores the type. Returns true on
  # success, false (after recording an error) otherwise.
  def parse_type

    match = /^(\s*@([[:alpha:]]+)\s*\{)/.match(@parsed)

    if match
      @type = match[2]
      # Continue after the match itself rather than after its length, which
      # is only the same thing when the match starts at offset 0.
      @parsed = match.post_match
      @valid = true
    else
      @errors << "Missing citation type"
      @valid = false
    end

  end

  # Consumes the citation key (everything up to the first comma). Returns
  # true on success, false (after recording an error) otherwise.
  def parse_key

    idx = @parsed.index(",")
    key = idx ? @parsed[0...idx].strip : nil

    # Quick sanity check to ensure the user specified a key -- don't
    # want to treat a "KEY = VALUE," pair as the key simply because
    # there's a comma after the pair
    if key.nil? || key =~ /=/
      @errors << "Missing citation key"
      @valid = false
      return false
    end

    @key = key
    @parsed = @parsed[idx+1..-1]
    true

  end

  # Reads every "name = value" pair from the remaining text. Values may be
  # wrapped in (nested) braces or double quotes, may span several lines, or
  # may be bare tokens such as a number. Scanning resumes after each value,
  # so an "=" inside a value is never mistaken for a new field.
  def parse_fields

    pos = 0

    while @parsed.index(FIELD_START, pos)
      match = Regexp.last_match
      value, pos = read_value(match.end(0))
      self[match[1]] = value unless value.nil?
    end

  end

  # Reads the value starting at offset from. Returns [value, next_offset];
  # value is nil when there is nothing to read.
  def read_value(from)

    opener = @parsed[from, 1]
    return read_delimited(from, opener) if opener == "{" || opener == '"'

    # Bare token (number or macro name): runs up to whitespace, a comma or
    # the closing brace of the entry.
    stop = @parsed.index(/[\s,}]/, from) || @parsed.length
    bare = @parsed[from...stop]
    [bare.empty? ? nil : bare, stop]

  end

  # Reads a value delimited by braces or double quotes starting at offset
  # from (which holds the opening delimiter). Brace nesting is tracked in both
  # cases, so a quote inside braces does not end a quoted value. An
  # unterminated value runs to the end of the text.
  def read_delimited(from, opener)

    depth = 0
    idx = from

    while idx < @parsed.length
      ch = @parsed[idx, 1]

      if ch == "{"
        depth += 1
      elsif ch == "}" && depth > 0
        depth -= 1
      end

      closed = if opener == "{"
        ch == "}" && depth == 0
      else
        ch == '"' && depth == 0 && idx > from
      end

      return [clean_value(@parsed[(from + 1)...idx]), idx + 1] if closed

      idx += 1
    end

    [clean_value(@parsed[(from + 1)..-1]), @parsed.length]

  end

  # Joins the lines of a multi-line value with single spaces and trims it.
  def clean_value(text)
    text.gsub(/\s*\n\s*/, " ").strip
  end

  # Builds the authors list from the author field or, failing that, the
  # editor field. Names are separated by the word "and".
  def parse_authors

    names = [self[:author], self[:editor]].find do |candidate|
      candidate && candidate.length > 0
    end

    if names.nil?
      @errors << "Missing author or editor field"
      @valid = false
      return
    end

    names.split(/\s+and\s+/).each do |author_name|
      @authors << BibTeXAuthor.new(author_name)
    end

  end

end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
require 'bibtex_entry'
|
2
|
+
|
3
|
+
# = BibTeXParser - Simple BibTeX parsing
|
4
|
+
#
|
5
|
+
# BibTeXParser is a simple parser for the BibTeX format, allowing a file
|
6
|
+
# containing BibTeX citations to be parsed into individual entries consisting of
|
7
|
+
# key-value pairs.
|
8
|
+
#
|
9
|
+
# This is a new library, and so there are likely to be bugs and strange citation
|
10
|
+
# formats that I have not yet encountered. If you happen to find a citation
|
11
|
+
# which is not parsed properly by this library, please see the Bug Reporting
|
12
|
+
# section below.
|
13
|
+
#
|
14
|
+
# == Author
|
15
|
+
#
|
16
|
+
# Jeff Shantz <x@y, where x is equal to jshantz4, y = csd.uwo.ca>
|
17
|
+
#
|
18
|
+
# == License
|
19
|
+
#
|
20
|
+
# BibTeXParser is Copyright (C) 2010, Jeff Shantz, and is licensed under the MIT
|
21
|
+
# license. Please see LICENSE for more details
|
22
|
+
#
|
23
|
+
# == Parsing a BibTeX File
|
24
|
+
#
|
25
|
+
# A BibTeX file can be parsed using the BibTeXParser class:
|
26
|
+
#
|
27
|
+
# require 'rubygems'
|
28
|
+
# require 'bibtex_parser'
|
29
|
+
#
|
30
|
+
# entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
31
|
+
#
|
32
|
+
# entries.each do |entry|
|
33
|
+
# puts entry.title
|
34
|
+
# puts entry.year
|
35
|
+
# end
|
36
|
+
#
|
37
|
+
# == Accessing entry data
|
38
|
+
#
|
39
|
+
# To access entry data, you can either index into the entry, specifying the key
|
40
|
+
# name to get or set, or simply use "dot" notation, as shown below:
|
41
|
+
#
|
42
|
+
# require 'rubygems'
|
43
|
+
# require 'bibtex_parser'
|
44
|
+
#
|
45
|
+
# entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
46
|
+
# entry = entries[0]
|
47
|
+
#
|
48
|
+
# entry.year = 2010
|
49
|
+
# entry[:year] = 2010 # Equivalent
|
50
|
+
#
|
51
|
+
# puts entry.title
|
52
|
+
# puts entry[:title] # Equivalent
|
53
|
+
#
|
54
|
+
# == Getting author data
|
55
|
+
#
|
56
|
+
# Authors (or editors) are available through the 'authors' attribute as an array of
|
57
|
+
# BibTeXAuthor objects. A BibTeXAuthor contains an author's name parsed into
|
58
|
+
# First, Middle, and Last parts. Unlike other BibTeX parsers currently in
|
59
|
+
# existence, this parser does not extract "von" and "jr" parts of names. This
|
60
|
+
# may be implemented in the future, should demand arise.
|
61
|
+
#
|
62
|
+
# require 'rubygems'
|
63
|
+
# require 'bibtex_parser'
|
64
|
+
#
|
65
|
+
# entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
66
|
+
# entry = entries[0]
|
67
|
+
# first_author = entry.authors[0]
|
68
|
+
#
|
69
|
+
# puts first_author.first_name
|
70
|
+
# puts first_author.middle_name
|
71
|
+
# puts first_author.last_name
|
72
|
+
#
|
73
|
+
# == Checking whether or not a citation is valid
|
74
|
+
#
|
75
|
+
# The 'valid?' method and the 'errors' array will be of assistance in
|
76
|
+
# determining whether or not a citation is valid and, if not, why it is failing.
|
77
|
+
#
|
78
|
+
# require 'rubygems'
|
79
|
+
# require 'bibtex_parser'
|
80
|
+
#
|
81
|
+
# entries = BibTeXParser.parse_bibtex_file('thesis.bib')
|
82
|
+
# entry = entries[0]
|
83
|
+
#
|
84
|
+
# unless (entry.valid?)
|
85
|
+
# entry.errors.each do |error|
|
86
|
+
# puts error
|
87
|
+
# end
|
88
|
+
# end
|
89
|
+
#
|
90
|
+
# == Special entry attributes
|
91
|
+
#
|
92
|
+
# The following attributes of the BibTeXEntry class may be of interest to you:
|
93
|
+
#
|
94
|
+
# * raw_bibtex - Returns the raw BibTeX citation from which the entry was
|
95
|
+
# created
|
96
|
+
# * valid - Whether or not the citation was properly parsed
|
97
|
+
# * type - The citation type (e.g. "inproceedings" or "techreport")
|
98
|
+
# * key - The entry's citation key
|
99
|
+
# * errors - Array of parsing errors, if the citation is invalid
|
100
|
+
#
|
101
|
+
# == Reporting Bugs
|
102
|
+
#
|
103
|
+
# Should you find a bug with the parser, please send an email to the address
|
104
|
+
# below with the following:
|
105
|
+
#
|
106
|
+
# * The version of the parser you are using
|
107
|
+
# * The failing BibTeX citation attached
|
108
|
+
#
|
109
|
+
# Send bug reports to: x@y, where x is equal to jshantz4, and y = csd.uwo.ca
|
110
|
+
|
111
|
+
# Entry point of the library: reads a BibTeX file and turns every citation it
# contains into a BibTeXEntry.
class BibTeXParser

  # Blocks that start with "@" but are not citations.
  NON_ENTRY = /\A\s*@(string|preamble|comment)\b/i

  # Parses the BibTeX file at filename.
  #
  # Returns an array with one BibTeXEntry per citation, in file order.
  # "@string", "@preamble" and "@comment" blocks are skipped. Errors raised
  # while opening or reading the file are not rescued here.
  def BibTeXParser.parse_bibtex_file(filename)

    blocks = []

    # Block form so the file is closed even if reading raises.
    File.open(filename) do |f|
      until f.eof?
        line = f.gets

        next unless line =~ /^\s*@[[:alpha:]]+/
        next if line =~ NON_ENTRY

        blocks << extract_block(f, line)
      end
    end

    blocks.map { |block| BibTeXEntry.new(block) }

  end

  # Collects the text of one citation, starting with start_line (already read
  # from f) and reading further lines from f until the braces opened by the
  # citation are balanced again or the file ends. Anything following the
  # closing brace on its line is dropped.
  #
  # Returns the citation text, ending with its closing brace when one is found.
  #
  # NOTE: a bare "private" does not apply to methods defined on the class
  # itself, so this helper has always been callable from outside; it is left
  # that way to stay compatible with existing callers.
  def BibTeXParser.extract_block(f, start_line)

    depth = 0
    first_brace_seen = false
    block = "".dup # mutable buffer; appended to in place
    line = start_line

    while line
      line.each_char do |c|
        block << c

        if c == "{"
          depth += 1
          first_brace_seen = true
        elsif c == "}" && depth > 0
          depth -= 1
        end

        return block if first_brace_seen && depth == 0
      end

      line = f.eof? ? nil : f.gets
    end

    block

  end

end
|
184
|
+
|
185
|
+
#entries = BibTeXParser.parse_bibtex_file('cs9544a_project.bib')
|
186
|
+
#entries = BibTeXParser.parse_bibtex_file('test.bib')
|
187
|
+
#entries.each do |entry|
|
188
|
+
# puts "NEW ENTRY:\n==================================================\n#{entry.inspect}"
|
189
|
+
#end
|
metadata
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bibtex_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 1
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
version: 1.0.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Jeff Shantz
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-03-02 00:00:00 -05:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: A parser for the BibTeX citation format
|
22
|
+
email: x@y, where x = jshantz4, y = csd.uwo.ca
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files:
|
28
|
+
- README
|
29
|
+
- LICENSE
|
30
|
+
- CHANGELOG
|
31
|
+
files:
|
32
|
+
- LICENSE
|
33
|
+
- README
|
34
|
+
- CHANGELOG
|
35
|
+
- Rakefile
|
36
|
+
- lib/bibtex_author.rb
|
37
|
+
- lib/bibtex_entry.rb
|
38
|
+
- lib/bibtex_parser.rb
|
39
|
+
has_rdoc: true
|
40
|
+
homepage:
|
41
|
+
licenses: []
|
42
|
+
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
segments:
|
53
|
+
- 0
|
54
|
+
version: "0"
|
55
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
segments:
|
60
|
+
- 0
|
61
|
+
version: "0"
|
62
|
+
requirements: []
|
63
|
+
|
64
|
+
rubyforge_project:
|
65
|
+
rubygems_version: 1.3.6
|
66
|
+
signing_key:
|
67
|
+
specification_version: 3
|
68
|
+
summary: A parser for the BibTeX citation format
|
69
|
+
test_files: []
|
70
|
+
|