bibtex-ruby 1.3.5 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bibtex-ruby might be problematic. Click here for more details.
- data/Gemfile.lock +4 -2
- data/History.txt +5 -0
- data/Manifest +4 -0
- data/README.md +9 -12
- data/bibtex-ruby.gemspec +1 -0
- data/features/issues/braced_strings.feature +48 -0
- data/features/issues/trailing_comma.feature +21 -0
- data/features/step_definitions/bibtex_steps.rb +9 -1
- data/features/strings.feature +1 -0
- data/lib/bibtex/bibliography.rb +25 -22
- data/lib/bibtex/bibtex.y +11 -23
- data/lib/bibtex/elements.rb +11 -13
- data/lib/bibtex/entry.rb +4 -2
- data/lib/bibtex/lexer.rb +142 -163
- data/lib/bibtex/names.rb +10 -10
- data/lib/bibtex/parser.output +135 -132
- data/lib/bibtex/parser.rb +69 -81
- data/lib/bibtex/utilities.rb +1 -1
- data/lib/bibtex/version.rb +1 -1
- data/test/bibtex/test_lexer.rb +11 -0
- data/test/fixtures/benchmark.bib +1 -1
- data/test/profile.dot +129 -74
- data/test/profile.rb +4 -2
- metadata +18 -3
data/lib/bibtex/entry.rb
CHANGED
@@ -227,6 +227,8 @@ module BibTeX
|
|
227
227
|
def added_to_bibliography(bibliography)
|
228
228
|
super
|
229
229
|
bibliography.entries[key] = self
|
230
|
+
parse_names if bibliography.options[:parse_names]
|
231
|
+
parse_months if bibliography.options[:parse_months]
|
230
232
|
self
|
231
233
|
end
|
232
234
|
|
@@ -262,10 +264,10 @@ module BibTeX
|
|
262
264
|
# Parses all name values of the entry. Tries to replace and join the
|
263
265
|
# value prior to parsing.
|
264
266
|
def parse_names
|
267
|
+
strings = bibliography ? bibliography.strings.values : []
|
265
268
|
NAME_FIELDS.each do |key|
|
266
269
|
if name = @fields[key]
|
267
|
-
name.replace(
|
268
|
-
name.join
|
270
|
+
name.replace(strings).join
|
269
271
|
name = name.to_name
|
270
272
|
@fields[key] = name
|
271
273
|
end
|
data/lib/bibtex/lexer.rb
CHANGED
@@ -17,7 +17,7 @@
|
|
17
17
|
#++
|
18
18
|
|
19
19
|
require 'strscan'
|
20
|
-
|
20
|
+
require 'forwardable'
|
21
21
|
|
22
22
|
module BibTeX
|
23
23
|
|
@@ -25,12 +25,19 @@ module BibTeX
|
|
25
25
|
# The BibTeX::Lexer handles the lexical analysis of BibTeX bibliographies.
|
26
26
|
#
|
27
27
|
class Lexer
|
28
|
+
extend Forwardable
|
29
|
+
include Enumerable
|
28
30
|
|
29
|
-
attr_reader :
|
31
|
+
attr_reader :options, :stack, :mode, :scanner
|
32
|
+
attr_writer :mode
|
33
|
+
|
34
|
+
def_delegator :@scanner, :string, :data
|
35
|
+
def_delegators :@stack, :each
|
30
36
|
|
31
37
|
DEFAULTS = { :include => [:errors], :strict => true }.freeze
|
32
38
|
|
33
|
-
|
39
|
+
MODE = Hash.new(:meta).merge(:bibtex => :bibtex, :entry => :bibtex, :string => :bibtex, :preamble => :bibtex, :comment => :bibtex, :meta => :meta, :literal => :literal, :content => :content).freeze
|
40
|
+
|
34
41
|
#
|
35
42
|
# Creates a new instance. Possible options and their respective
|
36
43
|
# default values are:
|
@@ -45,53 +52,38 @@ module BibTeX
|
|
45
52
|
#
|
46
53
|
def initialize(options = {})
|
47
54
|
@options = DEFAULTS.merge(options)
|
48
|
-
|
55
|
+
reset
|
49
56
|
end
|
50
57
|
|
58
|
+
def reset
|
59
|
+
@stack, @brace_level, @mode, @active_object = [], 0, :meta, nil
|
60
|
+
@scanner.reset if @scanner
|
61
|
+
|
62
|
+
# cache options for speed
|
63
|
+
@include_meta_content = @options[:include].include?(:meta_content)
|
64
|
+
@include_errors = @options[:include].include?(:errors)
|
65
|
+
|
66
|
+
self
|
67
|
+
end
|
68
|
+
|
51
69
|
# Sets the source for the lexical analysis and resets the internal state.
|
52
|
-
def data=(
|
53
|
-
@
|
54
|
-
|
55
|
-
@mode = :meta
|
56
|
-
@active_object = nil
|
57
|
-
@data = StringScanner.new(string)
|
58
|
-
|
59
|
-
# @line_breaks = []
|
60
|
-
# @line_breaks << @data.pos until @data.scan_until(/\n|$/).empty?
|
61
|
-
# @data.reset
|
70
|
+
def data=(data)
|
71
|
+
@scanner = StringScanner.new(data)
|
72
|
+
reset
|
62
73
|
end
|
63
74
|
|
64
|
-
|
65
|
-
|
66
|
-
0 # (@line_breaks.find_index { |n| n >= index } || 0) + 1
|
67
|
-
end
|
68
|
-
|
75
|
+
def symbols; @stack.map(&:first); end
|
76
|
+
|
69
77
|
# Returns the next token from the parse stack.
|
70
|
-
def next_token
|
71
|
-
@stack.shift
|
72
|
-
end
|
78
|
+
def next_token; @stack.shift; end
|
73
79
|
|
74
|
-
def mode=(mode)
|
75
|
-
# Log.debug("Lexer: switching to #{mode} mode...")
|
76
|
-
|
77
|
-
@active_object = case
|
78
|
-
when [:comment,:string,:preamble,:entry].include?(mode) then mode
|
79
|
-
when mode == :meta then nil
|
80
|
-
else @active_object
|
81
|
-
end
|
82
|
-
|
83
|
-
@mode = mode
|
84
|
-
end
|
85
|
-
|
86
80
|
# Returns true if the lexer is currenty parsing a BibTeX object.
|
87
81
|
def bibtex_mode?
|
88
|
-
[:bibtex
|
82
|
+
MODE[@mode] == :bibtex
|
89
83
|
end
|
90
84
|
|
91
|
-
|
92
|
-
define_method
|
93
|
-
mode == m.to_sym
|
94
|
-
end
|
85
|
+
[:meta, :literal, :content].each do |m|
|
86
|
+
define_method("#{m}_mode?") { @mode == m }
|
95
87
|
end
|
96
88
|
|
97
89
|
# Returns true if the lexer is currently parsing the given object type.
|
@@ -102,155 +94,142 @@ module BibTeX
|
|
102
94
|
# Returns true if the lexer is currently in strict mode.
|
103
95
|
def strict?; !!(@options[:strict]); end
|
104
96
|
|
105
|
-
# Pushes a value onto the parse stack.
|
97
|
+
# Pushes a value onto the parse stack. Returns the Lexer.
|
106
98
|
def push(value)
|
107
|
-
case
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
leave_object
|
114
|
-
when value[0] == :META_CONTENT
|
115
|
-
if @options[:include].include?(:meta_content)
|
116
|
-
value[1] = [value[1], line_number_at(@data.pos)]
|
117
|
-
@stack.push(value)
|
99
|
+
case value[0]
|
100
|
+
when :CONTENT, :STRING_LITERAL
|
101
|
+
if !@stack.empty? && value[0] == @stack[-1][0]
|
102
|
+
@stack[-1][1] << value[1]
|
103
|
+
else
|
104
|
+
@stack.push(value)
|
118
105
|
end
|
106
|
+
when :ERROR
|
107
|
+
@stack.push(value) if @include_errors
|
108
|
+
leave_object
|
109
|
+
when :META_CONTENT
|
110
|
+
@stack.push(value) if @include_meta_content
|
119
111
|
else
|
120
|
-
value[1] = [value[1], line_number_at(@data.pos)]
|
121
112
|
@stack.push(value)
|
122
113
|
end
|
114
|
+
|
123
115
|
self
|
124
116
|
end
|
125
117
|
|
126
118
|
# Start the lexical analysis.
|
127
|
-
def analyse(
|
128
|
-
raise(ArgumentError, 'Lexer: failed to start analysis: no source given!')
|
129
|
-
|
130
|
-
|
131
|
-
self.data =
|
132
|
-
@data.reset
|
119
|
+
def analyse(string = nil)
|
120
|
+
raise(ArgumentError, 'Lexer: failed to start analysis: no source given!') unless
|
121
|
+
string || @scanner
|
122
|
+
|
123
|
+
self.data = string || @scanner.string
|
133
124
|
|
134
|
-
|
135
|
-
|
136
|
-
when bibtex_mode?
|
137
|
-
parse_bibtex
|
138
|
-
when meta_mode?
|
139
|
-
parse_meta
|
140
|
-
when content_mode?
|
141
|
-
parse_content
|
142
|
-
when literal_mode?
|
143
|
-
parse_literal
|
144
|
-
end
|
125
|
+
until @scanner.eos?
|
126
|
+
send("parse_#{MODE[@mode]}")
|
145
127
|
end
|
146
128
|
|
147
|
-
|
148
|
-
push [false, '$end']
|
129
|
+
push([false, '$end'])
|
149
130
|
end
|
150
131
|
|
132
|
+
private
|
133
|
+
|
151
134
|
def parse_bibtex
|
152
135
|
case
|
153
|
-
when @
|
154
|
-
when @
|
136
|
+
when @scanner.scan(/[\t\r\n\s]+/o)
|
137
|
+
when @scanner.scan(/\{/o)
|
155
138
|
@brace_level += 1
|
156
|
-
push
|
157
|
-
if
|
158
|
-
|
159
|
-
end
|
160
|
-
when @data.scan(/\}/o)
|
161
|
-
return error_unbalanced_braces if @brace_level < 1
|
139
|
+
push([:LBRACE,'{'])
|
140
|
+
@mode = :content if @brace_level > 1 || @brace_level == 1 && active?(:comment)
|
141
|
+
when @scanner.scan(/\}/o)
|
162
142
|
@brace_level -= 1
|
163
|
-
push
|
164
|
-
leave_object if @brace_level == 0
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
enter_object
|
180
|
-
when @data.scan(/./o)
|
181
|
-
error_unexpected_token
|
143
|
+
push([:RBRACE,'}'])
|
144
|
+
return leave_object if @brace_level == 0
|
145
|
+
return error_unbalanced_braces if @brace_level < 0
|
146
|
+
when @scanner.scan( /=/o)
|
147
|
+
push([:EQ,'='])
|
148
|
+
when @scanner.scan(/,/o)
|
149
|
+
push([:COMMA,','])
|
150
|
+
when @scanner.scan(/\d+/o)
|
151
|
+
push([:NUMBER,@scanner.matched])
|
152
|
+
when @scanner.scan(/[a-z\d\/:_!$\.%&*-]+/io)
|
153
|
+
push([:NAME,@scanner.matched])
|
154
|
+
when @scanner.scan(/"/o)
|
155
|
+
@mode = :literal
|
156
|
+
when @scanner.scan(/#/o)
|
157
|
+
push([:SHARP,'#'])
|
158
|
+
when @scanner.scan(/@/o)
|
182
159
|
enter_object
|
160
|
+
when @scanner.scan(/./o)
|
161
|
+
error_unexpected_token
|
183
162
|
end
|
184
163
|
end
|
185
164
|
|
186
165
|
def parse_meta
|
187
|
-
match = @
|
188
|
-
|
189
|
-
push
|
166
|
+
match = @scanner.scan_until(strict? ? /@[\t ]*/o : /(^|\n)[\t ]*@[\t ]*/o)
|
167
|
+
if @scanner.matched
|
168
|
+
push([:META_CONTENT,match.chop])
|
190
169
|
enter_object
|
191
170
|
else
|
192
|
-
push
|
193
|
-
@
|
171
|
+
push([:META_CONTENT,@scanner.rest])
|
172
|
+
@scanner.terminate
|
194
173
|
end
|
195
174
|
end
|
196
175
|
|
197
176
|
def parse_content
|
198
|
-
match = @
|
199
|
-
case @
|
177
|
+
match = @scanner.scan_until(/\{|\}/o)
|
178
|
+
case @scanner.matched
|
200
179
|
when '{'
|
201
180
|
@brace_level += 1
|
202
|
-
push
|
181
|
+
push([:CONTENT,match])
|
203
182
|
when '}'
|
204
183
|
@brace_level -= 1
|
205
184
|
case
|
206
|
-
when @brace_level < 0
|
207
|
-
push [:CONTENT,match.chop]
|
208
|
-
error_unbalanced_braces
|
209
185
|
when @brace_level == 0
|
210
|
-
push
|
211
|
-
push
|
186
|
+
push([:CONTENT,match.chop])
|
187
|
+
push([:RBRACE,'}'])
|
212
188
|
leave_object
|
213
|
-
when @brace_level == 1 && active?(:
|
214
|
-
push
|
215
|
-
push
|
216
|
-
|
189
|
+
when @brace_level == 1 && !active?(:comment)
|
190
|
+
push([:CONTENT,match.chop])
|
191
|
+
push([:RBRACE,'}'])
|
192
|
+
@mode = :bibtex
|
193
|
+
when @brace_level < 0
|
194
|
+
push([:CONTENT,match.chop])
|
195
|
+
error_unbalanced_braces
|
217
196
|
else
|
218
|
-
push
|
197
|
+
push([:CONTENT,match])
|
219
198
|
end
|
220
199
|
else
|
221
|
-
push
|
222
|
-
@
|
200
|
+
push([:CONTENT,@scanner.rest])
|
201
|
+
@scanner.terminate
|
223
202
|
error_unterminated_content
|
224
203
|
end
|
225
204
|
end
|
226
205
|
|
227
206
|
def parse_literal
|
228
|
-
match = @
|
229
|
-
case @
|
207
|
+
match = @scanner.scan_until(/[\{\}"\n]/o)
|
208
|
+
case @scanner.matched
|
230
209
|
when '{'
|
231
210
|
@brace_level += 1
|
232
|
-
push
|
211
|
+
push([:STRING_LITERAL,match])
|
233
212
|
when '}'
|
234
213
|
@brace_level -= 1
|
235
214
|
if @brace_level < 1
|
236
|
-
push
|
215
|
+
push([:STRING_LITERAL,match.chop])
|
237
216
|
error_unbalanced_braces
|
238
217
|
else
|
239
|
-
push
|
218
|
+
push([:STRING_LITERAL,match])
|
240
219
|
end
|
241
220
|
when '"'
|
242
221
|
if @brace_level == 1
|
243
|
-
push
|
244
|
-
|
222
|
+
push([:STRING_LITERAL,match.chop])
|
223
|
+
@mode = :bibtex
|
245
224
|
else
|
246
|
-
push
|
225
|
+
push([:STRING_LITERAL,match])
|
247
226
|
end
|
248
227
|
when "\n"
|
249
|
-
push
|
228
|
+
push([:STRING_LITERAL,match.chop])
|
250
229
|
error_unterminated_string
|
251
230
|
else
|
252
|
-
push
|
253
|
-
@
|
231
|
+
push([:STRING_LITERAL,@scanner.rest])
|
232
|
+
@scanner.terminate
|
254
233
|
error_unterminated_string
|
255
234
|
end
|
256
235
|
end
|
@@ -258,63 +237,63 @@ module BibTeX
|
|
258
237
|
# Called when the lexer encounters a new BibTeX object.
|
259
238
|
def enter_object
|
260
239
|
@brace_level = 0
|
261
|
-
self.mode = :bibtex
|
262
240
|
push [:AT,'@']
|
263
241
|
|
264
242
|
case
|
265
|
-
when @
|
266
|
-
|
267
|
-
push [:STRING, @
|
268
|
-
when @
|
269
|
-
|
270
|
-
push [:PREAMBLE,
|
271
|
-
when @
|
272
|
-
|
273
|
-
push [:COMMENT,
|
274
|
-
when @
|
275
|
-
|
276
|
-
push [:NAME, @
|
243
|
+
when @scanner.scan(/string/io)
|
244
|
+
@mode = @active_object = :string
|
245
|
+
push [:STRING, @scanner.matched]
|
246
|
+
when @scanner.scan(/preamble/io)
|
247
|
+
@mode = @active_object = :preamble
|
248
|
+
push [:PREAMBLE, @scanner.matched]
|
249
|
+
when @scanner.scan(/comment/io)
|
250
|
+
@mode = @active_object = :comment
|
251
|
+
push [:COMMENT, @scanner.matched]
|
252
|
+
when @scanner.scan(/[a-z\d:_!\.$%&*-]+/io)
|
253
|
+
@mode = @active_object = :entry
|
254
|
+
push [:NAME, @scanner.matched]
|
255
|
+
else
|
256
|
+
error_unexpected_object
|
277
257
|
end
|
278
258
|
end
|
279
259
|
|
280
260
|
# Called when parser leaves a BibTeX object.
|
281
261
|
def leave_object
|
282
|
-
|
283
|
-
@brace_level = 0
|
262
|
+
@mode, @active_object, @brace_level = :meta, nil, 0
|
284
263
|
end
|
285
264
|
|
286
|
-
|
287
265
|
def error_unbalanced_braces
|
288
|
-
|
289
|
-
|
290
|
-
backtrace [:E_UNBALANCED_BRACES, [self.data.matched,n]]
|
266
|
+
Log.warn("Lexer: unbalanced braces at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
|
267
|
+
backtrace [:E_UNBALANCED, @scanner.matched]
|
291
268
|
end
|
292
269
|
|
293
270
|
def error_unterminated_string
|
294
|
-
|
295
|
-
|
296
|
-
backtrace [:E_UNTERMINATED_STRING, [@data.matched,n]]
|
271
|
+
Log.warn("Lexer: unterminated string at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
|
272
|
+
backtrace [:E_UNTERMINATED_STRING, @scanner.matched]
|
297
273
|
end
|
298
274
|
|
299
275
|
def error_unterminated_content
|
300
|
-
|
301
|
-
|
302
|
-
backtrace [:E_UNTERMINATED_CONTENT, [@data.matched,n]]
|
276
|
+
Log.warn("Lexer: unterminated content at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
|
277
|
+
backtrace [:E_UNTERMINATED_CONTENT, @scanner.matched]
|
303
278
|
end
|
304
279
|
|
305
280
|
def error_unexpected_token
|
306
|
-
|
307
|
-
|
308
|
-
backtrace [:E_UNEXPECTED_TOKEN, [@data.matched,n]]
|
281
|
+
Log.warn("Lexer: unexpected token `#{@scanner.matched}' at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
|
282
|
+
backtrace [:E_UNEXPECTED_TOKEN, @scanner.matched]
|
309
283
|
end
|
310
|
-
|
311
|
-
def
|
312
|
-
|
313
|
-
|
314
|
-
trace << error
|
315
|
-
push [:ERROR,trace]
|
284
|
+
|
285
|
+
def error_unexpected_object
|
286
|
+
Log.warn("Lexer: unexpected object at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
|
287
|
+
backtrace [:E_UNEXPECTED_OBJECT, '@']
|
316
288
|
end
|
317
289
|
|
290
|
+
def backtrace(error)
|
291
|
+
bt = []
|
292
|
+
bt.unshift(@stack.pop) until @stack.empty? || (!bt.empty? && [:AT,:META_CONTENT].include?(bt[0][0]))
|
293
|
+
bt << error
|
294
|
+
push [:ERROR,bt]
|
295
|
+
end
|
296
|
+
|
318
297
|
end
|
319
298
|
|
320
299
|
end
|