bibtex-ruby 1.3.5 → 1.3.6

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of bibtex-ruby might be problematic. Click here for more details.

data/lib/bibtex/entry.rb CHANGED
@@ -227,6 +227,8 @@ module BibTeX
227
227
  def added_to_bibliography(bibliography)
228
228
  super
229
229
  bibliography.entries[key] = self
230
+ parse_names if bibliography.options[:parse_names]
231
+ parse_months if bibliography.options[:parse_months]
230
232
  self
231
233
  end
232
234
 
@@ -262,10 +264,10 @@ module BibTeX
262
264
  # Parses all name values of the entry. Tries to replace and join the
263
265
  # value prior to parsing.
264
266
  def parse_names
267
+ strings = bibliography ? bibliography.strings.values : []
265
268
  NAME_FIELDS.each do |key|
266
269
  if name = @fields[key]
267
- name.replace(bibliography.q('@string')) unless bibliography.nil?
268
- name.join
270
+ name.replace(strings).join
269
271
  name = name.to_name
270
272
  @fields[key] = name
271
273
  end
data/lib/bibtex/lexer.rb CHANGED
@@ -17,7 +17,7 @@
17
17
  #++
18
18
 
19
19
  require 'strscan'
20
-
20
+ require 'forwardable'
21
21
 
22
22
  module BibTeX
23
23
 
@@ -25,12 +25,19 @@ module BibTeX
25
25
  # The BibTeX::Lexer handles the lexical analysis of BibTeX bibliographies.
26
26
  #
27
27
  class Lexer
28
+ extend Forwardable
29
+ include Enumerable
28
30
 
29
- attr_reader :data, :options, :stack, :mode
31
+ attr_reader :options, :stack, :mode, :scanner
32
+ attr_writer :mode
33
+
34
+ def_delegator :@scanner, :string, :data
35
+ def_delegators :@stack, :each
30
36
 
31
37
  DEFAULTS = { :include => [:errors], :strict => true }.freeze
32
38
 
33
-
39
+ MODE = Hash.new(:meta).merge(:bibtex => :bibtex, :entry => :bibtex, :string => :bibtex, :preamble => :bibtex, :comment => :bibtex, :meta => :meta, :literal => :literal, :content => :content).freeze
40
+
34
41
  #
35
42
  # Creates a new instance. Possible options and their respective
36
43
  # default values are:
@@ -45,53 +52,38 @@ module BibTeX
45
52
  #
46
53
  def initialize(options = {})
47
54
  @options = DEFAULTS.merge(options)
48
- @data = nil
55
+ reset
49
56
  end
50
57
 
58
+ def reset
59
+ @stack, @brace_level, @mode, @active_object = [], 0, :meta, nil
60
+ @scanner.reset if @scanner
61
+
62
+ # cache options for speed
63
+ @include_meta_content = @options[:include].include?(:meta_content)
64
+ @include_errors = @options[:include].include?(:errors)
65
+
66
+ self
67
+ end
68
+
51
69
  # Sets the source for the lexical analysis and resets the internal state.
52
- def data=(string)
53
- @stack = []
54
- @brace_level = 0
55
- @mode = :meta
56
- @active_object = nil
57
- @data = StringScanner.new(string)
58
-
59
- # @line_breaks = []
60
- # @line_breaks << @data.pos until @data.scan_until(/\n|$/).empty?
61
- # @data.reset
70
+ def data=(data)
71
+ @scanner = StringScanner.new(data)
72
+ reset
62
73
  end
63
74
 
64
- # Returns the line number at a given position in the source.
65
- def line_number_at(index)
66
- 0 # (@line_breaks.find_index { |n| n >= index } || 0) + 1
67
- end
68
-
75
+ def symbols; @stack.map(&:first); end
76
+
69
77
  # Returns the next token from the parse stack.
70
- def next_token
71
- @stack.shift
72
- end
78
+ def next_token; @stack.shift; end
73
79
 
74
- def mode=(mode)
75
- # Log.debug("Lexer: switching to #{mode} mode...")
76
-
77
- @active_object = case
78
- when [:comment,:string,:preamble,:entry].include?(mode) then mode
79
- when mode == :meta then nil
80
- else @active_object
81
- end
82
-
83
- @mode = mode
84
- end
85
-
86
80
  # Returns true if the lexer is currently parsing a BibTeX object.
87
81
  def bibtex_mode?
88
- [:bibtex,:comment,:string,:preamble,:entry].include?(self.mode)
82
+ MODE[@mode] == :bibtex
89
83
  end
90
84
 
91
- %w{ meta literal content }.each do |m|
92
- define_method "#{m}_mode?" do
93
- mode == m.to_sym
94
- end
85
+ [:meta, :literal, :content].each do |m|
86
+ define_method("#{m}_mode?") { @mode == m }
95
87
  end
96
88
 
97
89
  # Returns true if the lexer is currently parsing the given object type.
@@ -102,155 +94,142 @@ module BibTeX
102
94
  # Returns true if the lexer is currently in strict mode.
103
95
  def strict?; !!(@options[:strict]); end
104
96
 
105
- # Pushes a value onto the parse stack.
97
+ # Pushes a value onto the parse stack. Returns the Lexer.
106
98
  def push(value)
107
- case
108
- when ([:CONTENT,:STRING_LITERAL].include?(value[0]) && value[0] == @stack.last[0])
109
- @stack.last[1][0] << value[1]
110
- @stack.last[1][1] = line_number_at(@data.pos)
111
- when value[0] == :ERROR
112
- @stack.push(value) if @options[:include].include?(:errors)
113
- leave_object
114
- when value[0] == :META_CONTENT
115
- if @options[:include].include?(:meta_content)
116
- value[1] = [value[1], line_number_at(@data.pos)]
117
- @stack.push(value)
99
+ case value[0]
100
+ when :CONTENT, :STRING_LITERAL
101
+ if !@stack.empty? && value[0] == @stack[-1][0]
102
+ @stack[-1][1] << value[1]
103
+ else
104
+ @stack.push(value)
118
105
  end
106
+ when :ERROR
107
+ @stack.push(value) if @include_errors
108
+ leave_object
109
+ when :META_CONTENT
110
+ @stack.push(value) if @include_meta_content
119
111
  else
120
- value[1] = [value[1], line_number_at(@data.pos)]
121
112
  @stack.push(value)
122
113
  end
114
+
123
115
  self
124
116
  end
125
117
 
126
118
  # Start the lexical analysis.
127
- def analyse(data=nil)
128
- raise(ArgumentError, 'Lexer: failed to start analysis: no source given!') if data.nil? && @data.nil?
129
- Log.debug('Lexer: starting lexical analysis...')
130
-
131
- self.data = data || @data.string
132
- @data.reset
119
+ def analyse(string = nil)
120
+ raise(ArgumentError, 'Lexer: failed to start analysis: no source given!') unless
121
+ string || @scanner
122
+
123
+ self.data = string || @scanner.string
133
124
 
134
- until @data.eos?
135
- case
136
- when bibtex_mode?
137
- parse_bibtex
138
- when meta_mode?
139
- parse_meta
140
- when content_mode?
141
- parse_content
142
- when literal_mode?
143
- parse_literal
144
- end
125
+ until @scanner.eos?
126
+ send("parse_#{MODE[@mode]}")
145
127
  end
146
128
 
147
- Log.debug('Lexer: finished lexical analysis.')
148
- push [false, '$end']
129
+ push([false, '$end'])
149
130
  end
150
131
 
132
+ private
133
+
151
134
  def parse_bibtex
152
135
  case
153
- when @data.scan(/[\t\r\n\s]+/o)
154
- when @data.scan(/\{/o)
136
+ when @scanner.scan(/[\t\r\n\s]+/o)
137
+ when @scanner.scan(/\{/o)
155
138
  @brace_level += 1
156
- push [:LBRACE,'{']
157
- if (@brace_level == 1 && active?(:comment)) || (@brace_level == 2 && active?(:entry))
158
- self.mode = :content
159
- end
160
- when @data.scan(/\}/o)
161
- return error_unbalanced_braces if @brace_level < 1
139
+ push([:LBRACE,'{'])
140
+ @mode = :content if @brace_level > 1 || @brace_level == 1 && active?(:comment)
141
+ when @scanner.scan(/\}/o)
162
142
  @brace_level -= 1
163
- push [:RBRACE,'}']
164
- leave_object if @brace_level == 0
165
- when @data.scan( /=/o)
166
- push [:EQ,'=']
167
- when @data.scan(/,/o)
168
- push [:COMMA,',']
169
- when @data.scan(/#/o)
170
- push [:SHARP,'#']
171
- when @data.scan(/\d+/o)
172
- push [:NUMBER,@data.matched]
173
- when @data.scan(/[a-z\d\/:_!$\.%&*-]+/io)
174
- push [:NAME,@data.matched]
175
- when @data.scan(/"/o)
176
- self.mode = :literal
177
- when @data.scan(/@/o)
178
- error_unexpected_token
179
- enter_object
180
- when @data.scan(/./o)
181
- error_unexpected_token
143
+ push([:RBRACE,'}'])
144
+ return leave_object if @brace_level == 0
145
+ return error_unbalanced_braces if @brace_level < 0
146
+ when @scanner.scan( /=/o)
147
+ push([:EQ,'='])
148
+ when @scanner.scan(/,/o)
149
+ push([:COMMA,','])
150
+ when @scanner.scan(/\d+/o)
151
+ push([:NUMBER,@scanner.matched])
152
+ when @scanner.scan(/[a-z\d\/:_!$\.%&*-]+/io)
153
+ push([:NAME,@scanner.matched])
154
+ when @scanner.scan(/"/o)
155
+ @mode = :literal
156
+ when @scanner.scan(/#/o)
157
+ push([:SHARP,'#'])
158
+ when @scanner.scan(/@/o)
182
159
  enter_object
160
+ when @scanner.scan(/./o)
161
+ error_unexpected_token
183
162
  end
184
163
  end
185
164
 
186
165
  def parse_meta
187
- match = @data.scan_until(strict? ? /@[\t ]*/o : /(^|\n)[\t ]*@[\t ]*/o)
188
- unless @data.matched.nil?
189
- push [:META_CONTENT, match.chop]
166
+ match = @scanner.scan_until(strict? ? /@[\t ]*/o : /(^|\n)[\t ]*@[\t ]*/o)
167
+ if @scanner.matched
168
+ push([:META_CONTENT,match.chop])
190
169
  enter_object
191
170
  else
192
- push [:META_CONTENT,@data.rest]
193
- @data.terminate
171
+ push([:META_CONTENT,@scanner.rest])
172
+ @scanner.terminate
194
173
  end
195
174
  end
196
175
 
197
176
  def parse_content
198
- match = @data.scan_until(/\{|\}/o)
199
- case @data.matched
177
+ match = @scanner.scan_until(/\{|\}/o)
178
+ case @scanner.matched
200
179
  when '{'
201
180
  @brace_level += 1
202
- push [:CONTENT,match]
181
+ push([:CONTENT,match])
203
182
  when '}'
204
183
  @brace_level -= 1
205
184
  case
206
- when @brace_level < 0
207
- push [:CONTENT,match.chop]
208
- error_unbalanced_braces
209
185
  when @brace_level == 0
210
- push [:CONTENT,match.chop]
211
- push [:RBRACE,'}']
186
+ push([:CONTENT,match.chop])
187
+ push([:RBRACE,'}'])
212
188
  leave_object
213
- when @brace_level == 1 && active?(:entry)
214
- push [:CONTENT,match.chop]
215
- push [:RBRACE,'}']
216
- self.mode = :bibtex
189
+ when @brace_level == 1 && !active?(:comment)
190
+ push([:CONTENT,match.chop])
191
+ push([:RBRACE,'}'])
192
+ @mode = :bibtex
193
+ when @brace_level < 0
194
+ push([:CONTENT,match.chop])
195
+ error_unbalanced_braces
217
196
  else
218
- push [:CONTENT, match]
197
+ push([:CONTENT,match])
219
198
  end
220
199
  else
221
- push [:CONTENT,@data.rest]
222
- @data.terminate
200
+ push([:CONTENT,@scanner.rest])
201
+ @scanner.terminate
223
202
  error_unterminated_content
224
203
  end
225
204
  end
226
205
 
227
206
  def parse_literal
228
- match = @data.scan_until(/[\{\}"\n]/o)
229
- case @data.matched
207
+ match = @scanner.scan_until(/[\{\}"\n]/o)
208
+ case @scanner.matched
230
209
  when '{'
231
210
  @brace_level += 1
232
- push [:STRING_LITERAL,match]
211
+ push([:STRING_LITERAL,match])
233
212
  when '}'
234
213
  @brace_level -= 1
235
214
  if @brace_level < 1
236
- push [:STRING_LITERAL,match.chop]
215
+ push([:STRING_LITERAL,match.chop])
237
216
  error_unbalanced_braces
238
217
  else
239
- push [:STRING_LITERAL,match]
218
+ push([:STRING_LITERAL,match])
240
219
  end
241
220
  when '"'
242
221
  if @brace_level == 1
243
- push [:STRING_LITERAL,match.chop]
244
- self.mode = :bibtex
222
+ push([:STRING_LITERAL,match.chop])
223
+ @mode = :bibtex
245
224
  else
246
- push [:STRING_LITERAL,match]
225
+ push([:STRING_LITERAL,match])
247
226
  end
248
227
  when "\n"
249
- push [:STRING_LITERAL,match.chop]
228
+ push([:STRING_LITERAL,match.chop])
250
229
  error_unterminated_string
251
230
  else
252
- push [:STRING_LITERAL,self.data.rest]
253
- @data.terminate
231
+ push([:STRING_LITERAL,@scanner.rest])
232
+ @scanner.terminate
254
233
  error_unterminated_string
255
234
  end
256
235
  end
@@ -258,63 +237,63 @@ module BibTeX
258
237
  # Called when the lexer encounters a new BibTeX object.
259
238
  def enter_object
260
239
  @brace_level = 0
261
- self.mode = :bibtex
262
240
  push [:AT,'@']
263
241
 
264
242
  case
265
- when @data.scan(/string/io)
266
- self.mode = :string
267
- push [:STRING, @data.matched]
268
- when @data.scan(/preamble/io)
269
- self.mode = :preamble
270
- push [:PREAMBLE, self.data.matched]
271
- when @data.scan(/comment/io)
272
- self.mode = :comment
273
- push [:COMMENT, self.data.matched]
274
- when @data.scan(/[a-z\d:_!\.$%&*-]+/io)
275
- self.mode = :entry
276
- push [:NAME, @data.matched]
243
+ when @scanner.scan(/string/io)
244
+ @mode = @active_object = :string
245
+ push [:STRING, @scanner.matched]
246
+ when @scanner.scan(/preamble/io)
247
+ @mode = @active_object = :preamble
248
+ push [:PREAMBLE, @scanner.matched]
249
+ when @scanner.scan(/comment/io)
250
+ @mode = @active_object = :comment
251
+ push [:COMMENT, @scanner.matched]
252
+ when @scanner.scan(/[a-z\d:_!\.$%&*-]+/io)
253
+ @mode = @active_object = :entry
254
+ push [:NAME, @scanner.matched]
255
+ else
256
+ error_unexpected_object
277
257
  end
278
258
  end
279
259
 
280
260
  # Called when parser leaves a BibTeX object.
281
261
  def leave_object
282
- self.mode = :meta
283
- @brace_level = 0
262
+ @mode, @active_object, @brace_level = :meta, nil, 0
284
263
  end
285
264
 
286
-
287
265
  def error_unbalanced_braces
288
- n = line_number_at(@data.pos)
289
- Log.warn("Lexer: unbalanced braces on line #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
290
- backtrace [:E_UNBALANCED_BRACES, [self.data.matched,n]]
266
+ Log.warn("Lexer: unbalanced braces at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
267
+ backtrace [:E_UNBALANCED, @scanner.matched]
291
268
  end
292
269
 
293
270
  def error_unterminated_string
294
- n = line_number_at(@data.pos)
295
- Log.warn("Lexer: unterminated string on line #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
296
- backtrace [:E_UNTERMINATED_STRING, [@data.matched,n]]
271
+ Log.warn("Lexer: unterminated string at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
272
+ backtrace [:E_UNTERMINATED_STRING, @scanner.matched]
297
273
  end
298
274
 
299
275
  def error_unterminated_content
300
- n = line_number_at(@data.pos)
301
- Log.warn("Lexer: unterminated content on line #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
302
- backtrace [:E_UNTERMINATED_CONTENT, [@data.matched,n]]
276
+ Log.warn("Lexer: unterminated content at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
277
+ backtrace [:E_UNTERMINATED_CONTENT, @scanner.matched]
303
278
  end
304
279
 
305
280
  def error_unexpected_token
306
- n = line_number_at(@data.pos)
307
- Log.warn("Lexer: unexpected token `#{@data.matched}' on line #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
308
- backtrace [:E_UNEXPECTED_TOKEN, [@data.matched,n]]
281
+ Log.warn("Lexer: unexpected token `#{@scanner.matched}' at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
282
+ backtrace [:E_UNEXPECTED_TOKEN, @scanner.matched]
309
283
  end
310
-
311
- def backtrace(error)
312
- trace = []
313
- trace.unshift(@stack.pop) until @stack.empty? || (!trace.empty? && [:AT,:META_CONTENT].include?(trace[0][0]))
314
- trace << error
315
- push [:ERROR,trace]
284
+
285
+ def error_unexpected_object
286
+ Log.warn("Lexer: unexpected object at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
287
+ backtrace [:E_UNEXPECTED_OBJECT, '@']
316
288
  end
317
289
 
290
+ def backtrace(error)
291
+ bt = []
292
+ bt.unshift(@stack.pop) until @stack.empty? || (!bt.empty? && [:AT,:META_CONTENT].include?(bt[0][0]))
293
+ bt << error
294
+ push [:ERROR,bt]
295
+ end
296
+
318
297
  end
319
298
 
320
299
  end