bibtex-ruby 1.3.5 → 1.3.6

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bibtex-ruby might be problematic. Click here for more details.

data/lib/bibtex/entry.rb CHANGED
@@ -227,6 +227,8 @@ module BibTeX
227
227
  def added_to_bibliography(bibliography)
228
228
  super
229
229
  bibliography.entries[key] = self
230
+ parse_names if bibliography.options[:parse_names]
231
+ parse_months if bibliography.options[:parse_months]
230
232
  self
231
233
  end
232
234
 
@@ -262,10 +264,10 @@ module BibTeX
262
264
  # Parses all name values of the entry. Tries to replace and join the
263
265
  # value prior to parsing.
264
266
  def parse_names
267
+ strings = bibliography ? bibliography.strings.values : []
265
268
  NAME_FIELDS.each do |key|
266
269
  if name = @fields[key]
267
- name.replace(bibliography.q('@string')) unless bibliography.nil?
268
- name.join
270
+ name.replace(strings).join
269
271
  name = name.to_name
270
272
  @fields[key] = name
271
273
  end
data/lib/bibtex/lexer.rb CHANGED
@@ -17,7 +17,7 @@
17
17
  #++
18
18
 
19
19
  require 'strscan'
20
-
20
+ require 'forwardable'
21
21
 
22
22
  module BibTeX
23
23
 
@@ -25,12 +25,19 @@ module BibTeX
25
25
  # The BibTeX::Lexer handles the lexical analysis of BibTeX bibliographies.
26
26
  #
27
27
  class Lexer
28
+ extend Forwardable
29
+ include Enumerable
28
30
 
29
- attr_reader :data, :options, :stack, :mode
31
+ attr_reader :options, :stack, :mode, :scanner
32
+ attr_writer :mode
33
+
34
+ def_delegator :@scanner, :string, :data
35
+ def_delegators :@stack, :each
30
36
 
31
37
  DEFAULTS = { :include => [:errors], :strict => true }.freeze
32
38
 
33
-
39
+ MODE = Hash.new(:meta).merge(:bibtex => :bibtex, :entry => :bibtex, :string => :bibtex, :preamble => :bibtex, :comment => :bibtex, :meta => :meta, :literal => :literal, :content => :content).freeze
40
+
34
41
  #
35
42
  # Creates a new instance. Possible options and their respective
36
43
  # default values are:
@@ -45,53 +52,38 @@ module BibTeX
45
52
  #
46
53
  def initialize(options = {})
47
54
  @options = DEFAULTS.merge(options)
48
- @data = nil
55
+ reset
49
56
  end
50
57
 
58
+ def reset
59
+ @stack, @brace_level, @mode, @active_object = [], 0, :meta, nil
60
+ @scanner.reset if @scanner
61
+
62
+ # cache options for speed
63
+ @include_meta_content = @options[:include].include?(:meta_content)
64
+ @include_errors = @options[:include].include?(:errors)
65
+
66
+ self
67
+ end
68
+
51
69
  # Sets the source for the lexical analysis and resets the internal state.
52
- def data=(string)
53
- @stack = []
54
- @brace_level = 0
55
- @mode = :meta
56
- @active_object = nil
57
- @data = StringScanner.new(string)
58
-
59
- # @line_breaks = []
60
- # @line_breaks << @data.pos until @data.scan_until(/\n|$/).empty?
61
- # @data.reset
70
+ def data=(data)
71
+ @scanner = StringScanner.new(data)
72
+ reset
62
73
  end
63
74
 
64
- # Returns the line number at a given position in the source.
65
- def line_number_at(index)
66
- 0 # (@line_breaks.find_index { |n| n >= index } || 0) + 1
67
- end
68
-
75
+ def symbols; @stack.map(&:first); end
76
+
69
77
  # Returns the next token from the parse stack.
70
- def next_token
71
- @stack.shift
72
- end
78
+ def next_token; @stack.shift; end
73
79
 
74
- def mode=(mode)
75
- # Log.debug("Lexer: switching to #{mode} mode...")
76
-
77
- @active_object = case
78
- when [:comment,:string,:preamble,:entry].include?(mode) then mode
79
- when mode == :meta then nil
80
- else @active_object
81
- end
82
-
83
- @mode = mode
84
- end
85
-
86
80
  # Returns true if the lexer is currently parsing a BibTeX object.
87
81
  def bibtex_mode?
88
- [:bibtex,:comment,:string,:preamble,:entry].include?(self.mode)
82
+ MODE[@mode] == :bibtex
89
83
  end
90
84
 
91
- %w{ meta literal content }.each do |m|
92
- define_method "#{m}_mode?" do
93
- mode == m.to_sym
94
- end
85
+ [:meta, :literal, :content].each do |m|
86
+ define_method("#{m}_mode?") { @mode == m }
95
87
  end
96
88
 
97
89
  # Returns true if the lexer is currently parsing the given object type.
@@ -102,155 +94,142 @@ module BibTeX
102
94
  # Returns true if the lexer is currently in strict mode.
103
95
  def strict?; !!(@options[:strict]); end
104
96
 
105
- # Pushes a value onto the parse stack.
97
+ # Pushes a value onto the parse stack. Returns the Lexer.
106
98
  def push(value)
107
- case
108
- when ([:CONTENT,:STRING_LITERAL].include?(value[0]) && value[0] == @stack.last[0])
109
- @stack.last[1][0] << value[1]
110
- @stack.last[1][1] = line_number_at(@data.pos)
111
- when value[0] == :ERROR
112
- @stack.push(value) if @options[:include].include?(:errors)
113
- leave_object
114
- when value[0] == :META_CONTENT
115
- if @options[:include].include?(:meta_content)
116
- value[1] = [value[1], line_number_at(@data.pos)]
117
- @stack.push(value)
99
+ case value[0]
100
+ when :CONTENT, :STRING_LITERAL
101
+ if !@stack.empty? && value[0] == @stack[-1][0]
102
+ @stack[-1][1] << value[1]
103
+ else
104
+ @stack.push(value)
118
105
  end
106
+ when :ERROR
107
+ @stack.push(value) if @include_errors
108
+ leave_object
109
+ when :META_CONTENT
110
+ @stack.push(value) if @include_meta_content
119
111
  else
120
- value[1] = [value[1], line_number_at(@data.pos)]
121
112
  @stack.push(value)
122
113
  end
114
+
123
115
  self
124
116
  end
125
117
 
126
118
  # Start the lexical analysis.
127
- def analyse(data=nil)
128
- raise(ArgumentError, 'Lexer: failed to start analysis: no source given!') if data.nil? && @data.nil?
129
- Log.debug('Lexer: starting lexical analysis...')
130
-
131
- self.data = data || @data.string
132
- @data.reset
119
+ def analyse(string = nil)
120
+ raise(ArgumentError, 'Lexer: failed to start analysis: no source given!') unless
121
+ string || @scanner
122
+
123
+ self.data = string || @scanner.string
133
124
 
134
- until @data.eos?
135
- case
136
- when bibtex_mode?
137
- parse_bibtex
138
- when meta_mode?
139
- parse_meta
140
- when content_mode?
141
- parse_content
142
- when literal_mode?
143
- parse_literal
144
- end
125
+ until @scanner.eos?
126
+ send("parse_#{MODE[@mode]}")
145
127
  end
146
128
 
147
- Log.debug('Lexer: finished lexical analysis.')
148
- push [false, '$end']
129
+ push([false, '$end'])
149
130
  end
150
131
 
132
+ private
133
+
151
134
  def parse_bibtex
152
135
  case
153
- when @data.scan(/[\t\r\n\s]+/o)
154
- when @data.scan(/\{/o)
136
+ when @scanner.scan(/[\t\r\n\s]+/o)
137
+ when @scanner.scan(/\{/o)
155
138
  @brace_level += 1
156
- push [:LBRACE,'{']
157
- if (@brace_level == 1 && active?(:comment)) || (@brace_level == 2 && active?(:entry))
158
- self.mode = :content
159
- end
160
- when @data.scan(/\}/o)
161
- return error_unbalanced_braces if @brace_level < 1
139
+ push([:LBRACE,'{'])
140
+ @mode = :content if @brace_level > 1 || @brace_level == 1 && active?(:comment)
141
+ when @scanner.scan(/\}/o)
162
142
  @brace_level -= 1
163
- push [:RBRACE,'}']
164
- leave_object if @brace_level == 0
165
- when @data.scan( /=/o)
166
- push [:EQ,'=']
167
- when @data.scan(/,/o)
168
- push [:COMMA,',']
169
- when @data.scan(/#/o)
170
- push [:SHARP,'#']
171
- when @data.scan(/\d+/o)
172
- push [:NUMBER,@data.matched]
173
- when @data.scan(/[a-z\d\/:_!$\.%&*-]+/io)
174
- push [:NAME,@data.matched]
175
- when @data.scan(/"/o)
176
- self.mode = :literal
177
- when @data.scan(/@/o)
178
- error_unexpected_token
179
- enter_object
180
- when @data.scan(/./o)
181
- error_unexpected_token
143
+ push([:RBRACE,'}'])
144
+ return leave_object if @brace_level == 0
145
+ return error_unbalanced_braces if @brace_level < 0
146
+ when @scanner.scan( /=/o)
147
+ push([:EQ,'='])
148
+ when @scanner.scan(/,/o)
149
+ push([:COMMA,','])
150
+ when @scanner.scan(/\d+/o)
151
+ push([:NUMBER,@scanner.matched])
152
+ when @scanner.scan(/[a-z\d\/:_!$\.%&*-]+/io)
153
+ push([:NAME,@scanner.matched])
154
+ when @scanner.scan(/"/o)
155
+ @mode = :literal
156
+ when @scanner.scan(/#/o)
157
+ push([:SHARP,'#'])
158
+ when @scanner.scan(/@/o)
182
159
  enter_object
160
+ when @scanner.scan(/./o)
161
+ error_unexpected_token
183
162
  end
184
163
  end
185
164
 
186
165
  def parse_meta
187
- match = @data.scan_until(strict? ? /@[\t ]*/o : /(^|\n)[\t ]*@[\t ]*/o)
188
- unless @data.matched.nil?
189
- push [:META_CONTENT, match.chop]
166
+ match = @scanner.scan_until(strict? ? /@[\t ]*/o : /(^|\n)[\t ]*@[\t ]*/o)
167
+ if @scanner.matched
168
+ push([:META_CONTENT,match.chop])
190
169
  enter_object
191
170
  else
192
- push [:META_CONTENT,@data.rest]
193
- @data.terminate
171
+ push([:META_CONTENT,@scanner.rest])
172
+ @scanner.terminate
194
173
  end
195
174
  end
196
175
 
197
176
  def parse_content
198
- match = @data.scan_until(/\{|\}/o)
199
- case @data.matched
177
+ match = @scanner.scan_until(/\{|\}/o)
178
+ case @scanner.matched
200
179
  when '{'
201
180
  @brace_level += 1
202
- push [:CONTENT,match]
181
+ push([:CONTENT,match])
203
182
  when '}'
204
183
  @brace_level -= 1
205
184
  case
206
- when @brace_level < 0
207
- push [:CONTENT,match.chop]
208
- error_unbalanced_braces
209
185
  when @brace_level == 0
210
- push [:CONTENT,match.chop]
211
- push [:RBRACE,'}']
186
+ push([:CONTENT,match.chop])
187
+ push([:RBRACE,'}'])
212
188
  leave_object
213
- when @brace_level == 1 && active?(:entry)
214
- push [:CONTENT,match.chop]
215
- push [:RBRACE,'}']
216
- self.mode = :bibtex
189
+ when @brace_level == 1 && !active?(:comment)
190
+ push([:CONTENT,match.chop])
191
+ push([:RBRACE,'}'])
192
+ @mode = :bibtex
193
+ when @brace_level < 0
194
+ push([:CONTENT,match.chop])
195
+ error_unbalanced_braces
217
196
  else
218
- push [:CONTENT, match]
197
+ push([:CONTENT,match])
219
198
  end
220
199
  else
221
- push [:CONTENT,@data.rest]
222
- @data.terminate
200
+ push([:CONTENT,@scanner.rest])
201
+ @scanner.terminate
223
202
  error_unterminated_content
224
203
  end
225
204
  end
226
205
 
227
206
  def parse_literal
228
- match = @data.scan_until(/[\{\}"\n]/o)
229
- case @data.matched
207
+ match = @scanner.scan_until(/[\{\}"\n]/o)
208
+ case @scanner.matched
230
209
  when '{'
231
210
  @brace_level += 1
232
- push [:STRING_LITERAL,match]
211
+ push([:STRING_LITERAL,match])
233
212
  when '}'
234
213
  @brace_level -= 1
235
214
  if @brace_level < 1
236
- push [:STRING_LITERAL,match.chop]
215
+ push([:STRING_LITERAL,match.chop])
237
216
  error_unbalanced_braces
238
217
  else
239
- push [:STRING_LITERAL,match]
218
+ push([:STRING_LITERAL,match])
240
219
  end
241
220
  when '"'
242
221
  if @brace_level == 1
243
- push [:STRING_LITERAL,match.chop]
244
- self.mode = :bibtex
222
+ push([:STRING_LITERAL,match.chop])
223
+ @mode = :bibtex
245
224
  else
246
- push [:STRING_LITERAL,match]
225
+ push([:STRING_LITERAL,match])
247
226
  end
248
227
  when "\n"
249
- push [:STRING_LITERAL,match.chop]
228
+ push([:STRING_LITERAL,match.chop])
250
229
  error_unterminated_string
251
230
  else
252
- push [:STRING_LITERAL,self.data.rest]
253
- @data.terminate
231
+ push([:STRING_LITERAL,@scanner.rest])
232
+ @scanner.terminate
254
233
  error_unterminated_string
255
234
  end
256
235
  end
@@ -258,63 +237,63 @@ module BibTeX
258
237
  # Called when the lexer encounters a new BibTeX object.
259
238
  def enter_object
260
239
  @brace_level = 0
261
- self.mode = :bibtex
262
240
  push [:AT,'@']
263
241
 
264
242
  case
265
- when @data.scan(/string/io)
266
- self.mode = :string
267
- push [:STRING, @data.matched]
268
- when @data.scan(/preamble/io)
269
- self.mode = :preamble
270
- push [:PREAMBLE, self.data.matched]
271
- when @data.scan(/comment/io)
272
- self.mode = :comment
273
- push [:COMMENT, self.data.matched]
274
- when @data.scan(/[a-z\d:_!\.$%&*-]+/io)
275
- self.mode = :entry
276
- push [:NAME, @data.matched]
243
+ when @scanner.scan(/string/io)
244
+ @mode = @active_object = :string
245
+ push [:STRING, @scanner.matched]
246
+ when @scanner.scan(/preamble/io)
247
+ @mode = @active_object = :preamble
248
+ push [:PREAMBLE, @scanner.matched]
249
+ when @scanner.scan(/comment/io)
250
+ @mode = @active_object = :comment
251
+ push [:COMMENT, @scanner.matched]
252
+ when @scanner.scan(/[a-z\d:_!\.$%&*-]+/io)
253
+ @mode = @active_object = :entry
254
+ push [:NAME, @scanner.matched]
255
+ else
256
+ error_unexpected_object
277
257
  end
278
258
  end
279
259
 
280
260
  # Called when parser leaves a BibTeX object.
281
261
  def leave_object
282
- self.mode = :meta
283
- @brace_level = 0
262
+ @mode, @active_object, @brace_level = :meta, nil, 0
284
263
  end
285
264
 
286
-
287
265
  def error_unbalanced_braces
288
- n = line_number_at(@data.pos)
289
- Log.warn("Lexer: unbalanced braces on line #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
290
- backtrace [:E_UNBALANCED_BRACES, [self.data.matched,n]]
266
+ Log.warn("Lexer: unbalanced braces at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
267
+ backtrace [:E_UNBALANCED, @scanner.matched]
291
268
  end
292
269
 
293
270
  def error_unterminated_string
294
- n = line_number_at(@data.pos)
295
- Log.warn("Lexer: unterminated string on line #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
296
- backtrace [:E_UNTERMINATED_STRING, [@data.matched,n]]
271
+ Log.warn("Lexer: unterminated string at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
272
+ backtrace [:E_UNTERMINATED_STRING, @scanner.matched]
297
273
  end
298
274
 
299
275
  def error_unterminated_content
300
- n = line_number_at(@data.pos)
301
- Log.warn("Lexer: unterminated content on line #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
302
- backtrace [:E_UNTERMINATED_CONTENT, [@data.matched,n]]
276
+ Log.warn("Lexer: unterminated content at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
277
+ backtrace [:E_UNTERMINATED_CONTENT, @scanner.matched]
303
278
  end
304
279
 
305
280
  def error_unexpected_token
306
- n = line_number_at(@data.pos)
307
- Log.warn("Lexer: unexpected token `#{@data.matched}' on line #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
308
- backtrace [:E_UNEXPECTED_TOKEN, [@data.matched,n]]
281
+ Log.warn("Lexer: unexpected token `#{@scanner.matched}' at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
282
+ backtrace [:E_UNEXPECTED_TOKEN, @scanner.matched]
309
283
  end
310
-
311
- def backtrace(error)
312
- trace = []
313
- trace.unshift(@stack.pop) until @stack.empty? || (!trace.empty? && [:AT,:META_CONTENT].include?(trace[0][0]))
314
- trace << error
315
- push [:ERROR,trace]
284
+
285
+ def error_unexpected_object
286
+ Log.warn("Lexer: unexpected object at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}.")
287
+ backtrace [:E_UNEXPECTED_OBJECT, '@']
316
288
  end
317
289
 
290
+ def backtrace(error)
291
+ bt = []
292
+ bt.unshift(@stack.pop) until @stack.empty? || (!bt.empty? && [:AT,:META_CONTENT].include?(bt[0][0]))
293
+ bt << error
294
+ push [:ERROR,bt]
295
+ end
296
+
318
297
  end
319
298
 
320
299
  end