kpeg 0.9.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,13 +1,380 @@
1
- require 'kpeg/compiled_parser'
1
+ class KPeg::StringEscape
2
+ # :stopdoc:
3
+
4
+ # This is distinct from setup_parser so that a standalone parser
5
+ # can redefine #initialize and still have access to the proper
6
+ # parser setup code.
7
+ def initialize(str, debug=false)
8
+ setup_parser(str, debug)
9
+ end
10
+
11
+
12
+
13
+ # Prepares for parsing +str+. If you define a custom initialize you must
14
+ # call this method before #parse
15
+ def setup_parser(str, debug=false)
16
+ set_string str, 0
17
+ @memoizations = Hash.new { |h,k| h[k] = {} }
18
+ @result = nil
19
+ @failed_rule = nil
20
+ @failing_rule_offset = -1
21
+ @line_offsets = nil
22
+
23
+ setup_foreign_grammar
24
+ end
25
+
26
+ attr_reader :string
27
+ attr_reader :failing_rule_offset
28
+ attr_accessor :result, :pos
29
+
30
+ def current_column(target=pos)
31
+ if c = string.rindex("\n", target-1)
32
+ return target - c - 1
33
+ end
34
+
35
+ target + 1
36
+ end
37
+
38
+ if [].respond_to? :bsearch_index
39
+ def current_line(target=pos)
40
+ unless @line_offsets
41
+ @line_offsets = [-1]
42
+ total = 0
43
+ string.each_line do |line|
44
+ @line_offsets << total
45
+ total += line.size
46
+ end
47
+ @line_offsets << total
48
+ end
49
+
50
+ @line_offsets.bsearch_index {|x| x >= target } || -1
51
+ end
52
+ else
53
+ def current_line(target=pos)
54
+ cur_offset = 0
55
+ cur_line = 0
56
+
57
+ string.each_line do |line|
58
+ cur_line += 1
59
+ cur_offset += line.size
60
+ return cur_line if cur_offset >= target
61
+ end
62
+
63
+ -1
64
+ end
65
+ end
66
+
67
+ def lines
68
+ lines = []
69
+ string.each_line { |l| lines << l }
70
+ lines
71
+ end
72
+
73
+
74
+
75
+ def get_text(start)
76
+ @string[start..@pos-1]
77
+ end
78
+
79
+ # Sets the string and current parsing position for the parser.
80
+ def set_string string, pos
81
+ @string = string
82
+ @string_size = string ? string.size : 0
83
+ @pos = pos
84
+ end
85
+
86
+ def show_pos
87
+ width = 10
88
+ if @pos < width
89
+ "#{@pos} (\"#{@string[0,@pos]}\" @ \"#{@string[@pos,width]}\")"
90
+ else
91
+ "#{@pos} (\"... #{@string[@pos - width, width]}\" @ \"#{@string[@pos,width]}\")"
92
+ end
93
+ end
94
+
95
+ def failure_info
96
+ l = current_line @failing_rule_offset
97
+ c = current_column @failing_rule_offset
98
+
99
+ if @failed_rule.kind_of? Symbol
100
+ info = self.class::Rules[@failed_rule]
101
+ "line #{l}, column #{c}: failed rule '#{info.name}' = '#{info.rendered}'"
102
+ else
103
+ "line #{l}, column #{c}: failed rule '#{@failed_rule}'"
104
+ end
105
+ end
106
+
107
+ def failure_caret
108
+ l = current_line @failing_rule_offset
109
+ c = current_column @failing_rule_offset
110
+
111
+ line = lines[l-1]
112
+ "#{line}\n#{' ' * (c - 1)}^"
113
+ end
114
+
115
+ def failure_character
116
+ l = current_line @failing_rule_offset
117
+ c = current_column @failing_rule_offset
118
+ lines[l-1][c-1, 1]
119
+ end
120
+
121
+ def failure_oneline
122
+ l = current_line @failing_rule_offset
123
+ c = current_column @failing_rule_offset
124
+
125
+ char = lines[l-1][c-1, 1]
126
+
127
+ if @failed_rule.kind_of? Symbol
128
+ info = self.class::Rules[@failed_rule]
129
+ "@#{l}:#{c} failed rule '#{info.name}', got '#{char}'"
130
+ else
131
+ "@#{l}:#{c} failed rule '#{@failed_rule}', got '#{char}'"
132
+ end
133
+ end
134
+
135
+ class ParseError < RuntimeError
136
+ end
137
+
138
+ def raise_error
139
+ raise ParseError, failure_oneline
140
+ end
141
+
142
+ def show_error(io=STDOUT)
143
+ error_pos = @failing_rule_offset
144
+ line_no = current_line(error_pos)
145
+ col_no = current_column(error_pos)
146
+
147
+ io.puts "On line #{line_no}, column #{col_no}:"
148
+
149
+ if @failed_rule.kind_of? Symbol
150
+ info = self.class::Rules[@failed_rule]
151
+ io.puts "Failed to match '#{info.rendered}' (rule '#{info.name}')"
152
+ else
153
+ io.puts "Failed to match rule '#{@failed_rule}'"
154
+ end
155
+
156
+ io.puts "Got: #{string[error_pos,1].inspect}"
157
+ line = lines[line_no-1]
158
+ io.puts "=> #{line}"
159
+ io.print(" " * (col_no + 3))
160
+ io.puts "^"
161
+ end
162
+
163
+ def set_failed_rule(name)
164
+ if @pos > @failing_rule_offset
165
+ @failed_rule = name
166
+ @failing_rule_offset = @pos
167
+ end
168
+ end
169
+
170
+ attr_reader :failed_rule
171
+
172
+ def match_string(str)
173
+ len = str.size
174
+ if @string[pos,len] == str
175
+ @pos += len
176
+ return str
177
+ end
178
+
179
+ return nil
180
+ end
181
+
182
+ def scan(reg)
183
+ if m = reg.match(@string, @pos)
184
+ @pos = m.end(0)
185
+ return true
186
+ end
187
+
188
+ return nil
189
+ end
190
+
191
+ if "".respond_to? :ord
192
+ def get_byte
193
+ if @pos >= @string_size
194
+ return nil
195
+ end
196
+
197
+ s = @string[@pos].ord
198
+ @pos += 1
199
+ s
200
+ end
201
+ else
202
+ def get_byte
203
+ if @pos >= @string_size
204
+ return nil
205
+ end
206
+
207
+ s = @string[@pos]
208
+ @pos += 1
209
+ s
210
+ end
211
+ end
212
+
213
+ def parse(rule=nil)
214
+ # We invoke the rules indirectly via apply
215
+ # instead of by just calling them as methods because
216
+ # if the rules use left recursion, apply needs to
217
+ # manage that.
218
+
219
+ if !rule
220
+ apply(:_root)
221
+ else
222
+ method = rule.gsub("-","_hyphen_")
223
+ apply :"_#{method}"
224
+ end
225
+ end
226
+
227
+ class MemoEntry
228
+ def initialize(ans, pos)
229
+ @ans = ans
230
+ @pos = pos
231
+ @result = nil
232
+ @set = false
233
+ @left_rec = false
234
+ end
235
+
236
+ attr_reader :ans, :pos, :result, :set
237
+ attr_accessor :left_rec
238
+
239
+ def move!(ans, pos, result)
240
+ @ans = ans
241
+ @pos = pos
242
+ @result = result
243
+ @set = true
244
+ @left_rec = false
245
+ end
246
+ end
247
+
248
+ def external_invoke(other, rule, *args)
249
+ old_pos = @pos
250
+ old_string = @string
251
+
252
+ set_string other.string, other.pos
253
+
254
+ begin
255
+ if val = __send__(rule, *args)
256
+ other.pos = @pos
257
+ other.result = @result
258
+ else
259
+ other.set_failed_rule "#{self.class}##{rule}"
260
+ end
261
+ val
262
+ ensure
263
+ set_string old_string, old_pos
264
+ end
265
+ end
266
+
267
+ def apply_with_args(rule, *args)
268
+ memo_key = [rule, args]
269
+ if m = @memoizations[memo_key][@pos]
270
+ @pos = m.pos
271
+ if !m.set
272
+ m.left_rec = true
273
+ return nil
274
+ end
275
+
276
+ @result = m.result
2
277
 
3
- class KPeg::StringEscape < KPeg::CompiledParser
278
+ return m.ans
279
+ else
280
+ m = MemoEntry.new(nil, @pos)
281
+ @memoizations[memo_key][@pos] = m
282
+ start_pos = @pos
283
+
284
+ ans = __send__ rule, *args
285
+
286
+ lr = m.left_rec
287
+
288
+ m.move! ans, @pos, @result
289
+
290
+ # Don't bother trying to grow the left recursion
291
+ # if it's failing straight away (thus there is no seed)
292
+ if ans and lr
293
+ return grow_lr(rule, args, start_pos, m)
294
+ else
295
+ return ans
296
+ end
297
+ end
298
+ end
299
+
300
+ def apply(rule)
301
+ if m = @memoizations[rule][@pos]
302
+ @pos = m.pos
303
+ if !m.set
304
+ m.left_rec = true
305
+ return nil
306
+ end
307
+
308
+ @result = m.result
309
+
310
+ return m.ans
311
+ else
312
+ m = MemoEntry.new(nil, @pos)
313
+ @memoizations[rule][@pos] = m
314
+ start_pos = @pos
315
+
316
+ ans = __send__ rule
317
+
318
+ lr = m.left_rec
319
+
320
+ m.move! ans, @pos, @result
321
+
322
+ # Don't bother trying to grow the left recursion
323
+ # if it's failing straight away (thus there is no seed)
324
+ if ans and lr
325
+ return grow_lr(rule, nil, start_pos, m)
326
+ else
327
+ return ans
328
+ end
329
+ end
330
+ end
331
+
332
+ def grow_lr(rule, args, start_pos, m)
333
+ while true
334
+ @pos = start_pos
335
+ @result = m.result
336
+
337
+ if args
338
+ ans = __send__ rule, *args
339
+ else
340
+ ans = __send__ rule
341
+ end
342
+ return nil unless ans
343
+
344
+ break if @pos <= m.pos
345
+
346
+ m.move! ans, @pos, @result
347
+ end
348
+
349
+ @result = m.result
350
+ @pos = m.pos
351
+ return m.ans
352
+ end
353
+
354
+ class RuleInfo
355
+ def initialize(name, rendered)
356
+ @name = name
357
+ @rendered = rendered
358
+ end
359
+
360
+ attr_reader :name, :rendered
361
+ end
362
+
363
+ def self.rule_info(name, rendered)
364
+ RuleInfo.new(name, rendered)
365
+ end
366
+
367
+
368
+ # :startdoc:
4
369
 
5
370
 
6
371
  attr_reader :text
7
372
 
8
373
 
374
+ # :stopdoc:
375
+ def setup_foreign_grammar; end
9
376
 
10
- # segment = (< /[\w ]+/ > { text } | "\\" { "\\\\" } | "\n" { "\\n" } | "\t" { "\\t" } | "\b" { "\\b" } | "\"" { "\\\"" } | < . > { text })
377
+ # segment = (< /[\w ]+/ > { text } | "\\" { "\\\\" } | "\n" { "\\n" } | "\r" { "\\r" } | "\t" { "\\t" } | "\b" { "\\b" } | "\"" { "\\\"" } | < . > { text })
11
378
  def _segment
12
379
 
13
380
  _save = self.pos
@@ -16,7 +383,7 @@ class KPeg::StringEscape < KPeg::CompiledParser
16
383
  _save1 = self.pos
17
384
  while true # sequence
18
385
  _text_start = self.pos
19
- _tmp = scan(/\A(?-mix:[\w ]+)/)
386
+ _tmp = scan(/\G(?-mix:[\w ]+)/)
20
387
  if _tmp
21
388
  text = get_text(_text_start)
22
389
  end
@@ -73,12 +440,12 @@ class KPeg::StringEscape < KPeg::CompiledParser
73
440
 
74
441
  _save4 = self.pos
75
442
  while true # sequence
76
- _tmp = match_string("\t")
443
+ _tmp = match_string("\r")
77
444
  unless _tmp
78
445
  self.pos = _save4
79
446
  break
80
447
  end
81
- @result = begin; "\\t" ; end
448
+ @result = begin; "\\r" ; end
82
449
  _tmp = true
83
450
  unless _tmp
84
451
  self.pos = _save4
@@ -91,12 +458,12 @@ class KPeg::StringEscape < KPeg::CompiledParser
91
458
 
92
459
  _save5 = self.pos
93
460
  while true # sequence
94
- _tmp = match_string("\b")
461
+ _tmp = match_string("\t")
95
462
  unless _tmp
96
463
  self.pos = _save5
97
464
  break
98
465
  end
99
- @result = begin; "\\b" ; end
466
+ @result = begin; "\\t" ; end
100
467
  _tmp = true
101
468
  unless _tmp
102
469
  self.pos = _save5
@@ -109,12 +476,12 @@ class KPeg::StringEscape < KPeg::CompiledParser
109
476
 
110
477
  _save6 = self.pos
111
478
  while true # sequence
112
- _tmp = match_string("\"")
479
+ _tmp = match_string("\b")
113
480
  unless _tmp
114
481
  self.pos = _save6
115
482
  break
116
483
  end
117
- @result = begin; "\\\"" ; end
484
+ @result = begin; "\\b" ; end
118
485
  _tmp = true
119
486
  unless _tmp
120
487
  self.pos = _save6
@@ -126,6 +493,24 @@ class KPeg::StringEscape < KPeg::CompiledParser
126
493
  self.pos = _save
127
494
 
128
495
  _save7 = self.pos
496
+ while true # sequence
497
+ _tmp = match_string("\"")
498
+ unless _tmp
499
+ self.pos = _save7
500
+ break
501
+ end
502
+ @result = begin; "\\\"" ; end
503
+ _tmp = true
504
+ unless _tmp
505
+ self.pos = _save7
506
+ end
507
+ break
508
+ end # end sequence
509
+
510
+ break if _tmp
511
+ self.pos = _save
512
+
513
+ _save8 = self.pos
129
514
  while true # sequence
130
515
  _text_start = self.pos
131
516
  _tmp = get_byte
@@ -133,13 +518,13 @@ class KPeg::StringEscape < KPeg::CompiledParser
133
518
  text = get_text(_text_start)
134
519
  end
135
520
  unless _tmp
136
- self.pos = _save7
521
+ self.pos = _save8
137
522
  break
138
523
  end
139
524
  @result = begin; text ; end
140
525
  _tmp = true
141
526
  unless _tmp
142
- self.pos = _save7
527
+ self.pos = _save8
143
528
  end
144
529
  break
145
530
  end # end sequence
@@ -247,8 +632,9 @@ class KPeg::StringEscape < KPeg::CompiledParser
247
632
  end
248
633
 
249
634
  Rules = {}
250
- Rules[:_segment] = rule_info("segment", "(< /[\\w ]+/ > { text } | \"\\\\\" { \"\\\\\\\\\" } | \"\\n\" { \"\\\\n\" } | \"\\t\" { \"\\\\t\" } | \"\\b\" { \"\\\\b\" } | \"\\\"\" { \"\\\\\\\"\" } | < . > { text })")
635
+ Rules[:_segment] = rule_info("segment", "(< /[\\w ]+/ > { text } | \"\\\\\" { \"\\\\\\\\\" } | \"\\n\" { \"\\\\n\" } | \"\\r\" { \"\\\\r\" } | \"\\t\" { \"\\\\t\" } | \"\\b\" { \"\\\\b\" } | \"\\\"\" { \"\\\\\\\"\" } | < . > { text })")
251
636
  Rules[:_root] = rule_info("root", "segment*:s { @text = s.join }")
252
637
  Rules[:_embed_seg] = rule_info("embed_seg", "(\"\#\" { \"\\\\\#\" } | segment)")
253
638
  Rules[:_embed] = rule_info("embed", "embed_seg*:s { @text = s.join }")
639
+ # :startdoc:
254
640
  end
data/lib/kpeg.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module KPeg
2
2
 
3
- VERSION = "0.9.0"
3
+ VERSION = "1.2.0"
4
4
 
5
5
  def self.grammar
6
6
  g = Grammar.new
data/test/test_kpeg.rb CHANGED
@@ -2,7 +2,7 @@ require 'minitest/autorun'
2
2
  require 'kpeg'
3
3
  require 'stringio'
4
4
 
5
- class TestKPeg < MiniTest::Unit::TestCase
5
+ class TestKPeg < Minitest::Test
6
6
  def assert_match(m, str)
7
7
  assert_kind_of KPeg::MatchString, m
8
8
  assert_equal str, m.string