kpeg 0.9.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,380 @@
1
- require 'kpeg/compiled_parser'
1
+ class KPeg::StringEscape
2
+ # :stopdoc:
3
+
4
+ # This is distinct from setup_parser so that a standalone parser
5
+ # can redefine #initialize and still have access to the proper
6
+ # parser setup code.
7
+ def initialize(str, debug=false)
8
+ setup_parser(str, debug)
9
+ end
10
+
11
+
12
+
13
+ # Prepares for parsing +str+. If you define a custom initialize you must
14
+ # call this method before #parse
15
+ def setup_parser(str, debug=false)
16
+ set_string str, 0
17
+ @memoizations = Hash.new { |h,k| h[k] = {} }
18
+ @result = nil
19
+ @failed_rule = nil
20
+ @failing_rule_offset = -1
21
+ @line_offsets = nil
22
+
23
+ setup_foreign_grammar
24
+ end
25
+
26
+ attr_reader :string
27
+ attr_reader :failing_rule_offset
28
+ attr_accessor :result, :pos
29
+
30
+ def current_column(target=pos)
31
+ if c = string.rindex("\n", target-1)
32
+ return target - c - 1
33
+ end
34
+
35
+ target + 1
36
+ end
37
+
38
+ if [].respond_to? :bsearch_index
39
+ def current_line(target=pos)
40
+ unless @line_offsets
41
+ @line_offsets = [-1]
42
+ total = 0
43
+ string.each_line do |line|
44
+ @line_offsets << total
45
+ total += line.size
46
+ end
47
+ @line_offsets << total
48
+ end
49
+
50
+ @line_offsets.bsearch_index {|x| x >= target } || -1
51
+ end
52
+ else
53
+ def current_line(target=pos)
54
+ cur_offset = 0
55
+ cur_line = 0
56
+
57
+ string.each_line do |line|
58
+ cur_line += 1
59
+ cur_offset += line.size
60
+ return cur_line if cur_offset >= target
61
+ end
62
+
63
+ -1
64
+ end
65
+ end
66
+
67
+ def lines
68
+ lines = []
69
+ string.each_line { |l| lines << l }
70
+ lines
71
+ end
72
+
73
+
74
+
75
+ def get_text(start)
76
+ @string[start..@pos-1]
77
+ end
78
+
79
+ # Sets the string and current parsing position for the parser.
80
+ def set_string string, pos
81
+ @string = string
82
+ @string_size = string ? string.size : 0
83
+ @pos = pos
84
+ end
85
+
86
+ def show_pos
87
+ width = 10
88
+ if @pos < width
89
+ "#{@pos} (\"#{@string[0,@pos]}\" @ \"#{@string[@pos,width]}\")"
90
+ else
91
+ "#{@pos} (\"... #{@string[@pos - width, width]}\" @ \"#{@string[@pos,width]}\")"
92
+ end
93
+ end
94
+
95
+ def failure_info
96
+ l = current_line @failing_rule_offset
97
+ c = current_column @failing_rule_offset
98
+
99
+ if @failed_rule.kind_of? Symbol
100
+ info = self.class::Rules[@failed_rule]
101
+ "line #{l}, column #{c}: failed rule '#{info.name}' = '#{info.rendered}'"
102
+ else
103
+ "line #{l}, column #{c}: failed rule '#{@failed_rule}'"
104
+ end
105
+ end
106
+
107
+ def failure_caret
108
+ l = current_line @failing_rule_offset
109
+ c = current_column @failing_rule_offset
110
+
111
+ line = lines[l-1]
112
+ "#{line}\n#{' ' * (c - 1)}^"
113
+ end
114
+
115
+ def failure_character
116
+ l = current_line @failing_rule_offset
117
+ c = current_column @failing_rule_offset
118
+ lines[l-1][c-1, 1]
119
+ end
120
+
121
+ def failure_oneline
122
+ l = current_line @failing_rule_offset
123
+ c = current_column @failing_rule_offset
124
+
125
+ char = lines[l-1][c-1, 1]
126
+
127
+ if @failed_rule.kind_of? Symbol
128
+ info = self.class::Rules[@failed_rule]
129
+ "@#{l}:#{c} failed rule '#{info.name}', got '#{char}'"
130
+ else
131
+ "@#{l}:#{c} failed rule '#{@failed_rule}', got '#{char}'"
132
+ end
133
+ end
134
+
135
+ class ParseError < RuntimeError
136
+ end
137
+
138
+ def raise_error
139
+ raise ParseError, failure_oneline
140
+ end
141
+
142
+ def show_error(io=STDOUT)
143
+ error_pos = @failing_rule_offset
144
+ line_no = current_line(error_pos)
145
+ col_no = current_column(error_pos)
146
+
147
+ io.puts "On line #{line_no}, column #{col_no}:"
148
+
149
+ if @failed_rule.kind_of? Symbol
150
+ info = self.class::Rules[@failed_rule]
151
+ io.puts "Failed to match '#{info.rendered}' (rule '#{info.name}')"
152
+ else
153
+ io.puts "Failed to match rule '#{@failed_rule}'"
154
+ end
155
+
156
+ io.puts "Got: #{string[error_pos,1].inspect}"
157
+ line = lines[line_no-1]
158
+ io.puts "=> #{line}"
159
+ io.print(" " * (col_no + 3))
160
+ io.puts "^"
161
+ end
162
+
163
+ def set_failed_rule(name)
164
+ if @pos > @failing_rule_offset
165
+ @failed_rule = name
166
+ @failing_rule_offset = @pos
167
+ end
168
+ end
169
+
170
+ attr_reader :failed_rule
171
+
172
+ def match_string(str)
173
+ len = str.size
174
+ if @string[pos,len] == str
175
+ @pos += len
176
+ return str
177
+ end
178
+
179
+ return nil
180
+ end
181
+
182
+ def scan(reg)
183
+ if m = reg.match(@string, @pos)
184
+ @pos = m.end(0)
185
+ return true
186
+ end
187
+
188
+ return nil
189
+ end
190
+
191
+ if "".respond_to? :ord
192
+ def get_byte
193
+ if @pos >= @string_size
194
+ return nil
195
+ end
196
+
197
+ s = @string[@pos].ord
198
+ @pos += 1
199
+ s
200
+ end
201
+ else
202
+ def get_byte
203
+ if @pos >= @string_size
204
+ return nil
205
+ end
206
+
207
+ s = @string[@pos]
208
+ @pos += 1
209
+ s
210
+ end
211
+ end
212
+
213
+ def parse(rule=nil)
214
+ # We invoke the rules indirectly via apply
215
+ # instead of by just calling them as methods because
216
+ # if the rules use left recursion, apply needs to
217
+ # manage that.
218
+
219
+ if !rule
220
+ apply(:_root)
221
+ else
222
+ method = rule.gsub("-","_hyphen_")
223
+ apply :"_#{method}"
224
+ end
225
+ end
226
+
227
+ class MemoEntry
228
+ def initialize(ans, pos)
229
+ @ans = ans
230
+ @pos = pos
231
+ @result = nil
232
+ @set = false
233
+ @left_rec = false
234
+ end
235
+
236
+ attr_reader :ans, :pos, :result, :set
237
+ attr_accessor :left_rec
238
+
239
+ def move!(ans, pos, result)
240
+ @ans = ans
241
+ @pos = pos
242
+ @result = result
243
+ @set = true
244
+ @left_rec = false
245
+ end
246
+ end
247
+
248
+ def external_invoke(other, rule, *args)
249
+ old_pos = @pos
250
+ old_string = @string
251
+
252
+ set_string other.string, other.pos
253
+
254
+ begin
255
+ if val = __send__(rule, *args)
256
+ other.pos = @pos
257
+ other.result = @result
258
+ else
259
+ other.set_failed_rule "#{self.class}##{rule}"
260
+ end
261
+ val
262
+ ensure
263
+ set_string old_string, old_pos
264
+ end
265
+ end
266
+
267
+ def apply_with_args(rule, *args)
268
+ memo_key = [rule, args]
269
+ if m = @memoizations[memo_key][@pos]
270
+ @pos = m.pos
271
+ if !m.set
272
+ m.left_rec = true
273
+ return nil
274
+ end
275
+
276
+ @result = m.result
2
277
 
3
- class KPeg::StringEscape < KPeg::CompiledParser
278
+ return m.ans
279
+ else
280
+ m = MemoEntry.new(nil, @pos)
281
+ @memoizations[memo_key][@pos] = m
282
+ start_pos = @pos
283
+
284
+ ans = __send__ rule, *args
285
+
286
+ lr = m.left_rec
287
+
288
+ m.move! ans, @pos, @result
289
+
290
+ # Don't bother trying to grow the left recursion
291
+ # if it's failing straight away (thus there is no seed)
292
+ if ans and lr
293
+ return grow_lr(rule, args, start_pos, m)
294
+ else
295
+ return ans
296
+ end
297
+ end
298
+ end
299
+
300
+ def apply(rule)
301
+ if m = @memoizations[rule][@pos]
302
+ @pos = m.pos
303
+ if !m.set
304
+ m.left_rec = true
305
+ return nil
306
+ end
307
+
308
+ @result = m.result
309
+
310
+ return m.ans
311
+ else
312
+ m = MemoEntry.new(nil, @pos)
313
+ @memoizations[rule][@pos] = m
314
+ start_pos = @pos
315
+
316
+ ans = __send__ rule
317
+
318
+ lr = m.left_rec
319
+
320
+ m.move! ans, @pos, @result
321
+
322
+ # Don't bother trying to grow the left recursion
323
+ # if it's failing straight away (thus there is no seed)
324
+ if ans and lr
325
+ return grow_lr(rule, nil, start_pos, m)
326
+ else
327
+ return ans
328
+ end
329
+ end
330
+ end
331
+
332
+ def grow_lr(rule, args, start_pos, m)
333
+ while true
334
+ @pos = start_pos
335
+ @result = m.result
336
+
337
+ if args
338
+ ans = __send__ rule, *args
339
+ else
340
+ ans = __send__ rule
341
+ end
342
+ return nil unless ans
343
+
344
+ break if @pos <= m.pos
345
+
346
+ m.move! ans, @pos, @result
347
+ end
348
+
349
+ @result = m.result
350
+ @pos = m.pos
351
+ return m.ans
352
+ end
353
+
354
+ class RuleInfo
355
+ def initialize(name, rendered)
356
+ @name = name
357
+ @rendered = rendered
358
+ end
359
+
360
+ attr_reader :name, :rendered
361
+ end
362
+
363
+ def self.rule_info(name, rendered)
364
+ RuleInfo.new(name, rendered)
365
+ end
366
+
367
+
368
+ # :startdoc:
4
369
 
5
370
 
6
371
  attr_reader :text
7
372
 
8
373
 
374
+ # :stopdoc:
375
+ def setup_foreign_grammar; end
9
376
 
10
- # segment = (< /[\w ]+/ > { text } | "\\" { "\\\\" } | "\n" { "\\n" } | "\t" { "\\t" } | "\b" { "\\b" } | "\"" { "\\\"" } | < . > { text })
377
+ # segment = (< /[\w ]+/ > { text } | "\\" { "\\\\" } | "\n" { "\\n" } | "\r" { "\\r" } | "\t" { "\\t" } | "\b" { "\\b" } | "\"" { "\\\"" } | < . > { text })
11
378
  def _segment
12
379
 
13
380
  _save = self.pos
@@ -16,7 +383,7 @@ class KPeg::StringEscape < KPeg::CompiledParser
16
383
  _save1 = self.pos
17
384
  while true # sequence
18
385
  _text_start = self.pos
19
- _tmp = scan(/\A(?-mix:[\w ]+)/)
386
+ _tmp = scan(/\G(?-mix:[\w ]+)/)
20
387
  if _tmp
21
388
  text = get_text(_text_start)
22
389
  end
@@ -73,12 +440,12 @@ class KPeg::StringEscape < KPeg::CompiledParser
73
440
 
74
441
  _save4 = self.pos
75
442
  while true # sequence
76
- _tmp = match_string("\t")
443
+ _tmp = match_string("\r")
77
444
  unless _tmp
78
445
  self.pos = _save4
79
446
  break
80
447
  end
81
- @result = begin; "\\t" ; end
448
+ @result = begin; "\\r" ; end
82
449
  _tmp = true
83
450
  unless _tmp
84
451
  self.pos = _save4
@@ -91,12 +458,12 @@ class KPeg::StringEscape < KPeg::CompiledParser
91
458
 
92
459
  _save5 = self.pos
93
460
  while true # sequence
94
- _tmp = match_string("\b")
461
+ _tmp = match_string("\t")
95
462
  unless _tmp
96
463
  self.pos = _save5
97
464
  break
98
465
  end
99
- @result = begin; "\\b" ; end
466
+ @result = begin; "\\t" ; end
100
467
  _tmp = true
101
468
  unless _tmp
102
469
  self.pos = _save5
@@ -109,12 +476,12 @@ class KPeg::StringEscape < KPeg::CompiledParser
109
476
 
110
477
  _save6 = self.pos
111
478
  while true # sequence
112
- _tmp = match_string("\"")
479
+ _tmp = match_string("\b")
113
480
  unless _tmp
114
481
  self.pos = _save6
115
482
  break
116
483
  end
117
- @result = begin; "\\\"" ; end
484
+ @result = begin; "\\b" ; end
118
485
  _tmp = true
119
486
  unless _tmp
120
487
  self.pos = _save6
@@ -126,6 +493,24 @@ class KPeg::StringEscape < KPeg::CompiledParser
126
493
  self.pos = _save
127
494
 
128
495
  _save7 = self.pos
496
+ while true # sequence
497
+ _tmp = match_string("\"")
498
+ unless _tmp
499
+ self.pos = _save7
500
+ break
501
+ end
502
+ @result = begin; "\\\"" ; end
503
+ _tmp = true
504
+ unless _tmp
505
+ self.pos = _save7
506
+ end
507
+ break
508
+ end # end sequence
509
+
510
+ break if _tmp
511
+ self.pos = _save
512
+
513
+ _save8 = self.pos
129
514
  while true # sequence
130
515
  _text_start = self.pos
131
516
  _tmp = get_byte
@@ -133,13 +518,13 @@ class KPeg::StringEscape < KPeg::CompiledParser
133
518
  text = get_text(_text_start)
134
519
  end
135
520
  unless _tmp
136
- self.pos = _save7
521
+ self.pos = _save8
137
522
  break
138
523
  end
139
524
  @result = begin; text ; end
140
525
  _tmp = true
141
526
  unless _tmp
142
- self.pos = _save7
527
+ self.pos = _save8
143
528
  end
144
529
  break
145
530
  end # end sequence
@@ -247,8 +632,9 @@ class KPeg::StringEscape < KPeg::CompiledParser
247
632
  end
248
633
 
249
634
  Rules = {}
250
- Rules[:_segment] = rule_info("segment", "(< /[\\w ]+/ > { text } | \"\\\\\" { \"\\\\\\\\\" } | \"\\n\" { \"\\\\n\" } | \"\\t\" { \"\\\\t\" } | \"\\b\" { \"\\\\b\" } | \"\\\"\" { \"\\\\\\\"\" } | < . > { text })")
635
+ Rules[:_segment] = rule_info("segment", "(< /[\\w ]+/ > { text } | \"\\\\\" { \"\\\\\\\\\" } | \"\\n\" { \"\\\\n\" } | \"\\r\" { \"\\\\r\" } | \"\\t\" { \"\\\\t\" } | \"\\b\" { \"\\\\b\" } | \"\\\"\" { \"\\\\\\\"\" } | < . > { text })")
251
636
  Rules[:_root] = rule_info("root", "segment*:s { @text = s.join }")
252
637
  Rules[:_embed_seg] = rule_info("embed_seg", "(\"\#\" { \"\\\\\#\" } | segment)")
253
638
  Rules[:_embed] = rule_info("embed", "embed_seg*:s { @text = s.join }")
639
+ # :startdoc:
254
640
  end
data/lib/kpeg.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module KPeg
2
2
 
3
- VERSION = "0.9.0"
3
+ VERSION = "1.2.0"
4
4
 
5
5
  def self.grammar
6
6
  g = Grammar.new
data/test/test_kpeg.rb CHANGED
@@ -2,7 +2,7 @@ require 'minitest/autorun'
2
2
  require 'kpeg'
3
3
  require 'stringio'
4
4
 
5
- class TestKPeg < MiniTest::Unit::TestCase
5
+ class TestKPeg < Minitest::Test
6
6
  def assert_match(m, str)
7
7
  assert_kind_of KPeg::MatchString, m
8
8
  assert_equal str, m.string