kpeg 0.8.5 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. data/.autotest +10 -0
  2. data/.gemtest +0 -0
  3. data/Gemfile +11 -3
  4. data/History.txt +21 -0
  5. data/LICENSE +25 -0
  6. data/Manifest.txt +47 -0
  7. data/README.rdoc +222 -0
  8. data/Rakefile +23 -11
  9. data/bin/kpeg +4 -2
  10. data/examples/calculator/calculator.kpeg +17 -0
  11. data/examples/calculator/calculator.rb +7 -0
  12. data/examples/foreign_reference/literals.kpeg +5 -0
  13. data/examples/foreign_reference/matcher.kpeg +9 -0
  14. data/examples/foreign_reference/matcher.rb +5 -0
  15. data/examples/lua_string/driver.rb +21 -0
  16. data/examples/lua_string/lua_string.kpeg +14 -0
  17. data/examples/lua_string/lua_string.kpeg.rb +460 -0
  18. data/examples/phone_number/README.md +3 -0
  19. data/examples/phone_number/phone_number.kpeg +20 -0
  20. data/examples/phone_number/phone_number.rb +6 -0
  21. data/examples/upper/README.md +83 -0
  22. data/examples/upper/upper.kpeg +24 -0
  23. data/examples/upper/upper.rb +9 -0
  24. data/kpeg.gemspec +35 -17
  25. data/lib/hoe/kpeg.rb +94 -0
  26. data/lib/kpeg.rb +3 -0
  27. data/lib/kpeg/code_generator.rb +16 -3
  28. data/lib/kpeg/compiled_parser.rb +18 -28
  29. data/lib/kpeg/format_parser.kpeg +129 -0
  30. data/lib/kpeg/format_parser.rb +88 -49
  31. data/lib/kpeg/grammar.rb +10 -0
  32. data/lib/kpeg/string_escape.kpeg +20 -0
  33. data/test/inputs/comments.kpeg +5 -0
  34. data/test/test_file_parser_roundtrip.rb +3 -3
  35. data/test/test_gen_calc.rb +2 -2
  36. data/test/test_kpeg.rb +2 -2
  37. data/test/test_kpeg_code_generator.rb +65 -2
  38. data/test/test_kpeg_compiled_parser.rb +2 -2
  39. data/test/test_kpeg_format.rb +49 -4
  40. data/test/test_kpeg_grammar_renderer.rb +2 -2
  41. data/test/test_left_recursion.rb +2 -2
  42. data/{doc → vim}/syntax_kpeg/ftdetect/kpeg.vim +0 -0
  43. data/{doc → vim}/syntax_kpeg/syntax/kpeg.vim +0 -0
  44. metadata +89 -26
  45. data/README.md +0 -183
  46. data/lib/kpeg/version.rb +0 -3
@@ -0,0 +1,9 @@
# Example of a grammar that refers to rules of a foreign grammar.
# The generated parser class is named Matcher.
%% name = Matcher

%% {
  # Load the generated Literal parser relative to the current directory,
  # mirroring how matcher.rb loads "./matcher.kpeg.rb". A bare file name is
  # only found when "." is on Ruby's load path.
  require "./literals.kpeg.rb"
}

# Make the foreign grammar Literal available under the local name grammar1.
%grammar1 = Literal

# One or more alpha matches, each optionally followed by spaces, terminated
# by a period. The alpha, space and period rules are presumably defined in
# literals.kpeg -- confirm against that file.
root = (%grammar1.alpha %grammar1.space*)+ %grammar1.period
@@ -0,0 +1,5 @@
# Example driver for the foreign-reference grammar: loads the generated
# Matcher parser from the current directory, runs it over a sample sentence
# and prints whatever #parse returns.
require 'rubygems'
require './matcher.kpeg.rb'

matcher = Matcher.new("this is a string.")
outcome = matcher.parse
puts outcome
@@ -0,0 +1,21 @@
# Example driver for the generated LuaString parser: parses several Lua-style
# long-bracket strings and prints the text captured between the brackets.
#
# The parser is loaded relative to the current directory (as matcher.rb does
# for its parser); a bare "lua_string.kpeg.rb" is only found when "." is on
# the load path.
require './lua_string.kpeg.rb'

# Same inputs, in the same order, as the original hand-unrolled script.
[
  "[[blah]]",
  "[==[blah2]==]",
  "[==[embeded]stuff]==]",
  "[==[embeded]=]stuff]==]"
].each do |input|
  ls = LuaString.new(input)
  ls.parse

  p ls.result
end
@@ -0,0 +1,14 @@
# Grammar for Lua-style long-bracket strings such as [==[ text ]==].
# The generated parser class is named LuaString.
1
+ %% name = LuaString
2
+
# Code in this block is copied into the generated class; it exposes the
# parse result through a result accessor.
3
+ %% {
4
+ attr_accessor :result
5
+ }
6
+
7
+
# equals: a run of zero or more "=" characters; the rule's value is the
# matched text.
8
+ equals = < "="* > { text }
9
+
# equal_ending(start): "]", then a run of "=" bound to x that must be equal
# to start (checked by the &{ } predicate), then a final "]".
10
+ equal_ending(start) = "]" equals:x &{ x == start } "]"
11
+
# root: "[", a run of "=" bound to e, "[", then every character up to the
# first position where equal_ending(e) matches, then that ending. The
# captured text between the brackets is stored in @result.
12
+ root = "[" equals:e "[" < (!equal_ending(e) .)* > equal_ending(e) {
13
+ @result = text
14
+ }
@@ -0,0 +1,460 @@
# Parser for Lua-style long-bracket strings such as "[==[text]==]".
#
# The methods _equals, _equal_ending and _root are the compiled forms of the
# grammar rules quoted in the comment above each of them; everything before
# them is the standalone parser runtime (position tracking, memoization,
# left-recursion support and failure reporting).
#
# Typical use:
#
#   ls = LuaString.new("[==[blah]==]")
#   ls.parse   # => true
#   ls.result  # => "blah"
class LuaString
  # STANDALONE START

  # Resets all parser state so that +str+ is parsed from its beginning.
  # +debug+ is accepted but not used by this runtime.
  def setup_parser(str, debug=false)
    @string = str
    @pos = 0
    @memoizations = Hash.new { |h,k| h[k] = {} }
    @result = nil
    @failed_rule = nil
    @failing_rule_offset = -1

    setup_foreign_grammar
  end

  # This is distinct from setup_parser so that a standalone parser
  # can redefine #initialize and still have access to the proper
  # parser setup code.
  #
  def initialize(str, debug=false)
    setup_parser(str, debug)
  end

  # The result reader is provided by the attr_accessor :result further down,
  # so it is not declared a second time here.
  attr_reader :string
  attr_reader :failing_rule_offset
  attr_accessor :pos

  # STANDALONE START

  # Returns the 1-based column of offset +target+ within its line.
  #
  # Negative targets are treated as offset 0. The search for the preceding
  # newline is skipped at offset 0 because String#rindex interprets a
  # negative start index as counting from the end of the string.
  def current_column(target=pos)
    target = 0 if target < 0

    if target > 0 && (c = string.rindex("\n", target - 1))
      return target - c
    end

    target + 1
  end

  # Returns the 1-based line number containing offset +target+, or -1 when
  # +target+ lies beyond the end of the input. The offset equal to the input
  # size (end of input) is valid. Negative targets are treated as offset 0.
  def current_line(target=pos)
    return -1 if target > string.size
    target = 0 if target < 0

    # Every newline strictly before the target starts a new line.
    string[0, target].count("\n") + 1
  end

  # Returns the input split into lines, each keeping its line terminator.
  def lines
    collected = []
    string.each_line { |l| collected << l }
    collected
  end

  #

  # Returns the input text from offset +start+ up to, but not including,
  # the current position.
  def get_text(start)
    @string[start..@pos-1]
  end

  # Describes the current position together with up to ten characters of
  # context on either side.
  def show_pos
    width = 10
    if @pos < width
      "#{@pos} (\"#{@string[0,@pos]}\" @ \"#{@string[@pos,width]}\")"
    else
      "#{@pos} (\"... #{@string[@pos - width, width]}\" @ \"#{@string[@pos,width]}\")"
    end
  end

  # Returns a one-line description of the furthest failure, including the
  # rendered rule when the failed rule is known by symbol.
  def failure_info
    l = current_line @failing_rule_offset
    c = current_column @failing_rule_offset

    if @failed_rule.kind_of? Symbol
      info = self.class::Rules[@failed_rule]
      "line #{l}, column #{c}: failed rule '#{info.name}' = '#{info.rendered}'"
    else
      "line #{l}, column #{c}: failed rule '#{@failed_rule}'"
    end
  end

  # Returns the failing line followed by a second line holding a caret under
  # the failing column.
  def failure_caret
    l = current_line @failing_rule_offset
    c = current_column @failing_rule_offset

    # The line terminator is removed so the caret sits directly below the
    # text; to_s covers a position past the last line.
    line = lines[l-1].to_s.chomp
    "#{line}\n#{' ' * (c - 1)}^"
  end

  # Returns the character at the failure position ("" at the end of a line,
  # nil when the column is past it).
  def failure_character
    l = current_line @failing_rule_offset
    c = current_column @failing_rule_offset
    lines[l-1].to_s[c-1, 1]
  end

  # Returns a compact "@line:column failed rule ..., got ..." message.
  def failure_oneline
    l = current_line @failing_rule_offset
    c = current_column @failing_rule_offset

    char = lines[l-1].to_s[c-1, 1]

    if @failed_rule.kind_of? Symbol
      info = self.class::Rules[@failed_rule]
      "@#{l}:#{c} failed rule '#{info.name}', got '#{char}'"
    else
      "@#{l}:#{c} failed rule '#{@failed_rule}', got '#{char}'"
    end
  end

  # Raised by #raise_error to report a failed parse.
  class ParseError < RuntimeError
  end

  # Raises ParseError carrying the #failure_oneline message.
  def raise_error
    raise ParseError, failure_oneline
  end

  # Writes a multi-line report of the furthest failure to +io+.
  def show_error(io=STDOUT)
    error_pos = @failing_rule_offset
    line_no = current_line(error_pos)
    col_no = current_column(error_pos)

    io.puts "On line #{line_no}, column #{col_no}:"

    if @failed_rule.kind_of? Symbol
      info = self.class::Rules[@failed_rule]
      io.puts "Failed to match '#{info.rendered}' (rule '#{info.name}')"
    else
      io.puts "Failed to match rule '#{@failed_rule}'"
    end

    io.puts "Got: #{string[error_pos,1].inspect}"
    line = lines[line_no-1]
    io.puts "=> #{line}"
    # The "=> " prefix is three characters wide and columns are 1-based, so
    # the caret belongs col_no + 2 characters in.
    io.print(" " * (col_no + 2))
    io.puts "^"
  end

  # Records +name+ as the failed rule, but only when the current position is
  # further into the input than any failure recorded so far.
  def set_failed_rule(name)
    if @pos > @failing_rule_offset
      @failed_rule = name
      @failing_rule_offset = @pos
    end
  end

  attr_reader :failed_rule

  # Consumes +str+ if the input continues with it at the current position.
  # Returns +str+ on success and nil otherwise.
  def match_string(str)
    len = str.size
    if @string[pos,len] == str
      @pos += len
      return str
    end

    return nil
  end

  # Matches +reg+ against the remaining input and advances to the end of the
  # match. The match is not anchored here; callers are expected to pass an
  # anchored pattern. Returns true on success and nil otherwise.
  def scan(reg)
    if m = reg.match(@string[@pos..-1])
      width = m.end(0)
      @pos += width
      return true
    end

    return nil
  end

  # get_byte consumes one unit of input and returns it, or returns nil at the
  # end of the input. Which definition is used depends on whether String
  # responds to getbyte.
  if "".respond_to? :getbyte
    def get_byte
      if @pos >= @string.size
        return nil
      end

      s = @string.getbyte @pos
      @pos += 1
      s
    end
  else
    def get_byte
      if @pos >= @string.size
        return nil
      end

      s = @string[@pos]
      @pos += 1
      s
    end
  end

  # Runs the root rule, or the rule named by the string +rule+, and returns
  # true or false.
  def parse(rule=nil)
    if !rule
      _root ? true : false
    else
      # This is not shared with code_generator.rb so this can be standalone
      method = rule.gsub("-","_hyphen_")
      __send__("_#{method}") ? true : false
    end
  end

  # Placeholder answer stored in a memo entry while its rule is still being
  # evaluated; +detected+ is set when the rule re-enters itself at the same
  # position.
  class LeftRecursive
    def initialize(detected=false)
      @detected = detected
    end

    attr_accessor :detected
  end

  # Memoized outcome of applying one rule at one position.
  class MemoEntry
    def initialize(ans, pos)
      @ans = ans
      @pos = pos
      @uses = 1
      @result = nil
    end

    attr_reader :ans, :pos, :uses, :result

    # Counts one more use of this entry.
    def inc!
      @uses += 1
    end

    # Replaces the stored answer, end position and result.
    def move!(ans, pos, result)
      @ans = ans
      @pos = pos
      @result = result
    end
  end

  # Runs +rule+ of this parser on behalf of the parser +other+, using
  # +other+'s input and position. On success +other+'s position is advanced;
  # on failure the failure is recorded on +other+. This parser's own input
  # and position are always restored.
  def external_invoke(other, rule, *args)
    old_pos = @pos
    old_string = @string

    @pos = other.pos
    @string = other.string

    begin
      if val = __send__(rule, *args)
        other.pos = @pos
      else
        other.set_failed_rule "#{self.class}##{rule}"
      end
      val
    ensure
      @pos = old_pos
      @string = old_string
    end
  end

  # Applies +rule+ at the current position, reusing a memoized outcome when
  # one exists and growing a left-recursive match when one was detected.
  def apply(rule)
    if m = @memoizations[rule][@pos]
      m.inc!

      @pos = m.pos
      if m.ans.kind_of? LeftRecursive
        # The rule is already being evaluated at this position.
        m.ans.detected = true
        return nil
      end

      @result = m.result

      return m.ans
    else
      lr = LeftRecursive.new(false)
      m = MemoEntry.new(lr, @pos)
      @memoizations[rule][@pos] = m
      start_pos = @pos

      ans = __send__ rule

      m.move! ans, @pos, @result

      # Don't bother trying to grow the left recursion
      # if it's failing straight away (thus there is no seed)
      if ans and lr.detected
        return grow_lr(rule, start_pos, m)
      else
        return ans
      end
    end
  end

  # Re-applies +rule+ from +start_pos+ for as long as each attempt consumes
  # more input than the one stored in +m+, then restores the best outcome.
  def grow_lr(rule, start_pos, m)
    while true
      @pos = start_pos
      @result = m.result

      ans = __send__ rule
      return nil unless ans

      break if @pos <= m.pos

      m.move! ans, @pos, @result
    end

    @result = m.result
    @pos = m.pos
    return m.ans
  end

  # Name and rendered source of one grammar rule, used in failure messages.
  class RuleInfo
    def initialize(name, rendered)
      @name = name
      @rendered = rendered
    end

    attr_reader :name, :rendered
  end

  # Builds the RuleInfo entries stored in Rules.
  def self.rule_info(name, rendered)
    RuleInfo.new(name, rendered)
  end

  #


  attr_accessor :result


  def setup_foreign_grammar; end

  # equals = < "="* > { text }
  def _equals

    _save = self.pos
    while true # sequence
      _text_start = self.pos
      while true
        _tmp = match_string("=")
        break unless _tmp
      end
      _tmp = true
      if _tmp
        text = get_text(_text_start)
      end
      unless _tmp
        self.pos = _save
        break
      end
      @result = begin; text ; end
      _tmp = true
      unless _tmp
        self.pos = _save
      end
      break
    end # end sequence

    set_failed_rule :_equals unless _tmp
    return _tmp
  end

  # equal_ending = "]" equals:x &{ x == start } "]"
  def _equal_ending(start)

    _save = self.pos
    while true # sequence
      _tmp = match_string("]")
      unless _tmp
        self.pos = _save
        break
      end
      _tmp = apply(:_equals)
      x = @result
      unless _tmp
        self.pos = _save
        break
      end
      _save1 = self.pos
      _tmp = begin; x == start ; end
      self.pos = _save1
      unless _tmp
        self.pos = _save
        break
      end
      _tmp = match_string("]")
      unless _tmp
        self.pos = _save
      end
      break
    end # end sequence

    set_failed_rule :_equal_ending unless _tmp
    return _tmp
  end

  # root = "[" equals:e "[" < (!equal_ending(e) .)* > equal_ending(e) { @result = text }
  def _root

    _save = self.pos
    while true # sequence
      _tmp = match_string("[")
      unless _tmp
        self.pos = _save
        break
      end
      _tmp = apply(:_equals)
      e = @result
      unless _tmp
        self.pos = _save
        break
      end
      _tmp = match_string("[")
      unless _tmp
        self.pos = _save
        break
      end
      _text_start = self.pos
      while true

        _save2 = self.pos
        while true # sequence
          _save3 = self.pos
          _tmp = _equal_ending(e)
          _tmp = _tmp ? nil : true
          self.pos = _save3
          unless _tmp
            self.pos = _save2
            break
          end
          _tmp = get_byte
          unless _tmp
            self.pos = _save2
          end
          break
        end # end sequence

        break unless _tmp
      end
      _tmp = true
      if _tmp
        text = get_text(_text_start)
      end
      unless _tmp
        self.pos = _save
        break
      end
      _tmp = _equal_ending(e)
      unless _tmp
        self.pos = _save
        break
      end
      @result = begin; @result = text; end
      _tmp = true
      unless _tmp
        self.pos = _save
      end
      break
    end # end sequence

    set_failed_rule :_root unless _tmp
    return _tmp
  end

  Rules = {}
  Rules[:_equals] = rule_info("equals", "< \"=\"* > { text }")
  Rules[:_equal_ending] = rule_info("equal_ending", "\"]\" equals:x &{ x == start } \"]\"")
  Rules[:_root] = rule_info("root", "\"[\" equals:e \"[\" < (!equal_ending(e) .)* > equal_ending(e) { @result = text }")
end