grammar 0.5 → 0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/benchmark/json.benchmark.rb +355 -0
- data/benchmark/json.grammar.rb +56 -0
- data/benchmark/json.grammar0_5.rb +57 -0
- data/benchmark/json.ll1.rb +155 -0
- data/benchmark/json.peggy.rb +174 -0
- data/benchmark/json.re.rb +81 -0
- data/lib/grammar.rb +212 -639
- data/lib/grammar/ruby.rb +606 -0
- data/lib/grammar/ruby/code.rb +1030 -0
- data/lib/grammar/ruby0.rb +521 -0
- data/lib/grammar/ruby2cext.rb +19 -0
- data/lib/grammar/rubycall.rb +21 -0
- data/test/advanced.rb +105 -0
- data/test/atoms.rb +77 -0
- data/test/basic.rb +32 -0
- data/test/composite.rb +147 -0
- data/test/molecules.rb +125 -0
- data/test/test_demo.rb +200 -0
- data/test/test_ruby.rb +30 -0
- data/test/test_ruby0.rb +30 -0
- data/test/test_ruby2cext.rb +30 -0
- data/test/test_rubycall.rb +30 -0
- metadata +45 -28
- data/samples/fact.tcl +0 -12
- data/samples/infix2postfix.rb +0 -114
- data/samples/tcl.rb +0 -163
- data/samples/test.infix +0 -4
- data/test/test_grammar.rb +0 -274
@@ -0,0 +1,155 @@
|
|
1
|
+
|
2
|
+
# Hand-written LL(1) JSON parser (benchmark/json.ll1.rb).
#
# One element of lookahead is kept in @la; it is whatever +io.getc+ last
# returned and is nil once the input is exhausted.  Whitespace skipping is
# written inline throughout instead of calling #ws (see the note above #ws).
class JSON

  # Parses a single JSON value from +io+ (anything responding to #getc) and
  # returns its Ruby equivalent.
  # Raises RuntimeError on malformed input or when data follows the value.
  def parse(io)
    @la = io.getc
    @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
    out = []
    value(out, io)
    @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
    raise("EOF expected") if @la
    raise(out.inspect) unless out.length==1
    out[0]
  end

  # Raises a RuntimeError describing what was +expected+ and what was
  # +found+ (nil is reported as EOF).
  def error(expected, found)
    raise("expected #{expected}, found #{found ? ("'"<<found<<?\') : 'EOF'}")
  end

  # Parses one JSON value starting at the current lookahead and appends the
  # resulting Ruby object to +out+.
  def value(out, io)
    if ?\"==(@la)
      out << string(io)
    elsif ?\{==(@la)
      # object: collected as a flat [key, value, key, value, ...] list
      @la=io.getc
      @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
      kv = []
      unless ?\}==(@la)
        ?\"==(@la) ? (kv << string(io)) : error("a string", @la)
        @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
        ?\:==(@la) ? (@la=io.getc) : error("':'", @la)
        @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
        value(kv, io)
        @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
        until ?\}==(@la)
          ?,==(@la) ? (@la=io.getc) : error("','", @la)
          @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
          ?\"==(@la) ? (kv << string(io)) : error("a string", @la)
          @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
          ?\:==(@la) ? (@la=io.getc) : error("':'", @la)
          @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
          value(kv, io)
          @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
        end
      end
      @la = io.getc   # step past the closing '}'
      out << Hash[*kv]
    elsif ?\[==(@la)
      # array
      @la=io.getc
      @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
      a = []
      unless ?\]==(@la)
        value(a, io)
        @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
        until ?\]==(@la)
          ?\,==(@la) ? (@la=io.getc) : error("','", @la)
          @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
          value(a, io)
          @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
        end
      end
      @la = io.getc   # step past the closing ']'
      out << a
    elsif ?t==(@la)
      @la = io.getc
      ?r==(@la) ? (@la=io.getc) : error(?r, @la)
      ?u==(@la) ? (@la=io.getc) : error(?u, @la)
      ?e==(@la) ? (@la=io.getc) : error(?e, @la)
      out << true
    elsif ?f==(@la)
      @la = io.getc
      ?a==(@la) ? (@la=io.getc) : error(?a, @la)
      ?l==(@la) ? (@la=io.getc) : error(?l, @la)
      ?s==(@la) ? (@la=io.getc) : error(?s, @la)
      ?e==(@la) ? (@la=io.getc) : error(?e, @la)
      out << false
    elsif ?n==(@la)
      @la = io.getc
      ?u==(@la) ? (@la=io.getc) : error(?u, @la)
      ?l==(@la) ? (@la=io.getc) : error(?l, @la)
      ?l==(@la) ? (@la=io.getc) : error(?l, @la)
      out << nil
    else
      # number: the text is accumulated in n, then converted with to_f when
      # a fraction or exponent was seen and with to_i otherwise
      n = ""
      (n<<@la;@la=io.getc) if ?-==(@la)
      ?0==(@la) ? (n<<@la;@la=io.getc) : digits(n, io)
      (?.==(@la) ?
        (n<<@la;@la=io.getc;digits(n, io);exp(n, io);true) :
        exp(n, io)) ?
        (out << n.to_f) :
        (out << n.to_i)
    end
  end

  # Flattening any of the methods below will improve performance further

  # Skips whitespace at the current lookahead.
  def ws(io)
    @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
  end

  # Appends one or more decimal digits from the input to +out+.
  # Raises (via #error) when the lookahead is not a digit.
  def digits(out, io)
    # @la is nil at end of input; guard before comparing, otherwise the
    # comparison itself raises ArgumentError for a number that ends the input.
    (@la && ?0<=@la && ?9>=@la) ? (out<<@la;@la=io.getc) : error("a digit", @la)
    while (@la && ?0<=@la && ?9>=@la); (out<<@la;@la=io.getc); end
  end

  # Appends an optional exponent part to +out+.
  # Returns true when an exponent was consumed, nil otherwise.
  def exp(out, io)
    (case @la;when ?e,?E;true;end) ? (out<<@la;@la=io.getc) :
      return
    (out<<@la;@la=io.getc) if (case @la;when ?-,?+;true;end)
    digits(out, io)
    true
  end

  # Parses a JSON string and returns it with escapes decoded.
  def string(io)
    # we've already verified the starting "
    @la=io.getc
    s = ""
    until ?\"==(@la)
      if ?\\==(@la)
        @la = io.getc
        case @la
        when ?\",?\\,?\/ then (s<<@la;@la=io.getc)
        when ?b then (s<<?\b;@la=io.getc)
        when ?f then (s<<?\f;@la=io.getc)
        when ?n then (s<<?\n;@la=io.getc)
        when ?r then (s<<?\r;@la=io.getc)
        when ?t then (s<<?\t;@la=io.getc)
        when ?u
          @la = io.getc
          u = ""
          4.times {
            case @la
            when ?0..?9, ?a..?f, ?A..?F
              u<<@la;@la=io.getc
            else
              error("a hex character", @la)
            end
          }
          s << u.to_i(16)
        else
          error("a valid escape", @la)
        end
      else
        # nil lookahead here means the input ended inside the string
        error("a character", @la) unless @la
        s<<@la;@la=io.getc
      end
    end
    @la = io.getc   # step past the closing quote
    s
  end

end
|
154
|
+
|
155
|
+
|
@@ -0,0 +1,174 @@
|
|
1
|
+
# JSON grammar written with the Peggy builder DSL, plus a converter that
# walks the recorded parse results and builds the matching Ruby objects.
#
# NOTE(review): from the way it is indexed here, parse_results looks like a
# map from a start index to a hash holding :found_order (production names)
# and, per production name, the end index of the match -- confirm against
# Peggy's documentation.
class JSON < Peggy::Builder
  KEYWORDS = {"true" => true, "false" => false, "null" => nil}
  ESCAPES = Hash[*%W[b \b f \f n \n r \r t \t]]

  # Declares the grammar productions.
  def initialize
    super

    self.ignore_productions = [:space]
    space { lit /\s+/ }

    value {
      seq {
        opt { space }
        one {
          string
          object
          array
          keyword
          number
        }
        opt { space }
      }
    }

    object {
      seq {
        lit /\{\s*/
        one {
          seq {
            opt { many { seq { string; lit /\s*:/; value; lit /,\s*/ } } }
            seq { string; lit /\s*:/; value }
            lit "}"
          }
          lit "}"
        }
      }
    }

    array {
      seq {
        lit "["
        one {
          seq {
            opt { many { seq { value; lit "," } } }; value; lit "]"
          }
          lit "]"
        }
      }
    }

    string {
      seq {
        lit '"'
        one {
          lit '"'
          seq {
            many {
              one {
                seq { string_content; opt { escape } }
                seq { escape; opt { string_content } }
              }
            }
            lit '"'
          }
        }
      }
    }
    string_content { lit(/[^\\"]+/) }
    escape {
      one {
        escape_literal
        escape_sequence
        escape_unicode
      }
    }

    escape_literal { lit(%r{\\["\\/]}) }
    escape_sequence { lit(/\\[bfnrt]/) }
    escape_unicode { lit(/\\u[0-9a-f]{4}/i) }

    # The exponent is optional independently of the fraction, so that
    # numbers such as 1e5 are accepted as well as 1.0e5.
    number { lit(/-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?\b/) }
    keyword { lit(/\b(?:true|false|null)\b/) }
  end

  # Converts the parse result starting at index +from+ (by default the
  # smallest recorded index) into a Ruby object by dispatching to the
  # to_ruby_<production> helper named by the first entry of :found_order.
  def to_ruby(from = parse_results.keys.min)
    kind = parse_results[from][:found_order].first
    to = parse_results[from][kind]
    send("to_ruby_#{kind}", from, to)
  end

  private

  # Builds a Hash from the results recorded strictly between +from+ and +to+.
  # A result whose :found_order is exactly [:string] is taken as the next
  # key; a two-entry result ending in :value is taken as that key's value.
  def to_ruby_object(from, to)
    #p parse_results
    object = Hash.new
    skip_to = nil
    last_key = nil
    parse_results.keys.select { |k| k > from and k < to }.sort.each do |key|
      content = parse_results[key]
      # results inside an already converted nested object/array are skipped
      next if skip_to and key < skip_to
      next unless content[:found_order] and
                  ( ( content[:found_order].size == 2 and
                      content[:found_order][1] == :value ) or
                    content[:found_order] == [:string] )
      if content[:found_order] == [:string]
        last_key = to_ruby_string(key, content[:string])
      else
        case content[:found_order].first
        when :object
          object[last_key] = to_ruby_object(key, content[:object])
          skip_to = content[:object]
        when :array
          object[last_key] = to_ruby_array(key, content[:array])
          skip_to = content[:array]
        else
          object[last_key] = to_ruby(key)
        end
      end
    end
    object
  end

  # Builds an Array from the value results recorded strictly between
  # +from+ and +to+.
  def to_ruby_array(from, to)
    array = Array.new
    skip_to = nil
    parse_results.keys.select { |k| k > from and k < to }.sort.each do |key|
      content = parse_results[key]
      # results inside an already converted nested object/array are skipped
      next if skip_to and key < skip_to
      next unless content[:found_order] and
                  content[:found_order].size == 2 and
                  content[:found_order][1] == :value
      case content[:found_order].first
      when :object
        array << to_ruby_object(key, content[:object])
        skip_to = content[:object]
      when :array
        array << to_ruby_array(key, content[:array])
        skip_to = content[:array]
      else
        array << to_ruby(key)
      end
    end
    array
  end

  # Builds a String from the content and escape results recorded strictly
  # between +from+ and +to+, decoding each escape.
  def to_ruby_string(from, to)
    string = String.new
    parse_results.keys.select { |k| k > from and k < to }.sort.each do |key|
      content = parse_results[key]
      next unless content[:found_order]
      case content[:found_order].first
      when :string_content
        string << source_text[key...content[:string_content]]
      when :escape_literal
        # the escaped character is the last character of the match
        string << source_text[content[:escape_literal] - 1, 1]
      when :escape_sequence
        string << ESCAPES[source_text[content[:escape_sequence] - 1, 1]]
      when :escape_unicode
        # key + 2 skips the leading backslash and 'u'
        string << [Integer("0x#{source_text[key + 2, 4]}")].pack("U")
      end
    end
    string
  end

  # Converts the number text in source_text[from...to]; anything with a
  # fraction or an exponent becomes a Float, everything else an Integer.
  def to_ruby_number(from, to)
    num = source_text[from...to]
    num =~ /[.eE]/ ? Float(num) : Integer(num)
  end

  # Maps the keyword text in source_text[from...to] through KEYWORDS.
  def to_ruby_keyword(from, to)
    KEYWORDS[source_text[from...to]]
  end
end
|
174
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
|
2
|
+
# Regexp-driven JSON parser (benchmark/json.re.rb).
#
# The input is scanned with #scan, #getch, #eos?, #peek and #string, i.e.
# the StringScanner interface.
class JSON

  # Parses a single JSON value from +input+ and returns its Ruby equivalent.
  # Raises RuntimeError on malformed input or when data follows the value.
  def parse(input)
    input.scan(/\s*/)
    out = []
    parse_value(out, input)
    input.eos? or error("Unexpected data", input)
    out[0]
  end

  private

  # Parses one JSON value at the scan position and appends the resulting
  # Ruby object to +out+.  Each branch also consumes trailing whitespace.
  def parse_value(out, input)
    if input.scan(/"/)
      parse_string(out, input)
    elsif input.scan(/\{\s*/)
      # object: collected as a flat [key, value, key, value, ...] list
      kv = []
      until input.scan(/\}\s*/)
        kv.empty? or input.scan(/,\s*/) or error("Expected ,", input)
        input.scan(/"/) or error("Expected string", input)
        parse_string(kv, input)
        input.scan(/:\s*/) or error("Expecting object separator", input)
        parse_value(kv, input)
      end
      out << Hash[*kv]
    elsif input.scan(/\[\s*/)
      array = []
      until input.scan(/\]\s*/)
        array.empty? or input.scan(/,\s*/) or error("Expected ,", input)
        parse_value(array, input)
      end
      out << array
    elsif input.scan(/true\s*/)
      out << true
    elsif input.scan(/false\s*/)
      out << false
    elsif input.scan(/null\s*/)
      out << nil
    elsif text=input.scan(/-?(?:0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?\s*/)
      # group 1 is the fraction and group 2 the exponent; either makes a Float
      out << ((input[1]||input[2]) ? text.to_f : text.to_i)
    else
      # error takes the scanner as its second argument
      error("Illegal JSON value", input)
    end
  end

  # Parses the remainder of a string whose opening quote has already been
  # consumed and appends the decoded String to +out+.
  def parse_string(out, input)
    s = ""
    while true
      if text=input.scan(/[^\\"]+/)
        s.concat(text)
      elsif input.scan(/\\/)
        # getch returns nil when the backslash was the last character
        ch = input.getch or error("Unclosed string", input)
        case (ch=ch[0])
        when ?b ; s << ?\b
        when ?f ; s << ?\f
        when ?n ; s << ?\n
        when ?r ; s << ?\r
        when ?t ; s << ?\t
        when ?u
          text = input.scan(/[0-9a-fA-F]{4}/) or raise("expected hex*4")
          s << text.to_i(16)
        else
          # any other escaped character is taken literally
          s << ch
        end
      else
        break
      end
    end
    input.scan(/"\s*/) or error("Unclosed string", input)
    out << s
  end

  # Raises a RuntimeError: a fixed end-of-input message when +input+ is
  # exhausted, otherwise +message+ followed by the unconsumed input.
  def error(message, input)
    if input.eos?
      raise "Unexpected end of input."
    else
      raise "#{message}: #{input.peek(input.string.length)}"
    end
  end
end
|
80
|
+
|
81
|
+
|
data/lib/grammar.rb
CHANGED
@@ -1,692 +1,265 @@
|
|
1
1
|
#!/bin/env ruby
|
2
2
|
# = grammar.rb - specify BNF-like grammar directly in Ruby
|
3
|
-
# $Id: grammar.rb,v 1.
|
4
|
-
# Author:: Eric Mahurin (Eric under Mahurin at yahoo
|
3
|
+
# $Id: grammar.rb,v 1.3 2008/09/05 06:01:20 eric_mahurin Exp $
|
4
|
+
# Author:: Eric Mahurin (Eric under Mahurin at yahoo period com)
|
5
|
+
# Copyright (c) Eric Mahurin 2005-2008
|
5
6
|
# License:: Ruby license
|
6
7
|
# Home:: http://rubyforge.org/projects/grammar
|
7
8
|
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
9
|
+
# The Grammar class defines operators and methods that allow Grammars to be
|
10
|
+
# built in a tree. The result is similar to BNF seen in other parser
|
11
|
+
# generators. No actual parsing is done by this class. That is up to an
|
12
|
+
# engine.
|
11
13
|
class Grammar
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
# grammars for those arguments.
|
18
|
-
def multiple(&block) # :yield: *recursive_grammars
|
19
|
-
grammars = (1..block.arity).map { self.new }
|
20
|
-
grammars.zip(yield(*grammars)) { |g,g1| g << g1 }
|
21
|
-
grammars
|
22
|
-
end
|
23
|
-
end
|
24
|
-
# Creates a Grammar from another +grammar+. If +grammar+ is not given
|
25
|
-
# and a block is instead, the block is passed +self+ (to handle recursion)
|
26
|
-
# and the resulting grammar from this block will be used.
|
27
|
-
def initialize(grammar=nil,&block) # :yield: +self+
|
28
|
-
@grammar = grammar || block && yield(self)
|
29
|
-
end
|
30
|
-
# Reinitialize with another Grammar. This will be needed for recursion
|
31
|
-
# unless the block form of new is used.
|
32
|
-
def << (*args)
|
33
|
-
initialize(*args)
|
34
|
-
end
|
35
|
-
# Match to elements at a Cursor while advancing. When matched, a parse
|
36
|
-
# buffer is returned. Instead of an empty Array, the seed to this parse buffer
|
37
|
-
# can be given by +buffer+ which should respond to #concat and #<< like Array.
|
38
|
-
# When a mismatch occurs several possibilities exist. If +lookahead+ and
|
39
|
-
# the Grammar is within its lookahead (defaults one element/token - can be
|
40
|
-
# controlled by #lookahead), the cursor is moved back to where it started and
|
41
|
-
# +false+ is returned. Otherwise an exception describing the mismatch is
|
42
|
-
# raised.
|
43
|
-
def scan(cursor,buffer=[],lookahead=false)
|
44
|
-
@grammar.scan(cursor,buffer,lookahead)
|
14
|
+
|
15
|
+
# Create a Grammar from a block. The block is passed a Grammar engine
|
16
|
+
# which should be used to do any parsing.
|
17
|
+
def initialize(&block) # :yield: engine
|
18
|
+
@block = block
|
45
19
|
end
|
46
|
-
#
|
47
|
-
|
48
|
-
|
20
|
+
# Executes the Grammar with an engine. The engine simply gets passed to
|
21
|
+
# the block (actually a lambda now) contained in the Grammar.
|
22
|
+
def [](engine)
|
23
|
+
@block[engine]
|
49
24
|
end
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
"#{me}.scan(#{cursor},#{buffer},#{lookahead})"
|
25
|
+
# Returns the lambda that the Grammar holds.
|
26
|
+
def to_proc
|
27
|
+
@block
|
54
28
|
end
|
55
|
-
|
56
|
-
|
29
|
+
# Replaces the contained lambda with one from another Grammar.
|
30
|
+
def <<(gram)
|
31
|
+
@block = gram && gram.to_proc
|
57
32
|
end
|
58
|
-
#
|
33
|
+
# Grammar that matches +self+ or +other+ if that fails.
|
59
34
|
def |(other)
|
60
|
-
|
61
|
-
"(#{us[cursor,buffer,true,hold]} ||
|
62
|
-
#{them[cursor,buffer,lookahead,hold]})"
|
63
|
-
}
|
35
|
+
Grammar { |e| e.alternation(self.to_proc, &other) }
|
64
36
|
end
|
65
|
-
#
|
66
|
-
# The resulting match list is a concatenation from the match lists
|
67
|
-
# from +self+ and +other+.
|
37
|
+
# Grammar that matches +self+ followed by +other+.
|
68
38
|
def +(other)
|
69
|
-
|
70
|
-
"(#{us[cursor,buffer,lookahead,false]} &&
|
71
|
-
#{them[cursor,buffer,false,false]})"
|
72
|
-
}
|
73
|
-
end
|
74
|
-
# Generates a Grammar that matches when +self+ (in-place) and +other+.
|
75
|
-
def &(other)
|
76
|
-
Inline.new(self,other) { |us,them,cursor,buffer,lookahead,hold|
|
77
|
-
"(#{us[cursor,buffer,lookahead,true]} &&
|
78
|
-
#{them[cursor,buffer,lookahead,hold]})"
|
79
|
-
}
|
80
|
-
end
|
81
|
-
# Creates a new Grammar that matches +self+ replicated +multiplier+ times.
|
82
|
-
# +multiplier+ can be a Range to specify a variable multiplier. The
|
83
|
-
# +multiplier+ just needs to responds to #=== to determine the min and
|
84
|
-
# max iterations.
|
85
|
-
def *(multiplier)
|
86
|
-
Inline.new(self,nil,multiplier) { |us,multiplier,cursor,buffer,lookahead|
|
87
|
-
Inline.var { |n,ret,look| "(
|
88
|
-
#{n} = -1
|
89
|
-
#{ret} = false
|
90
|
-
#{look} = #{lookahead}
|
91
|
-
while true
|
92
|
-
if #{multiplier}===(#{n}+=1)
|
93
|
-
if !#{ret}
|
94
|
-
#{ret} = #{buffer}
|
95
|
-
#{look} = true
|
96
|
-
end
|
97
|
-
else
|
98
|
-
break(#{ret}) if #{ret}
|
99
|
-
end
|
100
|
-
#{us[cursor,buffer,look,false]} or break(#{ret})
|
101
|
-
#{look} = false if !#{ret}
|
102
|
-
end
|
103
|
-
)" }
|
104
|
-
}
|
39
|
+
Grammar { |e| e.sequence(self.to_proc, &other) }
|
105
40
|
end
|
106
|
-
#
|
41
|
+
# Zero-width Grammar that matches +self+ (discards results).
|
107
42
|
def +@
|
108
|
-
|
109
|
-
"(#{us[cursor,'DISCARD',lookahead,true]} && #{buffer})"
|
110
|
-
}
|
43
|
+
Grammar { |e| e.positive(&self) }
|
111
44
|
end
|
112
|
-
#
|
45
|
+
# Zero-width Grammar that matches anything but +self+ (discards results).
|
113
46
|
def -@
|
114
|
-
|
115
|
-
"(!#{us[cursor,'DISCARD',true,true]} ? #{buffer} :
|
116
|
-
!#{lookahead}&&raise(Error.new(cursor,'a negative syntatic predicate')))"
|
117
|
-
}
|
47
|
+
Grammar { |e| e.negative(&self) }
|
118
48
|
end
|
119
|
-
#
|
49
|
+
# Grammar that as long as what follows doesn't match +self+, it
|
120
50
|
# matches to the next element. Most useful for a single element Grammar.
|
121
51
|
def ~
|
122
|
-
|
52
|
+
-self + ANY
|
123
53
|
end
|
124
|
-
#
|
54
|
+
# Grammar that optionally matches +self+.
|
125
55
|
def optional
|
126
|
-
self|NULL
|
127
|
-
end
|
128
|
-
# Matches a list of +self+ (plus possibly other stuff) one or more times.
|
129
|
-
# The arguments are an alternating list of optional terminators and
|
130
|
-
# separators. Along with #list0 you should be able to describe any
|
131
|
-
# tail recursive grammar. This is equivalent to this recursive Grammar:
|
132
|
-
#
|
133
|
-
# Grammar.new { |g| a+(z|b+(y|...g)) }
|
134
|
-
#
|
135
|
-
# where a, b, ... are +self+ and the separators and z, y, ... are the
|
136
|
-
# terminators.
|
137
|
-
#
|
138
|
-
# When a terminator is +nil+, the next item is treated
|
139
|
-
# as optional (i.e. instead of a+(nil|g), a+(g|) is used).
|
140
|
-
#
|
141
|
-
# When there is a missing terminator at the end of +term_sep+ (and it is
|
142
|
-
# non-empty), the list is not allowed to stop at that point.
|
143
|
-
def list1(*term_sep)
|
144
|
-
term_sep.push(nil) if term_sep.empty?
|
145
|
-
term_sep.unshift(self)
|
146
|
-
Inline.new(*term_sep.compact) { |*args|
|
147
|
-
cursor,buffer,lookahead = args.slice!(-3,3)
|
148
|
-
Inline.var { |look,ret|
|
149
|
-
terminated = (term_sep.size&1).nonzero? || term_sep[-1]
|
150
|
-
code = "(
|
151
|
-
#{look} = #{lookahead}
|
152
|
-
#{terminated ? (ret=false;'') : "#{ret} = false"}
|
153
|
-
while true
|
154
|
-
#{args[j=0][cursor,buffer,look,false]} or break(#{ret})
|
155
|
-
#{look} = #{terminated ? false : true}
|
156
|
-
#{terminated ? '' : "#{ret} = #{buffer}"}"
|
157
|
-
1.step(term_sep.size-1,2) { |i|
|
158
|
-
if term_sep[i]
|
159
|
-
code << "
|
160
|
-
#{args[j+=1][cursor,buffer,true,false]} and break(#{buffer})"
|
161
|
-
if i+1<term_sep.size
|
162
|
-
code << "
|
163
|
-
#{args[j+=1][cursor,buffer,false,false]} or break(false)"
|
164
|
-
end
|
165
|
-
elsif i+1<term_sep.size
|
166
|
-
code << "
|
167
|
-
#{args[j+=1][cursor,buffer,true,false]} or break(#{buffer})"
|
168
|
-
end
|
169
|
-
}
|
170
|
-
code << "
|
171
|
-
end
|
172
|
-
)"
|
173
|
-
}
|
174
|
-
}
|
56
|
+
self | NULL
|
175
57
|
end
|
176
|
-
#
|
177
|
-
#
|
178
|
-
#
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
#
|
183
|
-
# where a, b, ... are +self+ and the separators and z, y, ..., x are the
|
184
|
-
# terminators.
|
185
|
-
#
|
186
|
-
# When a terminator is +nil+/missing, the next item is treated
|
187
|
-
# as optional.
|
188
|
-
def list0(*term_sep)
|
189
|
-
term_sep.push(nil) if (term_sep.size&1).zero?
|
190
|
-
term_sep.unshift(self)
|
191
|
-
Inline.new(*term_sep.compact) { |*args|
|
192
|
-
cursor,buffer,lookahead = args.slice!(-3,3)
|
193
|
-
Inline.var { |look,ret|
|
194
|
-
code = "("
|
195
|
-
code << "
|
196
|
-
#{look} = #{lookahead}" if term_sep[-1]
|
197
|
-
code << "
|
198
|
-
while true"
|
199
|
-
j = -2
|
200
|
-
-1.step(term_sep.size-3,2) { |i|
|
201
|
-
if term_sep[i]
|
202
|
-
code << "
|
203
|
-
#{args[j+=1][cursor,buffer,true,false]} and break(#{buffer})"
|
204
|
-
if j.zero?
|
205
|
-
code << "
|
206
|
-
#{args[j+=1][cursor,buffer,look,false]} or break(false)
|
207
|
-
#{look} = false"
|
208
|
-
else
|
209
|
-
code << "
|
210
|
-
#{args[j+=1][cursor,buffer,false,false]} or break(false)"
|
211
|
-
end
|
212
|
-
else
|
213
|
-
j += 1 if j==2
|
214
|
-
code << "
|
215
|
-
#{args[j+=1][cursor,buffer,true,false]} or break(#{buffer})"
|
216
|
-
end
|
217
|
-
}
|
218
|
-
code << "
|
219
|
-
end)"
|
220
|
-
}
|
221
|
-
}
|
222
|
-
end
|
223
|
-
# Creates a new Grammar where the entire grammar is considered a
|
224
|
-
# part of the lookahead (instead of just the first element).
|
225
|
-
def lookahead
|
226
|
-
Inline.new(self) { |us,cursor,buffer,lookahead|
|
227
|
-
Inline.var { |branch| "(
|
228
|
-
#{branch} = #{buffer}.class.new
|
229
|
-
#{cursor}.pos? { begin
|
230
|
-
#{us[cursor,branch,false]}
|
231
|
-
rescue Error => err
|
232
|
-
raise(err) if !#{lookahead}
|
233
|
-
end } && #{buffer}.concat(#{branch})
|
234
|
-
)" }
|
235
|
-
}
|
236
|
-
end
|
237
|
-
# Creates a new Grammar where the match list of +self+ is filtered by
|
238
|
-
# some code.
|
239
|
-
# When a +klass+ is given, +klass+.new is used as the buffer to hold what
|
240
|
-
# will be passed to the code. Otherwise this temporary buffer will come
|
241
|
-
# from buffer.class.new.
|
242
|
-
# If the block needs 1 argument, this temporary buffer will be passed
|
243
|
-
# and the block should return something that will be given to buffer.concat.
|
244
|
-
# If the block needs 2 arguments, the second argument will be the buffer
|
245
|
-
# and the block should do the concatenation.
|
246
|
-
# If there is no block, the temporary buffer is passed to buffer.concat
|
247
|
-
# directly. Use this to get some isolation.
|
248
|
-
def filter(klass=nil,&code) # :yield: branch[, buffer]
|
249
|
-
if !code
|
250
|
-
if klass
|
251
|
-
Inline.new(self,nil,klass) { |us,klass,cursor,buffer,lookahead,hold|
|
252
|
-
Inline.var { |branch| "(
|
253
|
-
#{branch}=#{klass}.new
|
254
|
-
#{us[cursor,branch,lookahead,hold]} &&
|
255
|
-
#{buffer}.concat(#{branch})
|
256
|
-
)"}
|
257
|
-
}
|
258
|
-
else
|
259
|
-
Inline.new(self) { |us,cursor,buffer,lookahead,hold|
|
260
|
-
Inline.var { |branch| "(
|
261
|
-
#{branch}=#{buffer}.class.new
|
262
|
-
#{us[cursor,branch,lookahead,hold]} &&
|
263
|
-
#{buffer}.concat(#{branch})
|
264
|
-
)"}
|
265
|
-
}
|
266
|
-
end
|
267
|
-
elsif code.arity>=2
|
268
|
-
if klass
|
269
|
-
Inline.new(self,nil,klass,code) { |us,klass,code,cursor,buffer,lookahead,hold|
|
270
|
-
Inline.var { |branch| "(
|
271
|
-
#{branch}=#{klass}.new
|
272
|
-
#{us[cursor,branch,lookahead,hold]} &&
|
273
|
-
(#{code}[#{branch},#{buffer}]||
|
274
|
-
raise(Error.new(cursor,'a filtered '+#{branch}.inspect)))
|
275
|
-
)"}
|
276
|
-
}
|
277
|
-
else
|
278
|
-
Inline.new(self,nil,code) { |us,code,cursor,buffer,lookahead,hold|
|
279
|
-
Inline.var { |branch| "(
|
280
|
-
#{branch}=#{buffer}.class.new
|
281
|
-
#{us[cursor,branch,lookahead,hold]} &&
|
282
|
-
(#{code}[#{branch},#{buffer}]||
|
283
|
-
raise(Error.new(cursor,'a filtered '+#{branch}.inspect)))
|
284
|
-
)"}
|
285
|
-
}
|
286
|
-
end
|
58
|
+
# Grammar that matches a sequence of zero or more +self+ followed
|
59
|
+
# by an optional terminator (+term+). If +term+ is given it takes
|
60
|
+
# precedence over matching +self+ items.
|
61
|
+
def repeat0(term=nil)
|
62
|
+
if term
|
63
|
+
Recurse { |g| term | self + g }
|
287
64
|
else
|
288
|
-
|
289
|
-
Inline.new(self,nil,klass,code) { |us,klass,code,cursor,buffer,lookahead,hold|
|
290
|
-
Inline.var { |branch| "(
|
291
|
-
#{branch}=#{klass}.new
|
292
|
-
#{us[cursor,branch,lookahead,hold]} &&
|
293
|
-
#{buffer}.concat(#{code}[#{branch}]||
|
294
|
-
raise(Error.new(cursor,'a filtered '+#{branch}.inspect)))
|
295
|
-
)"}
|
296
|
-
}
|
297
|
-
else
|
298
|
-
Inline.new(self,nil,code) { |us,code,cursor,buffer,lookahead,hold|
|
299
|
-
Inline.var { |branch| "(
|
300
|
-
#{branch}=#{buffer}.class.new
|
301
|
-
#{us[cursor,branch,lookahead,hold]} &&
|
302
|
-
#{buffer}.concat(#{code}[#{branch}]||
|
303
|
-
raise(Error.new(cursor,'a filtered '+#{branch}.inspect)))
|
304
|
-
)"}
|
305
|
-
}
|
306
|
-
end
|
65
|
+
Recurse { |g| g + self | NULL }
|
307
66
|
end
|
308
67
|
end
|
309
|
-
#
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
# Returns a Grammar that groups the match list from +self+. A temporary
|
316
|
-
# buffer is formed just list #filter, but buffer.<< is used instead of
|
317
|
-
# buffer.concat.
|
318
|
-
def group(klass=nil)
|
319
|
-
if klass
|
320
|
-
Inline.new(self,nil,klass) { |us,klass,cursor,buffer,lookahead,hold|
|
321
|
-
Inline.var { |branch| "(
|
322
|
-
#{branch}=#{klass}.new
|
323
|
-
#{us[cursor,branch,lookahead,hold]} &&
|
324
|
-
#{buffer}<<#{branch}
|
325
|
-
)"}
|
326
|
-
}
|
68
|
+
# Grammar that matches a sequence of one or more +self+ followed
|
69
|
+
# by an optional terminator (+term+). If +term+ is given it takes
|
70
|
+
# precedence over matching +self+ items.
|
71
|
+
def repeat1(term=nil)
|
72
|
+
if term
|
73
|
+
Recurse { |g| self + (term | g) }
|
327
74
|
else
|
328
|
-
|
329
|
-
Inline.var { |branch| "(
|
330
|
-
#{branch}=#{buffer}.class.new
|
331
|
-
#{us[cursor,branch,lookahead,hold]} &&
|
332
|
-
#{buffer}<<#{branch}
|
333
|
-
)"}
|
334
|
-
}
|
75
|
+
Recurse { |g| (g | NULL) + self }
|
335
76
|
end
|
336
77
|
end
|
337
|
-
|
338
|
-
|
339
|
-
#
|
340
|
-
#
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
"\nend"
|
353
|
-
)
|
354
|
-
scan(cursor,buffer,lookahead)
|
355
|
-
end
|
356
|
-
def check(cursor,buffer=[],lookahead=false) # :nodoc:
|
357
|
-
(class << self;self;end).class_eval(
|
358
|
-
"def check(cursor,buffer=[],lookahead=false)\n"+
|
359
|
-
scanner(*(_leaf_names+Arg_names+[true]))+
|
360
|
-
"\nend"
|
361
|
-
)
|
362
|
-
check(cursor,buffer,lookahead)
|
363
|
-
end
|
364
|
-
def scanner(*leaves_args) # :nodoc:
|
365
|
-
objects = _extractors.map { |e| e[leaves_args] }
|
366
|
-
args = objects+leaves_args
|
367
|
-
if @block.arity<args.size and args.slice!(-1)
|
368
|
-
"#{leaves_args[0]}.pos{#{@block.call(*args)}}"
|
369
|
-
else
|
370
|
-
@block.call(*args)
|
371
|
-
end
|
372
|
-
end
|
373
|
-
def leaves # :nodoc:
|
374
|
-
@_ or begin
|
375
|
-
@_ = []
|
376
|
-
@extractors = []
|
377
|
-
@objects.inject(false) { |leaf,object|
|
378
|
-
if leaf
|
379
|
-
@_ << object
|
380
|
-
@extractors << lambda { |leaves_args|
|
381
|
-
leaves_args.slice!(0)
|
382
|
-
}
|
383
|
-
true
|
384
|
-
elsif !object
|
385
|
-
true
|
386
|
-
elsif false
|
387
|
-
# enable this code to disable code flattening
|
388
|
-
@_ << object
|
389
|
-
@extractors << lambda { |leaves_args|
|
390
|
-
g = leaves_args.slice!(0)
|
391
|
-
lambda { |*args|
|
392
|
-
"#{g}.#{args.slice!(-1) ? 'check' : 'scan'}(#{args.join(',')})"
|
393
|
-
}
|
394
|
-
}
|
395
|
-
false
|
396
|
-
else
|
397
|
-
leaves = object.leaves
|
398
|
-
@_.concat(leaves)
|
399
|
-
n = leaves.size
|
400
|
-
@extractors << lambda { |leaves_args|
|
401
|
-
leaf_names = leaves_args.slice!(0,n)
|
402
|
-
lambda { |*args| object.scanner(*(leaf_names+args)) }
|
403
|
-
}
|
404
|
-
false
|
78
|
+
# Grammar that matches +self+ replicated +multiplier+ times.
|
79
|
+
# +multiplier+ can be a Range to specify a variable multiplier. The
|
80
|
+
# +multiplier+ just needs to responds to #=== to determine the min and
|
81
|
+
# max iterations.
|
82
|
+
def *(mult)
|
83
|
+
Common { |e|
|
84
|
+
Variables(0) { |i|
|
85
|
+
case mult
|
86
|
+
when Fixnum
|
87
|
+
start = Check { e[mult].equal?(i << i[] + e[1]) }
|
88
|
+
inside = Fail()
|
89
|
+
when Range
|
90
|
+
start = case (range0=mult.begin)
|
91
|
+
when Fixnum; Check { e[range0].equal?(i << i[] + e[1]) }
|
92
|
+
else; Check { e[range0] <= (i << i[] + e[1]) }
|
405
93
|
end
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
end
|
417
|
-
def inspect # :nodoc:
|
418
|
-
to_s[0..-2].concat(" #{scanner(*(leaves+Arg_names+[false]))}>")
|
419
|
-
end
|
420
|
-
@@symbol = "_0".to_sym
|
421
|
-
# used for generating "local" variable names
|
422
|
-
def self.var(&block)
|
423
|
-
critical0 = Thread.critical
|
424
|
-
Thread.critical = true
|
425
|
-
if block
|
426
|
-
begin
|
427
|
-
symbol = @@symbol
|
428
|
-
symbols = []
|
429
|
-
block.arity.times {
|
430
|
-
symbols << @@symbol
|
431
|
-
@@symbol = @@symbol.to_s.succ.to_sym
|
432
|
-
}
|
433
|
-
# this better not need other threads - critical section
|
434
|
-
yield(*symbols)
|
435
|
-
ensure
|
436
|
-
@@symbol = symbol
|
437
|
-
end
|
438
|
-
else
|
439
|
-
begin
|
440
|
-
@@symbol
|
441
|
-
ensure
|
442
|
-
@@symbol = @@symbol.to_s.succ.to_sym
|
94
|
+
range1 = mult.end
|
95
|
+
mult.exclude_end? or
|
96
|
+
range1 = begin;range1.succ;rescue;range1+1;end
|
97
|
+
inside = case range1
|
98
|
+
when Fixnum; Check { e.not(e[range1].equal?(i << i[] + e[1])) }
|
99
|
+
when 1.0/0; NULL
|
100
|
+
else; Check { e[range1] > (i << i[] + e[1]) }
|
101
|
+
end
|
102
|
+
else
|
103
|
+
start = inside = Check { e[mult] === (i << i[] + e[1]) }
|
443
104
|
end
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
105
|
+
tail = Recurse { |l| l + inside + self | NULL }
|
106
|
+
((mult===0) ? tail : Recurse { |r| self + (start + tail | r) })
|
107
|
+
}
|
108
|
+
}
|
448
109
|
end
|
449
|
-
|
450
|
-
#
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
super(nil,code) { |code,cursor,buffer,lookahead|
|
455
|
-
"#{code}[#{cursor},#{buffer},#{lookahead}]"
|
456
|
-
}
|
457
|
-
else
|
458
|
-
super(nil,code) { |code,cursor,buffer,lookahead,hold|
|
459
|
-
"#{code}[#{cursor},#{buffer},#{lookahead},#{hold}]"
|
460
|
-
}
|
461
|
-
end
|
462
|
-
end
|
110
|
+
# Grammar that redirects parsing results of +self+ to a
|
111
|
+
# +buf0+.clone and yields the resulting buffer and possibly the engine
|
112
|
+
# afterwards.
|
113
|
+
def redirect(buf0, &block) # :yield: buf[, engine]
|
114
|
+
Grammar { |e| e.redirect(self.to_proc, buf0, &block) }
|
463
115
|
end
|
464
|
-
|
465
|
-
#
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
end
|
470
|
-
def scan(cursor,buffer=[],lookahead=false) # :nodoc:
|
471
|
-
v = cursor.read1next
|
472
|
-
if grammar = @lookup[v]
|
473
|
-
buffer << v
|
474
|
-
grammar.scan(cursor,buffer,false)
|
475
|
-
else
|
476
|
-
raise(Error.new(cursor,"no grammar for #{v} found in #{@lookup}"))
|
477
|
-
end
|
478
|
-
end
|
116
|
+
# Grammar that discards parsing results of +self+ and afterwards
|
117
|
+
# yields the engine to the optional block which should return something
|
118
|
+
# to be appended to the output.
|
119
|
+
def discard(&block) # :yield: engine
|
120
|
+
Grammar { |e| e.discard(self.to_proc, &block) }
|
479
121
|
end
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
else
|
491
|
-
raise(Error.new(cursor,"no grammar for #{v} found in #{@lookup}"))
|
492
|
-
end
|
493
|
-
end
|
122
|
+
# Grammar that redirects parsing results of +self+ to a
|
123
|
+
# +buf0+.clone and yields the resulting buffer and possibly the engine
|
124
|
+
# afterwards to an optional block which should return something to be
|
125
|
+
# appended to the output.
|
126
|
+
def group(buf0, &block) # :yield: buf[, engine]
|
127
|
+
block_given? ? redirect(buf0) { |buf, e|
|
128
|
+
e << (block.arity==1 ? yield(buf) : yield(buf, e))
|
129
|
+
} : redirect(buf0) { |buf, e|
|
130
|
+
e << buf
|
131
|
+
}
|
494
132
|
end
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
# (i.e. String/Array) is used to represent this sequence. Each element
|
499
|
-
# returned by #[] should respond to #== to compare each element in the
|
500
|
-
# sequence.
|
501
|
-
class Sequence < Grammar
|
502
|
-
def initialize(value,partial=false)
|
503
|
-
@value = value
|
504
|
-
@partial = partial
|
505
|
-
end
|
506
|
-
def scan(cursor,buffer=[],lookahead=false) # :nodoc:
|
507
|
-
i = cursor.scan(@value,false,false,buffer)
|
508
|
-
if !i
|
509
|
-
if lookahead
|
510
|
-
false
|
511
|
-
else
|
512
|
-
raise(Error.new(cursor,@value[0]))
|
513
|
-
end
|
514
|
-
elsif !@partial and i<0
|
515
|
-
raise(Error.new(cursor,@value[-i]))
|
516
|
-
else
|
517
|
-
buffer
|
518
|
-
end
|
519
|
-
end
|
520
|
-
def inspect
|
521
|
-
"#{self.class}.new(#{@value.inspect},#{@partial.inspect})"
|
522
|
-
end
|
523
|
-
def to_s
|
524
|
-
inspect
|
525
|
-
end
|
133
|
+
# not sure if this is needed or wanted right now
|
134
|
+
def backref(&block) # :nodoc: :yield: n[, engine]
|
135
|
+
Grammar { |e| e.backref(self.to_proc, &block) }
|
526
136
|
end
|
527
|
-
# Grammar that matches
|
528
|
-
#
|
529
|
-
|
530
|
-
|
531
|
-
@value = value
|
532
|
-
@allow_eof = allow_eof
|
533
|
-
end
|
534
|
-
def scan(cursor,buffer=[],lookahead=false) # :nodoc:
|
535
|
-
len,i = cursor.scan_until(@value,false,false,buffer)
|
536
|
-
if !len
|
537
|
-
if lookahead
|
538
|
-
false
|
539
|
-
else
|
540
|
-
raise(Error.new(cursor,@value[0]))
|
541
|
-
end
|
542
|
-
elsif !@allow_eof and len.nonzero? and i<=0
|
543
|
-
raise(Error.new(cursor,@value[-i]))
|
544
|
-
else
|
545
|
-
buffer
|
546
|
-
end
|
547
|
-
end
|
137
|
+
# Grammar that matches +self+, but backtracks when it
|
138
|
+
# fails instead of raising an error.
|
139
|
+
def backtrack(len=nil)
|
140
|
+
Grammar { |e| e.backtrack(self.to_proc, len) }
|
548
141
|
end
|
549
|
-
# Grammar that
|
550
|
-
#
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
"(v=#{cursor}.scan1next(#{value}))"
|
557
|
-
"(#{condition} ? " +
|
558
|
-
"#{buffer} << v : " +
|
559
|
-
"!#{lookahead}&&raise(Error.new(#{cursor},#{value})))"
|
560
|
-
}
|
561
|
-
end
|
142
|
+
# Grammar that uses a looped +self+ as a lexer to generate tokens
|
143
|
+
# for +parser+ which sends its results to the output. +buf0+.clone is used
|
144
|
+
# to hold tokens between the lexer and the parser.
|
145
|
+
def supply(parser, buf0, &block) # :yield: buf[, engine]
|
146
|
+
Grammar { |e|
|
147
|
+
e.supply(self.to_proc, parser.to_proc, buf0, &block)
|
148
|
+
}
|
562
149
|
end
|
563
|
-
# Grammar that
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
150
|
+
# Grammar that uses +self+ as a lexer to generate tokens
|
151
|
+
# for +parser+ which sends its results to the output. +buf0+.clone is used
|
152
|
+
# to hold tokens between the lexer and the parser.
|
153
|
+
def pipe(parser, buf0, len=nil, &block) # :yield: buf[, engine]
|
154
|
+
Grammar { |e|
|
155
|
+
e.pipe(self.to_proc, parser.to_proc, buf0, len, &block)
|
156
|
+
}
|
570
157
|
end
|
571
158
|
|
572
|
-
#
|
573
|
-
|
574
|
-
"((v=#{cursor}.read1#{hold ? 'after' : 'next'}) ? " +
|
575
|
-
"#{buffer} << v : " +
|
576
|
-
"!#{lookahead}&&raise(Error.new(#{cursor},'any element')))"
|
577
|
-
}
|
578
|
-
# Grammar that always passes and matches nothing
|
579
|
-
NULL = Inline.new { |_,buffer,_,_| "#{buffer}" }
|
580
|
-
# Grammar that matches the end-of-file (or end-of-cursor)
|
581
|
-
EOF = Inline.new { |cursor,buffer,_,_|
|
582
|
-
"(!#{cursor}.skip1after&&#{buffer})"
|
583
|
-
}
|
159
|
+
# include this somewhere to have access to methods that create Grammars
|
160
|
+
module Molecules
|
584
161
|
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
@cursor = cursor
|
590
|
-
@expected = expected
|
591
|
-
@found = found
|
162
|
+
# Eliminate the need for the ".new".
|
163
|
+
# Would be better if objects were callable so we wouldn't need this.
|
164
|
+
def Grammar(&block)
|
165
|
+
Grammar.new(&block)
|
592
166
|
end
|
593
|
-
def to_s
|
594
|
-
err = [super]
|
595
|
-
err << "expected #{@expected.inspect}" if @expected
|
596
|
-
err << "found #{@found.inspect}" if @found
|
597
|
-
begin
|
598
|
-
#err << @cursor.to_s if @cursor
|
599
|
-
rescue
|
600
|
-
end
|
601
|
-
err * ", "
|
602
|
-
end
|
603
|
-
end
|
604
167
|
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
define_method(:class) do;self;end # using "def class" messed up rdoc
|
611
|
-
def new;self;end
|
612
|
-
}.new
|
613
|
-
# :startdoc:
|
614
|
-
|
615
|
-
end
|
168
|
+
# Grammar that matches a single element. An object responding to #===
|
169
|
+
# is used to do the matching.
|
170
|
+
def Element(pattern)
|
171
|
+
Grammar { |e| e.match(pattern) }
|
172
|
+
end
|
616
173
|
|
174
|
+
alias_method(:E, :Element)
|
617
175
|
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
def initialize(max_size=16,&producer)
|
628
|
-
@buffer = []
|
629
|
-
@size = 0
|
630
|
-
@max_size = max_size
|
631
|
-
@consumer = Thread.current
|
632
|
-
@producer = Thread.new { producer[self] }
|
633
|
-
end
|
634
|
-
def new_data
|
635
|
-
[]
|
636
|
-
end
|
637
|
-
def read1next
|
638
|
-
while (Thread.critical=true;@buffer.empty?&&@producer.alive?)
|
639
|
-
Thread.critical = false
|
640
|
-
@producer.run
|
641
|
-
end
|
642
|
-
v = @buffer.shift
|
643
|
-
@size -= 1
|
644
|
-
v
|
645
|
-
ensure
|
646
|
-
Thread.critical = false
|
647
|
-
end
|
648
|
-
def read1after
|
649
|
-
v = read1next
|
650
|
-
unless v.nil?;begin
|
651
|
-
Thread.critical = true
|
652
|
-
@buffer.unshift(v)
|
653
|
-
ensure
|
654
|
-
Thread.critical = false
|
655
|
-
end;end
|
656
|
-
v
|
657
|
-
end
|
658
|
-
def skip1after
|
659
|
-
read1after.nil? ? nil : true
|
176
|
+
# Grammar that matches the elements in +pattern_sequence+. +Element+
|
177
|
+
# is used for each pattern in +pattern_sequence+. Starting from index
|
178
|
+
# 0 #[] is used to access +pattern_sequence+ until it returns +nil+.
|
179
|
+
def Chain(pattern_sequence)
|
180
|
+
p = pattern_sequence[0] or return NULL
|
181
|
+
g = E(p)
|
182
|
+
i = 0
|
183
|
+
g += E(p) while p = pattern_sequence[i+=1]
|
184
|
+
g
|
660
185
|
end
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
186
|
+
|
187
|
+
# Grammar that always fails (with a +message+)
|
188
|
+
def Fail(message=nil)
|
189
|
+
Grammar { |e| e.failure(message) }
|
190
|
+
end
|
191
|
+
|
192
|
+
# Grammar that shares/uses one or more variables. Optional initial
|
193
|
+
# values can be given for the variables. The block should take
|
194
|
+
# variable reference objects (one or more) and should return a
|
195
|
+
# Grammar that uses the variables in action blocks. Use var#[] to
|
196
|
+
# get the value in a variable reference object and var#<< to set the
|
197
|
+
# value.
|
198
|
+
def Variables(*vals, &block) # :yield: *vars
|
199
|
+
Grammar { |e|
|
200
|
+
e.variables(block.arity) { |*vars|
|
201
|
+
init = []
|
202
|
+
vals.each_with_index { |val, i|
|
203
|
+
init << (vars[i] << e[val,true])
|
204
|
+
}
|
205
|
+
init << e[true]
|
206
|
+
Grammar { e.steps(*init) } + yield(*vars)
|
207
|
+
}
|
208
|
+
}
|
670
209
|
end
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
210
|
+
|
211
|
+
# Grammar that handles recursion. +inner+ represents a call back to
|
212
|
+
# the resulting Grammar. An +inner+ may be given or it will be
|
213
|
+
# automatically generated (as an empty/invalid Grammar). +inner+
|
214
|
+
# is yielded to the block which should return the resulting Grammar
|
215
|
+
# (and be based on +inner+). Middle, right, and left recursion should
|
216
|
+
# be handled by the engine properly, but there may be restrictions on
|
217
|
+
# left recursion (i.e. must be the very first thing in the resulting
|
218
|
+
# Grammar).
|
219
|
+
def Recurse(inner=Grammar()) # :yield: inner
|
220
|
+
outer = yield(inner)
|
221
|
+
Grammar { |e| e.recurse(inner, &outer) }
|
681
222
|
end
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
223
|
+
|
224
|
+
# Grammar that fails with a message when the Grammar block doesn't
|
225
|
+
# pass.
|
226
|
+
def Check(message=nil, &block) # :yield: engine
|
227
|
+
Grammar(&block) | Fail(message)
|
228
|
+
end
|
229
|
+
|
230
|
+
# Grammar that yields an engine to a Grammar block and expects that
|
231
|
+
# the result always passes.
|
232
|
+
def Always(&block) # :yield: engine
|
233
|
+
Grammar { |e| e.always(&block) }
|
234
|
+
end
|
235
|
+
|
236
|
+
# Grammar that is the result of yielding an engine. This adds a
|
237
|
+
# convenience so that action blocks don't need to receive the engine.
|
238
|
+
def Common # :yield: engine
|
239
|
+
Grammar { |e| yield(e)[e] }
|
240
|
+
end
|
241
|
+
|
242
|
+
# Grammar that modifies the output buffer. The current buffer and
|
243
|
+
# optionally the engine are yielded to a block which should return
|
244
|
+
# what the new output buffer should be.
|
245
|
+
# WARNING: only use this inside of a Grammar where #group or
|
246
|
+
# #redirect has been applied. It probably won't work as expected in
|
247
|
+
# other places.
|
248
|
+
def Output(&block) # :yield: buf[, engine]
|
249
|
+
Grammar { |e| e.output(&block) }
|
688
250
|
end
|
251
|
+
|
252
|
+
# Zero-width Grammar that always passes and matches nothing
|
253
|
+
NULL = Grammar.new { |e| e[true] }
|
254
|
+
# Zero-width Grammar that matches the end-of-file (or end-of-input)
|
255
|
+
EOF = Grammar.new { |e| e.eof }
|
256
|
+
# Grammar that matches any single element (not EOF)
|
257
|
+
ANY = Grammar.new { |e| e.any }
|
258
|
+
|
689
259
|
end
|
260
|
+
|
261
|
+
include Molecules
|
262
|
+
|
690
263
|
end
|
691
264
|
|
692
265
|
|