pegex 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemspec +2 -2
- data/.profile +2 -0
- data/CHANGELOG.yaml +3 -0
- data/LICENSE +1 -1
- data/README.rdoc +26 -24
- data/Rakefile +10 -1
- data/ToDo +3 -0
- data/lib/pegex.rb +4 -5
- data/lib/pegex/grammar.rb +1 -0
- data/lib/pegex/input.rb +1 -0
- data/lib/pegex/parser.rb +52 -54
- data/lib/pegex/pegex/ast.rb +6 -6
- data/lib/pegex/tree.rb +1 -1
- data/lib/pegex/tree/wrap.rb +2 -2
- data/test/export-api.rb +38 -0
- data/test/flatten.rb +30 -0
- data/test/grammar-api.rb +20 -8
- data/test/lib/xxx.rb +4 -4
- data/test/mice.pgx +7 -0
- data/test/parse.rb +18 -0
- data/test/repeat.rb +10 -0
- data/test/sample.rb +72 -0
- data/test/testml.rb +25 -0
- data/test/testml.yaml +3 -0
- data/test/{compiler-checks.tml → testml/compiler-checks.tml} +8 -11
- data/test/{compiler-equivalence.rb → testml/compiler-equivalence.tml} +2 -8
- data/test/{compiler.tml → testml/compiler.tml} +13 -0
- data/test/{error.rb → testml/error.tml} +2 -13
- data/test/testml/optimize.tml +14 -0
- data/test/{tree-pegex.tml → testml/tree-pegex.tml} +11 -1
- data/test/{tree.tml → testml/tree.tml} +23 -9
- data/test/testml_bridge.rb +64 -0
- metadata +22 -17
- data/test/compiler-checks.rb +0 -271
- data/test/compiler.rb +0 -42
- data/test/export_ok.rb +0 -36
- data/test/lib/test_pegex.rb +0 -33
- data/test/optimize.rb +0 -18
- data/test/tree.rb +0 -47
data/.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
GemSpec = Gem::Specification.new do |gem|
|
4
4
|
gem.name = 'pegex'
|
5
|
-
gem.version = '0.0.
|
5
|
+
gem.version = '0.0.3'
|
6
6
|
gem.license = 'MIT'
|
7
7
|
gem.required_ruby_version = '>= 1.9.1'
|
8
8
|
|
@@ -17,5 +17,5 @@ that will work equivalently in lots of programming languages!
|
|
17
17
|
|
18
18
|
gem.files = `git ls-files`.lines.map{|l|l.chomp}
|
19
19
|
|
20
|
-
gem.add_development_dependency 'testml
|
20
|
+
gem.add_development_dependency 'testml', '>= 0.0.2'
|
21
21
|
end
|
data/.profile
ADDED
data/CHANGELOG.yaml
CHANGED
data/LICENSE
CHANGED
data/README.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
=
|
1
|
+
= pegex - Acmeist PEG Parsing Framework
|
2
2
|
|
3
3
|
Pegex is an Acmeist parser framework. It allows you to easily create
|
4
4
|
parsers that will work equivalently in lots of programming languages!
|
@@ -29,35 +29,37 @@ or more explicitly:
|
|
29
29
|
|
30
30
|
= Description
|
31
31
|
|
32
|
-
Pegex is an Acmeist parser framework. It allows you to easily create
|
33
|
-
that will work equivalently in lots of programming languages!
|
32
|
+
Pegex is an Acmeist parser framework. It allows you to easily create
|
33
|
+
parsers that will work equivalently in lots of programming languages!
|
34
34
|
|
35
|
-
Pegex gets its name by combining Parsing Expression Grammars (PEG),
|
36
|
-
Regular Expressions (Regex). That's actually what Pegex does.
|
35
|
+
Pegex gets its name by combining Parsing Expression Grammars (PEG),
|
36
|
+
with Regular Expressions (Regex). That's actually what Pegex does.
|
37
37
|
|
38
|
-
PEG is the cool new way to elegantly specify recursive descent
|
39
|
-
Perl 6 language is defined in terms of a self modifying
|
40
|
-
*Perl 6 Rules*. Regexes are familiar to
|
41
|
-
programming languages. Pegex defines a
|
42
|
-
terminals are regexes. This means
|
38
|
+
PEG is the cool new way to elegantly specify recursive descent
|
39
|
+
grammars. The Perl 6 language is defined in terms of a self modifying
|
40
|
+
PEG language called *Perl 6 Rules*. Regexes are familiar to
|
41
|
+
programmers of most modern programming languages. Pegex defines a
|
42
|
+
simple PEG syntax, where all the terminals are regexes. This means
|
43
|
+
that Pegex can be quite fast and powerful.
|
43
44
|
|
44
|
-
Pegex attempts to be the simplest way to define new (or old) Domain
|
45
|
-
Languages (DSLs) that need to be used in several programming
|
46
|
-
environments.
|
45
|
+
Pegex attempts to be the simplest way to define new (or old) Domain
|
46
|
+
Specific Languages (DSLs) that need to be used in several programming
|
47
|
+
languages and environments.
|
47
48
|
|
48
49
|
= Usage
|
49
50
|
|
50
|
-
The +pegex.rb+ module itself is just a trivial way to use the
|
51
|
-
|
51
|
+
The +pegex.rb+ module itself is just a trivial way to use the Pegex
|
52
|
+
framework. It is only intended for the simplest of uses.
|
52
53
|
|
53
|
-
+pegex.rb+ defines a single function, +pegex+, which takes a Pegex
|
54
|
-
string as input. You may also pass in a receiver class or
|
54
|
+
+pegex.rb+ defines a single function, +pegex+, which takes a Pegex
|
55
|
+
grammar string as input. You may also pass in a receiver class or
|
56
|
+
object.
|
55
57
|
|
56
58
|
parser = pegex(grammar, MyReceiver)
|
57
59
|
|
58
|
-
The +pegex+ function returns a Pegex::Parser object, on which you
|
59
|
-
typically call the +parse()+ method, which (on success) will
|
60
|
-
structure of the parsed data.
|
60
|
+
The +pegex+ function returns a Pegex::Parser object, on which you
|
61
|
+
would typically call the +parse()+ method, which (on success) will
|
62
|
+
return a data structure of the parsed data.
|
61
63
|
|
62
64
|
See Pegex::API for more details.
|
63
65
|
|
@@ -66,10 +68,10 @@ See Pegex::API for more details.
|
|
66
68
|
This Pegex library was ported to Ruby from the Perl module:
|
67
69
|
http://search.cpan.org/dist/Pegex/
|
68
70
|
|
69
|
-
The code and tests were fully ported from Perl to Ruby. Pegex should
|
70
|
-
exactly the same in both languages. The documentation and
|
71
|
-
been fully ported, but they will be soon enough.
|
72
|
-
docs.
|
71
|
+
The code and tests were fully ported from Perl to Ruby. Pegex should
|
72
|
+
work exactly the same in both languages. The documentation and
|
73
|
+
examples have not yet been fully ported, but they will be soon enough.
|
74
|
+
For now, refer to the Perl docs.
|
73
75
|
|
74
76
|
You can start here: http://search.cpan.org/dist/Pegex/lib/Pegex.pod
|
75
77
|
|
data/Rakefile
CHANGED
@@ -11,6 +11,12 @@ DevNull = '2>/dev/null'
|
|
11
11
|
require 'rake'
|
12
12
|
require 'rake/testtask'
|
13
13
|
require 'rake/clean'
|
14
|
+
if File.exists? 'test/testml.yaml'
|
15
|
+
if File.exists? 'lib/rake/testml.rb'
|
16
|
+
$:.unshift "#{Dir.getwd}/lib"
|
17
|
+
end
|
18
|
+
require 'rake/testml'
|
19
|
+
end
|
14
20
|
|
15
21
|
task :default => 'help'
|
16
22
|
|
@@ -18,9 +24,12 @@ CLEAN.include GemDir, GemFile, 'data.tar.gz', 'metadata.gz'
|
|
18
24
|
|
19
25
|
desc 'Run the tests'
|
20
26
|
task :test do
|
27
|
+
load '.env' if File.exists? '.env'
|
21
28
|
Rake::TestTask.new do |t|
|
22
29
|
t.verbose = true
|
23
|
-
t.test_files =
|
30
|
+
t.test_files = ENV['DEV_TEST_FILES'] &&
|
31
|
+
FileList[ENV['DEV_TEST_FILES'].split] ||
|
32
|
+
FileList['test/**/*.rb'].sort
|
24
33
|
end
|
25
34
|
end
|
26
35
|
|
data/ToDo
CHANGED
data/lib/pegex.rb
CHANGED
@@ -3,15 +3,14 @@ module Pegex;end
|
|
3
3
|
require 'pegex/parser'
|
4
4
|
require 'pegex/grammar'
|
5
5
|
|
6
|
-
def pegex
|
7
|
-
|
6
|
+
def pegex grammar, receiver=nil
|
7
|
+
if not receiver
|
8
8
|
require 'pegex/tree/wrap'
|
9
9
|
receiver = Pegex::Tree::Wrap.new
|
10
10
|
end
|
11
|
-
receiver = receiver.new
|
12
|
-
if receiver.class == Class
|
11
|
+
receiver = receiver.new if receiver.class == Class
|
13
12
|
return Pegex::Parser.new do |p|
|
14
|
-
p.grammar = Pegex::Grammar.new {|g| g.text =
|
13
|
+
p.grammar = Pegex::Grammar.new {|g| g.text = grammar}
|
15
14
|
p.receiver = receiver
|
16
15
|
end
|
17
16
|
end
|
data/lib/pegex/grammar.rb
CHANGED
data/lib/pegex/input.rb
CHANGED
data/lib/pegex/parser.rb
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
require 'pegex/input'
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
module Pegex::Constant
|
4
|
+
Null = []
|
5
|
+
Dummy = []
|
6
|
+
end
|
5
7
|
|
6
8
|
class Pegex::Parser
|
7
9
|
attr_accessor :grammar
|
8
10
|
attr_accessor :receiver
|
11
|
+
attr_accessor :input
|
12
|
+
|
9
13
|
attr_accessor :parent
|
10
14
|
attr_accessor :rule
|
11
15
|
attr_accessor :debug
|
@@ -14,18 +18,16 @@ class Pegex::Parser
|
|
14
18
|
@position = 0
|
15
19
|
@farthest = 0
|
16
20
|
@optimized = false
|
17
|
-
@debug = false
|
18
21
|
@throw_on_error = true
|
19
|
-
|
22
|
+
@debug = ENV['RUBY_PEGEX_DEBUG'] || $PegexParserDebug || false
|
20
23
|
yield self if block_given?
|
21
24
|
end
|
22
25
|
|
23
26
|
def parse input, start=nil
|
24
27
|
@position = 0
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
end
|
28
|
+
|
29
|
+
if not input.kind_of? Pegex::Input
|
30
|
+
input = Pegex::Input.new {|i| i.string = input}
|
29
31
|
end
|
30
32
|
@input = input
|
31
33
|
@input.open unless @input.open?
|
@@ -40,18 +42,20 @@ class Pegex::Parser
|
|
40
42
|
(@tree['TOP'] ? 'TOP' : nil) or
|
41
43
|
fail "No starting rule for Pegex::Parser::parse"
|
42
44
|
|
43
|
-
optimize_grammar
|
45
|
+
optimize_grammar(start_rule_ref)
|
44
46
|
|
45
|
-
fail
|
47
|
+
fail "No 'receiver'. Can't parse" unless @receiver
|
46
48
|
|
47
|
-
# XXX does ruby have problems with circular references
|
49
|
+
# XXX does ruby have problems with circular references?
|
48
50
|
@receiver.parser = self
|
49
51
|
|
50
52
|
if @receiver.respond_to? 'initial'
|
51
|
-
@rule
|
53
|
+
@rule = start_rule_ref
|
54
|
+
@parent = {}
|
55
|
+
@receiver.initial
|
52
56
|
end
|
53
57
|
|
54
|
-
match = match_ref
|
58
|
+
match = match_ref(start_rule_ref, {})
|
55
59
|
|
56
60
|
@input.close
|
57
61
|
|
@@ -61,7 +65,8 @@ class Pegex::Parser
|
|
61
65
|
end
|
62
66
|
|
63
67
|
if @receiver.respond_to? 'final'
|
64
|
-
@rule
|
68
|
+
@rule = start_rule_ref
|
69
|
+
@parent = {}
|
65
70
|
match = [ @receiver.final(match.first) ]
|
66
71
|
end
|
67
72
|
|
@@ -72,9 +77,9 @@ class Pegex::Parser
|
|
72
77
|
return if @optimized
|
73
78
|
@tree.each_pair do |name, node|
|
74
79
|
next if node.kind_of? String
|
75
|
-
optimize_node
|
80
|
+
optimize_node(node)
|
76
81
|
end
|
77
|
-
optimize_node
|
82
|
+
optimize_node('.ref' => start)
|
78
83
|
@optimized = true
|
79
84
|
end
|
80
85
|
|
@@ -88,8 +93,8 @@ class Pegex::Parser
|
|
88
93
|
end
|
89
94
|
end
|
90
95
|
min, max = node.values_at '+min', '+max'
|
91
|
-
node['+min'] ||= max
|
92
|
-
node['+max'] ||= min
|
96
|
+
node['+min'] ||= max.nil? ? 1 : 0
|
97
|
+
node['+max'] ||= min.nil? ? 1 : 0
|
93
98
|
node['+asr'] ||= nil
|
94
99
|
node['+min'] = node['+min'].to_i
|
95
100
|
node['+max'] = node['+max'].to_i
|
@@ -111,12 +116,12 @@ class Pegex::Parser
|
|
111
116
|
node['rule'] = Regexp.new "\\A#{node['.rgx']}"
|
112
117
|
end
|
113
118
|
if sep = node['.sep']
|
114
|
-
optimize_node
|
119
|
+
optimize_node(sep)
|
115
120
|
end
|
116
121
|
end
|
117
122
|
|
118
123
|
def match_next next_
|
119
|
-
return match_next_with_sep
|
124
|
+
return match_next_with_sep(next_) if next_['.sep']
|
120
125
|
|
121
126
|
rule, method, kind, min, max, assertion =
|
122
127
|
next_.values_at 'rule', 'method', 'kind', '+min', '+max', '+asr'
|
@@ -126,7 +131,7 @@ class Pegex::Parser
|
|
126
131
|
while return_ = method.call(rule, next_)
|
127
132
|
position = @position unless assertion
|
128
133
|
count += 1
|
129
|
-
match.concat return_
|
134
|
+
match.concat return_
|
130
135
|
break if max == 1
|
131
136
|
end
|
132
137
|
if max != 1
|
@@ -134,7 +139,7 @@ class Pegex::Parser
|
|
134
139
|
@farthest = position if (@position = position) > @farthest
|
135
140
|
end
|
136
141
|
result = (count >= min and (max == 0 or count <= max)) ^ (assertion == -1)
|
137
|
-
if not
|
142
|
+
if not(result) or assertion
|
138
143
|
@farthest = position if (@position = position) > @farthest
|
139
144
|
end
|
140
145
|
|
@@ -146,53 +151,52 @@ class Pegex::Parser
|
|
146
151
|
next_.values_at 'rule', 'method', 'kind', '+min', '+max', '.sep'
|
147
152
|
|
148
153
|
position, match, count, scount, smin, smax =
|
149
|
-
@position, [], 0, 0, sep.values_at('+min', '+max')
|
150
|
-
|
154
|
+
@position, [], 0, 0, *(sep.values_at('+min', '+max'))
|
151
155
|
while return_ = method.call(rule, next_)
|
152
156
|
position = @position
|
153
157
|
count += 1
|
154
|
-
match.concat
|
158
|
+
match.concat(return_)
|
155
159
|
return_ = match_next(sep) or break
|
156
|
-
match.concat return_
|
160
|
+
match.concat(smax == 1 ? return_ : return_[0]) if !return_.empty?
|
157
161
|
scount += 1
|
158
162
|
end
|
159
|
-
if max != 1
|
160
|
-
match = [match]
|
161
|
-
end
|
163
|
+
match = [match] if max != 1
|
162
164
|
result = count >= min and (max == 0 or count <= max)
|
163
165
|
if count == scount and not sep['+eok']
|
164
166
|
@farthest = position if (@position = position) > @farthest
|
165
167
|
end
|
166
168
|
|
167
|
-
return
|
169
|
+
return(result ? next_['-skip'] ? [] : match : false)
|
168
170
|
end
|
169
171
|
|
170
172
|
def match_ref ref, parent
|
171
173
|
rule = @tree[ref]
|
172
174
|
match = match_next(rule) or return false
|
173
|
-
return
|
175
|
+
return Pegex::Constant::Dummy unless rule['action']
|
174
176
|
@rule, @parent = ref, parent
|
175
177
|
result = rule['action'].call(match.first)
|
176
|
-
return (result.equal?
|
178
|
+
return (result.equal? Pegex::Constant::Null) ? result : [result]
|
177
179
|
end
|
178
180
|
|
179
181
|
def match_rgx regexp, parent=nil
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
182
|
+
buffer = @buffer[@position .. -1]
|
183
|
+
(m = buffer.match regexp) or return false
|
184
|
+
@position += m[0].length
|
185
|
+
# TODO use m.captures
|
184
186
|
match = m[1..-1]
|
185
187
|
match = [ match ] if m.length > 2
|
186
|
-
@farthest = position if
|
188
|
+
@farthest = @position if @position > @farthest
|
187
189
|
return match
|
188
190
|
end
|
189
191
|
|
190
192
|
def match_all list, parent=nil
|
191
|
-
position
|
193
|
+
position = @position
|
194
|
+
set = []
|
195
|
+
len = 0
|
192
196
|
list.each do |elem|
|
193
197
|
if match = match_next(elem)
|
194
|
-
if !elem['+asr']
|
195
|
-
set.concat
|
198
|
+
if !(elem['+asr'] or elem['-skip'])
|
199
|
+
set.concat(match)
|
196
200
|
len += 1
|
197
201
|
end
|
198
202
|
else
|
@@ -206,7 +210,7 @@ class Pegex::Parser
|
|
206
210
|
|
207
211
|
def match_any list, parent=nil
|
208
212
|
list.each do |elem|
|
209
|
-
if (match = match_next
|
213
|
+
if (match = match_next(elem))
|
210
214
|
return match
|
211
215
|
end
|
212
216
|
end
|
@@ -214,18 +218,18 @@ class Pegex::Parser
|
|
214
218
|
end
|
215
219
|
|
216
220
|
def match_err error, parent=nil
|
217
|
-
throw_error
|
221
|
+
throw_error(error)
|
218
222
|
end
|
219
223
|
|
220
224
|
def match_ref_trace ref, parent
|
221
225
|
rule = @tree[ref]
|
222
|
-
|
223
|
-
trace
|
226
|
+
trace = ! rule['+asr']
|
227
|
+
trace("try_#{ref}") if trace
|
224
228
|
result = nil
|
225
|
-
if (result = match_ref
|
226
|
-
trace
|
229
|
+
if (result = match_ref(ref, parent))
|
230
|
+
trace("got_#{ref}") if trace
|
227
231
|
else
|
228
|
-
trace
|
232
|
+
trace("not_#{ref}") if trace
|
229
233
|
end
|
230
234
|
return result
|
231
235
|
end
|
@@ -243,13 +247,7 @@ class Pegex::Parser
|
|
243
247
|
$stderr.print indent ? " >#{snippet}<\n" : "\n"
|
244
248
|
end
|
245
249
|
|
246
|
-
|
247
|
-
raise msg
|
248
|
-
end
|
249
|
-
|
250
|
-
class PegexParseError < RuntimeError
|
251
|
-
|
252
|
-
end
|
250
|
+
class PegexParseError < RuntimeError;end
|
253
251
|
|
254
252
|
def throw_error msg
|
255
253
|
@error = format_error msg
|
data/lib/pegex/pegex/ast.rb
CHANGED
@@ -57,20 +57,20 @@ class Pegex::Pegex::AST < Pegex::Tree
|
|
57
57
|
group[@prefixes[prefix]] = 1
|
58
58
|
end
|
59
59
|
unless suffix.empty?
|
60
|
-
set_quantity
|
60
|
+
set_quantity(group, suffix)
|
61
61
|
end
|
62
62
|
return group
|
63
63
|
end
|
64
64
|
|
65
65
|
def got_all_group got
|
66
|
-
list = get_group
|
66
|
+
list = get_group(got)
|
67
67
|
fail unless list.length > 0
|
68
68
|
return list.first if list.length == 1
|
69
69
|
return '.all' => list
|
70
70
|
end
|
71
71
|
|
72
72
|
def got_any_group got
|
73
|
-
list = get_group
|
73
|
+
list = get_group(got)
|
74
74
|
fail unless list.length > 0
|
75
75
|
return list.first if list.length == 1
|
76
76
|
return '.any' => list
|
@@ -96,10 +96,10 @@ class Pegex::Pegex::AST < Pegex::Tree
|
|
96
96
|
if (regex = @atoms[ref])
|
97
97
|
@extra_rules[ref] = {'.rgx' => regex}
|
98
98
|
end
|
99
|
-
|
100
|
-
set_quantity
|
99
|
+
if !suffix.empty?
|
100
|
+
set_quantity(node, suffix)
|
101
101
|
end
|
102
|
-
|
102
|
+
if !prefix.empty?
|
103
103
|
if @prefixes[prefix].kind_of? Array
|
104
104
|
key, val = @prefixes[prefix]
|
105
105
|
else
|