pegex 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemspec +2 -2
- data/.profile +2 -0
- data/CHANGELOG.yaml +3 -0
- data/LICENSE +1 -1
- data/README.rdoc +26 -24
- data/Rakefile +10 -1
- data/ToDo +3 -0
- data/lib/pegex.rb +4 -5
- data/lib/pegex/grammar.rb +1 -0
- data/lib/pegex/input.rb +1 -0
- data/lib/pegex/parser.rb +52 -54
- data/lib/pegex/pegex/ast.rb +6 -6
- data/lib/pegex/tree.rb +1 -1
- data/lib/pegex/tree/wrap.rb +2 -2
- data/test/export-api.rb +38 -0
- data/test/flatten.rb +30 -0
- data/test/grammar-api.rb +20 -8
- data/test/lib/xxx.rb +4 -4
- data/test/mice.pgx +7 -0
- data/test/parse.rb +18 -0
- data/test/repeat.rb +10 -0
- data/test/sample.rb +72 -0
- data/test/testml.rb +25 -0
- data/test/testml.yaml +3 -0
- data/test/{compiler-checks.tml → testml/compiler-checks.tml} +8 -11
- data/test/{compiler-equivalence.rb → testml/compiler-equivalence.tml} +2 -8
- data/test/{compiler.tml → testml/compiler.tml} +13 -0
- data/test/{error.rb → testml/error.tml} +2 -13
- data/test/testml/optimize.tml +14 -0
- data/test/{tree-pegex.tml → testml/tree-pegex.tml} +11 -1
- data/test/{tree.tml → testml/tree.tml} +23 -9
- data/test/testml_bridge.rb +64 -0
- metadata +22 -17
- data/test/compiler-checks.rb +0 -271
- data/test/compiler.rb +0 -42
- data/test/export_ok.rb +0 -36
- data/test/lib/test_pegex.rb +0 -33
- data/test/optimize.rb +0 -18
- data/test/tree.rb +0 -47
data/.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
GemSpec = Gem::Specification.new do |gem|
|
4
4
|
gem.name = 'pegex'
|
5
|
-
gem.version = '0.0.
|
5
|
+
gem.version = '0.0.3'
|
6
6
|
gem.license = 'MIT'
|
7
7
|
gem.required_ruby_version = '>= 1.9.1'
|
8
8
|
|
@@ -17,5 +17,5 @@ that will work equivalently in lots of programming languages!
|
|
17
17
|
|
18
18
|
gem.files = `git ls-files`.lines.map{|l|l.chomp}
|
19
19
|
|
20
|
-
gem.add_development_dependency 'testml
|
20
|
+
gem.add_development_dependency 'testml', '>= 0.0.2'
|
21
21
|
end
|
data/.profile
ADDED
data/CHANGELOG.yaml
CHANGED
data/LICENSE
CHANGED
data/README.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
=
|
1
|
+
= pegex - Acmeist PEG Parsing Framework
|
2
2
|
|
3
3
|
Pegex is a Acmeist parser framework. It allows you to easily create
|
4
4
|
parsers that will work equivalently in lots of programming languages!
|
@@ -29,35 +29,37 @@ or more explicitly:
|
|
29
29
|
|
30
30
|
= Description
|
31
31
|
|
32
|
-
Pegex is a Acmeist parser framework. It allows you to easily create
|
33
|
-
that will work equivalently in lots of programming languages!
|
32
|
+
Pegex is a Acmeist parser framework. It allows you to easily create
|
33
|
+
parsers that will work equivalently in lots of programming languages!
|
34
34
|
|
35
|
-
Pegex gets it name by combining Parsing Expression Grammars (PEG),
|
36
|
-
Regular Expessions (Regex). That's actually what Pegex does.
|
35
|
+
Pegex gets it name by combining Parsing Expression Grammars (PEG),
|
36
|
+
with Regular Expessions (Regex). That's actually what Pegex does.
|
37
37
|
|
38
|
-
PEG is the cool new way to elegantly specify recursive descent
|
39
|
-
Perl 6 language is defined in terms of a self modifying
|
40
|
-
*Perl 6 Rules*. Regexes are familiar to
|
41
|
-
programming languages. Pegex defines a
|
42
|
-
terminals are regexes. This means
|
38
|
+
PEG is the cool new way to elegantly specify recursive descent
|
39
|
+
grammars. The Perl 6 language is defined in terms of a self modifying
|
40
|
+
PEG language called *Perl 6 Rules*. Regexes are familiar to
|
41
|
+
programmers of most modern programming languages. Pegex defines a
|
42
|
+
simple PEG syntax, where all the terminals are regexes. This means
|
43
|
+
that Pegex can be quite fast and powerful.
|
43
44
|
|
44
|
-
Pegex attempts to be the simplest way to define new (or old) Domain
|
45
|
-
Languages (DSLs) that need to be used in several programming
|
46
|
-
environments.
|
45
|
+
Pegex attempts to be the simplest way to define new (or old) Domain
|
46
|
+
Specific Languages (DSLs) that need to be used in several programming
|
47
|
+
languages and environments.
|
47
48
|
|
48
49
|
= Usage
|
49
50
|
|
50
|
-
The +pegex.rb+ module itself is just a trivial way to use the
|
51
|
-
|
51
|
+
The +pegex.rb+ module itself is just a trivial way to use the Pegex
|
52
|
+
framework. It is only intended for the simplest of uses.
|
52
53
|
|
53
|
-
+pegex.rb+ defines a single function, +pegex+, which takes a Pegex
|
54
|
-
string as input. You may also pass in a receiver class or
|
54
|
+
+pegex.rb+ defines a single function, +pegex+, which takes a Pegex
|
55
|
+
grammar string as input. You may also pass in a receiver class or
|
56
|
+
object.
|
55
57
|
|
56
58
|
parser = pegex(grammar, MyReceiver)
|
57
59
|
|
58
|
-
The +pegex+ function returns a Pegex::Parser object, on which you
|
59
|
-
typically call the +parse()+ method, which (on success) will
|
60
|
-
structure of the parsed data.
|
60
|
+
The +pegex+ function returns a Pegex::Parser object, on which you
|
61
|
+
would typically call the +parse()+ method, which (on success) will
|
62
|
+
return a data structure of the parsed data.
|
61
63
|
|
62
64
|
See Pegex::API for more details.
|
63
65
|
|
@@ -66,10 +68,10 @@ See Pegex::API for more details.
|
|
66
68
|
This Pegex library was ported to Ruby from the Perl module:
|
67
69
|
http://search.cpan.org/dist/Pegex/
|
68
70
|
|
69
|
-
The code and tests were fully ported from Perl to Ruby. Pegex should
|
70
|
-
exactly the same in both languages. The documentation and
|
71
|
-
been fully ported, but they will be soon enough.
|
72
|
-
docs.
|
71
|
+
The code and tests were fully ported from Perl to Ruby. Pegex should
|
72
|
+
work exactly the same in both languages. The documentation and
|
73
|
+
examples have not yet been fully ported, but they will be soon enough.
|
74
|
+
For now, refer to the Perl docs.
|
73
75
|
|
74
76
|
You can start here: http://search.cpan.org/dist/Pegex/lib/Pegex.pod
|
75
77
|
|
data/Rakefile
CHANGED
@@ -11,6 +11,12 @@ DevNull = '2>/dev/null'
|
|
11
11
|
require 'rake'
|
12
12
|
require 'rake/testtask'
|
13
13
|
require 'rake/clean'
|
14
|
+
if File.exists? 'test/testml.yaml'
|
15
|
+
if File.exists? 'lib/rake/testml.rb'
|
16
|
+
$:.unshift "#{Dir.getwd}/lib"
|
17
|
+
end
|
18
|
+
require 'rake/testml'
|
19
|
+
end
|
14
20
|
|
15
21
|
task :default => 'help'
|
16
22
|
|
@@ -18,9 +24,12 @@ CLEAN.include GemDir, GemFile, 'data.tar.gz', 'metadata.gz'
|
|
18
24
|
|
19
25
|
desc 'Run the tests'
|
20
26
|
task :test do
|
27
|
+
load '.env' if File.exists? '.env'
|
21
28
|
Rake::TestTask.new do |t|
|
22
29
|
t.verbose = true
|
23
|
-
t.test_files =
|
30
|
+
t.test_files = ENV['DEV_TEST_FILES'] &&
|
31
|
+
FileList[ENV['DEV_TEST_FILES'].split] ||
|
32
|
+
FileList['test/**/*.rb'].sort
|
24
33
|
end
|
25
34
|
end
|
26
35
|
|
data/ToDo
CHANGED
data/lib/pegex.rb
CHANGED
@@ -3,15 +3,14 @@ module Pegex;end
|
|
3
3
|
require 'pegex/parser'
|
4
4
|
require 'pegex/grammar'
|
5
5
|
|
6
|
-
def pegex
|
7
|
-
|
6
|
+
def pegex grammar, receiver=nil
|
7
|
+
if not receiver
|
8
8
|
require 'pegex/tree/wrap'
|
9
9
|
receiver = Pegex::Tree::Wrap.new
|
10
10
|
end
|
11
|
-
receiver = receiver.new
|
12
|
-
if receiver.class == Class
|
11
|
+
receiver = receiver.new if receiver.class == Class
|
13
12
|
return Pegex::Parser.new do |p|
|
14
|
-
p.grammar = Pegex::Grammar.new {|g| g.text =
|
13
|
+
p.grammar = Pegex::Grammar.new {|g| g.text = grammar}
|
15
14
|
p.receiver = receiver
|
16
15
|
end
|
17
16
|
end
|
data/lib/pegex/grammar.rb
CHANGED
data/lib/pegex/input.rb
CHANGED
data/lib/pegex/parser.rb
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
require 'pegex/input'
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
module Pegex::Constant
|
4
|
+
Null = []
|
5
|
+
Dummy = []
|
6
|
+
end
|
5
7
|
|
6
8
|
class Pegex::Parser
|
7
9
|
attr_accessor :grammar
|
8
10
|
attr_accessor :receiver
|
11
|
+
attr_accessor :input
|
12
|
+
|
9
13
|
attr_accessor :parent
|
10
14
|
attr_accessor :rule
|
11
15
|
attr_accessor :debug
|
@@ -14,18 +18,16 @@ class Pegex::Parser
|
|
14
18
|
@position = 0
|
15
19
|
@farthest = 0
|
16
20
|
@optimized = false
|
17
|
-
@debug = false
|
18
21
|
@throw_on_error = true
|
19
|
-
|
22
|
+
@debug = ENV['RUBY_PEGEX_DEBUG'] || $PegexParserDebug || false
|
20
23
|
yield self if block_given?
|
21
24
|
end
|
22
25
|
|
23
26
|
def parse input, start=nil
|
24
27
|
@position = 0
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
end
|
28
|
+
|
29
|
+
if not input.kind_of? Pegex::Input
|
30
|
+
input = Pegex::Input.new {|i| i.string = input}
|
29
31
|
end
|
30
32
|
@input = input
|
31
33
|
@input.open unless @input.open?
|
@@ -40,18 +42,20 @@ class Pegex::Parser
|
|
40
42
|
(@tree['TOP'] ? 'TOP' : nil) or
|
41
43
|
fail "No starting rule for Pegex::Parser::parse"
|
42
44
|
|
43
|
-
optimize_grammar
|
45
|
+
optimize_grammar(start_rule_ref)
|
44
46
|
|
45
|
-
fail
|
47
|
+
fail "No 'receiver'. Can't parse" unless @receiver
|
46
48
|
|
47
|
-
# XXX does ruby have problems with circulat references
|
49
|
+
# XXX does ruby have problems with circulat references?
|
48
50
|
@receiver.parser = self
|
49
51
|
|
50
52
|
if @receiver.respond_to? 'initial'
|
51
|
-
@rule
|
53
|
+
@rule = start_rule_ref
|
54
|
+
@parent = {}
|
55
|
+
@receiver.initial
|
52
56
|
end
|
53
57
|
|
54
|
-
match = match_ref
|
58
|
+
match = match_ref(start_rule_ref, {})
|
55
59
|
|
56
60
|
@input.close
|
57
61
|
|
@@ -61,7 +65,8 @@ class Pegex::Parser
|
|
61
65
|
end
|
62
66
|
|
63
67
|
if @receiver.respond_to? 'final'
|
64
|
-
@rule
|
68
|
+
@rule = start_rule_ref
|
69
|
+
@parent = {}
|
65
70
|
match = [ @receiver.final(match.first) ]
|
66
71
|
end
|
67
72
|
|
@@ -72,9 +77,9 @@ class Pegex::Parser
|
|
72
77
|
return if @optimized
|
73
78
|
@tree.each_pair do |name, node|
|
74
79
|
next if node.kind_of? String
|
75
|
-
optimize_node
|
80
|
+
optimize_node(node)
|
76
81
|
end
|
77
|
-
optimize_node
|
82
|
+
optimize_node('.ref' => start)
|
78
83
|
@optimized = true
|
79
84
|
end
|
80
85
|
|
@@ -88,8 +93,8 @@ class Pegex::Parser
|
|
88
93
|
end
|
89
94
|
end
|
90
95
|
min, max = node.values_at '+min', '+max'
|
91
|
-
node['+min'] ||= max
|
92
|
-
node['+max'] ||= min
|
96
|
+
node['+min'] ||= max.nil? ? 1 : 0
|
97
|
+
node['+max'] ||= min.nil? ? 1 : 0
|
93
98
|
node['+asr'] ||= nil
|
94
99
|
node['+min'] = node['+min'].to_i
|
95
100
|
node['+max'] = node['+max'].to_i
|
@@ -111,12 +116,12 @@ class Pegex::Parser
|
|
111
116
|
node['rule'] = Regexp.new "\\A#{node['.rgx']}"
|
112
117
|
end
|
113
118
|
if sep = node['.sep']
|
114
|
-
optimize_node
|
119
|
+
optimize_node(sep)
|
115
120
|
end
|
116
121
|
end
|
117
122
|
|
118
123
|
def match_next next_
|
119
|
-
return match_next_with_sep
|
124
|
+
return match_next_with_sep(next_) if next_['.sep']
|
120
125
|
|
121
126
|
rule, method, kind, min, max, assertion =
|
122
127
|
next_.values_at 'rule', 'method', 'kind', '+min', '+max', '+asr'
|
@@ -126,7 +131,7 @@ class Pegex::Parser
|
|
126
131
|
while return_ = method.call(rule, next_)
|
127
132
|
position = @position unless assertion
|
128
133
|
count += 1
|
129
|
-
match.concat return_
|
134
|
+
match.concat return_
|
130
135
|
break if max == 1
|
131
136
|
end
|
132
137
|
if max != 1
|
@@ -134,7 +139,7 @@ class Pegex::Parser
|
|
134
139
|
@farthest = position if (@position = position) > @farthest
|
135
140
|
end
|
136
141
|
result = (count >= min and (max == 0 or count <= max)) ^ (assertion == -1)
|
137
|
-
if not
|
142
|
+
if not(result) or assertion
|
138
143
|
@farthest = position if (@position = position) > @farthest
|
139
144
|
end
|
140
145
|
|
@@ -146,53 +151,52 @@ class Pegex::Parser
|
|
146
151
|
next_.values_at 'rule', 'method', 'kind', '+min', '+max', '.sep'
|
147
152
|
|
148
153
|
position, match, count, scount, smin, smax =
|
149
|
-
@position, [], 0, 0, sep.values_at('+min', '+max')
|
150
|
-
|
154
|
+
@position, [], 0, 0, *(sep.values_at('+min', '+max'))
|
151
155
|
while return_ = method.call(rule, next_)
|
152
156
|
position = @position
|
153
157
|
count += 1
|
154
|
-
match.concat
|
158
|
+
match.concat(return_)
|
155
159
|
return_ = match_next(sep) or break
|
156
|
-
match.concat return_
|
160
|
+
match.concat(smax == 1 ? return_ : return_[0]) if !return_.empty?
|
157
161
|
scount += 1
|
158
162
|
end
|
159
|
-
if max != 1
|
160
|
-
match = [match]
|
161
|
-
end
|
163
|
+
match = [match] if max != 1
|
162
164
|
result = count >= min and (max == 0 or count <= max)
|
163
165
|
if count == scount and not sep['+eok']
|
164
166
|
@farthest = position if (@position = position) > @farthest
|
165
167
|
end
|
166
168
|
|
167
|
-
return
|
169
|
+
return(result ? next_['-skip'] ? [] : match : false)
|
168
170
|
end
|
169
171
|
|
170
172
|
def match_ref ref, parent
|
171
173
|
rule = @tree[ref]
|
172
174
|
match = match_next(rule) or return false
|
173
|
-
return
|
175
|
+
return Pegex::Constant::Dummy unless rule['action']
|
174
176
|
@rule, @parent = ref, parent
|
175
177
|
result = rule['action'].call(match.first)
|
176
|
-
return (result.equal?
|
178
|
+
return (result.equal? Pegex::Constant::Null) ? result : [result]
|
177
179
|
end
|
178
180
|
|
179
181
|
def match_rgx regexp, parent=nil
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
182
|
+
buffer = @buffer[@position .. -1]
|
183
|
+
(m = buffer.match regexp) or return false
|
184
|
+
@position += m[0].length
|
185
|
+
# TODO use m.captures
|
184
186
|
match = m[1..-1]
|
185
187
|
match = [ match ] if m.length > 2
|
186
|
-
@farthest = position if
|
188
|
+
@farthest = @position if @position > @farthest
|
187
189
|
return match
|
188
190
|
end
|
189
191
|
|
190
192
|
def match_all list, parent=nil
|
191
|
-
position
|
193
|
+
position = @position
|
194
|
+
set = []
|
195
|
+
len = 0
|
192
196
|
list.each do |elem|
|
193
197
|
if match = match_next(elem)
|
194
|
-
if !elem['+asr']
|
195
|
-
set.concat
|
198
|
+
if !(elem['+asr'] or elem['-skip'])
|
199
|
+
set.concat(match)
|
196
200
|
len += 1
|
197
201
|
end
|
198
202
|
else
|
@@ -206,7 +210,7 @@ class Pegex::Parser
|
|
206
210
|
|
207
211
|
def match_any list, parent=nil
|
208
212
|
list.each do |elem|
|
209
|
-
if (match = match_next
|
213
|
+
if (match = match_next(elem))
|
210
214
|
return match
|
211
215
|
end
|
212
216
|
end
|
@@ -214,18 +218,18 @@ class Pegex::Parser
|
|
214
218
|
end
|
215
219
|
|
216
220
|
def match_err error, parent=nil
|
217
|
-
throw_error
|
221
|
+
throw_error(error)
|
218
222
|
end
|
219
223
|
|
220
224
|
def match_ref_trace ref, parent
|
221
225
|
rule = @tree[ref]
|
222
|
-
|
223
|
-
trace
|
226
|
+
trace = ! rule['+asr']
|
227
|
+
trace("try_#{ref}") if trace
|
224
228
|
result = nil
|
225
|
-
if (result = match_ref
|
226
|
-
trace
|
229
|
+
if (result = match_ref(ref, parent))
|
230
|
+
trace("got_#{ref}") if trace
|
227
231
|
else
|
228
|
-
trace
|
232
|
+
trace("not_#{ref}") if trace
|
229
233
|
end
|
230
234
|
return result
|
231
235
|
end
|
@@ -243,13 +247,7 @@ class Pegex::Parser
|
|
243
247
|
$stderr.print indent ? " >#{snippet}<\n" : "\n"
|
244
248
|
end
|
245
249
|
|
246
|
-
|
247
|
-
raise msg
|
248
|
-
end
|
249
|
-
|
250
|
-
class PegexParseError < RuntimeError
|
251
|
-
|
252
|
-
end
|
250
|
+
class PegexParseError < RuntimeError;end
|
253
251
|
|
254
252
|
def throw_error msg
|
255
253
|
@error = format_error msg
|
data/lib/pegex/pegex/ast.rb
CHANGED
@@ -57,20 +57,20 @@ class Pegex::Pegex::AST < Pegex::Tree
|
|
57
57
|
group[@prefixes[prefix]] = 1
|
58
58
|
end
|
59
59
|
unless suffix.empty?
|
60
|
-
set_quantity
|
60
|
+
set_quantity(group, suffix)
|
61
61
|
end
|
62
62
|
return group
|
63
63
|
end
|
64
64
|
|
65
65
|
def got_all_group got
|
66
|
-
list = get_group
|
66
|
+
list = get_group(got)
|
67
67
|
fail unless list.length > 0
|
68
68
|
return list.first if list.length == 1
|
69
69
|
return '.all' => list
|
70
70
|
end
|
71
71
|
|
72
72
|
def got_any_group got
|
73
|
-
list = get_group
|
73
|
+
list = get_group(got)
|
74
74
|
fail unless list.length > 0
|
75
75
|
return list.first if list.length == 1
|
76
76
|
return '.any' => list
|
@@ -96,10 +96,10 @@ class Pegex::Pegex::AST < Pegex::Tree
|
|
96
96
|
if (regex = @atoms[ref])
|
97
97
|
@extra_rules[ref] = {'.rgx' => regex}
|
98
98
|
end
|
99
|
-
|
100
|
-
set_quantity
|
99
|
+
if !suffix.empty?
|
100
|
+
set_quantity(node, suffix)
|
101
101
|
end
|
102
|
-
|
102
|
+
if !prefix.empty?
|
103
103
|
if @prefixes[prefix].kind_of? Array
|
104
104
|
key, val = @prefixes[prefix]
|
105
105
|
else
|