pegex 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemspec +2 -2
- data/.profile +2 -0
- data/CHANGELOG.yaml +3 -0
- data/LICENSE +1 -1
- data/README.rdoc +26 -24
- data/Rakefile +10 -1
- data/ToDo +3 -0
- data/lib/pegex.rb +4 -5
- data/lib/pegex/grammar.rb +1 -0
- data/lib/pegex/input.rb +1 -0
- data/lib/pegex/parser.rb +52 -54
- data/lib/pegex/pegex/ast.rb +6 -6
- data/lib/pegex/tree.rb +1 -1
- data/lib/pegex/tree/wrap.rb +2 -2
- data/test/export-api.rb +38 -0
- data/test/flatten.rb +30 -0
- data/test/grammar-api.rb +20 -8
- data/test/lib/xxx.rb +4 -4
- data/test/mice.pgx +7 -0
- data/test/parse.rb +18 -0
- data/test/repeat.rb +10 -0
- data/test/sample.rb +72 -0
- data/test/testml.rb +25 -0
- data/test/testml.yaml +3 -0
- data/test/{compiler-checks.tml → testml/compiler-checks.tml} +8 -11
- data/test/{compiler-equivalence.rb → testml/compiler-equivalence.tml} +2 -8
- data/test/{compiler.tml → testml/compiler.tml} +13 -0
- data/test/{error.rb → testml/error.tml} +2 -13
- data/test/testml/optimize.tml +14 -0
- data/test/{tree-pegex.tml → testml/tree-pegex.tml} +11 -1
- data/test/{tree.tml → testml/tree.tml} +23 -9
- data/test/testml_bridge.rb +64 -0
- metadata +22 -17
- data/test/compiler-checks.rb +0 -271
- data/test/compiler.rb +0 -42
- data/test/export_ok.rb +0 -36
- data/test/lib/test_pegex.rb +0 -33
- data/test/optimize.rb +0 -18
- data/test/tree.rb +0 -47
data/lib/pegex/tree.rb
CHANGED
data/lib/pegex/tree/wrap.rb
CHANGED
@@ -2,8 +2,8 @@ require 'pegex/tree'
|
|
2
2
|
|
3
3
|
class Pegex::Tree::Wrap < Pegex::Tree
|
4
4
|
def gotrule got
|
5
|
-
return got if @parser.parent['-pass']
|
6
|
-
return
|
5
|
+
return got || Pegex::Constant::Null if @parser.parent['-pass']
|
6
|
+
return Pegex::Constant::Null unless got
|
7
7
|
return @parser.rule => got
|
8
8
|
end
|
9
9
|
|
data/test/export-api.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
|
3
|
+
class Test::Unit::TestCase
|
4
|
+
def test
|
5
|
+
require 'pegex'
|
6
|
+
assert method('pegex'),
|
7
|
+
'pegex is exported'
|
8
|
+
|
9
|
+
parser1 = pegex "foo: <bar>\n"
|
10
|
+
|
11
|
+
assert parser1.kind_of?(Pegex::Parser),
|
12
|
+
'pegex returns a Pegex::Parser object'
|
13
|
+
|
14
|
+
assert_equal parser1.grammar.tree['+toprule'], 'foo',
|
15
|
+
'pegex() contains a grammar with a compiled tree'
|
16
|
+
|
17
|
+
parser2 = pegex(<<'...');
|
18
|
+
number: /<DIGIT>+/
|
19
|
+
...
|
20
|
+
|
21
|
+
begin
|
22
|
+
parser2.parse '123'
|
23
|
+
assert true, 'parser2.parse worked'
|
24
|
+
rescue
|
25
|
+
assert false, "parser2.parse failed: #{$!.message}"
|
26
|
+
end
|
27
|
+
|
28
|
+
assert parser2.kind_of?(Pegex::Parser),
|
29
|
+
'grammar property is Pegex::Parser object'
|
30
|
+
|
31
|
+
tree2 = parser2.grammar.tree
|
32
|
+
assert tree2, 'Grammar object has tree';
|
33
|
+
assert tree2.kind_of?(Hash), 'Grammar object is compiled to a tree'
|
34
|
+
|
35
|
+
assert_equal tree2['+toprule'], 'number',
|
36
|
+
'_FIRST_RULE is set correctly'
|
37
|
+
end
|
38
|
+
end
|
data/test/flatten.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'pegex'
|
3
|
+
require 'pegex/receiver'
|
4
|
+
|
5
|
+
class TestFlatten < Test::Unit::TestCase
|
6
|
+
def test_flatten
|
7
|
+
grammar = <<'...'
|
8
|
+
a: (((b)))+
|
9
|
+
b: (c | d)
|
10
|
+
c: /(x)/
|
11
|
+
d: /y/
|
12
|
+
...
|
13
|
+
parser = pegex(grammar, R)
|
14
|
+
got = parser.parse('xxx')
|
15
|
+
|
16
|
+
assert_equal got.join(''), 'xxx', 'Array was flattened'
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
class R < Pegex::Receiver
|
21
|
+
def got_a(got)
|
22
|
+
got.flatten
|
23
|
+
end
|
24
|
+
def got_b(got)
|
25
|
+
[got]
|
26
|
+
end
|
27
|
+
def got_c(got)
|
28
|
+
[got]
|
29
|
+
end
|
30
|
+
end
|
data/test/grammar-api.rb
CHANGED
@@ -1,6 +1,25 @@
|
|
1
|
-
require '
|
1
|
+
require 'testml'
|
2
|
+
require 'testml/compiler/lite'
|
3
|
+
require 'testml/util'; include TestML::Util
|
2
4
|
require 'pegex/grammar'
|
3
5
|
|
6
|
+
TestML.new(
|
7
|
+
compiler: TestML::Compiler::Lite,
|
8
|
+
).testml = <<'...'
|
9
|
+
%TestML 0.1.0
|
10
|
+
Plan = 1
|
11
|
+
# Label = 'MyGrammar1 compiled a tree from its text'
|
12
|
+
grammar_api() == 'foo'
|
13
|
+
...
|
14
|
+
|
15
|
+
class TestML::Bridge
|
16
|
+
def grammar_api
|
17
|
+
grammar = MyGrammar1.new
|
18
|
+
grammar.make_tree
|
19
|
+
native grammar.tree['+toprule']
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
4
23
|
class MyGrammar1 < Pegex::Grammar
|
5
24
|
def initialize
|
6
25
|
@text = <<'...'
|
@@ -12,10 +31,3 @@ baz: /def/
|
|
12
31
|
...
|
13
32
|
end
|
14
33
|
end
|
15
|
-
|
16
|
-
TestML.run do |t|
|
17
|
-
g1 = MyGrammar1.new
|
18
|
-
g1.make_tree
|
19
|
-
t.assert_equal g1.tree['+toprule'], 'foo',
|
20
|
-
'MyGrammar1 compiled a tree from its text'
|
21
|
-
end
|
data/test/lib/xxx.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
def XXX object
|
2
|
-
require '
|
3
|
-
puts
|
2
|
+
require 'yaml'
|
3
|
+
puts YAML.dump object
|
4
4
|
puts 'XXX from: ' + caller.first
|
5
5
|
exit
|
6
6
|
end
|
7
7
|
|
8
8
|
def YYY object, show_caller=true
|
9
|
-
require '
|
10
|
-
puts
|
9
|
+
require 'yaml'
|
10
|
+
puts YAML.dump object
|
11
11
|
puts 'YYY from: ' + caller.first if show_caller
|
12
12
|
return object
|
13
13
|
end
|
data/test/mice.pgx
ADDED
data/test/parse.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'pegex'
|
3
|
+
require 'pegex/input'
|
4
|
+
|
5
|
+
class TestParse < Test::Unit::TestCase
|
6
|
+
def test_parse
|
7
|
+
grammar_file = 'test/mice.pgx';
|
8
|
+
# XXX need to add file support to Pegex::Input
|
9
|
+
# input = Pegex::Input.new(file: grammar_file)
|
10
|
+
input = File.read(grammar_file)
|
11
|
+
begin
|
12
|
+
pegex(input).parse("3 blind mice\n")
|
13
|
+
assert true, "!<rule> works"
|
14
|
+
rescue
|
15
|
+
assert false, "!<rule> fails"
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/test/repeat.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'pegex'
|
3
|
+
|
4
|
+
class TestRepeat < Test::Unit::TestCase
|
5
|
+
def test_repeat
|
6
|
+
parser = pegex('a: /<ANY>*?(x+)<ANY>*/')
|
7
|
+
assert_equal parser.parse('xxxx')['a'], 'xxxx', 'First parse works'
|
8
|
+
assert_equal parser.parse('xxxx')['a'], 'xxxx', 'Second parse works'
|
9
|
+
end
|
10
|
+
end
|
data/test/sample.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
|
3
|
+
class TestSample < Test::Unit::TestCase
|
4
|
+
def test_sample
|
5
|
+
grammar_text = <<'...'
|
6
|
+
contact:
|
7
|
+
name_section
|
8
|
+
phone_section
|
9
|
+
address_section
|
10
|
+
|
11
|
+
name_section:
|
12
|
+
/ Name <COLON> <BLANK>+ /
|
13
|
+
name
|
14
|
+
EOL
|
15
|
+
|
16
|
+
name: /(<WORD>+)<BLANK>(<WORD>+)/
|
17
|
+
|
18
|
+
phone_section: /Phone<COLON><BLANK>+/ <phone_number> <EOL>
|
19
|
+
phone_number: term
|
20
|
+
|
21
|
+
address_section:
|
22
|
+
/Address<COLON><EOL>/
|
23
|
+
street_line
|
24
|
+
city_line
|
25
|
+
country_line?
|
26
|
+
|
27
|
+
street_line: indent street EOL
|
28
|
+
street: /<NS><ANY>*/
|
29
|
+
city_line: indent city EOL
|
30
|
+
city: term
|
31
|
+
country_line: indent country EOL
|
32
|
+
country: term
|
33
|
+
|
34
|
+
term: /(
|
35
|
+
<NS> # NS is "non-space"
|
36
|
+
<ANY>*
|
37
|
+
)/
|
38
|
+
|
39
|
+
indent: /<BLANK>{2}/
|
40
|
+
...
|
41
|
+
|
42
|
+
input = <<'...'
|
43
|
+
Name: Ingy Net
|
44
|
+
Phone: 919-876-5432
|
45
|
+
Address:
|
46
|
+
1234 Main St
|
47
|
+
Niceville
|
48
|
+
OK
|
49
|
+
...
|
50
|
+
|
51
|
+
want = <<'...'
|
52
|
+
...
|
53
|
+
|
54
|
+
require 'pegex/grammar'
|
55
|
+
require 'pegex/receiver'
|
56
|
+
require 'pegex/compiler'
|
57
|
+
grammar = Pegex::Grammar.new do |i|
|
58
|
+
i.tree = Pegex::Compiler.new.compile(grammar_text).tree
|
59
|
+
end
|
60
|
+
parser = Pegex::Parser.new do |i|
|
61
|
+
i.grammar = grammar
|
62
|
+
i.receiver = Pegex::Receiver.new
|
63
|
+
end
|
64
|
+
ast1 = parser.parse(input)
|
65
|
+
|
66
|
+
assert true, 'parsed'
|
67
|
+
# TODO
|
68
|
+
# got = YAML.dump(ast1)
|
69
|
+
# assert_equal got, want, 'It works'
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
data/test/testml.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'testml'
|
3
|
+
require 'testml/compiler/lite'
|
4
|
+
$:.unshift "#{Dir.getwd}/test"
|
5
|
+
$:.unshift "#{Dir.getwd}/test/lib"
|
6
|
+
require 'testml_bridge'
|
7
|
+
|
8
|
+
class TestMLTestCase < Test::Unit::TestCase
|
9
|
+
def run_testml_file(file)
|
10
|
+
TestML.new(
|
11
|
+
testml: file,
|
12
|
+
bridge: TestMLBridge,
|
13
|
+
compiler: TestML::Compiler::Lite,
|
14
|
+
).run(self)
|
15
|
+
end
|
16
|
+
|
17
|
+
(Dir.glob('test/testml/*.tml')
|
18
|
+
.collect {|f| f.sub(/^test\//, '')}
|
19
|
+
).each do |file|
|
20
|
+
method_name = 'test_' + file.gsub(/\W+/, '_').sub(/_tml$/, '')
|
21
|
+
define_method(method_name.to_sym) do
|
22
|
+
run_testml_file(file)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/test/testml.yaml
ADDED
@@ -1,12 +1,10 @@
|
|
1
|
-
|
1
|
+
%TestML 0.1.0
|
2
2
|
|
3
|
-
|
3
|
+
Plan = 48
|
4
4
|
|
5
|
-
|
5
|
+
*grammar.bootstrap_compile.yaml.clean == *yaml
|
6
|
+
*grammar.compile.yaml.clean == *yaml
|
6
7
|
|
7
|
-
*grammar.compile.yaml.clean == *yaml;
|
8
|
-
|
9
|
-
testml_data <<'...'
|
10
8
|
=== Empty Grammar
|
11
9
|
--- grammar
|
12
10
|
--- yaml
|
@@ -256,10 +254,10 @@ a:
|
|
256
254
|
|
257
255
|
=== Meta Lines
|
258
256
|
--- grammar
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
257
|
+
\%grammar foo
|
258
|
+
\%version 1.1.1
|
259
|
+
\%extends bar bar
|
260
|
+
\%include bazzy
|
263
261
|
a: /b/
|
264
262
|
--- yaml
|
265
263
|
+extends: bar bar
|
@@ -268,4 +266,3 @@ a: /b/
|
|
268
266
|
+version: 1.1.1
|
269
267
|
a:
|
270
268
|
.rgx: b
|
271
|
-
...
|
@@ -1,12 +1,7 @@
|
|
1
|
-
|
1
|
+
%TestML 0.1.0
|
2
2
|
|
3
|
-
|
3
|
+
*grammar1.compile.yaml == *grammar2.compile.yaml
|
4
4
|
|
5
|
-
TestML.run do |t|
|
6
|
-
t.eval '*grammar1.compile.yaml == *grammar2.compile.yaml'
|
7
|
-
end
|
8
|
-
|
9
|
-
TestML.data <<'...'
|
10
5
|
=== Simple Test Case
|
11
6
|
--- grammar1
|
12
7
|
a: /x/
|
@@ -76,4 +71,3 @@ a b: /O HAI/
|
|
76
71
|
--- grammar2
|
77
72
|
a: /O HAI/
|
78
73
|
b: /O HAI/
|
79
|
-
...
|
@@ -1,3 +1,16 @@
|
|
1
|
+
%TestML 0.1.0
|
2
|
+
|
3
|
+
Plan = 63
|
4
|
+
|
5
|
+
Label = '$BlockLabel - Compiler output matches bootstrap?'
|
6
|
+
*grammar.compile.yaml == *grammar.bootstrap_compile.yaml
|
7
|
+
|
8
|
+
Label = '$BlockLabel - Compressed grammar compiles the same?'
|
9
|
+
*grammar.compress.compile.yaml == *grammar.compress.compile.yaml
|
10
|
+
|
11
|
+
Label = '$BlockLabel - Compressed grammar matches uncompressed?'
|
12
|
+
*grammar.compress.compile.yaml == *grammar.compile.yaml
|
13
|
+
|
1
14
|
=== Single Regex
|
2
15
|
--- grammar
|
3
16
|
a: /x/
|
@@ -1,17 +1,7 @@
|
|
1
|
-
|
1
|
+
%TestML 0.1.0
|
2
2
|
|
3
|
-
|
4
|
-
t.eval 'parse_input(*grammar, *input).Catch ~~ *error'
|
5
|
-
end
|
3
|
+
parse_input(*grammar, *input).Catch ~~ *error
|
6
4
|
|
7
|
-
class TestPegex
|
8
|
-
def parse_input grammar, input
|
9
|
-
parser = pegex grammar
|
10
|
-
return parser.parse input
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
TestML.data <<'...'
|
15
5
|
=== Error fails at furthest match
|
16
6
|
# XXX This one not testing much.
|
17
7
|
--- grammar
|
@@ -158,4 +148,3 @@ a: /a+/ %%~%%^%% ~
|
|
158
148
|
aaa
|
159
149
|
--- error: Rule ending syntax error
|
160
150
|
# --- error: Illegal characters in separator indicator
|
161
|
-
...
|
@@ -0,0 +1,14 @@
|
|
1
|
+
%TestML 0.1.0
|
2
|
+
|
3
|
+
# XXX Skipping this test for now. Might need a %Skip in TestML
|
4
|
+
'1' == '1'
|
5
|
+
# *grammar.compile.optimize.yaml.clean == *yaml
|
6
|
+
|
7
|
+
|
8
|
+
=== Question Mark Expansion
|
9
|
+
--- SKIP
|
10
|
+
--- grammar
|
11
|
+
a: /(:foo)/
|
12
|
+
--- yaml
|
13
|
+
a:
|
14
|
+
.rgx: /(?:foo)/
|
@@ -1,5 +1,15 @@
|
|
1
|
+
%TestML 0.1.0
|
2
|
+
|
3
|
+
Plan = 2
|
4
|
+
|
5
|
+
Label = '$BlockLabel - Pegex::Tree'
|
6
|
+
parse_to_tree(*grammar, *input).yaml.clean == *tree
|
7
|
+
Label = '$BlockLabel - Pegex::Tree::Wrap'
|
8
|
+
parse_to_tree_wrap(*grammar, *input).yaml.clean == *wrap
|
9
|
+
Label = '$BlockLabel - TestAST'
|
10
|
+
parse_to_tree_test(*grammar, *input).yaml.clean == *ast
|
11
|
+
|
1
12
|
=== Part of Pegex Grammar
|
2
|
-
--- SKIP
|
3
13
|
--- grammar
|
4
14
|
\# This is the Pegex grammar for Pegex grammars!
|
5
15
|
grammar: ( <comment>* <rule_definition> )+ <comment>*
|
@@ -1,3 +1,14 @@
|
|
1
|
+
%TestML 0.1.0
|
2
|
+
|
3
|
+
Plan = 56
|
4
|
+
|
5
|
+
Label = '$BlockLabel - Pegex::Tree'
|
6
|
+
parse_to_tree(*grammar, *input).yaml.clean == *tree
|
7
|
+
Label = '$BlockLabel - Pegex::Tree::Wrap'
|
8
|
+
parse_to_tree_wrap(*grammar, *input).yaml.clean == *wrap
|
9
|
+
Label = '$BlockLabel - t::TestAST'
|
10
|
+
parse_to_tree_test(*grammar, *input).yaml.clean == *ast
|
11
|
+
|
1
12
|
=== Single Regex - Single Capture
|
2
13
|
--- grammar
|
3
14
|
a: /x*(y*)z*<EOL>/
|
@@ -57,13 +68,12 @@ d: /d/
|
|
57
68
|
--- tree
|
58
69
|
- []
|
59
70
|
- []
|
60
|
-
---
|
71
|
+
--- wrap
|
61
72
|
a:
|
62
73
|
- []
|
63
74
|
- []
|
64
75
|
|
65
76
|
=== A subrule
|
66
|
-
--- SKIP
|
67
77
|
--- grammar
|
68
78
|
a: <b> /(y+)/ <EOL>
|
69
79
|
b: /(x+)/
|
@@ -109,7 +119,6 @@ a:
|
|
109
119
|
- zzz
|
110
120
|
|
111
121
|
=== + Modifier
|
112
|
-
--- SKIP
|
113
122
|
--- grammar
|
114
123
|
a: ( b c )+ <EOL>
|
115
124
|
b: /(x*)/
|
@@ -186,7 +195,6 @@ a:
|
|
186
195
|
c: ccc
|
187
196
|
|
188
197
|
=== Assertion not captured
|
189
|
-
--- SKIP
|
190
198
|
--- grammar
|
191
199
|
a: =x x y EOL
|
192
200
|
x: /(x+)/
|
@@ -202,7 +210,6 @@ a:
|
|
202
210
|
- y: yyyy
|
203
211
|
|
204
212
|
=== Empty regex group plus rule
|
205
|
-
--- SKIP
|
206
213
|
--- grammar
|
207
214
|
a: <b>* <c> <EOL>
|
208
215
|
b: /xxx/
|
@@ -218,7 +225,6 @@ a:
|
|
218
225
|
- c: yyy
|
219
226
|
|
220
227
|
=== Rule to Rule to Rule
|
221
|
-
--- SKIP
|
222
228
|
--- grammar
|
223
229
|
a: <b>
|
224
230
|
b: <c>*
|
@@ -262,7 +268,6 @@ a:
|
|
262
268
|
- c: c
|
263
269
|
|
264
270
|
=== Rule with Separator
|
265
|
-
--- SKIP
|
266
271
|
--- grammar
|
267
272
|
a: <c>* % <d>
|
268
273
|
c: /(c+)/
|
@@ -388,7 +393,6 @@ a:
|
|
388
393
|
- b: b
|
389
394
|
|
390
395
|
=== Quantifier on the Separator
|
391
|
-
--- SKIP
|
392
396
|
--- grammar
|
393
397
|
a: <b>2-4 %% <c>*
|
394
398
|
b: /(b)/
|
@@ -422,7 +426,6 @@ a:
|
|
422
426
|
- - b: b
|
423
427
|
- b: b
|
424
428
|
|
425
|
-
# TODO - deal with psych null output
|
426
429
|
=== False Values
|
427
430
|
--- grammar
|
428
431
|
a: <zero> <empty> <undef>
|
@@ -447,3 +450,14 @@ a:
|
|
447
450
|
- b: bb
|
448
451
|
- c: cc
|
449
452
|
- d: dd
|
453
|
+
|
454
|
+
=== 2 + 1
|
455
|
+
--- SKIP
|
456
|
+
--- grammar
|
457
|
+
a: <b>2 b
|
458
|
+
b: /(b)/
|
459
|
+
--- input: bbb
|
460
|
+
--- ast
|
461
|
+
- b
|
462
|
+
- b
|
463
|
+
- b
|