pegex 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemspec +21 -0
- data/CHANGELOG.yaml +3 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +16 -0
- data/LICENSE +21 -0
- data/README.rdoc +78 -0
- data/Rakefile +64 -0
- data/lib/pegex/compiler.rb +91 -0
- data/lib/pegex/grammar/atoms.rb +96 -0
- data/lib/pegex/grammar.rb +21 -0
- data/lib/pegex/input.rb +41 -0
- data/lib/pegex/parser.rb +287 -0
- data/lib/pegex/pegex/ast.rb +148 -0
- data/lib/pegex/pegex/grammar.rb +414 -0
- data/lib/pegex/receiver.rb +7 -0
- data/lib/pegex/tree/wrap.rb +13 -0
- data/lib/pegex/tree.rb +17 -0
- data/lib/pegex.rb +18 -0
- data/test/compiler-checks.rb +271 -0
- data/test/compiler-checks.tml +271 -0
- data/test/compiler-equivalence.rb +79 -0
- data/test/compiler.rb +42 -0
- data/test/compiler.tml +111 -0
- data/test/error.rb +161 -0
- data/test/export_ok.rb +36 -0
- data/test/grammar-api.rb +21 -0
- data/test/lib/recursive_sort.rb +17 -0
- data/test/lib/test_pegex.rb +33 -0
- data/test/lib/testast.rb +15 -0
- data/test/lib/xxx.rb +13 -0
- data/test/tree-pegex.tml +35 -0
- data/test/tree.rb +47 -0
- data/test/tree.tml +449 -0
- metadata +99 -0
data/test/compiler.tml
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
=== Single Regex
|
2
|
+
--- grammar
|
3
|
+
a: /x/
|
4
|
+
|
5
|
+
=== Single Reference
|
6
|
+
--- grammar
|
7
|
+
a: <b>
|
8
|
+
|
9
|
+
=== Single Error
|
10
|
+
--- grammar
|
11
|
+
a: `b`
|
12
|
+
|
13
|
+
=== Simple All Group
|
14
|
+
--- grammar
|
15
|
+
a: /b/ <c>
|
16
|
+
|
17
|
+
=== Simple Any Group
|
18
|
+
--- grammar
|
19
|
+
a: <b> | <c>
|
20
|
+
|
21
|
+
=== Bracketed All Group
|
22
|
+
--- grammar
|
23
|
+
a: ( <b> /c/ )
|
24
|
+
|
25
|
+
=== Bracketed Any Group
|
26
|
+
--- grammar
|
27
|
+
a: ( <b> | /c/ | `d` )
|
28
|
+
|
29
|
+
=== Bracketed Group in Unbracketed Group
|
30
|
+
--- grammar
|
31
|
+
a: <b> ( <c> | <d> )
|
32
|
+
|
33
|
+
=== And over Or Precedence
|
34
|
+
--- grammar
|
35
|
+
a: <b> <c> | <d> <e> | <f> % <g>
|
36
|
+
|
37
|
+
=== Multiple Rules
|
38
|
+
--- grammar
|
39
|
+
a: <b>
|
40
|
+
b: <c>
|
41
|
+
|
42
|
+
=== Simple Grammar
|
43
|
+
--- grammar
|
44
|
+
a: ( <b> <c>* )
|
45
|
+
b: /x/
|
46
|
+
c: /y+/
|
47
|
+
|
48
|
+
=== Semicolons OK
|
49
|
+
--- grammar
|
50
|
+
a: <b>;
|
51
|
+
b: <c>
|
52
|
+
c: /d/;
|
53
|
+
|
54
|
+
=== Unbracketed
|
55
|
+
--- grammar
|
56
|
+
a: <b> <c> <d>
|
57
|
+
b: <c> | <d>
|
58
|
+
|
59
|
+
=== Not Rule
|
60
|
+
--- grammar
|
61
|
+
a: !<b> <c>
|
62
|
+
|
63
|
+
=== Multiline
|
64
|
+
--- grammar
|
65
|
+
a: <b>
|
66
|
+
<c>
|
67
|
+
b:
|
68
|
+
/c/ <d>
|
69
|
+
<e>;
|
70
|
+
c:
|
71
|
+
<d> |
|
72
|
+
( /e/ <f> )
|
73
|
+
| `g`
|
74
|
+
|
75
|
+
=== Various Groups
|
76
|
+
--- grammar
|
77
|
+
a: <b> ( <c> | <d> )
|
78
|
+
b: ( <c> | <d> ) <e>
|
79
|
+
c: <d> | ( <e> <f>) | <g>
|
80
|
+
d: <e> | (<f> <g>) | <h> | ( `i` )
|
81
|
+
e: ( <f> )
|
82
|
+
|
83
|
+
=== Modifiers
|
84
|
+
--- grammar
|
85
|
+
a: !<a> =<b>
|
86
|
+
b: ( /c/ <d> )+
|
87
|
+
c: ( /c/ <d> )+
|
88
|
+
|
89
|
+
=== Any Group Plus Rule
|
90
|
+
--- grammar
|
91
|
+
a: /w/ ( <x>+ | <y>* ) <z>?
|
92
|
+
|
93
|
+
=== Equivalent
|
94
|
+
--- grammar
|
95
|
+
a: <b>
|
96
|
+
c: !<d>
|
97
|
+
|
98
|
+
=== Regex and Rule
|
99
|
+
--- grammar
|
100
|
+
a_b: /c/ <d>
|
101
|
+
|
102
|
+
=== Quantified group
|
103
|
+
--- grammar
|
104
|
+
a: <b> ( <c>* | <d>+ )+
|
105
|
+
e: ( <f> !<g> )?
|
106
|
+
|
107
|
+
=== Failures to test later
|
108
|
+
--- SKIP
|
109
|
+
--- grammar
|
110
|
+
b: ( /x/ )+
|
111
|
+
|
data/test/error.rb
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
require './test/lib/test_pegex'
|
2
|
+
|
3
|
+
# Run one TestML expression over this file's data blocks: it calls
# parse_input with each block's *grammar and *input points and compares the
# caught result against the block's *error point.
# NOTE(review): `.Catch` and `~~` are TestML operators — presumably "capture
# the raised error" and "contains/matches"; confirm against the TestML docs.
TestML.run do |t|
  t.eval 'parse_input(*grammar, *input).Catch ~~ *error'
end
|
6
|
+
|
7
|
+
# Adds the bridge method that this file's TestML expression calls.
class TestPegex
  # Build a parser from +grammar+ with the +pegex+ helper and run it on +input+.
  #
  # grammar - Pegex grammar text handed to +pegex+.
  # input   - text handed to the resulting parser's +parse+.
  #
  # Returns whatever +parser.parse+ returns. Nothing is rescued here, so any
  # error raised while compiling or parsing propagates to the caller.
  def parse_input(grammar, input)
    pegex(grammar).parse(input)
  end
end
|
13
|
+
|
14
|
+
TestML.data <<'...'
|
15
|
+
=== Error fails at furthest match
|
16
|
+
# XXX This one not testing much.
|
17
|
+
--- grammar
|
18
|
+
a: b+ c
|
19
|
+
b: /b/
|
20
|
+
c: /c/
|
21
|
+
--- input
|
22
|
+
bbbbddddd
|
23
|
+
--- error: ddddd\n
|
24
|
+
|
25
|
+
### TODO ###
|
26
|
+
# === Pegex: Illegal meta rule
|
27
|
+
# --- grammar
|
28
|
+
# %grammar Test
|
29
|
+
# %foobar Quark
|
30
|
+
# a: /a+/
|
31
|
+
# --- input
|
32
|
+
# aaa
|
33
|
+
# --- error: Illegal meta rule
|
34
|
+
|
35
|
+
=== Pegex: Rule header syntax error
|
36
|
+
--- grammar
|
37
|
+
a|: /a+/
|
38
|
+
--- input
|
39
|
+
aaa
|
40
|
+
--- error: Rule header syntax error
|
41
|
+
|
42
|
+
=== Pegex: Rule ending syntax error
|
43
|
+
--- grammar
|
44
|
+
a: /a+/ |
|
45
|
+
--- input
|
46
|
+
aaa
|
47
|
+
--- error: Rule ending syntax error
|
48
|
+
|
49
|
+
=== Pegex: Illegal rule modifier
|
50
|
+
--- grammar
|
51
|
+
a: /a+/
|
52
|
+
b: ^<a>1-2
|
53
|
+
--- input
|
54
|
+
aaa
|
55
|
+
--- error: Illegal rule modifier
|
56
|
+
|
57
|
+
=== Pegex: Missing > in rule reference
|
58
|
+
--- grammar
|
59
|
+
a: /a+/
|
60
|
+
b: !<a1-2
|
61
|
+
--- input
|
62
|
+
aaa
|
63
|
+
--- error: Missing > in rule reference
|
64
|
+
|
65
|
+
=== Pegex: Missing < in rule reference
|
66
|
+
--- grammar
|
67
|
+
a: /a+/
|
68
|
+
b: !a>1-2
|
69
|
+
--- input
|
70
|
+
aaa
|
71
|
+
--- error: Rule ending syntax error
|
72
|
+
# --- error: Missing < in rule reference
|
73
|
+
|
74
|
+
=== Pegex: Illegal character in rule quantifier
|
75
|
+
--- grammar
|
76
|
+
a: /a+/
|
77
|
+
b: !a^1-2
|
78
|
+
--- input
|
79
|
+
aaa
|
80
|
+
--- error: Rule ending syntax error
|
81
|
+
# --- error: Illegal character in rule quantifier
|
82
|
+
|
83
|
+
=== Pegex: Unprotected rule name with numeric quantifier
|
84
|
+
--- grammar
|
85
|
+
a: /a+/
|
86
|
+
b: !a1-2
|
87
|
+
--- input
|
88
|
+
aaa
|
89
|
+
--- error: Rule ending syntax error
|
90
|
+
# --- error: Unprotected rule name with numeric quantifier
|
91
|
+
|
92
|
+
=== Pegex: Runaway regular expression
|
93
|
+
--- grammar
|
94
|
+
a: /a+
|
95
|
+
--- input
|
96
|
+
aaa
|
97
|
+
--- error: Runaway regular expression
|
98
|
+
|
99
|
+
=== Pegex: Illegal group rule modifier
|
100
|
+
--- grammar
|
101
|
+
a: /a+/
|
102
|
+
b: !(a =<a>)1-2
|
103
|
+
--- input
|
104
|
+
aaa
|
105
|
+
--- error: Illegal group rule modifier
|
106
|
+
|
107
|
+
=== Pegex: Runaway rule group
|
108
|
+
--- grammar
|
109
|
+
a: /a+/
|
110
|
+
b: .(a =<a>1-2
|
111
|
+
--- input
|
112
|
+
aaa
|
113
|
+
--- error: Runaway rule group
|
114
|
+
|
115
|
+
=== Pegex: Illegal character in group rule quantifier
|
116
|
+
--- grammar
|
117
|
+
a: /a+/
|
118
|
+
b: .(a =<a>)^2
|
119
|
+
--- input
|
120
|
+
aaa
|
121
|
+
--- error: Rule ending syntax error
|
122
|
+
# --- error: Illegal character in group rule quantifier
|
123
|
+
|
124
|
+
=== Pegex: Multi-line error messages not allowed
|
125
|
+
--- grammar
|
126
|
+
a: /a+/
|
127
|
+
b: `This is legal`
|
128
|
+
c: `This is
|
129
|
+
|
130
|
+
illegal`
|
131
|
+
--- input
|
132
|
+
aaa
|
133
|
+
--- error: Multi-line error messages not allowed
|
134
|
+
|
135
|
+
=== Pegex: Runaway error message
|
136
|
+
--- grammar
|
137
|
+
a: /a+/
|
138
|
+
b: `This is legal`
|
139
|
+
c: `This is
|
140
|
+
|
141
|
+
illegal
|
142
|
+
--- input
|
143
|
+
aaa
|
144
|
+
--- error: Runaway error message
|
145
|
+
|
146
|
+
=== Pegex: Leading separator form (BOK) no longer supported
|
147
|
+
--- grammar
|
148
|
+
a: /a+/ %%% ~
|
149
|
+
--- input
|
150
|
+
aaa
|
151
|
+
--- error: Rule ending syntax error
|
152
|
+
# --- error: Leading separator form (BOK) no longer supported
|
153
|
+
|
154
|
+
=== Pegex: Illegal characters in separator indicator
|
155
|
+
--- grammar
|
156
|
+
a: /a+/ %%~%%^%% ~
|
157
|
+
--- input
|
158
|
+
aaa
|
159
|
+
--- error: Rule ending syntax error
|
160
|
+
# --- error: Illegal characters in separator indicator
|
161
|
+
...
|
data/test/export_ok.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require './test/lib/test_pegex'
|
2
|
+
|
3
|
+
# Smoke test for the top-level +pegex+ helper: it must be defined after
# requiring 'pegex', return a Pegex::Parser, and expose a compiled grammar
# tree whose '+toprule' entry names the first rule.
TestML.run do |t|
  require 'pegex'
  t.assert method('pegex'),
    'pegex is exported'

  parser1 = pegex "foo: <bar>\n"

  t.assert parser1.kind_of?(Pegex::Parser),
    'pegex returns a Pegex::Parser object'

  t.assert_equal parser1.grammar.tree['+toprule'], 'foo',
    'pegex() contains a grammar with a compiled tree'

  parser2 = pegex(<<'...')
number: /<DIGIT>+/
...

  # Parsing must not raise; a failure is reported through the test harness
  # together with the exception message.
  begin
    parser2.parse '123'
    t.assert true, 'parser2.parse worked'
  rescue => e
    t.assert false, "parser2.parse failed: #{e.message}"
  end

  # NOTE(review): the label mentions the grammar property, but the value
  # checked is the parser itself — confirm which was intended.
  t.assert parser2.kind_of?(Pegex::Parser),
    'grammar property is Pegex::Parser object'

  tree2 = parser2.grammar.tree
  t.assert tree2, 'Grammar object has tree'
  t.assert tree2.kind_of?(Hash), 'Grammar object is compiled to a tree'

  # NOTE(review): the label says _FIRST_RULE while the key checked is
  # '+toprule' — presumably a legacy name; confirm before renaming.
  t.assert_equal tree2['+toprule'], 'number',
    '_FIRST_RULE is set correctly'
end
|
data/test/grammar-api.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require './test/lib/test_pegex'
|
2
|
+
require 'pegex/grammar'
|
3
|
+
|
4
|
+
# Minimal Pegex::Grammar subclass used to check that a grammar can supply its
# own text and have a tree compiled from it.
class MyGrammar1 < Pegex::Grammar
  # Stores the grammar text in @text.
  # NOTE(review): +super+ is not called — confirm Pegex::Grammar#initialize
  # has no required setup.
  # NOTE(review): the heredoc body is reproduced verbatim, including the
  # unindented continuation lines of the `bar` rule — confirm against upstream.
  def initialize
    @text = <<'...'
foo: /xyz/ <bar>
bar:
/abc/ |
<baz>
baz: /def/
...
  end
end
|
15
|
+
|
16
|
+
# Compile MyGrammar1's text into a tree and check that the first rule ('foo')
# is recorded under the '+toprule' key.
TestML.run do |t|
  g1 = MyGrammar1.new
  g1.make_tree
  t.assert_equal g1.tree['+toprule'], 'foo',
    'MyGrammar1 compiled a tree from its text'
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'testml/lite'
|
2
|
+
require 'pegex/compiler'
|
3
|
+
require 'recursive_sort'
|
4
|
+
|
5
|
+
# Shared helpers for the Pegex test suite, built on TestML::Lite.
class TestPegex < TestML::Lite
  include TestML::Lite::TestCases

  # Compile +grammar_text+ and return the combinated tree with its '+toprule'
  # entry removed.
  #
  # Side effect: stores the text in the global $grammar_text, which on_fail
  # prints when a test fails.
  def compile(grammar_text)
    $grammar_text = grammar_text
    tree = Pegex::Compiler.new.parse(grammar_text).combinate.tree
    tree.delete('+toprule')
    tree
  end

  # Dump +object+ to YAML after applying +recursive_sort+ to it.
  # psych is required on first use rather than at load time.
  def yaml(object)
    require 'psych'
    Psych.dump(object.recursive_sort)
  end

  # Normalise a YAML string IN PLACE and return it:
  # * removes the first line that is exactly "...",
  # * removes a leading "---" plus the one whitespace character after it,
  # * unquotes single-quoted runs of digits ('12' becomes 12),
  # * rewrites "+eok: true" to "+eok: 1".
  #
  # The argument is mutated, so it must not be a frozen string.
  def clean(yaml)
    yaml.sub!(/^\.\.\.\n/, '')
    yaml.sub!(/\A---\s/, '')
    yaml.gsub!(/'(\d+)'/, '\1')
    yaml.gsub!(/\+eok: true/, '+eok: 1')
    yaml
  end

  # Print the grammar text last passed to compile, followed by a blank line.
  # NOTE(review): presumably invoked by TestML::Lite when a test fails —
  # confirm against the TestML::Lite hooks.
  def on_fail
    puts "Parsing this Pegex grammar:"
    puts $grammar_text
    puts
  end
end
|
data/test/lib/testast.rb
ADDED
data/test/lib/xxx.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Debugging aid: print +object+ as YAML, print where XXX was called from,
# then terminate the process.
#
# object - any value Psych can dump.
#
# Never returns normally: +exit+ raises SystemExit.
def XXX(object)
  require 'psych' # loaded on first use only
  puts Psych.dump(object)
  # caller.first is the frame that invoked XXX.
  puts "XXX from: #{caller.first}"
  exit
end
|
7
|
+
|
8
|
+
# Debugging aid: print +object+ as YAML and hand it back, so a call can be
# wrapped around an expression without changing its value.
#
# object      - any value Psych can dump.
# show_caller - when truthy (the default), also print where YYY was called from.
#
# Returns +object+ itself.
def YYY(object, show_caller = true)
  require 'psych' # loaded on first use only
  puts Psych.dump(object)
  # caller.first is the frame that invoked YYY.
  puts "YYY from: #{caller.first}" if show_caller
  object
end
|
data/test/tree-pegex.tml
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
=== Part of Pegex Grammar
|
2
|
+
--- SKIP
|
3
|
+
--- grammar
|
4
|
+
\# This is the Pegex grammar for Pegex grammars!
|
5
|
+
grammar: ( <comment>* <rule_definition> )+ <comment>*
|
6
|
+
rule_definition: /<WS>*/ <rule_name> /<COLON><WS>*/ <rule_line>
|
7
|
+
rule_name: /(<ALPHA><WORD>*)/
|
8
|
+
comment: /<HASH><line><EOL>/
|
9
|
+
line: /<ANY>*/
|
10
|
+
rule_line: /(<line>)<EOL>/
|
11
|
+
|
12
|
+
--- input
|
13
|
+
\# This is the Pegex grammar for Pegex grammars!
|
14
|
+
grammar: ( <comment>* <rule_definition> )+ <comment>*
|
15
|
+
rule_definition: /<WS>*/ <rule_name> /<COLON><WS>*/ <rule_line>
|
16
|
+
--- tree
|
17
|
+
- - - []
|
18
|
+
- - grammar
|
19
|
+
- ( <comment>* <rule_definition> )+ <comment>*
|
20
|
+
- - []
|
21
|
+
- - rule_definition
|
22
|
+
- /<WS>*/ <rule_name> /<COLON><WS>*/ <rule_line>
|
23
|
+
- []
|
24
|
+
--- wrap
|
25
|
+
grammar:
|
26
|
+
- - - []
|
27
|
+
- rule_definition:
|
28
|
+
- rule_name: grammar
|
29
|
+
- rule_line: ( <comment>* <rule_definition> )+ <comment>*
|
30
|
+
- - []
|
31
|
+
- rule_definition:
|
32
|
+
- rule_name: rule_definition
|
33
|
+
- rule_line: /<WS>*/ <rule_name> /<COLON><WS>*/ <rule_line>
|
34
|
+
- []
|
35
|
+
|
data/test/tree.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require './test/lib/test_pegex'
|
2
|
+
|
3
|
+
TestML.require_or_skip 'psych'
|
4
|
+
|
5
|
+
# For each TestML data file, load it and evaluate every block that has a
# *grammar point through TestPegex#run_tree_tests.
TestML.run do |t|
  files = [
    'test/tree.tml',
    'test/tree-pegex.tml',
  ]

  files.each do |f|
    t.data f
    # The bound method is passed as the callback for the '*grammar' expression.
    t.eval '*grammar', t.method('run_tree_tests')
  end
end
|
16
|
+
|
17
|
+
# Tree-building test helpers: run each data block against three receivers
# (Pegex::Tree, Pegex::Tree::Wrap and TestAST) and compare the YAML output.
class TestPegex
  require 'pegex/tree'
  require 'pegex/tree/wrap'
  require 'testast'

  # Run the three receiver comparisons for one TestML data block.
  #
  # block - the TestML block handed to run_test.
  # expr  - accepted for the callback signature; not used here.
  def run_tree_tests(block, expr = nil)
    label '$BlockLabel - Pegex::Tree'
    run_test(
      block,
      "parse_to_tree('Pegex::Tree', *grammar, *input).yaml.clean == *tree"
    )

    label '$BlockLabel - Pegex::Tree::Wrap'
    run_test(
      block,
      "parse_to_tree('Pegex::Tree::Wrap', *grammar, *input).yaml.clean == *wrap"
    )

    label '$BlockLabel - t::TestAST'
    run_test(
      block,
      "parse_to_tree('TestAST', *grammar, *input).yaml.clean == *ast"
    )
  end

  require 'pegex'

  # Parse +input+ with +grammar+, using the class named by +receiver+ as the
  # parser's receiver.
  #
  # receiver - constant path as a String, e.g. 'Pegex::Tree::Wrap'.
  # grammar  - Pegex grammar text.
  # input    - text to parse.
  #
  # Returns whatever +parser.parse+ returns.
  def parse_to_tree(receiver, grammar, input)
    # 'Pegex::Tree::Wrap' -> 'pegex/tree/wrap': load the file defining the class.
    require receiver.downcase.gsub(/::/, '/')
    # Resolve the constant path one segment at a time instead of eval'ing the
    # string, so only constant lookup (never arbitrary code) can happen.
    receiver_class = receiver.split('::').inject(Object) do |scope, const_name|
      scope.const_get(const_name)
    end
    pegex(grammar, receiver_class).parse(input)
  end
end
|