pegex 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemspec +21 -0
- data/CHANGELOG.yaml +3 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +16 -0
- data/LICENSE +21 -0
- data/README.rdoc +78 -0
- data/Rakefile +64 -0
- data/lib/pegex/compiler.rb +91 -0
- data/lib/pegex/grammar/atoms.rb +96 -0
- data/lib/pegex/grammar.rb +21 -0
- data/lib/pegex/input.rb +41 -0
- data/lib/pegex/parser.rb +287 -0
- data/lib/pegex/pegex/ast.rb +148 -0
- data/lib/pegex/pegex/grammar.rb +414 -0
- data/lib/pegex/receiver.rb +7 -0
- data/lib/pegex/tree/wrap.rb +13 -0
- data/lib/pegex/tree.rb +17 -0
- data/lib/pegex.rb +18 -0
- data/test/compiler-checks.rb +271 -0
- data/test/compiler-checks.tml +271 -0
- data/test/compiler-equivalence.rb +79 -0
- data/test/compiler.rb +42 -0
- data/test/compiler.tml +111 -0
- data/test/error.rb +161 -0
- data/test/export_ok.rb +36 -0
- data/test/grammar-api.rb +21 -0
- data/test/lib/recursive_sort.rb +17 -0
- data/test/lib/test_pegex.rb +33 -0
- data/test/lib/testast.rb +15 -0
- data/test/lib/xxx.rb +13 -0
- data/test/tree-pegex.tml +35 -0
- data/test/tree.rb +47 -0
- data/test/tree.tml +449 -0
- metadata +99 -0
data/test/compiler.tml
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
=== Single Regex
|
2
|
+
--- grammar
|
3
|
+
a: /x/
|
4
|
+
|
5
|
+
=== Single Reference
|
6
|
+
--- grammar
|
7
|
+
a: <b>
|
8
|
+
|
9
|
+
=== Single Error
|
10
|
+
--- grammar
|
11
|
+
a: `b`
|
12
|
+
|
13
|
+
=== Simple All Group
|
14
|
+
--- grammar
|
15
|
+
a: /b/ <c>
|
16
|
+
|
17
|
+
=== Simple Any Group
|
18
|
+
--- grammar
|
19
|
+
a: <b> | <c>
|
20
|
+
|
21
|
+
=== Bracketed All Group
|
22
|
+
--- grammar
|
23
|
+
a: ( <b> /c/ )
|
24
|
+
|
25
|
+
=== Bracketed Any Group
|
26
|
+
--- grammar
|
27
|
+
a: ( <b> | /c/ | `d` )
|
28
|
+
|
29
|
+
=== Bracketed Group in Unbracketed Group
|
30
|
+
--- grammar
|
31
|
+
a: <b> ( <c> | <d> )
|
32
|
+
|
33
|
+
=== And over Or Precedence
|
34
|
+
--- grammar
|
35
|
+
a: <b> <c> | <d> <e> | <f> % <g>
|
36
|
+
|
37
|
+
=== Multiple Rules
|
38
|
+
--- grammar
|
39
|
+
a: <b>
|
40
|
+
b: <c>
|
41
|
+
|
42
|
+
=== Simple Grammar
|
43
|
+
--- grammar
|
44
|
+
a: ( <b> <c>* )
|
45
|
+
b: /x/
|
46
|
+
c: /y+/
|
47
|
+
|
48
|
+
=== Semicolons OK
|
49
|
+
--- grammar
|
50
|
+
a: <b>;
|
51
|
+
b: <c>
|
52
|
+
c: /d/;
|
53
|
+
|
54
|
+
=== Unbracketed
|
55
|
+
--- grammar
|
56
|
+
a: <b> <c> <d>
|
57
|
+
b: <c> | <d>
|
58
|
+
|
59
|
+
=== Not Rule
|
60
|
+
--- grammar
|
61
|
+
a: !<b> <c>
|
62
|
+
|
63
|
+
=== Multiline
|
64
|
+
--- grammar
|
65
|
+
a: <b>
|
66
|
+
<c>
|
67
|
+
b:
|
68
|
+
/c/ <d>
|
69
|
+
<e>;
|
70
|
+
c:
|
71
|
+
<d> |
|
72
|
+
( /e/ <f> )
|
73
|
+
| `g`
|
74
|
+
|
75
|
+
=== Various Groups
|
76
|
+
--- grammar
|
77
|
+
a: <b> ( <c> | <d> )
|
78
|
+
b: ( <c> | <d> ) <e>
|
79
|
+
c: <d> | ( <e> <f>) | <g>
|
80
|
+
d: <e> | (<f> <g>) | <h> | ( `i` )
|
81
|
+
e: ( <f> )
|
82
|
+
|
83
|
+
=== Modifiers
|
84
|
+
--- grammar
|
85
|
+
a: !<a> =<b>
|
86
|
+
b: ( /c/ <d> )+
|
87
|
+
c: ( /c/ <d> )+
|
88
|
+
|
89
|
+
=== Any Group Plus Rule
|
90
|
+
--- grammar
|
91
|
+
a: /w/ ( <x>+ | <y>* ) <z>?
|
92
|
+
|
93
|
+
=== Equivalent
|
94
|
+
--- grammar
|
95
|
+
a: <b>
|
96
|
+
c: !<d>
|
97
|
+
|
98
|
+
=== Regex and Rule
|
99
|
+
--- grammar
|
100
|
+
a_b: /c/ <d>
|
101
|
+
|
102
|
+
=== Quantified group
|
103
|
+
--- grammar
|
104
|
+
a: <b> ( <c>* | <d>+ )+
|
105
|
+
e: ( <f> !<g> )?
|
106
|
+
|
107
|
+
=== Failures to test later
|
108
|
+
--- SKIP
|
109
|
+
--- grammar
|
110
|
+
b: ( /x/ )+
|
111
|
+
|
data/test/error.rb
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
require './test/lib/test_pegex'
|
2
|
+
|
3
|
+
# Evaluate one TestML expression per data block: parse the block's input with
# its grammar, catch whatever error results, and match it against the block's
# expected error text.
TestML.run { |t| t.eval('parse_input(*grammar, *input).Catch ~~ *error') }
|
6
|
+
|
7
|
+
# Test bridge used by the TestML expression in this file.
class TestPegex
  # Builds a parser for +grammar+ with the +pegex+ helper and returns the
  # result of parsing +input+ with it. Nothing is rescued here, so any error
  # raised while compiling or parsing reaches the caller.
  def parse_input(grammar, input)
    pegex(grammar).parse(input)
  end
end
|
13
|
+
|
14
|
+
TestML.data <<'...'
|
15
|
+
=== Error fails at furthest match
|
16
|
+
# XXX This one not testing much.
|
17
|
+
--- grammar
|
18
|
+
a: b+ c
|
19
|
+
b: /b/
|
20
|
+
c: /c/
|
21
|
+
--- input
|
22
|
+
bbbbddddd
|
23
|
+
--- error: ddddd\n
|
24
|
+
|
25
|
+
### TODO ###
|
26
|
+
# === Pegex: Illegal meta rule
|
27
|
+
# --- grammar
|
28
|
+
# %grammar Test
|
29
|
+
# %foobar Quark
|
30
|
+
# a: /a+/
|
31
|
+
# --- input
|
32
|
+
# aaa
|
33
|
+
# --- error: Illegal meta rule
|
34
|
+
|
35
|
+
=== Pegex: Rule header syntax error
|
36
|
+
--- grammar
|
37
|
+
a|: /a+/
|
38
|
+
--- input
|
39
|
+
aaa
|
40
|
+
--- error: Rule header syntax error
|
41
|
+
|
42
|
+
=== Pegex: Rule ending syntax error
|
43
|
+
--- grammar
|
44
|
+
a: /a+/ |
|
45
|
+
--- input
|
46
|
+
aaa
|
47
|
+
--- error: Rule ending syntax error
|
48
|
+
|
49
|
+
=== Pegex: Illegal rule modifier
|
50
|
+
--- grammar
|
51
|
+
a: /a+/
|
52
|
+
b: ^<a>1-2
|
53
|
+
--- input
|
54
|
+
aaa
|
55
|
+
--- error: Illegal rule modifier
|
56
|
+
|
57
|
+
=== Pegex: Missing > in rule reference
|
58
|
+
--- grammar
|
59
|
+
a: /a+/
|
60
|
+
b: !<a1-2
|
61
|
+
--- input
|
62
|
+
aaa
|
63
|
+
--- error: Missing > in rule reference
|
64
|
+
|
65
|
+
=== Pegex: Missing < in rule reference
|
66
|
+
--- grammar
|
67
|
+
a: /a+/
|
68
|
+
b: !a>1-2
|
69
|
+
--- input
|
70
|
+
aaa
|
71
|
+
--- error: Rule ending syntax error
|
72
|
+
# --- error: Missing < in rule reference
|
73
|
+
|
74
|
+
=== Pegex: Illegal character in rule quantifier
|
75
|
+
--- grammar
|
76
|
+
a: /a+/
|
77
|
+
b: !a^1-2
|
78
|
+
--- input
|
79
|
+
aaa
|
80
|
+
--- error: Rule ending syntax error
|
81
|
+
# --- error: Illegal character in rule quantifier
|
82
|
+
|
83
|
+
=== Pegex: Unprotected rule name with numeric quantifier
|
84
|
+
--- grammar
|
85
|
+
a: /a+/
|
86
|
+
b: !a1-2
|
87
|
+
--- input
|
88
|
+
aaa
|
89
|
+
--- error: Rule ending syntax error
|
90
|
+
# --- error: Unprotected rule name with numeric quantifier
|
91
|
+
|
92
|
+
=== Pegex: Runaway regular expression
|
93
|
+
--- grammar
|
94
|
+
a: /a+
|
95
|
+
--- input
|
96
|
+
aaa
|
97
|
+
--- error: Runaway regular expression
|
98
|
+
|
99
|
+
=== Pegex: Illegal group rule modifier
|
100
|
+
--- grammar
|
101
|
+
a: /a+/
|
102
|
+
b: !(a =<a>)1-2
|
103
|
+
--- input
|
104
|
+
aaa
|
105
|
+
--- error: Illegal group rule modifier
|
106
|
+
|
107
|
+
=== Pegex: Runaway rule group
|
108
|
+
--- grammar
|
109
|
+
a: /a+/
|
110
|
+
b: .(a =<a>1-2
|
111
|
+
--- input
|
112
|
+
aaa
|
113
|
+
--- error: Runaway rule group
|
114
|
+
|
115
|
+
=== Pegex: Illegal character in group rule quantifier
|
116
|
+
--- grammar
|
117
|
+
a: /a+/
|
118
|
+
b: .(a =<a>)^2
|
119
|
+
--- input
|
120
|
+
aaa
|
121
|
+
--- error: Rule ending syntax error
|
122
|
+
# --- error: Illegal character in group rule quantifier
|
123
|
+
|
124
|
+
=== Pegex: Multi-line error messages not allowed
|
125
|
+
--- grammar
|
126
|
+
a: /a+/
|
127
|
+
b: `This is legal`
|
128
|
+
c: `This is
|
129
|
+
|
130
|
+
illegal`
|
131
|
+
--- input
|
132
|
+
aaa
|
133
|
+
--- error: Multi-line error messages not allowed
|
134
|
+
|
135
|
+
=== Pegex: Runaway error message
|
136
|
+
--- grammar
|
137
|
+
a: /a+/
|
138
|
+
b: `This is legal`
|
139
|
+
c: `This is
|
140
|
+
|
141
|
+
illegal
|
142
|
+
--- input
|
143
|
+
aaa
|
144
|
+
--- error: Runaway error message
|
145
|
+
|
146
|
+
=== Pegex: Leading separator form (BOK) no longer supported
|
147
|
+
--- grammar
|
148
|
+
a: /a+/ %%% ~
|
149
|
+
--- input
|
150
|
+
aaa
|
151
|
+
--- error: Rule ending syntax error
|
152
|
+
# --- error: Leading separator form (BOK) no longer supported
|
153
|
+
|
154
|
+
=== Pegex: Illegal characters in separator indicator
|
155
|
+
--- grammar
|
156
|
+
a: /a+/ %%~%%^%% ~
|
157
|
+
--- input
|
158
|
+
aaa
|
159
|
+
--- error: Rule ending syntax error
|
160
|
+
# --- error: Illegal characters in separator indicator
|
161
|
+
...
|
data/test/export_ok.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require './test/lib/test_pegex'
|
2
|
+
|
3
|
+
# Smoke test for the top-level +pegex+ helper: it must be reachable from here,
# return a Pegex::Parser, and that parser's grammar must expose a compiled
# Hash tree whose '+toprule' entry is the name of the rule in the grammar text.
TestML.run do |t|
  require 'pegex'
  t.assert(method('pegex'), 'pegex is exported')

  foo_parser = pegex("foo: <bar>\n")

  t.assert(foo_parser.kind_of?(Pegex::Parser),
           'pegex returns a Pegex::Parser object')

  t.assert_equal(foo_parser.grammar.tree['+toprule'], 'foo',
                 'pegex() contains a grammar with a compiled tree')

  number_parser = pegex(<<'...')
number: /<DIGIT>+/
...

  # Parsing must not raise; any StandardError is turned into a failed
  # assertion that carries the exception message.
  begin
    number_parser.parse('123')
    t.assert(true, 'parser2.parse worked')
  rescue => error
    t.assert(false, "parser2.parse failed: #{error.message}")
  end

  # NOTE(review): the label talks about the grammar property, but the value
  # asserted is the parser itself -- confirm which one was intended.
  t.assert(number_parser.kind_of?(Pegex::Parser),
           'grammar property is Pegex::Parser object')

  compiled_tree = number_parser.grammar.tree
  t.assert(compiled_tree, 'Grammar object has tree')
  t.assert(compiled_tree.kind_of?(Hash), 'Grammar object is compiled to a tree')

  # NOTE(review): the label says _FIRST_RULE while the key checked is
  # '+toprule' -- the label looks stale.
  t.assert_equal(compiled_tree['+toprule'], 'number',
                 '_FIRST_RULE is set correctly')
end
|
data/test/grammar-api.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require './test/lib/test_pegex'
|
2
|
+
require 'pegex/grammar'
|
3
|
+
|
4
|
+
# Grammar subclass that carries its Pegex source text inline, used to check
# that a grammar can build its tree from @text.
class MyGrammar1 < Pegex::Grammar
  # Stores the three-rule grammar source (foo, bar, baz) in @text, one
  # newline-terminated line per entry below.
  def initialize
    @text = [
      'foo: /xyz/ <bar>',
      'bar:',
      '/abc/ |',
      '<baz>',
      'baz: /def/',
    ].map { |line| line + "\n" }.join
  end
end
|
15
|
+
|
16
|
+
# Instantiate MyGrammar1, ask it to build its tree, and check that the tree's
# '+toprule' entry is 'foo'.
TestML.run do |t|
  grammar = MyGrammar1.new
  grammar.make_tree
  t.assert_equal(grammar.tree['+toprule'], 'foo',
                 'MyGrammar1 compiled a tree from its text')
end
|
data/test/lib/test_pegex.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'testml/lite'
|
2
|
+
require 'pegex/compiler'
|
3
|
+
require 'recursive_sort'
|
4
|
+
|
5
|
+
# Shared TestML::Lite test class for the Pegex test scripts. It supplies the
# helper functions (compile, yaml, clean) and a failure hook.
class TestPegex < TestML::Lite
  include TestML::Lite::TestCases

  # Compiles +grammar_text+ with Pegex::Compiler (parse, then combinate) and
  # returns the resulting tree with its '+toprule' entry deleted. The grammar
  # text is also remembered in $grammar_text so on_fail can print it.
  def compile(grammar_text)
    $grammar_text = grammar_text
    compiled = Pegex::Compiler.new.parse(grammar_text).combinate
    compiled.tree.tap { |tree| tree.delete('+toprule') }
  end

  # Serialises +object+ to YAML after calling recursive_sort on it.
  # psych is loaded lazily, on first use.
  def yaml(object)
    require 'psych'
    sorted = object.recursive_sort
    Psych.dump(sorted)
  end

  # Normalises a YAML string IN PLACE and returns it:
  # - removes the first line consisting of "...",
  # - removes a leading "---" plus the one whitespace character after it,
  # - unquotes single-quoted runs of digits,
  # - rewrites "+eok: true" as "+eok: 1".
  # NOTE(review): presumably this aligns Psych output with the expected
  # fixtures -- confirm against the .tml data.
  def clean(yaml)
    yaml.sub!(/^\.\.\.\n/, '')
    yaml.sub!(/\A---\s/, '')
    yaml.gsub!(/'(\d+)'/, '\1')
    yaml.gsub!(/\+eok: true/, '+eok: 1')
    yaml
  end

  # Failure hook: prints the grammar text most recently passed to compile,
  # followed by a blank line.
  def on_fail
    puts('Parsing this Pegex grammar:', $grammar_text)
    puts
  end
end
|
data/test/lib/testast.rb
ADDED
data/test/lib/xxx.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Debugging aid: prints +object+ as YAML, then the location XXX was called
# from, and terminates the process via exit.
def XXX(object)
  require 'psych'
  call_site = caller[0]
  $stdout.puts(Psych.dump(object))
  $stdout.puts('XXX from: ' + call_site)
  exit
end
|
7
|
+
|
8
|
+
# Debugging aid: prints +object+ as YAML and, unless +show_caller+ is falsy,
# the location YYY was called from. Returns +object+ unchanged so the call can
# be wrapped around an expression.
def YYY(object, show_caller = true)
  require 'psych'
  $stdout.puts(Psych.dump(object))
  if show_caller
    $stdout.puts('YYY from: ' + caller[0])
  end
  object
end
|
data/test/tree-pegex.tml
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
=== Part of Pegex Grammar
|
2
|
+
--- SKIP
|
3
|
+
--- grammar
|
4
|
+
\# This is the Pegex grammar for Pegex grammars!
|
5
|
+
grammar: ( <comment>* <rule_definition> )+ <comment>*
|
6
|
+
rule_definition: /<WS>*/ <rule_name> /<COLON><WS>*/ <rule_line>
|
7
|
+
rule_name: /(<ALPHA><WORD>*)/
|
8
|
+
comment: /<HASH><line><EOL>/
|
9
|
+
line: /<ANY>*/
|
10
|
+
rule_line: /(<line>)<EOL>/
|
11
|
+
|
12
|
+
--- input
|
13
|
+
\# This is the Pegex grammar for Pegex grammars!
|
14
|
+
grammar: ( <comment>* <rule_definition> )+ <comment>*
|
15
|
+
rule_definition: /<WS>*/ <rule_name> /<COLON><WS>*/ <rule_line>
|
16
|
+
--- tree
|
17
|
+
- - - []
|
18
|
+
- - grammar
|
19
|
+
- ( <comment>* <rule_definition> )+ <comment>*
|
20
|
+
- - []
|
21
|
+
- - rule_definition
|
22
|
+
- /<WS>*/ <rule_name> /<COLON><WS>*/ <rule_line>
|
23
|
+
- []
|
24
|
+
--- wrap
|
25
|
+
grammar:
|
26
|
+
- - - []
|
27
|
+
- rule_definition:
|
28
|
+
- rule_name: grammar
|
29
|
+
- rule_line: ( <comment>* <rule_definition> )+ <comment>*
|
30
|
+
- - []
|
31
|
+
- rule_definition:
|
32
|
+
- rule_name: rule_definition
|
33
|
+
- rule_line: /<WS>*/ <rule_name> /<COLON><WS>*/ <rule_line>
|
34
|
+
- []
|
35
|
+
|
data/test/tree.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require './test/lib/test_pegex'
|
2
|
+
|
3
|
+
TestML.require_or_skip 'psych'
|
4
|
+
|
5
|
+
# For each TestML data file, load it and evaluate '*grammar' for its blocks,
# passing run_tree_tests as the callback.
TestML.run do |t|
  %w[test/tree.tml test/tree-pegex.tml].each do |data_file|
    t.data(data_file)
    t.eval('*grammar', t.method('run_tree_tests'))
  end
end
|
16
|
+
|
17
|
+
# Tree-test helpers: each data block is parsed with three receivers
# (Pegex::Tree, Pegex::Tree::Wrap, TestAST) and the cleaned YAML of each
# result is compared with the block's *tree, *wrap and *ast sections.
class TestPegex
  require 'pegex/tree'
  require 'pegex/tree/wrap'
  require 'testast'

  # Runs the three receiver comparisons for +block+, setting the label before
  # each one. +expr+ is accepted but not used. Returns the result of the last
  # run_test call.
  def run_tree_tests(block, expr = nil)
    checks = [
      ['$BlockLabel - Pegex::Tree',
       "parse_to_tree('Pegex::Tree', *grammar, *input).yaml.clean == *tree"],
      ['$BlockLabel - Pegex::Tree::Wrap',
       "parse_to_tree('Pegex::Tree::Wrap', *grammar, *input).yaml.clean == *wrap"],
      ['$BlockLabel - t::TestAST',
       "parse_to_tree('TestAST', *grammar, *input).yaml.clean == *ast"],
    ]
    outcomes = checks.map do |title, expression|
      label(title)
      run_test(block, expression)
    end
    outcomes.last
  end

  require 'pegex'

  # Parses +input+ with +grammar+ using the receiver class named by the
  # string +receiver+ (e.g. 'Pegex::Tree'). The file to require is derived
  # from the name by downcasing it and turning '::' into '/'.
  def parse_to_tree(receiver, grammar, input)
    require(receiver.downcase.gsub(/::/, '/'))
    # The name is resolved with eval; the callers in this file only pass
    # literal class names.
    receiver_class = Kernel.eval(receiver)
    pegex(grammar, receiver_class).parse(input)
  end
end
|