kpeg 0.8.5 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +10 -0
- data/.gemtest +0 -0
- data/Gemfile +11 -3
- data/History.txt +21 -0
- data/LICENSE +25 -0
- data/Manifest.txt +47 -0
- data/README.rdoc +222 -0
- data/Rakefile +23 -11
- data/bin/kpeg +4 -2
- data/examples/calculator/calculator.kpeg +17 -0
- data/examples/calculator/calculator.rb +7 -0
- data/examples/foreign_reference/literals.kpeg +5 -0
- data/examples/foreign_reference/matcher.kpeg +9 -0
- data/examples/foreign_reference/matcher.rb +5 -0
- data/examples/lua_string/driver.rb +21 -0
- data/examples/lua_string/lua_string.kpeg +14 -0
- data/examples/lua_string/lua_string.kpeg.rb +460 -0
- data/examples/phone_number/README.md +3 -0
- data/examples/phone_number/phone_number.kpeg +20 -0
- data/examples/phone_number/phone_number.rb +6 -0
- data/examples/upper/README.md +83 -0
- data/examples/upper/upper.kpeg +24 -0
- data/examples/upper/upper.rb +9 -0
- data/kpeg.gemspec +35 -17
- data/lib/hoe/kpeg.rb +94 -0
- data/lib/kpeg.rb +3 -0
- data/lib/kpeg/code_generator.rb +16 -3
- data/lib/kpeg/compiled_parser.rb +18 -28
- data/lib/kpeg/format_parser.kpeg +129 -0
- data/lib/kpeg/format_parser.rb +88 -49
- data/lib/kpeg/grammar.rb +10 -0
- data/lib/kpeg/string_escape.kpeg +20 -0
- data/test/inputs/comments.kpeg +5 -0
- data/test/test_file_parser_roundtrip.rb +3 -3
- data/test/test_gen_calc.rb +2 -2
- data/test/test_kpeg.rb +2 -2
- data/test/test_kpeg_code_generator.rb +65 -2
- data/test/test_kpeg_compiled_parser.rb +2 -2
- data/test/test_kpeg_format.rb +49 -4
- data/test/test_kpeg_grammar_renderer.rb +2 -2
- data/test/test_left_recursion.rb +2 -2
- data/{doc → vim}/syntax_kpeg/ftdetect/kpeg.vim +0 -0
- data/{doc → vim}/syntax_kpeg/syntax/kpeg.vim +0 -0
- metadata +89 -26
- data/README.md +0 -183
- data/lib/kpeg/version.rb +0 -3
data/.autotest
ADDED
data/.gemtest
ADDED
File without changes
|
data/Gemfile
CHANGED
@@ -1,4 +1,12 @@
|
|
1
|
-
|
1
|
+
# -*- ruby -*-
|
2
2
|
|
3
|
-
#
|
4
|
-
|
3
|
+
# DO NOT EDIT THIS FILE. Instead, edit Rakefile, and run `rake bundler:gemfile`.
|
4
|
+
|
5
|
+
source :gemcutter
|
6
|
+
|
7
|
+
|
8
|
+
gem "minitest", "~>2.11", :group => [:development, :test]
|
9
|
+
gem "rdoc", "~>3.10", :group => [:development, :test]
|
10
|
+
gem "hoe", "~>2.15", :group => [:development, :test]
|
11
|
+
|
12
|
+
# vim: syntax=ruby
|
data/History.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
=== 1.0 / 2012-04-06
|
2
|
+
|
3
|
+
* Minor enhancements
|
4
|
+
* Added arbitrary directives to the kpeg grammar
|
5
|
+
|
6
|
+
%% directive_name { ... }
|
7
|
+
|
8
|
+
* Added header and footer directives to the kpeg code formatter. These
|
9
|
+
appear above and below all other output, respectively:
|
10
|
+
|
11
|
+
%% header {
|
12
|
+
# coding: UTF-8
|
13
|
+
}
|
14
|
+
|
15
|
+
[... your grammar ...]
|
16
|
+
|
17
|
+
%% footer {
|
18
|
+
require 'some/subclass'
|
19
|
+
}
|
20
|
+
* Switched to minitest
|
21
|
+
* Switched to hoe
|
data/LICENSE
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
Copyright (c) 2011, Evan Phoenix
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
* Redistributions of source code must retain the above copyright
|
7
|
+
notice, this list of conditions and the following disclaimer.
|
8
|
+
* Redistributions in binary form must reproduce the above copyright
|
9
|
+
notice, this list of conditions and the following disclaimer in the
|
10
|
+
documentation and/or other materials provided with the distribution.
|
11
|
+
* Neither the name of the <organization> nor the
|
12
|
+
names of its contributors may be used to endorse or promote products
|
13
|
+
derived from this software without specific prior written permission.
|
14
|
+
|
15
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
16
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
17
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
18
|
+
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
|
19
|
+
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
20
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
21
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
22
|
+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
23
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
24
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
25
|
+
|
data/Manifest.txt
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
.autotest
|
2
|
+
Gemfile
|
3
|
+
History.txt
|
4
|
+
LICENSE
|
5
|
+
Manifest.txt
|
6
|
+
README.rdoc
|
7
|
+
Rakefile
|
8
|
+
bin/kpeg
|
9
|
+
examples/calculator/calculator.kpeg
|
10
|
+
examples/calculator/calculator.rb
|
11
|
+
examples/foreign_reference/literals.kpeg
|
12
|
+
examples/foreign_reference/matcher.kpeg
|
13
|
+
examples/foreign_reference/matcher.rb
|
14
|
+
examples/lua_string/driver.rb
|
15
|
+
examples/lua_string/lua_string.kpeg
|
16
|
+
examples/lua_string/lua_string.kpeg.rb
|
17
|
+
examples/phone_number/README.md
|
18
|
+
examples/phone_number/phone_number.kpeg
|
19
|
+
examples/phone_number/phone_number.rb
|
20
|
+
examples/upper/README.md
|
21
|
+
examples/upper/upper.kpeg
|
22
|
+
examples/upper/upper.rb
|
23
|
+
kpeg.gemspec
|
24
|
+
lib/hoe/kpeg.rb
|
25
|
+
lib/kpeg.rb
|
26
|
+
lib/kpeg/code_generator.rb
|
27
|
+
lib/kpeg/compiled_parser.rb
|
28
|
+
lib/kpeg/format_parser.kpeg
|
29
|
+
lib/kpeg/format_parser.rb
|
30
|
+
lib/kpeg/grammar.rb
|
31
|
+
lib/kpeg/grammar_renderer.rb
|
32
|
+
lib/kpeg/match.rb
|
33
|
+
lib/kpeg/parser.rb
|
34
|
+
lib/kpeg/position.rb
|
35
|
+
lib/kpeg/string_escape.kpeg
|
36
|
+
lib/kpeg/string_escape.rb
|
37
|
+
test/inputs/comments.kpeg
|
38
|
+
test/test_file_parser_roundtrip.rb
|
39
|
+
test/test_gen_calc.rb
|
40
|
+
test/test_kpeg.rb
|
41
|
+
test/test_kpeg_code_generator.rb
|
42
|
+
test/test_kpeg_compiled_parser.rb
|
43
|
+
test/test_kpeg_format.rb
|
44
|
+
test/test_kpeg_grammar_renderer.rb
|
45
|
+
test/test_left_recursion.rb
|
46
|
+
vim/syntax_kpeg/ftdetect/kpeg.vim
|
47
|
+
vim/syntax_kpeg/syntax/kpeg.vim
|
data/README.rdoc
ADDED
@@ -0,0 +1,222 @@
|
|
1
|
+
= kpeg
|
2
|
+
|
3
|
+
home :: https://github.com/evanphx/kpeg
|
4
|
+
bugs :: https://github.com/evanphx/kpeg/issues
|
5
|
+
|
6
|
+
== Description
|
7
|
+
|
8
|
+
KPeg is a simple PEG library for Ruby. It provides an API as well as a native
|
9
|
+
grammar to build the grammar.
|
10
|
+
|
11
|
+
KPeg strives to provide a simple, powerful API without being too exotic.
|
12
|
+
|
13
|
+
KPeg supports direct left recursion of rules via the
|
14
|
+
{OMeta memoization}[http://www.vpri.org/pdf/tr2008003_experimenting.pdf] trick.
|
15
|
+
|
16
|
+
== Writing your first grammar
|
17
|
+
|
18
|
+
=== Setting up your grammar
|
19
|
+
|
20
|
+
All grammars start with the class/module name that will be your parser
|
21
|
+
|
22
|
+
%% name = Example::Parser
|
23
|
+
|
24
|
+
After that a block of ruby code can be defined that will be added into the
|
25
|
+
class body of your parser. Attributes that are defined in this block can be
|
26
|
+
accessed within your parser as instance variables. Methods can also be defined
|
27
|
+
in this block and used in action blocks as well.
|
28
|
+
|
29
|
+
%% {
|
30
|
+
attr_accessor :something_cool
|
31
|
+
|
32
|
+
def something_awesome
|
33
|
+
# do something awesome
|
34
|
+
end
|
35
|
+
}
|
36
|
+
|
37
|
+
=== Defining literals
|
38
|
+
|
39
|
+
Literals are static declarations of characters or regular expressions designed for reuse in the grammar. These can be constants or variables. Literals can take strings, regular expressions or character ranges
|
40
|
+
|
41
|
+
ALPHA = /[A-Za-z]/
|
42
|
+
DIGIT = /[0-9]/
|
43
|
+
period = "."
|
44
|
+
string = "a string"
|
45
|
+
regex = /(regexs?)+/
|
46
|
+
char_range = [b-t]
|
47
|
+
|
48
|
+
Literals can also accept multiple definitions
|
49
|
+
|
50
|
+
vowel = "a" | "e" | "i" | "o" | "u"
|
51
|
+
alpha = /[A-Z]/ | /[a-z]/
|
52
|
+
|
53
|
+
=== Defining Rules for Values
|
54
|
+
|
55
|
+
Before you can start parsing a string you will need to define rules that you
|
56
|
+
will use to accept or reject that string. There are many different types of
|
57
|
+
rules available in kpeg
|
58
|
+
|
59
|
+
The most basic of these rules is a string capture
|
60
|
+
|
61
|
+
alpha = < /[A-Za-z]/ > { text }
|
62
|
+
|
63
|
+
While this looks very much like the ALPHA literal defined above it differs in
|
64
|
+
one important way, the text captured by the rule defined between the < and >
|
65
|
+
symbols will be set as the text variable in the block that follows. You can also
|
66
|
+
explicitly define the variable that you would like but only with existing
|
67
|
+
rules or literals.
|
68
|
+
|
69
|
+
letter = alpha:a { a }
|
70
|
+
|
71
|
+
Additionally blocks can return true or false values based upon an expression
|
72
|
+
within the block. To return true if a test passes do the following:
|
73
|
+
|
74
|
+
match_greater_than_10 = < num:n > &{ n > 10 }
|
75
|
+
|
76
|
+
To test and return a false value if the test passes do the following:
|
77
|
+
|
78
|
+
do_not_match_greater_than_10 = < num:n > !{ n > 10 }
|
79
|
+
|
80
|
+
Rules can also act like functions and take parameters. An example of this is
|
81
|
+
lifted from the {Email List
|
82
|
+
Validator}[https://github.com/larb/email_address_validator], where an ascii
|
83
|
+
value is passed in and the character is evaluated against it returning a true
|
84
|
+
if it matches
|
85
|
+
|
86
|
+
d(num) = <.> &{ text[0] == num }
|
87
|
+
|
88
|
+
Rules support some regular expression syntax for matching
|
89
|
+
|
90
|
+
* maybe ?
|
91
|
+
* many +
|
92
|
+
* kleene *
|
93
|
+
* groupings ()
|
94
|
+
|
95
|
+
Examples:
|
96
|
+
|
97
|
+
letters = alpha+
|
98
|
+
words = alpha+ space* period?
|
99
|
+
sentence = (letters+ | space+)+
|
100
|
+
|
101
|
+
Kpeg also allows a rule to define the acceptable number of matches in the form
|
102
|
+
of a range. In regular expressions this is often denoted with syntax like
|
103
|
+
{0,3}. Kpeg uses this syntax to accomplish match ranges [min, max].
|
104
|
+
|
105
|
+
matches_3_to_5_times = letter[3,5]
|
106
|
+
matches_3_to_any_times = letter[3,*]
|
107
|
+
|
108
|
+
=== Defining Actions
|
109
|
+
|
110
|
+
Illustrated above in some of the examples, kpeg allows you to perform actions
|
111
|
+
based upon a match that are described in the block provided or in the rule
|
112
|
+
definition itself.
|
113
|
+
|
114
|
+
num = /[1-9][0-9]*/
|
115
|
+
sum = < num:n1 "+" num:n2 > { n1 + n2 }
|
116
|
+
|
117
|
+
As of version 0.8 an alternate syntax has been added for calling defined
|
118
|
+
methods as actions.
|
119
|
+
|
120
|
+
%% {
|
121
|
+
def add(n1, n2)
|
122
|
+
n1 + n2
|
123
|
+
end
|
124
|
+
}
|
125
|
+
num = /[1-9][0-9]*/
|
126
|
+
sum = < num:n1 "+" num:n2 > ~add(n1, n2)
|
127
|
+
|
128
|
+
=== Referencing an external grammar
|
129
|
+
|
130
|
+
Kpeg allows you to run a rule that is defined in an external grammar. This is
|
131
|
+
useful if there is a defined set of rules that you would like to reuse in
|
132
|
+
another parser. To do this, create your grammar and generate a parser using
|
133
|
+
the kpeg command line tool.
|
134
|
+
|
135
|
+
kpeg literals.kpeg
|
136
|
+
|
137
|
+
Once you have the generated parser, include that file into your new grammar
|
138
|
+
|
139
|
+
%{
|
140
|
+
require "literals.kpeg.rb"
|
141
|
+
}
|
142
|
+
|
143
|
+
Then create a variable to hold the foreign interface and pass it the class name
|
144
|
+
of your parser. In this case my parser class name is Literal
|
145
|
+
|
146
|
+
%foreign_grammer = Literal
|
147
|
+
|
148
|
+
You can then use rules defined in the foreign grammar in the local grammar
|
149
|
+
file like so
|
150
|
+
|
151
|
+
sentence = (%foreign_grammer.alpha %foreign_grammer.space*)+
|
152
|
+
%foreign_grammer.period
|
153
|
+
|
154
|
+
=== Comments
|
155
|
+
|
156
|
+
Kpeg allows comments to be added to the grammar file by using the # symbol
|
157
|
+
|
158
|
+
# This is a comment in my grammar
|
159
|
+
|
160
|
+
== Generating and running your parser
|
161
|
+
|
162
|
+
Before you can generate your parser you will need to define a root rule. This
|
163
|
+
will be the first rule run against the string provided to the parser
|
164
|
+
|
165
|
+
root = sentence
|
166
|
+
|
167
|
+
To generate the parser run the kpeg command with the kpeg file(s) as an
|
168
|
+
argument. This will generate a ruby file with the same name as your grammar
|
169
|
+
file.
|
170
|
+
|
171
|
+
kpeg example.kpeg
|
172
|
+
|
173
|
+
Include your generated parser file into an application that you want to use
|
174
|
+
the parser in and run it. Create a new instance of the parser and pass in the
|
175
|
+
string you want to evaluate. When parse is called on the parser instance it
|
176
|
+
will return true if the string is matched, or false if it is not.
|
177
|
+
|
178
|
+
require "example.kpeg.rb"
|
179
|
+
|
180
|
+
parser = Example::Parser.new(string_to_evaluate)
|
181
|
+
parser.parse
|
182
|
+
|
183
|
+
== Shortcuts and other techniques
|
184
|
+
|
185
|
+
Per vito, you can get the current line or current column in the following way
|
186
|
+
|
187
|
+
line = { current_line }
|
188
|
+
column = { current_column }
|
189
|
+
foo = line:line ... { # use line here }
|
190
|
+
|
191
|
+
== AST Generation
|
192
|
+
|
193
|
+
As of Kpeg 0.8 a parser can now generate an AST. To define an AST node use the
|
194
|
+
following syntax
|
195
|
+
|
196
|
+
%% assign = ast Assignment(name, value)
|
197
|
+
|
198
|
+
Once you have a defined AST node, it can be used in your grammar like so
|
199
|
+
|
200
|
+
assignment = identifier:i space* "=" space* value:v ~assign(i,v)
|
201
|
+
|
202
|
+
This will create a new Assignment node that you can add into your AST.
|
203
|
+
|
204
|
+
For a good example of usage check out Talon[https://github.com/evanphx/talon]
|
205
|
+
|
206
|
+
== Examples
|
207
|
+
|
208
|
+
There are several examples available in the /examples directory. The upper
|
209
|
+
parser has a readme with a step by step description of the grammar.
|
210
|
+
|
211
|
+
== Projects using kpeg
|
212
|
+
|
213
|
+
Dang[https://github.com/veganstraightedge/dang]
|
214
|
+
|
215
|
+
{Email Address Validator}[https://github.com/larb/email_address_validator]
|
216
|
+
|
217
|
+
Callisto[https://github.com/dwaite/Callisto]
|
218
|
+
|
219
|
+
Doodle[https://github.com/vito/doodle]
|
220
|
+
|
221
|
+
Kanbanpad[https://kanbanpad.com] (uses kpeg for parsing of the 'enter
|
222
|
+
something' bar)
|
data/Rakefile
CHANGED
@@ -1,15 +1,21 @@
|
|
1
|
-
|
1
|
+
# -*- ruby -*-
|
2
2
|
|
3
|
-
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
4
5
|
|
5
|
-
|
6
|
+
Hoe.plugin :bundler
|
7
|
+
Hoe.plugin :gemspec
|
8
|
+
Hoe.plugin :git
|
9
|
+
Hoe.plugin :minitest
|
10
|
+
Hoe.plugin :travis
|
6
11
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
t.verbose = true
|
12
|
+
Hoe.spec 'kpeg' do
|
13
|
+
self.readme_file = "README.rdoc"
|
14
|
+
developer 'Evan Phoenix', 'evan@fallingsnow.net'
|
11
15
|
end
|
12
16
|
|
17
|
+
task :test => :parser
|
18
|
+
|
13
19
|
task :grammar do
|
14
20
|
require 'kpeg'
|
15
21
|
require 'kpeg/format'
|
@@ -19,8 +25,14 @@ task :grammar do
|
|
19
25
|
gr.render(STDOUT)
|
20
26
|
end
|
21
27
|
|
22
|
-
|
23
|
-
|
24
|
-
sh "ruby -Ilib bin/kpeg -o lib/kpeg/string_escape.rb -f lib/kpeg/string_escape.kpeg"
|
25
|
-
sh "ruby -Ilib bin/kpeg -o lib/kpeg/format_parser.rb -s -f lib/kpeg/format.kpeg"
|
28
|
+
rule ".rb" => ".kpeg" do |t|
|
29
|
+
ruby "-Ilib bin/kpeg -s -o #{t.name} -f #{t.source}"
|
26
30
|
end
|
31
|
+
|
32
|
+
desc "build the parser"
|
33
|
+
task :parser => %w[
|
34
|
+
lib/kpeg/string_escape.rb
|
35
|
+
lib/kpeg/format_parser.rb
|
36
|
+
]
|
37
|
+
|
38
|
+
# vim: syntax=ruby
|
data/bin/kpeg
CHANGED
@@ -119,8 +119,10 @@ end
|
|
119
119
|
cg = KPeg::CodeGenerator.new name, grammar
|
120
120
|
cg.standalone = options[:standalone]
|
121
121
|
|
122
|
-
|
123
|
-
|
122
|
+
output = cg.output
|
123
|
+
|
124
|
+
open new_path, "w" do |io|
|
125
|
+
io << output
|
124
126
|
end
|
125
127
|
|
126
128
|
puts "Wrote #{name} to #{new_path}"
|
@@ -0,0 +1,17 @@
|
|
1
|
+
%% name = Calculator
|
2
|
+
|
3
|
+
%% {
|
4
|
+
attr_accessor :result
|
5
|
+
}
|
6
|
+
|
7
|
+
space = " "
|
8
|
+
- = space*
|
9
|
+
num = < /[1-9][0-9]*/ > { text.to_i }
|
10
|
+
|
11
|
+
term = term:t1 - "+" - term:t2 { t1 + t2 }
|
12
|
+
| term:t1 - "-" - term:t2 { t1 - t2 }
|
13
|
+
| fact
|
14
|
+
fact = fact:f1 - "*" - fact:f2 { f1 * f2 }
|
15
|
+
| fact:f1 - "/" - fact:f2 { f1 / f2 }
|
16
|
+
| num
|
17
|
+
root = term:t { @result = t }
|