pegex 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemspec +21 -0
- data/CHANGELOG.yaml +3 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +16 -0
- data/LICENSE +21 -0
- data/README.rdoc +78 -0
- data/Rakefile +64 -0
- data/lib/pegex/compiler.rb +91 -0
- data/lib/pegex/grammar/atoms.rb +96 -0
- data/lib/pegex/grammar.rb +21 -0
- data/lib/pegex/input.rb +41 -0
- data/lib/pegex/parser.rb +287 -0
- data/lib/pegex/pegex/ast.rb +148 -0
- data/lib/pegex/pegex/grammar.rb +414 -0
- data/lib/pegex/receiver.rb +7 -0
- data/lib/pegex/tree/wrap.rb +13 -0
- data/lib/pegex/tree.rb +17 -0
- data/lib/pegex.rb +18 -0
- data/test/compiler-checks.rb +271 -0
- data/test/compiler-checks.tml +271 -0
- data/test/compiler-equivalence.rb +79 -0
- data/test/compiler.rb +42 -0
- data/test/compiler.tml +111 -0
- data/test/error.rb +161 -0
- data/test/export_ok.rb +36 -0
- data/test/grammar-api.rb +21 -0
- data/test/lib/recursive_sort.rb +17 -0
- data/test/lib/test_pegex.rb +33 -0
- data/test/lib/testast.rb +15 -0
- data/test/lib/xxx.rb +13 -0
- data/test/tree-pegex.tml +35 -0
- data/test/tree.rb +47 -0
- data/test/tree.tml +449 -0
- metadata +99 -0
data/.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Gem specification for pegex. It is assigned to a constant so that a script
# which `load`s this file can read the name, version and description from it.
GemSpec = Gem::Specification.new do |spec|
  # Identity
  spec.name = 'pegex'
  spec.version = '0.0.1'
  spec.license = 'MIT'
  spec.required_ruby_version = '>= 1.9.1'

  # Authorship and descriptive metadata
  spec.authors << 'Ingy döt Net'
  spec.email = 'ingy@ingy.net'
  spec.summary = 'Acmeist PEG Parsing Framework'
  spec.description = <<-'.'
Pegex is a Acmeist parser framework. It allows you to easily create parsers
that will work equivalently in lots of programming languages!
.
  spec.homepage = 'http://pegex.org'

  # Package every path that git tracks (one path per line of output).
  spec.files = `git ls-files`.lines.map(&:chomp)

  spec.add_development_dependency 'testml-lite', '>= 0.0.1'
end
|
data/CHANGELOG.yaml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
(The MIT License)
|
2
|
+
|
3
|
+
Copyright © 2012 Ingy döt Net
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the ‘Software’), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
9
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
10
|
+
so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED ‘AS IS’, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
= Pegex - Acmeist PEG Parsing Framework
|
2
|
+
|
3
|
+
Pegex is an Acmeist parser framework. It allows you to easily create
|
4
|
+
parsers that will work equivalently in lots of programming languages!
|
5
|
+
|
6
|
+
= Synopsis
|
7
|
+
|
8
|
+
require 'pegex'
|
9
|
+
result = pegex(grammar).parse(input)
|
10
|
+
|
11
|
+
or with options:
|
12
|
+
|
13
|
+
require 'pegex'
|
14
|
+
require 'my_receiver_class'
|
15
|
+
parser = pegex(grammar, MyReceiverClass)
|
16
|
+
result = parser.parse(input)
|
17
|
+
|
18
|
+
or more explicitly:
|
19
|
+
|
20
|
+
require 'pegex/parser'
|
21
|
+
require 'pegex/grammar'
|
22
|
+
pegex_grammar = Pegex::Grammar.new do |g|
|
23
|
+
g.text = grammar
|
24
|
+
end
|
25
|
+
parser = Pegex::Parser.new do |p|
|
26
|
+
p.grammar = pegex_grammar
|
27
|
+
end
|
28
|
+
result = parser.parse(input)
|
29
|
+
|
30
|
+
= Description
|
31
|
+
|
32
|
+
Pegex is an Acmeist parser framework. It allows you to easily create parsers
|
33
|
+
that will work equivalently in lots of programming languages!
|
34
|
+
|
35
|
+
Pegex gets its name by combining Parsing Expression Grammars (PEG), with
|
36
|
+
Regular Expressions (Regex). That's actually what Pegex does.
|
37
|
+
|
38
|
+
PEG is the cool new way to elegantly specify recursive descent grammars. The
|
39
|
+
Perl 6 language is defined in terms of a self modifying PEG language called
|
40
|
+
*Perl 6 Rules*. Regexes are familiar to programmers of most modern
|
41
|
+
programming languages. Pegex defines a simple PEG syntax, where all the
|
42
|
+
terminals are regexes. This means that Pegex can be quite fast and powerful.
|
43
|
+
|
44
|
+
Pegex attempts to be the simplest way to define new (or old) Domain Specific
|
45
|
+
Languages (DSLs) that need to be used in several programming languages and
|
46
|
+
environments.
|
47
|
+
|
48
|
+
= Usage
|
49
|
+
|
50
|
+
The +pegex.rb+ module itself is just a trivial way to use the
|
51
|
+
Pegex framework. It is only intended for the simplest of uses.
|
52
|
+
|
53
|
+
+pegex.rb+ defines a single function, +pegex+, which takes a Pegex grammar
|
54
|
+
string as input. You may also pass in a receiver class or object.
|
55
|
+
|
56
|
+
parser = pegex(grammar, MyReceiver)
|
57
|
+
|
58
|
+
The +pegex+ function returns a Pegex::Parser object, on which you would
|
59
|
+
typically call the +parse()+ method, which (on success) will return a data
|
60
|
+
structure of the parsed data.
|
61
|
+
|
62
|
+
See Pegex::API for more details.
|
63
|
+
|
64
|
+
= Documentation
|
65
|
+
|
66
|
+
This Pegex library was ported to Ruby from the Perl module:
|
67
|
+
http://search.cpan.org/dist/Pegex/
|
68
|
+
|
69
|
+
The code and tests were fully ported from Perl to Ruby. Pegex should work
|
70
|
+
exactly the same in both languages. The documentation and examples have not yet
|
71
|
+
been fully ported, but they will be soon enough. For now, refer to the Perl
|
72
|
+
docs.
|
73
|
+
|
74
|
+
You can start here: http://search.cpan.org/dist/Pegex/lib/Pegex.pod
|
75
|
+
|
76
|
+
= Copyright
|
77
|
+
|
78
|
+
Copyright (c) 2012 Ingy döt Net. See LICENSE for further details.
|
data/Rakefile
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# Rake itself, plus its test-task and clean-task helpers
require 'rake'
require 'rake/testtask'
require 'rake/clean'

# Gem constants, read from the specification that the gemspec file assigns
# to GemSpec when it is loaded.
GemSpecFile = '.gemspec'
load(GemSpecFile)
GemName = GemSpec.name
GemVersion = GemSpec.version
GemDir = "#{GemName}-#{GemVersion}"
GemFile = "#{GemDir}.gem"
DevNull = '2>/dev/null'

# Running plain `rake` lists the available tasks.
task(:default => 'help')

# Artifacts removed by `rake clean`
CLEAN.include(GemDir, GemFile, 'data.tar.gz', 'metadata.gz')

desc 'Run the tests'
task :test do
  # The Rake::TestTask is built inside the task body, i.e. when this task
  # executes, and covers every Ruby file directly under test/.
  Rake::TestTask.new do |test_task|
    test_task.verbose = true
    test_task.test_files = FileList['test/*.rb']
  end
end

desc 'Build the gem'
task :build => [:clean, :test] do
  sh("gem build #{GemSpecFile}")
end

desc 'Install the gem'
task :install => [:build] do
  sh("gem install #{GemFile}")
end

desc 'Build, unpack and inspect the gem'
task :distdir => [:build] do
  # Unpack the outer gem archive, then unpack its data.tar.gz into a fresh
  # directory named after the gem and open a shell there.
  sh("tar xf #{GemFile} #{DevNull}")
  Dir.mkdir(GemDir)
  Dir.chdir(GemDir)
  sh("tar xzf ../data.tar.gz #{DevNull}")
  puts("\n>>> Entering sub-shell for #{GemDir}...")
  system(ENV['SHELL'])
end

desc 'Build and push the gem'
task :release => [:build] do
  sh("gem push #{GemFile}")
end

desc 'Print a description of the gem'
task :desc do
  puts("Gem: '#{GemName}' (version #{GemVersion})")
  puts
  puts(GemSpec.description.gsub(/^/, ' '))
end

desc 'List the Rakefile tasks'
task :help do
  puts('The following rake tasks are available:')
  puts
  puts(`rake -T`.gsub(/^/, ' '))
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'pegex/parser'
|
2
|
+
require 'pegex/pegex/grammar'
|
3
|
+
require 'pegex/pegex/ast'
|
4
|
+
require 'pegex/grammar/atoms'
|
5
|
+
|
6
|
+
# Compiles Pegex grammar text into a grammar tree.
#
# #compile runs three passes: #parse (grammar text -> raw tree), #combinate
# (keep the rules reachable from the top rule and expand the <name> and ~
# references inside their regexes) and #native (not implemented yet).
class Pegex::Compiler
  attr_accessor :tree

  def initialize
    @tree = {}    # result of the most recent pass
    @_tree = {}   # scratch tree that #combinate fills and then publishes
    @atoms = Pegex::Grammar::Atoms.new.atoms
  end

  # Runs the whole pipeline on +grammar+.
  # Returns self; read the result from #tree.
  def compile(grammar)
    parse(grammar)
    combinate
    native
    self
  end

  # Parses +input+ with Pegex::Pegex::Grammar as the grammar and
  # Pegex::Pegex::AST as the receiver, and stores the result in #tree.
  # Returns self.
  def parse(input)
    parser = Pegex::Parser.new do |p|
      p.grammar = Pegex::Pegex::Grammar.new
      p.receiver = Pegex::Pegex::AST.new
    end
    @tree = parser.parse(input)
    self
  end

  # Rebuilds #tree so that it holds the '+'-prefixed entries plus every rule
  # reachable from +rule+ (default: the tree's '+toprule' entry).
  # Does nothing when there is no starting rule. Returns self.
  def combinate(rule = nil)
    rule ||= @tree['+toprule']
    return self unless rule

    @tree.keys.grep(/^\+/).each { |key| @_tree[key] = @tree[key] }
    combinate_rule(rule)
    @tree = @_tree
    self
  end

  # Copies +rule+ into the combined tree and processes its definition.
  # A rule that is already present is skipped, which also stops recursion
  # on self-referencing rules.
  def combinate_rule(rule)
    return if @_tree[rule]
    object = @_tree[rule] = @tree[rule]
    combinate_object(object)
  end

  # Processes one grammar node: its '.sep' sub-node first (if any), then the
  # node itself according to whether it is a '.rgx', '.ref', '.any', '.all'
  # or '.err' node.
  def combinate_object(object)
    if (sub = object['.sep'])
      combinate_object(sub)
    end

    if object['.rgx']
      combinate_re(object)
    elsif (rule = object['.ref'])
      # References to names that are not rules in the tree are left as-is.
      combinate_rule(rule) if @tree[rule]
    elsif object['.any']
      object['.any'].each { |elem| combinate_object(elem) }
    elsif object['.all']
      object['.all'].each { |elem| combinate_object(elem) }
    elsif object['.err']
      # Nothing to expand for an error node.
    else
      puts "Can't combinate:"
      # NOTE(review): XXX is not defined or required in this file; presumably
      # a debugging helper loaded elsewhere -- confirm it is available here.
      XXX object
    end
  end

  # Expands, in place, the regex text stored under +regex+['.rgx'].
  #
  # Each pass rewrites a run of N unescaped '~' characters to <wsN> and then
  # replaces every <name> with the '.rgx' text of the rule called name or,
  # when no such rule exists, with the atom of that name. Passes repeat until
  # the text stops changing, so replacements may contain further references.
  #
  # Raises RuntimeError when <name> names a rule that has no '.rgx', or names
  # neither a rule nor an atom.
  def combinate_re(regex)
    re = regex['.rgx'].clone
    loop do
      # The lookbehind skips a '~' that directly follows a backslash.
      # Without it the escaped tilde produced by the TILDE atom ('\~') would
      # be rewritten to '\<ws1>' on the next pass and end up as '\\s*'.
      re.gsub!(/(?<!\\)(~+)/) { "<ws#{$1.length}>" }
      re.gsub!(/<(\w+)>/) do
        name = $1
        if @tree[name]
          @tree[name]['.rgx'] or fail "'#{name}' not defined as a single RE"
        else
          @atoms[name] or fail "'#{name}' not defined in the grammar"
        end
      end
      break if re == regex['.rgx']
      regex['.rgx'] = re.clone
    end
  end

  # Placeholder for a final pass; does nothing yet.
  def native
    # TODO
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# Table of predefined atoms: a Hash mapping an atom name to the regex source
# text (or character-class fragment) it stands for. All values are
# single-quoted strings, so the backslashes reach the regex engine untouched.
class Pegex::Grammar::Atoms
  attr_accessor :atoms

  def initialize
    @atoms = {
      # Default whitespace rules for grammars that use '~'
      'ws'    => '<WS>',
      'ws1'   => '<ws>*',
      'ws2'   => '<ws>+',

      # Special rules
      'ALWAYS' => '',
      'NEVER'  => '(?!)',

      # Basics
      'ALL'   => '[\s\S]',  # Every char (including newline and space)
      'ANY'   => '.',       # Any char (except newline)
      'SPACE' => '\ ',      # ASCII space char
      'TAB'   => '\t',      # Horizontal tab
      'WS'    => '\s',      # Whitespace
      'NS'    => '\S',      # Not Space
      'NL'    => '\n',      # Newline
      'BREAK' => '\n',      # Line break (more readable alias for NL)
      'CR'    => '\r',      # Carriage return
      'EOL'   => '\r?\n',   # Unix/DOS line ending
      'DOS'   => '\r\n',    # Windows/DOS line ending
      'EOS'   => '\z',      # End of stream/string/file

      # Common character classes
      'WORD'    => '\w',
      'BLANK'   => '[\ \t]',
      'ALPHA'   => '[a-zA-Z]',
      'LOWER'   => '[a-z]',
      'UPPER'   => '[A-Z]',
      'DIGIT'   => '[0-9]',
      'OCTAL'   => '[0-7]',
      'HEX'     => '[0-9a-fA-F]',
      'ALNUM'   => '[a-zA-Z0-9]',
      'CONTROL' => '[\x00-\x1f]',
      'HICHAR'  => '[\x7f-\x{ffff}]',

      # Ranges - for use inside character classes
      'WORDS'    => '0-9A-Za-z_',
      'BLANKS'   => '\ \t',
      'ALPHAS'   => 'a-zA-Z',
      'LOWERS'   => 'a-z',
      'UPPERS'   => 'A-Z',
      'DIGITS'   => '0-9',
      'OCTALS'   => '0-7',
      'HEXS'     => '0-9a-fA-F',
      'ALNUMS'   => 'a-zA-Z0-9',
      'CONTROLS' => '\x00-\x1f',
      'HICHARS'  => '\x7f-\x{ffff}',

      # Paired punctuation
      'SINGLE'  => "'",
      'DOUBLE'  => '"',
      'GRAVE'   => '`',
      'LPAREN'  => '\(',
      'RPAREN'  => '\)',
      'LCURLY'  => '\{',
      'RCURLY'  => '\}',
      'LSQUARE' => '\[',
      'RSQUARE' => '\]',
      'LANGLE'  => '<',
      'RANGLE'  => '>',

      # Other ASCII punctuation
      'BANG'    => '!',
      'AT'      => '\@',
      'HASH'    => '\#',
      'DOLLAR'  => '\$',
      'PERCENT' => '%',
      'CARET'   => '\^',
      'AMP'     => '&',
      'STAR'    => '\*',
      'TILDE'   => '\~',
      'UNDER'   => '_',
      'DASH'    => '\-',
      'PLUS'    => '\+',
      'EQUAL'   => '=',
      'PIPE'    => '\|',
      'BACK'    => '\\\\',
      'COLON'   => ':',
      'SEMI'    => ';',
      'COMMA'   => ',',
      'DOT'     => '\.',
      'QMARK'   => '\?',
      'SLASH'   => '/',

      # Special rules for named control chars
      'BS' => '\x08',  # Backspace
      'FF' => '\x0C',  # Formfeed (ASCII 12 decimal is 0x0C; 0x12 would be DC2)
    }
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'pegex'
|
2
|
+
# A grammar: optional grammar text plus the tree built from it.
class Pegex::Grammar
  attr_accessor :text

  # Yields the new grammar (when a block is given) so the caller can
  # configure it, e.g. set +text+, then builds the tree unless one is
  # already set.
  def initialize
    yield(self) if block_given?
    @tree = make_tree unless @tree
  end

  # Returns the grammar tree, building it first when it is missing.
  # Raises RuntimeError when there is neither a tree nor grammar text.
  def tree
    unless @tree
      @text or fail "Can't create a #{self.class} grammar. No grammar text"
      @tree = make_tree
    end
    @tree
  end

  # Compiles +text+ with Pegex::Compiler, stores the resulting tree and
  # returns it.
  def make_tree
    # Loaded on demand rather than at file load time; presumably to avoid a
    # circular require with pegex/compiler -- confirm before hoisting.
    require 'pegex/compiler'
    @tree = Pegex::Compiler.new.compile(@text).tree
  end
end
|
21
|
+
|
data/lib/pegex/input.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'pegex'
|
2
|
+
|
3
|
+
# Holds a source string and hands it out through an open / read / close
# protocol.
class Pegex::Input
  attr_accessor :string

  # Starts out not open, not closed and not at end of input. Yields the new
  # object when a block is given so the caller can set +string+.
  def initialize
    @is_eof = false
    @is_open = false
    @is_close = false
    # Guarded so that Pegex::Input.new without a block no longer raises
    # LocalJumpError (same pattern as Pegex::Grammar#initialize).
    yield self if block_given?
  end

  # Returns the whole buffer in a single call, empties it and marks the
  # input as being at end of input. Returns nil when the buffer is empty.
  def read
    buffer = @buffer
    @buffer = nil
    # Was `@eof`, a different variable from the `@is_eof` flag that
    # #initialize declares, so the declared flag was never updated.
    @is_eof = true
    buffer
  end

  # Loads +string+ into the buffer and marks the input as open.
  # Raises RuntimeError when +string+ was never assigned.
  def open
    if defined? @string
      @buffer = @string
    else
      fail "Pegex::Input::open failed. No source to open"
    end
    @is_open = true
  end

  # Marks the input as closed and drops the buffer. Returns self.
  # Raises RuntimeError when the object has already been closed.
  def close
    fail "Attempted to close an unopen Pegex::Input object" \
      if @is_close
    @is_open = false
    @is_close = true
    @buffer = nil
    self
  end

  # True between a successful #open and the next #close.
  def open?
    @is_open
  end
end
|