pegex 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemspec +21 -0
- data/CHANGELOG.yaml +3 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +16 -0
- data/LICENSE +21 -0
- data/README.rdoc +78 -0
- data/Rakefile +64 -0
- data/lib/pegex/compiler.rb +91 -0
- data/lib/pegex/grammar/atoms.rb +96 -0
- data/lib/pegex/grammar.rb +21 -0
- data/lib/pegex/input.rb +41 -0
- data/lib/pegex/parser.rb +287 -0
- data/lib/pegex/pegex/ast.rb +148 -0
- data/lib/pegex/pegex/grammar.rb +414 -0
- data/lib/pegex/receiver.rb +7 -0
- data/lib/pegex/tree/wrap.rb +13 -0
- data/lib/pegex/tree.rb +17 -0
- data/lib/pegex.rb +18 -0
- data/test/compiler-checks.rb +271 -0
- data/test/compiler-checks.tml +271 -0
- data/test/compiler-equivalence.rb +79 -0
- data/test/compiler.rb +42 -0
- data/test/compiler.tml +111 -0
- data/test/error.rb +161 -0
- data/test/export_ok.rb +36 -0
- data/test/grammar-api.rb +21 -0
- data/test/lib/recursive_sort.rb +17 -0
- data/test/lib/test_pegex.rb +33 -0
- data/test/lib/testast.rb +15 -0
- data/test/lib/xxx.rb +13 -0
- data/test/tree-pegex.tml +35 -0
- data/test/tree.rb +47 -0
- data/test/tree.tml +449 -0
- metadata +99 -0
data/.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8

# Gem specification for pegex. The spec object is bound to the GemSpec
# constant so that any script which loads this file can read its fields.
GemSpec = Gem::Specification.new do |spec|
  # Identity and compatibility
  spec.name = 'pegex'
  spec.version = '0.0.1'
  spec.license = 'MIT'
  spec.required_ruby_version = '>= 1.9.1'

  # Authorship and descriptive text
  spec.authors << 'Ingy döt Net'
  spec.email = 'ingy@ingy.net'
  spec.summary = 'Acmeist PEG Parsing Framework'
  spec.description = <<-'.'
Pegex is a Acmeist parser framework. It allows you to easily create parsers
that will work equivalently in lots of programming languages!
.
  spec.homepage = 'http://pegex.org'

  # Package every file git reports, one path per line of `git ls-files`.
  tracked = `git ls-files`
  spec.files = tracked.lines.map(&:chomp)

  spec.add_development_dependency 'testml-lite', '>= 0.0.1'
end
|
data/CHANGELOG.yaml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
(The MIT License)
|
2
|
+
|
3
|
+
Copyright © 2012 Ingy döt Net
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the ‘Software’), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
9
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
10
|
+
so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED ‘AS IS’, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
= Pegex - Acmeist PEG Parsing Framework
|
2
|
+
|
3
|
+
Pegex is an Acmeist parser framework. It allows you to easily create
|
4
|
+
parsers that will work equivalently in lots of programming languages!
|
5
|
+
|
6
|
+
= Synopsis
|
7
|
+
|
8
|
+
require 'pegex'
|
9
|
+
result = pegex(grammar).parse(input)
|
10
|
+
|
11
|
+
or with options:
|
12
|
+
|
13
|
+
require 'pegex'
|
14
|
+
require 'my_receiver_class'
|
15
|
+
parser = pegex(grammar, MyReceiverClass)
|
16
|
+
result = parser.parse(input)
|
17
|
+
|
18
|
+
or more explicitly:
|
19
|
+
|
20
|
+
require 'pegex/parser'
|
21
|
+
require 'pegex/grammar'
|
22
|
+
pegex_grammar = Pegex::Grammar.new do |g|
|
23
|
+
g.text = grammar
|
24
|
+
end
|
25
|
+
parser = Pegex::Parser.new do |p|
|
26
|
+
p.grammar = pegex_grammar
|
27
|
+
end
|
28
|
+
result = parser.parse(input)
|
29
|
+
|
30
|
+
= Description
|
31
|
+
|
32
|
+
Pegex is an Acmeist parser framework. It allows you to easily create parsers
|
33
|
+
that will work equivalently in lots of programming languages!
|
34
|
+
|
35
|
+
Pegex gets its name by combining Parsing Expression Grammars (PEG) with
|
36
|
+
Regular Expressions (Regex). That's actually what Pegex does.
|
37
|
+
|
38
|
+
PEG is the cool new way to elegantly specify recursive descent grammars. The
|
39
|
+
Perl 6 language is defined in terms of a self modifying PEG language called
|
40
|
+
*Perl 6 Rules*. Regexes are familiar to programmers of most modern
|
41
|
+
programming languages. Pegex defines a simple PEG syntax, where all the
|
42
|
+
terminals are regexes. This means that Pegex can be quite fast and powerful.
|
43
|
+
|
44
|
+
Pegex attempts to be the simplest way to define new (or old) Domain Specific
|
45
|
+
Languages (DSLs) that need to be used in several programming languages and
|
46
|
+
environments.
|
47
|
+
|
48
|
+
= Usage
|
49
|
+
|
50
|
+
The +pegex.rb+ module itself is just a trivial way to use the
|
51
|
+
Pegex framework. It is only intended for the simplest of uses.
|
52
|
+
|
53
|
+
+pegex.rb+ defines a single function, +pegex+, which takes a Pegex grammar
|
54
|
+
string as input. You may also pass in a receiver class or object.
|
55
|
+
|
56
|
+
parser = pegex(grammar, MyReceiver)
|
57
|
+
|
58
|
+
The +pegex+ function returns a Pegex::Parser object, on which you would
|
59
|
+
typically call the +parse()+ method, which (on success) will return a data
|
60
|
+
structure of the parsed data.
|
61
|
+
|
62
|
+
See Pegex::API for more details.
|
63
|
+
|
64
|
+
= Documentation
|
65
|
+
|
66
|
+
This Pegex library was ported to Ruby from the Perl module:
|
67
|
+
http://search.cpan.org/dist/Pegex/
|
68
|
+
|
69
|
+
The code and tests were fully ported from Perl to Ruby. Pegex should work
|
70
|
+
exactly the same in both languages. The documentation and examples have not yet
|
71
|
+
been fully ported, but they will be soon enough. For now, refer to the Perl
|
72
|
+
docs.
|
73
|
+
|
74
|
+
You can start here: http://search.cpan.org/dist/Pegex/lib/Pegex.pod
|
75
|
+
|
76
|
+
= Copyright
|
77
|
+
|
78
|
+
Copyright (c) 2012 Ingy döt Net. See LICENSE for further details.
|
data/Rakefile
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# Load Gem constants from the gemspec
GemSpecFile = '.gemspec'
load GemSpecFile
GemName = GemSpec.name
GemVersion = GemSpec.version
GemDir = "#{GemName}-#{GemVersion}"
GemFile = "#{GemDir}.gem"
# Shell suffix appended to the tar commands below to discard stderr.
DevNull = '2>/dev/null'

# Require the Rake libraries
require 'rake'
require 'rake/testtask'
require 'rake/clean'

task :default => 'help'

# Paths removed by `rake clean`.
CLEAN.include GemDir, GemFile, 'data.tar.gz', 'metadata.gz'

desc 'Run the tests'
task :test do
  # NOTE(review): the TestTask is created inside the :test action rather
  # than at load time -- confirm this is intentional.
  Rake::TestTask.new do |t|
    t.verbose = true
    t.test_files = FileList['test/*.rb']
  end
end

desc 'Build the gem'
task :build => [:clean, :test] do
  sh "gem build #{GemSpecFile}"
end

desc 'Install the gem'
task :install => [:build] do
  sh "gem install #{GemFile}"
end

desc 'Build, unpack and inspect the gem'
task :distdir => [:build] do
  sh "tar xf #{GemFile} #{DevNull}"
  Dir.mkdir GemDir
  Dir.chdir GemDir
  sh "tar xzf ../data.tar.gz #{DevNull}"
  puts "\n>>> Entering sub-shell for #{GemDir}..."
  # Fall back to /bin/sh when SHELL is unset; `system nil` raises TypeError.
  system ENV.fetch('SHELL', '/bin/sh')
end

desc 'Build and push the gem'
task :release => [:build] do
  sh "gem push #{GemFile}"
end

desc 'Print a description of the gem'
task :desc do
  puts "Gem: '#{GemName}' (version #{GemVersion})"
  puts
  # Parenthesised so the regex literal is not parsed ambiguously.
  puts(GemSpec.description.gsub(/^/, ' '))
end

desc 'List the Rakefile tasks'
task :help do
  puts 'The following rake tasks are available:'
  puts
  puts(`rake -T`.gsub(/^/, ' '))
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'pegex/parser'
|
2
|
+
require 'pegex/pegex/grammar'
|
3
|
+
require 'pegex/pegex/ast'
|
4
|
+
require 'pegex/grammar/atoms'
|
5
|
+
|
6
|
+
# Turns Pegex grammar text into a grammar tree (a Hash keyed by rule name).
#
# #compile runs three passes in order: #parse, #combinate and #native.
# The result is available through the +tree+ accessor.
class Pegex::Compiler
  attr_accessor :tree

  def initialize
    @tree = {}
    # Working tree filled in by #combinate; it replaces @tree when done.
    @_tree = {}
    # Built-in atom name => regex source table.
    @atoms = Pegex::Grammar::Atoms.new.atoms
  end

  # Runs all passes over +grammar+ and returns self.
  def compile grammar
    parse grammar
    combinate
    native
    return self
  end

  # Parses +input+ with a Pegex::Parser configured with the Pegex grammar and
  # the AST receiver, stores the result in @tree and returns self.
  def parse input
    parser = Pegex::Parser.new do |p|
      p.grammar = Pegex::Pegex::Grammar.new
      p.receiver = Pegex::Pegex::AST.new
    end

    @tree = parser.parse input

    return self
  end

  # Keeps only the rules reachable from +rule+ (default: the '+toprule'
  # entry), expanding regex references along the way. Entries whose key
  # starts with '+' are always carried over. Returns self; does nothing when
  # no starting rule is known.
  def combinate rule=nil
    (rule ||= @tree['+toprule']) or return self

    @tree.keys.grep(/^\+/).each {|k| @_tree[k] = @tree[k]}

    combinate_rule rule
    @tree = @_tree
    return self
  end

  # Copies +rule+ into the working tree (once) and processes its definition.
  def combinate_rule rule
    return if @_tree[rule]
    object = @_tree[rule] = @tree[rule]
    combinate_object object
  end

  # Walks one rule definition, dispatching on which of the keys '.rgx',
  # '.ref', '.any', '.all' or '.err' it carries. A '.sep' sub-object, when
  # present, is processed first.
  #
  # Raises RuntimeError when the object carries none of those keys.
  def combinate_object object
    if (sub = object['.sep'])
      combinate_object sub
    end
    if object['.rgx']
      combinate_re object
    elsif (rule = object['.ref'])
      # References to names that are not rules in the tree are left alone.
      combinate_rule rule if @tree[rule]
    elsif object['.any']
      object['.any'].each {|elem| combinate_object elem}
    elsif object['.all']
      object['.all'].each {|elem| combinate_object elem}
    elsif object['.err']
      # Nothing to combinate for this kind of object.
    else
      # Report the offending object in the exception itself instead of
      # relying on a debugging helper being loaded.
      fail "Can't combinate: #{object.inspect}"
    end
  end

  # Expands the regex source stored in regex['.rgx'] in place, repeating
  # until a pass changes nothing:
  #
  # * each run of N unescaped '~' becomes the reference '<wsN>';
  # * each '<name>' is replaced by the '.rgx' of the rule +name+, or else by
  #   the built-in atom +name+.
  #
  # Raises RuntimeError when a referenced rule is not a single regex or when
  # the name is neither a rule nor an atom.
  def combinate_re regex
    re = regex['.rgx'].clone
    loop do
      # The lookbehind skips '\~' so that an escaped tilde (which is what
      # the TILDE atom expands to) stays a literal tilde on the next pass.
      re.gsub!(/(?<!\\)(~+)/) do |m|
        "<ws#{$1.length}>"
      end
      re.gsub!(/<(\w+)>/) do |m|
        if @tree[$1]
          @tree[$1]['.rgx'] or fail "'#{$1}' not defined as a single RE"
        else
          @atoms[$1] or fail "'#{$1}' not defined in the grammar"
        end
      end
      break if re == regex['.rgx']
      regex['.rgx'] = re.clone
    end
  end

  # Placeholder pass; currently does nothing.
  def native
    # TODO
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# Table of built-in atoms: each name maps to a fragment of regex source.
# Values are single-quoted so backslashes reach the regex engine unchanged.
class Pegex::Grammar::Atoms
  attr_accessor :atoms

  def initialize
    @atoms = {
      # Default whitespace rules, used by the '~' shorthand
      'ws' => '<WS>',
      'ws1' => '<ws>*',
      'ws2' => '<ws>+',

      # Special rules
      'ALWAYS' => '',
      'NEVER' => '(?!)',

      # Basics
      'ALL' => '[\s\S]', # Every char (including newline and space)
      'ANY' => '.', # Any char (except newline)
      'SPACE' => '\ ', # ASCII space char
      'TAB' => '\t', # Horizontal tab
      'WS' => '\s', # Whitespace
      'NS' => '\S', # Not Space
      'NL' => '\n', # Newline
      'BREAK' => '\n', # Line break (more readable alias for NL)
      'CR' => '\r', # Carriage return
      'EOL' => '\r?\n', # Unix/DOS line ending
      'DOS' => '\r\n', # Windows/DOS line ending
      'EOS' => '\z', # End of stream/string/file

      # Common character classes
      'WORD' => '\w',
      'BLANK' => '[\ \t]',
      'ALPHA' => '[a-zA-Z]',
      'LOWER' => '[a-z]',
      'UPPER' => '[A-Z]',
      'DIGIT' => '[0-9]',
      'OCTAL' => '[0-7]',
      'HEX' => '[0-9a-fA-F]',
      'ALNUM' => '[a-zA-Z0-9]',
      'CONTROL' => '[\x00-\x1f]',
      'HICHAR' => '[\x7f-\x{ffff}]',

      # Ranges - for use inside character classes
      'WORDS' => '0-9A-Za-z_',
      'BLANKS' => '\ \t',
      'ALPHAS' => 'a-zA-Z',
      'LOWERS' => 'a-z',
      'UPPERS' => 'A-Z',
      'DIGITS' => '0-9',
      'OCTALS' => '0-7',
      'HEXS' => '0-9a-fA-F',
      'ALNUMS' => 'a-zA-Z0-9',
      'CONTROLS' => '\x00-\x1f',
      'HICHARS' => '\x7f-\x{ffff}',

      # Paired punctuation
      'SINGLE' => "'",
      'DOUBLE' => '"',
      'GRAVE' => '`',
      'LPAREN' => '\(',
      'RPAREN' => '\)',
      'LCURLY' => '\{',
      'RCURLY' => '\}',
      'LSQUARE' => '\[',
      'RSQUARE' => '\]',
      'LANGLE' => '<',
      'RANGLE' => '>',

      # Other ASCII punctuation
      'BANG' => '!',
      'AT' => '\@',
      'HASH' => '\#',
      'DOLLAR' => '\$',
      'PERCENT' => '%',
      'CARET' => '\^',
      'AMP' => '&',
      'STAR' => '\*',
      'TILDE' => '\~',
      'UNDER' => '_',
      'DASH' => '\-',
      'PLUS' => '\+',
      'EQUAL' => '=',
      'PIPE' => '\|',
      'BACK' => '\\\\',
      'COLON' => ':',
      'SEMI' => ';',
      'COMMA' => ',',
      'DOT' => '\.',
      'QMARK' => '\?',
      'SLASH' => '/',

      # Special rules for named control chars
      'BS' => '\x08', # Backspace
      'FF' => '\x0c', # Formfeed (ASCII 12 decimal, i.e. hex 0C)
    }
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'pegex'
|
2
|
+
# Holds a grammar's source text and the tree compiled from it.
class Pegex::Grammar
  attr_accessor :text

  # Yields self to an optional block (where the caller can assign +text+),
  # then builds the tree unless @tree is already set.
  #
  # Raises RuntimeError from #make_tree when no grammar text was supplied.
  def initialize
    yield self if block_given?
    @tree ||= make_tree
  end

  # Returns the grammar tree, building it from +text+ if it is not set yet.
  #
  # Raises RuntimeError when there is neither a tree nor grammar text.
  def tree
    return @tree if @tree
    fail "Can't create a #{self.class} grammar. No grammar text" unless @text
    return @tree = make_tree
  end

  # Compiles +text+ with Pegex::Compiler, stores the result in @tree and
  # returns it.
  #
  # Raises RuntimeError when +text+ is not set. The check lives here as well
  # as in #tree because #initialize calls this method directly; without it a
  # missing text would be handed to the compiler as nil.
  def make_tree
    fail "Can't create a #{self.class} grammar. No grammar text" unless @text
    # Loaded here rather than at the top of the file.
    # NOTE(review): presumably to avoid a circular require -- confirm.
    require 'pegex/compiler'
    return @tree = Pegex::Compiler.new.compile(@text).tree
  end
end
|
21
|
+
|
data/lib/pegex/input.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'pegex'
|
2
|
+
|
3
|
+
# Input source for parsing, backed by an in-memory string.
#
# Assign +string+, call #open to load it into the buffer, #read to take the
# buffered content, and #close when done.
class Pegex::Input
  attr_accessor :string

  # Resets the state flags and yields self to an optional block so the
  # caller can assign +string+.
  def initialize
    @buffer = nil
    @is_eof = false
    @is_open = false
    @is_close = false
    yield self if block_given?
  end

  # Returns the buffered content and empties the buffer, marking the input
  # as at end-of-file. Returns nil when nothing is buffered.
  def read
    buffer = @buffer
    @buffer = nil
    # Same flag the constructor initialises (the original set a separate,
    # otherwise unused @eof variable here).
    @is_eof = true
    return buffer
  end

  # Loads +string+ into the buffer and marks the input as open.
  #
  # Raises RuntimeError when +string+ was never assigned.
  def open
    if defined? @string
      @buffer = @string
    else
      fail "Pegex::Input::open failed. No source to open"
    end
    @is_open = true
  end

  # Marks the input as closed, drops the buffer and returns self.
  #
  # Raises RuntimeError when the input has already been closed.
  # NOTE(review): the message says "unopen" but the guard tests the closed
  # flag, so closing a never-opened input succeeds -- confirm intent.
  def close
    fail "Attempted to close an unopen Pegex::Input object" \
      if @is_close
    @is_open = false
    @is_close = true
    @buffer = nil
    return self
  end

  # True while the input is open (after #open, before #close).
  def open?
    @is_open
  end
end
|