peggy 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/Copy of builder.rb +363 -0
- data/lib/Copy of peg.rb +135 -0
- data/lib/ast.rb +86 -0
- data/lib/builder.rb +359 -0
- data/lib/parser.rb +203 -0
- data/lib/peg.rb +68 -0
- data/lib/peggy.rb +50 -0
- data/test/test_builder.rb +72 -0
- data/test/test_parser.rb +119 -0
- data/test/test_peg.rb +54 -0
- data/test/test_peggy.rb +66 -0
- data/test/tests.rb +6 -0
- metadata +61 -0
data/lib/peg.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'builder'
|
3
|
+
# require File.join(File.dirname(__FILE__), 'builder')
|
4
|
+
|
5
|
+
module Peggy
|
6
|
+
|
7
|
+
# Implements the Parsing Expression Grammar (PEG), one of several grammars supported.
|
8
|
+
class PEG < Builder

  # Runs the inherited initializer, then registers the grammar's productions.
  def initialize
    super
    build
  end

  private

  # Declares every production of the PEG notation using the Builder DSL.
  # Each `name { ... }` call introduces one production; a bare name inside a
  # block refers to another production. NOTE(review): the meaning of the
  # combinators (each, one, opt, many, some, neg, lit, eof) is defined by
  # Builder, which is not visible here.
  def build
    # Hierarchical syntax
    grammar { each { spacing; some { definition }; eof } }
    definition { each { identifier; left_arrow; expression } }
    expression { each { sequence; many { each { slash; sequence } } } }
    sequence { many { prefix } }
    prefix { each { opt { one { peek; disallow } }; suffix } }
    suffix { each { primary; opt { one { question; star; plus } } } }
    primary do
      one do
        each { identifier; neg { left_arrow } }
        each { lparen; expression; rparen }
        literal
        klass
        dot
      end
    end

    # Lexical syntax
    identifier { each { ident_start; many { ident_cont }; spacing } }
    ident_start { lit(/[a-zA-Z_]/) }
    ident_cont { one { ident_start; lit(/[0-9]/) } }
    literal do
      one do
        each { quote; many { each { neg { quote }; char } }; quote; spacing }
        each { quotes; many { each { neg { quotes }; char } }; quotes; spacing }
      end
    end
    klass { each { lit('['); many { each { neg { lit(']') }; range } }; lit(']'); spacing } }
    range { one { each { char; lit('-'); char }; char } }
    # A backslash escape (named, three-digit octal, or short octal) or any
    # single character other than a backslash.
    char { lit(/\\([nrt'"\[\]\\]|[0-2][0-7][0-7]|[0-7][0-7]?)|[^\\]/) }
    # Earlier, combinator-only spelling of char, kept for reference:
    #char{one{
    #  each{lit '\\'; one{
    #    quote
    #    quotes
    #    lit /[nrt\[\]\\]/
    #    lit /[0-2][0-7][0-7]/
    #    lit /[0-7][0-7]?/
    #  }}
    #  lit /[^\\]/
    #}}

    # Operator and punctuation tokens, each followed by optional spacing.
    left_arrow { each { lit('<-'); spacing } }
    slash { each { lit('/'); spacing } }
    peek { each { lit('&'); spacing } }
    disallow { each { lit('!'); spacing } }
    question { each { lit('?'); spacing } }
    star { each { lit('*'); spacing } }
    plus { each { lit('+'); spacing } }
    lparen { each { lit('('); spacing } }
    rparen { each { lit(')'); spacing } }
    dot { each { lit('.'); spacing } }
    quote { lit("'") }
    quotes { lit('"') }

    # Whitespace, comments and line endings.
    spacing { many { one { space; comment } } }
    comment { each { lit('#'); many { each { neg { eol }; lit(/./) } }; eol } }
    space { one { lit(' '); lit("\t"); eol } }
    eol { one { lit("\r\n"); lit("\n"); lit("\r") } }
  end
end
|
68
|
+
end # Peggy
|
data/lib/peggy.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'builder'
|
3
|
+
#require File.join(File.dirname(__FILE__), 'builder')
|
4
|
+
|
5
|
+
module Peggy
|
6
|
+
|
7
|
+
# Implements a variation of the Parsing Expression Grammar (PEG). This grammar
|
8
|
+
# supports regular expressions, and a syntax that more closely resembles
|
9
|
+
# other grammar languages.
|
10
|
+
class Peggy < Builder

  # Runs the inherited initializer, then registers the grammar's productions.
  def initialize
    super
    build
  end

  private

  # Build the parser using a builder.
  # Each `name { ... }` call declares one production; :spacing is listed in
  # ignore_productions. NOTE(review): how ignored productions are applied is
  # decided by Builder/Parser, which are not visible here.
  def build
    self.ignore_productions = [:spacing]

    # Ignored productions
    spacing { many { one { comment; eol; space } } }
    comment { each { lit(/#[^\r\n]*/); eol } }
    eol { lit(/\r\n|[\r\n]/) }
    space { lit(/[ \t]/) }

    # Grammar structure
    grammar { each { some { production }; eof } }
    production { each { identifier; equal; alternates } }
    identifier { lit(/[a-zA-Z_]\w*/) }
    equal { lit(':', '=') }
    alternates { each { sequence; many { each { lit('|'); sequence } } } }
    sequence { some { modified } }
    modified { each { opt { prefix }; primary; opt { suffix } } }
    primary { one { group; literal; reference } }

    # Prefix and suffix operators
    prefix { one { positive; negative } }
    positive { lit('&') }
    negative { lit('!') }
    suffix { one { oneOrMore; anyNumber; optional } }
    oneOrMore { lit('+') }
    anyNumber { lit('*') }
    optional { lit('?') }

    # Primaries
    group { each { lit('('); alternates; lit(')') } }
    literal { one { string; regexp } }
    reference { each { identifier; neg { equal } } }
    string { one { lit(/"(\\"|[^"\r\n])*"/); lit(/'(\\'|[^'\r\n])*'/) } }
    regexp { lit(/\/(\\\/|[^\/\r\n])*\//) }
  end

end
|
49
|
+
|
50
|
+
end #Peggy
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require File.join(File.dirname(__FILE__), '..', 'lib', 'builder')
|
3
|
+
|
4
|
+
# Smallest possible builder fixture: it declares a single production, :step1.
class ABCBuilder < Peggy::Builder

  # Declares :step1 as the sequence 'a', `many` of 'b', then 'c'.
  def build
    step1 { each { lit('a'); many { lit('b') }; lit('c') } }
  end

end
|
17
|
+
|
18
|
+
# Builder fixture describing a tiny subset of English sentences.
class BasicEnglishBuilder < Peggy::Builder

  # Declares the productions; :spacing is registered as an ignored production.
  def build
    # self.debug_flag = true
    self.ignore_productions = [:spacing]
    spacing { lit(/\s+/) }
    paragraph { each { some { sentence }; eof } }
    sentence do
      each do
        opt { noun_phrase }
        verb
        opt { noun_phrase }
        opt { prep_phrase }
        punctuation
      end
    end
    adjective { lit('red', 'hot', 'picky') }
    noun { lit('this', 'you', 'me', 'it', 'test') }
    noun_phrase { each { opt { particle }; opt { adjective }; noun } }
    particle { lit('a', 'the') }
    preposition { lit('for', 'about', 'to', 'from') }
    prep_phrase { each { opt { preposition }; noun } }
    punctuation { lit('.', '?', '!') }
    verb { lit('can', 'is', 'tell') }
  end

  # Force source to lower case, since the word lists above are all lower case.
  def source_text=(value)
    super(value.downcase)
  end
end
|
47
|
+
|
48
|
+
# Exercises Peggy::Builder through the two fixture builders.
class TestBuilder < Test::Unit::TestCase

  # Smoke test: a Sequence accepts chained << of Literal elements.
  # There are no assertions; it passes as long as nothing raises.
  def test_sequence
    sequence = Peggy::Sequence.new
    sequence << Peggy::Literal.new('a') << Peggy::Literal.new('b')
  end

  # ABCBuilder must end up with exactly one production, named :step1.
  def test_productions
    @builder = ABCBuilder.new
    @builder.build
    assert_not_nil(@builder.productions)
    assert_equal(1, @builder.productions.size)
    assert_equal(:step1, @builder[:step1].name)
  end

  # Parsing from :paragraph must consume the entire (lower-cased) source text.
  def test_basic_english
    @builder = BasicEnglishBuilder.new
    @builder.build
    # @builder.debug_flag = true
    @builder.source_text = 'This is a test. Tell me about it.'
    consumed = @builder.parse?(:paragraph)
    # pp @builder.parse_results
    assert_equal(@builder.source_text.size, consumed)
  end
end
|
data/test/test_parser.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require File.join(File.dirname(__FILE__), '..', 'lib', 'parser')
|
3
|
+
|
4
|
+
# Hand-written parser fixture for a tiny subset of English. Every production
# is a method taking the current index and returning what match?/allow?/string?
# return for it (a new index on success, a falsy value otherwise, as relied on
# by the `while index` / `if` checks below).
class BasicEnglishParser < Peggy::Parser

  # Runs the inherited initializer before configuring this parser, so any
  # state the parent sets up exists first.
  def initialize
    super()
    self.ignore_productions = [:spacing] # productions to ignore
    # self.debug_flag = true # be verbose during parse
  end

  # Force source to lower case, since the word lists below are all lower case.
  def source_text=(value)
    super(value.downcase)
  end

  # Called and basically ignored by the parser before each match.
  def spacing(index)
    literal?(/^\s+/, index) # match whitespace
  end

  # Matches sentences one after another until :eof matches right after one.
  # Returns the :eof result on success, otherwise the falsy result of the
  # sentence that failed to match.
  def paragraph(index)
    while index
      index = match?(:sentence, index) # don't call productions directly
      break unless index
      at_end = match?(:eof, index) # don't call eof directly either
      return at_end if at_end
    end
    index
  end

  # [noun_phrase] verb [noun_phrase] [prep_phrase] punctuation
  def sentence(index)
    # notice how I don't have to check index after each test
    index = allow?(:noun_phrase, index)
    index = match?(:verb, index)
    index = allow?(:noun_phrase, index)
    index = allow?(:prep_phrase, index)
    match?(:punctuation, index)
  end

  def adjective(index)
    word(index, %w[red hot picky])
  end

  def noun(index)
    word(index, %w[this you me it test])
  end

  # [particle] [adjective] noun
  def noun_phrase(index)
    index = allow?(:particle, index)
    index = allow?(:adjective, index)
    match?(:noun, index)
  end

  def particle(index)
    word(index, %w[a the])
  end

  def preposition(index)
    word(index, %w[for about to from])
  end

  # [preposition] noun
  def prep_phrase(index)
    index = allow?(:preposition, index)
    match?(:noun, index)
  end

  def punctuation(index)
    word(index, %w[. ? !])
  end

  def verb(index)
    word(index, %w[can is tell])
  end

  # Tries each string of +list+ at +index+ with string?, in order, and returns
  # the first truthy result; returns Peggy::NO_MATCH when none matches.
  def word(index, list)
    list.each do |candidate|
      matched = string?(candidate, index)
      return matched if matched
    end
    Peggy::NO_MATCH
  end
end
|
86
|
+
|
87
|
+
# Parser fixture whose three productions refer to each other in a cycle
# (a -> b -> c -> a); it never consumes input.
class BadParser < Peggy::Parser

  def a(index)
    match?(:b, index)
  end

  def b(index)
    match?(:c, index)
  end

  def c(index)
    match?(:a, index)
  end
end
|
101
|
+
|
102
|
+
# Exercises Peggy::Parser through the hand-written fixture parsers.
class TestParser < Test::Unit::TestCase

  # Parsing from :paragraph must consume the entire (lower-cased) source text.
  def test_parse
    @parser = BasicEnglishParser.new
    @parser.source_text = 'This is a test. Tell me about it.'
    consumed = @parser.parse?(:paragraph)
    assert_equal(@parser.source_text.size, consumed)
  end

  # Placeholder: no assertions have been written for this case.
  def test_recursion
  end

  # The cyclic productions of BadParser must raise a RuntimeError.
  def test_infinite_recursion
    @parser = BadParser.new
    assert_raise(RuntimeError) do
      @parser.parse?(:a, 'ab')
    end
  end

end
|
data/test/test_peg.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require File.join(File.dirname(__FILE__), '..', 'lib', 'peg')
|
3
|
+
require 'pp'
|
4
|
+
|
5
|
+
# Checks that Peggy::PEG can parse a PEG grammar written in PEG notation.
# NOTE(review): test_peggy.rb declares a class with this same name; loading
# both files reopens one class rather than defining two.
class TestPeggy < Test::Unit::TestCase

  # Parsing from :grammar must consume the whole grammar text.
  def test_peg_grammar
    @peg = Peggy::PEG.new
    # note I have to double all backslashes because Ruby eats the first of each pair
    @peg.source_text = <<-endg
# Hierarchical syntax
Grammar <- Spacing Definition+ EndOfFile
Definition <- Identifier LEFTARROW Expression
Expression <- Sequence (SLASH Sequence)*
Sequence <- Prefix*
Prefix <- (AND / NOT)? Suffix
Suffix <- Primary (QUESTION / STAR / PLUS)?
Primary <- Identifier !LEFTARROW
/ OPEN Expression CLOSE
/ Literal / Class / DOT
# Lexical syntax
Identifier <- IdentStart IdentCont* Spacing
IdentStart <- [a-zA-Z_]
IdentCont <- IdentStart / [0-9]
Literal <- ['] (!['] Char)* ['] Spacing
/ ["] (!["] Char)* ["] Spacing
Class <- '[' (!']' Range)* ']' Spacing
Range <- Char '-' Char / Char
Char <- '\\\\' [nrt'"\\[\\]\\\\]
/ '\\\\' [0-2][0-7][0-7]
/ '\\\\' [0-7][0-7]?
/ !'\\\\' .
LEFTARROW <- '<-' Spacing
SLASH <- '/' Spacing
AND <- '&' Spacing
NOT <- '!' Spacing
QUESTION <- '?' Spacing
STAR <- '*' Spacing
PLUS <- '+' Spacing
OPEN <- '(' Spacing
CLOSE <- ')' Spacing
DOT <- '.' Spacing
Spacing <- (Space / Comment)*
Comment <- '#' (!EndOfLine .)* EndOfLine
Space <- ' ' / '\\t' / EndOfLine
EndOfLine <- '\\r\\n' / '\\n' / '\\r'
EndOfFile <- !.
    endg
    #@peg.debug_flag = true
    consumed = @peg.parse?(:grammar)
    #pp @peg.parse_results
    assert_equal(@peg.source_text.length, consumed)
  end
end
|
data/test/test_peggy.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'rubygems'
|
3
|
+
require 'peggy'
|
4
|
+
#require File.join(File.dirname(__FILE__), '..', 'lib', 'peggy')
|
5
|
+
require 'pp'
|
6
|
+
|
7
|
+
# Checks that Peggy::Peggy can parse grammars written in the Peggy notation.
class TestPeggy < Test::Unit::TestCase

  # A small arithmetic-expression grammar must be consumed completely.
  def test_expression
    @peggy = Peggy::Peggy.new
    # note I have to double all backslashes because Ruby eats the first of each pair
    @peggy.source_text = <<-ENDG
expression: primary (operator primary)*
primary
: literal
| group
literal: number | string
group: '(' expression ')'
operator: /[*+\\/-^%]/
number: int | float
string: /"(\\\\"|[^"])*"/
    ENDG
    # @peggy.debug_flag = true
    consumed = @peggy.parse?(:grammar)
    # pp @peggy.parse_results
    assert_equal(@peggy.source_text.length, consumed)
  end

  # The Peggy grammar, written in Peggy notation, must be consumed completely.
  def test_peggy_grammar
    @peggy = Peggy::Peggy.new
    @peggy.ignore_productions = [:spacing]
    # note I have to double all backslashes because Ruby eats the first of each pair
    @peggy.source_text = <<-endg
# ignored productions
spacing: (comment | eol | / \\t/)*
comment: /#[^\\r\\n]*/ eol
eol: /\\r\\n|[\\r\\n]/
# Starting production
grammar: production+ eof
production: identifier equal alternates
identifier: /[a-zA-Z_]\\w*/
equal: /[:=]/
alternates: sequence ('|' sequence)*
sequence: modified+
modified: prefix? primary suffix?
primary: group | literal | reference
prefix: positive | negative
positive: '&'
negative: '!'
suffix: oneOrMore | anyNumber | optional
oneOrMore: '+'
anyNumber: '*'
optional: '?'
group: '(' alternates ')'
literal: string | regexp
reference: identifier !equal
string: /"(\\\\"|[^"\\r\\n])*"/ | /'(\\\\'|[^'\\r\\n])*'/
regexp: /\\/(\\\\\\/|[^\\/\\r\\n])*\\//
    endg
    # @peggy.debug_flag = true
    consumed = @peggy.parse?(:grammar)
    # pp @peggy.parse_results
    assert_equal(@peggy.source_text.length, consumed)
  end

end
|
data/test/tests.rb
ADDED
metadata
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.0
|
3
|
+
specification_version: 1
|
4
|
+
name: peggy
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.1.0
|
7
|
+
date: 2006-10-05 00:00:00 -06:00
|
8
|
+
summary: A packrat parsing engine and PEG grammar, along with several other grammars.
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: troyhen@gmail.com
|
12
|
+
homepage: http://rubyforge.org/projects/peggy/
|
13
|
+
rubyforge_project:
|
14
|
+
description: This is a parsing library and language specifier. It uses packrat parsing, as opposed to LL(k) or LR(k) parsing. Packrat parsing uses memoization in a recursive descent parser. By storing the production results from each significant point it speeds up the parse. PEG is a formalized grammar specification optimized for packrat parsing. Peggy also allows users to specify their grammar in pure Ruby as methods or using a Builder. And the default Peggy grammar is a variation on PEG, with support for full regular expressions and for simplified grammars which automatically ignore a set of productions.
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Troy Heninger
|
31
|
+
files:
|
32
|
+
- lib/ast.rb
|
33
|
+
- lib/builder.rb
|
34
|
+
- lib/Copy of builder.rb
|
35
|
+
- lib/Copy of peg.rb
|
36
|
+
- lib/parser.rb
|
37
|
+
- lib/peg.rb
|
38
|
+
- lib/peggy.rb
|
39
|
+
- test/tests.rb
|
40
|
+
- test/test_builder.rb
|
41
|
+
- test/test_parser.rb
|
42
|
+
- test/test_peg.rb
|
43
|
+
- test/test_peggy.rb
|
44
|
+
test_files:
|
45
|
+
- test/tests.rb
|
46
|
+
- test/test_builder.rb
|
47
|
+
- test/test_parser.rb
|
48
|
+
- test/test_peg.rb
|
49
|
+
- test/test_peggy.rb
|
50
|
+
rdoc_options: []
|
51
|
+
|
52
|
+
extra_rdoc_files: []
|
53
|
+
|
54
|
+
executables: []
|
55
|
+
|
56
|
+
extensions: []
|
57
|
+
|
58
|
+
requirements: []
|
59
|
+
|
60
|
+
dependencies: []
|
61
|
+
|