peggy 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,68 @@
1
+ require 'rubygems'
2
+ require 'builder'
3
+ # require File.join(File.dirname(__FILE__), 'builder')
4
+
5
module Peggy
  # Grammar for the classic Parsing Expression Grammar (PEG) notation, one of
  # several grammars supported. Every production below is declared by calling
  # a method named after the production and passing it a block that describes
  # the rule (presumably resolved by the Builder superclass).
  class PEG < Builder
    # Runs the superclass initializer and then declares all PEG productions.
    def initialize
      super
      build
    end

    private

    # Declares the PEG productions: the hierarchical rules first, then the
    # lexical ones. Statement order inside each block is the match order.
    def build
      # Hierarchical syntax
      grammar { each { spacing; some { definition }; eof } }
      definition { each { identifier; left_arrow; expression } }
      expression { each { sequence; many { each { slash; sequence } } } }
      sequence { many { prefix } }
      prefix { each { opt { one { peek; disallow } }; suffix } }
      suffix { each { primary; opt { one { question; star; plus } } } }
      primary do
        one do
          each { identifier; neg { left_arrow } }
          each { lparen; expression; rparen }
          literal
          klass
          dot
        end
      end

      # Lexical syntax
      identifier { each { ident_start; many { ident_cont }; spacing } }
      ident_start { lit(/[a-zA-Z_]/) }
      ident_cont { one { ident_start; lit(/[0-9]/) } }
      literal do
        one do
          each { quote; many { each { neg { quote }; char } }; quote; spacing }
          each { quotes; many { each { neg { quotes }; char } }; quotes; spacing }
        end
      end
      klass { each { lit('['); many { each { neg { lit(']') }; range } }; lit(']'); spacing } }
      range { one { each { char; lit('-'); char }; char } }
      # A single character: a backslash escape (named, 3-digit octal or
      # 1-2 digit octal) or any character other than a backslash.
      char { lit(/\\([nrt'"\[\]\\]|[0-2][0-7][0-7]|[0-7][0-7]?)|[^\\]/) }
      # Disabled builder-only formulation of +char+, kept for reference:
      # char{one{
      #   each{lit '\\'; one{
      #     quote
      #     quotes
      #     lit /[nrt\[\]\\]/
      #     lit /[0-2][0-7][0-7]/
      #     lit /[0-7][0-7]?/
      #   }}
      #   lit /[^\\]/
      # }}

      # Punctuation tokens; each one also consumes trailing spacing.
      left_arrow { each { lit('<-'); spacing } }
      slash { each { lit('/'); spacing } }
      peek { each { lit('&'); spacing } }
      disallow { each { lit('!'); spacing } }
      question { each { lit('?'); spacing } }
      star { each { lit('*'); spacing } }
      plus { each { lit('+'); spacing } }
      lparen { each { lit('('); spacing } }
      rparen { each { lit(')'); spacing } }
      dot { each { lit('.'); spacing } }
      quote { lit("'") }
      quotes { lit('"') }

      # Whitespace and comments
      spacing { many { one { space; comment } } }
      comment { each { lit('#'); many { each { neg { eol }; lit(/./) } }; eol } }
      space { one { lit(' '); lit("\t"); eol } }
      eol { one { lit("\r\n"); lit("\n"); lit("\r") } }
    end
  end
end # Peggy
@@ -0,0 +1,50 @@
1
+ require 'rubygems'
2
+ require 'builder'
3
+ #require File.join(File.dirname(__FILE__), 'builder')
4
+
5
module Peggy
  # A variation on the Parsing Expression Grammar (PEG) notation. It accepts
  # regular-expression literals and uses a syntax closer to other grammar
  # languages (':' or '=' to define a production, '|' between alternatives).
  class Peggy < Builder
    # Runs the superclass initializer and then declares all productions.
    def initialize
      super
      build
    end

    private

    # Builds the parser by declaring each production through the builder.
    # Statement order inside each block is the match order.
    def build
      # :spacing is registered as an ignored production.
      self.ignore_productions = [:spacing]

      # Whitespace and comments
      spacing { many { one { comment; eol; space } } }
      comment { each { lit(/#[^\r\n]*/); eol } }
      eol { lit(/\r\n|[\r\n]/) }
      space { lit(/[ \t]/) }

      # Grammar structure
      grammar { each { some { production }; eof } }
      production { each { identifier; equal; alternates } }
      identifier { lit(/[a-zA-Z_]\w*/) }
      equal { lit(':', '=') }
      alternates { each { sequence; many { each { lit('|'); sequence } } } }
      sequence { some { modified } }
      modified { each { opt { prefix }; primary; opt { suffix } } }
      primary { one { group; literal; reference } }

      # Prefix and suffix operators
      prefix { one { positive; negative } }
      positive { lit('&') }
      negative { lit('!') }
      suffix { one { oneOrMore; anyNumber; optional } }
      oneOrMore { lit('+') }
      anyNumber { lit('*') }
      optional { lit('?') }

      # Primaries
      group { each { lit('('); alternates; lit(')') } }
      literal { one { string; regexp } }
      # An identifier counts as a reference only when no ':' / '=' follows it.
      reference { each { identifier; neg { equal } } }
      string { one { lit(/"(\\"|[^"\r\n])*"/); lit(/'(\\'|[^'\r\n])*'/) } }
      regexp { lit(/\/(\\\/|[^\/\r\n])*\//) }
    end
  end
end # Peggy
@@ -0,0 +1,72 @@
1
+ require 'test/unit'
2
+ require File.join(File.dirname(__FILE__), '..', 'lib', 'builder')
3
+
4
# Minimal builder used by the tests: it declares one production, +step1+.
class ABCBuilder < Peggy::Builder
  # Declares +step1+ as the sequence: literal 'a', a +many+ group of
  # literal 'b', then literal 'c'.
  def build
    step1 { each { lit('a'); many { lit('b') }; lit('c') } }
  end
end
17
+
18
# Builder-based grammar for a tiny subset of English sentences.
class BasicEnglishBuilder < Peggy::Builder
  # Declares the productions; :spacing is registered as an ignored production.
  def build
    # self.debug_flag = true
    self.ignore_productions = [:spacing]
    spacing { lit(/\s+/) }
    paragraph { each { some { sentence }; eof } }
    sentence do
      each do
        opt { noun_phrase }
        verb
        opt { noun_phrase }
        opt { prep_phrase }
        punctuation
      end
    end

    # Vocabulary and phrases
    adjective { lit(*%w[red hot picky]) }
    noun { lit(*%w[this you me it test]) }
    noun_phrase { each { opt { particle }; opt { adjective }; noun } }
    particle { lit(*%w[a the]) }
    preposition { lit(*%w[for about to from]) }
    prep_phrase { each { opt { preposition }; noun } }
    punctuation { lit(*%w[. ? !]) }
    verb { lit(*%w[can is tell]) }
  end

  # Stores the source text lower-cased, so the grammar only needs
  # lower-case vocabulary.
  def source_text=(value)
    super(value.downcase)
  end
end
47
+
48
# Unit tests for the builder classes.
class TestBuilder < Test::Unit::TestCase
  # Smoke test: a Sequence can be created and Literals chained onto it with <<.
  def test_sequence
    sequence = Peggy::Sequence.new
    sequence << Peggy::Literal.new('a') << Peggy::Literal.new('b')
  end

  # ABCBuilder#build must register exactly one production, named :step1.
  def test_productions
    builder = ABCBuilder.new
    builder.build
    assert_not_nil(builder.productions)
    assert_equal(1, builder.productions.size)
    assert_equal(:step1, builder[:step1].name)
  end

  # Parsing from :paragraph must return the full length of the source text.
  def test_basic_english
    builder = BasicEnglishBuilder.new
    builder.build
    # builder.debug_flag = true
    builder.source_text = 'This is a test. Tell me about it.'
    consumed = builder.parse?(:paragraph)
    # pp builder.parse_results
    assert_equal(builder.source_text.size, consumed)
  end
end
@@ -0,0 +1,119 @@
1
+ require 'test/unit'
2
+ require File.join(File.dirname(__FILE__), '..', 'lib', 'parser')
3
+
4
# Hand-written parser for a tiny subset of English. Every production is an
# instance method that receives the current index and returns whatever the
# Peggy::Parser helper it delegates to returns (match?, allow?, literal?,
# string?). A falsy result is treated as "no match" throughout this class.
class BasicEnglishParser < Peggy::Parser
  # Registers :spacing as an ignored production.
  def initialize
    # Run the superclass initializer first so that any state Peggy::Parser
    # sets up exists before this subclass is configured (the original
    # skipped this call).
    super()
    self.ignore_productions = [:spacing] # productions to ignore
    # self.debug_flag = true # be verbose during parse
  end

  # Stores the source text lower-cased, so the word lists below only need
  # lower-case entries.
  def source_text=(value)
    super(value.downcase)
  end

  # Called and basically ignored by the parser before each match.
  def spacing(index)
    literal?(/^\s+/, index) # match whitespace
  end

  # One or more sentences followed by end of input. Returns the result of
  # the :eof match once it succeeds, or the falsy index that ended the loop.
  def paragraph(index)
    while index
      index = match?(:sentence, index) # don't call productions directly
      next unless index # a failed sentence ends the loop via its condition

      found = match?(:eof, index) # don't call eof directly either
      return found if found
    end
    index
  end

  # [noun_phrase] verb [noun_phrase] [prep_phrase] punctuation
  def sentence(index)
    # The index is deliberately not checked after each step; it is passed
    # straight on to the next helper.
    index = allow?(:noun_phrase, index)
    index = match?(:verb, index)
    index = allow?(:noun_phrase, index)
    index = allow?(:prep_phrase, index)
    match?(:punctuation, index)
  end

  def adjective(index)
    word(index, ['red', 'hot', 'picky'])
  end

  def noun(index)
    word(index, ['this', 'you', 'me', 'it', 'test'])
  end

  # [particle] [adjective] noun
  def noun_phrase(index)
    index = allow?(:particle, index)
    index = allow?(:adjective, index)
    match?(:noun, index)
  end

  def particle(index)
    word(index, ['a', 'the'])
  end

  def preposition(index)
    word(index, ['for', 'about', 'to', 'from'])
  end

  # [preposition] noun
  def prep_phrase(index)
    index = allow?(:preposition, index)
    match?(:noun, index)
  end

  def punctuation(index)
    word(index, ['.', '?', '!'])
  end

  def verb(index)
    word(index, ['can', 'is', 'tell'])
  end

  # Tries each entry of +list+ in order at +index+ and returns the first
  # truthy result of string?; returns Peggy::NO_MATCH when none matches.
  def word(index, list)
    list.each do |candidate|
      matched = string?(candidate, index)
      return matched if matched
    end
    Peggy::NO_MATCH
  end
end
86
+
87
# Deliberately broken parser: its three productions refer to each other in a
# cycle (a -> b -> c -> a). Used to check how the parser reacts to unbounded
# recursion.
class BadParser < Peggy::Parser
  def a(index)
    match?(:b, index)
  end

  def b(index)
    match?(:c, index)
  end

  def c(index)
    match?(:a, index)
  end
end
101
+
102
# Unit tests for the method-based parsers.
class TestParser < Test::Unit::TestCase
  # Parsing from :paragraph must return the full length of the source text.
  def test_parse
    parser = BasicEnglishParser.new
    parser.source_text = 'This is a test. Tell me about it.'
    consumed = parser.parse?(:paragraph)
    assert_equal(parser.source_text.size, consumed)
  end

  # Placeholder: no recursion test has been written yet.
  def test_recursion
  end

  # A cyclic grammar must raise RuntimeError.
  def test_infinite_recursion
    parser = BadParser.new
    assert_raise(RuntimeError) { parser.parse?(:a, 'ab') }
  end
end
@@ -0,0 +1,54 @@
1
+ require 'test/unit'
2
+ require File.join(File.dirname(__FILE__), '..', 'lib', 'peg')
3
+ require 'pp'
4
+
5
# Checks that Peggy::PEG can parse a PEG grammar written in PEG notation.
class TestPeggy < Test::Unit::TestCase
  # Parsing from :grammar must return the full length of the source text.
  def test_peg_grammar
    peg = Peggy::PEG.new
    # Every backslash is doubled because the heredoc consumes the first of
    # each pair.
    peg.source_text = <<-endg
# Hierarchical syntax
Grammar <- Spacing Definition+ EndOfFile
Definition <- Identifier LEFTARROW Expression
Expression <- Sequence (SLASH Sequence)*
Sequence <- Prefix*
Prefix <- (AND / NOT)? Suffix
Suffix <- Primary (QUESTION / STAR / PLUS)?
Primary <- Identifier !LEFTARROW
/ OPEN Expression CLOSE
/ Literal / Class / DOT
# Lexical syntax
Identifier <- IdentStart IdentCont* Spacing
IdentStart <- [a-zA-Z_]
IdentCont <- IdentStart / [0-9]
Literal <- ['] (!['] Char)* ['] Spacing
/ ["] (!["] Char)* ["] Spacing
Class <- '[' (!']' Range)* ']' Spacing
Range <- Char '-' Char / Char
Char <- '\\\\' [nrt'"\\[\\]\\\\]
/ '\\\\' [0-2][0-7][0-7]
/ '\\\\' [0-7][0-7]?
/ !'\\\\' .
LEFTARROW <- '<-' Spacing
SLASH <- '/' Spacing
AND <- '&' Spacing
NOT <- '!' Spacing
QUESTION <- '?' Spacing
STAR <- '*' Spacing
PLUS <- '+' Spacing
OPEN <- '(' Spacing
CLOSE <- ')' Spacing
DOT <- '.' Spacing
Spacing <- (Space / Comment)*
Comment <- '#' (!EndOfLine .)* EndOfLine
Space <- ' ' / '\\t' / EndOfLine
EndOfLine <- '\\r\\n' / '\\n' / '\\r'
EndOfFile <- !.
    endg
    # peg.debug_flag = true
    consumed = peg.parse?(:grammar)
    # pp peg.parse_results
    assert_equal(peg.source_text.length, consumed)
  end
end
@@ -0,0 +1,66 @@
1
+ require 'test/unit'
2
+ require 'rubygems'
3
+ require 'peggy'
4
+ #require File.join(File.dirname(__FILE__), '..', 'lib', 'peggy')
5
+ require 'pp'
6
+
7
# Checks that Peggy::Peggy can parse grammars written in its own notation.
class TestPeggy < Test::Unit::TestCase
  # Parses a small expression grammar; parsing from :grammar must return the
  # full length of the source text.
  def test_expression
    peggy = Peggy::Peggy.new
    # Every backslash is doubled because the heredoc consumes the first of
    # each pair.
    # The '-' in the operator class sits last so it is a literal hyphen; in
    # the middle ("/-^") it formed a range that also matched digits and
    # upper-case letters.
    peggy.source_text = <<-ENDG
expression: primary (operator primary)*
primary
: literal
| group
literal: number | string
group: '(' expression ')'
operator: /[*+\\/^%-]/
number: int | float
string: /"(\\\\"|[^"])*"/
    ENDG
    # peggy.debug_flag = true
    consumed = peggy.parse?(:grammar)
    # pp peggy.parse_results
    assert_equal(peggy.source_text.length, consumed)
  end

  # Parses the Peggy grammar described in Peggy notation; parsing from
  # :grammar must return the full length of the source text.
  def test_peggy_grammar
    peggy = Peggy::Peggy.new
    peggy.ignore_productions = [:spacing]
    # Every backslash is doubled because the heredoc consumes the first of
    # each pair.
    # The inline whitespace regex is the class [ \t] (space OR tab), the same
    # pattern Peggy#build uses for +space+; without the brackets it only
    # matched a space followed by a tab.
    peggy.source_text = <<-endg
# ignored productions
spacing: (comment | eol | /[ \\t]/)*
comment: /#[^\\r\\n]*/ eol
eol: /\\r\\n|[\\r\\n]/
# Starting production
grammar: production+ eof
production: identifier equal alternates
identifier: /[a-zA-Z_]\\w*/
equal: /[:=]/
alternates: sequence ('|' sequence)*
sequence: modified+
modified: prefix? primary suffix?
primary: group | literal | reference
prefix: positive | negative
positive: '&'
negative: '!'
suffix: oneOrMore | anyNumber | optional
oneOrMore: '+'
anyNumber: '*'
optional: '?'
group: '(' alternates ')'
literal: string | regexp
reference: identifier !equal
string: /"(\\\\"|[^"\\r\\n])*"/ | /'(\\\\'|[^'\\r\\n])*'/
regexp: /\\/(\\\\\\/|[^\\/\\r\\n])*\\//
    endg
    # peggy.debug_flag = true
    consumed = peggy.parse?(:grammar)
    # pp peggy.parse_results
    assert_equal(peggy.source_text.length, consumed)
  end
end
@@ -0,0 +1,6 @@
1
+ require 'test/unit'
2
+ require 'rubygems'
3
+ require 'test/test_parser'
4
+ require 'test/test_builder'
5
+ require 'test/test_peg'
6
+ require 'test/test_peggy'
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.0
3
+ specification_version: 1
4
+ name: peggy
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.1.0
7
+ date: 2006-10-05 00:00:00 -06:00
8
+ summary: A packrat parsing engine and PEG grammar, along with several other grammars.
9
+ require_paths:
10
+ - lib
11
+ email: troyhen@gmail.com
12
+ homepage: http://rubyforge.org/projects/peggy/
13
+ rubyforge_project:
14
+ description: This is a parsing library and language specifier. It uses packrat parsing, as opposed to LL(k) or LR(k) parsing. Packrat parsing uses memoization in a recursive descent parser. By storing the production results from each significant point it speeds up the parse. PEG is a formalized grammar specification optimized for packrat parsing. Peggy also allows users to specify their grammar in pure Ruby as methods or using a Builder. And the default Peggy grammar is a variation on PEG, with support for full regular expressions and for simplified grammars which automatically ignore a set of productions.
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Troy Heninger
31
+ files:
32
+ - lib/ast.rb
33
+ - lib/builder.rb
34
+ - lib/Copy of builder.rb
35
+ - lib/Copy of peg.rb
36
+ - lib/parser.rb
37
+ - lib/peg.rb
38
+ - lib/peggy.rb
39
+ - test/tests.rb
40
+ - test/test_builder.rb
41
+ - test/test_parser.rb
42
+ - test/test_peg.rb
43
+ - test/test_peggy.rb
44
+ test_files:
45
+ - test/tests.rb
46
+ - test/test_builder.rb
47
+ - test/test_parser.rb
48
+ - test/test_peg.rb
49
+ - test/test_peggy.rb
50
+ rdoc_options: []
51
+
52
+ extra_rdoc_files: []
53
+
54
+ executables: []
55
+
56
+ extensions: []
57
+
58
+ requirements: []
59
+
60
+ dependencies: []
61
+