walrat 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/walrat.rb +70 -0
- data/lib/walrat/additions/proc.rb +32 -0
- data/lib/walrat/additions/regexp.rb +33 -0
- data/lib/walrat/additions/string.rb +99 -0
- data/lib/walrat/additions/symbol.rb +42 -0
- data/lib/walrat/and_predicate.rb +49 -0
- data/lib/walrat/array_result.rb +29 -0
- data/lib/walrat/continuation_wrapper_exception.rb +35 -0
- data/lib/walrat/grammar.rb +259 -0
- data/lib/walrat/left_recursion_exception.rb +34 -0
- data/lib/walrat/location_tracking.rb +126 -0
- data/lib/walrat/match_data_wrapper.rb +84 -0
- data/lib/walrat/memoizing.rb +55 -0
- data/lib/walrat/memoizing_cache.rb +126 -0
- data/lib/walrat/no_parameter_marker.rb +30 -0
- data/lib/walrat/node.rb +63 -0
- data/lib/walrat/not_predicate.rb +49 -0
- data/lib/walrat/parse_error.rb +48 -0
- data/lib/walrat/parser_state.rb +205 -0
- data/lib/walrat/parslet.rb +38 -0
- data/lib/walrat/parslet_choice.rb +155 -0
- data/lib/walrat/parslet_combination.rb +34 -0
- data/lib/walrat/parslet_combining.rb +190 -0
- data/lib/walrat/parslet_merge.rb +96 -0
- data/lib/walrat/parslet_omission.rb +74 -0
- data/lib/walrat/parslet_repetition.rb +114 -0
- data/lib/walrat/parslet_repetition_default.rb +77 -0
- data/lib/walrat/parslet_sequence.rb +241 -0
- data/lib/walrat/predicate.rb +68 -0
- data/lib/walrat/proc_parslet.rb +60 -0
- data/lib/walrat/regexp_parslet.rb +84 -0
- data/lib/walrat/skipped_substring_exception.rb +46 -0
- data/lib/walrat/string_enumerator.rb +47 -0
- data/lib/walrat/string_parslet.rb +89 -0
- data/lib/walrat/string_result.rb +34 -0
- data/lib/walrat/symbol_parslet.rb +82 -0
- data/lib/walrat/version.rb +26 -0
- metadata +110 -0
data/lib/walrat.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
# Ruby 1.9 support
|
24
|
+
require 'continuation' unless Kernel.respond_to?(:callcc)
|
25
|
+
|
26
|
+
module Walrat
  # Ruby 1.8 only: switch the interpreter to UTF-8 (necessary for Unicode
  # support). Later Ruby versions never execute this assignment.
  if RUBY_VERSION.split('.')[0, 2] == %w[1 8]
    $KCODE = 'U'
  end

  # Table of lazily-loaded constants: each entry pairs a constant name with
  # the file that defines it. Nothing is required until the constant is first
  # referenced.
  #
  # TODO: move the parslets into a subdirectory? directory for predicates also?
  [
    [:AndPredicate,                 'walrat/and_predicate'],
    [:ArrayResult,                  'walrat/array_result'],
    [:ContinuationWrapperException, 'walrat/continuation_wrapper_exception'],
    [:Grammar,                      'walrat/grammar'],
    [:LeftRecursionException,       'walrat/left_recursion_exception'],
    [:LocationTracking,             'walrat/location_tracking'],
    [:MatchDataWrapper,             'walrat/match_data_wrapper'],
    [:Memoizing,                    'walrat/memoizing'],
    [:MemoizingCache,               'walrat/memoizing_cache'],
    [:Node,                         'walrat/node'],
    [:NoParameterMarker,            'walrat/no_parameter_marker'],
    [:NotPredicate,                 'walrat/not_predicate'],
    [:ParseError,                   'walrat/parse_error'],
    [:ParserState,                  'walrat/parser_state'],
    [:Parslet,                      'walrat/parslet'],
    [:ParsletChoice,                'walrat/parslet_choice'],
    [:ParsletCombination,           'walrat/parslet_combination'],
    [:ParsletCombining,             'walrat/parslet_combining'],
    [:ParsletMerge,                 'walrat/parslet_merge'],
    [:ParsletOmission,              'walrat/parslet_omission'],
    [:ParsletRepetition,            'walrat/parslet_repetition'],
    [:ParsletRepetitionDefault,     'walrat/parslet_repetition_default'],
    [:ParsletSequence,              'walrat/parslet_sequence'],
    [:Predicate,                    'walrat/predicate'],
    [:ProcParslet,                  'walrat/proc_parslet'],
    [:RegexpParslet,                'walrat/regexp_parslet'],
    [:SkippedSubstringException,    'walrat/skipped_substring_exception'],
    [:StringEnumerator,             'walrat/string_enumerator'],
    [:StringParslet,                'walrat/string_parslet'],
    [:StringResult,                 'walrat/string_result'],
    [:SymbolParslet,                'walrat/symbol_parslet']
  ].each do |constant_name, path|
    autoload constant_name, path
  end
end # module Walrat
|
66
|
+
|
67
|
+
require 'walrat/additions/proc'
|
68
|
+
require 'walrat/additions/regexp'
|
69
|
+
require 'walrat/additions/string'
|
70
|
+
require 'walrat/additions/symbol'
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
# Walrat additions to Proc: mixes in the parslet-combining methods and adds a
# conversion to a parseable object.
class Proc
  include Walrat::ParsletCombining

  # Wraps the receiver in a new Walrat::ProcParslet and returns it.
  def to_parseable
    Walrat::ProcParslet.new(self)
  end
end # class Proc
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
# Walrat additions to Regexp: mixes in the parslet-combining methods and adds
# a conversion to a parseable object.
class Regexp
  # Loaded explicitly here (rather than relying on autoload) before the
  # module is mixed in.
  require 'walrat/parslet_combining'
  include Walrat::ParsletCombining

  # Wraps the receiver in a new Walrat::RegexpParslet and returns it.
  def to_parseable
    Walrat::RegexpParslet.new(self)
  end
end # class Regexp
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
# Additions to String class for Unicode support.
|
26
|
+
# Parslet combining methods.
|
27
|
+
# Convenience methods (to_parseable).
|
28
|
+
# Conversion utility methods.
|
29
|
+
class String
  # Keep a handle on the built-in [] so that the multi-byte friendly override
  # below can delegate to it.
  alias old_range []

  # Returns the number of characters in the receiver, counted by enumerating
  # "chars".
  def jlength
    chars.to_a.length
  end

  # NOTE: this is a totally Walrat-specific implementation that is
  # unlikely to be of use anywhere else. It is used in only 1 place
  # in the codebase, and works around the fact that the MatchData
  # made available by the index method gets clobbered by the
  # "chars.to_a" call. The same thing happens for alternative
  # methods of counting the chars, such as using jlength or a manual
  # scan.
  #
  # One workaround is for the caller to re-perform the index call just
  # to get the MatchData again, but that is inefficient. So here we
  # just do the addition before returning the result to the caller.
  #
  # Returns nil when arg is not found.
  #
  # NOTE(review): reads $~ straight after the index call, so arg is
  # presumably always a Regexp -- confirm against the single caller.
  def jindex_plus_length arg
    if i = index(arg)
      # the match length must be read from $~ before "chars.to_a" clobbers it
      $~[0].length + unpack('C*')[0...i].pack('C*').chars.to_a.length
    end
  end

  # Unlike the normal rindex method, the MatchData in $~ set by the inner
  # rindex call gets clobbered (by the "chars.to_a" call) and is not visible to
  # the caller of this method.
  #
  # Returns nil when arg is not found.
  def jrindex arg, offset = Walrat::NoParameterMarker.instance
    if offset == Walrat::NoParameterMarker.instance
      i = rindex arg
    else
      i = rindex arg, offset
    end
    i ? unpack('C*')[0...i].pack('C*').chars.to_a.length : nil
  end

  # Multi-byte friendly [] implementation.
  #
  # Only the single-argument Range form is handled here (by slicing the array
  # of characters); every other form is delegated to the built-in
  # implementation.
  #
  # Returns nil for a Range that lies outside the receiver, exactly like the
  # built-in String#[], rather than raising NoMethodError on nil.
  def [](range, other = Walrat::NoParameterMarker.instance)
    if other == Walrat::NoParameterMarker.instance
      if range.kind_of? Range
        # Array#[] yields nil for an out-of-bounds range; propagate that nil
        # instead of calling join on it
        slice = chars.to_a[range]
        slice && slice.join
      else
        old_range range
      end
    else
      old_range range, other
    end
  end

  # Returns a character-level enumerator for the receiver.
  def enumerator
    Walrat::StringEnumerator.new self
  end

  # Rationale: it's ok to add "&" and "|" methods to string because they don't
  # exist yet (they're not overrides).
  include Walrat::ParsletCombining

  # Returns a StringParslet based on the receiver
  def to_parseable
    Walrat::StringParslet.new self
  end

  # Converts the receiver of the form "foo_bar" to "FooBar". Specifically, the
  # receiver is split into pieces delimited by underscores, each component is
  # then converted to capital case (the first letter is capitalized and the
  # remaining letters are lowercased) and finally the components are joined.
  def to_class_name
    split('_').collect { |component| component.capitalize }.join
  end
end # class String
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
# Walrat additions to Symbol: mixes in the parslet-combining methods and adds
# a conversion to a parseable object.
class Symbol
  include Walrat::ParsletCombining

  # Wraps the receiver in a new Walrat::SymbolParslet and returns it.
  #
  # Inside a Grammar, symbols stand in for rules and productions that have not
  # been defined yet, and also make self-reference (recursion) within a rule
  # or production possible; for example:
  #
  #   rule :thing & :thing.optional & :other_thing
  #
  # The resulting SymbolParslet defers evaluation of the named rule or
  # production until parsing actually takes place, instead of evaluating it
  # when the rule or production is defined.
  def to_parseable
    Walrat::SymbolParslet.new(self)
  end
end # class Symbol
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
module Walrat
  # Predicate that succeeds only when the wrapped parseable matches.
  class AndPredicate < Predicate
    # Attempts to parse string with the wrapped parseable.
    #
    # On success this method does not return normally: it throws
    # :AndPredicateSuccess (presumably caught by the enclosing parslet
    # combination -- confirm against callers).
    #
    # Raises ArgumentError if string is nil.
    # Raises ParseError if the wrapped parseable fails to match; the error
    # carries the current :line_start/:column_start from options as its
    # :line_end/:column_end.
    def parse string, options = {}
      raise ArgumentError, 'nil string' if string.nil?

      # a :ZeroWidthParseSuccess thrown by the wrapped parseable also counts
      # as success, so it is caught (and discarded) here
      catch :ZeroWidthParseSuccess do
        begin
          # only success/failure matters; the parse result itself is unused
          @parseable.memoizing_parse string, options
        rescue ParseError
          raise ParseError.new('predicate not satisfied (expected "%s") while parsing "%s"' % [@parseable.to_s, string],
                               :line_end    => options[:line_start],
                               :column_end  => options[:column_start])
        end
      end

      # getting this far means that parsing succeeded
      throw :AndPredicateSuccess
    end

  private

    # Constant offset specific to this predicate class.
    # NOTE(review): presumably mixed into the hash computed by Predicate so
    # that different predicate kinds hash differently -- confirm.
    def hash_offset
      12
    end
  end
end # module Walrat
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
module Walrat
  # A plain Array subclass whose only addition is the LocationTracking mixin.
  class ArrayResult < Array
    include LocationTracking
  end # class ArrayResult
end # module Walrat
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
module Walrat
  # Exception whose sole payload is a continuation, exposed through the
  # read-only "continuation" attribute. The exception message is the name of
  # the exception class itself.
  class ContinuationWrapperException < Exception
    attr_reader :continuation

    # Stores the given continuation on the new exception.
    #
    # Raises ArgumentError if continuation is nil.
    def initialize continuation
      if continuation.nil?
        raise ArgumentError, 'nil continuation'
      end
      message = self.class.to_s
      super(message)
      @continuation = continuation
    end
  end # class ContinuationWrapperException
end # module Walrat
|
@@ -0,0 +1,259 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
require 'walrat/additions/string.rb'
|
25
|
+
|
26
|
+
module Walrat
  # Base class for grammars. Subclasses describe themselves using the
  # class-level DSL below ("starting_symbol", "skipping", "rule", "node",
  # "production"); instances parse input via "parse".
  class Grammar
    class << self
      # Lazy reader for the rules hash.
      #
      # Initializes the hash the first time it is accessed. Looking up an
      # unknown key raises a RuntimeError.
      def rules
        @rules ||= Hash.new do |_hash, key|
          raise "no rule for key '#{key}'"
        end
      end

      # Lazy reader for the productions hash.
      #
      # Initializes the hash the first time it is accessed. Looking up an
      # unknown key raises a RuntimeError.
      def productions
        @productions ||= Hash.new do |_hash, key|
          raise "no production for key '#{key}'"
        end
      end

      # Lazy reader for the skipping overrides hash.
      #
      # Initializes the hash the first time it is accessed. Looking up an
      # unknown key raises a RuntimeError.
      def skipping_overrides
        @skipping_overrides ||= Hash.new do |_hash, key|
          raise "no skipping override for key '#{key}'"
        end
      end

      # Sets the starting symbol.
      #
      # Raises ArgumentError if the starting symbol has already been set.
      #
      # @param [Symbol] symbol a symbol which refers to a rule
      def starting_symbol symbol
        raise ArgumentError, 'starting symbol already set' if @starting_symbol
        @starting_symbol = symbol
      end

      # Returns the starting symbol.
      #
      # Note that the "starting_symbol" method can't be used as an accessor
      # because it is already used as part of the grammar-definition DSL.
      def start_rule
        @starting_symbol
      end

      # Sets the default parslet that is used for skipping inter-token
      # whitespace, and can be used to override the default on a rule-by-rule
      # basis.
      #
      # This allows for simpler grammars which do not need to explicitly put
      # optional whitespace parslets (or any other kind of parslet) between
      # elements.
      #
      # There are two modes of operation for this method. In the first mode
      # (when only one parameter is passed) the rule_or_parslet parameter is
      # used to define the default parslet for inter-token skipping.
      # rule_or_parslet must refer to a rule which itself is a Parslet or
      # ParsletCombination and which is responsible for skipping. Note that the
      # ability to pass an arbitrary parslet means that the notion of what
      # constitutes the "whitespace" that should be skipped is completely
      # flexible. Raises if a default skipping parslet has already been set.
      #
      # In the second mode of operation (when two parameters are passed) the
      # rule_or_parslet parameter is interpreted to be the rule to which an
      # override should be applied, where the parslet parameter specifies the
      # parslet to be used in this case. If nil is explicitly passed then this
      # overrides the default parslet; no parslet will be used for the purposes
      # of inter-token skipping. Raises if an override has already been set for
      # the named rule, or if the named rule does not exist.
      #
      # The inter-token parslet is passed inside the "options" hash when
      # invoking the "parse" methods. Any parser which fails will retry after
      # giving this inter-token parslet a chance to consume and discard
      # intervening whitespace.
      #
      # The initial, conservative implementation only performs this fallback
      # skipping for ParsletSequence and ParsletRepetition combinations.
      #
      # Raises if rule_or_parslet is nil.
      def skipping rule_or_parslet, parslet = NoParameterMarker.instance
        raise ArgumentError, 'nil rule_or_parslet' if rule_or_parslet.nil?
        if parslet == NoParameterMarker.instance
          # first mode of operation: set default parslet
          raise 'default skipping parslet already set' if @skipping
          @skipping = rule_or_parslet
        else
          # second mode of operation: override default case
          if skipping_overrides.has_key?(rule_or_parslet)
            raise ArgumentError,
              "skipping override already set for rule '#{rule_or_parslet}'"
          end
          unless rules.has_key?(rule_or_parslet)
            raise ArgumentError, "non-existent rule '#{rule_or_parslet}'"
          end
          skipping_overrides[rule_or_parslet] = parslet
        end
      end

      # Returns the default skipping rule.
      #
      # Note that we can't use "skipping" as the accessor method here because
      # it is already used as part of the grammar-definition DSL.
      def default_skipping_rule
        @skipping
      end

      # Defines a rule and stores it
      #
      # Expects an object that responds to the parse message, such as a Parslet
      # or ParsletCombination. As this is intended to work with Parsing
      # Expression Grammars, each rule may only be defined once. Defining a
      # rule more than once will raise an ArgumentError.
      def rule symbol, parseable
        raise ArgumentError, 'nil symbol' if symbol.nil?
        raise ArgumentError, 'nil parseable' if parseable.nil?
        if rules.has_key?(symbol)
          raise ArgumentError, "rule '#{symbol}' already defined"
        end
        rules[symbol] = parseable
      end

      # Dynamically creates a Node subclass inside the namespace of the current
      # grammar.
      #
      # This is used to create classes in a class hierarchy where no custom
      # behavior is required and therefore no actual file with an
      # implementation need be provided; an example from the Walrus grammar:
      #
      #   module Walrus
      #     class Grammar < Walrat::Grammar
      #       class Literal < Walrat::Node
      #         class StringLiteral < Literal
      #           class DoubleQuotedStringLiteral < StringLiteral
      #
      # In this example hierarchy the "Literal" class has custom behavior which
      # is shared by all subclasses, and the custom behavior is implemented in
      # the file "walrus/grammar/literal". The subclasses, however, have no
      # custom behavior and no associated file. They are dynamically
      # synthesized when the Walrus::Grammar class is first evaluated.
      #
      # parent_class may be a Class, or a name that is camel-cased and looked
      # up as a constant in the grammar's namespace.
      def node new_class_name, parent_class = Node
        raise ArgumentError, 'nil new_class_name' if new_class_name.nil?
        new_class_name = new_class_name.to_s.to_class_name # camel-case
        unless parent_class.kind_of? Class
          parent_class = const_get(parent_class.to_s.to_class_name)
        end
        const_set new_class_name, Class.new(parent_class)
      end

      # Specifies that a Node subclass will be used to encapsulate results
      # for the rule identified by the symbol, rule_name. The class name is
      # derived by converting the rule_name to camel-case.
      #
      # If no additional params are supplied then the class is assumed to
      # accept a single parameter named "lexeme" in its initialize method.
      #
      # If additional params are supplied then the class is expected to
      # accept the named params in its initialize method.
      #
      # As a convenience, the params will be sent to the specified class using
      # the "production" method, which sets up an appropriate initializer.
      #
      # Raises ArgumentError if rule_name is nil, if a production has already
      # been defined for the rule, or if the rule does not exist.
      #
      # For example:
      #
      #   # accepts a single parameter, "lexeme"
      #   production :symbol_literal
      #
      #   # accepts a single parameter, "content"
      #   production :multiline_comment, :content
      #
      #   # accepts three parameters, "identifier", "params" and "content"
      #   production :block_directive, :identifier, :params, :content
      #
      def production rule_name, *results
        raise ArgumentError, 'nil rule_name' if rule_name.nil?
        if productions.has_key?(rule_name)
          raise ArgumentError,
            "production already defined for rule '#{rule_name}'"
        end
        unless rules.has_key?(rule_name)
          raise ArgumentError, "non-existent rule '#{rule_name}'"
        end
        results = results.empty? ? [:lexeme] : results
        const_get(rule_name.to_s.to_class_name).production(*results)
        productions[rule_name] = results
      end

      # This method is called by the ParsletSequence and SymbolParslet classes
      # to possibly wrap a parse result in a production node.
      #
      # When a production exists for rule_name, a new node of the
      # corresponding class is built from result and returned; otherwise
      # result itself is returned. In both cases the "outer" location
      # information (start, end, source text), when present, takes precedence
      # over the inner information.
      def wrap result, rule_name
        if productions.has_key? rule_name.to_sym
          node_class  = const_get(rule_name.to_s.to_class_name)
          param_count = productions[rule_name.to_sym].length
          if param_count == 1
            node = node_class.new(result)
          else
            # multiple params: result is splatted across the initializer
            node = node_class.new(*result)
          end
          node.start        = (result.outer_start or result.start)              # propagate the start information
          node.end          = (result.outer_end or result.end)                  # and the end information
          node.source_text  = (result.outer_source_text or result.source_text)  # and the original source text
          node
        else
          result.start        = result.outer_start if result.outer_start
          result.end          = result.outer_end if result.outer_end
          # copy the outer source text (not the inner text onto itself)
          result.source_text  = result.outer_source_text if result.outer_source_text
          result
        end
      end
    end

    # Whether "parse" supplies a MemoizingCache to the parslets (defaults to
    # true).
    attr_accessor :memoizing

    def initialize
      @memoizing = true
    end

    # TODO: consider making grammars copiable (could be used in threaded context then)
    #def initialize_copy(from); end
    #def clone; end
    #def dupe; end

    # Parses string, starting with the rule named by starting_symbol, and
    # returns the result of that rule's parse.
    #
    # Note that the passed-in options hash is modified in place: the
    # :grammar, :rule_name, :skipping, :line_start, :column_start (and, when
    # memoizing is enabled, :memoizer) keys are overwritten.
    #
    # Raises ArgumentError if string is nil, and RuntimeError if no starting
    # symbol has been defined.
    def parse string, options = {}
      raise ArgumentError, 'nil string' if string.nil?
      raise 'starting symbol not defined' if self.class.start_rule.nil?
      options[:grammar]       = self.class
      options[:rule_name]     = self.class.start_rule
      options[:skipping]      = self.class.default_skipping_rule
      options[:line_start]    = 0 # "richer" information (more human-friendly) than that provided in "location"
      options[:column_start]  = 0 # "richer" information (more human-friendly) than that provided in "location"
      options[:memoizer]      = MemoizingCache.new if @memoizing
      self.class.start_rule.to_parseable.memoizing_parse string, options
    end

    # TODO: pretty print method?
  end # class Grammar
end # module Walrat
|