walrat 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/walrat.rb +70 -0
- data/lib/walrat/additions/proc.rb +32 -0
- data/lib/walrat/additions/regexp.rb +33 -0
- data/lib/walrat/additions/string.rb +99 -0
- data/lib/walrat/additions/symbol.rb +42 -0
- data/lib/walrat/and_predicate.rb +49 -0
- data/lib/walrat/array_result.rb +29 -0
- data/lib/walrat/continuation_wrapper_exception.rb +35 -0
- data/lib/walrat/grammar.rb +259 -0
- data/lib/walrat/left_recursion_exception.rb +34 -0
- data/lib/walrat/location_tracking.rb +126 -0
- data/lib/walrat/match_data_wrapper.rb +84 -0
- data/lib/walrat/memoizing.rb +55 -0
- data/lib/walrat/memoizing_cache.rb +126 -0
- data/lib/walrat/no_parameter_marker.rb +30 -0
- data/lib/walrat/node.rb +63 -0
- data/lib/walrat/not_predicate.rb +49 -0
- data/lib/walrat/parse_error.rb +48 -0
- data/lib/walrat/parser_state.rb +205 -0
- data/lib/walrat/parslet.rb +38 -0
- data/lib/walrat/parslet_choice.rb +155 -0
- data/lib/walrat/parslet_combination.rb +34 -0
- data/lib/walrat/parslet_combining.rb +190 -0
- data/lib/walrat/parslet_merge.rb +96 -0
- data/lib/walrat/parslet_omission.rb +74 -0
- data/lib/walrat/parslet_repetition.rb +114 -0
- data/lib/walrat/parslet_repetition_default.rb +77 -0
- data/lib/walrat/parslet_sequence.rb +241 -0
- data/lib/walrat/predicate.rb +68 -0
- data/lib/walrat/proc_parslet.rb +60 -0
- data/lib/walrat/regexp_parslet.rb +84 -0
- data/lib/walrat/skipped_substring_exception.rb +46 -0
- data/lib/walrat/string_enumerator.rb +47 -0
- data/lib/walrat/string_parslet.rb +89 -0
- data/lib/walrat/string_result.rb +34 -0
- data/lib/walrat/symbol_parslet.rb +82 -0
- data/lib/walrat/version.rb +26 -0
- metadata +110 -0
data/lib/walrat.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
# Ruby 1.9 support
|
24
|
+
require 'continuation' unless Kernel.respond_to?(:callcc)
|
25
|
+
|
26
|
+
# Top-level namespace of the library.
#
# Evaluating this module body does two things:
#
# 1. On Ruby 1.8 only, switches $KCODE to UTF-8.
# 2. Registers an autoload entry for each constant listed below, mapping the
#    constant name to the file that is expected to define it.
module Walrat
  major, minor = RUBY_VERSION.split('.')
  $KCODE = 'U' if major == '1' && minor == '8' # UTF-8 (necessary for Unicode support)

  autoload(:AndPredicate,                 'walrat/and_predicate')
  autoload(:ArrayResult,                  'walrat/array_result')
  autoload(:ContinuationWrapperException, 'walrat/continuation_wrapper_exception')
  autoload(:Grammar,                      'walrat/grammar')
  autoload(:LeftRecursionException,       'walrat/left_recursion_exception')
  autoload(:LocationTracking,             'walrat/location_tracking')
  autoload(:MatchDataWrapper,             'walrat/match_data_wrapper')
  autoload(:Memoizing,                    'walrat/memoizing')
  autoload(:MemoizingCache,               'walrat/memoizing_cache')
  autoload(:Node,                         'walrat/node')
  autoload(:NoParameterMarker,            'walrat/no_parameter_marker')
  autoload(:NotPredicate,                 'walrat/not_predicate')
  autoload(:ParseError,                   'walrat/parse_error')
  autoload(:ParserState,                  'walrat/parser_state')

  # TODO: move these into subdirectory? directory for predicates also?
  autoload(:Parslet,                      'walrat/parslet')
  autoload(:ParsletChoice,                'walrat/parslet_choice')
  autoload(:ParsletCombination,           'walrat/parslet_combination')
  autoload(:ParsletCombining,             'walrat/parslet_combining')
  autoload(:ParsletMerge,                 'walrat/parslet_merge')
  autoload(:ParsletOmission,              'walrat/parslet_omission')
  autoload(:ParsletRepetition,            'walrat/parslet_repetition')
  autoload(:ParsletRepetitionDefault,     'walrat/parslet_repetition_default')
  autoload(:ParsletSequence,              'walrat/parslet_sequence')
  autoload(:Predicate,                    'walrat/predicate')
  autoload(:ProcParslet,                  'walrat/proc_parslet')
  autoload(:RegexpParslet,                'walrat/regexp_parslet')
  autoload(:SkippedSubstringException,    'walrat/skipped_substring_exception')
  autoload(:StringEnumerator,             'walrat/string_enumerator')
  autoload(:StringParslet,                'walrat/string_parslet')
  autoload(:StringResult,                 'walrat/string_result')
  autoload(:SymbolParslet,                'walrat/symbol_parslet')
end # module Walrat
|
66
|
+
|
67
|
+
require 'walrat/additions/proc'
|
68
|
+
require 'walrat/additions/regexp'
|
69
|
+
require 'walrat/additions/string'
|
70
|
+
require 'walrat/additions/symbol'
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
# Reopens Proc to mix in Walrat::ParsletCombining and to add a conversion
# method that yields a parslet for the receiver.
class Proc
  include Walrat::ParsletCombining

  # Builds and returns a new Walrat::ProcParslet wrapping the receiver.
  def to_parseable
    Walrat::ProcParslet.new(self)
  end
end # class Proc
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
# Reopens Regexp to mix in Walrat::ParsletCombining and to add a conversion
# method that yields a parslet for the receiver.
class Regexp
  # The mixin is required explicitly here, before it is included.
  require('walrat/parslet_combining')
  include Walrat::ParsletCombining

  # Builds and returns a new Walrat::RegexpParslet wrapping the receiver.
  def to_parseable
    Walrat::RegexpParslet.new(self)
  end
end # class Regexp
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
# Additions to String class for Unicode support.
|
26
|
+
# Parslet combining methods.
|
27
|
+
# Convenience methods (to_parseable).
|
28
|
+
# Conversion utility methods.
|
29
|
+
class String
  # Keep the built-in String#[] reachable so the override below can delegate
  # to it for every case it does not handle itself.
  alias old_range []

  # Returns the number of characters in the receiver, counted via
  # "chars.to_a".
  def jlength
    chars.to_a.length
  end

  # NOTE: this is a totally Walrat-specific implementation that is
  # unlikely to be of use anywhere else. It is used in only 1 place
  # in the codebase, and works around the fact that the MatchData
  # made available by the index method gets clobbered by the
  # "chars.to_a" call. The same thing happens for alternative
  # methods of counting the chars, such as using jlength or a manual
  # scan.
  #
  # One workaround is for the caller to re-perform the index call just
  # to get the MatchData again, but that is inefficient. So here we
  # just do the addition before returning the result to the caller.
  #
  # Returns nil when "index(arg)" finds no match. Otherwise returns the
  # length of the matched text ($~[0]) plus the character count of the
  # portion of the receiver that precedes the match.
  def jindex_plus_length(arg)
    if i = index(arg)
      # $~ must be read before "chars.to_a" runs (see the note above), so the
      # match length is deliberately the left-hand operand.
      $~[0].length + unpack('C*')[0...i].pack('C*').chars.to_a.length
    end
  end

  # Unlike the normal rindex method, the MatchData in $~ set by the inner
  # rindex call gets clobbered (by the "chars.to_a" call) and is not visible to
  # the caller of this method.
  #
  # The optional offset is forwarded to rindex only when the caller actually
  # supplied one. Returns nil when rindex finds nothing.
  def jrindex(arg, offset = Walrat::NoParameterMarker.instance)
    i = if offset == Walrat::NoParameterMarker.instance
          rindex(arg)
        else
          rindex(arg, offset)
        end
    i ? unpack('C*')[0...i].pack('C*').chars.to_a.length : nil
  end

  # multi-byte friendly [] implementation
  #
  # Only the single-argument Range form is handled here (by slicing the
  # character array); every other call shape is passed straight to the
  # built-in implementation saved as old_range.
  #
  # Returns nil when the range starts beyond the last character, as the
  # built-in String#[] does.
  def [](range, other = Walrat::NoParameterMarker.instance)
    return old_range(range, other) unless other == Walrat::NoParameterMarker.instance
    return old_range(range) unless range.kind_of?(Range)

    # Array#[] returns nil for a range that starts past the end of the array;
    # pass that nil through instead of calling join on it.
    slice = chars.to_a[range]
    slice && slice.join
  end

  # Returns a character-level enumerator for the receiver.
  def enumerator
    Walrat::StringEnumerator.new(self)
  end

  # Rationale: it's ok to add "&" and "|" methods to string because they don't
  # exist yet (they're not overrides).
  include Walrat::ParsletCombining

  # Returns a StringParslet based on the receiver
  def to_parseable
    Walrat::StringParslet.new(self)
  end

  # Converts the receiver of the form "foo_bar" to "FooBar". Specifically, the
  # receiver is split into pieces delimited by underscores, each component is
  # then converted to capital case (the first letter is capitalized and the
  # remaining letters are lowercased) and finally the components are joined.
  def to_class_name
    split('_').map { |component| component.capitalize }.join
  end
end # class String
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
# Reopens Symbol to mix in Walrat::ParsletCombining and to add a conversion
# method that yields a parslet for the receiver.
class Symbol
  include Walrat::ParsletCombining

  # Returns a SymbolParslet based on the receiver.
  #
  # Symbols can be used in Grammars when specifying rules and productions to
  # refer to other rules and productions that have not been defined yet.
  # They can also be used to allow self-references within rules and productions
  # (recursion); for example:
  #
  #   rule :thing & :thing.optional & :other_thing
  #
  # Basically these SymbolParslets allow deferred evaluation of a rule or
  # production (deferred until parsing takes place) rather than being evaluated
  # at the time a rule or production is defined.
  def to_parseable
    Walrat::SymbolParslet.new(self)
  end
end # class Symbol
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
module Walrat
  # Predicate subclass whose parse method never returns normally: it either
  # raises or leaves via throw.
  class AndPredicate < Predicate
    # Runs the wrapped parseable (@parseable) against +string+.
    #
    # Raises ArgumentError when +string+ is nil. If the wrapped parseable
    # raises ParseError, a new ParseError is raised in its place whose
    # :line_end and :column_end are taken from options[:line_start] and
    # options[:column_start]. Otherwise the method exits by throwing
    # :AndPredicateSuccess.
    def parse(string, options = {})
      raise ArgumentError if string.nil?

      # A :ZeroWidthParseSuccess thrown from inside the attempt is caught here,
      # after which execution continues with the throw below.
      catch(:ZeroWidthParseSuccess) do
        begin
          @parseable.memoizing_parse(string, options)
        rescue ParseError
          message = 'predicate not satisfied (expected "%s") while parsing "%s"' % [@parseable.to_s, string]
          raise ParseError.new(message,
                               :line_end   => options[:line_start],
                               :column_end => options[:column_start])
        end
      end

      # getting this far means that parsing succeeded
      throw(:AndPredicateSuccess)
    end

  private

    # Constant returned for this class.
    # NOTE(review): presumably consumed by the Predicate superclass when
    # computing a hash; confirm against predicate.rb.
    def hash_offset
      12
    end
  end
end # module Walrat
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
module Walrat
  # An Array subclass with the LocationTracking mixin included; it adds no
  # behavior of its own.
  class ArrayResult < Array
    include LocationTracking
  end # class ArrayResult
end # module Walrat
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
|
25
|
+
module Walrat
  # Exception that carries a continuation object.
  #
  # Note that it descends directly from Exception (not StandardError), so a
  # bare "rescue" clause will not catch it.
  class ContinuationWrapperException < Exception
    # The continuation passed to the constructor.
    attr_reader :continuation

    # Stores +continuation+ and uses the class name as the exception message.
    #
    # Raises ArgumentError ('nil continuation') when +continuation+ is nil.
    def initialize(continuation)
      raise ArgumentError, 'nil continuation' if continuation.nil?

      @continuation = continuation
      super(self.class.to_s)
    end
  end # class ContinuationWrapperException
end # module Walrat
|
@@ -0,0 +1,259 @@
|
|
1
|
+
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
|
2
|
+
# Redistribution and use in source and binary forms, with or without
|
3
|
+
# modification, are permitted provided that the following conditions are met:
|
4
|
+
#
|
5
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
6
|
+
# this list of conditions and the following disclaimer.
|
7
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer in the documentation
|
9
|
+
# and/or other materials provided with the distribution.
|
10
|
+
#
|
11
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
12
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
13
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
14
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
15
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
16
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
17
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
18
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
19
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
20
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
21
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
22
|
+
|
23
|
+
require 'walrat'
|
24
|
+
require 'walrat/additions/string.rb'
|
25
|
+
|
26
|
+
module Walrat
  # Base class for grammars.
  #
  # Subclasses describe a grammar with the class-level DSL below
  # (starting_symbol, skipping, rule, node, production); instances parse
  # strings against that description via #parse. All DSL state is held in
  # instance variables of the grammar class itself.
  class Grammar
    class << self
      # Lazy reader for the rules hash.
      #
      # Initializes the hash the first time it is accessed. Looking up a key
      # that has no rule raises.
      def rules
        @rules ||= Hash.new do |_hash, key|
          raise "no rule for key '#{key}'"
        end
      end

      # Lazy reader for the productions hash.
      #
      # Initializes the hash the first time it is accessed. Looking up a key
      # that has no production raises.
      def productions
        @productions ||= Hash.new do |_hash, key|
          raise "no production for key '#{key}'"
        end
      end

      # Lazy reader for the skipping overrides hash.
      #
      # Initializes the hash the first time it is accessed. Looking up a key
      # that has no override raises.
      def skipping_overrides
        @skipping_overrides ||= Hash.new do |_hash, key|
          raise "no skipping override for key '#{key}'"
        end
      end

      # Sets the starting symbol.
      #
      # Raises ArgumentError if a starting symbol has already been set.
      #
      # @param [Symbol] symbol a symbol which refers to a rule
      def starting_symbol(symbol)
        raise ArgumentError, 'starting symbol already set' if @starting_symbol
        @starting_symbol = symbol
      end

      # Returns the starting symbol.
      #
      # Note that the "starting_symbol" method can't be used as an accessor
      # because it is already used as part of the grammar-definition DSL.
      def start_rule
        @starting_symbol
      end

      # Sets the default parslet that is used for skipping inter-token
      # whitespace, and can be used to override the default on a rule-by-rule
      # basis.
      #
      # This allows for simpler grammars which do not need to explicitly put
      # optional whitespace parslets (or any other kind of parslet) between
      # elements.
      #
      # There are two modes of operation for this method. In the first mode
      # (when only one parameter is passed) the rule_or_parslet parameter is
      # used to define the default parslet for inter-token skipping.
      # rule_or_parslet must refer to a rule which itself is a Parslet or
      # ParsletCombination and which is responsible for skipping. Note that the
      # ability to pass an arbitrary parslet means that the notion of what
      # constitutes the "whitespace" that should be skipped is completely
      # flexible. Raises if a default skipping parslet has already been set.
      #
      # In the second mode of operation (when two parameters are passed) the
      # rule_or_parslet parameter is interpreted to be the rule to which an
      # override should be applied, where the parslet parameter specifies the
      # parslet to be used in this case. If nil is explicitly passed then this
      # overrides the default parslet; no parslet will be used for the purposes
      # of inter-token skipping. Raises if an override has already been set for
      # the named rule, or if the named rule does not exist.
      #
      # The inter-token parslet is passed inside the "options" hash when
      # invoking the "parse" methods. Any parser which fails will retry after
      # giving this inter-token parslet a chance to consume and discard
      # intervening whitespace.
      #
      # The initial, conservative implementation only performs this fallback
      # skipping for ParsletSequence and ParsletRepetition combinations.
      #
      # Raises if rule_or_parslet is nil.
      def skipping(rule_or_parslet, parslet = NoParameterMarker.instance)
        raise ArgumentError, 'nil rule_or_parslet' if rule_or_parslet.nil?
        if parslet == NoParameterMarker.instance
          # first mode of operation: set default parslet
          raise 'default skipping parslet already set' if @skipping
          @skipping = rule_or_parslet
        else
          # second mode of operation: override default case
          if skipping_overrides.has_key?(rule_or_parslet)
            raise ArgumentError,
              "skipping override already set for rule '#{rule_or_parslet}'"
          end
          unless rules.has_key?(rule_or_parslet)
            raise ArgumentError, "non-existent rule '#{rule_or_parslet}'"
          end
          skipping_overrides[rule_or_parslet] = parslet
        end
      end

      # Returns the default skipping rule.
      #
      # Note that we can't use "skipping" as the accessor method here because
      # it is already used as part of the grammar-definition DSL.
      def default_skipping_rule
        @skipping
      end

      # Defines a rule and stores it
      #
      # Expects an object that responds to the parse message, such as a Parslet
      # or ParsletCombination. As this is intended to work with Parsing
      # Expression Grammars, each rule may only be defined once. Defining a
      # rule more than once will raise an ArgumentError, as will passing nil
      # for either parameter.
      def rule(symbol, parseable)
        raise ArgumentError, 'nil symbol' if symbol.nil?
        raise ArgumentError, 'nil parseable' if parseable.nil?
        raise ArgumentError,
          "rule '#{symbol}' already defined" if rules.has_key?(symbol)
        rules[symbol] = parseable
      end

      # Dynamically creates a Node subclass inside the namespace of the current
      # grammar.
      #
      # This is used to create classes in a class hierarchy where no custom
      # behavior is required and therefore no actual file with an implementation
      # need be provided; an example from the Walrus grammar:
      #
      #   module Walrus
      #     class Grammar < Walrat::Grammar
      #       class Literal < Walrat::Node
      #         class StringLiteral < Literal
      #           class DoubleQuotedStringLiteral < StringLiteral
      #
      # In this example hierarchy the "Literal" class has custom behavior which
      # is shared by all subclasses, and the custom behavior is implemented in
      # the file "walrus/grammar/literal". The subclasses, however, have no
      # custom behavior and no associated file. They are dynamically
      # synthesized when the Walrus::Grammar class is first evaluated.
      #
      # parent_class may be a Class, or a name that is camel-cased and looked
      # up as a constant of the grammar. Raises ArgumentError if new_class_name
      # is nil.
      def node(new_class_name, parent_class = Node)
        raise ArgumentError, 'nil new_class_name' if new_class_name.nil?
        new_class_name = new_class_name.to_s.to_class_name # camel-case
        unless parent_class.kind_of?(Class)
          parent_class = const_get(parent_class.to_s.to_class_name)
        end
        const_set(new_class_name, Class.new(parent_class))
      end

      # Specifies that a Node subclass will be used to encapsulate results
      # for the rule identified by the symbol, rule_name. The class name is
      # derived by converting the rule_name to camel-case.
      #
      # If no additional params are supplied then the class is assumed to
      # accept a single parameter named "lexeme" in its initialize method.
      #
      # If additional params are supplied then the class is expected to
      # accept the named params in its initialize method.
      #
      # As a convenience, the params will be sent to the specified class using
      # the "production" method, which sets up an appropriate initializer.
      #
      # For example:
      #
      #   # accepts a single parameter, "lexeme"
      #   production :symbol_literal
      #
      #   # accepts a single parameter, "content"
      #   production :multiline_comment, :content
      #
      #   # accepts three parameters, "identifier", "params" and "content"
      #   production :block_directive, :identifier, :params, :content
      #
      # Raises ArgumentError if rule_name is nil, if a production has already
      # been defined for the rule, or if the rule does not exist.
      def production(rule_name, *results)
        raise ArgumentError, 'nil rule_name' if rule_name.nil?
        raise ArgumentError,
          "production already defined for rule '#{rule_name}'" if
          productions.has_key?(rule_name)
        raise ArgumentError, "non-existent rule '#{rule_name}'" unless
          rules.has_key?(rule_name)
        results = results.empty? ? [:lexeme] : results
        const_get(rule_name.to_s.to_class_name).production(*results)
        productions[rule_name] = results
      end

      # This method is called by the ParsletSequence and SymbolParslet classes
      # to possibly wrap a parse result in a production node.
      #
      # When a production exists for rule_name, a new node of the matching
      # class is built from the result and returned, with start, end and
      # source text copied from the result (preferring the "outer" values
      # when present). Otherwise the result itself is returned, after its own
      # start, end and source text have been replaced by any "outer" values it
      # carries.
      def wrap(result, rule_name)
        if productions.has_key?(rule_name.to_sym)
          node_class  = const_get(rule_name.to_s.to_class_name)
          param_count = productions[rule_name.to_sym].length
          if param_count == 1
            node = node_class.new(result)
          else
            node = node_class.new(*result)
          end
          node.start        = (result.outer_start or result.start)              # propagate the start information
          node.end          = (result.outer_end or result.end)                  # and the end information
          node.source_text  = (result.outer_source_text or result.source_text)  # and the original source text
          node
        else
          result.start        = result.outer_start if result.outer_start
          result.end          = result.outer_end if result.outer_end
          # propagate the outer source text, in line with start/end above
          result.source_text  = result.outer_source_text if result.outer_source_text
          result
        end
      end
    end

    # Whether #parse installs a MemoizingCache in the options it passes down
    # (true by default).
    attr_accessor :memoizing

    def initialize
      @memoizing = true
    end

    # TODO: consider making grammars copiable (could be used in threaded context then)
    #def initialize_copy(from); end
    #def clone; end
    #def dupe; end

    # Starts with starting_symbol.
    #
    # Fills in the :grammar, :rule_name, :skipping, :line_start and
    # :column_start entries of options (plus :memoizer when memoizing is
    # enabled) and then hands the string to the parseable for the start rule.
    # Note that the options hash supplied by the caller is modified in place.
    #
    # Raises ArgumentError if string is nil and raises if no starting symbol
    # has been defined for the grammar class.
    def parse(string, options = {})
      raise ArgumentError, 'nil string' if string.nil?
      raise 'starting symbol not defined' if self.class.start_rule.nil?
      options[:grammar]       = self.class
      options[:rule_name]     = self.class.start_rule
      options[:skipping]      = self.class.default_skipping_rule
      options[:line_start]    = 0 # "richer" information (more human-friendly) than that provided in "location"
      options[:column_start]  = 0 # "richer" information (more human-friendly) than that provided in "location"
      options[:memoizer]      = MemoizingCache.new if @memoizing
      self.class.start_rule.to_parseable.memoizing_parse(string, options)
    end

    # TODO: pretty print method?
  end # class Grammar
end # module Walrat
|