walrat 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/lib/walrat.rb +70 -0
  2. data/lib/walrat/additions/proc.rb +32 -0
  3. data/lib/walrat/additions/regexp.rb +33 -0
  4. data/lib/walrat/additions/string.rb +99 -0
  5. data/lib/walrat/additions/symbol.rb +42 -0
  6. data/lib/walrat/and_predicate.rb +49 -0
  7. data/lib/walrat/array_result.rb +29 -0
  8. data/lib/walrat/continuation_wrapper_exception.rb +35 -0
  9. data/lib/walrat/grammar.rb +259 -0
  10. data/lib/walrat/left_recursion_exception.rb +34 -0
  11. data/lib/walrat/location_tracking.rb +126 -0
  12. data/lib/walrat/match_data_wrapper.rb +84 -0
  13. data/lib/walrat/memoizing.rb +55 -0
  14. data/lib/walrat/memoizing_cache.rb +126 -0
  15. data/lib/walrat/no_parameter_marker.rb +30 -0
  16. data/lib/walrat/node.rb +63 -0
  17. data/lib/walrat/not_predicate.rb +49 -0
  18. data/lib/walrat/parse_error.rb +48 -0
  19. data/lib/walrat/parser_state.rb +205 -0
  20. data/lib/walrat/parslet.rb +38 -0
  21. data/lib/walrat/parslet_choice.rb +155 -0
  22. data/lib/walrat/parslet_combination.rb +34 -0
  23. data/lib/walrat/parslet_combining.rb +190 -0
  24. data/lib/walrat/parslet_merge.rb +96 -0
  25. data/lib/walrat/parslet_omission.rb +74 -0
  26. data/lib/walrat/parslet_repetition.rb +114 -0
  27. data/lib/walrat/parslet_repetition_default.rb +77 -0
  28. data/lib/walrat/parslet_sequence.rb +241 -0
  29. data/lib/walrat/predicate.rb +68 -0
  30. data/lib/walrat/proc_parslet.rb +60 -0
  31. data/lib/walrat/regexp_parslet.rb +84 -0
  32. data/lib/walrat/skipped_substring_exception.rb +46 -0
  33. data/lib/walrat/string_enumerator.rb +47 -0
  34. data/lib/walrat/string_parslet.rb +89 -0
  35. data/lib/walrat/string_result.rb +34 -0
  36. data/lib/walrat/symbol_parslet.rb +82 -0
  37. data/lib/walrat/version.rb +26 -0
  38. metadata +110 -0
@@ -0,0 +1,70 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ # Ruby 1.9 support
24
+ require 'continuation' unless Kernel.respond_to?(:callcc)
25
+
26
# Top-level namespace for the Walrat library.
#
# Evaluating this module body does two things:
#
# 1. on Ruby 1.8.x only, sets $KCODE to 'U' (UTF-8);
# 2. registers an autoload entry for every Walrat constant listed below, so
#    that the corresponding file under "walrat/" is only loaded when the
#    constant is first referenced.
module Walrat
  # Only the major and minor components of the version are compared.
  if RUBY_VERSION.split('.').first(2) == %w[1 8]
    $KCODE = 'U' # UTF-8 (necessary for Unicode support)
  end

  # Each entry is [constant name, path passed to autoload].
  [
    [:AndPredicate,                 'walrat/and_predicate'],
    [:ArrayResult,                  'walrat/array_result'],
    [:ContinuationWrapperException, 'walrat/continuation_wrapper_exception'],
    [:Grammar,                      'walrat/grammar'],
    [:LeftRecursionException,       'walrat/left_recursion_exception'],
    [:LocationTracking,             'walrat/location_tracking'],
    [:MatchDataWrapper,             'walrat/match_data_wrapper'],
    [:Memoizing,                    'walrat/memoizing'],
    [:MemoizingCache,               'walrat/memoizing_cache'],
    [:Node,                         'walrat/node'],
    [:NoParameterMarker,            'walrat/no_parameter_marker'],
    [:NotPredicate,                 'walrat/not_predicate'],
    [:ParseError,                   'walrat/parse_error'],
    [:ParserState,                  'walrat/parser_state'],

    # TODO: move these into subdirectory? directory for predicates also?
    [:Parslet,                      'walrat/parslet'],
    [:ParsletChoice,                'walrat/parslet_choice'],
    [:ParsletCombination,           'walrat/parslet_combination'],
    [:ParsletCombining,             'walrat/parslet_combining'],
    [:ParsletMerge,                 'walrat/parslet_merge'],
    [:ParsletOmission,              'walrat/parslet_omission'],
    [:ParsletRepetition,            'walrat/parslet_repetition'],
    [:ParsletRepetitionDefault,     'walrat/parslet_repetition_default'],
    [:ParsletSequence,              'walrat/parslet_sequence'],
    [:Predicate,                    'walrat/predicate'],
    [:ProcParslet,                  'walrat/proc_parslet'],
    [:RegexpParslet,                'walrat/regexp_parslet'],
    [:SkippedSubstringException,    'walrat/skipped_substring_exception'],
    [:StringEnumerator,             'walrat/string_enumerator'],
    [:StringParslet,                'walrat/string_parslet'],
    [:StringResult,                 'walrat/string_result'],
    [:SymbolParslet,                'walrat/symbol_parslet']
  ].each do |constant, path|
    # self is still Walrat inside the block, so this is Walrat.autoload
    autoload constant, path
  end
end # module Walrat
66
+
67
+ require 'walrat/additions/proc'
68
+ require 'walrat/additions/regexp'
69
+ require 'walrat/additions/string'
70
+ require 'walrat/additions/symbol'
@@ -0,0 +1,32 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
# Walrat additions to the core Proc class: mixes in Walrat::ParsletCombining
# and adds a conversion to a ProcParslet.
class Proc
  include Walrat::ParsletCombining

  # Returns a new Walrat::ProcParslet built from the receiver.
  def to_parseable
    Walrat::ProcParslet.new(self)
  end
end # class Proc
@@ -0,0 +1,33 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
# Walrat additions to the core Regexp class: mixes in Walrat::ParsletCombining
# and adds a conversion to a RegexpParslet.
class Regexp
  # Explicitly load the mixin before including it.
  require 'walrat/parslet_combining'
  include Walrat::ParsletCombining

  # Returns a new Walrat::RegexpParslet built from the receiver.
  def to_parseable
    Walrat::RegexpParslet.new(self)
  end
end # class Regexp
@@ -0,0 +1,99 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
# Walrat additions to the core String class:
#
# - character-level helpers for Unicode support (jlength, jindex_plus_length,
#   jrindex and a multi-byte friendly []);
# - the parslet combining methods (via Walrat::ParsletCombining);
# - convenience methods (to_parseable, enumerator);
# - conversion utility methods (to_class_name).
class String
  # Keep the built-in [] reachable so that the override below can delegate.
  alias old_range []

  # Returns the number of characters in the receiver, as counted by "chars".
  def jlength
    chars.to_a.length
  end

  # NOTE: this is a totally Walrat-specific implementation that is
  # unlikely to be of use anywhere else. It is used in only 1 place
  # in the codebase, and works around the fact that the MatchData
  # made available by the index method gets clobbered by the
  # "chars.to_a" call. The same thing happens for alternative
  # methods of counting the chars, such as using jlength or a manual
  # scan.
  #
  # One workaround is for the caller to re-perform the index call just
  # to get the MatchData again, but that is inefficient. So here we
  # just do the addition before returning the result to the caller.
  #
  # Returns nil when arg is not found in the receiver; otherwise returns the
  # length of the matched text plus the number of characters which precede
  # the match.
  #
  # NOTE(review): the match length is read from $~, so arg is presumably
  # always a Regexp -- confirm against the single caller.
  def jindex_plus_length(arg)
    i = index(arg)
    return nil unless i

    # $~ is read first (left operand), before "chars.to_a" can clobber it
    $~[0].length + unpack('C*')[0...i].pack('C*').chars.to_a.length
  end

  # Unlike the normal rindex method, the MatchData in $~ set by the inner
  # rindex call gets clobbered (by the "chars.to_a" call) and is not visible to
  # the caller of this method.
  #
  # Returns nil when arg is not found; otherwise the number of characters
  # which precede the last match. The optional offset is forwarded to rindex
  # only when the caller actually supplied it.
  def jrindex(arg, offset = Walrat::NoParameterMarker.instance)
    i = if offset == Walrat::NoParameterMarker.instance
          rindex arg
        else
          rindex arg, offset
        end
    i ? unpack('C*')[0...i].pack('C*').chars.to_a.length : nil
  end

  # multi-byte friendly [] implementation
  #
  # Only the single-argument Range form is handled at the character level;
  # every other form is delegated to the built-in implementation (old_range).
  #
  # Returns nil for a Range which lies outside the receiver, exactly as the
  # built-in [] does, rather than raising NoMethodError from calling "join"
  # on nil.
  def [](range, other = Walrat::NoParameterMarker.instance)
    return old_range(range, other) unless other == Walrat::NoParameterMarker.instance
    return old_range(range) unless range.kind_of? Range

    selected = chars.to_a[range] # nil when the range is out of bounds
    selected && selected.join
  end

  # Returns a character-level enumerator for the receiver.
  def enumerator
    Walrat::StringEnumerator.new self
  end

  # Rationale: it's ok to add "&" and "|" methods to string because they don't
  # exist yet (they're not overrides).
  include Walrat::ParsletCombining

  # Returns a StringParslet based on the receiver
  def to_parseable
    Walrat::StringParslet.new self
  end

  # Converts the receiver of the form "foo_bar" to "FooBar". Specifically, the
  # receiver is split into pieces delimited by underscores, each component is
  # then converted to capital case (the first letter is capitalized and the
  # remaining letters are lowercased) and finally the components are joined.
  def to_class_name
    split('_').map { |component| component.capitalize }.join
  end
end # class String
@@ -0,0 +1,42 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
# Walrat additions to the core Symbol class: mixes in Walrat::ParsletCombining
# and adds a conversion to a SymbolParslet.
class Symbol
  include Walrat::ParsletCombining

  # Returns a SymbolParslet based on the receiver.
  #
  # Symbols can be used in Grammars when specifying rules and productions to
  # refer to other rules and productions that have not been defined yet.
  # They can also be used to allow self-references within rules and
  # productions (recursion); for example:
  #
  #   rule :thing & :thing.optional & :other_thing
  #
  # Basically these SymbolParslets allow deferred evaluation of a rule or
  # production (deferred until parsing takes place) rather than being
  # evaluated at the time a rule or production is defined.
  def to_parseable
    Walrat::SymbolParslet.new(self)
  end
end # class Symbol
@@ -0,0 +1,49 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
module Walrat
  # Predicate which is satisfied when the wrapped parseable (@parseable)
  # parses successfully.
  class AndPredicate < Predicate
    # Tries to parse string using the wrapped parseable.
    #
    # This method never returns normally:
    #
    # - if the wrapped parse succeeds (or throws :ZeroWidthParseSuccess, which
    #   is caught here), :AndPredicateSuccess is thrown;
    # - if the wrapped parse raises ParseError, a new ParseError is raised
    #   whose end location is taken from options[:line_start] and
    #   options[:column_start].
    #
    # Raises ArgumentError if string is nil.
    def parse(string, options = {})
      raise ArgumentError if string.nil?

      catch :ZeroWidthParseSuccess do
        begin
          # the parse result itself is not needed, only whether it succeeded
          @parseable.memoizing_parse string, options
        rescue ParseError
          message = 'predicate not satisfied (expected "%s") while parsing "%s"' % [@parseable.to_s, string]
          raise ParseError.new(message,
                               :line_end => options[:line_start],
                               :column_end => options[:column_start])
        end
      end

      # getting this far means that parsing succeeded
      throw :AndPredicateSuccess
    end

    private

    # NOTE(review): presumably consumed by a hash implementation in the
    # Predicate superclass -- confirm there.
    def hash_offset
      12
    end
  end
end # module Walrat
@@ -0,0 +1,29 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
module Walrat
  # An Array subclass which mixes in LocationTracking and adds no behavior of
  # its own.
  class ArrayResult < Array
    include LocationTracking
  end # class ArrayResult
end # module Walrat
@@ -0,0 +1,35 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
module Walrat
  # Exception which carries a continuation object.
  #
  # Note that this inherits directly from Exception rather than from
  # StandardError, and that its message is always the name of the class.
  class ContinuationWrapperException < Exception
    # The continuation passed to the initializer.
    attr_reader :continuation

    # Stores continuation for later retrieval.
    #
    # Raises ArgumentError if continuation is nil.
    def initialize(continuation)
      if continuation.nil?
        raise ArgumentError, 'nil continuation'
      end

      @continuation = continuation
      super(self.class.to_s)
    end
  end # class ContinuationWrapperException
end # module Walrat
@@ -0,0 +1,259 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+ require 'walrat/additions/string.rb'
25
+
26
module Walrat
  # Base class for grammars.
  #
  # The class-level methods form a small DSL (starting_symbol, skipping, rule,
  # node, production) whose state is stored in instance variables of the
  # class on which they are called. Instances only carry the "memoizing" flag
  # and know how to start a parse from the starting symbol.
  class Grammar
    class << self
      # Lazy reader for the rules hash.
      #
      # Initializes the hash the first time it is accessed. Looking up a
      # missing key raises rather than returning nil.
      def rules
        @rules ||= Hash.new do |_hash, key|
          raise "no rule for key '#{key}'"
        end
      end

      # Lazy reader for the productions hash.
      #
      # Initializes the hash the first time it is accessed. Looking up a
      # missing key raises rather than returning nil.
      def productions
        @productions ||= Hash.new do |_hash, key|
          raise "no production for key '#{key}'"
        end
      end

      # Lazy reader for the skipping overrides hash.
      #
      # Initializes the hash the first time it is accessed. Looking up a
      # missing key raises rather than returning nil.
      def skipping_overrides
        @skipping_overrides ||= Hash.new do |_hash, key|
          raise "no skipping override for key '#{key}'"
        end
      end

      # Sets the starting symbol.
      #
      # Raises ArgumentError if a starting symbol has already been set.
      #
      # @param [Symbol] symbol a symbol which refers to a rule
      def starting_symbol(symbol)
        raise ArgumentError, 'starting symbol already set' if @starting_symbol
        @starting_symbol = symbol
      end

      # Returns the starting symbol.
      #
      # Note that the "starting_symbol" method can't be used as an accessor
      # because it is already used as part of the grammar-definition DSL.
      def start_rule
        @starting_symbol
      end

      # Sets the default parslet that is used for skipping inter-token
      # whitespace, and can be used to override the default on a rule-by-rule
      # basis.
      #
      # This allows for simpler grammars which do not need to explicitly put
      # optional whitespace parslets (or any other kind of parslet) between
      # elements.
      #
      # There are two modes of operation for this method. In the first mode
      # (when only one parameter is passed) the rule_or_parslet parameter is
      # used to define the default parslet for inter-token skipping.
      # rule_or_parslet must refer to a rule which itself is a Parslet or
      # ParsletCombination and which is responsible for skipping. Note that the
      # ability to pass an arbitrary parslet means that the notion of what
      # constitutes the "whitespace" that should be skipped is completely
      # flexible. Raises if a default skipping parslet has already been set.
      #
      # In the second mode of operation (when two parameters are passed) the
      # rule_or_parslet parameter is interpreted to be the rule to which an
      # override should be applied, where the parslet parameter specifies the
      # parslet to be used in this case. If nil is explicitly passed then this
      # overrides the default parslet; no parslet will be used for the purposes
      # of inter-token skipping. Raises if an override has already been set for
      # the named rule, or if the named rule does not exist.
      #
      # The inter-token parslet is passed inside the "options" hash when
      # invoking the "parse" methods. Any parser which fails will retry after
      # giving this inter-token parslet a chance to consume and discard
      # intervening whitespace.
      #
      # The initial, conservative implementation only performs this fallback
      # skipping for ParsletSequence and ParsletRepetition combinations.
      #
      # Raises if rule_or_parslet is nil.
      def skipping(rule_or_parslet, parslet = NoParameterMarker.instance)
        raise ArgumentError, 'nil rule_or_parslet' if rule_or_parslet.nil?

        if parslet == NoParameterMarker.instance
          # first mode of operation: set default parslet
          raise 'default skipping parslet already set' if @skipping
          @skipping = rule_or_parslet
        else
          # second mode of operation: override default case
          if skipping_overrides.has_key? rule_or_parslet
            raise ArgumentError,
                  "skipping override already set for rule '#{rule_or_parslet}'"
          end
          unless rules.has_key? rule_or_parslet
            raise ArgumentError, "non-existent rule '#{rule_or_parslet}'"
          end
          skipping_overrides[rule_or_parslet] = parslet
        end
      end

      # Returns the default skipping rule.
      #
      # Note that we can't use "skipping" as the accessor method here because
      # it is already used as part of the grammar-definition DSL.
      def default_skipping_rule
        @skipping
      end

      # Defines a rule and stores it
      #
      # Expects an object that responds to the parse message, such as a Parslet
      # or ParsletCombination. As this is intended to work with Parsing
      # Expression Grammars, each rule may only be defined once. Defining a
      # rule more than once will raise an ArgumentError, as will passing a nil
      # symbol or a nil parseable.
      def rule(symbol, parseable)
        raise ArgumentError, 'nil symbol' if symbol.nil?
        raise ArgumentError, 'nil parseable' if parseable.nil?
        if rules.has_key? symbol
          raise ArgumentError, "rule '#{symbol}' already defined"
        end
        rules[symbol] = parseable
      end

      # Dynamically creates a Node subclass inside the namespace of the current
      # grammar.
      #
      # This is used to create classes in a class hierarchy where no custom
      # behavior is required and therefore no actual file with an
      # implementation need be provided; an example from the Walrus grammar:
      #
      #   module Walrus
      #     class Grammar < Walrat::Grammar
      #       class Literal < Walrat::Node
      #         class StringLiteral < Literal
      #           class DoubleQuotedStringLiteral < StringLiteral
      #
      # In this example hierarchy the "Literal" class has custom behavior which
      # is shared by all subclasses, and the custom behavior is implemented in
      # the file "walrus/grammar/literal". The subclasses, however, have no
      # custom behavior and no associated file. They are dynamically
      # synthesized when the Walrus::Grammar class is first evaluated.
      #
      # parent_class may be a Class, or a name which is camel-cased and looked
      # up as a constant inside the grammar. Raises ArgumentError if
      # new_class_name is nil.
      def node(new_class_name, parent_class = Node)
        raise ArgumentError, 'nil new_class_name' if new_class_name.nil?
        new_class_name = new_class_name.to_s.to_class_name # camel-case
        unless parent_class.kind_of? Class
          parent_class = const_get parent_class.to_s.to_class_name
        end
        const_set new_class_name, Class.new(parent_class)
      end

      # Specifies that a Node subclass will be used to encapsulate results
      # for the rule identified by the symbol, rule_name. The class name is
      # derived by converting the rule_name to camel-case.
      #
      # If no additional params are supplied then the class is assumed to
      # accept a single parameter named "lexeme" in its initialize method.
      #
      # If additional params are supplied then the class is expected to
      # accept the named params in its initialize method.
      #
      # As a convenience, the params will be sent to the specified class using
      # the "production" method, which sets up an appropriate initializer.
      #
      # For example:
      #
      #   # accepts a single parameter, "lexeme"
      #   production :symbol_literal
      #
      #   # accepts a single parameter, "content"
      #   production :multiline_comment, :content
      #
      #   # accepts three parameters, "identifier", "params" and "content"
      #   production :block_directive, :identifier, :params, :content
      #
      # Raises ArgumentError if rule_name is nil, if a production has already
      # been defined for the rule, or if the rule does not exist.
      def production(rule_name, *results)
        raise ArgumentError, 'nil rule_name' if rule_name.nil?
        if productions.has_key?(rule_name)
          raise ArgumentError,
                "production already defined for rule '#{rule_name}'"
        end
        unless rules.has_key?(rule_name)
          raise ArgumentError, "non-existent rule '#{rule_name}'"
        end
        results = [:lexeme] if results.empty?
        const_get(rule_name.to_s.to_class_name).production(*results)
        productions[rule_name] = results
      end

      # This method is called by the ParsletSequence and SymbolParslet classes
      # to possibly wrap a parse result in a production node.
      #
      # If a production exists for rule_name, a new node of the corresponding
      # class is built from result (splatted when the production declares more
      # than one param) and returned. Otherwise result itself is returned.
      #
      # In both cases the "outer" location information (start, end and source
      # text), when present, takes precedence over the inner information.
      def wrap(result, rule_name)
        if productions.has_key? rule_name.to_sym
          node_class = const_get rule_name.to_s.to_class_name
          param_count = productions[rule_name.to_sym].length
          node = if param_count == 1
                   node_class.new result
                 else
                   node_class.new(*result)
                 end
          node.start = (result.outer_start || result.start) # propagate the start information
          node.end = (result.outer_end || result.end) # and the end information
          node.source_text = (result.outer_source_text || result.source_text) # and the original source text
          node
        else
          result.start = result.outer_start if result.outer_start
          result.end = result.outer_end if result.outer_end
          # propagate the outer source text, as is done for start and end
          result.source_text = result.outer_source_text if result.outer_source_text
          result
        end
      end
    end

    # Flag which controls whether #parse passes a MemoizingCache to the
    # parslets; true by default.
    attr_accessor :memoizing

    def initialize
      @memoizing = true
    end

    # TODO: consider making grammars copiable (could be used in threaded context then)
    #def initialize_copy(from); end
    #def clone; end
    #def dupe; end

    # Starts with starting_symbol.
    #
    # Populates the options hash (in place) with the grammar class, the
    # starting rule name, the default skipping rule, a zero line/column start
    # and, when memoizing is enabled, a fresh MemoizingCache; then parses
    # string beginning at the starting rule and returns the result.
    #
    # Raises ArgumentError if string is nil, and RuntimeError if no starting
    # symbol has been defined on the grammar class.
    def parse(string, options = {})
      raise ArgumentError, 'nil string' if string.nil?

      grammar = self.class
      raise 'starting symbol not defined' if grammar.start_rule.nil?

      options[:grammar] = grammar
      options[:rule_name] = grammar.start_rule
      options[:skipping] = grammar.default_skipping_rule
      options[:line_start] = 0 # "richer" information (more human-friendly) than that provided in "location"
      options[:column_start] = 0 # "richer" information (more human-friendly) than that provided in "location"
      options[:memoizer] = MemoizingCache.new if @memoizing
      grammar.start_rule.to_parseable.memoizing_parse string, options
    end

    # TODO: pretty print method?
  end # class Grammar
end # module Walrat