walrat 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/lib/walrat.rb +70 -0
  2. data/lib/walrat/additions/proc.rb +32 -0
  3. data/lib/walrat/additions/regexp.rb +33 -0
  4. data/lib/walrat/additions/string.rb +99 -0
  5. data/lib/walrat/additions/symbol.rb +42 -0
  6. data/lib/walrat/and_predicate.rb +49 -0
  7. data/lib/walrat/array_result.rb +29 -0
  8. data/lib/walrat/continuation_wrapper_exception.rb +35 -0
  9. data/lib/walrat/grammar.rb +259 -0
  10. data/lib/walrat/left_recursion_exception.rb +34 -0
  11. data/lib/walrat/location_tracking.rb +126 -0
  12. data/lib/walrat/match_data_wrapper.rb +84 -0
  13. data/lib/walrat/memoizing.rb +55 -0
  14. data/lib/walrat/memoizing_cache.rb +126 -0
  15. data/lib/walrat/no_parameter_marker.rb +30 -0
  16. data/lib/walrat/node.rb +63 -0
  17. data/lib/walrat/not_predicate.rb +49 -0
  18. data/lib/walrat/parse_error.rb +48 -0
  19. data/lib/walrat/parser_state.rb +205 -0
  20. data/lib/walrat/parslet.rb +38 -0
  21. data/lib/walrat/parslet_choice.rb +155 -0
  22. data/lib/walrat/parslet_combination.rb +34 -0
  23. data/lib/walrat/parslet_combining.rb +190 -0
  24. data/lib/walrat/parslet_merge.rb +96 -0
  25. data/lib/walrat/parslet_omission.rb +74 -0
  26. data/lib/walrat/parslet_repetition.rb +114 -0
  27. data/lib/walrat/parslet_repetition_default.rb +77 -0
  28. data/lib/walrat/parslet_sequence.rb +241 -0
  29. data/lib/walrat/predicate.rb +68 -0
  30. data/lib/walrat/proc_parslet.rb +60 -0
  31. data/lib/walrat/regexp_parslet.rb +84 -0
  32. data/lib/walrat/skipped_substring_exception.rb +46 -0
  33. data/lib/walrat/string_enumerator.rb +47 -0
  34. data/lib/walrat/string_parslet.rb +89 -0
  35. data/lib/walrat/string_result.rb +34 -0
  36. data/lib/walrat/symbol_parslet.rb +82 -0
  37. data/lib/walrat/version.rb +26 -0
  38. metadata +110 -0
@@ -0,0 +1,70 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ # Ruby 1.9 support
24
+ require 'continuation' unless Kernel.respond_to?(:callcc)
25
+
26
# Top-level namespace of the library.
#
# Evaluating this module body does two things: on Ruby 1.8 it switches the
# interpreter to UTF-8 via $KCODE, and it registers an autoload entry for
# every constant listed below so that the corresponding file is only required
# when the constant is first referenced.
module Walrat
  # UTF-8 (necessary for Unicode support); only applies to Ruby 1.8.x
  $KCODE = 'U' if RUBY_VERSION.split('.')[0, 2] == %w[1 8]

  # Pairs of [constant name, feature path] handed to autoload, in order.
  #
  # TODO: move the parslets into a subdirectory? directory for predicates also?
  [
    [:AndPredicate,                 'walrat/and_predicate'],
    [:ArrayResult,                  'walrat/array_result'],
    [:ContinuationWrapperException, 'walrat/continuation_wrapper_exception'],
    [:Grammar,                      'walrat/grammar'],
    [:LeftRecursionException,       'walrat/left_recursion_exception'],
    [:LocationTracking,             'walrat/location_tracking'],
    [:MatchDataWrapper,             'walrat/match_data_wrapper'],
    [:Memoizing,                    'walrat/memoizing'],
    [:MemoizingCache,               'walrat/memoizing_cache'],
    [:Node,                         'walrat/node'],
    [:NoParameterMarker,            'walrat/no_parameter_marker'],
    [:NotPredicate,                 'walrat/not_predicate'],
    [:ParseError,                   'walrat/parse_error'],
    [:ParserState,                  'walrat/parser_state'],
    [:Parslet,                      'walrat/parslet'],
    [:ParsletChoice,                'walrat/parslet_choice'],
    [:ParsletCombination,           'walrat/parslet_combination'],
    [:ParsletCombining,             'walrat/parslet_combining'],
    [:ParsletMerge,                 'walrat/parslet_merge'],
    [:ParsletOmission,              'walrat/parslet_omission'],
    [:ParsletRepetition,            'walrat/parslet_repetition'],
    [:ParsletRepetitionDefault,     'walrat/parslet_repetition_default'],
    [:ParsletSequence,              'walrat/parslet_sequence'],
    [:Predicate,                    'walrat/predicate'],
    [:ProcParslet,                  'walrat/proc_parslet'],
    [:RegexpParslet,                'walrat/regexp_parslet'],
    [:SkippedSubstringException,    'walrat/skipped_substring_exception'],
    [:StringEnumerator,             'walrat/string_enumerator'],
    [:StringParslet,                'walrat/string_parslet'],
    [:StringResult,                 'walrat/string_result'],
    [:SymbolParslet,                'walrat/symbol_parslet']
  ].each do |constant, feature|
    autoload constant, feature
  end
end # module Walrat
66
+
67
+ require 'walrat/additions/proc'
68
+ require 'walrat/additions/regexp'
69
+ require 'walrat/additions/string'
70
+ require 'walrat/additions/symbol'
@@ -0,0 +1,32 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
# Walrat additions to the core Proc class.
class Proc
  # Mix Walrat::ParsletCombining into every Proc.
  include Walrat::ParsletCombining

  # Wraps the receiver in a new Walrat::ProcParslet and returns it.
  def to_parseable
    Walrat::ProcParslet.new(self)
  end
end # class Proc
@@ -0,0 +1,33 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
# Walrat additions to the core Regexp class.
class Regexp
  # The mixin is required explicitly here before being included.
  require 'walrat/parslet_combining'
  include Walrat::ParsletCombining

  # Wraps the receiver in a new Walrat::RegexpParslet and returns it.
  def to_parseable
    Walrat::RegexpParslet.new(self)
  end
end # class Regexp
@@ -0,0 +1,99 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
# Additions to String class for Unicode support.
# Parslet combining methods.
# Convenience methods (to_parseable).
# Conversion utility methods.
class String
  # Keep the built-in String#[] reachable under another name; the override
  # defined further down delegates to it.
  alias old_range []

  # Returns the number of characters in the receiver, obtained by counting
  # the elements yielded by "chars".
  def jlength
    chars.to_a.length
  end

  # NOTE: this is a totally Walrat-specific implementation that is
  # unlikely to be of use anywhere else. It is used in only 1 place
  # in the codebase, and works around the fact that the MatchData
  # made available by the index method gets clobbered by the
  # "chars.to_a" call. The same thing happens for alternative
  # methods of counting the chars, such as using jlength or a manual
  # scan.
  #
  # One workaround is for the caller to re-perform the index call just
  # to get the MatchData again, but that is inefficient. So here we
  # just do the addition before returning the result to the caller.
  #
  # Returns nil when index finds no match. The length of the match is read
  # from $~, so this relies on the index call having set it.
  # NOTE(review): presumably callers always pass a Regexp - confirm.
  def jindex_plus_length arg
    if i = index(arg)
      $~[0].length + unpack('C*')[0...i].pack('C*').chars.to_a.length
    end
  end

  # Unlike the normal rindex method, the MatchData in $~ set by the inner
  # rindex call gets clobbered (by the "chars.to_a" call) and is not visible to
  # the caller of this method.
  #
  # The offset is only forwarded to rindex when the caller supplied one.
  # Returns nil when rindex finds nothing.
  def jrindex arg, offset = Walrat::NoParameterMarker.instance
    if offset == Walrat::NoParameterMarker.instance
      i = rindex arg
    else
      i = rindex arg, offset
    end
    i ? unpack('C*')[0...i].pack('C*').chars.to_a.length : nil
  end

  # multi-byte friendly [] implementation
  #
  # Only the single-argument Range form is handled character-wise here; every
  # other call shape is forwarded unchanged to the built-in implementation.
  def [](range, other = Walrat::NoParameterMarker.instance)
    if other == Walrat::NoParameterMarker.instance
      if range.kind_of? Range
        # Array#[] answers nil for a range that starts beyond the end of the
        # array; hand that nil back (as the built-in String#[] does) instead
        # of calling join on it and raising NoMethodError.
        selected = chars.to_a[range]
        selected && selected.join
      else
        old_range range
      end
    else
      old_range range, other
    end
  end

  # Returns a character-level enumerator for the receiver.
  def enumerator
    Walrat::StringEnumerator.new self
  end

  # Rationale: it's ok to add "&" and "|" methods to string because they don't
  # exist yet (they're not overrides).
  include Walrat::ParsletCombining

  # Returns a StringParslet based on the receiver
  def to_parseable
    Walrat::StringParslet.new self
  end

  # Converts the receiver of the form "foo_bar" to "FooBar". Specifically, the
  # receiver is split into pieces delimited by underscores, each component is
  # then converted to capital case (the first letter is capitalized and the
  # remaining letters are lowercased) and finally the components are joined.
  def to_class_name
    split('_').collect { |component| component.capitalize }.join
  end
end # class String
@@ -0,0 +1,42 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
# Walrat additions to the core Symbol class.
class Symbol
  # Mix Walrat::ParsletCombining into every Symbol.
  include Walrat::ParsletCombining

  # Wraps the receiver in a new Walrat::SymbolParslet and returns it.
  #
  # Inside a Grammar, a Symbol stands in for a rule or production that may not
  # have been defined yet, and it also makes self-reference (recursion)
  # possible within a rule or production, for example:
  #
  #   rule :thing & :thing.optional & :other_thing
  #
  # In other words the SymbolParslet defers evaluation of the named rule or
  # production until parsing takes place, rather than evaluating it at the
  # time the rule or production is defined.
  def to_parseable
    Walrat::SymbolParslet.new(self)
  end
end # class Symbol
@@ -0,0 +1,49 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
module Walrat
  # Predicate that succeeds only when the wrapped parseable can be parsed.
  class AndPredicate < Predicate
    # Attempts to parse string with the wrapped parseable.
    #
    # This method does not return normally: it either raises or throws.
    #
    # Raises ArgumentError if string is nil.
    # Raises ParseError (carrying the line/column start found in options as
    # the end location) if the wrapped parseable raises ParseError.
    # Throws :AndPredicateSuccess otherwise, including when the wrapped
    # parseable itself throws :ZeroWidthParseSuccess.
    def parse(string, options = {})
      raise ArgumentError if string.nil?

      catch(:ZeroWidthParseSuccess) do
        begin
          @parseable.memoizing_parse(string, options)
        rescue ParseError
          message = 'predicate not satisfied (expected "%s") while parsing "%s"' % [@parseable.to_s, string]
          raise ParseError.new(message,
                               :line_end   => options[:line_start],
                               :column_end => options[:column_start])
        end
      end

      # reaching this point means the wrapped parseable did not fail
      throw :AndPredicateSuccess
    end

    private

    # Constant offset for this predicate class.
    # NOTE(review): presumably consumed by Predicate when computing hashes -
    # confirm against walrat/predicate.
    def hash_offset
      12
    end
  end
end # module Walrat
@@ -0,0 +1,29 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
module Walrat
  # An Array subclass that additionally mixes in LocationTracking.
  class ArrayResult < Array
    include(LocationTracking)
  end # class ArrayResult
end # module Walrat
@@ -0,0 +1,35 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+
25
module Walrat
  # Exception that carries a continuation object along with it.
  #
  # Note that it descends directly from Exception, so a bare "rescue" (which
  # only catches StandardError) will not intercept it.
  class ContinuationWrapperException < Exception
    # The continuation handed to the constructor.
    attr_reader :continuation

    # Stores the continuation and uses the class name as the exception message.
    #
    # Raises ArgumentError if continuation is nil.
    def initialize(continuation)
      if continuation.nil?
        raise ArgumentError, 'nil continuation'
      end

      super(self.class.to_s)
      @continuation = continuation
    end
  end # class ContinuationWrapperException
end # module Walrat
@@ -0,0 +1,259 @@
1
+ # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
8
+ # this list of conditions and the following disclaimer in the documentation
9
+ # and/or other materials provided with the distribution.
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21
+ # POSSIBILITY OF SUCH DAMAGE.
22
+
23
+ require 'walrat'
24
+ require 'walrat/additions/string.rb'
25
+
26
module Walrat
  # Base class for grammars.
  #
  # Subclasses describe a grammar with the class-level DSL defined below
  # (starting_symbol, skipping, rule, node, production); instances parse
  # strings according to that description via #parse.
  class Grammar
    class << self
      # Lazy reader for the rules hash.
      #
      # Initializes the hash the first time it is accessed. Looking up a
      # missing key with [] raises; has_key? does not.
      def rules
        @rules ||= Hash.new do |hash, key|
          raise "no rule for key '#{key}'"
        end
      end

      # Lazy reader for the productions hash.
      #
      # Initializes the hash the first time it is accessed. Looking up a
      # missing key with [] raises; has_key? does not.
      def productions
        @productions ||= Hash.new do |hash, key|
          raise "no production for key '#{key}'"
        end
      end

      # Lazy reader for the skipping overrides hash.
      #
      # Initializes the hash the first time it is accessed. Looking up a
      # missing key with [] raises; has_key? does not.
      def skipping_overrides
        @skipping_overrides ||= Hash.new do |hash, key|
          raise "no skipping override for key '#{key}'"
        end
      end

      # Sets the starting symbol.
      #
      # Raises an ArgumentError if a starting symbol has already been set.
      #
      # @param [Symbol] symbol a symbol which refers to a rule
      def starting_symbol symbol
        raise ArgumentError, 'starting symbol already set' if @starting_symbol
        @starting_symbol = symbol
      end

      # Returns the starting symbol.
      #
      # Note that the "starting_symbol" method can't be used as an accessor
      # because it is already used as part of the grammar-definition DSL.
      def start_rule
        @starting_symbol
      end

      # Sets the default parslet that is used for skipping inter-token
      # whitespace, and can be used to override the default on a rule-by-rule
      # basis.
      #
      # This allows for simpler grammars which do not need to explicitly put
      # optional whitespace parslets (or any other kind of parslet) between
      # elements.
      #
      # There are two modes of operation for this method. In the first mode
      # (when only one parameter is passed) the rule_or_parslet parameter is
      # used to define the default parslet for inter-token skipping.
      # rule_or_parslet must refer to a rule which itself is a Parslet or
      # ParsletCombination and which is responsible for skipping. Note that the
      # ability to pass an arbitrary parslet means that the notion of what
      # constitutes the "whitespace" that should be skipped is completely
      # flexible. Raises if a default skipping parslet has already been set.
      #
      # In the second mode of operation (when two parameters are passed) the
      # rule_or_parslet parameter is interpreted to be the rule to which an
      # override should be applied, where the parslet parameter specifies the
      # parslet to be used in this case. If nil is explicitly passed then this
      # overrides the default parslet; no parslet will be used for the purposes
      # of inter-token skipping. Raises if an override has already been set for
      # the named rule, or if the named rule does not exist.
      #
      # The inter-token parslet is passed inside the "options" hash when
      # invoking the "parse" methods. Any parser which fails will retry after
      # giving this inter-token parslet a chance to consume and discard
      # intervening whitespace.
      #
      # The initial, conservative implementation only performs this fallback
      # skipping for ParsletSequence and ParsletRepetition combinations.
      #
      # Raises if rule_or_parslet is nil.
      def skipping rule_or_parslet, parslet = NoParameterMarker.instance
        raise ArgumentError, 'nil rule_or_parslet' if rule_or_parslet.nil?
        if parslet == NoParameterMarker.instance
          # first mode of operation: set default parslet
          raise 'default skipping parslet already set' if @skipping
          @skipping = rule_or_parslet
        else
          # second mode of operation: override default case
          raise ArgumentError,
            "skipping override already set for rule '#{rule_or_parslet}'" if
            skipping_overrides.has_key? rule_or_parslet
          raise ArgumentError,
            "non-existent rule '#{rule_or_parslet}'" unless
            rules.has_key? rule_or_parslet
          skipping_overrides[rule_or_parslet] = parslet
        end
      end

      # Returns the default skipping rule.
      #
      # Note that we can't use "skipping" as the accessor method here because
      # it is already used as part of the grammar-definition DSL.
      def default_skipping_rule
        @skipping
      end

      # Defines a rule and stores it
      #
      # Expects an object that responds to the parse message, such as a Parslet
      # or ParsletCombination. As this is intended to work with Parsing
      # Expression Grammars, each rule may only be defined once. Defining a
      # rule more than once will raise an ArgumentError, as will passing nil
      # for either parameter.
      def rule symbol, parseable
        raise ArgumentError, 'nil symbol' if symbol.nil?
        raise ArgumentError, 'nil parseable' if parseable.nil?
        raise ArgumentError,
          "rule '#{symbol}' already defined" if rules.has_key? symbol
        rules[symbol] = parseable
      end

      # Dynamically creates a Node subclass inside the namespace of the current
      # grammar.
      #
      # This is used to create classes in a class hierarchy where no custom
      # behavior is required and therefore no actual file with an implementation
      # need be provided; an example from the Walrus grammar:
      #
      #   module Walrus
      #     class Grammar < Walrat::Grammar
      #       class Literal < Walrat::Node
      #         class StringLiteral < Literal
      #           class DoubleQuotedStringLiteral < StringLiteral
      #
      # In this example hierarchy the "Literal" class has custom behavior which
      # is shared by all subclasses, and the custom behavior is implemented in
      # the file "walrus/grammar/literal". The subclasses, however, have no
      # custom behavior and no associated file. They are dynamically
      # synthesized when the Walrus::Grammar class is first evaluated.
      #
      # parent_class may be a Class, or a name that is camel-cased and looked
      # up as a constant of the grammar. Raises ArgumentError if
      # new_class_name is nil.
      def node new_class_name, parent_class = Node
        raise ArgumentError, 'nil new_class_name' if new_class_name.nil?
        new_class_name = new_class_name.to_s.to_class_name # camel-case
        unless parent_class.kind_of? Class
          parent_class = const_get parent_class.to_s.to_class_name
        end
        const_set new_class_name, Class.new(parent_class)
      end

      # Specifies that a Node subclass will be used to encapsulate results
      # for the rule identified by the symbol, rule_name. The class name is
      # derived by converting the rule_name to camel-case.
      #
      # If no additional params are supplied then the class is assumed to
      # accept a single parameter named "lexeme" in its initialize method.
      #
      # If additional params are supplied then the class is expected to
      # accept the named params in its initialize method.
      #
      # As a convenience, the params will be sent to the specified class using
      # the "production" method, which sets up an appropriate initializer.
      #
      # For example:
      #
      #   # accepts a single parameter, "lexeme"
      #   production :symbol_literal
      #
      #   # accepts a single parameter, "content"
      #   production :multiline_comment, :content
      #
      #   # accepts three parameters, "identifier", "params" and "content"
      #   production :block_directive, :identifier, :params, :content
      #
      # Raises ArgumentError if rule_name is nil, if a production has already
      # been defined for the rule, or if the rule does not exist.
      def production rule_name, *results
        raise ArgumentError, 'nil rule_name' if rule_name.nil?
        raise ArgumentError,
          "production already defined for rule '#{rule_name}'" if
          productions.has_key?(rule_name)
        raise ArgumentError, "non-existent rule '#{rule_name}'" unless
          rules.has_key?(rule_name)
        results = results.empty? ? [:lexeme] : results
        const_get(rule_name.to_s.to_class_name).production(*results)
        productions[rule_name] = results
      end

      # This method is called by the ParsletSequence and SymbolParslet classes
      # to possibly wrap a parse result in a production node.
      #
      # When a production exists for rule_name, a new node of the matching
      # class is built from result and returned; otherwise result itself is
      # returned. In both cases the "outer" location information of result
      # (start, end and source text), when present, takes precedence over the
      # plain values.
      def wrap result, rule_name
        if productions.has_key? rule_name.to_sym
          node_class  = const_get rule_name.to_s.to_class_name
          param_count = productions[rule_name.to_sym].length
          if param_count == 1
            node = node_class.new result
          else
            node = node_class.new(*result)
          end
          node.start        = (result.outer_start or result.start)              # propagate the start information
          node.end          = (result.outer_end or result.end)                  # and the end information
          node.source_text  = (result.outer_source_text or result.source_text)  # and the original source text
          node
        else
          result.start        = result.outer_start if result.outer_start
          result.end          = result.outer_end if result.outer_end
          # promote the outer source text, mirroring start/end above (this
          # line used to assign source_text to itself, which had no effect)
          result.source_text  = result.outer_source_text if result.outer_source_text
          result
        end
      end
    end

    # Whether #parse installs a MemoizingCache in the options it passes on.
    attr_accessor :memoizing

    # Memoizing is switched on by default.
    def initialize
      @memoizing = true
    end

    # TODO: consider making grammars copiable (could be used in threaded context then)
    #def initialize_copy(from); end
    #def clone; end
    #def dupe; end

    # Starts with starting_symbol.
    #
    # Populates the options hash (the grammar class, the starting rule name,
    # the default skipping rule, zero-based line/column start and, when
    # memoizing is on, a fresh MemoizingCache) and then hands the string to
    # the starting rule. Note that the hash passed by the caller is modified
    # in place.
    #
    # Raises ArgumentError if string is nil, and raises if the grammar has no
    # starting symbol.
    def parse string, options = {}
      raise ArgumentError, 'nil string' if string.nil?
      raise 'starting symbol not defined' if self.class.start_rule.nil?
      options[:grammar]       = self.class
      options[:rule_name]     = self.class.start_rule
      options[:skipping]      = self.class.default_skipping_rule
      options[:line_start]    = 0 # "richer" information (more human-friendly) than that provided in "location"
      options[:column_start]  = 0 # "richer" information (more human-friendly) than that provided in "location"
      options[:memoizer]      = MemoizingCache.new if @memoizing
      self.class.start_rule.to_parseable.memoizing_parse string, options
    end

    # TODO: pretty print method?
  end # class Grammar
end # module Walrat