walrus 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/walrus +44 -0
- data/ext/jindex/extconf.rb +11 -0
- data/ext/jindex/jindex.c +79 -0
- data/ext/mkdtemp/extconf.rb +11 -0
- data/ext/mkdtemp/mkdtemp.c +41 -0
- data/lib/walrus/additions/module.rb +36 -0
- data/lib/walrus/additions/string.rb +37 -0
- data/lib/walrus/additions/test/unit/error_collector.rb +62 -0
- data/lib/walrus/compile_error.rb +28 -0
- data/lib/walrus/compiler.rb +124 -0
- data/lib/walrus/contrib/spec/walruscloth_spec.rb +32 -0
- data/lib/walrus/contrib/walruscloth.rb +82 -0
- data/lib/walrus/diff.rb +89 -0
- data/lib/walrus/document.rb +98 -0
- data/lib/walrus/grammar/additions/proc.rb +20 -0
- data/lib/walrus/grammar/additions/regexp.rb +21 -0
- data/lib/walrus/grammar/additions/string.rb +52 -0
- data/lib/walrus/grammar/additions/symbol.rb +42 -0
- data/lib/walrus/grammar/and_predicate.rb +40 -0
- data/lib/walrus/grammar/array_result.rb +19 -0
- data/lib/walrus/grammar/continuation_wrapper_exception.rb +28 -0
- data/lib/walrus/grammar/left_recursion_exception.rb +27 -0
- data/lib/walrus/grammar/location_tracking.rb +105 -0
- data/lib/walrus/grammar/match_data_wrapper.rb +65 -0
- data/lib/walrus/grammar/memoizing.rb +41 -0
- data/lib/walrus/grammar/memoizing_cache.rb +94 -0
- data/lib/walrus/grammar/node.rb +60 -0
- data/lib/walrus/grammar/not_predicate.rb +40 -0
- data/lib/walrus/grammar/parse_error.rb +39 -0
- data/lib/walrus/grammar/parser_state.rb +181 -0
- data/lib/walrus/grammar/parslet.rb +28 -0
- data/lib/walrus/grammar/parslet_choice.rb +120 -0
- data/lib/walrus/grammar/parslet_combination.rb +26 -0
- data/lib/walrus/grammar/parslet_combining.rb +154 -0
- data/lib/walrus/grammar/parslet_merge.rb +88 -0
- data/lib/walrus/grammar/parslet_omission.rb +57 -0
- data/lib/walrus/grammar/parslet_repetition.rb +97 -0
- data/lib/walrus/grammar/parslet_repetition_default.rb +58 -0
- data/lib/walrus/grammar/parslet_sequence.rb +202 -0
- data/lib/walrus/grammar/predicate.rb +57 -0
- data/lib/walrus/grammar/proc_parslet.rb +52 -0
- data/lib/walrus/grammar/regexp_parslet.rb +73 -0
- data/lib/walrus/grammar/skipped_substring_exception.rb +36 -0
- data/lib/walrus/grammar/string_enumerator.rb +45 -0
- data/lib/walrus/grammar/string_parslet.rb +75 -0
- data/lib/walrus/grammar/string_result.rb +24 -0
- data/lib/walrus/grammar/symbol_parslet.rb +63 -0
- data/lib/walrus/grammar.rb +170 -0
- data/lib/walrus/no_parameter_marker.rb +19 -0
- data/lib/walrus/parser.rb +420 -0
- data/lib/walrus/runner.rb +356 -0
- data/lib/walrus/template.rb +75 -0
- data/lib/walrus/walrus_grammar/assignment_expression.rb +24 -0
- data/lib/walrus/walrus_grammar/block_directive.rb +28 -0
- data/lib/walrus/walrus_grammar/comment.rb +24 -0
- data/lib/walrus/walrus_grammar/def_directive.rb +64 -0
- data/lib/walrus/walrus_grammar/echo_directive.rb +44 -0
- data/lib/walrus/walrus_grammar/escape_sequence.rb +24 -0
- data/lib/walrus/walrus_grammar/import_directive.rb +44 -0
- data/lib/walrus/walrus_grammar/include_directive.rb +27 -0
- data/lib/walrus/walrus_grammar/instance_variable.rb +24 -0
- data/lib/walrus/walrus_grammar/literal.rb +24 -0
- data/lib/walrus/walrus_grammar/message_expression.rb +25 -0
- data/lib/walrus/walrus_grammar/multiline_comment.rb +54 -0
- data/lib/walrus/walrus_grammar/placeholder.rb +40 -0
- data/lib/walrus/walrus_grammar/raw_directive.rb +42 -0
- data/lib/walrus/walrus_grammar/raw_text.rb +45 -0
- data/lib/walrus/walrus_grammar/ruby_directive.rb +29 -0
- data/lib/walrus/walrus_grammar/ruby_expression.rb +31 -0
- data/lib/walrus/walrus_grammar/set_directive.rb +24 -0
- data/lib/walrus/walrus_grammar/silent_directive.rb +44 -0
- data/lib/walrus/walrus_grammar/slurp_directive.rb +25 -0
- data/lib/walrus/walrus_grammar/super_directive.rb +27 -0
- data/lib/walrus.rb +64 -0
- data/spec/acceptance/acceptance_spec.rb +97 -0
- data/spec/acceptance/block/basic_block.expected +1 -0
- data/spec/acceptance/block/basic_block.tmpl +3 -0
- data/spec/acceptance/block/nested_blocks.expected +5 -0
- data/spec/acceptance/block/nested_blocks.tmpl +11 -0
- data/spec/acceptance/comments/comments_and_text.expected +3 -0
- data/spec/acceptance/comments/comments_and_text.tmpl +6 -0
- data/spec/acceptance/comments/single_comment.expected +0 -0
- data/spec/acceptance/comments/single_comment.tmpl +1 -0
- data/spec/acceptance/def/alternative_def_calling_conventions.expected +3 -0
- data/spec/acceptance/def/alternative_def_calling_conventions.tmpl +18 -0
- data/spec/acceptance/def/basic_def_block_no_output.expected +0 -0
- data/spec/acceptance/def/basic_def_block_no_output.tmpl +17 -0
- data/spec/acceptance/def/defs_can_be_called_multiple_times.expected +3 -0
- data/spec/acceptance/def/defs_can_be_called_multiple_times.tmpl +6 -0
- data/spec/acceptance/def/defs_can_be_dynamic.expected +4 -0
- data/spec/acceptance/def/defs_can_be_dynamic.tmpl +12 -0
- data/spec/acceptance/echo/echo_directive_with_numeric_literal.expected +1 -0
- data/spec/acceptance/echo/echo_directive_with_numeric_literal.tmpl +1 -0
- data/spec/acceptance/echo/echo_expression_list.expected +1 -0
- data/spec/acceptance/echo/echo_expression_list.tmpl +1 -0
- data/spec/acceptance/echo/echo_short_notation.expected +1 -0
- data/spec/acceptance/echo/echo_short_notation.tmpl +1 -0
- data/spec/acceptance/echo/echo_simple_expression.expected +1 -0
- data/spec/acceptance/echo/echo_simple_expression.tmpl +1 -0
- data/spec/acceptance/echo/echo_single_quoted_string_literal.expected +1 -0
- data/spec/acceptance/echo/echo_single_quoted_string_literal.tmpl +1 -0
- data/spec/acceptance/echo/multiple_echo_statements.expected +1 -0
- data/spec/acceptance/echo/multiple_echo_statements.tmpl +2 -0
- data/spec/acceptance/includes/basic_included_file.txt +1 -0
- data/spec/acceptance/includes/basic_includer.complex +3 -0
- data/spec/acceptance/includes/basic_includer.expected +3 -0
- data/spec/acceptance/includes/basic_includer.rb +38 -0
- data/spec/acceptance/includes/complicated_included_file.txt +3 -0
- data/spec/acceptance/includes/complicated_includer.complex +3 -0
- data/spec/acceptance/includes/complicated_includer.expected +3 -0
- data/spec/acceptance/includes/complicated_includer.rb +41 -0
- data/spec/acceptance/includes/nested_include_1.txt +3 -0
- data/spec/acceptance/includes/nested_include_2.txt +1 -0
- data/spec/acceptance/includes/nested_includer.complex +3 -0
- data/spec/acceptance/includes/nested_includer.expected +4 -0
- data/spec/acceptance/includes/nested_includer.rb +41 -0
- data/spec/acceptance/inheritance/basic_child.complex +10 -0
- data/spec/acceptance/inheritance/basic_child.expected +9 -0
- data/spec/acceptance/inheritance/basic_child.rb +54 -0
- data/spec/acceptance/inheritance/basic_parent.complex +5 -0
- data/spec/acceptance/inheritance/basic_parent.expected +3 -0
- data/spec/acceptance/inheritance/basic_parent.rb +41 -0
- data/spec/acceptance/inheritance/importing_child.complex +8 -0
- data/spec/acceptance/inheritance/importing_child.expected +7 -0
- data/spec/acceptance/inheritance/importing_child.rb +46 -0
- data/spec/acceptance/inheritance/subdirectory/importing_child_in_subdirectory.complex +8 -0
- data/spec/acceptance/inheritance/subdirectory/importing_child_in_subdirectory.expected +7 -0
- data/spec/acceptance/inheritance/subdirectory/importing_child_in_subdirectory.rb +44 -0
- data/spec/acceptance/multiline_comments/multiline_comment_with_directives_inside.expected +0 -0
- data/spec/acceptance/multiline_comments/multiline_comment_with_directives_inside.tmpl +15 -0
- data/spec/acceptance/multiline_comments/simple_multiline_comment.expected +2 -0
- data/spec/acceptance/multiline_comments/simple_multiline_comment.tmpl +4 -0
- data/spec/acceptance/raw/complicated_raw_example.expected +57 -0
- data/spec/acceptance/raw/complicated_raw_example.tmpl +79 -0
- data/spec/acceptance/raw-text/UTF_8.expected +12 -0
- data/spec/acceptance/raw-text/UTF_8.tmpl +12 -0
- data/spec/acceptance/raw-text/empty_file.expected +0 -0
- data/spec/acceptance/raw-text/empty_file.tmpl +0 -0
- data/spec/acceptance/raw-text/multi_line.expected +4 -0
- data/spec/acceptance/raw-text/multi_line.tmpl +4 -0
- data/spec/acceptance/raw-text/single_line.expected +1 -0
- data/spec/acceptance/raw-text/single_line.tmpl +1 -0
- data/spec/acceptance/raw-text/single_line_whitespace.expected +1 -0
- data/spec/acceptance/raw-text/single_line_whitespace.tmpl +1 -0
- data/spec/acceptance/ruby/ruby_directive_is_just_like_silent.expected +1 -0
- data/spec/acceptance/ruby/ruby_directive_is_just_like_silent.tmpl +4 -0
- data/spec/acceptance/ruby/ruby_directive_using_here_doc.expected +1 -0
- data/spec/acceptance/ruby/ruby_directive_using_here_doc.tmpl +4 -0
- data/spec/acceptance/ruby/ruby_directive_using_here_doc_alt_syntax.expected +1 -0
- data/spec/acceptance/ruby/ruby_directive_using_here_doc_alt_syntax.tmpl +4 -0
- data/spec/acceptance/ruby/ruby_directive_with_accumulate.expected +1 -0
- data/spec/acceptance/ruby/ruby_directive_with_accumulate.tmpl +4 -0
- data/spec/acceptance/ruby/ruby_directive_with_accumulate_and_block.expected +1 -0
- data/spec/acceptance/ruby/ruby_directive_with_accumulate_and_block.tmpl +6 -0
- data/spec/acceptance/set/unused_set.expected +0 -0
- data/spec/acceptance/set/unused_set.tmpl +1 -0
- data/spec/acceptance/set/used_set.expected +1 -0
- data/spec/acceptance/set/used_set.tmpl +2 -0
- data/spec/acceptance/silent/silent_and_echo_combined.expected +1 -0
- data/spec/acceptance/silent/silent_and_echo_combined.tmpl +2 -0
- data/spec/acceptance/silent/silent_short_notation.expected +1 -0
- data/spec/acceptance/silent/silent_short_notation.tmpl +1 -0
- data/spec/acceptance/silent/simple_silent_directive.expected +0 -0
- data/spec/acceptance/silent/simple_silent_directive.tmpl +1 -0
- data/spec/acceptance/slurp/basic_slurp_demo.expected +1 -0
- data/spec/acceptance/slurp/basic_slurp_demo.tmpl +4 -0
- data/spec/acceptance/super/super_with_no_effect.expected +4 -0
- data/spec/acceptance/super/super_with_no_effect.tmpl +5 -0
- data/spec/additions/module_spec.rb +126 -0
- data/spec/additions/string_spec.rb +99 -0
- data/spec/compiler_spec.rb +55 -0
- data/spec/grammar/additions/proc_spec.rb +25 -0
- data/spec/grammar/additions/regexp_spec.rb +37 -0
- data/spec/grammar/additions/string_spec.rb +106 -0
- data/spec/grammar/and_predicate_spec.rb +29 -0
- data/spec/grammar/continuation_wrapper_exception_spec.rb +23 -0
- data/spec/grammar/match_data_wrapper_spec.rb +41 -0
- data/spec/grammar/memoizing_cache_spec.rb +112 -0
- data/spec/grammar/node_spec.rb +126 -0
- data/spec/grammar/not_predicate_spec.rb +29 -0
- data/spec/grammar/parser_state_spec.rb +172 -0
- data/spec/grammar/parslet_choice_spec.rb +49 -0
- data/spec/grammar/parslet_combining_spec.rb +287 -0
- data/spec/grammar/parslet_merge_spec.rb +33 -0
- data/spec/grammar/parslet_omission_spec.rb +58 -0
- data/spec/grammar/parslet_repetition_spec.rb +77 -0
- data/spec/grammar/parslet_sequence_spec.rb +49 -0
- data/spec/grammar/parslet_spec.rb +23 -0
- data/spec/grammar/predicate_spec.rb +53 -0
- data/spec/grammar/proc_parslet_spec.rb +52 -0
- data/spec/grammar/regexp_parslet_spec.rb +347 -0
- data/spec/grammar/string_enumerator_spec.rb +94 -0
- data/spec/grammar/string_parslet_spec.rb +143 -0
- data/spec/grammar/symbol_parslet_spec.rb +30 -0
- data/spec/grammar_spec.rb +545 -0
- data/spec/parser_spec.rb +1418 -0
- data/spec/spec_helper.rb +34 -0
- data/spec/walrus_grammar/comment_spec.rb +39 -0
- data/spec/walrus_grammar/echo_directive_spec.rb +63 -0
- data/spec/walrus_grammar/escape_sequence_spec.rb +85 -0
- data/spec/walrus_grammar/literal_spec.rb +41 -0
- data/spec/walrus_grammar/message_expression_spec.rb +37 -0
- data/spec/walrus_grammar/multiline_comment_spec.rb +58 -0
- data/spec/walrus_grammar/placeholder_spec.rb +48 -0
- data/spec/walrus_grammar/raw_directive_spec.rb +81 -0
- data/spec/walrus_grammar/raw_text_spec.rb +65 -0
- data/spec/walrus_grammar/silent_directive_spec.rb +34 -0
- metadata +291 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
# Copyright 2007 Wincent Colaiuta
|
2
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
3
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
4
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
5
|
+
# in the accompanying file, "LICENSE.txt", for more details.
|
6
|
+
#
|
7
|
+
# $Id$
|
8
|
+
|
9
|
+
require 'walrus'
|
10
|
+
|
11
|
+
module Walrus
|
12
|
+
class Grammar
|
13
|
+
|
14
|
+
# The MemoizingCache class memoizes the outcomes of parse operations. The functionality is implemented as a separate class so as to minimize the amount of "contamination" of other classes by memoizing code, and to allow memoizing to be cleanly turned on or off at will. If a MemoizingCache is passed to a Parslet, ParsletCombination or Predicate as a value for the :memoizer key in the options hash passed to a parse method, the class implementing that method will call the parse method on the cache rather than proceeding normally. The cache will either propagate the previously memoized result, or will defer back to the original class to obtain the result. A circular dependency is avoided by setting the :ignore_memoizer flag in the options hash. If no MemoizingCache is passed then normal program flow takes place.
|
15
|
+
class MemoizingCache
|
16
|
+
|
17
|
+
# Singleton class that serves as a default value for unset keys in a Hash.
|
18
|
+
class NoValueForKey
|
19
|
+
|
20
|
+
# Load the standard library's Singleton module right where it is mixed in.
require 'singleton'
|
21
|
+
# Only one instance exists (NoValueForKey.instance); MemoizingCache installs it as the default value of its Hash.
include Singleton
|
22
|
+
|
23
|
+
end
|
24
|
+
|
25
|
+
# Creates an empty cache.
def initialize
  # Outcomes of parse operations are memoized in a Hash. Each key is a unique identifier made up of
  # the Parslet, ParsletCombination or Predicate used in the parse operation, the location of the
  # operation (line_start and column_start) and the skipping override (if any). A stored value may be:
  #
  #   - a ParseError raised during parsing
  #   - a SkippedSubstringException raised during parsing
  #   - the :ZeroWidthParseSuccess symbol thrown during parsing
  #   - the :AndPredicateSuccess symbol thrown during parsing
  #   - the :NotPredicateSuccess symbol thrown during parsing
  #   - a String instance returned as a parse result
  #   - a MatchDataWrapper instance returned as a parse result
  #   - an Array instance containing an ordered collection of parse results
  #   - a Node subclass instance containing an AST production
  #
  # Looking up a key that has never been stored yields the NoValueForKey singleton.
  sentinel = NoValueForKey.instance
  @cache = Hash.new(sentinel)
end
|
40
|
+
|
41
|
+
# The receiver checks whether there is already a stored result corresponding to the unique identifier that specifies the "coordinates" of a parsing operation (location, parseable, skipping override). If found, propagates the result directly to the caller rather than performing the parse method all over again. Here "propagation" means re-raising parse errors, re-throwing symbols, and returning object references. If not found, performs the parsing operation and stores the result in the cache before propagating it.
|
42
|
+
# Propagates the memoized outcome of a parse operation, performing (and recording) the parse on a cache miss.
#
# string  - the text to parse; passed through to options[:parseable].memoizing_parse.
# options - Hash; :parseable, :line_start and :column_start (plus :origin and :skipping_override when
#           those keys are present) form the cache key. Note that :ignore_memoizer is set to true on the
#           Hash that was passed in.
#
# Returns the parse result, either freshly computed or previously stored.
# Raises ArgumentError if string is nil; re-raises any exception raised by the parseable (stored
# exceptions are re-raised on later calls); re-throws :ZeroWidthParseSuccess, :AndPredicateSuccess and
# :NotPredicateSuccess; check_left_recursion may raise for SymbolParslet instances.
def parse(string, options = {})
  raise ArgumentError if string.nil?

  # construct a unique identifier
  identifier = [options[:parseable], options[:line_start], options[:column_start]]
  identifier << options[:origin] if options.has_key? :origin
  identifier << options[:skipping_override] if options.has_key? :skipping_override

  # cache hit: propagate the stored outcome the same way it originally surfaced
  cached = @cache[identifier]
  unless NoValueForKey.instance.equal?(cached)
    case cached
    when Symbol    then throw cached
    when Exception then raise cached
    else                return cached
    end
  end

  # first time for this parseable/location/skipping_override (etc) combination, capture result and propagate
  catch :NotPredicateSuccess do
    catch :AndPredicateSuccess do
      catch :ZeroWidthParseSuccess do
        begin
          options[:ignore_memoizer] = true

          # short-circuit left recursion here rather than infinite looping
          if options[:parseable].kind_of? SymbolParslet
            check_left_recursion(options[:parseable], options)
            @last_seen_symbol_parslet = options[:parseable]
            @last_seen_symbol_parslet_location = [options[:line_start], options[:column_start]]
          end

          return @cache[identifier] = options[:parseable].memoizing_parse(string, options) # store and return
        rescue Exception => e
          # Exception (not StandardError) is rescued on purpose so that every raised error is memoized
          raise @cache[identifier] = e # store and re-raise
        end
      end
      # only reached when the innermost catch intercepted its tag
      throw @cache[identifier] = :ZeroWidthParseSuccess # store and re-throw
    end
    throw @cache[identifier] = :AndPredicateSuccess # store and re-throw
  end
  throw @cache[identifier] = :NotPredicateSuccess # store and re-throw
end
|
81
|
+
|
82
|
+
# Raises LeftRecursionException when parseable is a SymbolParslet equal to the most recently recorded
# one and options carry the same [line_start, column_start] that was recorded with it.
# Returns nil otherwise.
def check_left_recursion(parseable, options = {})
  return unless parseable.kind_of?(SymbolParslet)
  return unless @last_seen_symbol_parslet == parseable

  location = [options[:line_start], options[:column_start]]
  raise LeftRecursionException if @last_seen_symbol_parslet_location == location
end
|
89
|
+
|
90
|
+
end # class MemoizingCache
|
91
|
+
|
92
|
+
end # class Grammar
|
93
|
+
end # module Walrus
|
94
|
+
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# Copyright 2007 Wincent Colaiuta
|
2
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
3
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
4
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
5
|
+
# in the accompanying file, "LICENSE.txt", for more details.
|
6
|
+
#
|
7
|
+
# $Id$
|
8
|
+
|
9
|
+
require 'walrus'
|
10
|
+
|
11
|
+
module Walrus
|
12
|
+
class Grammar
|
13
|
+
|
14
|
+
# Make subclasses of this for use in Abstract Syntax Trees (ASTs).
|
15
|
+
class Node
|
16
|
+
|
17
|
+
include Walrus::Grammar::LocationTracking
|
18
|
+
|
19
|
+
# Returns the contents of @string_value, which the initializers generated by Node.subclass assign.
def to_s
|
20
|
+
@string_value
|
21
|
+
end
|
22
|
+
|
23
|
+
# Dynamically creates a Node descendant.
|
24
|
+
# subclass_name should be a Symbol or String containing the name of the subclass to be created.
|
25
|
+
# namespace should be the module in which the new subclass should be created; it defaults to Walrus::Grammar.
|
26
|
+
# results are optional symbols expected to be parsed when initializing an instance of the subclass. If no optional symbols are provided then a default initializer is created that expects a single parameter and stores a reference to it in an instance variable called "lexeme".
|
27
|
+
# Dynamically creates, names and returns a subclass of the receiver.
#
# subclass_name - Symbol or String naming the new class (converted with to_s).
# namespace     - module in which the constant is defined; defaults to Walrus::Grammar.
# results       - optional attribute names. When given, the generated initializer takes one argument
#                 per name, stores each in the matching instance variable (with a reader) and sets
#                 @string_value to the concatenation of their to_s values. When omitted, the
#                 initializer takes a single "lexeme" argument and sets @string_value to lexeme.to_s.
#
# Raises ArgumentError if subclass_name is nil.
def self.subclass(subclass_name, namespace = Walrus::Grammar, *results)
  raise ArgumentError if subclass_name.nil?

  # create new anonymous class with the receiver as superclass; assigning it to a constant effectively names the class
  new_class = namespace.const_set(subclass_name.to_s, Class.new(self))

  # The initializer source is collected as an Array of lines so that no string literal is mutated
  # (mutating literals fails when string literals are frozen).
  if results.empty? # default case, store sole parameter in "lexeme"
    new_class.class_eval { attr_reader :lexeme }
    source = [
      'def initialize(lexeme)',
      '  @string_value = lexeme.to_s',
      '  @lexeme = lexeme',
      'end'
    ]
  else
    # set up accessors
    results.each { |result| new_class.class_eval { attr_reader result } }

    names = results.map { |result| result.to_s }
    # ''.dup gives the generated code a mutable accumulator even when string literals are frozen
    source = ["def initialize(#{names.join(', ')})", "  @string_value = ''.dup"]
    names.each do |name|
      source << "  @#{name} = #{name}"
      source << "  @string_value << #{name}.to_s"
    end
    source << 'end'
  end

  # the string form keeps strict arity checking on the generated initializer;
  # file and line are supplied so backtraces point at this file rather than "(eval)"
  new_class.class_eval source.join("\n"), __FILE__, __LINE__
  new_class
end
|
56
|
+
|
57
|
+
end # class Node
|
58
|
+
|
59
|
+
end # class Grammar
|
60
|
+
end # module Walrus
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Copyright 2007 Wincent Colaiuta
|
2
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
3
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
4
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
5
|
+
# in the accompanying file, "LICENSE.txt", for more details.
|
6
|
+
#
|
7
|
+
# $Id$
|
8
|
+
|
9
|
+
require 'walrus'
|
10
|
+
|
11
|
+
module Walrus
|
12
|
+
class Grammar
|
13
|
+
|
14
|
+
class NotPredicate < Predicate
|
15
|
+
|
16
|
+
# Succeeds only when the wrapped parseable fails to parse string.
#
# Throws :NotPredicateSuccess when @parseable.memoizing_parse raises a ParseError.
# Raises ParseError when the wrapped parse returns normally or throws :ZeroWidthParseSuccess;
# the error's end location is taken from options[:line_start] and options[:column_start].
# Raises ArgumentError if string is nil.
def parse(string, options = {})
  raise ArgumentError if string.nil?

  catch :ZeroWidthParseSuccess do
    begin
      @parseable.memoizing_parse(string, options)
    rescue ParseError
      # failed to pass (which is just what we wanted)
      throw :NotPredicateSuccess
    end
  end

  # getting this far means that parsing succeeded (not what we wanted)
  message  = 'predicate not satisfied ("%s" not allowed) while parsing "%s"' % [@parseable.to_s, string]
  location = { :line_end => options[:line_start], :column_end => options[:column_start] }
  raise ParseError.new(message, location)
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
# Returns the constant 11.
# NOTE(review): presumably a per-class offset folded into the hash computed by the Predicate superclass — not visible here; confirm.
def hash_offset
|
34
|
+
11
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
end # class Grammar
|
40
|
+
end # module Walrus
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Copyright 2007 Wincent Colaiuta
|
2
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
3
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
4
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
5
|
+
# in the accompanying file, "LICENSE.txt", for more details.
|
6
|
+
#
|
7
|
+
# $Id$
|
8
|
+
|
9
|
+
require 'walrus'
|
10
|
+
|
11
|
+
module Walrus
|
12
|
+
class Grammar
|
13
|
+
|
14
|
+
class ParseError < Exception
|
15
|
+
|
16
|
+
include Walrus::Grammar::LocationTracking
|
17
|
+
|
18
|
+
# Takes an optional hash (for packing extra info into exception).
|
19
|
+
# position in string (irrespective of line number, column number)
|
20
|
+
# line number, column number
|
21
|
+
# filename
|
22
|
+
# message - the exception message, handed to the superclass initializer.
# info    - optional Hash; the values under :line_start, :column_start, :line_end and :column_end
#           are copied (nil when absent) into the receiver through the writers of the same names.
def initialize(message, info = {})
  super(message)
  [:line_start, :column_start, :line_end, :column_end].each do |key|
    send("#{key}=", info[key])
  end
end
|
29
|
+
|
30
|
+
# Returns a debugging string of the form "#<ClassName: message @line_end=N, @column_end=N>".
# NOTE(review): the %d directives need Integer-convertible values; initialize copies line_end and
# column_end straight from the info hash, so if the readers can return nil this raises a TypeError —
# confirm what LocationTracking returns for unset values.
def inspect
|
31
|
+
# TODO also return filename if available
|
32
|
+
'#<%s: %s @line_end=%d, @column_end=%d>' % [ self.class.to_s, self.to_s, self.line_end, self.column_end ]
|
33
|
+
end
|
34
|
+
|
35
|
+
end # class ParseError
|
36
|
+
|
37
|
+
end # class Grammar
|
38
|
+
end # module Walrus
|
39
|
+
|
@@ -0,0 +1,181 @@
|
|
1
|
+
# Copyright 2007 Wincent Colaiuta
|
2
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
3
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
4
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
5
|
+
# in the accompanying file, "LICENSE.txt", for more details.
|
6
|
+
#
|
7
|
+
# $Id: /mirrors/Walrus/trunk/walrus/lib/walrus/grammar/parser_state.rb 6704 2007-04-09T18:30:00.421185Z wincent $
|
8
|
+
|
9
|
+
require 'walrus'
|
10
|
+
|
11
|
+
module Walrus
|
12
|
+
class Grammar
|
13
|
+
|
14
|
+
# Simple class for maintaining state during a parse operation.
|
15
|
+
class ParserState
|
16
|
+
|
17
|
+
attr_reader :options
|
18
|
+
|
19
|
+
# Returns the remainder (the unparsed portion) of the string. Will return an empty string if already at the end of the string.
|
20
|
+
attr_reader :remainder
|
21
|
+
|
22
|
+
# Raises an ArgumentError if string is nil.
|
23
|
+
# Prepares the state for parsing string.
#
# string  - the text to be parsed; a copy is kept via base_string= and the remainder starts as a clone of it.
# options - Hash that may carry :line_start, :column_start, :line_end and :column_end; it is cloned, so
#           the assignments below are made on the clone.
#
# Raises an ArgumentError if string is nil.
def initialize(string, options = {})
  raise ArgumentError if string.nil?

  self.base_string = string
  @results   = ArrayResult.new # for accumulating results
  @remainder = @base_string.clone
  @scanned   = ''
  @options   = options.clone

  # start wherever we last finished (doesn't seem to behave differently from the alternative,
  # which would be to default only a nil :line_start / :column_start to 0)
  line   = @options[:line_end]   || @options[:line_start]   || 0
  column = @options[:column_end] || @options[:column_start] || 0
  @options[:line_start]   = line
  @options[:column_start] = column

  # before parsing begins, end point is equal to start point
  @options[:line_end]   = line
  @options[:column_end] = column

  # remembered so that results can report where this state began
  @original_line_start   = line
  @original_column_start = column
end
|
42
|
+
|
43
|
+
# The parsed method is used to inform the receiver of a successful parsing event.
|
44
|
+
# Note that substring need not actually be a String but it must respond to the following messages:
|
45
|
+
# - "line_end" and "column_end" so that the end position of the receiver can be updated
|
46
|
+
# As a convenience returns the remainder.
|
47
|
+
# Raises an ArgumentError if substring is nil.
|
48
|
+
def parsed(substring)
|
49
|
+
raise ArgumentError if substring.nil?
|
50
|
+
# passing true for the helper's "store" parameter appends the substring to the accumulated results
update_and_return_remainder_for_string(substring, true)
|
51
|
+
end
|
52
|
+
|
53
|
+
# The skipped method is used to inform the receiver of a successful parsing event where the parsed substring should be consumed but not included in the accumulated results.
|
54
|
+
# The substring should respond to "line_end" and "column_end".
|
55
|
+
# In all other respects this method behaves exactly like the parsed method.
|
56
|
+
def skipped(substring)
|
57
|
+
raise ArgumentError if substring.nil?
|
58
|
+
# "store" is left at its default (false): the substring is consumed but not added to the results
update_and_return_remainder_for_string(substring)
|
59
|
+
end
|
60
|
+
|
61
|
+
# The auto_skipped method is used to inform the receiver of a successful parsing event where the parsed substring should be consumed but not included in the accumulated results and furthermore the parse event should not affect the overall bounds of the parse result. In reality this means that the method is only ever called upon the successful use of an automatic intertoken "skipping" parslet. By definition this method should only be called for intertoken skipping otherwise incorrect results will be produced.
|
62
|
+
# Consumes substring without storing it and without moving the recorded start/end bounds.
# Returns the remainder. Raises an ArgumentError if substring is nil.
def auto_skipped(substring)
  raise ArgumentError if substring.nil?

  bounds = [:line_start, :column_start, :line_end, :column_end]
  saved  = bounds.map { |key| @options[key] } # save

  remainder = update_and_return_remainder_for_string(substring)

  bounds.zip(saved) { |key, value| @options[key] = value } # restore
  remainder
end
|
69
|
+
|
70
|
+
# Returns the results accumulated so far.
|
71
|
+
# Returns an empty array if no results have been accumulated.
|
72
|
+
# Returns a single object if only one result has been accumulated.
|
73
|
+
# Returns an array of objects if multiple results have been accumulated.
|
74
|
+
# Returns the accumulated results, stamped with the overall start, end and source text of this state:
# the lone result itself when exactly one has been accumulated, otherwise the whole collection.
def results
  span_start = [@original_line_start, @original_column_start]
  span_end   = [@options[:line_end], @options[:column_end]]
  span_text  = @scanned.clone

  if @results.length == 1
    # Here we ask the single result to exhibit container-like properties. The "outer" variants are
    # used so as to not overwrite any data internal to the result itself. This can happen where a
    # lone result is surrounded only by skipped elements: the result has to convey data about its
    # own limits, plus those of the context just around it.
    outcome = @results[0]
    outcome.outer_start       = span_start if outcome.start != span_start
    outcome.outer_end         = span_end if outcome.end != span_end
    outcome.outer_source_text = span_text if outcome.source_text != span_text

    # The above trick fixes some of the location tracking issues but opens up another can of worms
    # (returning outcome at this point shows it). Some way of handling unwrapped results (raw
    # results, not AST nodes) is needed as well, hence the plain assignments that follow.
  else
    outcome = @results
  end

  outcome.start       = span_start
  outcome.end         = span_end
  outcome.source_text = span_text
  outcome
end
|
109
|
+
|
110
|
+
# Returns the number of results accumulated so far.
|
111
|
+
def length
|
112
|
+
# delegates to the internal @results collection (the ArrayResult created in initialize)
@results.length
|
113
|
+
end
|
114
|
+
|
115
|
+
# TODO: possibly implement "undo/rollback" and "reset" methods
|
116
|
+
# if I implement "undo" will probably do it as a stack
|
117
|
+
# will have the option of implementing "redo" as well but I'd only do that if I could think of a use for it
|
118
|
+
|
119
|
+
private
|
120
|
+
|
121
|
+
# Records that input has been consumed: advances the end (and start) position held in @options, moves
# the consumed text from @remainder to @scanned and returns the new remainder.
#
# input - a literal String (wrapped in a StringResult whose start/end are computed here) or any object
#         that already responds to line_end and column_end.
# store - when true, input is appended to the accumulated results.
def update_and_return_remainder_for_string(input, store = false)
  # remember old end point
  old_line_end   = @options[:line_end]
  old_column_end = @options[:column_end]

  # special case handling for literal String objects
  if input.instance_of? String
    input       = StringResult.new(input)
    input.start = [old_line_end, old_column_end]
    line_count  = input.scan(/\r\n|\r|\n/).length # count number of newlines in receiver
    column_end  = if line_count != 0
                    input.jlength - input.jrindex(/\r|\n/) - 1 # calculate characters on last line
                  else # no newlines in match
                    input.jlength + old_column_end
                  end
    input.end = [old_line_end + line_count, column_end]
  end

  @results << input if store

  if input.line_end > old_line_end # end line has advanced
    @options[:line_end]   = input.line_end
    @options[:column_end] = 0
  end

  # end column has advanced
  @options[:column_end] = input.column_end if input.column_end > @options[:column_end]

  # new start point is old end point
  @options[:line_start]   = @options[:line_end]
  @options[:column_start] = @options[:column_end]

  # calculate remainder
  line_delta = @options[:line_end] - old_line_end
  if line_delta > 0 # have consumed newline(s)
    line_delta.times do # remove them from remainder
      newline_location = @remainder.jindex(/\r\n|\r|\n/) # find the location of the next newline
      newline_location += $~[0].length                  # add the actual characters used to indicate the newline
      @scanned << @remainder[0...newline_location]      # record scanned text
      @remainder = @remainder[newline_location..-1]     # strip everything up to and including the newline
    end
    @scanned << @remainder[0...@options[:column_end]]
    @remainder = @remainder[@options[:column_end]..-1]  # delete up to the current column
  else # no newlines consumed
    column_delta = @options[:column_end] - old_column_end
    if column_delta > 0 # there was movement within current line
      @scanned << @remainder[0...column_delta]
      @remainder = @remainder[column_delta..-1]         # delete up to the current column
    end
  end

  @remainder
end
|
172
|
+
|
173
|
+
# Stores a copy of string in @base_string.
def base_string=(string)
|
174
|
+
# if clone raises a StandardError the inline rescue falls back to storing the original object itself
@base_string = (string.clone rescue string)
|
175
|
+
end
|
176
|
+
|
177
|
+
end # class ParserState
|
178
|
+
|
179
|
+
end # class Grammar
|
180
|
+
end # module Walrus
|
181
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Copyright 2007 Wincent Colaiuta
|
2
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
3
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
4
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
5
|
+
# in the accompanying file, "LICENSE.txt", for more details.
|
6
|
+
#
|
7
|
+
# $Id$
|
8
|
+
|
9
|
+
require 'walrus'
|
10
|
+
|
11
|
+
module Walrus
|
12
|
+
class Grammar
|
13
|
+
# Base class that mixes in the ParsletCombining and Memoizing modules and
# leaves the actual parsing to its subclasses.
class Parslet

  include Walrus::Grammar::ParsletCombining
  include Walrus::Grammar::Memoizing

  # Returns the receiver unchanged.
  def to_parseable
    self
  end

  # Always raises NotImplementedError here; providing a working
  # implementation is the responsibility of each subclass.
  def parse(string, options = {})
    raise NotImplementedError
  end

end # class Parslet
|
27
|
+
end # class Grammar
|
28
|
+
end # module Walrus
|
@@ -0,0 +1,120 @@
|
|
1
|
+
# Copyright 2007 Wincent Colaiuta
|
2
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
3
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
4
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
5
|
+
# in the accompanying file, "LICENSE.txt", for more details.
|
6
|
+
#
|
7
|
+
# $Id$
|
8
|
+
|
9
|
+
require 'walrus'
|
10
|
+
|
11
|
+
module Walrus
|
12
|
+
class Grammar
|
13
|
+
|
14
|
+
# An ordered choice between two or more alternatives ("A | B | C"). The
# alternatives are tried in order and the result of the first one that parses
# successfully is returned.
class ParsletChoice < ParsletCombination

  # Cached hash value; recomputed by update_hash whenever the list of
  # alternatives changes.
  attr_reader :hash

  # Either parameter may be a Parslet or a ParsletCombination.
  # Neither parameter may be nil (ArgumentError is raised otherwise).
  # Further alternatives may be supplied in +others+.
  def initialize(left, right, *others)
    raise ArgumentError if left.nil?
    raise ArgumentError if right.nil?
    @alternatives = [left, right] + others
    update_hash
  end

  # Override so that alternatives are appended to an existing choice.
  # Consider the following example:
  #     A | B
  # This constitutes a single choice:
  #     (A | B)
  # If we then make this a three-element choice:
  #     A | B | C
  # We are effectively creating a nested choice containing the original
  # choice and an additional element:
  #     ((A | B) | C)
  # Although such a nested choice is correctly parsed it is not as
  # architecturally clean as a single choice without nesting:
  #     (A | B | C)
  # This method allows us to use the architecturally cleaner format.
  #
  # Returns the receiver; raises ArgumentError if +next_parslet+ is nil.
  def |(next_parslet)
    append(next_parslet)
  end

  # First tries to parse the left option, falling back and trying the right
  # option and then any subsequent options on failure. If no option
  # successfully completes parsing then a ParseError is raised. Any
  # zero-width parse successes thrown by alternative parsers will flow on to
  # a higher level.
  #
  # Raises ArgumentError if +string+ is nil.
  def parse(string, options = {})
    raise ArgumentError if string.nil?
    error = nil           # for error reporting purposes will track which parseable gets farthest to the right before failing
    left_recursion = nil  # will also track any left recursion that we detect
    @alternatives.each do |parseable|
      begin
        result = parseable.memoizing_parse(string, options) # successful parse
        if left_recursion && left_recursion.continuation    # and we have a continuation
          continuation = left_recursion.continuation        # continuations are once-only, one-way tickets
          left_recursion = nil                              # set this to nil so as not to call it again without meaning to
          continuation.call(result)                         # so jump back to where we were before
        end
        return result
      rescue LeftRecursionException => e
        left_recursion = e

        # TODO:
        # it's not enough to just catch this kind of exception and remember the last one
        # may need to accumulate these in an array
        # consider the example rule:
        #   :a, :a & :b | :a & :c | :a & :d | :b
        # the first option will raise a LeftRecursionException
        # the next option will raise for the same reason
        # the third likewise
        # finally we get to the fourth option, the first which might succeed
        # at that point we should have three continuations
        # we should try the first, falling back to the second and third if necessary
        # on successfully retrying, need to start all over again and try all the options again, just in case further recursion is possible
        # so it is quite complicated
        # the question is, is it more complicated than the other ways of getting right-associativity into Walrus-generated parsers?

      rescue ParseError => e
        # keep whichever error the existing one does not report as rightmost
        # (written without the Ruby 1.8-only "if cond : expr" form, which is a
        # syntax error on later Rubies)
        error = e if error.nil? || !error.rightmost?(e)
      end
    end

    # No ParseError was recorded if every alternative failed with a
    # LeftRecursionException; in that case report the failure without
    # location information rather than calling methods on nil.
    # NOTE(review): assumes ParseError accepts an empty info hash -- confirm.
    cause = error || left_recursion
    location = error ? { :line_end => error.line_end, :column_end => error.column_end } : {}
    raise ParseError.new('no valid alternatives while parsing "%s" (%s)' % [string, cause.to_s],
                         location) # should generally report the rightmost error
  end

  # Two choices are eql? when +other+ is exactly a ParsletChoice holding the
  # same number of alternatives, each eql? to its counterpart in the same
  # position.
  def eql?(other)
    return false unless other.instance_of? ParsletChoice
    other_alternatives = other.alternatives
    return false if @alternatives.length != other_alternatives.length
    @alternatives.zip(other_alternatives).all? { |mine, theirs| mine.eql?(theirs) }
  end

protected

  # For determining equality.
  attr_reader :alternatives

private

  # Recomputes the cached hash as the sum of the alternatives' hashes plus a
  # fixed offset (30) to avoid unwanted collisions with similar classes.
  def update_hash
    @hash = @alternatives.inject(30) { |sum, parseable| sum + parseable.hash }
  end

  # Appends another Parslet (or ParsletCombination) to the receiver and
  # returns the receiver.
  # Raises if parslet is nil.
  # Cannot use << as a method name because Ruby cannot parse it without the
  # self, and self is not allowed as an explicit receiver for private
  # messages.
  def append(next_parslet)
    raise ArgumentError if next_parslet.nil?
    @alternatives << next_parslet.to_parseable
    update_hash
    self
  end

end # class ParsletChoice
|
119
|
+
end # class Grammar
|
120
|
+
end # module Walrus
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Copyright 2007 Wincent Colaiuta
|
2
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
3
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
4
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
5
|
+
# in the accompanying file, "LICENSE.txt", for more details.
|
6
|
+
#
|
7
|
+
# $Id$
|
8
|
+
|
9
|
+
require 'walrus'
|
10
|
+
|
11
|
+
module Walrus
|
12
|
+
class Grammar
|
13
|
+
|
14
|
+
# Parent class for combinations of parseables (ParsletChoice, for example,
# inherits from it). Mixes in the ParsletCombining and Memoizing modules.
class ParsletCombination

  include Walrus::Grammar::ParsletCombining
  include Walrus::Grammar::Memoizing

  # Returns the receiver unchanged.
  def to_parseable
    self
  end

end # class ParsletCombination
|
24
|
+
|
25
|
+
end # class Grammar
|
26
|
+
end # module Walrus
|