ghazel-parslet 1.4.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/HISTORY.txt +195 -0
  2. data/LICENSE +23 -0
  3. data/README +70 -0
  4. data/Rakefile +49 -0
  5. data/example/boolean_algebra.rb +70 -0
  6. data/example/calc.rb +153 -0
  7. data/example/comments.rb +35 -0
  8. data/example/deepest_errors.rb +131 -0
  9. data/example/documentation.rb +18 -0
  10. data/example/email_parser.rb +52 -0
  11. data/example/empty.rb +13 -0
  12. data/example/erb.rb +47 -0
  13. data/example/ignore.rb +33 -0
  14. data/example/ip_address.rb +125 -0
  15. data/example/json.rb +128 -0
  16. data/example/local.rb +34 -0
  17. data/example/mathn.rb +44 -0
  18. data/example/minilisp.rb +94 -0
  19. data/example/modularity.rb +47 -0
  20. data/example/nested_errors.rb +132 -0
  21. data/example/output/boolean_algebra.out +4 -0
  22. data/example/output/calc.out +1 -0
  23. data/example/output/comments.out +8 -0
  24. data/example/output/deepest_errors.out +54 -0
  25. data/example/output/documentation.err +4 -0
  26. data/example/output/documentation.out +1 -0
  27. data/example/output/email_parser.out +2 -0
  28. data/example/output/empty.err +1 -0
  29. data/example/output/erb.out +7 -0
  30. data/example/output/ignore.out +1 -0
  31. data/example/output/ignore_whitespace.out +1 -0
  32. data/example/output/ip_address.out +9 -0
  33. data/example/output/json.out +5 -0
  34. data/example/output/local.out +3 -0
  35. data/example/output/mathn.out +4 -0
  36. data/example/output/minilisp.out +5 -0
  37. data/example/output/modularity.out +0 -0
  38. data/example/output/nested_errors.out +54 -0
  39. data/example/output/parens.out +8 -0
  40. data/example/output/readme.out +1 -0
  41. data/example/output/seasons.out +28 -0
  42. data/example/output/sentence.out +1 -0
  43. data/example/output/simple_xml.out +2 -0
  44. data/example/output/string_parser.out +3 -0
  45. data/example/parens.rb +42 -0
  46. data/example/readme.rb +30 -0
  47. data/example/seasons.rb +46 -0
  48. data/example/sentence.rb +36 -0
  49. data/example/simple.lit +3 -0
  50. data/example/simple_xml.rb +54 -0
  51. data/example/string_parser.rb +77 -0
  52. data/example/test.lit +4 -0
  53. data/lib/parslet.rb +254 -0
  54. data/lib/parslet/atoms.rb +32 -0
  55. data/lib/parslet/atoms/alternative.rb +50 -0
  56. data/lib/parslet/atoms/base.rb +124 -0
  57. data/lib/parslet/atoms/can_flatten.rb +137 -0
  58. data/lib/parslet/atoms/context.rb +94 -0
  59. data/lib/parslet/atoms/dsl.rb +98 -0
  60. data/lib/parslet/atoms/entity.rb +41 -0
  61. data/lib/parslet/atoms/lookahead.rb +49 -0
  62. data/lib/parslet/atoms/named.rb +32 -0
  63. data/lib/parslet/atoms/re.rb +38 -0
  64. data/lib/parslet/atoms/repetition.rb +63 -0
  65. data/lib/parslet/atoms/rule.rb +12 -0
  66. data/lib/parslet/atoms/rule/position.rb +143 -0
  67. data/lib/parslet/atoms/sequence.rb +38 -0
  68. data/lib/parslet/atoms/str.rb +37 -0
  69. data/lib/parslet/atoms/visitor.rb +89 -0
  70. data/lib/parslet/cause.rb +94 -0
  71. data/lib/parslet/convenience.rb +35 -0
  72. data/lib/parslet/error_reporter.rb +7 -0
  73. data/lib/parslet/error_reporter/deepest.rb +95 -0
  74. data/lib/parslet/error_reporter/tree.rb +57 -0
  75. data/lib/parslet/export.rb +162 -0
  76. data/lib/parslet/expression.rb +51 -0
  77. data/lib/parslet/expression/treetop.rb +92 -0
  78. data/lib/parslet/parser.rb +67 -0
  79. data/lib/parslet/pattern.rb +114 -0
  80. data/lib/parslet/pattern/binding.rb +49 -0
  81. data/lib/parslet/rig/rspec.rb +51 -0
  82. data/lib/parslet/slice.rb +101 -0
  83. data/lib/parslet/source.rb +62 -0
  84. data/lib/parslet/source/line_cache.rb +95 -0
  85. data/lib/parslet/transform.rb +236 -0
  86. data/lib/parslet/transform/context.rb +32 -0
  87. metadata +264 -0
@@ -0,0 +1,38 @@
1
+ # Matches a special kind of regular expression that only ever matches one
2
+ # character at a time. Useful members of this family are: <code>character
3
+ # ranges, \\w, \\d, \\r, \\n, ...</code>
4
+ #
5
+ # Example:
6
+ #
7
+ # match('[a-z]') # matches a-z
8
+ # match('\s') # like regexps: matches space characters
9
+ #
10
class Parslet::Atoms::Re < Parslet::Atoms::Base
  # match:: the pattern source as a String (the constructor argument after
  #         #to_s)
  # re::    the Regexp compiled from +match+ with Regexp::MULTILINE
  attr_reader :match, :re

  # Builds a single-character matcher.
  #
  # @param match [#to_s] source of a regular expression; it is normalized
  #   with #to_s before being compiled
  def initialize(match)
    super()

    @match = match.to_s
    @re = Regexp.new(@match, Regexp::MULTILINE)

    # Build the message from the normalized string (exactly what
    # #to_s_inner prints) rather than from the raw argument: inspecting a
    # non-String argument and stripping the first and last character does
    # not yield the pattern text (e.g. a Symbol would produce an empty
    # string).
    printable = @match.inspect[1..-2]
    @error_msgs = {
      :premature => "Premature end of input",
      :failed => "Failed to match #{printable}"
    }
  end

  # Consumes exactly one character from +source+ when it matches +re+.
  #
  # Returns whatever #succ produces for the consumed character on a match
  # (#succ is defined outside this file); otherwise returns the result of
  # +context.err+ with either the "premature end" or the "failed" message.
  def try(source, context)
    return succ(source.consume(1)) if source.matches?(re)

    # No string could be read
    if source.chars_left < 1
      return context.err(self, source, @error_msgs[:premature])
    end

    # No match
    context.err(self, source, @error_msgs[:failed])
  end

  # Printable form of this atom: the pattern text without the surrounding
  # quotes added by String#inspect. +prec+ is not used here.
  def to_s_inner(prec)
    match.inspect[1..-2]
  end
end
38
+
@@ -0,0 +1,63 @@
1
+
2
+ # Matches a parslet repeatedly.
3
+ #
4
+ # Example:
5
+ #
6
+ # str('a').repeat(1,3) # matches 'a' at least once, but at most three times
7
+ # str('a').maybe # matches 'a' if it is present in the input (repeat(0,1))
8
+ #
9
class Parslet::Atoms::Repetition < Parslet::Atoms::Base
  attr_reader :min, :max, :parslet

  # @param parslet the atom that is applied repeatedly
  # @param min     minimum number of matches required for success
  # @param max     maximum number of matches, or nil for "no upper bound"
  # @param tag     first element of the result array (used for flattening)
  def initialize(parslet, min, max, tag=:repetition)
    super()

    @parslet = parslet
    @min = min
    @max = max
    @tag = tag
    @error_msgs = {
      :minrep => "Expected at least #{min} of #{parslet.inspect}"
    }
  end

  # Applies +parslet+ until it fails or +max+ matches have been collected.
  #
  # On success the result is #succ of an array that starts with the tag and
  # is followed by every matched value. When fewer than +min+ matches were
  # found, the result is +context.err_at+ reported at the position where the
  # repetition started, carrying the last failure as its only child.
  def try(source, context)
    origin = source.pos
    results = [@tag] # the tag leads the result array (for flattening)
    matched = 0
    last_failure = nil

    loop do
      ok, outcome = parslet.apply(source, context)

      unless ok
        # Remember why the final attempt failed; it is reported below if
        # the minimum count has not been reached.
        last_failure = outcome
        break
      end

      matched += 1
      results << outcome

      # With an upper bound defined, stop as soon as it is reached.
      return succ(results) if max && matched >= max
    end

    # The loop ended on a failed attempt; that is only an error when too
    # few successful matches were collected before it.
    if matched < min
      return context.err_at(
        self,
        source,
        @error_msgs[:minrep],
        origin,
        [last_failure])
    end

    succ(results)
  end

  precedence REPETITION

  # Printable form: the inner parslet followed by '?' for the {0, 1} case
  # and by '{min, max}' otherwise.
  def to_s_inner(prec)
    suffix = (min == 0 && max == 1) ? '?' : "{#{min}, #{max}}"

    parslet.to_s(prec) + suffix
  end
end
63
+
@@ -0,0 +1,12 @@
1
+
2
class Parslet::Atoms::Rule < Parslet::Atoms::Entity
  # Keep the inherited #try reachable under a second name before it is
  # overridden below; Position calls it to evaluate the rule body.
  alias_method :eval_rule_body, :try

  # Applies this rule at the current position of +source+ by delegating to
  # a Position object, and returns the result of Position#apply_rule.
  def try(source, context)
    position = Position.new(source.pos, source, context, self)
    position.apply_rule
  end

end
10
+
11
+ require 'parslet/atoms/rule/position'
12
+
@@ -0,0 +1,143 @@
1
+ # Update/fetch parsed entry at a given position in source
2
+ # Eval rule body at a given position in source and cache the result
3
# One rule applied at one position of the source.
#
# pos::     position in +source+ at which the rule is applied
# source::  the source being parsed
# context:: parse context; used here for the memo table (#lookup / #set),
#           the +heads+ table and the +lr_stack+
# rule::    the rule whose body is evaluated
class Parslet::Atoms::Rule::Position < Struct.new(:pos, :source, :context, :rule)
  # A memoized result: the answer of a rule body together with the source
  # position reached after evaluating it.
  class MemoEntry < Struct.new(:answer, :pos)
  end

  # An LR holds the bookkeeping for one (potential) left recursion.
  # seed: the most recent parse result of the left-recursive expression
  # rule: the rule starting left recursion
  # pos:  the position at which that rule was applied
  # head: when left recursion detected, head holds info to re-eval involved rules
  class LR < Struct.new(:seed, :rule, :pos, :head)
    # rule:           the rule at which the left recursion starts
    # involved_rules: rules collected from the LR stack for this recursion
    # eval_rules:     involved rules that may still be re-evaluated during
    #                 the current growing iteration
    class Head < Struct.new(:rule, :involved_rules, :eval_rules)
      # True when +rule+ is the head rule itself or one of the involved rules.
      def involved?(rule)
        self.rule == rule || self.involved_rules.include?(rule)
      end

      # True when +rule+ is still listed in +eval_rules+.
      def eval?(rule)
        eval_rules.include?(rule)
      end

      # Removes +rule+ from +eval_rules+.
      def exclude_eval_rule!(rule)
        eval_rules.delete(rule)
      end

      # Resets +eval_rules+ to a fresh copy of +involved_rules+.
      def reset_eval_rules
        self.eval_rules = self.involved_rules.dup
      end
    end

    # Lets an LR be read like a MemoEntry (both respond to #answer and #pos).
    alias :answer :seed

    # True once a head has been assigned to this LR.
    def detected?
      self.head != nil
    end

    # Makes sure this LR has a head, then walks +lr_stack+ and assigns that
    # head to every LR visited, recording each one's rule as involved. The
    # walk stops at the first LR that already carries this head.
    # NOTE(review): +top_down+ is defined on the stack object outside this
    # file; presumably it yields from the most recently pushed LR downwards
    # - confirm.
    def setup_for_re_eval_involved_rules(lr_stack)
      self.head ||= Head.new(rule, [], [])
      lr_stack.top_down do |lr|
        # Non-local return: leaves the whole method, not just the block.
        return if lr.head == self.head
        lr.head = self.head
        self.head.involved_rules.push lr.rule
      end
    end
  end

  # Shortcuts into +context+ for the rule/position of the including object.
  module Context
    # Stores +entry+ in the memo table for (rule, pos).
    def entry=(entry)
      context.set rule, pos, entry
    end

    # Looks up the memo table entry for (rule, pos).
    def entry
      context.lookup(rule, pos)
    end

    # The head registered for +pos+ in +context.heads+, if any.
    def head
      context.heads[pos]
    end

    # Registers +h+ as the head for +pos+ in +context.heads+.
    def head=(h)
      context.heads[pos] = h
    end

    # The stack of LR objects kept by +context+.
    def lr_stack
      context.lr_stack
    end
  end

  include Context

  # Applies the rule at +pos+, using the memo table and left-recursion
  # bookkeeping, then moves +source+ to the position stored with the result
  # and returns that result's answer.
  def apply_rule
    result = recall
    if result.nil?
      # Eval rule body with LR supported by
      # placing a LR flag before eval rule body
      # and growing LR seed after detected LR
      #
      # NOTE(review): the seed passed here is the MemoEntry built by #fail,
      # so LR#answer (an alias of #seed) yields a MemoEntry rather than a
      # bare answer - confirm that callers expect this.
      lr = LR.new(fail('left recursion detected'), self.rule, self.pos)
      lr_stack.push(lr)
      self.entry = lr
      self.entry = eval_rule_body
      lr_stack.pop
      # NOTE(review): the entry is a Struct, and Struct mixes in Enumerable,
      # so #first returns the first member (the answer) rather than a
      # success flag - confirm this is the intended success test.
      if self.entry.first && lr.detected?
        grow_lr(lr.head)
      end
      result = self.entry
    elsif result.is_a?(LR)
      # Find out all involved lrs in stack
      # Collect rules of involved lrs
      # And set head of involved lrs for re-eval
      # rules in recall process
      result.setup_for_re_eval_involved_rules(lr_stack)
    end
    source.pos = result.pos
    result.answer
  end

  private
  # Returns the memo entry to use for this rule/position, re-evaluating the
  # rule body when a seed is being grown at this position and the rule is
  # still allowed to be evaluated in the current iteration.
  def recall
    # if not growing a seed parse, just return what is stored
    # in the memo table
    return self.entry if self.head.nil?
    # do not evaluate any rule that is not involved in this
    # left recursion
    # question: why self.entry.nil?
    if self.entry.nil? && !self.head.involved?(self.rule)
      return fail('not involved in head left recursion')
    end

    # allow involved rules to be evaluated, but only once
    # during a seed-growing iteration
    if self.head.eval?(self.rule)
      self.head.exclude_eval_rule!(self.rule)
      self.entry = eval_rule_body
    end
    self.entry
  end

  # Tries to grow the parse of rule at given position
  #
  # Registers +h+ as the head for this position, then re-evaluates the rule
  # body repeatedly, keeping each new entry until an evaluation fails the
  # #first check or makes no progress. The head is cleared afterwards.
  def grow_lr(h)
    self.head = h
    loop do
      h.reset_eval_rules
      entry = eval_rule_body
      break if !entry.first || no_progress?(entry)
      self.entry = entry
    end
    self.head = nil
  end

  # Rewinds +source+ to +pos+, evaluates the rule body through
  # Rule#eval_rule_body and wraps the answer together with the position
  # reached in a MemoEntry.
  def eval_rule_body
    source.pos = self.pos
    answer = rule.eval_rule_body(source, context)
    MemoEntry.new(answer, source.pos)
  end

  # True when +entry+ did not get further in the source than the entry
  # currently stored in the memo table.
  def no_progress?(entry)
    entry.pos <= self.entry.pos
  end

  # Builds a MemoEntry that carries the result of +context.err+ for
  # +message+ at this position. Note that this shadows Kernel#fail inside
  # this class.
  def fail(message)
    MemoEntry.new(context.err(rule, source, message), self.pos)
  end

end
@@ -0,0 +1,38 @@
1
+ # A sequence of parslets, matched from left to right. Denoted by '>>'
2
+ #
3
+ # Example:
4
+ #
5
+ # str('a') >> str('b') # matches 'a', then 'b'
6
+ #
7
class Parslet::Atoms::Sequence < Parslet::Atoms::Base
  attr_reader :parslets

  # @param parslets the atoms to match, in order
  def initialize(*parslets)
    super()

    # @parslets is assigned before the message is built because the message
    # embeds #inspect of this sequence.
    @parslets = parslets
    @error_msgs = {
      :failed => "Failed to match sequence (#{self.inspect})"
    }
  end

  # Returns a new sequence of the same class with +parslet+ appended; the
  # receiver is left untouched.
  def >>(parslet)
    extended = @parslets + [parslet]
    self.class.new(*extended)
  end

  # Applies every parslet in order. The first failure aborts the sequence
  # and is reported through +context.err+ with the failed value as its only
  # child; otherwise the result is #succ of [:sequence, value1, value2, ...].
  def try(source, context)
    results = [:sequence]

    parslets.each do |atom|
      ok, outcome = atom.apply(source, context)

      unless ok
        return context.err(self, source, @error_msgs[:failed], [outcome])
      end

      results << outcome
    end

    succ(results)
  end

  precedence SEQUENCE

  # Printable form: the printable forms of all parslets joined by a space.
  def to_s_inner(prec)
    printed = parslets.map { |atom| atom.to_s(prec) }
    printed.join(' ')
  end
end
@@ -0,0 +1,37 @@
1
+ # Matches a string of characters.
2
+ #
3
+ # Example:
4
+ #
5
+ # str('foo') # matches 'foo'
6
+ #
7
class Parslet::Atoms::Str < Parslet::Atoms::Base
  # The literal text this atom matches (the constructor argument after #to_s).
  attr_reader :str

  # @param str [#to_s] the text to match; it is normalized with #to_s
  def initialize(str)
    super()

    @str = str.to_s
    # Length and error message are derived from the normalized string, not
    # from the raw argument: e.g. Integer#size is a byte count, which is
    # unrelated to the number of characters of the text that is matched.
    @len = @str.size
    @error_msgs = {
      :premature => "Premature end of input",
      :failed => "Expected #{@str.inspect}, but got "
    }
  end

  # Consumes +str+ from +source+ when the source matches it at the current
  # position.
  #
  # Returns #succ of the consumed text on a match (#succ is defined outside
  # this file). Otherwise returns +context.err+ when fewer characters are
  # left than the text is long, or +context.err_at+ with a message made of
  # the "Expected ..." prefix and the text that was actually found.
  def try(source, context)
    return succ(source.consume(@len)) if source.matches?(str)

    # Failures:
    if source.chars_left < @len
      return context.err(self, source, @error_msgs[:premature])
    end

    # Remember where the mismatch started before consuming the offending
    # text, so the error is reported at the right position.
    error_pos = source.pos
    context.err_at(
      self, source,
      [@error_msgs[:failed], source.consume(@len)], error_pos)
  end

  # Printable form: the text wrapped in single quotes. +prec+ is not used.
  def to_s_inner(prec)
    "'#{str}'"
  end
end
37
+
@@ -0,0 +1,89 @@
1
+ # Augments all parslet atoms with an accept method that will call back
2
+ # to the visitor given.
3
+
4
+ #
5
# Reopens the atom classes and gives each one an #accept method that hands
# the atom's defining attributes to the matching visit_* method of the
# visitor. Each #accept returns whatever that visitor method returns.
module Parslet::Atoms
  class Base
    # Fallback for atoms without their own #accept.
    #
    # @raise [NotImplementedError] always
    def accept(visitor)
      raise NotImplementedError, "No #accept method on #{self.class.name}."
    end
  end

  class Str
    # Calls back the visitor's #visit_str method with the string this atom
    # matches. See parslet/export for an example.
    #
    def accept(visitor)
      visitor.visit_str(str)
    end
  end

  class Entity
    # Calls back the visitor's #visit_entity method with the entity's name
    # and block. See parslet/export for an example.
    #
    def accept(visitor)
      visitor.visit_entity(name, block)
    end
  end

  class Named
    # Calls back the visitor's #visit_named method with the name and the
    # wrapped parslet. See parslet/export for an example.
    #
    def accept(visitor)
      visitor.visit_named(name, parslet)
    end
  end

  class Sequence
    # Calls back the visitor's #visit_sequence method with the list of
    # parslets. See parslet/export for an example.
    #
    def accept(visitor)
      visitor.visit_sequence(parslets)
    end
  end

  class Repetition
    # Calls back the visitor's #visit_repetition method with the tag, the
    # minimum and maximum counts and the repeated parslet. The tag is read
    # from the instance variable directly. See parslet/export for an
    # example.
    #
    def accept(visitor)
      visitor.visit_repetition(@tag, min, max, parslet)
    end
  end

  class Alternative
    # Calls back the visitor's #visit_alternative method with the list of
    # alternatives. See parslet/export for an example.
    #
    def accept(visitor)
      visitor.visit_alternative(alternatives)
    end
  end

  class Lookahead
    # Calls back the visitor's #visit_lookahead method with the +positive+
    # attribute and the bound parslet. See parslet/export for an example.
    #
    def accept(visitor)
      visitor.visit_lookahead(positive, bound_parslet)
    end
  end

  class Re
    # Calls back the visitor's #visit_re method with the pattern source.
    # See parslet/export for an example.
    #
    def accept(visitor)
      visitor.visit_re(match)
    end
  end
end
82
+
83
class Parslet::Parser
  # Calls back the visitor's #visit_parser method with this parser's root
  # and returns whatever that method returns.
  #
  def accept(visitor)
    visitor.visit_parser(root)
  end
end
@@ -0,0 +1,94 @@
1
# #ascii_tree writes into a StringIO; load it here so this file does not
# depend on some other file having required it first.
require 'stringio'

module Parslet
  # Represents a cause why a parse did fail. A lot of these objects are
  # constructed - not all of the causes turn out to be failures for the whole
  # parse.
  #
  class Cause
    # @param message [String, Array] message or list of message pieces
    # @param source [Parslet::Source] source that was being parsed
    # @param pos [Integer] position of the error
    # @param children [Array<Parslet::Cause>, nil] causes of this cause
    def initialize(message, source, pos, children)
      @message = message
      @source = source
      @pos = pos
      @children = children
    end

    # @return [String, Array] A string or an array of message pieces that
    #   provide failure information. Use #to_s to get a formatted string.
    attr_reader :message

    # @return [Parslet::Source] Source that was parsed when this error
    #   happened. Mainly used for line number information.
    attr_reader :source

    # Location of the error.
    #
    # @return [Integer] Position where the error happened. (character offset)
    attr_reader :pos

    # When this cause is part of a tree of error causes: child nodes for this
    # node. Very often carries the reasons for this cause.
    #
    # @return [Array<Parslet::Cause>] A list of reasons for this cause; an
    #   empty array when the cause was built without children.
    def children
      @children ||= []
    end

    # Builds a new cause from the given parts. The text 'at line LINE char
    # CHAR' is not added here; #to_s appends it when the cause is turned
    # into a string.
    #
    # @param source [Parslet::Source] source that was parsed when this error
    #   happened
    # @param pos [Integer] position of error
    # @param str [String, Array<String>] message parts
    # @param children [Array<Parslet::Cause>] child nodes for this error tree
    # @return [Parslet::Cause] a new instance of {Parslet::Cause}
    #
    def self.format(source, pos, str, children=[])
      new(str, source, pos, children)
    end

    # Formats the message followed by ' at line LINE char CHAR.'.
    #
    # @return [String]
    def to_s
      line, column = source.line_and_column(pos)

      # The message may be a list of objects. They are joined only here,
      # when the text is really needed. Slice-like pieces (anything that
      # responds to #to_slice) are shown as their inspected string.
      text = Array(message).map { |part|
        part.respond_to?(:to_slice) ? part.str.inspect : part.to_s
      }.join

      text + " at line #{line} char #{column}."
    end

    # Signals to the outside that the parse has failed. Use this in
    # conjunction with .format for nice error messages.
    #
    # @param exception_klass [Class] exception class; it is instantiated
    #   with the formatted message and this cause
    # @raise [exception_klass] always
    #
    def raise(exception_klass=Parslet::ParseFailed)
      exception = exception_klass.new(self.to_s, self)
      # This method shadows Kernel#raise for instances of this class, so
      # the real one has to be called explicitly.
      Kernel.raise exception
    end

    # Returns an ascii tree representation of the causes of this node and its
    # children.
    #
    # @return [String]
    def ascii_tree
      io = StringIO.new
      recursive_ascii_tree(self, io, [true])
      io.string
    end

    private

    # Writes +node+ and, recursively, its children to +stream+. +curved+
    # holds one flag per ancestor level (plus the node itself) telling
    # whether the node on that level is the last child of its parent.
    def recursive_ascii_tree(node, stream, curved)
      append_prefix(stream, curved)
      stream.puts node.to_s

      # Decide "last child" by index rather than by comparing against
      # children.last, so that a child object appearing more than once is
      # not marked as last too early.
      last_index = node.children.size - 1
      node.children.each_with_index do |child, index|
        recursive_ascii_tree(child, stream, curved + [index == last_index])
      end
    end

    # Writes the tree-drawing prefix for one line. The root (a single flag)
    # gets no prefix.
    def append_prefix(stream, curved)
      return if curved.size < 2

      # One column per ancestor level below the root. Every piece is three
      # characters wide, the same width as the connectors printed below, so
      # nested levels line up underneath their parent.
      curved[1..-2].each do |c|
        stream.print c ? "   " : "|  "
      end
      stream.print curved.last ? "`- " : "|- "
    end
  end
end