ghazel-parslet 1.4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. data/HISTORY.txt +195 -0
  2. data/LICENSE +23 -0
  3. data/README +70 -0
  4. data/Rakefile +49 -0
  5. data/example/boolean_algebra.rb +70 -0
  6. data/example/calc.rb +153 -0
  7. data/example/comments.rb +35 -0
  8. data/example/deepest_errors.rb +131 -0
  9. data/example/documentation.rb +18 -0
  10. data/example/email_parser.rb +52 -0
  11. data/example/empty.rb +13 -0
  12. data/example/erb.rb +47 -0
  13. data/example/ignore.rb +33 -0
  14. data/example/ip_address.rb +125 -0
  15. data/example/json.rb +128 -0
  16. data/example/local.rb +34 -0
  17. data/example/mathn.rb +44 -0
  18. data/example/minilisp.rb +94 -0
  19. data/example/modularity.rb +47 -0
  20. data/example/nested_errors.rb +132 -0
  21. data/example/output/boolean_algebra.out +4 -0
  22. data/example/output/calc.out +1 -0
  23. data/example/output/comments.out +8 -0
  24. data/example/output/deepest_errors.out +54 -0
  25. data/example/output/documentation.err +4 -0
  26. data/example/output/documentation.out +1 -0
  27. data/example/output/email_parser.out +2 -0
  28. data/example/output/empty.err +1 -0
  29. data/example/output/erb.out +7 -0
  30. data/example/output/ignore.out +1 -0
  31. data/example/output/ignore_whitespace.out +1 -0
  32. data/example/output/ip_address.out +9 -0
  33. data/example/output/json.out +5 -0
  34. data/example/output/local.out +3 -0
  35. data/example/output/mathn.out +4 -0
  36. data/example/output/minilisp.out +5 -0
  37. data/example/output/modularity.out +0 -0
  38. data/example/output/nested_errors.out +54 -0
  39. data/example/output/parens.out +8 -0
  40. data/example/output/readme.out +1 -0
  41. data/example/output/seasons.out +28 -0
  42. data/example/output/sentence.out +1 -0
  43. data/example/output/simple_xml.out +2 -0
  44. data/example/output/string_parser.out +3 -0
  45. data/example/parens.rb +42 -0
  46. data/example/readme.rb +30 -0
  47. data/example/seasons.rb +46 -0
  48. data/example/sentence.rb +36 -0
  49. data/example/simple.lit +3 -0
  50. data/example/simple_xml.rb +54 -0
  51. data/example/string_parser.rb +77 -0
  52. data/example/test.lit +4 -0
  53. data/lib/parslet.rb +254 -0
  54. data/lib/parslet/atoms.rb +32 -0
  55. data/lib/parslet/atoms/alternative.rb +50 -0
  56. data/lib/parslet/atoms/base.rb +124 -0
  57. data/lib/parslet/atoms/can_flatten.rb +137 -0
  58. data/lib/parslet/atoms/context.rb +94 -0
  59. data/lib/parslet/atoms/dsl.rb +98 -0
  60. data/lib/parslet/atoms/entity.rb +41 -0
  61. data/lib/parslet/atoms/lookahead.rb +49 -0
  62. data/lib/parslet/atoms/named.rb +32 -0
  63. data/lib/parslet/atoms/re.rb +38 -0
  64. data/lib/parslet/atoms/repetition.rb +63 -0
  65. data/lib/parslet/atoms/rule.rb +12 -0
  66. data/lib/parslet/atoms/rule/position.rb +143 -0
  67. data/lib/parslet/atoms/sequence.rb +38 -0
  68. data/lib/parslet/atoms/str.rb +37 -0
  69. data/lib/parslet/atoms/visitor.rb +89 -0
  70. data/lib/parslet/cause.rb +94 -0
  71. data/lib/parslet/convenience.rb +35 -0
  72. data/lib/parslet/error_reporter.rb +7 -0
  73. data/lib/parslet/error_reporter/deepest.rb +95 -0
  74. data/lib/parslet/error_reporter/tree.rb +57 -0
  75. data/lib/parslet/export.rb +162 -0
  76. data/lib/parslet/expression.rb +51 -0
  77. data/lib/parslet/expression/treetop.rb +92 -0
  78. data/lib/parslet/parser.rb +67 -0
  79. data/lib/parslet/pattern.rb +114 -0
  80. data/lib/parslet/pattern/binding.rb +49 -0
  81. data/lib/parslet/rig/rspec.rb +51 -0
  82. data/lib/parslet/slice.rb +101 -0
  83. data/lib/parslet/source.rb +62 -0
  84. data/lib/parslet/source/line_cache.rb +95 -0
  85. data/lib/parslet/transform.rb +236 -0
  86. data/lib/parslet/transform/context.rb +32 -0
  87. metadata +264 -0
@@ -0,0 +1,38 @@
1
# Matches exactly one character using a regular expression. Patterns that
# make sense here are the ones that describe a single character, such as
# <code>character ranges, \\w, \\d, \\r, \\n, ...</code>
#
# Example:
#
#   match('[a-z]')  # matches a-z
#   match('\s')     # like regexps: matches space characters
#
class Parslet::Atoms::Re < Parslet::Atoms::Base
  attr_reader :match, :re

  # match:: pattern source; converted with #to_s and compiled with
  #         Regexp::MULTILINE.
  def initialize(match)
    super()

    @match = match.to_s
    @re = Regexp.new(self.match, Regexp::MULTILINE)

    # Error texts are built once here and reused on every failed attempt.
    # The pattern is shown without the surrounding characters of #inspect.
    printable = match.inspect[1..-2]
    @error_msgs = {
      :premature => "Premature end of input",
      :failed => "Failed to match #{printable}"
    }
  end

  # Consumes one character when the source matches +re+ at the current
  # position; otherwise reports either premature end of input or a plain
  # mismatch through the context.
  def try(source, context)
    if source.matches?(re)
      succ(source.consume(1))
    elsif source.chars_left < 1
      # No string could be read
      context.err(self, source, @error_msgs[:premature])
    else
      # No match
      context.err(self, source, @error_msgs[:failed])
    end
  end

  # Printable form of the pattern (the #inspect output minus its first and
  # last character).
  def to_s_inner(prec)
    shown = match.inspect
    shown[1..-2]
  end
end
38
+
@@ -0,0 +1,63 @@
1
+
2
# Applies a parslet over and over again.
#
# Example:
#
#   str('a').repeat(1,3)  # matches 'a' at least once, but at most three times
#   str('a').maybe        # matches 'a' if it is present in the input (repeat(0,1))
#
class Parslet::Atoms::Repetition < Parslet::Atoms::Base
  attr_reader :min, :max, :parslet

  # parslet:: atom to repeat
  # min::     minimum number of successful applications
  # max::     maximum number of applications, or nil for no upper bound
  # tag::     first element of the result array (used for flattening)
  def initialize(parslet, min, max, tag=:repetition)
    super()

    @parslet = parslet
    @min = min
    @max = max
    @tag = tag
    @error_msgs = {
      :minrep => "Expected at least #{min} of #{parslet.inspect}"
    }
  end

  # Applies +parslet+ until it fails or +max+ successes were collected.
  # Succeeds with [tag, *values]; when fewer than +min+ applications
  # succeeded, reports an error at the starting position with the last
  # failure as its child.
  def try(source, context)
    origin = source.pos
    results = [@tag] # the tag goes first (for flattening)
    count = 0
    last_failure = nil

    loop do
      ok, outcome = parslet.apply(source, context)

      unless ok
        # Remember why the final attempt failed.
        last_failure = outcome
        break
      end

      count += 1
      results << outcome

      # With an upper bound defined, stop as soon as it is reached.
      return succ(results) if max && count >= max
    end

    # The loop ended on a failure; count holds the number of successes.
    if count < min
      return context.err_at(
        self,
        source,
        @error_msgs[:minrep],
        origin,
        [last_failure])
    end

    succ(results)
  end

  precedence REPETITION
  # Renders the repeated parslet followed by '?' for repeat(0,1) or by the
  # {min, max} bounds otherwise.
  def to_s_inner(prec)
    suffix = (min == 0 && max == 1) ? '?' : "{#{min}, #{max}}"

    parslet.to_s(prec) + suffix
  end
end
63
+
@@ -0,0 +1,12 @@
1
+ # An Entity whose #try is routed through Rule::Position#apply_rule.
2
+ class Parslet::Atoms::Rule < Parslet::Atoms::Entity
3
+ alias_method :eval_rule_body, :try # keeps the inherited #try callable; Position#eval_rule_body invokes it
4
+ # Builds a Position for the current source offset and returns the result of its #apply_rule.
5
+ def try(source, context)
6
+ Position.new(source.pos, source, context, self).apply_rule
7
+ end
8
+
9
+ end
10
+
11
+ require 'parslet/atoms/rule/position'
12
+
@@ -0,0 +1,143 @@
1
+ # Looks up or updates the memoized entry of one rule at one source position.
2
+ # Evaluates the rule body at that position and caches the result when needed.
3
+ class Parslet::Atoms::Rule::Position < Struct.new(:pos, :source, :context, :rule)
4
+ class MemoEntry < Struct.new(:answer, :pos) # an answer plus the source position that #apply_rule restores
5
+ end
6
+
7
+ # An LR holds the bookkeeping for one rule application that may be left-recursive.
8
+ # seed: entry stored before the rule body is evaluated (also readable as #answer)
9
+ # rule, pos: the rule being applied and the position it is applied at
10
+ # head: set once left recursion is detected; holds the rules to re-evaluate
11
+ class LR < Struct.new(:seed, :rule, :pos, :head)
12
+ class Head < Struct.new(:rule, :involved_rules, :eval_rules)
13
+ def involved?(rule) # true for the head rule itself or any rule listed in involved_rules
14
+ self.rule == rule || self.involved_rules.include?(rule)
15
+ end
16
+ # True if the rule is currently listed in eval_rules.
17
+ def eval?(rule)
18
+ eval_rules.include?(rule)
19
+ end
20
+ # Removes the rule from eval_rules; #recall does this just before re-evaluating it.
21
+ def exclude_eval_rule!(rule)
22
+ eval_rules.delete(rule)
23
+ end
24
+ # Replaces eval_rules with a copy of involved_rules; #grow_lr does this at the start of each iteration.
25
+ def reset_eval_rules
26
+ self.eval_rules = self.involved_rules.dup
27
+ end
28
+ end
29
+ # #answer reads the seed, so #apply_rule can call #answer and #pos on an LR as it does on a MemoEntry.
30
+ alias :answer :seed
31
+ # True once a head has been assigned to this LR.
32
+ def detected?
33
+ self.head != nil
34
+ end
35
+ # Ensures this LR has a Head, then hands it to the LRs yielded by lr_stack.top_down and records their rules as involved.
36
+ def setup_for_re_eval_involved_rules(lr_stack)
37
+ self.head ||= Head.new(rule, [], [])
38
+ lr_stack.top_down do |lr|
39
+ return if lr.head == self.head # leaves the whole method at the first LR that already shares this head
40
+ lr.head = self.head
41
+ self.head.involved_rules.push lr.rule
42
+ end
43
+ end
44
+ end
45
+ # Accessors that delegate this position's memo entry, head and LR stack to the context.
46
+ module Context
47
+ def entry=(entry)
48
+ context.set rule, pos, entry
49
+ end
50
+ # Entry stored for (rule, pos), as returned by context.lookup.
51
+ def entry
52
+ context.lookup(rule, pos)
53
+ end
54
+ # Head registered for this position in context.heads; #recall treats nil as "not growing a seed".
55
+ def head
56
+ context.heads[pos]
57
+ end
58
+
59
+ def head=(h)
60
+ context.heads[pos] = h
61
+ end
62
+
63
+ def lr_stack
64
+ context.lr_stack
65
+ end
66
+ end
67
+
68
+ include Context
69
+ # Returns the answer of the rule at pos and moves source.pos to the position stored with it.
70
+ def apply_rule
71
+ result = recall
72
+ if result.nil?
73
+ # Nothing memoized yet: store an LR marker (seeded with a failure) as
74
+ # the entry, evaluate the rule body, then replace the marker with the
75
+ # real result. If the marker was hit meanwhile, grow the seed.
76
+ lr = LR.new(fail('left recursion detected'), self.rule, self.pos) # NOTE(review): #fail returns a MemoEntry, so the seed (and LR#answer) is a MemoEntry, not a bare answer; confirm callers expect that
77
+ lr_stack.push(lr)
78
+ self.entry = lr
79
+ self.entry = eval_rule_body
80
+ lr_stack.pop
81
+ if self.entry.first && lr.detected? # NOTE(review): entry is presumably the MemoEntry just stored, so #first is Enumerable#first (the answer member); confirm a success check was intended
82
+ grow_lr(lr.head)
83
+ end
84
+ result = self.entry
85
+ elsif result.is_a?(LR)
86
+ # Recalled an LR marker: the rule is applied again at this position
87
+ # while the marker is still the stored entry. Give the LRs on the
88
+ # stack a shared head and collect their rules, so that #recall can
89
+ # re-evaluate them; the marker's seed is returned below.
90
+ result.setup_for_re_eval_involved_rules(lr_stack)
91
+ end
92
+ source.pos = result.pos
93
+ result.answer
94
+ end
95
+ # Fetches the entry for this position; while a seed is growing (head set) it may reject or re-evaluate the rule first.
96
+ private
97
+ def recall
98
+ # if no seed parse is being grown at this position, just return what
99
+ # is stored in the memo table
100
+ return self.entry if self.head.nil?
101
+ # do not evaluate any rule that is not involved in this
102
+ # left recursion
103
+ # TODO (question from the original author): why is self.entry.nil? required here?
104
+ if self.entry.nil? && !self.head.involved?(self.rule)
105
+ return fail('not involved in head left recursion')
106
+ end
107
+
108
+ # allow involved rules to be evaluated, but only once
109
+ # during a seed-growing iteration
110
+ if self.head.eval?(self.rule)
111
+ self.head.exclude_eval_rule!(self.rule)
112
+ self.entry = eval_rule_body
113
+ end
114
+ self.entry
115
+ end
116
+ # Registers h as the head for this position and re-evaluates the rule body in a loop,
117
+ # storing each result until one fails the check below or makes no progress; then clears the head.
118
+ def grow_lr(h)
119
+ self.head = h
120
+ loop do
121
+ h.reset_eval_rules
122
+ entry = eval_rule_body # local variable; the stored entry is still reached through self.entry
123
+ break if !entry.first || no_progress?(entry)
124
+ self.entry = entry
125
+ end
126
+ self.head = nil
127
+ end
128
+ # Rewinds the source to pos, calls rule.eval_rule_body and wraps the answer with the resulting source position.
129
+ def eval_rule_body
130
+ source.pos = self.pos
131
+ answer = rule.eval_rule_body(source, context)
132
+ MemoEntry.new(answer, source.pos)
133
+ end
134
+ # True when the given entry ends at or before the position of the stored entry.
135
+ def no_progress?(entry)
136
+ entry.pos <= self.entry.pos
137
+ end
138
+ # Wraps context.err(...) in a MemoEntry positioned at pos. Shadows Kernel#fail inside Position.
139
+ def fail(message)
140
+ MemoEntry.new(context.err(rule, source, message), self.pos)
141
+ end
142
+
143
+ end
@@ -0,0 +1,38 @@
1
# Several parslets that have to match one after the other, left to right.
# Written with '>>'.
#
# Example:
#
#   str('a') >> str('b')  # matches 'a', then 'b'
#
class Parslet::Atoms::Sequence < Parslet::Atoms::Base
  attr_reader :parslets

  # parslets:: the atoms to match, in order
  def initialize(*parslets)
    super()

    @parslets = parslets
    # The message is built once, from #inspect at construction time.
    description = self.inspect
    @error_msgs = {
      :failed => "Failed to match sequence (#{description})"
    }
  end

  # Returns a new sequence with +parslet+ appended; the receiver is left
  # untouched.
  def >>(parslet)
    self.class.new(*@parslets, parslet)
  end

  # Applies every parslet in order and succeeds with
  # [:sequence, value1, value2, ...]. The first failing parslet aborts the
  # sequence; its failure becomes the child of the reported error.
  def try(source, context)
    collected = [:sequence]

    parslets.each do |atom|
      ok, outcome = atom.apply(source, context)

      return context.err(self, source, @error_msgs[:failed], [outcome]) unless ok

      collected << outcome
    end

    succ(collected)
  end

  precedence SEQUENCE
  # Space-separated printable forms of the member parslets.
  def to_s_inner(prec)
    rendered = parslets.map { |atom| atom.to_s(prec) }
    rendered.join(' ')
  end
end
@@ -0,0 +1,37 @@
1
# Matches a string of characters.
#
# Example:
#
#   str('foo')  # matches 'foo'
#
class Parslet::Atoms::Str < Parslet::Atoms::Base
  attr_reader :str

  # str:: text to match; anything responding to #to_s is accepted and
  #       converted.
  def initialize(str)
    super()

    @str = str.to_s
    # Measure the converted string rather than the raw argument. The raw
    # argument may answer #size with something unrelated to the number of
    # characters (Integer#size is a byte width) or not at all (nil).
    @len = @str.size
    @error_msgs = {
      :premature => "Premature end of input",
      :failed => "Expected #{str.inspect}, but got "
    }
  end

  # Consumes @len characters when the source matches +str+ at the current
  # position. Otherwise reports premature end of input when fewer than
  # @len characters are left, or a mismatch that carries the text found
  # instead, located at the position where the attempt started.
  def try(source, context)
    return succ(source.consume(@len)) if source.matches?(str)

    # Failures:
    return context.err(self, source, @error_msgs[:premature]) \
      if source.chars_left < @len

    # Remember the position first: consuming the offending text moves it.
    error_pos = source.pos
    return context.err_at(
      self, source,
      [@error_msgs[:failed], source.consume(@len)], error_pos)
  end

  # The string in single quotes.
  def to_s_inner(prec)
    "'#{str}'"
  end
end
37
+
@@ -0,0 +1,89 @@
1
+ # Augments all parslet atoms with an #accept method that calls back to the
2
+ # visitor it is given.
3
+ # Base#accept raises NotImplementedError; each atom class reopened here passes its own parts to a visit_* method.
4
+ #
5
+ module Parslet::Atoms
6
+ class Base
7
+ def accept(visitor)
8
+ raise NotImplementedError, "No #accept method on #{self.class.name}."
9
+ end
10
+ end
11
+
12
+ class Str
13
+ # Calls back the visitor's #visit_str method with the string. See parslet/export for an example.
14
+ #
15
+ def accept(visitor)
16
+ visitor.visit_str(str)
17
+ end
18
+ end
19
+
20
+ class Entity
21
+ # Calls back the visitor's #visit_entity method with the name and the block.
22
+ # See parslet/export for an example.
23
+ #
24
+ def accept(visitor)
25
+ visitor.visit_entity(name, block)
26
+ end
27
+ end
28
+
29
+ class Named
30
+ # Calls back the visitor's #visit_named method with the name and the parslet.
31
+ # See parslet/export for an example.
32
+ #
33
+ def accept(visitor)
34
+ visitor.visit_named(name, parslet)
35
+ end
36
+ end
37
+
38
+ class Sequence
39
+ # Calls back the visitor's #visit_sequence method with the list of parslets.
40
+ # See parslet/export for an example.
41
+ #
42
+ def accept(visitor)
43
+ visitor.visit_sequence(parslets)
44
+ end
45
+ end
46
+
47
+ class Repetition
48
+ # Calls back the visitor's #visit_repetition method with the tag, min, max
49
+ # and the parslet. See parslet/export for an example.
50
+ #
51
+ def accept(visitor)
52
+ visitor.visit_repetition(@tag, min, max, parslet)
53
+ end
54
+ end
55
+
56
+ class Alternative
57
+ # Calls back the visitor's #visit_alternative method with the alternatives.
58
+ # See parslet/export for an example.
59
+ #
60
+ def accept(visitor)
61
+ visitor.visit_alternative(alternatives)
62
+ end
63
+ end
64
+
65
+ class Lookahead
66
+ # Calls back the visitor's #visit_lookahead method with the positive flag and
67
+ # the bound parslet. See parslet/export for an example.
68
+ #
69
+ def accept(visitor)
70
+ visitor.visit_lookahead(positive, bound_parslet)
71
+ end
72
+ end
73
+
74
+ class Re
75
+ # Calls back the visitor's #visit_re method with the match pattern. See parslet/export for an example.
76
+ #
77
+ def accept(visitor)
78
+ visitor.visit_re(match)
79
+ end
80
+ end
81
+ end
82
+
83
+ class Parslet::Parser
84
+ # Calls back the visitor's #visit_parser method with the parser's root.
85
+ #
86
+ def accept(visitor)
87
+ visitor.visit_parser(root)
88
+ end
89
+ end
@@ -0,0 +1,94 @@
1
require 'stringio' # #ascii_tree assembles its output in a StringIO

module Parslet
  # Represents a cause why a parse did fail. A lot of these objects are
  # constructed - not all of the causes turn out to be failures for the whole
  # parse.
  #
  class Cause
    # @param message [String, Array] message or message pieces
    # @param source [Parslet::Source] source that was being parsed
    # @param pos [Fixnum] position of the error
    # @param children [Array<Parslet::Cause>, nil] causes of this cause
    def initialize(message, source, pos, children)
      @message, @source, @pos, @children =
        message, source, pos, children
    end

    # @return [String, Array] A string or an array of message pieces that
    #   provide failure information. Use #to_s to get a formatted string.
    attr_reader :message

    # @return [Parslet::Source] Source that was parsed when this error
    #   happened. Mainly used for line number information.
    attr_reader :source

    # Location of the error.
    #
    # @return [Fixnum] Position where the error happened. (character offset)
    attr_reader :pos

    # When this cause is part of a tree of error causes: child nodes for this
    # node. Very often carries the reasons for this cause.
    #
    # @return [Array<Parslet::Cause>] A list of reasons for this cause; an
    #   empty array when none were given.
    def children
      @children ||= []
    end

    # Builds a cause from its parts. The 'at line LINE char CHAR' suffix is
    # appended when the result is turned into a string using #to_s.
    #
    # @param source [Parslet::Source] source that was parsed when this error
    #   happened
    # @param pos [Fixnum] position of error
    # @param str [String, Array<String>] message parts
    # @param children [Array<Parslet::Cause>] child nodes for this error tree
    # @return [Parslet::Cause] a new instance of {Parslet::Cause}
    #
    def self.format(source, pos, str, children=[])
      self.new(str, source, pos, children)
    end

    # @return [String] the joined message followed by
    #   ' at line LINE char CHAR.'
    def to_s
      line, column = source.line_and_column(pos)
      # Allow message to be a list of objects. Join them here, since we now
      # really need it. Pieces that respond to #to_slice are shown as their
      # inspected string, everything else through #to_s.
      text = Array(message).map { |o|
        o.respond_to?(:to_slice) ? o.str.inspect : o.to_s
      }.join

      text + " at line #{line} char #{column}."
    end

    # Signals to the outside that the parse has failed. Use this in
    # conjunction with .format for nice error messages.
    #
    # @param exception_klass [Class] exception class; instantiated with the
    #   formatted message and this cause
    def raise(exception_klass=Parslet::ParseFailed)
      exception = exception_klass.new(self.to_s, self)
      # This method shadows Kernel#raise, so call the real one explicitly.
      Kernel.raise exception
    end

    # Returns an ascii tree representation of the causes of this node and its
    # children.
    #
    # @return [String] one line per cause, children indented below parents
    def ascii_tree
      StringIO.new.tap { |io|
        recursive_ascii_tree(self, io, [true]) }.
        string
    end

  private
    # Prints +node+ and then, recursively, its children. +curved+ holds one
    # flag per ancestor level (plus the node itself) telling whether the
    # node on that level is the last child of its parent.
    def recursive_ascii_tree(node, stream, curved)
      append_prefix(stream, curved)
      stream.puts node.to_s

      kids = node.children
      last_index = kids.size - 1
      # Decide "last child" by position, not by equality, so that a cause
      # appearing more than once among the children is labelled correctly.
      kids.each_with_index do |child, index|
        recursive_ascii_tree(child, stream, curved + [index == last_index])
      end
    end

    # Prints the indentation and the connector for one tree line. Nothing is
    # printed for the root (a single flag).
    def append_prefix(stream, curved)
      return if curved.size < 2
      # Every ancestor level takes three columns, the same width as the
      # connectors below, so nested levels line up.
      curved[1..-2].each do |c|
        stream.print c ? "   " : "|  "
      end
      stream.print curved.last ? "`- " : "|- "
    end
  end
end