ghazel-parslet 1.4.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/HISTORY.txt +195 -0
  2. data/LICENSE +23 -0
  3. data/README +70 -0
  4. data/Rakefile +49 -0
  5. data/example/boolean_algebra.rb +70 -0
  6. data/example/calc.rb +153 -0
  7. data/example/comments.rb +35 -0
  8. data/example/deepest_errors.rb +131 -0
  9. data/example/documentation.rb +18 -0
  10. data/example/email_parser.rb +52 -0
  11. data/example/empty.rb +13 -0
  12. data/example/erb.rb +47 -0
  13. data/example/ignore.rb +33 -0
  14. data/example/ip_address.rb +125 -0
  15. data/example/json.rb +128 -0
  16. data/example/local.rb +34 -0
  17. data/example/mathn.rb +44 -0
  18. data/example/minilisp.rb +94 -0
  19. data/example/modularity.rb +47 -0
  20. data/example/nested_errors.rb +132 -0
  21. data/example/output/boolean_algebra.out +4 -0
  22. data/example/output/calc.out +1 -0
  23. data/example/output/comments.out +8 -0
  24. data/example/output/deepest_errors.out +54 -0
  25. data/example/output/documentation.err +4 -0
  26. data/example/output/documentation.out +1 -0
  27. data/example/output/email_parser.out +2 -0
  28. data/example/output/empty.err +1 -0
  29. data/example/output/erb.out +7 -0
  30. data/example/output/ignore.out +1 -0
  31. data/example/output/ignore_whitespace.out +1 -0
  32. data/example/output/ip_address.out +9 -0
  33. data/example/output/json.out +5 -0
  34. data/example/output/local.out +3 -0
  35. data/example/output/mathn.out +4 -0
  36. data/example/output/minilisp.out +5 -0
  37. data/example/output/modularity.out +0 -0
  38. data/example/output/nested_errors.out +54 -0
  39. data/example/output/parens.out +8 -0
  40. data/example/output/readme.out +1 -0
  41. data/example/output/seasons.out +28 -0
  42. data/example/output/sentence.out +1 -0
  43. data/example/output/simple_xml.out +2 -0
  44. data/example/output/string_parser.out +3 -0
  45. data/example/parens.rb +42 -0
  46. data/example/readme.rb +30 -0
  47. data/example/seasons.rb +46 -0
  48. data/example/sentence.rb +36 -0
  49. data/example/simple.lit +3 -0
  50. data/example/simple_xml.rb +54 -0
  51. data/example/string_parser.rb +77 -0
  52. data/example/test.lit +4 -0
  53. data/lib/parslet.rb +254 -0
  54. data/lib/parslet/atoms.rb +32 -0
  55. data/lib/parslet/atoms/alternative.rb +50 -0
  56. data/lib/parslet/atoms/base.rb +124 -0
  57. data/lib/parslet/atoms/can_flatten.rb +137 -0
  58. data/lib/parslet/atoms/context.rb +94 -0
  59. data/lib/parslet/atoms/dsl.rb +98 -0
  60. data/lib/parslet/atoms/entity.rb +41 -0
  61. data/lib/parslet/atoms/lookahead.rb +49 -0
  62. data/lib/parslet/atoms/named.rb +32 -0
  63. data/lib/parslet/atoms/re.rb +38 -0
  64. data/lib/parslet/atoms/repetition.rb +63 -0
  65. data/lib/parslet/atoms/rule.rb +12 -0
  66. data/lib/parslet/atoms/rule/position.rb +143 -0
  67. data/lib/parslet/atoms/sequence.rb +38 -0
  68. data/lib/parslet/atoms/str.rb +37 -0
  69. data/lib/parslet/atoms/visitor.rb +89 -0
  70. data/lib/parslet/cause.rb +94 -0
  71. data/lib/parslet/convenience.rb +35 -0
  72. data/lib/parslet/error_reporter.rb +7 -0
  73. data/lib/parslet/error_reporter/deepest.rb +95 -0
  74. data/lib/parslet/error_reporter/tree.rb +57 -0
  75. data/lib/parslet/export.rb +162 -0
  76. data/lib/parslet/expression.rb +51 -0
  77. data/lib/parslet/expression/treetop.rb +92 -0
  78. data/lib/parslet/parser.rb +67 -0
  79. data/lib/parslet/pattern.rb +114 -0
  80. data/lib/parslet/pattern/binding.rb +49 -0
  81. data/lib/parslet/rig/rspec.rb +51 -0
  82. data/lib/parslet/slice.rb +101 -0
  83. data/lib/parslet/source.rb +62 -0
  84. data/lib/parslet/source/line_cache.rb +95 -0
  85. data/lib/parslet/transform.rb +236 -0
  86. data/lib/parslet/transform/context.rb +32 -0
  87. metadata +264 -0
@@ -0,0 +1,35 @@
1
class Parslet::Atoms::Base
  # Debugging convenience around #parse: instead of letting a parse error
  # propagate to the caller, a description of the failure is printed to
  # standard output.
  #
  # It stands in for the hand-written idiom
  #
  #   begin
  #     tree = parser.parse('something')
  #   rescue Parslet::ParseFailed => error
  #     puts error.cause.ascii_tree
  #   end
  #
  # Usage:
  #
  #   require 'parslet'
  #   require 'parslet/convenience'
  #
  #   class FooParser < Parslet::Parser
  #     rule(:foo) { str('foo') }
  #     root(:foo)
  #   end
  #
  #   FooParser.new.parse_with_debug('bar')
  #
  # @param str input, handed to #parse unchanged
  # @param opts [Hash] options, handed to #parse unchanged
  # @return the result of #parse on success; when one of the two errors
  #   below is rescued, the return value of +puts+
  #
  # @see Parslet::Atoms::Base#parse
  #
  def parse_with_debug(str, opts={})
    begin
      parse(str, opts)
    rescue Parslet::UnconsumedInput => unconsumed
      # Leftover input: the error itself is printed.
      puts unconsumed
    rescue Parslet::ParseFailed => failure
      # Any other parse failure: print the ascii rendering of its cause.
      puts failure.cause.ascii_tree
    end
  end
end
@@ -0,0 +1,7 @@
1
# Namespace module that holds all error reporter classes.
#
module Parslet::ErrorReporter; end
5
+
6
+ require 'parslet/error_reporter/tree'
7
+ require 'parslet/error_reporter/deepest'
@@ -0,0 +1,95 @@
1
module Parslet
  module ErrorReporter
    # Instead of reporting the latest error that happens like {Tree} does,
    # this class reports the deepest error. Depth is defined here as how
    # advanced into the input an error happens. The errors close to the
    # greatest depth tend to be more relevant to the end user, since they
    # specify what could be done to make them go away.
    #
    # More specifically, errors produced by this reporter won't be related to
    # the structure of the grammar at all. The positions of the errors will
    # be advanced and convey at every grammar level what the deepest rule
    # was to fail.
    #
    class Deepest
      def initialize
        @deepest_cause = nil
      end

      # Reports an error at the current position of the source
      # (+source.pos+). Otherwise identical to #err_at.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] The cause built here, or the previously stored
      #   deepest cause if that one reaches further (see #deepest).
      #
      def err(atom, source, message, children=nil)
        err_at(atom, source, message, source.pos, children)
      end

      # Reports an error at an explicitly given position. A cause is built
      # from message and children via Cause.format and then run through
      # #deepest.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param pos [Fixnum] The real position of the error.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] The cause built here, or the previously stored
      #   deepest cause if that one reaches further (see #deepest).
      #
      def err_at(atom, source, message, pos, children=nil)
        deepest(Cause.format(source, pos, message, children))
      end

      # Returns the cause that is currently deepest. Mainly for specs.
      #
      attr_reader :deepest_cause

      # Checks to see if the lineage of the cause given includes a cause with
      # an error position at least as deep as the current deepest cause
      # stored. If yes, that descendant is remembered and the cause is passed
      # through to the caller. If no, it returns the deepest cause that was
      # saved as a reference.
      #
      # @param cause [Cause] error tree to inspect
      # @return [Cause] +cause+ itself or the stored deepest cause
      #
      def deepest(cause)
        _rank, leaf = deepest_child(cause)

        if !deepest_cause || leaf.pos >= deepest_cause.pos
          # This error reaches at least as deep into the input, save it as
          # reference.
          @deepest_cause = leaf
          return cause
        end

        deepest_cause
      end

      private

      # Returns the node of a given error tree that is nested most deeply,
      # together with its nesting level. On equal nesting levels the node
      # encountered first wins.
      #
      # @param cause [Cause] root of the (sub)tree to search
      # @param rank [Integer] nesting level of +cause+ itself
      # @return [Array(Integer, Cause)] nesting level and node found
      #
      def deepest_child(cause, rank=0)
        best_rank = rank
        best_leaf = cause

        (cause.children || []).each do |child|
          child_rank, child_leaf = deepest_child(child, rank + 1)
          next unless child_rank > best_rank

          best_rank = child_rank
          best_leaf = child_leaf
        end

        [best_rank, best_leaf]
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
module Parslet
  module ErrorReporter
    # An error reporter has two central methods, one for reporting errors at
    # the current parse position (#err) and one for reporting errors at a
    # given parse position (#err_at). The reporter can return an object (a
    # 'cause') that will be returned to the caller along with the information
    # that the parse failed.
    #
    # When reporting errors on the outer levels of your parser, these methods
    # get passed a list of error objects ('causes') from the inner levels. In
    # this default implementation, the inner levels are considered error
    # subtrees and are appended to the generated tree node at each level,
    # thereby constructing an error tree.
    #
    # This error tree will report in parallel with the grammar structure that
    # failed. A one-to-one correspondence exists between each error in the
    # tree and the parslet atom that produced that error.
    #
    # The implementor is really free to use these return values as they see
    # fit. One example would be to return an error state object from these
    # methods that is then updated as errors cascade up the parse derivation
    # tree.
    #
    class Tree
      # Reports an error at the current position of the source
      # (+source.pos+). Otherwise identical to #err_at.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err(atom, source, message, children=nil)
        err_at(atom, source, message, source.pos, children)
      end

      # Reports an error at an explicitly given position, combining the
      # message at the current level with the errors that happened at a
      # level below (children).
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param pos [Fixnum] The real position of the error.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err_at(atom, source, message, pos, children=nil)
        Cause.format(source, pos, message, children)
      end
    end
  end
end
@@ -0,0 +1,162 @@
1
+ # Allows exporting parslet grammars to other lingos.
2
+
3
+ require 'set'
4
+ require 'parslet/atoms/visitor'
5
+
6
class Parslet::Parser
  module Visitors
    # Visitor that renders parslet atoms as text in the Citrus dialect. Each
    # visit_* method returns a freshly built String; strings handed back by
    # nested #accept calls are never modified in place.
    #
    # NOTE(review): the #accept side of the protocol lives in
    # 'parslet/atoms/visitor' and is not visible here; it is assumed to call
    # the visit_* method matching the atom's type.
    #
    class Citrus
      # NOTE(review): nothing in this file assigns @output, so the +output+
      # reader returns nil here; it is kept for interface compatibility.
      attr_reader :context, :output

      # @param context object that receives #deferred(name, block) and
      #   #mangle_name(name) calls (a PrettyPrinter in this file)
      def initialize(context)
        @context = context
      end

      # Renders a string atom as a double-quoted literal, using the escapes
      # produced by String#inspect.
      def visit_str(str)
        "\"#{str.inspect[1..-2]}\""
      end

      # Renders a regular expression atom through its #to_s.
      def visit_re(match)
        match.to_s
      end

      # Renders a reference to a rule and asks the context to print the
      # rule body later on.
      def visit_entity(name, block)
        context.deferred(name, block)

        "(#{context.mangle_name(name)})"
      end

      # Names are dropped; only the wrapped parslet is rendered.
      def visit_named(name, parslet)
        parslet.accept(self)
      end

      def visit_sequence(parslets)
        "(#{parslets.map { |el| el.accept(self) }.join(' ')})"
      end

      def visit_repetition(tag, min, max, parslet)
        # Interpolate instead of appending with <<: the rendered string may
        # be the very object an atom holds on to (visit_re returns
        # +match.to_s+, which is +match+ itself for a String).
        "#{parslet.accept(self)}#{min}*#{max}"
      end

      def visit_alternative(alternatives)
        "(#{alternatives.map { |el| el.accept(self) }.join(' | ')})"
      end

      def visit_lookahead(positive, bound_parslet)
        "#{positive ? '&' : '!'}#{bound_parslet.accept(self)}"
      end
    end

    # Visitor for the Treetop dialect. Differs from Citrus only in how
    # repetitions and alternatives are written.
    #
    class Treetop < Citrus
      def visit_repetition(tag, min, max, parslet)
        "#{parslet.accept(self)}#{min}..#{max}"
      end

      def visit_alternative(alternatives)
        "(#{alternatives.map { |el| el.accept(self) }.join(' / ')})"
      end
    end
  end

  # A helper class that formats Citrus and Treetop grammars as a string.
  #
  class PrettyPrinter
    attr_reader :visitor

    # @param visitor_klass [Class] visitor class to instantiate; it is
    #   constructed with this printer as its context
    def initialize(visitor_klass)
      # Queue of [name, block] pairs for rules that still need printing.
      # Created up front so that #pretty_print also works when the visitor
      # never defers a rule.
      @todo = []
      @visitor = visitor_klass.new(self)
    end

    # Pretty prints the given parslet using the visitor that has been
    # configured in initialize. Returns the string representation of the
    # Citrus or Treetop grammar.
    #
    # @param name grammar name, printed after the 'grammar' keyword
    # @param parslet parslet printed as the 'root' rule
    # @return [String]
    #
    def pretty_print(name, parslet)
      parts = ["grammar #{name}\n", rule('root', parslet)]

      seen = Set.new
      # @todo is constantly filled by the visitor (see #deferred). We keep
      # going until it is empty.
      until @todo.empty?
        rule_name, block = @todo.shift

        # Track what rules we've already seen. This breaks loops.
        next unless seen.add?(rule_name)

        parts << rule(rule_name, block.call)
      end

      parts << "end\n"
      parts.join
    end

    # Formats a rule in either dialect.
    #
    # NOTE(review): the leading whitespace inside these literals is
    # reproduced exactly as found in the reviewed source; confirm the
    # intended indentation widths against the upstream file.
    #
    def rule(name, parslet)
      " rule #{mangle_name name}\n" \
      " #{parslet.accept(visitor)}\n" \
      " end\n"
    end

    # Whenever the visitor encounters a rule in a parslet, it defers the
    # pretty printing of the rule by calling this method.
    #
    def deferred(name, content)
      @todo << [name, content]
    end

    # Mangles names so that Citrus and Treetop can live with it. This mostly
    # transforms some of the things that Ruby allows into other patterns. If
    # there is collision, we will not detect it for now.
    #
    def mangle_name(str)
      str.to_s.sub(/\?$/, '_p')
    end
  end

  # Exports the current parser instance as a string in the Citrus dialect.
  #
  # Example:
  #
  #   require 'parslet/export'
  #   class MyParser < Parslet::Parser
  #     root(:expression)
  #     rule(:expression) { str('foo') }
  #   end
  #
  #   MyParser.new.to_citrus # => a citrus grammar as a string
  #
  def to_citrus
    PrettyPrinter.new(Visitors::Citrus).
      pretty_print(self.class.name, root)
  end

  # Exports the current parser instance as a string in the Treetop dialect.
  #
  # Example:
  #
  #   require 'parslet/export'
  #   class MyParser < Parslet::Parser
  #     root(:expression)
  #     rule(:expression) { str('foo') }
  #   end
  #
  #   MyParser.new.to_treetop # => a treetop grammar as a string
  #
  def to_treetop
    PrettyPrinter.new(Visitors::Treetop).
      pretty_print(self.class.name, root)
  end
end
162
+
@@ -0,0 +1,51 @@
1
+
2
# Lets rules be written as strings in the grammar that treetop uses, minus
# the actions. On the one hand this is a good example of a fully fledged
# parser; on the other hand it might even turn out to be really useful.
#
# This can be viewed as an extension to parslet and might even be hosted in
# its own gem one fine day.
#
class Parslet::Expression
  include Parslet

  autoload :Treetop, 'parslet/expression/treetop'

  # Creates a parslet from a foreign language expression. The string is
  # parsed and transformed right away; the result is available through
  # #to_parslet.
  #
  # Example:
  #
  #   Parslet::Expression.new("'a' 'b'")
  #
  # @param str [String] expression source
  # @param opts [Hash] +:type+ is remembered (defaults to +:treetop+)
  # @param context accepted but not used by the code in this class
  #
  def initialize(str, opts={}, context=self)
    @type = opts[:type] || :treetop
    @exp = str

    tree = parse(str)
    @parslet = transform(tree)
  end

  # Transforms the parse tree into a parslet expression. When the
  # transformation raises, the offending tree is reported through +warn+
  # and the error is raised again.
  #
  def transform(tree)
    Treetop::Transform.new.apply(tree)
  rescue
    warn "Could not transform: #{tree.inspect}"
    raise
  end

  # Parses the string and returns a parse tree.
  #
  def parse(str)
    Treetop::Parser.new.parse(str)
  end

  # Turns this expression into a parslet.
  #
  def to_parslet
    @parslet
  end
end
@@ -0,0 +1,92 @@
1
class Parslet::Expression::Treetop
  # Grammar for treetop-style expressions (without actions). The tree it
  # produces is turned into parslet atoms by Transform below.
  #
  # Reminder when reading the rules: Ruby binds >> tighter than |, so every
  # line of an alternative is one complete sequence.
  #
  class Parser < Parslet::Parser
    root(:expression)

    rule(:expression) { alternatives }

    # alternative 'a' / 'b'
    rule(:alternatives) {
      (simple >> (spaced('/') >> simple).repeat).as(:alt)
    }

    # sequence by simple concatenation 'a' 'b'
    rule(:simple) { occurrence.repeat(1).as(:seq) }

    # occurrence modifiers
    rule(:occurrence) {
      atom.as(:repetition) >> spaced('*').as(:sign) |
        atom.as(:repetition) >> spaced('+').as(:sign) |
        atom.as(:repetition) >> repetition_spec |

        atom.as(:maybe) >> spaced('?') |
        atom
    }

    rule(:atom) {
      spaced('(') >> expression.as(:unwrap) >> spaced(')') |
        dot |
        string |
        char_class
    }

    # a character class
    rule(:char_class) {
      (str('[') >>
        (str('\\') >> any |
        str(']').absent? >> any).repeat(1) >>
      str(']')).as(:match) >> space?
    }

    # anything at all
    rule(:dot) { spaced('.').as(:any) }

    # recognizing strings
    rule(:string) {
      str('\'') >>
      (
        (str('\\') >> any) |
        (str("'").absent? >> any)
      ).repeat.as(:string) >>
      str('\'') >> space?
    }

    # repetition specification like {1, 2}
    rule(:repetition_spec) {
      spaced('{') >>
        integer.maybe.as(:min) >> spaced(',') >>
        integer.maybe.as(:max) >> spaced('}')
    }
    rule(:integer) {
      match['0-9'].repeat(1)
    }

    # whitespace handling
    #
    # The pattern is single-quoted on purpose: inside a double-quoted Ruby
    # literal "\s" is just one space character, which would leave tabs and
    # newlines unrecognised as whitespace.
    rule(:space) { match('\s').repeat(1) }
    rule(:space?) { space.maybe }

    # Matches the literal +str+ followed by optional whitespace.
    def spaced(str)
      str(str) >> space?
    end
  end

  # Turns the tree produced by Parser into parslet atoms.
  #
  class Transform < Parslet::Transform

    # 'a'* and 'a'+ : at least 0 resp. 1 occurrences, no upper bound.
    rule(:repetition => simple(:rep), :sign => simple(:sign)) {
      min = sign=='+' ? 1 : 0
      Parslet::Atoms::Repetition.new(rep, min, nil) }
    # 'a'{min, max} : a missing min counts as 0, a missing max as unbounded.
    rule(:repetition => simple(:rep), :min => simple(:min), :max => simple(:max)) {
      Parslet::Atoms::Repetition.new(rep,
        Integer(min || 0),
        max && Integer(max) || nil) }

    rule(:alt => subtree(:alt))   { Parslet::Atoms::Alternative.new(*alt) }
    rule(:seq => sequence(:s))    { Parslet::Atoms::Sequence.new(*s) }
    rule(:unwrap => simple(:u))   { u }
    rule(:maybe => simple(:m))    { |d| d[:m].maybe }
    rule(:string => simple(:s))   { Parslet::Atoms::Str.new(s) }
    rule(:match => simple(:m))    { Parslet::Atoms::Re.new(m) }
    rule(:any => simple(:a))      { Parslet::Atoms::Re.new('.') }
  end

end
92
+