ghazel-parslet 1.4.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/HISTORY.txt +195 -0
  2. data/LICENSE +23 -0
  3. data/README +70 -0
  4. data/Rakefile +49 -0
  5. data/example/boolean_algebra.rb +70 -0
  6. data/example/calc.rb +153 -0
  7. data/example/comments.rb +35 -0
  8. data/example/deepest_errors.rb +131 -0
  9. data/example/documentation.rb +18 -0
  10. data/example/email_parser.rb +52 -0
  11. data/example/empty.rb +13 -0
  12. data/example/erb.rb +47 -0
  13. data/example/ignore.rb +33 -0
  14. data/example/ip_address.rb +125 -0
  15. data/example/json.rb +128 -0
  16. data/example/local.rb +34 -0
  17. data/example/mathn.rb +44 -0
  18. data/example/minilisp.rb +94 -0
  19. data/example/modularity.rb +47 -0
  20. data/example/nested_errors.rb +132 -0
  21. data/example/output/boolean_algebra.out +4 -0
  22. data/example/output/calc.out +1 -0
  23. data/example/output/comments.out +8 -0
  24. data/example/output/deepest_errors.out +54 -0
  25. data/example/output/documentation.err +4 -0
  26. data/example/output/documentation.out +1 -0
  27. data/example/output/email_parser.out +2 -0
  28. data/example/output/empty.err +1 -0
  29. data/example/output/erb.out +7 -0
  30. data/example/output/ignore.out +1 -0
  31. data/example/output/ignore_whitespace.out +1 -0
  32. data/example/output/ip_address.out +9 -0
  33. data/example/output/json.out +5 -0
  34. data/example/output/local.out +3 -0
  35. data/example/output/mathn.out +4 -0
  36. data/example/output/minilisp.out +5 -0
  37. data/example/output/modularity.out +0 -0
  38. data/example/output/nested_errors.out +54 -0
  39. data/example/output/parens.out +8 -0
  40. data/example/output/readme.out +1 -0
  41. data/example/output/seasons.out +28 -0
  42. data/example/output/sentence.out +1 -0
  43. data/example/output/simple_xml.out +2 -0
  44. data/example/output/string_parser.out +3 -0
  45. data/example/parens.rb +42 -0
  46. data/example/readme.rb +30 -0
  47. data/example/seasons.rb +46 -0
  48. data/example/sentence.rb +36 -0
  49. data/example/simple.lit +3 -0
  50. data/example/simple_xml.rb +54 -0
  51. data/example/string_parser.rb +77 -0
  52. data/example/test.lit +4 -0
  53. data/lib/parslet.rb +254 -0
  54. data/lib/parslet/atoms.rb +32 -0
  55. data/lib/parslet/atoms/alternative.rb +50 -0
  56. data/lib/parslet/atoms/base.rb +124 -0
  57. data/lib/parslet/atoms/can_flatten.rb +137 -0
  58. data/lib/parslet/atoms/context.rb +94 -0
  59. data/lib/parslet/atoms/dsl.rb +98 -0
  60. data/lib/parslet/atoms/entity.rb +41 -0
  61. data/lib/parslet/atoms/lookahead.rb +49 -0
  62. data/lib/parslet/atoms/named.rb +32 -0
  63. data/lib/parslet/atoms/re.rb +38 -0
  64. data/lib/parslet/atoms/repetition.rb +63 -0
  65. data/lib/parslet/atoms/rule.rb +12 -0
  66. data/lib/parslet/atoms/rule/position.rb +143 -0
  67. data/lib/parslet/atoms/sequence.rb +38 -0
  68. data/lib/parslet/atoms/str.rb +37 -0
  69. data/lib/parslet/atoms/visitor.rb +89 -0
  70. data/lib/parslet/cause.rb +94 -0
  71. data/lib/parslet/convenience.rb +35 -0
  72. data/lib/parslet/error_reporter.rb +7 -0
  73. data/lib/parslet/error_reporter/deepest.rb +95 -0
  74. data/lib/parslet/error_reporter/tree.rb +57 -0
  75. data/lib/parslet/export.rb +162 -0
  76. data/lib/parslet/expression.rb +51 -0
  77. data/lib/parslet/expression/treetop.rb +92 -0
  78. data/lib/parslet/parser.rb +67 -0
  79. data/lib/parslet/pattern.rb +114 -0
  80. data/lib/parslet/pattern/binding.rb +49 -0
  81. data/lib/parslet/rig/rspec.rb +51 -0
  82. data/lib/parslet/slice.rb +101 -0
  83. data/lib/parslet/source.rb +62 -0
  84. data/lib/parslet/source/line_cache.rb +95 -0
  85. data/lib/parslet/transform.rb +236 -0
  86. data/lib/parslet/transform/context.rb +32 -0
  87. metadata +264 -0
@@ -0,0 +1,77 @@
1
+ # A more complex parser that illustrates how a compiler might be constructed.
2
+ # The parser recognizes string and integer literals and constructs an almost
3
+ # useful AST from the file contents.
4
+
5
+ require 'pp'
6
+
7
+ $:.unshift File.dirname(__FILE__) + "/../lib"
8
+ require 'parslet'
9
+
10
+ include Parslet
11
+
12
# Grammar for a file of literals: zero or more string or integer literals,
# each followed by at least one line break.
class LiteralsParser < Parslet::Parser
  # One or more space characters (blanks only, no tabs or newlines).
  rule(:space) { match('[ ]').repeat(1) }

  # The whole input: any number of literals, each terminated by +eol+.
  rule(:literals) { (literal >> eol).repeat }

  # A single integer or string, tagged as :literal, with optional trailing
  # blanks.
  rule(:literal) { (integer | string).as(:literal) >> space.maybe }

  # A double-quoted string. Inside the quotes, a backslash consumes the
  # character that follows it, so an escaped quote does not end the string.
  rule(:string) do
    str('"') >>
      (
        (str('\\') >> any) |
        (str('"').absent? >> any)
      ).repeat.as(:string) >>
      str('"')
  end

  # One or more decimal digits, tagged as :integer.
  rule(:integer) { match('[0-9]').repeat(1).as(:integer) }

  # One or more line endings.
  rule(:eol) { line_end.repeat(1) }

  # A run of line-break characters followed by optional blanks.
  rule(:line_end) { crlf >> space.maybe }

  # One or more carriage-return or newline characters.
  rule(:crlf) { match('[\r\n]').repeat(1) }

  root :literals
end
52
+
53
# Read the sample input that lives next to this script and parse it into the
# intermediate tree.
input_name = File.join(File.dirname(__FILE__), 'simple.lit')
file = File.read(input_name)

parsetree = LiteralsParser.new.parse(file)
57
+
58
# Base AST node wrapping the text of a literal.
class Lit < Struct.new(:text)
  # @return [String] the wrapped text in inspected form
  def to_s
    text.inspect
  end
end

# AST node for a string literal; printing is inherited from Lit.
class StringLit < Lit
end

# AST node for an integer literal; prints the text without inspecting it.
class IntLit < Lit
  # Converts explicitly, because +text+ is whatever object was handed to the
  # constructor and #to_s is expected to return a String. Returning a
  # non-String from #to_s makes Ruby fall back to the default object
  # representation during interpolation and printing.
  #
  # @return [String] the wrapped text as a plain string
  def to_s
    text.to_s
  end
end
70
+
71
# Turn the raw parse tree into AST nodes: every :literal hash that wraps an
# :integer becomes an IntLit, every one that wraps a :string a StringLit.
transform = Parslet::Transform.new do
  rule(:literal => {:integer => simple(:x)}) { IntLit.new(x) }
  rule(:literal => {:string => simple(:s)}) { StringLit.new(s) }
end

# Apply the transformation and pretty-print the result.
ast = transform.apply(parsetree)
pp ast
@@ -0,0 +1,4 @@
1
+ "THis is a string"
2
+ "This is another string"
3
+ "This string is escaped \"embedded quoted stuff \" "
4
+ 12 // an integer literal and a comment
@@ -0,0 +1,254 @@
1
+ # A simple parser generator library. Typical usage would look like this:
2
+ #
3
+ # require 'parslet'
4
+ #
5
+ # class MyParser < Parslet::Parser
6
+ # rule(:a) { str('a').repeat }
7
+ # root(:a)
8
+ # end
9
+ #
10
+ # pp MyParser.new.parse('aaaa') # => 'aaaa'@0
11
+ # pp MyParser.new.parse('bbbb') # => Parslet::ParseFailed:
12
+ # # Don't know what to do with bbbb at line 1 char 1.
13
+ #
14
+ # The simple DSL allows you to define grammars in PEG-style. This kind of
15
+ # grammar construction does away with the ambiguities that usually come with
16
+ # parsers; instead, it allows you to construct grammars that are easier to
17
+ # debug, since less magic is involved.
18
+ #
19
+ # Parslet is typically used in stages:
20
+ #
21
+ #
22
+ # * Parsing the input string; this yields an intermediary tree, see
23
+ # Parslet.any, Parslet.match, Parslet.str, Parslet::ClassMethods#rule and
24
+ # Parslet::ClassMethods#root.
25
+ # * Transformation of the tree into something useful to you, see
26
+ # Parslet::Transform, Parslet.simple, Parslet.sequence and Parslet.subtree.
27
+ #
28
+ # The first stage is traditionally intermingled with the second stage; output
29
+ # from the second stage is usually called the 'Abstract Syntax Tree' or AST.
30
+ #
31
+ # The stages are completely decoupled; You can change your grammar around and
32
+ # use the second stage to isolate the rest of your code from the changes
33
+ # you've effected.
34
+ #
35
+ # == Further reading
36
+ #
37
+ # All parslet atoms are subclasses of {Parslet::Atoms::Base}. You might want to
38
+ # look at all of those: {Parslet::Atoms::Re}, {Parslet::Atoms::Str},
39
+ # {Parslet::Atoms::Repetition}, {Parslet::Atoms::Sequence},
40
+ # {Parslet::Atoms::Alternative}.
41
+ #
42
+ # == When things go wrong
43
+ #
44
+ # A parse that fails will raise {Parslet::ParseFailed}. This exception contains
45
+ # all the details of what went wrong, including a detailed error trace that
46
+ # can be printed out as an ascii tree. ({Parslet::Cause})
47
+ #
48
+ module Parslet
49
# Extends classes that include Parslet with the module
# {Parslet::ClassMethods}.
#
# @param base [Module] the class or module that is including Parslet
def self.included(base)
  base.extend(ClassMethods)
end
55
+
56
# Raised when the parse failed to match. It contains the message that should
# be presented to the user. More details can be extracted from the
# exception's #cause member: It contains an instance of {Parslet::Cause} that
# stores all the details of your failed parse in a tree structure.
#
#   begin
#     parslet.parse(str)
#   rescue Parslet::ParseFailed => failure
#     puts failure.cause.ascii_tree
#   end
#
# Alternatively, you can just require 'parslet/convenience' and call the
# method #parse_with_debug instead of #parse. This method will never raise
# and print error trees to stdout.
#
#   require 'parslet/convenience'
#   parslet.parse_with_debug(str)
#
class ParseFailed < StandardError
  # Why the parse failed.
  #
  # @return [Parslet::Cause]
  attr_reader :cause

  # @param message [String] message to present to the user
  # @param cause [Parslet::Cause, nil] details of the failure, if available
  def initialize(message, cause=nil)
    super(message)
    @cause = cause
  end
end
85
+
86
# Raised when the parse operation didn't consume all of its input. In this
# case, it makes only limited sense to look at the error tree. Maybe the
# parser worked just fine, but didn't account for the characters at the tail
# of the input?
#
#   str('foo').parse('foobar')
#   # raises Parslet::UnconsumedInput:
#   #   Don't know what to do with "bar" at line 1 char 4.
#
# Note that you can have parslet ignore this error:
#
#   str('foo').parse('foobar', prefix: true) # => "foo"@0
#
class UnconsumedInput < ParseFailed
end
101
+
102
module ClassMethods
  # Define an entity for the parser. This generates a method of the same
  # name that can be used as part of other patterns. Those methods can be
  # freely mixed in your parser class with real ruby methods.
  #
  #   class MyParser
  #     include Parslet
  #
  #     rule(:bar) { str('bar') }
  #     rule(:twobar) do
  #       bar >> bar
  #     end
  #
  #     root :twobar
  #   end
  #
  # @param name [Symbol] name of the rule and of the generated method
  # @param definition [Proc] block that builds the rule's parslet; it is
  #   evaluated with the parser instance as self
  def rule(name, &definition)
    define_method(name) do
      @rules ||= {} # <name, rule> memoization, kept per parser instance

      @rules.fetch(name) do
        # The block passed to Atoms::Rule closes over this parser instance,
        # so the definition sees the instance's other rules and methods.
        @rules[name] = Atoms::Rule.new(name) { instance_eval(&definition) }
      end
    end
  end
end
132
+
133
# Allows for delayed construction of #match. See also Parslet.match.
#
# @api private
class DelayedMatchConstructor
  # Wraps +str+ in square brackets and builds a character class atom from
  # the result.
  #
  # @param str [String] contents of the character class, without brackets
  # @return [Parslet::Atoms::Re] a parslet atom
  def [](str)
    Atoms::Re.new("[#{str}]")
  end
end
141
+
142
# Returns an atom matching a character class. All regular expressions can be
# used, as long as they match only a single character at a time.
#
#   match('[ab]')   # will match either 'a' or 'b'
#   match('[\n\s]') # will match newlines and spaces
#
# There is also another (convenience) form of this method:
#
#   match['a-z'] # synonymous to match('[a-z]')
#   match['\n']  # synonymous to match('[\n]')
#
# @overload match(str)
#   @param str [String] character class to match (regexp syntax)
#   @return [Parslet::Atoms::Re] a parslet atom
#
def match(str=nil)
  # Without an argument, hand back a helper whose #[] builds the atom; this
  # is what makes the match['a-z'] form work.
  return DelayedMatchConstructor.new unless str

  Atoms::Re.new(str)
end
module_function :match
163
+
164
# Returns an atom matching the +str+ given:
#
#   str('class') # will match 'class'
#
# @param str [String] string to match verbatim
# @return [Parslet::Atoms::Str] a parslet atom
#
def str(str)
  Atoms::Str.new(str)
end
module_function :str
175
+
176
# Returns an atom matching any character. It acts like the '.' (dot)
# character in regular expressions.
#
#   any.parse('a') # => 'a'
#
# @return [Parslet::Atoms::Re] a parslet atom
#
def any
  Atoms::Re.new('.')
end
module_function :any
187
+
188
# A special kind of atom that allows embedding whole treetop expressions
# into parslet construction.
#
#   # the same as str('a') >> str('b').maybe
#   exp(%Q("a" "b"?))
#
# @param str [String] a treetop expression
# @return [Parslet::Atoms::Base] the corresponding parslet parser
#
def exp(str)
  Parslet::Expression.new(str).to_parslet
end
module_function :exp
201
+
202
# Returns a placeholder for a tree transformation that will only match a
# sequence of elements. The +symbol+ you specify will be the key for the
# matched sequence in the returned dictionary.
#
#   # This would match a body element that contains several declarations.
#   { :body => sequence(:declarations) }
#
# The above example would match <code>:body => ['a', 'b']</code>, but not
# <code>:body => 'a'</code>.
#
# see {Parslet::Transform}
#
# @param symbol [Symbol] name under which the matched sequence is bound
def sequence(symbol)
  Pattern::SequenceBind.new(symbol)
end
module_function :sequence
218
+
219
# Returns a placeholder for a tree transformation that will only match
# simple elements. This matches everything that <code>#sequence</code>
# doesn't match.
#
#   # Matches a single header.
#   { :header => simple(:header) }
#
# see {Parslet::Transform}
#
# @param symbol [Symbol] name under which the matched element is bound
def simple(symbol)
  Pattern::SimpleBind.new(symbol)
end
module_function :simple
232
+
233
# Returns a placeholder for tree transformation patterns that will match
# any kind of subtree.
#
#   { :expression => subtree(:exp) }
#
# @param symbol [Symbol] name under which the matched subtree is bound
def subtree(symbol)
  Pattern::SubtreeBind.new(symbol)
end
module_function :subtree
242
+
243
+ autoload :Expression, 'parslet/expression'
244
+ end
245
+
246
+ require 'parslet/slice'
247
+ require 'parslet/cause'
248
+ require 'parslet/source'
249
+ require 'parslet/atoms'
250
+ require 'parslet/pattern'
251
+ require 'parslet/pattern/binding'
252
+ require 'parslet/transform'
253
+ require 'parslet/parser'
254
+ require 'parslet/error_reporter'
@@ -0,0 +1,32 @@
1
+
2
# This is where parslet's name comes from: Small parser atoms.
#
module Parslet::Atoms
  # The precedence module controls parenthesis during the #inspect printing
  # of parslets. It is not relevant to other aspects of the parsing.
  #
  # Values grow from the tightest-binding construct to the loosest one.
  #
  module Precedence
    BASE       = 1 # everything else
    LOOKAHEAD  = 2 # &SOMETHING
    REPETITION = 3 # 'a'+, 'a'?
    SEQUENCE   = 4 # 'a' 'b'
    ALTERNATE  = 5 # 'a' | 'b'
    OUTER      = 6 # printing is done here.
  end

  require 'parslet/atoms/can_flatten'
  require 'parslet/atoms/context'
  require 'parslet/atoms/dsl'
  require 'parslet/atoms/base'
  require 'parslet/atoms/named'
  require 'parslet/atoms/lookahead'
  require 'parslet/atoms/alternative'
  require 'parslet/atoms/sequence'
  require 'parslet/atoms/repetition'
  require 'parslet/atoms/re'
  require 'parslet/atoms/str'
  require 'parslet/atoms/entity'
  require 'parslet/atoms/rule'
end
32
+
@@ -0,0 +1,50 @@
1
+
2
# Alternative during matching. Contains a list of parslets that are tried
# one after the other. Only fails if all alternatives fail.
#
# Example:
#
#   str('a') | str('b') # matches either 'a' or 'b'
#
class Parslet::Atoms::Alternative < Parslet::Atoms::Base
  # @return [Array] the parslets to try, in order
  attr_reader :alternatives

  # Constructs an Alternative instance using all given parslets in the order
  # given. This is what happens if you call '|' on existing parslets, like
  # this:
  #
  #   str('a') | str('b')
  #
  def initialize(*alternatives)
    super()

    @alternatives = alternatives
    @error_msg = "Expected one of #{alternatives.inspect}"
  end

  #---
  # Don't construct a hanging tree of Alternative parslets, instead store them
  # all here. This reduces the number of objects created.
  #+++
  def |(parslet)
    self.class.new(*alternatives, parslet)
  end

  # Applies each alternative in turn and returns the result of the first one
  # that succeeds. If none succeeds, reports an error through +context+ that
  # carries the failure value of every alternative.
  def try(source, context)
    errors = []

    alternatives.each do |alternative|
      success, value = result = alternative.apply(source, context)
      return result if success

      # Aggregate all errors
      errors << value
    end

    # If we reach this point, all alternatives have failed.
    context.err(self, source, @error_msg, errors)
  end

  precedence ALTERNATE

  # Renders the alternatives separated by ' / '.
  def to_s_inner(prec)
    alternatives.map { |alternative| alternative.to_s(prec) }.join(' / ')
  end
end
@@ -0,0 +1,124 @@
1
# Base class for all parslets, handles orchestration of calls and implements
# a lot of the operator and chaining methods.
#
# Also see Parslet::Atoms::DSL for chaining parslet atoms together.
#
class Parslet::Atoms::Base
  include Parslet::Atoms::Precedence
  include Parslet::Atoms::DSL
  include Parslet::Atoms::CanFlatten

  # Given a string or an IO object, this will attempt a parse of its contents
  # and return a result. If the parse fails, a Parslet::ParseFailed exception
  # will be thrown.
  #
  # @param io [String, Source] input for the parse process
  # @option options [Parslet::ErrorReporter] :reporter error reporter to use,
  #   defaults to Parslet::ErrorReporter::Tree
  # @option options [Boolean] :prefix Should a prefix match be accepted?
  #   (default: false)
  # @return [Hash, Array, Parslet::Slice] PORO (Plain old Ruby object) result
  #   tree
  #
  def parse(io, options={})
    # Anything that answers #line_and_column is used as a source directly;
    # everything else gets wrapped.
    source = io.respond_to?(:line_and_column) ? io : Parslet::Source.new(io)

    # Try to cheat. Assuming that we'll be able to parse the input, don't
    # run error reporting code.
    success, value = setup_and_apply(source, nil)

    # If we didn't succeed the parse, raise an exception for the user.
    # Stack trace will be off, but the error tree should explain the reason
    # it failed.
    unless success
      # Cheating has not paid off. Now pay the cost: Rerun the parse,
      # gathering error information in the process.
      reporter = options[:reporter] || Parslet::ErrorReporter::Tree.new
      success, value = setup_and_apply(source, reporter)

      if success
        fail "Assertion failed: success was true when parsing with reporter"
      end

      # Value is a Parslet::Cause, which can be turned into an exception:
      value.raise

      fail "NEVER REACHED"
    end

    # assert: success is true

    # If we haven't consumed the input, then the pattern doesn't match. Try
    # to provide a good error message (even asking down below)
    if !options[:prefix] && source.chars_left > 0
      old_pos = source.pos
      Parslet::Cause.format(
        source, old_pos,
        "Don't know what to do with #{source.consume(10).to_s.inspect}").
        raise(Parslet::UnconsumedInput)
    end

    flatten(value)
  end

  # Creates a context for parsing and applies the current atom to the input.
  # Returns the parse result.
  #
  # @return [<Boolean, Object>] Result of the parse. If the first member is
  #   true, the parse has succeeded.
  def setup_and_apply(source, error_reporter)
    context = Parslet::Atoms::Context.new(error_reporter)
    apply(source, context)
  end

  #---
  # Calls the #try method of this parslet. In case of a parse error, apply
  # leaves the source in the state it was before the attempt.
  #+++
  def apply(source, context)
    old_pos = source.pos

    success, _value = result = try(source, context)

    # We only rewind if the parse has failed.
    source.pos = old_pos unless success

    result
  end

  # Override this in your Atoms::Base subclasses to implement parsing
  # behaviour.
  #
  def try(source, context)
    raise NotImplementedError,
      "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
  end

  # Debug printing - in Treetop syntax.
  #
  # Class-level macro: defines an instance method #precedence that returns
  # +prec+.
  def self.precedence(prec)
    define_method(:precedence) { prec }
  end
  precedence BASE

  # Renders this parslet, wrapped in parentheses when the surrounding
  # precedence +outer_prec+ is lower than this atom's own precedence.
  def to_s(outer_prec=OUTER)
    inner = to_s_inner(precedence)
    outer_prec < precedence ? "(#{inner})" : inner
  end

  # Same as #to_s at the outermost precedence level.
  def inspect
    to_s(OUTER)
  end

  private

  # Produces a success result, a pair of true and +result+, and returns it.
  #
  def succ(result)
    [true, result]
  end
end