ghazel-parslet 1.4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. data/HISTORY.txt +195 -0
  2. data/LICENSE +23 -0
  3. data/README +70 -0
  4. data/Rakefile +49 -0
  5. data/example/boolean_algebra.rb +70 -0
  6. data/example/calc.rb +153 -0
  7. data/example/comments.rb +35 -0
  8. data/example/deepest_errors.rb +131 -0
  9. data/example/documentation.rb +18 -0
  10. data/example/email_parser.rb +52 -0
  11. data/example/empty.rb +13 -0
  12. data/example/erb.rb +47 -0
  13. data/example/ignore.rb +33 -0
  14. data/example/ip_address.rb +125 -0
  15. data/example/json.rb +128 -0
  16. data/example/local.rb +34 -0
  17. data/example/mathn.rb +44 -0
  18. data/example/minilisp.rb +94 -0
  19. data/example/modularity.rb +47 -0
  20. data/example/nested_errors.rb +132 -0
  21. data/example/output/boolean_algebra.out +4 -0
  22. data/example/output/calc.out +1 -0
  23. data/example/output/comments.out +8 -0
  24. data/example/output/deepest_errors.out +54 -0
  25. data/example/output/documentation.err +4 -0
  26. data/example/output/documentation.out +1 -0
  27. data/example/output/email_parser.out +2 -0
  28. data/example/output/empty.err +1 -0
  29. data/example/output/erb.out +7 -0
  30. data/example/output/ignore.out +1 -0
  31. data/example/output/ignore_whitespace.out +1 -0
  32. data/example/output/ip_address.out +9 -0
  33. data/example/output/json.out +5 -0
  34. data/example/output/local.out +3 -0
  35. data/example/output/mathn.out +4 -0
  36. data/example/output/minilisp.out +5 -0
  37. data/example/output/modularity.out +0 -0
  38. data/example/output/nested_errors.out +54 -0
  39. data/example/output/parens.out +8 -0
  40. data/example/output/readme.out +1 -0
  41. data/example/output/seasons.out +28 -0
  42. data/example/output/sentence.out +1 -0
  43. data/example/output/simple_xml.out +2 -0
  44. data/example/output/string_parser.out +3 -0
  45. data/example/parens.rb +42 -0
  46. data/example/readme.rb +30 -0
  47. data/example/seasons.rb +46 -0
  48. data/example/sentence.rb +36 -0
  49. data/example/simple.lit +3 -0
  50. data/example/simple_xml.rb +54 -0
  51. data/example/string_parser.rb +77 -0
  52. data/example/test.lit +4 -0
  53. data/lib/parslet.rb +254 -0
  54. data/lib/parslet/atoms.rb +32 -0
  55. data/lib/parslet/atoms/alternative.rb +50 -0
  56. data/lib/parslet/atoms/base.rb +124 -0
  57. data/lib/parslet/atoms/can_flatten.rb +137 -0
  58. data/lib/parslet/atoms/context.rb +94 -0
  59. data/lib/parslet/atoms/dsl.rb +98 -0
  60. data/lib/parslet/atoms/entity.rb +41 -0
  61. data/lib/parslet/atoms/lookahead.rb +49 -0
  62. data/lib/parslet/atoms/named.rb +32 -0
  63. data/lib/parslet/atoms/re.rb +38 -0
  64. data/lib/parslet/atoms/repetition.rb +63 -0
  65. data/lib/parslet/atoms/rule.rb +12 -0
  66. data/lib/parslet/atoms/rule/position.rb +143 -0
  67. data/lib/parslet/atoms/sequence.rb +38 -0
  68. data/lib/parslet/atoms/str.rb +37 -0
  69. data/lib/parslet/atoms/visitor.rb +89 -0
  70. data/lib/parslet/cause.rb +94 -0
  71. data/lib/parslet/convenience.rb +35 -0
  72. data/lib/parslet/error_reporter.rb +7 -0
  73. data/lib/parslet/error_reporter/deepest.rb +95 -0
  74. data/lib/parslet/error_reporter/tree.rb +57 -0
  75. data/lib/parslet/export.rb +162 -0
  76. data/lib/parslet/expression.rb +51 -0
  77. data/lib/parslet/expression/treetop.rb +92 -0
  78. data/lib/parslet/parser.rb +67 -0
  79. data/lib/parslet/pattern.rb +114 -0
  80. data/lib/parslet/pattern/binding.rb +49 -0
  81. data/lib/parslet/rig/rspec.rb +51 -0
  82. data/lib/parslet/slice.rb +101 -0
  83. data/lib/parslet/source.rb +62 -0
  84. data/lib/parslet/source/line_cache.rb +95 -0
  85. data/lib/parslet/transform.rb +236 -0
  86. data/lib/parslet/transform/context.rb +32 -0
  87. metadata +264 -0
@@ -0,0 +1,77 @@
1
+ # A more complex parser that illustrates how a compiler might be constructed.
2
+ # The parser recognizes strings and integer literals and constructs an almost
3
+ # useful AST from the file contents.
4
+
5
+ require 'pp'
6
+
7
+ $:.unshift File.dirname(__FILE__) + "/../lib"
8
+ require 'parslet'
9
+
10
+ include Parslet
11
+
12
# Grammar for a file of literals: each literal is a double-quoted string or
# an integer, optionally followed by spaces, and terminated by at least one
# line break.
class LiteralsParser < Parslet::Parser
  # One or more space characters.
  rule(:space) { match('[ ]').repeat(1) }

  # Any number of literals, each followed by an end-of-line run.
  rule(:literals) { (literal >> eol).repeat }

  # An integer or a string, captured under :literal, then optional spaces.
  rule(:literal) { (integer | string).as(:literal) >> space.maybe }

  # A double-quoted string. A backslash consumes the character after it, so
  # an escaped quote does not terminate the string. The characters between
  # the quotes are captured under :string.
  rule(:string) do
    escaped_char = str('\\') >> any
    plain_char   = str('"').absent? >> any

    str('"') >> (escaped_char | plain_char).repeat.as(:string) >> str('"')
  end

  # One or more decimal digits, captured under :integer.
  rule(:integer) { match('[0-9]').repeat(1).as(:integer) }

  # One or more line endings.
  rule(:eol) { line_end.repeat(1) }

  # A run of line-break characters, then optional spaces.
  rule(:line_end) { crlf >> space.maybe }

  # One or more CR / LF characters.
  rule(:crlf) { match('[\r\n]').repeat(1) }

  root(:literals)
end
52
+
53
# Read the sample input that sits next to this script and parse it.
input_name = File.join(File.dirname(__FILE__), 'simple.lit')
parsetree  = LiteralsParser.new.parse(File.read(input_name))
57
+
58
# Base AST node for a literal; wraps the matched source text.
#
# Defined by passing a block to Struct.new instead of subclassing an
# anonymous Struct, which avoids a nameless extra class in the ancestry.
Lit = Struct.new(:text) do
  # @return [String] the wrapped text in its #inspect form
  def to_s
    text.inspect
  end
end

# AST node for a string literal. Prints the same way as Lit.
class StringLit < Lit
end

# AST node for an integer literal. Prints the bare text.
class IntLit < Lit
  # Always returns a String. The wrapped text is not necessarily a String
  # itself, and a #to_s that returns a non-String is ignored by string
  # interpolation.
  #
  # @return [String] the wrapped text converted with #to_s
  def to_s
    text.to_s
  end
end
70
+
71
# Turn every :literal node of the parse tree into its AST class, then
# pretty-print the transformed tree.
transform = Parslet::Transform.new {
  rule(literal: { integer: simple(:x) }) { IntLit.new(x) }
  rule(literal: { string: simple(:s) }) { StringLit.new(s) }
}

pp(transform.apply(parsetree))
@@ -0,0 +1,4 @@
1
+ "THis is a string"
2
+ "This is another string"
3
+ "This string is escaped \"embedded quoted stuff \" "
4
+ 12 // an integer literal and a comment
@@ -0,0 +1,254 @@
1
+ # A simple parser generator library. Typical usage would look like this:
2
+ #
3
+ # require 'parslet'
4
+ #
5
+ # class MyParser < Parslet::Parser
6
+ # rule(:a) { str('a').repeat }
7
+ # root(:a)
8
+ # end
9
+ #
10
+ # pp MyParser.new.parse('aaaa') # => 'aaaa'@0
11
+ # pp MyParser.new.parse('bbbb') # => Parslet::ParseFailed:
12
+ # # Don't know what to do with bbbb at line 1 char 1.
13
+ #
14
+ # The simple DSL allows you to define grammars in PEG-style. This kind of
15
+ # grammar construction does away with the ambiguities that usually come with
16
+ # parsers; instead, it allows you to construct grammars that are easier to
17
+ # debug, since less magic is involved.
18
+ #
19
+ # Parslet is typically used in stages:
20
+ #
21
+ #
22
+ # * Parsing the input string; this yields an intermediary tree, see
23
+ # Parslet.any, Parslet.match, Parslet.str, Parslet::ClassMethods#rule and
24
+ # Parslet::ClassMethods#root.
25
+ # * Transformation of the tree into something useful to you, see
26
+ # Parslet::Transform, Parslet.simple, Parslet.sequence and Parslet.subtree.
27
+ #
28
+ # The first stage is traditionally intermingled with the second stage; output
29
+ # from the second stage is usually called the 'Abstract Syntax Tree' or AST.
30
+ #
31
+ # The stages are completely decoupled; You can change your grammar around and
32
+ # use the second stage to isolate the rest of your code from the changes
33
+ # you've effected.
34
+ #
35
+ # == Further reading
36
+ #
37
+ # All parslet atoms are subclasses of {Parslet::Atoms::Base}. You might want to
38
+ # look at all of those: {Parslet::Atoms::Re}, {Parslet::Atoms::Str},
39
+ # {Parslet::Atoms::Repetition}, {Parslet::Atoms::Sequence},
40
+ # {Parslet::Atoms::Alternative}.
41
+ #
42
+ # == When things go wrong
43
+ #
44
+ # A parse that fails will raise {Parslet::ParseFailed}. This exception contains
45
+ # all the details of what went wrong, including a detailed error trace that
46
+ # can be printed out as an ascii tree. ({Parslet::Cause})
47
+ #
48
+ module Parslet
49
# Hook that runs when Parslet is included into a class or module: the
# including +base+ is extended with {Parslet::ClassMethods}.
#
# @param base [Module] the class or module that included Parslet
def self.included(base)
  base.extend ClassMethods
end
55
+
56
# Raised when the parse failed to match. The exception message is meant to
# be shown to the user; further details live in #cause, an instance of
# {Parslet::Cause} that stores the failed parse as a tree structure.
#
#   begin
#     parslet.parse(str)
#   rescue Parslet::ParseFailed => failure
#     puts failure.cause.ascii_tree
#   end
#
# Alternatively, require 'parslet/convenience' and call #parse_with_debug
# instead of #parse. That method never raises and prints error trees to
# stdout.
#
#   require 'parslet/convenience'
#   parslet.parse_with_debug(str)
#
class ParseFailed < StandardError
  # Why the parse failed.
  #
  # @return [Parslet::Cause]
  attr_reader :cause

  # @param message [String] message handed on to StandardError
  # @param cause [Parslet::Cause, nil] details of the failure
  def initialize(message, cause=nil)
    super(message)
    @cause = cause
  end
end
85
+
86
# Raised when a parse succeeded but left part of the input unconsumed. The
# error tree is of limited use in this case: the parser may have worked
# fine and simply not accounted for the characters at the tail of the input.
#
#   str('foo').parse('foobar')
#     # raises Parslet::UnconsumedInput:
#     #   Don't know what to do with "bar" at line 1 char 4.
#
# Parslet can be told to accept a prefix match instead:
#
#   str('foo').parse('foobar', prefix: true) # => "foo"@0
#
class UnconsumedInput < ParseFailed; end
101
+
102
module ClassMethods
  # Defines an entity for the parser by generating an instance method called
  # +name+. The generated methods can be used as part of other patterns and
  # mixed freely with ordinary ruby methods in the parser class.
  #
  #   class MyParser
  #     include Parslet
  #
  #     rule(:bar) { str('bar') }
  #     rule(:twobar) do
  #       bar >> bar
  #     end
  #
  #     root :twobar
  #   end
  #
  # @param name [Symbol] name of the rule and of the generated method
  # @param definition [Proc] block that builds the rule's parslet; it is
  #   instance_eval'ed on the parser instance
  def rule(name, &definition)
    define_method(name) do
      @rules ||= {} # <name, rule> memoization, one table per parser instance

      # Hand back the memoized rule when there is one; otherwise build it,
      # store it and return it.
      @rules.fetch(name) do
        # Bind the definition to this parser instance so other rules can be
        # referenced by their bare names inside the block.
        bound_definition = proc { instance_eval(&definition) }

        @rules[name] = Atoms::Rule.new(name, &bound_definition)
      end
    end
  end
end
132
+
133
# Helper behind the bracket form of #match: wraps the given string in
# square brackets and builds the Re atom from it. See also Parslet.match.
#
# @api private
class DelayedMatchConstructor
  # @param str [String] contents of the character class, without brackets
  # @return [Parslet::Atoms::Re] atom for the bracketed character class
  def [](str)
    char_class = "[" + str + "]"
    Atoms::Re.new(char_class)
  end
end
141
+
142
# Returns an atom matching a character class. Any regular expression can be
# used, as long as it matches only a single character at a time.
#
#   match('[ab]')     # will match either 'a' or 'b'
#   match('[\n\s]')   # will match newlines and spaces
#
# Called without an argument, it returns a helper that supports the
# bracket (convenience) form:
#
#   match['a-z']      # synonymous to match('[a-z]')
#   match['\n']       # synonymous to match('[\n]')
#
# @overload match(str)
#   @param str [String] character class to match (regexp syntax)
#   @return [Parslet::Atoms::Re] a parslet atom
#
def match(str=nil)
  str ? Atoms::Re.new(str) : DelayedMatchConstructor.new
end
162
+ module_function :match
163
+
164
# Builds an atom that matches the given string verbatim:
#
#   str('class') # will match 'class'
#
# @param str [String] string to match verbatim
# @return [Parslet::Atoms::Str] a parslet atom
#
def str(str)
  literal_atom = Atoms::Str.new(str)
  literal_atom
end
174
+ module_function :str
175
+
176
# Builds an atom that matches any single character, like the '.' (dot)
# in regular expressions.
#
#   any.parse('a') # => 'a'
#
# @return [Parslet::Atoms::Re] a parslet atom
#
def any
  dot_atom = Atoms::Re.new('.')
  dot_atom
end
186
+ module_function :any
187
+
188
# Embeds a whole treetop expression into parslet construction.
#
#   # the same as str('a') >> str('b').maybe
#   exp(%Q("a" "b"?))
#
# @param str [String] a treetop expression
# @return [Parslet::Atoms::Base] the corresponding parslet parser
#
def exp(str)
  expression = Parslet::Expression.new(str)
  expression.to_parslet
end
200
+ module_function :exp
201
+
202
# Placeholder for tree transformation patterns that only matches a sequence
# of elements. The +symbol+ given becomes the key for the matched sequence
# in the returned dictionary.
#
#   # This would match a body element that contains several declarations.
#   { :body => sequence(:declarations) }
#
# The example matches <code>:body => ['a', 'b']</code>, but not
# <code>:body => 'a'</code>.
#
# see {Parslet::Transform}
#
def sequence(symbol)
  binding_for_sequence = Pattern::SequenceBind.new(symbol)
  binding_for_sequence
end
217
+ module_function :sequence
218
+
219
# Placeholder for tree transformation patterns that only matches simple
# elements, that is, everything <code>#sequence</code> doesn't match.
#
#   # Matches a single header.
#   { :header => simple(:header) }
#
# see {Parslet::Transform}
#
def simple(symbol)
  binding_for_simple = Pattern::SimpleBind.new(symbol)
  binding_for_simple
end
231
+ module_function :simple
232
+
233
# Placeholder for tree transformation patterns that matches any kind of
# subtree.
#
#   { :expression => subtree(:exp) }
#
def subtree(symbol)
  binding_for_subtree = Pattern::SubtreeBind.new(symbol)
  binding_for_subtree
end
241
+ module_function :subtree
242
+
243
+ autoload :Expression, 'parslet/expression'
244
+ end
245
+
246
+ require 'parslet/slice'
247
+ require 'parslet/cause'
248
+ require 'parslet/source'
249
+ require 'parslet/atoms'
250
+ require 'parslet/pattern'
251
+ require 'parslet/pattern/binding'
252
+ require 'parslet/transform'
253
+ require 'parslet/parser'
254
+ require 'parslet/error_reporter'
@@ -0,0 +1,32 @@
1
+
2
+ # This is where parslet's name comes from: Small parser atoms.
3
+ #
4
+ module Parslet::Atoms
5
# Precedence levels that decide where parentheses are placed when parslets
# are printed with #inspect. They are not relevant to other aspects of the
# parsing. A larger value means the construct binds less tightly.
#
module Precedence
  BASE       = 1 # everything else
  LOOKAHEAD  = 2 # &SOMETHING
  REPETITION = 3 # 'a'+, 'a'?
  SEQUENCE   = 4 # 'a' 'b'
  ALTERNATE  = 5 # 'a' | 'b'
  OUTER      = 6 # printing is done here.
end
17
+
18
+ require 'parslet/atoms/can_flatten'
19
+ require 'parslet/atoms/context'
20
+ require 'parslet/atoms/dsl'
21
+ require 'parslet/atoms/base'
22
+ require 'parslet/atoms/named'
23
+ require 'parslet/atoms/lookahead'
24
+ require 'parslet/atoms/alternative'
25
+ require 'parslet/atoms/sequence'
26
+ require 'parslet/atoms/repetition'
27
+ require 'parslet/atoms/re'
28
+ require 'parslet/atoms/str'
29
+ require 'parslet/atoms/entity'
30
+ require 'parslet/atoms/rule'
31
+ end
32
+
@@ -0,0 +1,50 @@
1
+
2
# Alternative during matching. Holds a list of parslets that are tried one
# after the other, in order. It only fails if every alternative fails.
#
# Example:
#
#   str('a') | str('b')   # matches either 'a' or 'b'
#
class Parslet::Atoms::Alternative < Parslet::Atoms::Base
  attr_reader :alternatives

  # Builds an Alternative from the given parslets, keeping their order. This
  # is what happens when '|' is called on existing parslets:
  #
  #   str('a') | str('b')
  #
  # @param alternatives [Array<Parslet::Atoms::Base>] parslets to try in order
  def initialize(*alternatives)
    super()

    @alternatives = alternatives
    # Built once here; the same message is reported on every failure.
    @error_msg = "Expected one of #{alternatives.inspect}"
  end

  #---
  # Keeps the list flat: a further '|' produces one new Alternative holding
  # all parslets instead of a hanging tree of nested Alternative parslets.
  # This reduces the number of objects created.
  #+++
  def |(parslet)
    self.class.new(*@alternatives, parslet)
  end

  # Applies each alternative in turn and returns the result of the first
  # one that succeeds. If none does, an error carrying the values returned
  # by all failed alternatives is reported through the context.
  def try(source, context)
    errors = []

    alternatives.each do |alternative|
      success, value = result = alternative.apply(source, context)
      return result if success

      # Aggregate all errors
      errors << value
    end

    # If we reach this point, all alternatives have failed.
    context.err(self, source, @error_msg, errors)
  end

  precedence ALTERNATE

  # Printable form: the alternatives joined by ' / '.
  def to_s_inner(prec)
    printed = alternatives.map { |alternative| alternative.to_s(prec) }
    printed.join(' / ')
  end
end
@@ -0,0 +1,124 @@
1
+ # Base class for all parslets, handles orchestration of calls and implements
2
+ # a lot of the operator and chaining methods.
3
+ #
4
+ # Also see Parslet::Atoms::DSL for chaining parslet atoms together.
5
+ #
6
+ class Parslet::Atoms::Base
7
+ include Parslet::Atoms::Precedence
8
+ include Parslet::Atoms::DSL
9
+ include Parslet::Atoms::CanFlatten
10
+
11
# Attempts to parse the given input and returns the result. Input that
# responds to #line_and_column is used as the source directly; anything
# else is wrapped in a Parslet::Source. A failed parse is turned into an
# exception through the Parslet::Cause it produced.
#
# @param io [String, Source] input for the parse process
# @option options [Parslet::ErrorReporter] :reporter error reporter to use,
#   defaults to Parslet::ErrorReporter::Tree
# @option options [Boolean] :prefix Should a prefix match be accepted?
#   (default: false)
# @return [Hash, Array, Parslet::Slice] PORO (Plain old Ruby object) result
#   tree
#
def parse(io, options={})
  source = if io.respond_to?(:line_and_column)
    io
  else
    Parslet::Source.new(io)
  end

  # Optimistic first pass: assume the input parses and skip all error
  # reporting work.
  success, value = setup_and_apply(source, nil)

  unless success
    # The optimistic pass failed, so run the parse again, this time
    # gathering error information. The stack trace of the exception will be
    # off, but the error tree should explain why the parse failed.
    reporter = options[:reporter] || Parslet::ErrorReporter::Tree.new
    success, value = setup_and_apply(source, reporter)

    if success
      fail "Assertion failed: success was true when parsing with reporter"
    end

    # Value is a Parslet::Cause, which can be turned into an exception:
    value.raise

    fail "NEVER REACHED"
  end

  # From here on the parse has succeeded. It is complete when a prefix
  # match is acceptable or when no input is left.
  return flatten(value) if options[:prefix] || source.chars_left <= 0

  # Input is left over, so the pattern doesn't match. Report where the
  # leftover starts and quote up to 10 characters of it.
  old_pos = source.pos
  leftover = source.consume(10).to_s.inspect
  Parslet::Cause.
    format(source, old_pos, "Don't know what to do with #{leftover}").
    raise(Parslet::UnconsumedInput)

  flatten(value)
end
64
+
65
# Builds a fresh parsing context around +error_reporter+ and applies this
# atom to +source+ within it.
#
# @return [<Boolean, Object>] Result of the parse. If the first member is
#   true, the parse has succeeded.
def setup_and_apply(source, error_reporter)
  apply(source, Parslet::Atoms::Context.new(error_reporter))
end
74
+
75
#---
# Runs #try for this parslet. When the attempt fails, the source position
# is put back to where it was before the attempt; on success the position
# is left wherever #try moved it. (A cached variant going through
# context.try_with_cache is present in the original only as a
# commented-out line.)
#+++
def apply(source, context)
  start_pos = source.pos

  outcome = try(source, context)
  succeeded, _value = outcome

  # Rewind the input only when the parse has failed.
  source.pos = start_pos unless succeeded
  outcome
end
91
+
92
# Placeholder for the parsing behaviour itself: Atoms::Base subclasses
# override this method. The base implementation always raises.
#
# @raise [NotImplementedError] always
def try(source, context)
  message =
    "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
  raise NotImplementedError, message
end
99
+
100
# Debug printing - in Treetop syntax.
#
# Class-level macro: defines an instance method #precedence that returns
# +prec+ for the class it is called in.
def self.precedence(prec)
  define_method(:precedence) { prec }
end
precedence BASE

# Printable form of this parslet. It is wrapped in parentheses when the
# surrounding precedence +outer_prec+ is lower than this parslet's own.
def to_s(outer_prec=OUTER)
  own_prec = precedence
  return to_s_inner(own_prec) unless outer_prec < own_prec

  "(" + to_s_inner(own_prec) + ")"
end

# Same as #to_s at the outermost precedence level.
def inspect
  to_s(OUTER)
end
116
+
117
+ private
118
+
119
# Wraps +result+ in the success form of a parse result: a two-element
# array whose first member is true.
#
# @return [Array] [true, result]
def succ(result)
  return true, result
end
124
+ end