ghazel-parslet 1.4.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.txt +195 -0
- data/LICENSE +23 -0
- data/README +70 -0
- data/Rakefile +49 -0
- data/example/boolean_algebra.rb +70 -0
- data/example/calc.rb +153 -0
- data/example/comments.rb +35 -0
- data/example/deepest_errors.rb +131 -0
- data/example/documentation.rb +18 -0
- data/example/email_parser.rb +52 -0
- data/example/empty.rb +13 -0
- data/example/erb.rb +47 -0
- data/example/ignore.rb +33 -0
- data/example/ip_address.rb +125 -0
- data/example/json.rb +128 -0
- data/example/local.rb +34 -0
- data/example/mathn.rb +44 -0
- data/example/minilisp.rb +94 -0
- data/example/modularity.rb +47 -0
- data/example/nested_errors.rb +132 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/parens.out +8 -0
- data/example/output/readme.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/parens.rb +42 -0
- data/example/readme.rb +30 -0
- data/example/seasons.rb +46 -0
- data/example/sentence.rb +36 -0
- data/example/simple.lit +3 -0
- data/example/simple_xml.rb +54 -0
- data/example/string_parser.rb +77 -0
- data/example/test.lit +4 -0
- data/lib/parslet.rb +254 -0
- data/lib/parslet/atoms.rb +32 -0
- data/lib/parslet/atoms/alternative.rb +50 -0
- data/lib/parslet/atoms/base.rb +124 -0
- data/lib/parslet/atoms/can_flatten.rb +137 -0
- data/lib/parslet/atoms/context.rb +94 -0
- data/lib/parslet/atoms/dsl.rb +98 -0
- data/lib/parslet/atoms/entity.rb +41 -0
- data/lib/parslet/atoms/lookahead.rb +49 -0
- data/lib/parslet/atoms/named.rb +32 -0
- data/lib/parslet/atoms/re.rb +38 -0
- data/lib/parslet/atoms/repetition.rb +63 -0
- data/lib/parslet/atoms/rule.rb +12 -0
- data/lib/parslet/atoms/rule/position.rb +143 -0
- data/lib/parslet/atoms/sequence.rb +38 -0
- data/lib/parslet/atoms/str.rb +37 -0
- data/lib/parslet/atoms/visitor.rb +89 -0
- data/lib/parslet/cause.rb +94 -0
- data/lib/parslet/convenience.rb +35 -0
- data/lib/parslet/error_reporter.rb +7 -0
- data/lib/parslet/error_reporter/deepest.rb +95 -0
- data/lib/parslet/error_reporter/tree.rb +57 -0
- data/lib/parslet/export.rb +162 -0
- data/lib/parslet/expression.rb +51 -0
- data/lib/parslet/expression/treetop.rb +92 -0
- data/lib/parslet/parser.rb +67 -0
- data/lib/parslet/pattern.rb +114 -0
- data/lib/parslet/pattern/binding.rb +49 -0
- data/lib/parslet/rig/rspec.rb +51 -0
- data/lib/parslet/slice.rb +101 -0
- data/lib/parslet/source.rb +62 -0
- data/lib/parslet/source/line_cache.rb +95 -0
- data/lib/parslet/transform.rb +236 -0
- data/lib/parslet/transform/context.rb +32 -0
- metadata +264 -0
@@ -0,0 +1,67 @@
|
|
1
|
+
|
2
|
+
# The base class for all your parsers. Use as follows:
|
3
|
+
#
|
4
|
+
# require 'parslet'
|
5
|
+
#
|
6
|
+
# class MyParser < Parslet::Parser
|
7
|
+
# rule(:a) { str('a').repeat }
|
8
|
+
# root(:a)
|
9
|
+
# end
|
10
|
+
#
|
11
|
+
# pp MyParser.new.parse('aaaa') # => 'aaaa'
|
12
|
+
# pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
|
13
|
+
# # Don't know what to do with bbbb at line 1 char 1.
|
14
|
+
#
|
15
|
+
# Parslet::Parser is also a grammar atom. This means that you can mix full
|
16
|
+
# fledged parsers freely with small parts of a different parser.
|
17
|
+
#
|
18
|
+
# Example:
|
19
|
+
# class ParserA < Parslet::Parser
|
20
|
+
# root :aaa
|
21
|
+
# rule(:aaa) { str('a').repeat(3,3) }
|
22
|
+
# end
|
23
|
+
# class ParserB < Parslet::Parser
|
24
|
+
# root :expression
|
25
|
+
# rule(:expression) { str('b') >> ParserA.new >> str('b') }
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# In the above example, ParserB would parse something like 'baaab'.
|
29
|
+
#
|
30
|
+
class Parslet::Parser < Parslet::Atoms::Base
  include Parslet

  # Declares the rule at which parsing starts. If you have a rule for 'file'
  # that describes what should be in a file, this would be your root
  # declaration:
  #
  #   class Parser
  #     root :file
  #     rule(:file) { ... }
  #   end
  #
  # Calling this defines an instance method named +root+ that sends +name+
  # to the parser instance and returns whatever that call returns.
  #
  # @param name [Symbol] name of the method to use as the parser's root
  #
  def self.root(name)
    define_method(:root) { send(name) }
  end

  # Delegates the parse attempt to whatever +root+ returns.
  #
  def try(source, context)
    entry_atom = root
    entry_atom.try(source, context)
  end

  # Delegates the string representation to whatever +root+ returns.
  #
  def to_s_inner(prec)
    entry_atom = root
    entry_atom.to_s(prec)
  end
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
# Matches trees against expressions. Trees are formed by arrays and hashes
|
2
|
+
# for expressing membership and sequence. The leaves of the tree are other
|
3
|
+
# classes.
|
4
|
+
#
|
5
|
+
# A tree issued by the parslet library might look like this:
|
6
|
+
#
|
7
|
+
# {
|
8
|
+
# :function_call => {
|
9
|
+
# :name => 'foobar',
|
10
|
+
# :args => [1, 2, 3]
|
11
|
+
# }
|
12
|
+
# }
|
13
|
+
#
|
14
|
+
# A pattern that would match against this tree would be:
|
15
|
+
#
|
16
|
+
# { :function_call => { :name => simple(:name), :args => sequence(:args) }}
|
17
|
+
#
|
18
|
+
# Note that Parslet::Pattern only matches at a given subtree; it won't try
|
19
|
+
# to match recursively. To do that, please use Parslet::Transform.
|
20
|
+
#
|
21
|
+
module Parslet
  # Matches a (sub)tree made of hashes, arrays and leaf objects against a
  # pattern expression. Matching happens only at the given subtree; no
  # recursive search for a matching subtree is performed.
  #
  class Pattern
    # @param pattern [Object] the expression to match trees against; may
    #   contain hashes, arrays, literal leaves and bind placeholders (objects
    #   answering to #can_bind? and #variable_name)
    #
    def initialize(pattern)
      @pattern = pattern
    end

    # Decides if the given subtree matches this pattern. Returns the bindings
    # made on a successful match or nil if the match fails. If you specify
    # bindings to be a hash, the mappings in it will be treated like bindings
    # made during an attempted match. The hash you pass in is duplicated and
    # never modified.
    #
    #   Pattern.new('a').match('a', :foo => 'bar') # => { :foo => 'bar' }
    #
    # @param subtree [String, Hash, Array] poro subtree returned by a parse
    # @param bindings [Hash] variable bindings to be verified
    # @return [Hash, nil] On success: variable bindings that allow a match. On
    #   failure: nil
    #
    def match(subtree, bindings=nil)
      bindings = bindings ? bindings.dup : {}
      element_match(subtree, @pattern, bindings) ? bindings : nil
    end

    # Returns true if the tree element given by +tree+ matches the expression
    # given by +exp+. This match must respect bindings already made in
    # +bindings+. Note that bindings is carried along and modified.
    #
    # @api private
    #
    def element_match(tree, exp, bindings)
      # Dispatch on the exact classes of both sides (subclasses of Hash and
      # Array take the fallback branch).
      case [tree, exp].map { |e| e.class }
        when [Hash,Hash]
          return element_match_hash(tree, exp, bindings)
        when [Array,Array]
          return element_match_ary_single(tree, exp, bindings)
      else
        # If elements match exactly, then that is good enough in all cases
        return true if tree == exp

        # If exp is a bind variable: Check if the binding matches
        if exp.respond_to?(:can_bind?) && exp.can_bind?(tree)
          return element_match_binding(tree, exp, bindings)
        end

        # Otherwise: No match (we don't know anything about the element
        # combination)
        return false
      end
    end

    # Binds +tree+ to the variable named by +exp+, or - if that variable is
    # already bound - verifies that the bound value equals +tree+.
    #
    # @api private
    #
    def element_match_binding(tree, exp, bindings)
      var_name = exp.variable_name

      # TODO test for the hidden :_ feature.
      #
      # Presence of the key decides whether the variable is bound. Testing
      # the bound value for truthiness would treat a variable bound to nil or
      # false as unbound and silently re-bind it.
      if var_name && bindings.key?(var_name)
        return bindings[var_name] == tree
      end

      # New binding:
      bindings.store var_name, tree

      return true
    end

    # Matches two arrays element by element; they must have the same size.
    #
    # @api private
    #
    def element_match_ary_single(sequence, exp, bindings)
      return false if sequence.size != exp.size

      return sequence.zip(exp).all? { |elt, subexp|
        element_match(elt, subexp, bindings) }
    end

    # Matches two hashes: same number of entries, every pattern key present
    # in the tree, and every value matching recursively.
    #
    # @api private
    #
    def element_match_hash(tree, exp, bindings)
      # Early failure when one hash is bigger than the other
      return false unless exp.size == tree.size

      # We iterate over expected pattern, since we demand that the keys that
      # are there should be in tree as well.
      exp.each do |expected_key, expected_value|
        return false unless tree.has_key? expected_key

        # Recurse into the value and stop early on failure
        value = tree[expected_key]
        return false unless element_match(value, expected_value, bindings)
      end

      return true
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
|
2
|
+
# Used internally for representing a bind placeholder in a Parslet::Transform
|
3
|
+
# pattern. This is the superclass for all bindings.
|
4
|
+
#
|
5
|
+
# It defines the most permissive kind of bind, the one that matches any subtree
|
6
|
+
# whatever it looks like.
|
7
|
+
#
|
8
|
+
module Parslet
  class Pattern
    # Superclass for all bind placeholders. This is the most permissive kind
    # of bind: it accepts any subtree.
    #
    class SubtreeBind < Struct.new(:symbol)
      # Name under which a matched subtree is stored in the bindings.
      #
      def variable_name
        symbol
      end

      # Renders the placeholder as <code>type(:symbol)</code>, e.g.
      # <code>simple(:x)</code>.
      #
      def inspect
        "#{bind_type_name}(#{symbol.inspect})"
      end

      # @return [Boolean] always true; any subtree can be bound
      #
      def can_bind?(subtree)
        true
      end

    private
      # Derives the bind type from the class name ('SimpleBind' => 'simple').
      #
      def bind_type_name
        # Anonymous classes have a nil name; to_s keeps inspection from
        # raising in that case.
        if md=self.class.name.to_s.match(/(\w+)Bind/)
          md.captures.first.downcase
        else
          # This path should never be used, but since this is for inspection only,
          # let's not raise.
          'unknown_bind'
        end
      end
    end

    # Binds a symbol to a simple subtree, one that is not either a sequence of
    # elements or a collection of attributes.
    #
    class SimpleBind < SubtreeBind
      # @return [Boolean] true unless subtree's class is exactly Hash or Array
      #
      def can_bind?(subtree)
        not [Hash, Array].include?(subtree.class)
      end
    end

    # Binds a symbol to a sequence of simple leafs ([element1, element2, ...])
    #
    class SequenceBind < SubtreeBind
      # @return [Boolean] true if subtree is an Array none of whose elements
      #   has the exact class Hash or Array
      #
      def can_bind?(subtree)
        subtree.kind_of?(Array) &&
          (not subtree.any? { |el| [Hash, Array].include?(el.class) })
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# RSpec matcher checking that a parser can parse +input+.
#
#   parser.should parse('foo')
#   parser.should parse('foo').as('foo')
#   parser.should parse('foo').as { |output| output == 'foo' }
#   parser.should parse('foo', :trace => true)
#
# When +opts+ contains a truthy :trace entry, the ascii tree of the parse
# failure cause is appended to the failure message.
#
RSpec::Matchers.define(:parse) do |input, opts|
  as = block = nil
  result = trace = nil

  match do |parser|
    begin
      result = parser.parse(input)
      # A block given to #as decides the outcome; otherwise compare against
      # the expected output, or accept any successful parse if none was given.
      block ?
        block.call(result) :
        (as == result || as.nil?)
    rescue Parslet::ParseFailed => ex
      trace = ex.cause.ascii_tree if opts && opts[:trace]
      false
    end
  end

  failure_message_for_should do |is|
    if block
      "expected output of parsing #{input.inspect}" <<
      " with #{is.inspect} to meet block conditions, but it didn't"
    else
      "expected " <<
        (as ?
          "output of parsing #{input.inspect}"<<
          " with #{is.inspect} to equal #{as.inspect}, but was #{result.inspect}" :
          "#{is.inspect} to be able to parse #{input.inspect}") <<
        (trace ?
          "\n"+trace :
          '')
    end
  end

  failure_message_for_should_not do |is|
    if block
      "expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did"
    else
      "expected " <<
        (as ?
          "output of parsing #{input.inspect}"<<
          " with #{is.inspect} not to equal #{as.inspect}" :

          "#{is.inspect} to not parse #{input.inspect}, but it did")
    end
  end

  # NOTE: This has a nodoc tag since the rdoc parser puts this into
  # Object, a thing I would never allow.
  #
  # The block parameter must not be called +block+: it would shadow the
  # outer +block+ local, and the assignment below would then leave the outer
  # variable nil, so the block form would never be evaluated.
  # +expected_output+ defaults to nil so that the block-only form can be
  # used without an argument.
  chain :as do |expected_output=nil, &my_block|
    as = expected_output
    block = my_block
  end
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
|
2
|
+
# A slice is a small part from the parse input. A slice mainly behaves like
|
3
|
+
# any other string, except that it remembers where it came from (offset in
|
4
|
+
# original input).
|
5
|
+
#
|
6
|
+
# == Extracting line and column
|
7
|
+
#
|
8
|
+
# Using the #line_and_column method, you can extract the line and column in
|
9
|
+
# the original input where this slice starts.
|
10
|
+
#
|
11
|
+
# Example:
|
12
|
+
# slice.line_and_column # => [1, 13]
|
13
|
+
# slice.offset # => 12
|
14
|
+
#
|
15
|
+
# == Likeness to strings
|
16
|
+
#
|
17
|
+
# Parslet::Slice behaves in many ways like a Ruby String. This likeness
|
18
|
+
# however is not complete - many of the myriad of operations String supports
|
19
|
+
# are not yet in Slice. You can always extract the internal string instance by
|
20
|
+
# calling #to_s.
|
21
|
+
#
|
22
|
+
# These omissions are somewhat intentional. Rather than maintaining a full
|
23
|
+
# delegation, we opt for a partial emulation that gets the job done.
|
24
|
+
#
|
25
|
+
class Parslet::Slice
  attr_reader :str, :offset, :line_cache

  # Builds a slice from a string, the offset it was taken from and an
  # optional line cache. The line cache is expected to answer to
  # #line_and_column.
  #
  def initialize(string, offset, line_cache=nil)
    @str = string
    @offset = offset
    @line_cache = line_cache
  end

  # Compares the wrapped string with +other+ (a slice or a string).
  #
  def ==(other)
    str == other
  end

  # Matches the wrapped string against a regular expression.
  #
  def match(regexp)
    str.match(regexp)
  end

  # Returns the slice's size in characters.
  #
  def size
    str.size
  end

  # Concatenates this slice with +other+; it is assumed that +other+ begins
  # where this slice ends. The result keeps this slice's offset and line
  # cache.
  #
  def +(other)
    combined = str + other.to_s
    self.class.new(combined, offset, line_cache)
  end

  # Returns a <line, column> tuple referring to the original input. Raises
  # ArgumentError if the slice was constructed without a line cache.
  #
  def line_and_column
    unless line_cache
      raise ArgumentError, "No line cache was given, cannot infer line and column."
    end

    line_cache.line_and_column(offset)
  end


  # Conversion operators -----------------------------------------------------

  # Returns the wrapped string.
  def to_str
    str
  end
  alias_method :to_s, :to_str

  # A slice is already a slice.
  def to_slice
    self
  end

  def to_sym
    str.to_sym
  end

  # Strict conversion through Kernel#Integer.
  def to_int
    Integer(str)
  end

  def to_i
    str.to_i
  end

  def to_f
    str.to_f
  end

  # Inspection & Debugging ---------------------------------------------------

  # Prints the slice as <code>"string"@offset</code>.
  def inspect
    "#{str.inspect}@#{offset}"
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
|
2
|
+
require 'stringio'
|
3
|
+
|
4
|
+
require 'parslet/source/line_cache'
|
5
|
+
|
6
|
+
module Parslet
  # Wraps the input string for parslet.
  #
  class Source
    # @param str [#to_str] the input to parse
    # @raise [ArgumentError] if +str+ does not respond to #to_str
    #
    def initialize(str)
      raise ArgumentError, "Must construct Source with a string like object." \
        unless str.respond_to?(:to_str)

      @pos = 0
      # Convert explicitly: the guard above only promises #to_str, while the
      # methods below call String methods (#index, #slice, #size) on @str.
      @str = str.to_str

      @line_cache = LineCache.new
      @line_cache.scan_for_line_endings(0, @str)
    end

    # Checks if the given pattern matches at the current input position,
    # that is if the first occurrence of +pattern+ at or after #pos starts
    # exactly at #pos.
    #
    # @param pattern [Regexp, String] pattern to check for
    # @return [Boolean] true if the pattern matches at #pos
    #
    def matches?(pattern)
      @str.index(pattern, @pos) == @pos
    end
    alias match matches?

    # Consumes n characters from the input, returning them as a slice of the
    # input. Advances #pos by the number of characters actually consumed.
    #
    def consume(n)
      slice_str = @str.slice(@pos, n)
      slice = Parslet::Slice.new(
        slice_str,
        pos,
        @line_cache)

      @pos += slice_str.size
      return slice
    end

    # Returns how many chars remain in the input.
    #
    def chars_left
      @str.size - @pos
    end

    # Position of the parse as a character offset into the original string.
    # @note: Encodings...
    attr_accessor :pos

    # Returns a <line, column> tuple for the given position. If no position is
    # given, line/column information is returned for the current position
    # given by #pos.
    #
    def line_and_column(position=nil)
      @line_cache.line_and_column(position || self.pos)
    end
  end
end
|