ghazel-parslet 1.4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.txt +195 -0
- data/LICENSE +23 -0
- data/README +70 -0
- data/Rakefile +49 -0
- data/example/boolean_algebra.rb +70 -0
- data/example/calc.rb +153 -0
- data/example/comments.rb +35 -0
- data/example/deepest_errors.rb +131 -0
- data/example/documentation.rb +18 -0
- data/example/email_parser.rb +52 -0
- data/example/empty.rb +13 -0
- data/example/erb.rb +47 -0
- data/example/ignore.rb +33 -0
- data/example/ip_address.rb +125 -0
- data/example/json.rb +128 -0
- data/example/local.rb +34 -0
- data/example/mathn.rb +44 -0
- data/example/minilisp.rb +94 -0
- data/example/modularity.rb +47 -0
- data/example/nested_errors.rb +132 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/parens.out +8 -0
- data/example/output/readme.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/parens.rb +42 -0
- data/example/readme.rb +30 -0
- data/example/seasons.rb +46 -0
- data/example/sentence.rb +36 -0
- data/example/simple.lit +3 -0
- data/example/simple_xml.rb +54 -0
- data/example/string_parser.rb +77 -0
- data/example/test.lit +4 -0
- data/lib/parslet.rb +254 -0
- data/lib/parslet/atoms.rb +32 -0
- data/lib/parslet/atoms/alternative.rb +50 -0
- data/lib/parslet/atoms/base.rb +124 -0
- data/lib/parslet/atoms/can_flatten.rb +137 -0
- data/lib/parslet/atoms/context.rb +94 -0
- data/lib/parslet/atoms/dsl.rb +98 -0
- data/lib/parslet/atoms/entity.rb +41 -0
- data/lib/parslet/atoms/lookahead.rb +49 -0
- data/lib/parslet/atoms/named.rb +32 -0
- data/lib/parslet/atoms/re.rb +38 -0
- data/lib/parslet/atoms/repetition.rb +63 -0
- data/lib/parslet/atoms/rule.rb +12 -0
- data/lib/parslet/atoms/rule/position.rb +143 -0
- data/lib/parslet/atoms/sequence.rb +38 -0
- data/lib/parslet/atoms/str.rb +37 -0
- data/lib/parslet/atoms/visitor.rb +89 -0
- data/lib/parslet/cause.rb +94 -0
- data/lib/parslet/convenience.rb +35 -0
- data/lib/parslet/error_reporter.rb +7 -0
- data/lib/parslet/error_reporter/deepest.rb +95 -0
- data/lib/parslet/error_reporter/tree.rb +57 -0
- data/lib/parslet/export.rb +162 -0
- data/lib/parslet/expression.rb +51 -0
- data/lib/parslet/expression/treetop.rb +92 -0
- data/lib/parslet/parser.rb +67 -0
- data/lib/parslet/pattern.rb +114 -0
- data/lib/parslet/pattern/binding.rb +49 -0
- data/lib/parslet/rig/rspec.rb +51 -0
- data/lib/parslet/slice.rb +101 -0
- data/lib/parslet/source.rb +62 -0
- data/lib/parslet/source/line_cache.rb +95 -0
- data/lib/parslet/transform.rb +236 -0
- data/lib/parslet/transform/context.rb +32 -0
- metadata +264 -0
@@ -0,0 +1,67 @@
|
|
1
|
+
|
2
|
+
# The base class for all your parsers. Use as follows:
|
3
|
+
#
|
4
|
+
# require 'parslet'
|
5
|
+
#
|
6
|
+
# class MyParser < Parslet::Parser
|
7
|
+
# rule(:a) { str('a').repeat }
|
8
|
+
# root(:a)
|
9
|
+
# end
|
10
|
+
#
|
11
|
+
# pp MyParser.new.parse('aaaa') # => 'aaaa'
|
12
|
+
# pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
|
13
|
+
# # Don't know what to do with bbbb at line 1 char 1.
|
14
|
+
#
|
15
|
+
# Parslet::Parser is also a grammar atom. This means that you can mix full
|
16
|
+
# fledged parsers freely with small parts of a different parser.
|
17
|
+
#
|
18
|
+
# Example:
|
19
|
+
# class ParserA < Parslet::Parser
|
20
|
+
# root :aaa
|
21
|
+
# rule(:aaa) { str('a').repeat(3,3) }
|
22
|
+
# end
|
23
|
+
# class ParserB < Parslet::Parser
|
24
|
+
# root :expression
|
25
|
+
# rule(:expression) { str('b') >> ParserA.new >> str('b') }
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# In the above example, ParserB would parse something like 'baaab'.
|
29
|
+
#
|
30
|
+
class Parslet::Parser < Parslet::Atoms::Base
  include Parslet

  # Declares the rule at which parsing starts. Calling
  #
  #   class Parser
  #     root :file
  #     rule(:file) { ... }
  #   end
  #
  # defines an instance method #root on the parser class that sends +name+
  # to the parser instance and returns whatever that call returns.
  #
  # @param name [Symbol] name of the method (rule) to start parsing with
  #
  def self.root(name)
    define_method(:root) { send(name) }
  end

  # Delegates the match attempt to whatever #root returns, which is what
  # allows a parser to be used wherever an atom is expected.
  #
  def try(source, context)
    start = root
    start.try(source, context)
  end

  # String representation of the parser: that of whatever #root returns,
  # rendered with the precedence +prec+.
  #
  def to_s_inner(prec)
    start = root
    start.to_s(prec)
  end
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
# Matches trees against expressions. Trees are formed by arrays and hashes
|
2
|
+
# for expressing membership and sequence. The leaves of the tree are other
|
3
|
+
# classes.
|
4
|
+
#
|
5
|
+
# A tree issued by the parslet library might look like this:
|
6
|
+
#
|
7
|
+
# {
|
8
|
+
# :function_call => {
|
9
|
+
# :name => 'foobar',
|
10
|
+
# :args => [1, 2, 3]
|
11
|
+
# }
|
12
|
+
# }
|
13
|
+
#
|
14
|
+
# A pattern that would match against this tree would be:
|
15
|
+
#
|
16
|
+
# { :function_call => { :name => simple(:name), :args => sequence(:args) }}
|
17
|
+
#
|
18
|
+
# Note that Parslet::Pattern only matches at a given subtree; it won't try
|
19
|
+
# to match recursively. To do that, please use Parslet::Transform.
|
20
|
+
#
|
21
|
+
module Parslet
  class Pattern
    # @param pattern [Object] the expression to match subtrees against;
    #   hashes and arrays are descended into, objects responding to
    #   #can_bind? act as bind variables, everything else is compared with ==
    #
    def initialize(pattern)
      @pattern = pattern
    end

    # Decides if the given subtree matches this pattern. Returns the bindings
    # made on a successful match or nil if the match fails. If you specify
    # bindings to be a hash, the mappings in it will be treated like bindings
    # made during an attempted match. The hash passed in is duplicated, never
    # modified.
    #
    #   Pattern.new('a').match('a', :foo => 'bar') # => { :foo => 'bar' }
    #
    # @param subtree [String, Hash, Array] poro subtree returned by a parse
    # @param bindings [Hash] variable bindings to be verified
    # @return [Hash, nil] On success: variable bindings that allow a match. On
    #   failure: nil
    #
    def match(subtree, bindings=nil)
      bindings = bindings ? bindings.dup : {}
      element_match(subtree, @pattern, bindings) ? bindings : nil
    end

    # Returns true if the tree element given by +tree+ matches the expression
    # given by +exp+. This match must respect bindings already made in
    # +bindings+. Note that bindings is carried along and modified.
    #
    # Only exact Hash/Hash and Array/Array pairs are descended into
    # (subclasses are deliberately not treated as containers, as before).
    #
    # @api private
    #
    def element_match(tree, exp, bindings)
      if tree.instance_of?(Hash) && exp.instance_of?(Hash)
        element_match_hash(tree, exp, bindings)
      elsif tree.instance_of?(Array) && exp.instance_of?(Array)
        element_match_ary_single(tree, exp, bindings)
      elsif tree == exp
        # If elements match exactly, then that is good enough in all cases
        true
      elsif exp.respond_to?(:can_bind?) && exp.can_bind?(tree)
        # exp is a bind variable: check if the binding matches
        element_match_binding(tree, exp, bindings)
      else
        # No match (we don't know anything about the element combination)
        false
      end
    end

    # Binds +tree+ to the variable named by +exp+, or - if that variable is
    # already bound - checks that the bound value equals +tree+.
    #
    # @api private
    #
    def element_match_binding(tree, exp, bindings)
      var_name = exp.variable_name

      # Test for presence of the key, not truthiness of the value: a variable
      # that is already bound to nil or false must still be compared instead
      # of being silently rebound.
      if var_name && bindings.key?(var_name)
        return bindings[var_name] == tree
      end

      # New binding:
      bindings.store var_name, tree

      true
    end

    # Matches two arrays element by element; they must have the same size.
    #
    # @api private
    #
    def element_match_ary_single(sequence, exp, bindings)
      return false if sequence.size != exp.size

      sequence.zip(exp).all? do |elt, subexp|
        element_match(elt, subexp, bindings)
      end
    end

    # Matches two hashes: both must have the same number of entries and every
    # key of +exp+ must be present in +tree+ with a matching value.
    #
    # @api private
    #
    def element_match_hash(tree, exp, bindings)
      # Early failure when one hash is bigger than the other
      return false unless exp.size == tree.size

      # We iterate over expected pattern, since we demand that the keys that
      # are there should be in tree as well. Stops at the first failure.
      exp.all? do |expected_key, expected_value|
        tree.has_key?(expected_key) &&
          element_match(tree[expected_key], expected_value, bindings)
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
|
2
|
+
# Used internally for representing a bind placeholder in a Parslet::Transform
|
3
|
+
# pattern. This is the superclass for all bindings.
|
4
|
+
#
|
5
|
+
# It defines the most permissive kind of bind, the one that matches any subtree
|
6
|
+
# whatever it looks like.
|
7
|
+
#
|
8
|
+
module Parslet
  class Pattern
    class SubtreeBind < Struct.new(:symbol)
      # Name of the variable this placeholder binds to.
      def variable_name
        symbol
      end

      # Renders as <code>type(:symbol)</code>, e.g. <code>subtree(:x)</code>.
      def inspect
        "#{bind_type_name}(#{symbol.inspect})"
      end

      # The most permissive bind: accepts any subtree.
      def can_bind?(subtree)
        true
      end

      private

      # Derives the lowercase bind type from the class name ("SubtreeBind"
      # gives "subtree").
      def bind_type_name
        # Class#name is nil for anonymous classes; #to_s keeps this
        # inspection helper from raising in that case.
        md = self.class.name.to_s.match(/(\w+)Bind/)

        # The fallback should never be needed, but since this is for
        # inspection only, let's not raise.
        md ? md.captures.first.downcase : 'unknown_bind'
      end
    end
  end
end
|
32
|
+
|
33
|
+
# Binds a symbol to a simple subtree, one that is not either a sequence of
|
34
|
+
# elements or a collection of attributes.
|
35
|
+
#
|
36
|
+
module Parslet
  class Pattern
    class SimpleBind < SubtreeBind
      # Accepts any subtree whose class is neither exactly Hash nor exactly
      # Array.
      def can_bind?(subtree)
        !(subtree.instance_of?(Hash) || subtree.instance_of?(Array))
      end
    end
  end
end
|
41
|
+
|
42
|
+
# Binds a symbol to a sequence of simple leafs ([element1, element2, ...])
|
43
|
+
#
|
44
|
+
module Parslet
  class Pattern
    class SequenceBind < SubtreeBind
      # Accepts arrays none of whose elements has the exact class Hash or
      # Array.
      def can_bind?(subtree)
        return false unless subtree.kind_of?(Array)

        subtree.none? { |el| el.instance_of?(Hash) || el.instance_of?(Array) }
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# RSpec matcher checking that a parser can parse +input+, optionally
# comparing the result with an expected value or checking it in a block:
#
#   parser.should parse('foo')
#   parser.should parse('foo').as('foo')
#   parser.should parse('foo').as { |result| result == 'foo' }
#   parser.should parse('foo', :trace => true)   # failure shows error tree
#
RSpec::Matchers.define(:parse) do |input, opts|
  # State shared between the blocks below: +as+ / +block+ are set by the
  # #as chain, +result+ / +trace+ by the match attempt.
  as = block = nil
  result = trace = nil

  match do |parser|
    begin
      result = parser.parse(input)
      if block
        block.call(result)
      else
        as == result || as.nil?
      end
    rescue Parslet::ParseFailed => ex
      # Keep the error tree for the failure message when :trace was asked for.
      trace = ex.cause.ascii_tree if opts && opts[:trace]
      false
    end
  end

  failure_message_for_should do |is|
    if block
      "expected output of parsing #{input.inspect}" \
        " with #{is.inspect} to meet block conditions, but it didn't"
    else
      expectation =
        if as
          "output of parsing #{input.inspect}" \
            " with #{is.inspect} to equal #{as.inspect}, but was #{result.inspect}"
        else
          "#{is.inspect} to be able to parse #{input.inspect}"
        end
      trace_suffix = trace ? "\n" + trace : ''

      "expected " + expectation + trace_suffix
    end
  end

  failure_message_for_should_not do |is|
    if block
      "expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did"
    else
      expectation =
        if as
          "output of parsing #{input.inspect}" \
            " with #{is.inspect} not to equal #{as.inspect}"
        else
          "#{is.inspect} to not parse #{input.inspect}, but it did"
        end

      "expected " + expectation
    end
  end

  # Records the expected output and/or a block that checks the parse result.
  # The block parameter must not be called +block+: it would shadow the outer
  # variable and the assignment would never reach the match block.
  # +expected_output+ is optional so that <code>.as { ... }</code> works.
  chain :as do |expected_output=nil, &checker|
    as = expected_output
    block = checker
  end
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
|
2
|
+
# A slice is a small part from the parse input. A slice mainly behaves like
|
3
|
+
# any other string, except that it remembers where it came from (offset in
|
4
|
+
# original input).
|
5
|
+
#
|
6
|
+
# == Extracting line and column
|
7
|
+
#
|
8
|
+
# Using the #line_and_column method, you can extract the line and column in
|
9
|
+
# the original input where this slice starts.
|
10
|
+
#
|
11
|
+
# Example:
|
12
|
+
# slice.line_and_column # => [1, 13]
|
13
|
+
# slice.offset # => 12
|
14
|
+
#
|
15
|
+
# == Likeness to strings
|
16
|
+
#
|
17
|
+
# Parslet::Slice behaves in many ways like a Ruby String. This likeness
|
18
|
+
# however is not complete - many of the myriad of operations String supports
|
19
|
+
# are not yet in Slice. You can always extract the internal string instance by
|
20
|
+
# calling #to_s.
|
21
|
+
#
|
22
|
+
# These omissions are somewhat intentional. Rather than maintaining a full
|
23
|
+
# delegation, we opt for a partial emulation that gets the job done.
|
24
|
+
#
|
25
|
+
module Parslet
  class Slice
    attr_reader :str, :offset, :line_cache

    # Builds a slice from a string, its offset in the original input and an
    # optional line cache. The line cache must answer #line_and_column.
    #
    def initialize(string, offset, line_cache=nil)
      @str = string
      @offset = offset
      @line_cache = line_cache
    end

    # Equality is decided by the wrapped string, so slices compare equal to
    # plain strings and to other slices with the same content.
    #
    def ==(other)
      str == other
    end

    # Matches the wrapped string against a regular expression.
    #
    def match(regexp)
      str.match(regexp)
    end

    # Size of the slice in characters.
    #
    def size
      str.size
    end

    # Appends +other+ to this slice. The result keeps the offset and line
    # cache of this slice; +other+ is assumed to start where this one ends.
    #
    def +(other)
      combined = str + other.to_s
      self.class.new(combined, offset, line_cache)
    end

    # Returns a <line, column> tuple referring to the original input.
    # Raises ArgumentError when the slice was built without a line cache.
    #
    def line_and_column
      unless line_cache
        raise ArgumentError, "No line cache was given, cannot infer line and column."
      end

      line_cache.line_and_column(offset)
    end

    # Conversion operators ---------------------------------------------------

    def to_str
      str
    end
    alias_method :to_s, :to_str

    def to_slice
      self
    end

    def to_sym
      str.to_sym
    end

    def to_int
      Integer(str)
    end

    def to_i
      str.to_i
    end

    def to_f
      str.to_f
    end

    # Inspection & Debugging -------------------------------------------------

    # Prints the slice as <code>"string"@offset</code>.
    def inspect
      "#{str.inspect}@#{offset}"
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
|
2
|
+
require 'stringio'
|
3
|
+
|
4
|
+
require 'parslet/source/line_cache'
|
5
|
+
|
6
|
+
module Parslet
  # Wraps the input string for parslet.
  #
  class Source
    # @param str [#to_str] the input to parse; converted with #to_str so that
    #   string-like objects can be used, since the methods below call
    #   #index, #slice and #size on the stored input
    # @raise [ArgumentError] if +str+ does not respond to #to_str
    #
    def initialize(str)
      unless str.respond_to?(:to_str)
        raise ArgumentError, 'Must construct Source with a string like object.'
      end

      @pos = 0
      @str = str.to_str

      @line_cache = LineCache.new
      @line_cache.scan_for_line_endings(0, @str)
    end

    # Checks if the given pattern matches at the current input position.
    #
    # @param pattern [Regexp, String] pattern to check for
    # @return [Boolean] true if the pattern matches at #pos
    #
    def matches?(pattern)
      # The first match at or after #pos has to start exactly at #pos.
      @str.index(pattern, @pos) == @pos
    end
    alias match matches?

    # Consumes n characters from the input, returning them as a slice of the
    # input. The position advances by the size of the string actually
    # sliced off.
    #
    def consume(n)
      consumed = @str.slice(@pos, n)
      slice = Parslet::Slice.new(consumed, pos, @line_cache)

      @pos += consumed.size
      slice
    end

    # Returns how many chars remain in the input.
    #
    def chars_left
      @str.size - @pos
    end

    # Position of the parse as a character offset into the original string.
    # @note: Encodings...
    attr_accessor :pos

    # Returns a <line, column> tuple for the given position. If no position is
    # given, line/column information is returned for the current position
    # given by #pos.
    #
    def line_and_column(position=nil)
      @line_cache.line_and_column(position || pos)
    end
  end
end
|