ghazel-parslet 1.4.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.txt +195 -0
- data/LICENSE +23 -0
- data/README +70 -0
- data/Rakefile +49 -0
- data/example/boolean_algebra.rb +70 -0
- data/example/calc.rb +153 -0
- data/example/comments.rb +35 -0
- data/example/deepest_errors.rb +131 -0
- data/example/documentation.rb +18 -0
- data/example/email_parser.rb +52 -0
- data/example/empty.rb +13 -0
- data/example/erb.rb +47 -0
- data/example/ignore.rb +33 -0
- data/example/ip_address.rb +125 -0
- data/example/json.rb +128 -0
- data/example/local.rb +34 -0
- data/example/mathn.rb +44 -0
- data/example/minilisp.rb +94 -0
- data/example/modularity.rb +47 -0
- data/example/nested_errors.rb +132 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/parens.out +8 -0
- data/example/output/readme.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/parens.rb +42 -0
- data/example/readme.rb +30 -0
- data/example/seasons.rb +46 -0
- data/example/sentence.rb +36 -0
- data/example/simple.lit +3 -0
- data/example/simple_xml.rb +54 -0
- data/example/string_parser.rb +77 -0
- data/example/test.lit +4 -0
- data/lib/parslet.rb +254 -0
- data/lib/parslet/atoms.rb +32 -0
- data/lib/parslet/atoms/alternative.rb +50 -0
- data/lib/parslet/atoms/base.rb +124 -0
- data/lib/parslet/atoms/can_flatten.rb +137 -0
- data/lib/parslet/atoms/context.rb +94 -0
- data/lib/parslet/atoms/dsl.rb +98 -0
- data/lib/parslet/atoms/entity.rb +41 -0
- data/lib/parslet/atoms/lookahead.rb +49 -0
- data/lib/parslet/atoms/named.rb +32 -0
- data/lib/parslet/atoms/re.rb +38 -0
- data/lib/parslet/atoms/repetition.rb +63 -0
- data/lib/parslet/atoms/rule.rb +12 -0
- data/lib/parslet/atoms/rule/position.rb +143 -0
- data/lib/parslet/atoms/sequence.rb +38 -0
- data/lib/parslet/atoms/str.rb +37 -0
- data/lib/parslet/atoms/visitor.rb +89 -0
- data/lib/parslet/cause.rb +94 -0
- data/lib/parslet/convenience.rb +35 -0
- data/lib/parslet/error_reporter.rb +7 -0
- data/lib/parslet/error_reporter/deepest.rb +95 -0
- data/lib/parslet/error_reporter/tree.rb +57 -0
- data/lib/parslet/export.rb +162 -0
- data/lib/parslet/expression.rb +51 -0
- data/lib/parslet/expression/treetop.rb +92 -0
- data/lib/parslet/parser.rb +67 -0
- data/lib/parslet/pattern.rb +114 -0
- data/lib/parslet/pattern/binding.rb +49 -0
- data/lib/parslet/rig/rspec.rb +51 -0
- data/lib/parslet/slice.rb +101 -0
- data/lib/parslet/source.rb +62 -0
- data/lib/parslet/source/line_cache.rb +95 -0
- data/lib/parslet/transform.rb +236 -0
- data/lib/parslet/transform/context.rb +32 -0
- metadata +264 -0
@@ -0,0 +1,137 @@
|
|
1
|
+
|
2
|
+
module Parslet
  module Atoms
    # Helper functions that reduce ("flatten") the annotated intermediate
    # results produced while parsing into the tree of Ruby Hashes, Arrays and
    # strings that users receive from a parse.
    #
    # The main entry point is #flatten; every other method is a building
    # block for it. None of the methods keep state, which is why they live
    # in a module instead of a class.
    #
    module CanFlatten
      # Takes a mixed value coming out of a parslet and converts it into the
      # value handed to the user, dropping and merging elements on the way.
      #
      # value - Either a tagged Array of the form [tag, *children], where tag
      #         is :sequence, :maybe or :repetition, or any other object.
      # named - true if the result is going to be embedded in a Hash created
      #         by <code>.as(...)</code>. This changes how empty :maybe and
      #         :repetition results are folded.
      #
      # Returns the flattened value. Anything that is not exactly an Array is
      # returned untouched. Fails with a RuntimeError on an unknown tag.
      #
      def flatten(value, named=false)
        # Passes through everything that isn't an array of things.
        return value unless value.instance_of? Array

        # The first element is the s-expression tag, the rest are children.
        tag, *tail = value

        # Children are flattened first, always as unnamed results.
        result = tail.map { |e| flatten(e) }

        case tag
        when :sequence
          return flatten_sequence(result)
        when :maybe
          # A named, absent maybe stays nil; an unnamed one becomes ''.
          return named ? result.first : (result.first || '')
        when :repetition
          return flatten_repetition(result, named)
        end

        fail "BUG: Unknown tag #{tag.inspect}."
      end

      # Lisp style fold left: the first element of list is the seed, the
      # remaining elements are injected into it using the given block.
      #
      # Returns '' for an empty list.
      #
      def foldl(list, &block)
        return '' if list.empty?
        list[1..-1].inject(list.first, &block)
      end

      # Flattens the results of a sequence of parslets by dropping nil
      # entries and merging the remaining ones pairwise with #merge_fold.
      #
      # @api private
      #
      def flatten_sequence(list)
        foldl(list.compact) { |r, e| merge_fold(r, e) }
      end

      # Merges two neighbouring sequence results into one.
      #
      # * Two values of the same class are merged: Hashes with Hash#merge
      #   (after warning about duplicate keys), everything else with +.
      # * Two stringlike values of different classes: the one responding to
      #   #to_slice wins.
      # * A stringlike value next to a non-stringlike one is discarded.
      # * A Hash next to a value of another class is combined through Array
      #   concatenation.
      #
      # Fails with a RuntimeError if none of these cases applies.
      #
      # @api private
      #
      def merge_fold(l, r)
        # equal pairs: merge. ------------------------------------------------
        if l.class == r.class
          if l.is_a?(Hash)
            warn_about_duplicate_keys(l, r)
            return l.merge(r)
          else
            return l + r
          end
        end

        # unequal pairs: hoist to same level. --------------------------------

        # Classes are not equal, but maybe both are stringlike?
        if l.respond_to?(:to_str) && r.respond_to?(:to_str)
          # If we're merging a String with a Slice, the slice wins.
          return r if r.respond_to? :to_slice
          return l if l.respond_to? :to_slice

          fail "NOTREACHED: What other stringlike classes are there?"
        end

        # Special case: if one of them is a string/slice, the other one is
        # more important.
        return l if r.respond_to? :to_str
        return r if l.respond_to? :to_str

        # Otherwise create an array for the Hash to live in.
        return l + [r] if r.class == Hash
        return [l] + r if l.class == Hash

        fail "Unhandled case when foldr'ing sequence."
      end

      # Flattens the results of a repetition of a single parslet.
      #
      # list  - Array of already flattened results, one per iteration.
      # named - true if the user named the result. A named empty list is
      #         kept as [], an unnamed one is turned into ''.
      #
      # Returns an Array of Hashes if any element is a Hash, the elements of
      # all nested Arrays if any element is an Array, and the concatenation
      # of the remaining elements otherwise.
      #
      # @api private
      #
      def flatten_repetition(list, named)
        if list.any? { |e| e.instance_of?(Hash) }
          # If keyed subtrees are in the array, all strings in between are
          # discarded. To keep them, name them.
          return list.select { |e| e.instance_of?(Hash) }
        end

        if list.any? { |e| e.instance_of?(Array) }
          # If any arrays are nested in this array, flatten all arrays to
          # this level.
          return list.
            select { |e| e.instance_of?(Array) }.
            flatten(1)
        end

        # Consistent handling of empty lists, when we act on a named result.
        return [] if named && list.empty?

        # Iterations that produced no value show up as nil. Concatenating
        # them with + would raise, so they are dropped before folding.
        strings = list.compact

        # A non-empty list made up of nils only has nothing to concatenate;
        # the result stays nil, as it always did for a single nil.
        return nil if strings.empty? && !list.empty?

        # If there are only strings left, concatenate them and return that.
        foldl(strings) { |s, e| s + e }
      end

      # That annoying warning 'Duplicate subtrees while merging result' comes
      # from here. Add more '.as(...)' names to your intermediary tree to get
      # rid of it.
      #
      def warn_about_duplicate_keys(h1, h2)
        d = h1.keys & h2.keys
        unless d.empty?
          warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
               " of the latter will be kept. (keys: #{d.inspect})"
        end
      end
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module Parslet::Atoms
  # Transient, per-parse state. It memoizes the answer of applying a parslet
  # object at a given input position (packrat style) and owns the error
  # reporter, so that error reporting stays pluggable.
  #
  class Context

    # Thin stack facade over an Array held in the +lrs+ member; the most
    # recently pushed entry is always the first element.
    # NOTE(review): the name suggests left-recursion bookkeeping -- confirm
    # against the callers, which are not part of this file.
    class LRStack < Struct.new(:lrs)
      # Puts lr on top of the stack.
      def push(lr)
        lrs.unshift(lr)
      end

      # Removes and returns the top entry (nil if the stack is empty).
      def pop
        lrs.shift
      end

      # Iterates over the entries, starting with the most recently pushed.
      def top_down(&block)
        lrs.each(&block)
      end
    end

    # lr_stack is the LRStack created for this context, heads is a Hash that
    # starts out empty. Neither is modified by this class itself.
    attr_reader :lr_stack, :heads

    # @param reporter [#err, #err_at] Error reporter (leave empty for default
    #   reporter). Pass nil to turn error reporting off.
    def initialize(reporter=Parslet::ErrorReporter::Tree.new)
      @reporter = reporter
      @lr_stack = LRStack.new([])
      @heads    = {}
      # position => { parslet object => [result, characters consumed] }
      @cache    = Hash.new { |h, k| h[k] = {} }
    end

    # Applies obj at the current position of source, answering from the cache
    # when obj has been tried at this position before.
    #
    # The whole source is needed because a cached answer must advance the
    # input position by exactly the amount the original attempt consumed.
    #
    # Returns whatever obj.try(source, self) returned for this position.
    #
    def try_with_cache(obj, source)
      start  = source.pos
      cached = lookup(obj, start)

      if cached
        answer, consumed = cached

        # This stretch of input has been read before and is known to produce
        # answer, so it is skipped instead of being parsed again.
        source.pos = start + consumed
        return answer
      end

      # First attempt at this position: run the parslet and remember both its
      # answer and how far it moved the input.
      answer = obj.try(source, self)
      set obj, start, [answer, source.pos - start]
      answer
    end

    # Report an error at a given position. All arguments are handed to the
    # reporter unchanged.
    # @see ErrorReporter
    #
    # Returns [false, cause]; cause is nil when there is no reporter.
    #
    def err_at(*args)
      cause = @reporter ? @reporter.err_at(*args) : nil
      [false, cause]
    end

    # Report an error. All arguments are handed to the reporter unchanged.
    # @see ErrorReporter
    #
    # Returns [false, cause]; cause is nil when there is no reporter.
    #
    def err(*args)
      cause = @reporter ? @reporter.err(*args) : nil
      [false, cause]
    end

    # The two cache accessors below are internal helpers, but they are not
    # declared private.

    # Returns the cache entry stored for obj at pos, or nil.
    def lookup(obj, pos)
      @cache[pos][obj]
    end

    # Stores val as the cache entry for obj at pos.
    def set(obj, pos, val)
      @cache[pos][obj] = val
    end
  end
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
|
2
|
+
# Mixin with the combinators that make parslet atoms chainable. Every method
# wraps the receiver (and possibly a second atom) in a new atom object, which
# is how small atoms are combined into bigger parsers.
#
# Example:
#
#   str('foo') >> str('bar')
#   str('f').repeat
#   any.absent?               # also called The Epsilon
#
module Parslet::Atoms::DSL
  # Builds an atom that matches the receiver at least min and at most max
  # times. A max of nil means there is no upper bound.
  #
  # Example:
  #   # match any number of 'a's
  #   str('a').repeat
  #
  #   # match between 1 and 3 'a's
  #   str('a').repeat(1,3)
  #
  def repeat(min=0, max=nil)
    return Parslet::Atoms::Repetition.new(self, min, max)
  end

  # Builds an atom that matches the receiver zero times or once. This is a
  # repetition with the bounds 0 and 1 that is additionally tagged :maybe.
  # The generated tree value is either nil (atom not present in the input)
  # or the matched subtree.
  #
  # Example:
  #   str('foo').maybe
  #
  def maybe
    return Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
  end

  # Sequence: the receiver followed by parslet.
  #
  # Example:
  #   str('a') >> str('b')
  #
  def >>(parslet)
    return Parslet::Atoms::Sequence.new(self, parslet)
  end

  # Alternation: the receiver is always attempted first; parslet is only
  # tried if the receiver doesn't match.
  #
  # Example:
  #   # matches either 'a' OR 'b'
  #   str('a') | str('b')
  #
  def |(parslet)
    return Parslet::Atoms::Alternative.new(self, parslet)
  end

  # Negative lookahead: tests that the receiver is absent from the input
  # stream, without consuming any input.
  #
  # Example:
  #   # Only proceed the parse if 'a' is absent.
  #   str('a').absent?
  #
  def absent?
    return Parslet::Atoms::Lookahead.new(self, false)
  end

  # Positive lookahead: tests that the receiver is present in the input
  # stream, without consuming any input.
  #
  # Example:
  #   # Only proceed the parse if 'a' is present.
  #   str('a').present?
  #
  def present?
    return Parslet::Atoms::Lookahead.new(self, true)
  end

  # Deprecated spelling of present?; will disappear in 2.0.
  #
  alias_method :prsnt?, :present?

  # Deprecated spelling of absent?; will disappear in 2.0.
  #
  alias_method :absnt?, :absent?

  # Marks the receiver as important for the tree output by giving its result
  # a name. Without names, #parse does not produce meaningful output.
  #
  # Example:
  #   str('a').as(:b) # will produce {:b => 'a'}
  #
  def as(name)
    return Parslet::Atoms::Named.new(self, name)
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# A named, lazily built piece of a parslet definition. The block passed to
# the constructor is only called the first time the wrapped parslet is needed
# and its result is cached afterwards. This serves two purposes:
#
# * Definitions that refer to themselves don't recurse infinitely while they
#   are being evaluated.
# * The entity prints as its name instead of its (sometimes complicated)
#   content.
#
# You don't normally use this directly; generate it by using the structuring
# method Parslet.rule instead.
#
class Parslet::Atoms::Entity < Parslet::Atoms::Base
  attr_reader :name, :block

  # name  - Name used when printing the entity and in error messages.
  # block - Block that returns the wrapped parslet when called.
  def initialize(name, &block)
    super()

    @name, @block = name, block
  end

  # Delegates parsing to the wrapped parslet.
  def try(source, context)
    parslet.apply(source, context)
  end

  # Returns the wrapped parslet, calling the block on first use.
  #
  # Raises NotImplementedError if the block returns nil or false.
  def parslet
    @parslet ||= begin
      built = @block.call
      raise_not_implemented unless built
      built
    end
  end

  # Entities print as their upper-cased name.
  def to_s_inner(prec)
    name.to_s.upcase
  end

private
  # Raises a NotImplementedError whose backtrace has all frames mentioning
  # this file removed, so that the trace starts in the user's own code.
  def raise_not_implemented
    own_file = %r{#{Regexp.escape(__FILE__)}}

    # Filtering idea taken from dependencies.rb in activesupport.
    filtered = caller.reject { |frame| frame =~ own_file }

    error = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    error.set_backtrace(filtered)

    raise error
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Positive or negative lookahead. Checks whether the bound parslet matches
# at the current position, but never consumes any input.
#
# Example:
#
#   str('foo').present? # matches when the input contains 'foo', but leaves it
#
class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
  attr_reader :positive
  attr_reader :bound_parslet

  # bound_parslet - The parslet to look ahead for.
  # positive      - true to require a match (positive lookahead), false to
  #                 require that there is no match (negative lookahead).
  def initialize(bound_parslet, positive=true)
    super()

    @bound_parslet = bound_parslet
    @positive      = positive

    # Both messages are built once, up front; #try picks one by polarity.
    @error_msgs = {
      :positive => ["Input should start with ", bound_parslet],
      :negative => ["Input should not start with ", bound_parslet]
    }
  end

  # Applies the bound parslet and turns its outcome into the outcome of the
  # lookahead: success (with a nil value) if the outcome agrees with the
  # polarity, an error at the starting position otherwise.
  def try(source, context)
    start = source.pos

    matched, _ = bound_parslet.apply(source, context)

    if positive ? matched : !matched
      succ(nil)
    else
      polarity = positive ? :positive : :negative
      context.err_at(self, source, @error_msgs[polarity], start)
    end
  ensure
    # Lookaheads NEVER consume their input, not even on success. That is why
    # the position is rewound here, whatever happened above.
    source.pos = start
  end

  # NOTE(review): `precedence` and LOOKAHEAD are not defined in this file;
  # presumably they come from Parslet::Atoms::Base -- confirm.
  precedence LOOKAHEAD

  # Prints as '&' (positive) or '!' (negative) followed by the bound parslet.
  def to_s_inner(prec)
    prefix = positive ? '&' : '!'

    "#{prefix}#{bound_parslet.to_s(prec)}"
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Gives the result of a parslet a name, which influences tree construction.
#
# Example:
#
#   str('foo')            # will return 'foo',
#   str('foo').as(:foo)   # will return :foo => 'foo'
#
class Parslet::Atoms::Named < Parslet::Atoms::Base
  attr_reader :parslet, :name

  # parslet - The parslet whose result gets named.
  # name    - Key under which the result is stored.
  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name    = name
  end

  # Applies the wrapped parslet. A failure is passed on unchanged; a success
  # is replaced by a success that carries the named result.
  def apply(source, context)
    outcome = parslet.apply(source, context)
    matched, tree = outcome

    matched ? succ(produce_return_value(tree)) : outcome
  end

  # Prints as "name:" followed by the wrapped parslet.
  def to_s_inner(prec)
    "#{name}:#{parslet.to_s(prec)}"
  end

private
  # Wraps the flattened value in a single-entry Hash keyed by the name. The
  # value is flattened with named set to true.
  # NOTE(review): #flatten is not defined in this file; presumably it is
  # CanFlatten#flatten, made available through Base -- confirm.
  def produce_return_value(val)
    { name => flatten(val, true) }
  end
end
|