ghazel-parslet 1.4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.txt +195 -0
- data/LICENSE +23 -0
- data/README +70 -0
- data/Rakefile +49 -0
- data/example/boolean_algebra.rb +70 -0
- data/example/calc.rb +153 -0
- data/example/comments.rb +35 -0
- data/example/deepest_errors.rb +131 -0
- data/example/documentation.rb +18 -0
- data/example/email_parser.rb +52 -0
- data/example/empty.rb +13 -0
- data/example/erb.rb +47 -0
- data/example/ignore.rb +33 -0
- data/example/ip_address.rb +125 -0
- data/example/json.rb +128 -0
- data/example/local.rb +34 -0
- data/example/mathn.rb +44 -0
- data/example/minilisp.rb +94 -0
- data/example/modularity.rb +47 -0
- data/example/nested_errors.rb +132 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/parens.out +8 -0
- data/example/output/readme.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/parens.rb +42 -0
- data/example/readme.rb +30 -0
- data/example/seasons.rb +46 -0
- data/example/sentence.rb +36 -0
- data/example/simple.lit +3 -0
- data/example/simple_xml.rb +54 -0
- data/example/string_parser.rb +77 -0
- data/example/test.lit +4 -0
- data/lib/parslet.rb +254 -0
- data/lib/parslet/atoms.rb +32 -0
- data/lib/parslet/atoms/alternative.rb +50 -0
- data/lib/parslet/atoms/base.rb +124 -0
- data/lib/parslet/atoms/can_flatten.rb +137 -0
- data/lib/parslet/atoms/context.rb +94 -0
- data/lib/parslet/atoms/dsl.rb +98 -0
- data/lib/parslet/atoms/entity.rb +41 -0
- data/lib/parslet/atoms/lookahead.rb +49 -0
- data/lib/parslet/atoms/named.rb +32 -0
- data/lib/parslet/atoms/re.rb +38 -0
- data/lib/parslet/atoms/repetition.rb +63 -0
- data/lib/parslet/atoms/rule.rb +12 -0
- data/lib/parslet/atoms/rule/position.rb +143 -0
- data/lib/parslet/atoms/sequence.rb +38 -0
- data/lib/parslet/atoms/str.rb +37 -0
- data/lib/parslet/atoms/visitor.rb +89 -0
- data/lib/parslet/cause.rb +94 -0
- data/lib/parslet/convenience.rb +35 -0
- data/lib/parslet/error_reporter.rb +7 -0
- data/lib/parslet/error_reporter/deepest.rb +95 -0
- data/lib/parslet/error_reporter/tree.rb +57 -0
- data/lib/parslet/export.rb +162 -0
- data/lib/parslet/expression.rb +51 -0
- data/lib/parslet/expression/treetop.rb +92 -0
- data/lib/parslet/parser.rb +67 -0
- data/lib/parslet/pattern.rb +114 -0
- data/lib/parslet/pattern/binding.rb +49 -0
- data/lib/parslet/rig/rspec.rb +51 -0
- data/lib/parslet/slice.rb +101 -0
- data/lib/parslet/source.rb +62 -0
- data/lib/parslet/source/line_cache.rb +95 -0
- data/lib/parslet/transform.rb +236 -0
- data/lib/parslet/transform/context.rb +32 -0
- metadata +264 -0
@@ -0,0 +1,137 @@
|
|
1
|
+
|
2
|
+
module Parslet::Atoms
|
3
|
+
# A series of helper functions that have the common topic of flattening
|
4
|
+
# result values into the intermediary tree that consists of Ruby Hashes and
|
5
|
+
# Arrays.
|
6
|
+
#
|
7
|
+
# This module has one main function, #flatten, that takes an annotated
|
8
|
+
# structure as input and returns the reduced form that users expect from
|
9
|
+
# Atom#parse.
|
10
|
+
#
|
11
|
+
# NOTE: Since all of these functions are just that, functions without
|
12
|
+
# side effects, they are in a module and not in a class. It's hard to draw
|
13
|
+
# the line sometimes, but this is beyond.
|
14
|
+
#
|
15
|
+
module CanFlatten
  # Takes a mixed value coming out of a parslet and converts it to a return
  # value for the user by dropping things and merging hashes.
  #
  # Anything that is not exactly an Array is passed through untouched. An
  # Array is read as an s-expression <code>[tag, *children]</code>; every
  # child is flattened first (always as an unnamed result) and the children
  # are then combined according to the tag.
  #
  # Named is set to true if this result will be embedded in a Hash result from
  # naming something using <code>.as(...)</code>. It changes the folding
  # semantics of :maybe and :repetition (an absent/empty result is kept as
  # nil / [] instead of being turned into an empty string).
  #
  # @param value [Object] raw result; Arrays must start with :sequence,
  #   :maybe or :repetition
  # @param named [Boolean] whether the result is about to be named
  # @return [Object] the reduced result
  # @raise [RuntimeError] if an Array carries an unknown tag
  #
  def flatten(value, named=false)
    # Passes through everything that isn't an array of things
    return value unless value.instance_of? Array

    # Extracts the s-expression tag
    tag, *tail = value

    # First flatten each element, then merge according to the tag
    result = tail.map { |e| flatten(e) }

    case tag
    when :sequence
      flatten_sequence(result)
    when :maybe
      named ? result.first : (result.first || '')
    when :repetition
      flatten_repetition(result, named)
    else
      fail "BUG: Unknown tag #{tag.inspect}."
    end
  end

  # Lisp style fold left where the first element builds the basis for
  # an inject. An empty list folds to the empty string.
  #
  # @param list [Array] elements to fold
  # @yield [memo, element] combines the running result with the next element
  # @return [Object] '' for an empty list, otherwise the folded value
  #
  def foldl(list, &block)
    return '' if list.empty?

    # Without an explicit seed, inject starts from the first element - which
    # is exactly the fold we want, without copying the tail of the list.
    list.inject(&block)
  end

  # Flatten results from a sequence of parslets. nil results are dropped,
  # everything else is merged pairwise from the left using #merge_fold.
  #
  # @api private
  #
  def flatten_sequence(list)
    foldl(list.compact) { |r, e| merge_fold(r, e) }
  end

  # Merges two neighbouring results of a sequence into one.
  #
  # @api private
  # @param l [Object] result accumulated so far
  # @param r [Object] next result
  # @return [Object] merged result
  # @raise [RuntimeError] if the pair cannot be combined
  #
  def merge_fold(l, r)
    # equal pairs: merge. ----------------------------------------------------
    if l.class == r.class
      return l + r unless l.is_a?(Hash)

      # On duplicate keys the right hand side wins; tell the user about it.
      warn_about_duplicate_keys(l, r)
      return l.merge(r)
    end

    # unequal pairs: hoist to same level. ------------------------------------

    # Maybe classes are not equal, but both are stringlike?
    if l.respond_to?(:to_str) && r.respond_to?(:to_str)
      # if we're merging a String with a Slice, the slice wins.
      return r if r.respond_to? :to_slice
      return l if l.respond_to? :to_slice

      fail "NOTREACHED: What other stringlike classes are there?"
    end

    # special case: If one of them is a string/slice, the other is more important
    return l if r.respond_to? :to_str
    return r if l.respond_to? :to_str

    # otherwise just create an array for one of them to live in
    return l + [r] if r.class == Hash
    return [l] + r if l.class == Hash

    fail "Unhandled case when foldr'ing sequence."
  end

  # Flatten results from a repetition of a single parslet. named indicates
  # whether the user has named the result or not. If the user has named
  # the results, we want to leave an empty list alone - otherwise it is
  # turned into an empty string.
  #
  # @api private
  #
  def flatten_repetition(list, named)
    # If keyed subtrees are in the array, we'll want to discard all
    # strings inbetween. To keep them, name them.
    hashes = list.select { |e| e.instance_of?(Hash) }
    return hashes unless hashes.empty?

    # If any arrays are nested in this array, flatten all arrays to this
    # level (everything that is not an array is dropped).
    arrays = list.select { |e| e.instance_of?(Array) }
    return arrays.flatten(1) unless arrays.empty?

    # Consistent handling of empty lists, when we act on a named result
    return [] if named && list.empty?

    # If there are only strings, concatenate them and return that.
    foldl(list) { |s, e| s + e }
  end

  # That annoying warning 'Duplicate subtrees while merging result' comes
  # from here. You should add more '.as(...)' names to your intermediary tree.
  #
  # @param h1 [Hash] left hand side of the merge
  # @param h2 [Hash] right hand side of the merge (its values win)
  #
  def warn_about_duplicate_keys(h1, h2)
    d = h1.keys & h2.keys
    return if d.empty?

    warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
         " of the latter will be kept. (keys: #{d.inspect})"
  end
end
|
137
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module Parslet::Atoms
|
2
|
+
# Helper class that implements a transient cache that maps position and
|
3
|
+
# parslet object to results. This is used for memoization in the packrat
|
4
|
+
# style.
|
5
|
+
#
|
6
|
+
# Also, error reporter is stored here and error reporting happens through
|
7
|
+
# this class. This makes the reporting pluggable.
|
8
|
+
#
|
9
|
+
class Context
  # Small stack wrapper around an Array. The most recently pushed element
  # lives at the front of +lrs+, so iterating +lrs+ visits entries from the
  # top of the stack downwards.
  LRStack = Struct.new(:lrs) do
    # Puts +lr+ on top of the stack.
    def push(lr)
      lrs.unshift(lr)
    end

    # Removes and returns the top of the stack (nil when empty).
    def pop
      lrs.shift
    end

    # Yields every entry, starting with the most recently pushed one.
    def top_down(&block)
      lrs.each(&block)
    end
  end

  # lr_stack is the LRStack created in #initialize; heads is a Hash that
  # starts out empty. Neither is used inside this class itself.
  # NOTE(review): presumably both are bookkeeping for left-recursion support
  # maintained by callers - confirm against the code that uses them.
  attr_reader :lr_stack, :heads

  # @param reporter [#err, #err_at] Error reporter (leave empty for default
  #   reporter). Passing nil disables reporting: #err and #err_at then
  #   return [false, nil].
  def initialize(reporter=Parslet::ErrorReporter::Tree.new)
    # Two level cache: position => { parslet object => [result, advance] }
    @cache = Hash.new { |h, k| h[k] = {} }
    @reporter = reporter
    @heads = {}
    @lr_stack = LRStack.new([])
  end

  # Caches a parse answer for obj at source.pos. Applying the same parslet
  # at one position of input always yields the same result, unless the input
  # has changed.
  #
  # We need the entire source here so we can ask for how many characters
  # were consumed by a successful parse. Imitation of such a parse must
  # advance the input pos by the same amount of bytes.
  #
  # @param obj [#try] parslet to apply; called as obj.try(source, self)
  # @param source [#pos, #pos=] input being parsed
  # @return [Object] whatever obj.try returned (now or when it was cached)
  #
  def try_with_cache(obj, source)
    beg = source.pos
    entry = lookup(obj, beg)

    if entry
      result, advance = entry

      # The data we're skipping here has been read before. (since it is in
      # the cache) PLUS the actual contents are not interesting anymore since
      # we know what obj answers at beg. So skip reading.
      source.pos = beg + advance
      return result
    end

    # Not in cache yet: run the parslet and remember both its answer and how
    # far it moved the source.
    result = obj.try(source, self)
    set obj, beg, [result, source.pos - beg]
    result
  end

  # Report an error at a given position.
  # @see ErrorReporter
  #
  # @return [Array] [false, cause] where cause is what the reporter returned,
  #   or nil if there is no reporter
  #
  def err_at(*args)
    [false, @reporter ? @reporter.err_at(*args) : nil]
  end

  # Report an error.
  # @see ErrorReporter
  #
  # @return [Array] [false, cause] where cause is what the reporter returned,
  #   or nil if there is no reporter
  #
  def err(*args)
    [false, @reporter ? @reporter.err(*args) : nil]
  end

  # NOTE: #lookup and #set are deliberately left public (the original marks
  # them with a commented-out 'private').

  # Returns the cache entry stored for obj at pos, or nil if there is none.
  def lookup(obj, pos)
    @cache[pos][obj]
  end

  # Stores val as the cache entry for obj at pos.
  def set(obj, pos, val)
    @cache[pos][obj] = val
  end
end
|
94
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
|
2
|
+
# A mixin module that defines operations that can be called on any subclass
|
3
|
+
# of Parslet::Atoms::Base. These operations make parslet atoms chainable and
|
4
|
+
# allow combination of parslet atoms to form bigger parsers.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# str('foo') >> str('bar')
|
9
|
+
# str('f').repeat
|
10
|
+
# any.absent? # also called The Epsilon
|
11
|
+
#
|
12
|
+
module Parslet::Atoms::DSL
  # Wraps the receiver in a Parslet::Atoms::Repetition that must match at
  # least +min+ and at most +max+ times. A +max+ of nil means "no upper
  # bound".
  #
  # Example:
  #   str('a').repeat       # any number of 'a's
  #   str('a').repeat(1,3)  # between one and three 'a's
  #
  def repeat(min=0, max=nil)
    Parslet::Atoms::Repetition.new(self, min, max)
  end

  # Makes the receiver optional. This is synonymous to calling #repeat(0,1);
  # the repetition is additionally given the :maybe tag. Generated tree value
  # will be either nil (if atom is not present in the input) or the matched
  # subtree.
  #
  # Example:
  #   str('foo').maybe
  #
  def maybe
    Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
  end

  # Sequence: the receiver followed by +parslet+.
  #
  # Example:
  #   str('a') >> str('b')
  #
  def >>(parslet)
    Parslet::Atoms::Sequence.new(self, parslet)
  end

  # Alternation: the receiver (left side) is always attempted first; only
  # if it does not match is +parslet+ (right side) tried.
  #
  # Example:
  #   str('a') | str('b')   # matches either 'a' OR 'b'
  #
  def |(parslet)
    Parslet::Atoms::Alternative.new(self, parslet)
  end

  # Negative lookahead: checks that the receiver is absent from the input
  # stream, without consuming anything.
  #
  # Example:
  #   str('a').absent?   # only proceed the parse if 'a' is absent
  #
  def absent?
    Parslet::Atoms::Lookahead.new(self, false)
  end

  # Positive lookahead: checks that the receiver is present in the input
  # stream, without consuming anything.
  #
  # Example:
  #   str('a').present?  # only proceed the parse if 'a' is present
  #
  def present?
    Parslet::Atoms::Lookahead.new(self, true)
  end

  # Deprecated spelling of present?; will disappear in 2.0.
  alias_method :prsnt?, :present?

  # Deprecated spelling of absent?; will disappear in 2.0.
  alias_method :absnt?, :absent?

  # Names the receiver's result so that it shows up in the tree output. This
  # must be used to achieve meaningful output from the #parse method.
  #
  # Example:
  #   str('a').as(:b) # will produce {:b => 'a'}
  #
  def as(name)
    Parslet::Atoms::Named.new(self, name)
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# This wraps pieces of parslet definition and gives them a name. The wrapped
|
2
|
+
# piece is lazily evaluated and cached. This has two purposes:
|
3
|
+
#
|
4
|
+
# * Avoid infinite recursion during evaluation of the definition
|
5
|
+
# * Be able to print things by their name, not by their sometimes
|
6
|
+
# complicated content.
|
7
|
+
#
|
8
|
+
# You don't normally use this directly, instead you should generate it by
|
9
|
+
# using the structuring method Parslet.rule.
|
10
|
+
#
|
11
|
+
class Parslet::Atoms::Entity < Parslet::Atoms::Base
  attr_reader :name, :block

  # @param name [Object] name of the rule; shown upcased by #to_s_inner
  # @param block [Proc] definition of the rule; not called before #parslet
  #   is first asked for
  def initialize(name, &block)
    super()

    @block = block
    @name = name
  end

  # Delegates to the wrapped parslet.
  def try(source, context)
    parslet.apply(source, context)
  end

  # Returns the wrapped parslet. The definition block is called on first use
  # and its (truthy) result is remembered for later calls.
  #
  # @raise [NotImplementedError] if the block returns nil or false
  def parslet
    return @parslet if @parslet

    definition = @block.call
    raise_not_implemented unless definition

    @parslet = definition
  end

  # An entity prints as its upcased name, not as its content.
  def to_s_inner(prec)
    name.to_s.upcase
  end

  private

  # Raises NotImplementedError with a backtrace from which all frames
  # located in this file have been removed.
  # (Filtering blatantly stolen from dependencies.rb in activesupport.)
  def raise_not_implemented
    this_file = %r{#{Regexp.escape(__FILE__)}}
    foreign_frames = caller.reject { |frame| frame =~ this_file }

    error = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    error.set_backtrace(foreign_frames)

    raise error
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Either positive or negative lookahead, doesn't consume its input.
|
2
|
+
#
|
3
|
+
# Example:
|
4
|
+
#
|
5
|
+
# str('foo').present? # matches when the input contains 'foo', but leaves it
|
6
|
+
#
|
7
|
+
class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
  attr_reader :positive
  attr_reader :bound_parslet

  # @param bound_parslet [Object] parslet that is tested against the input
  # @param positive [Boolean] true for positive lookahead, false for
  #   negative lookahead
  def initialize(bound_parslet, positive=true)
    super()

    @bound_parslet = bound_parslet
    # Positive and negative lookahead are modelled by testing this flag.
    @positive = positive

    # Error messages, prepared once, for the two flavours of lookahead.
    @error_msgs = {
      :positive => ["Input should start with ", bound_parslet],
      :negative => ["Input should not start with ", bound_parslet]
    }
  end

  # Applies the bound parslet and succeeds (with a nil value) when the
  # outcome is the one this lookahead asks for; otherwise reports an error
  # at the position where the lookahead started.
  #
  # This is probably the only parslet that rewinds its input in #try.
  # Lookaheads NEVER consume their input, even on success - hence the ensure.
  def try(source, context)
    pos = source.pos

    matched, _ = bound_parslet.apply(source, context)

    satisfied = positive ? matched : !matched
    return succ(nil) if satisfied

    flavour = positive ? :positive : :negative
    context.err_at(self, source, @error_msgs[flavour], pos)
  ensure
    source.pos = pos
  end

  precedence LOOKAHEAD
  # Prints as '&' (positive) or '!' (negative) followed by the bound parslet.
  def to_s_inner(prec)
    "#{positive ? '&' : '!'}#{bound_parslet.to_s(prec)}"
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Names a match to influence tree construction.
|
2
|
+
#
|
3
|
+
# Example:
|
4
|
+
#
|
5
|
+
# str('foo') # will return 'foo',
|
6
|
+
# str('foo').as(:foo) # will return :foo => 'foo'
|
7
|
+
#
|
8
|
+
class Parslet::Atoms::Named < Parslet::Atoms::Base
  attr_reader :parslet, :name

  # @param parslet [Object] the parslet whose result gets named
  # @param name [Object] key under which the result is returned
  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet. A failure is handed back unchanged; on
  # success the value is flattened (as a named result) and wrapped into a
  # single-key Hash.
  def apply(source, context)
    result = parslet.apply(source, context)
    success, value = result

    return result unless success

    succ(produce_return_value(value))
  end

  # Prints as name:parslet.
  def to_s_inner(prec)
    "#{name}:#{parslet.to_s(prec)}"
  end

  private

  # Builds the { name => flattened value } Hash for a successful match.
  def produce_return_value(val)
    { name => flatten(val, true) }
  end
end
|