ghazel-parslet 1.4.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/HISTORY.txt +195 -0
  2. data/LICENSE +23 -0
  3. data/README +70 -0
  4. data/Rakefile +49 -0
  5. data/example/boolean_algebra.rb +70 -0
  6. data/example/calc.rb +153 -0
  7. data/example/comments.rb +35 -0
  8. data/example/deepest_errors.rb +131 -0
  9. data/example/documentation.rb +18 -0
  10. data/example/email_parser.rb +52 -0
  11. data/example/empty.rb +13 -0
  12. data/example/erb.rb +47 -0
  13. data/example/ignore.rb +33 -0
  14. data/example/ip_address.rb +125 -0
  15. data/example/json.rb +128 -0
  16. data/example/local.rb +34 -0
  17. data/example/mathn.rb +44 -0
  18. data/example/minilisp.rb +94 -0
  19. data/example/modularity.rb +47 -0
  20. data/example/nested_errors.rb +132 -0
  21. data/example/output/boolean_algebra.out +4 -0
  22. data/example/output/calc.out +1 -0
  23. data/example/output/comments.out +8 -0
  24. data/example/output/deepest_errors.out +54 -0
  25. data/example/output/documentation.err +4 -0
  26. data/example/output/documentation.out +1 -0
  27. data/example/output/email_parser.out +2 -0
  28. data/example/output/empty.err +1 -0
  29. data/example/output/erb.out +7 -0
  30. data/example/output/ignore.out +1 -0
  31. data/example/output/ignore_whitespace.out +1 -0
  32. data/example/output/ip_address.out +9 -0
  33. data/example/output/json.out +5 -0
  34. data/example/output/local.out +3 -0
  35. data/example/output/mathn.out +4 -0
  36. data/example/output/minilisp.out +5 -0
  37. data/example/output/modularity.out +0 -0
  38. data/example/output/nested_errors.out +54 -0
  39. data/example/output/parens.out +8 -0
  40. data/example/output/readme.out +1 -0
  41. data/example/output/seasons.out +28 -0
  42. data/example/output/sentence.out +1 -0
  43. data/example/output/simple_xml.out +2 -0
  44. data/example/output/string_parser.out +3 -0
  45. data/example/parens.rb +42 -0
  46. data/example/readme.rb +30 -0
  47. data/example/seasons.rb +46 -0
  48. data/example/sentence.rb +36 -0
  49. data/example/simple.lit +3 -0
  50. data/example/simple_xml.rb +54 -0
  51. data/example/string_parser.rb +77 -0
  52. data/example/test.lit +4 -0
  53. data/lib/parslet.rb +254 -0
  54. data/lib/parslet/atoms.rb +32 -0
  55. data/lib/parslet/atoms/alternative.rb +50 -0
  56. data/lib/parslet/atoms/base.rb +124 -0
  57. data/lib/parslet/atoms/can_flatten.rb +137 -0
  58. data/lib/parslet/atoms/context.rb +94 -0
  59. data/lib/parslet/atoms/dsl.rb +98 -0
  60. data/lib/parslet/atoms/entity.rb +41 -0
  61. data/lib/parslet/atoms/lookahead.rb +49 -0
  62. data/lib/parslet/atoms/named.rb +32 -0
  63. data/lib/parslet/atoms/re.rb +38 -0
  64. data/lib/parslet/atoms/repetition.rb +63 -0
  65. data/lib/parslet/atoms/rule.rb +12 -0
  66. data/lib/parslet/atoms/rule/position.rb +143 -0
  67. data/lib/parslet/atoms/sequence.rb +38 -0
  68. data/lib/parslet/atoms/str.rb +37 -0
  69. data/lib/parslet/atoms/visitor.rb +89 -0
  70. data/lib/parslet/cause.rb +94 -0
  71. data/lib/parslet/convenience.rb +35 -0
  72. data/lib/parslet/error_reporter.rb +7 -0
  73. data/lib/parslet/error_reporter/deepest.rb +95 -0
  74. data/lib/parslet/error_reporter/tree.rb +57 -0
  75. data/lib/parslet/export.rb +162 -0
  76. data/lib/parslet/expression.rb +51 -0
  77. data/lib/parslet/expression/treetop.rb +92 -0
  78. data/lib/parslet/parser.rb +67 -0
  79. data/lib/parslet/pattern.rb +114 -0
  80. data/lib/parslet/pattern/binding.rb +49 -0
  81. data/lib/parslet/rig/rspec.rb +51 -0
  82. data/lib/parslet/slice.rb +101 -0
  83. data/lib/parslet/source.rb +62 -0
  84. data/lib/parslet/source/line_cache.rb +95 -0
  85. data/lib/parslet/transform.rb +236 -0
  86. data/lib/parslet/transform/context.rb +32 -0
  87. metadata +264 -0
@@ -0,0 +1,137 @@
1
+
2
module Parslet::Atoms
  # Helper functions that share one topic: reducing ("flattening") result
  # values into the intermediary tree made of Ruby Hashes and Arrays.
  #
  # The main entry point is #flatten. It takes an annotated structure and
  # returns the reduced form that users expect from Atom#parse.
  #
  # NOTE: These are plain functions without side effects (apart from the
  # duplicate-key warning), which is why they live in a module and not in a
  # class. It's hard to draw the line sometimes, but this is beyond.
  #
  module CanFlatten
    # Converts a mixed value coming out of a parslet into the value that is
    # returned to the user, by dropping things and merging hashes.
    #
    # value is passed through untouched unless it is exactly an Array. An
    # Array is read as an s-expression: its first element is the tag
    # (:sequence, :maybe or :repetition), the remaining elements are the
    # operands. Operands are flattened recursively (always with named=false)
    # before the tag-specific reduction is applied.
    #
    # named is true if this result will be embedded in a Hash result from
    # naming something using <code>.as(...)</code>. It changes the folding
    # semantics of :maybe and :repetition.
    #
    # Fails with a "BUG: Unknown tag" message for any other tag.
    #
    def flatten(value, named=false)
      # Everything that isn't an array of things is already in final form.
      return value unless value.instance_of? Array

      # Split the s-expression into its tag and its operands.
      tag, *operands = value

      # Reduce every operand first, then combine according to the tag.
      flattened = operands.map { |operand| flatten(operand) }

      case tag
      when :sequence
        flatten_sequence(flattened)
      when :maybe
        # A named, absent optional stays nil; an unnamed one becomes ''.
        named ? flattened.first : (flattened.first || '')
      when :repetition
        flatten_repetition(flattened, named)
      else
        fail "BUG: Unknown tag #{tag.inspect}."
      end
    end

    # Lisp style fold left: the first element of list seeds the inject and
    # the block combines it with each following element in turn. An empty
    # list yields the empty string.
    #
    def foldl(list, &block)
      return '' if list.empty?

      list.drop(1).inject(list.first, &block)
    end

    # Flattens the results from a sequence of parslets: nil entries are
    # dropped and the remaining ones are merged pairwise, left to right,
    # using #merge_fold.
    #
    # @api private
    #
    def flatten_sequence(list)
      present = list.compact
      foldl(present) { |merged, element| merge_fold(merged, element) }
    end

    # Merges two neighbouring sequence results into one.
    #
    # @api private
    def merge_fold(l, r)
      # Same class on both sides: merge directly. Hashes are merged (with a
      # warning on key collisions), everything else is combined using +.
      if l.class == r.class
        return l + r unless l.is_a?(Hash)

        warn_about_duplicate_keys(l, r)
        return l.merge(r)
      end

      # Different classes from here on: hoist both to the same level.
      l_stringlike = l.respond_to?(:to_str)
      r_stringlike = r.respond_to?(:to_str)

      if l_stringlike && r_stringlike
        # Classes differ but both are stringlike. When merging a String with
        # a Slice, the slice wins.
        return r if r.respond_to? :to_slice
        return l if l.respond_to? :to_slice

        fail "NOTREACHED: What other stringlike classes are there?"
      end

      # If only one side is a string/slice, the other side is more important.
      return l if r_stringlike
      return r if l_stringlike

      # Otherwise wrap the Hash in an array so it can join the other operand.
      return l + [r] if r.class == Hash
      return [l] + r if l.class == Hash

      fail "Unhandled case when foldr'ing sequence."
    end

    # Flattens the results from a repetition of a single parslet. named
    # indicates whether the user has named the result or not. If the user
    # has named the result, an empty list is left alone - otherwise it is
    # turned into an empty string.
    #
    # @api private
    #
    def flatten_repetition(list, named)
      # If keyed subtrees are present, keep only those and discard all
      # strings in between. To keep the strings, name them.
      hashes = list.select { |element| element.instance_of?(Hash) }
      return hashes unless hashes.empty?

      # If arrays are nested in this list, keep only the arrays and splice
      # them together by one level.
      arrays = list.select { |element| element.instance_of?(Array) }
      return arrays.flatten(1) unless arrays.empty?

      # Consistent handling of empty lists when acting on a named result.
      return [] if named && list.empty?

      # Only strings are left: concatenate them and return that.
      foldl(list) { |joined, element| joined + element }
    end

    # Emits the warning 'Duplicate subtrees while merging result' when both
    # hashes share at least one key. The remedy is to add more '.as(...)'
    # names to the intermediary tree.
    #
    def warn_about_duplicate_keys(h1, h2)
      shared = h1.keys & h2.keys
      return if shared.empty?

      message = "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values" +
        " of the latter will be kept. (keys: #{shared.inspect})"
      warn message
    end
  end
end
@@ -0,0 +1,94 @@
1
module Parslet::Atoms
  # Helper class holding a transient cache that maps an input position and
  # a parslet object to the result of applying that parslet there. This is
  # used for memoization in the packrat style.
  #
  # The error reporter is stored here as well and all error reporting goes
  # through this class, which makes the reporting pluggable.
  #
  class Context

    # Thin wrapper around an array (lrs) that is used as a stack whose top
    # is the front of the array.
    class LRStack < Struct.new(:lrs)
      # Puts lr on top of the stack.
      def push(lr)
        lrs.unshift(lr)
      end

      # Removes and returns the top of the stack.
      def pop
        lrs.shift
      end

      # Iterates over the entries, starting at the top of the stack.
      def top_down(&visitor)
        lrs.each(&visitor)
      end
    end

    attr_reader :lr_stack, :heads

    # @param reporter [#err, #err_at] Error reporter (leave empty for default
    #   reporter)
    def initialize(reporter=Parslet::ErrorReporter::Tree.new)
      # Two-level cache: position => { parslet => [result, advance] }.
      @cache = Hash.new { |by_pos, pos| by_pos[pos] = {} }
      @reporter = reporter
      @heads = {}
      @lr_stack = LRStack.new([])
    end

    # Caches a parse answer for obj at source.pos. Applying the same parslet
    # at one position of input always yields the same result, unless the
    # input has changed.
    #
    # The entire source is needed here so that the number of characters
    # consumed by a parse can be recorded. Replaying a cached parse must
    # advance the input position by that same amount.
    #
    def try_with_cache(obj, source)
      start = source.pos
      cached = lookup(obj, start)

      if cached
        # The data being skipped has been read before (it is in the cache)
        # and its contents are no longer interesting, so just move the
        # position forward instead of reading again.
        result, advance = cached
        source.pos = start + advance
        return result
      end

      # Cache miss: run the parslet and remember both the result and how far
      # the source position moved.
      result = obj.try(source, self)
      set obj, start, [result, source.pos - start]
      result
    end

    # Report an error at a given position. Returns a [false, cause] pair,
    # where cause is nil when no reporter is set.
    # @see ErrorReporter
    #
    def err_at(*args)
      cause = @reporter ? @reporter.err_at(*args) : nil
      [false, cause]
    end

    # Report an error. Returns a [false, cause] pair, where cause is nil
    # when no reporter is set.
    # @see ErrorReporter
    #
    def err(*args)
      cause = @reporter ? @reporter.err(*args) : nil
      [false, cause]
    end

    #private

    # Returns the cache entry stored for obj at pos, or nil.
    def lookup(obj, pos)
      @cache[pos][obj]
    end

    # Stores val as the cache entry for obj at pos.
    def set(obj, pos, val)
      @cache[pos][obj] = val
    end
  end
end
@@ -0,0 +1,98 @@
1
+
2
# Mixin that defines the operations available on any subclass of
# Parslet::Atoms::Base. These operations make parslet atoms chainable, so
# that small atoms can be combined into bigger parsers.
#
# Example:
#
#   str('foo') >> str('bar')
#   str('f').repeat
#   any.absent?               # also called The Epsilon
#
module Parslet::Atoms::DSL
  # Builds a new atom that repeats the current atom at least min times and
  # at most max times. Pass nil as max to indicate that there is no upper
  # bound.
  #
  # Example:
  #   # match any number of 'a's
  #   str('a').repeat
  #
  #   # match between 1 and 3 'a's
  #   str('a').repeat(1,3)
  #
  def repeat(min=0, max=nil)
    Parslet::Atoms::Repetition.new(self, min, max)
  end

  # Builds a new atom that is optionally present in the input. It matches
  # the same input as #repeat(0,1), but the repetition is tagged :maybe.
  # According to the original documentation the generated tree value is
  # either nil (atom not present in the input) or the matched subtree.
  #
  # Example:
  #   str('foo').maybe
  #
  def maybe
    Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
  end

  # Chains this atom and parslet together as a sequence.
  #
  # Example:
  #   str('a') >> str('b')
  #
  def >>(parslet)
    Parslet::Atoms::Sequence.new(self, parslet)
  end

  # Chains this atom and parslet together as an alternation. The left side
  # is always attempted first; only if it doesn't match is the right side
  # tried.
  #
  # Example:
  #   # matches either 'a' OR 'b'
  #   str('a') | str('b')
  #
  def |(parslet)
    Parslet::Atoms::Alternative.new(self, parslet)
  end

  # Negative lookahead: tests that this atom is absent from the input stream
  # without consuming any input.
  #
  # Example:
  #   # Only proceed the parse if 'a' is absent.
  #   str('a').absent?
  #
  def absent?
    Parslet::Atoms::Lookahead.new(self, false)
  end

  # Positive lookahead: tests that this atom is present in the input stream
  # without consuming any input.
  #
  # Example:
  #   # Only proceed the parse if 'a' is present.
  #   str('a').present?
  #
  def present?
    Parslet::Atoms::Lookahead.new(self, true)
  end

  # Deprecated alias for present?; it will disappear in 2.0.
  #
  alias prsnt? present?

  # Deprecated alias for absent?; it will disappear in 2.0.
  #
  alias absnt? absent?

  # Marks this atom as important for the tree output by giving it a name.
  # This must be used to get meaningful output from the #parse method.
  #
  # Example:
  #   str('a').as(:b) # will produce {:b => 'a'}
  #
  def as(name)
    Parslet::Atoms::Named.new(self, name)
  end
end
@@ -0,0 +1,41 @@
1
# Wraps a piece of parslet definition and gives it a name. The wrapped
# piece is evaluated lazily and then cached. This serves two purposes:
#
# * It avoids infinite recursion during evaluation of the definition.
# * It allows printing things by their name instead of by their sometimes
#   complicated content.
#
# You don't normally use this class directly; instead you should generate
# it by using the structuring method Parslet.rule.
#
class Parslet::Atoms::Entity < Parslet::Atoms::Base
  attr_reader :name, :block

  # name labels the entity; block is called later (once) to obtain the
  # wrapped parslet.
  def initialize(name, &block)
    super()

    @name = name
    @block = block
  end

  # Delegates to the wrapped parslet.
  def try(source, context)
    parslet.apply(source, context)
  end

  # Returns the wrapped parslet. The block is called on first use and a
  # truthy result is memoized; a nil/false result raises
  # NotImplementedError instead.
  def parslet
    @parslet ||= @block.call.tap { |definition|
      raise_not_implemented unless definition
    }
  end

  # An entity prints as its upper-cased name, regardless of prec.
  def to_s_inner(prec)
    name.to_s.upcase
  end

  private

  # Raises NotImplementedError with a backtrace from which all frames of
  # this file have been removed, so that the error points at the code that
  # used the rule.
  def raise_not_implemented
    # blatantly stolen from dependencies.rb in activesupport
    this_file = %r{#{Regexp.escape(__FILE__)}}
    trace = caller.reject { |line| line =~ this_file }

    exception = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    exception.set_backtrace(trace)

    raise exception
  end
end
@@ -0,0 +1,49 @@
1
# Positive or negative lookahead. Never consumes its input.
#
# Example:
#
#   str('foo').present? # matches when the input contains 'foo', but leaves it
#
class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
  attr_reader :positive
  attr_reader :bound_parslet

  # bound_parslet is the atom to look ahead for; positive selects between
  # positive (true) and negative (false) lookahead.
  def initialize(bound_parslet, positive=true)
    super()

    # Positive and negative lookahead are modelled by testing this flag.
    @positive = positive
    @bound_parslet = bound_parslet

    # Error message parts, prepared once per atom.
    @error_msgs = {
      :positive => ["Input should start with ", bound_parslet],
      :negative => ["Input should not start with ", bound_parslet]
    }
  end

  # Applies the bound parslet and then always rewinds the source to where it
  # started. Succeeds with a nil value when the outcome matches the kind of
  # lookahead; otherwise reports an error at the starting position.
  def try(source, context)
    pos = source.pos

    matched, _ = bound_parslet.apply(source, context)

    # Positive lookahead wants a match, negative lookahead wants a failure.
    expectation_met = positive ? matched : !matched
    return succ(nil) if expectation_met

    failure_kind = positive ? :positive : :negative
    context.err_at(self, source, @error_msgs[failure_kind], pos)
  ensure
    # This is probably the only parslet that rewinds its input in #try.
    # Lookaheads NEVER consume their input, even on success, that's why.
    source.pos = pos
  end

  precedence LOOKAHEAD
  # Prints as '&' (positive) or '!' (negative) followed by the bound parslet.
  def to_s_inner(prec)
    marker = positive ? '&' : '!'

    "#{marker}#{bound_parslet.to_s(prec)}"
  end
end
@@ -0,0 +1,32 @@
1
# Gives a match a name in order to influence tree construction.
#
# Example:
#
#   str('foo')            # will return 'foo',
#   str('foo').as(:foo)   # will return :foo => 'foo'
#
class Parslet::Atoms::Named < Parslet::Atoms::Base
  attr_reader :parslet, :name

  # parslet is the atom being named, name is the key used in the result.
  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet. A failed result is handed back unchanged; a
  # successful one is replaced by a single-entry hash under name.
  def apply(source, context)
    outcome = parslet.apply(source, context)
    success, value = outcome

    return outcome unless success

    succ(produce_return_value(value))
  end

  # Prints as "name:" followed by the wrapped parslet.
  def to_s_inner(prec)
    "#{name}:#{parslet.to_s(prec)}"
  end

  private

  # Builds { name => flattened value }. flatten is called with named=true;
  # NOTE(review): presumably this is CanFlatten#flatten, mixed in through
  # the base class - confirm.
  def produce_return_value(val)
    { name => flatten(val, true) }
  end
end