ghazel-parslet 1.4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. data/HISTORY.txt +195 -0
  2. data/LICENSE +23 -0
  3. data/README +70 -0
  4. data/Rakefile +49 -0
  5. data/example/boolean_algebra.rb +70 -0
  6. data/example/calc.rb +153 -0
  7. data/example/comments.rb +35 -0
  8. data/example/deepest_errors.rb +131 -0
  9. data/example/documentation.rb +18 -0
  10. data/example/email_parser.rb +52 -0
  11. data/example/empty.rb +13 -0
  12. data/example/erb.rb +47 -0
  13. data/example/ignore.rb +33 -0
  14. data/example/ip_address.rb +125 -0
  15. data/example/json.rb +128 -0
  16. data/example/local.rb +34 -0
  17. data/example/mathn.rb +44 -0
  18. data/example/minilisp.rb +94 -0
  19. data/example/modularity.rb +47 -0
  20. data/example/nested_errors.rb +132 -0
  21. data/example/output/boolean_algebra.out +4 -0
  22. data/example/output/calc.out +1 -0
  23. data/example/output/comments.out +8 -0
  24. data/example/output/deepest_errors.out +54 -0
  25. data/example/output/documentation.err +4 -0
  26. data/example/output/documentation.out +1 -0
  27. data/example/output/email_parser.out +2 -0
  28. data/example/output/empty.err +1 -0
  29. data/example/output/erb.out +7 -0
  30. data/example/output/ignore.out +1 -0
  31. data/example/output/ignore_whitespace.out +1 -0
  32. data/example/output/ip_address.out +9 -0
  33. data/example/output/json.out +5 -0
  34. data/example/output/local.out +3 -0
  35. data/example/output/mathn.out +4 -0
  36. data/example/output/minilisp.out +5 -0
  37. data/example/output/modularity.out +0 -0
  38. data/example/output/nested_errors.out +54 -0
  39. data/example/output/parens.out +8 -0
  40. data/example/output/readme.out +1 -0
  41. data/example/output/seasons.out +28 -0
  42. data/example/output/sentence.out +1 -0
  43. data/example/output/simple_xml.out +2 -0
  44. data/example/output/string_parser.out +3 -0
  45. data/example/parens.rb +42 -0
  46. data/example/readme.rb +30 -0
  47. data/example/seasons.rb +46 -0
  48. data/example/sentence.rb +36 -0
  49. data/example/simple.lit +3 -0
  50. data/example/simple_xml.rb +54 -0
  51. data/example/string_parser.rb +77 -0
  52. data/example/test.lit +4 -0
  53. data/lib/parslet.rb +254 -0
  54. data/lib/parslet/atoms.rb +32 -0
  55. data/lib/parslet/atoms/alternative.rb +50 -0
  56. data/lib/parslet/atoms/base.rb +124 -0
  57. data/lib/parslet/atoms/can_flatten.rb +137 -0
  58. data/lib/parslet/atoms/context.rb +94 -0
  59. data/lib/parslet/atoms/dsl.rb +98 -0
  60. data/lib/parslet/atoms/entity.rb +41 -0
  61. data/lib/parslet/atoms/lookahead.rb +49 -0
  62. data/lib/parslet/atoms/named.rb +32 -0
  63. data/lib/parslet/atoms/re.rb +38 -0
  64. data/lib/parslet/atoms/repetition.rb +63 -0
  65. data/lib/parslet/atoms/rule.rb +12 -0
  66. data/lib/parslet/atoms/rule/position.rb +143 -0
  67. data/lib/parslet/atoms/sequence.rb +38 -0
  68. data/lib/parslet/atoms/str.rb +37 -0
  69. data/lib/parslet/atoms/visitor.rb +89 -0
  70. data/lib/parslet/cause.rb +94 -0
  71. data/lib/parslet/convenience.rb +35 -0
  72. data/lib/parslet/error_reporter.rb +7 -0
  73. data/lib/parslet/error_reporter/deepest.rb +95 -0
  74. data/lib/parslet/error_reporter/tree.rb +57 -0
  75. data/lib/parslet/export.rb +162 -0
  76. data/lib/parslet/expression.rb +51 -0
  77. data/lib/parslet/expression/treetop.rb +92 -0
  78. data/lib/parslet/parser.rb +67 -0
  79. data/lib/parslet/pattern.rb +114 -0
  80. data/lib/parslet/pattern/binding.rb +49 -0
  81. data/lib/parslet/rig/rspec.rb +51 -0
  82. data/lib/parslet/slice.rb +101 -0
  83. data/lib/parslet/source.rb +62 -0
  84. data/lib/parslet/source/line_cache.rb +95 -0
  85. data/lib/parslet/transform.rb +236 -0
  86. data/lib/parslet/transform/context.rb +32 -0
  87. metadata +264 -0
@@ -0,0 +1,137 @@
1
+
2
+ module Parslet::Atoms
3
# Helper functions that share one topic: reducing the annotated result
# values produced by parslet atoms into the intermediary tree that consists
# of Ruby Hashes and Arrays.
#
# The main entry point is #flatten. It takes an annotated structure as input
# and returns the reduced form that users expect from Atom#parse.
#
# NOTE: All of these are plain functions, which is why they live in a module
# and not in a class. It's hard to draw the line sometimes, but this is
# beyond.
#
module CanFlatten
  # Converts a mixed value coming out of a parslet into a return value for
  # the user by dropping things and merging hashes.
  #
  # Anything that is not exactly an Array is passed through untouched. An
  # Array is read as an s-expression: its first element is the tag
  # (:sequence, :maybe or :repetition), the remaining elements are flattened
  # recursively and then folded according to the tag.
  #
  # named is true if this result will be embedded in a Hash result from
  # naming something using <code>.as(...)</code>. It changes the folding
  # semantics of :maybe and :repetition.
  #
  # Fails with a RuntimeError for any other tag.
  #
  def flatten(value, named=false)
    return value unless value.instance_of?(Array)

    tag = value.first
    # Children are always flattened as unnamed results.
    parts = value.drop(1).map { |element| flatten(element) }

    case tag
    when :sequence   then flatten_sequence(parts)
    when :maybe      then named ? parts.first : (parts.first || '')
    when :repetition then flatten_repetition(parts, named)
    else
      fail "BUG: Unknown tag #{tag.inspect}."
    end
  end

  # Lisp style fold left: the first element is the seed of an inject over
  # the remaining elements. An empty list folds to the empty string.
  #
  def foldl(list, &block)
    return '' if list.empty?

    seed, *remainder = list
    remainder.inject(seed, &block)
  end

  # Flatten results from a sequence of parslets: nil elements are dropped,
  # the rest is merged pairwise from the left.
  #
  # @api private
  #
  def flatten_sequence(list)
    foldl(list.compact) { |merged, element| merge_fold(merged, element) }
  end

  # Merges two neighbouring sequence results into one.
  #
  # @api private
  def merge_fold(l, r)
    # Same class on both sides: hashes are merged (the right side wins on
    # duplicate keys, with a warning), everything else is concatenated.
    if l.class == r.class
      return l + r unless l.is_a?(Hash)

      warn_about_duplicate_keys(l, r)
      return l.merge(r)
    end

    l_stringlike = l.respond_to?(:to_str)
    r_stringlike = r.respond_to?(:to_str)

    # Different classes, but both stringlike: when merging a String with a
    # Slice, the slice wins.
    if l_stringlike && r_stringlike
      return r if r.respond_to?(:to_slice)
      return l if l.respond_to?(:to_slice)

      fail "NOTREACHED: What other stringlike classes are there?"
    end

    # Only one side is a string/slice: the other side is more important.
    return l if r_stringlike
    return r if l_stringlike

    # Otherwise wrap the hash in an array so both live on the same level.
    return l + [r] if r.class == Hash
    return [l] + r if l.class == Hash

    fail "Unhandled case when foldr'ing sequence."
  end

  # Flatten results from a repetition of a single parslet. named indicates
  # whether the user has named the result or not. If the result is named, an
  # empty list is left alone - otherwise it is turned into an empty string.
  #
  # @api private
  #
  def flatten_repetition(list, named)
    # Keyed subtrees present: keep only those and discard everything
    # inbetween. To keep the strings, name them.
    keyed = list.select { |element| element.instance_of?(Hash) }
    return keyed unless keyed.empty?

    # Nested arrays present: hoist their contents up to this level.
    nested = list.select { |element| element.instance_of?(Array) }
    return nested.flatten(1) unless nested.empty?

    # Consistent handling of empty lists when acting on a named result.
    return [] if named && list.empty?

    # Only strings left: concatenate them.
    foldl(list) { |joined, element| joined + element }
  end

  # That annoying warning 'Duplicate subtrees while merging result' comes
  # from here. You should add more '.as(...)' names to your intermediary
  # tree.
  #
  def warn_about_duplicate_keys(h1, h2)
    shared = h1.keys & h2.keys
    return if shared.empty?

    warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values" +
         " of the latter will be kept. (keys: #{shared.inspect})"
  end
end
137
+ end
@@ -0,0 +1,94 @@
1
+ module Parslet::Atoms
2
# Helper class that implements a transient cache mapping (position, parslet
# object) to parse results. This is used for memoization in the packrat
# style.
#
# The error reporter is stored here as well and all error reporting goes
# through this class, which makes the reporting pluggable.
#
class Context

  # Stack of entries kept in a plain array; the most recently pushed entry
  # sits at the front.
  class LRStack < Struct.new(:lrs)
    # Puts lr on top of the stack.
    def push(lr)
      lrs.unshift(lr)
    end

    # Removes and returns the top of the stack.
    def pop
      lrs.shift
    end

    # Iterates the entries starting with the most recently pushed one.
    def top_down(&visitor)
      lrs.each(&visitor)
    end
  end

  attr_reader :lr_stack, :heads

  # @param reporter [#err, #err_at] Error reporter (leave empty for default
  #   reporter)
  def initialize(reporter=Parslet::ErrorReporter::Tree.new)
    @reporter = reporter
    # Two level cache: position => { parslet object => [result, advance] }
    @cache    = Hash.new { |by_pos, pos| by_pos[pos] = {} }
    @heads    = {}
    @lr_stack = LRStack.new([])
  end

  # Caches a parse answer for obj at source.pos. Applying the same parslet
  # at one position of input always yields the same result, unless the input
  # has changed.
  #
  # The entire source is needed so that we can ask how far a parse advanced
  # the input. Replaying a cached parse must advance the input pos by the
  # same amount.
  #
  def try_with_cache(obj, source)
    start  = source.pos
    cached = lookup(obj, start)

    if cached
      # The data skipped here has been read before (it is in the cache) and
      # its contents are not interesting anymore since the outcome of obj at
      # start is known. So skip reading and just move the position.
      answer, consumed = cached
      source.pos = start + consumed
      return answer
    end

    # Not in cache yet: really parse, then remember result and distance.
    answer = obj.try(source, self)
    set(obj, start, [answer, source.pos - start])
    answer
  end

  # Report an error at a given position.
  # @see ErrorReporter
  #
  def err_at(*args)
    cause = @reporter ? @reporter.err_at(*args) : nil
    [false, cause]
  end

  # Report an error.
  # @see ErrorReporter
  #
  def err(*args)
    cause = @reporter ? @reporter.err(*args) : nil
    [false, cause]
  end

  #private

  # Returns the cache entry stored for obj at pos, or nil.
  def lookup(obj, pos)
    @cache[pos][obj]
  end

  # Stores val as the cache entry for obj at pos.
  def set(obj, pos, val)
    @cache[pos][obj] = val
  end
end
94
+ end
@@ -0,0 +1,98 @@
1
+
2
+ # A mixin module that defines operations that can be called on any subclass
3
+ # of Parslet::Atoms::Base. These operations make parslet atoms chainable and
4
+ # allow combination of parslet atoms to form bigger parsers.
5
+ #
6
+ # Example:
7
+ #
8
+ # str('foo') >> str('bar')
9
+ # str('f').repeat
10
+ # any.absent? # also called The Epsilon
11
+ #
12
+ module Parslet::Atoms::DSL
13
+ # Construct a new atom that repeats the current atom at least min times and
14
+ # at most max times. max can be nil to indicate that no maximum is present.
15
+ #
16
+ # Example:
17
+ # # match any number of 'a's
18
+ # str('a').repeat
19
+ #
20
+ # # match between 1 and 3 'a's
21
+ # str('a').repeat(1,3)
22
+ #
23
+ def repeat(min=0, max=nil)
24
+ Parslet::Atoms::Repetition.new(self, min, max)
25
+ end
26
+
27
+ # Returns a new parslet atom that is only maybe present in the input. It is
28
+ # built like #repeat(0,1), but the repetition is tagged :maybe. Presumably an
29
+ # absent atom then yields nil when named with #as and '' otherwise (see CanFlatten#flatten).
30
+ #
31
+ # Example:
32
+ # str('foo').maybe
33
+ #
34
+ def maybe
35
+ Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
36
+ end
37
+
38
+ # Chains two parslet atoms together as a sequence.
39
+ #
40
+ # Example:
41
+ # str('a') >> str('b')
42
+ #
43
+ def >>(parslet)
44
+ Parslet::Atoms::Sequence.new(self, parslet)
45
+ end
46
+
47
+ # Chains two parslet atoms together to express alternation. A match will
48
+ # always be attempted with the parslet on the left side first. If it doesn't
49
+ # match, the right side will be tried.
50
+ #
51
+ # Example:
52
+ # # matches either 'a' OR 'b'
53
+ # str('a') | str('b')
54
+ #
55
+ def |(parslet)
56
+ Parslet::Atoms::Alternative.new(self, parslet)
57
+ end
58
+
59
+ # Tests for absence of a parslet atom in the input stream without consuming
60
+ # it.
61
+ #
62
+ # Example:
63
+ # # Only proceed with the parse if 'a' is absent.
64
+ # str('a').absent?
65
+ #
66
+ def absent?
67
+ Parslet::Atoms::Lookahead.new(self, false)
68
+ end
69
+
70
+ # Tests for presence of a parslet atom in the input stream without consuming
71
+ # it.
72
+ #
73
+ # Example:
74
+ # # Only proceed with the parse if 'a' is present.
75
+ # str('a').present?
76
+ #
77
+ def present?
78
+ Parslet::Atoms::Lookahead.new(self, true)
79
+ end
80
+
81
+ # Alias for present? that will disappear in 2.0 (deprecated)
82
+ #
83
+ alias prsnt? present?
84
+
85
+ # Alias for absent? that will disappear in 2.0 (deprecated)
86
+ #
87
+ alias absnt? absent?
88
+
89
+ # Marks a parslet atom as important for the tree output. This must be used
90
+ # to achieve meaningful output from the #parse method.
91
+ #
92
+ # Example:
93
+ # str('a').as(:b) # will produce {:b => 'a'}
94
+ #
95
+ def as(name)
96
+ Parslet::Atoms::Named.new(self, name)
97
+ end
98
+ end
@@ -0,0 +1,41 @@
1
# Wraps a piece of parslet definition and gives it a name. The wrapped piece
# is evaluated lazily and cached. This serves two purposes:
#
# * Avoid infinite recursion during evaluation of the definition
# * Be able to print things by their name, not by their sometimes
#   complicated content.
#
# You don't normally use this directly; instead you should generate it by
# using the structuring method Parslet.rule.
#
class Parslet::Atoms::Entity < Parslet::Atoms::Base
  attr_reader :name, :block

  # name is the name the rule is printed under, block is the definition that
  # produces the wrapped parslet when called.
  def initialize(name, &block)
    super()

    @name  = name
    @block = block
  end

  # Delegates the parse attempt to the wrapped parslet.
  def try(source, context)
    parslet.apply(source, context)
  end

  # Returns the wrapped parslet. The block is called until it has produced a
  # truthy value once; that value is then cached. A block that yields
  # nil/false raises NotImplementedError.
  def parslet
    return @parslet if @parslet

    built = @block.call
    raise_not_implemented unless built
    @parslet = built
  end

  # An entity prints as its upcased name.
  def to_s_inner(prec)
    name.to_s.upcase
  end

private

  # Raises NotImplementedError with a backtrace that omits the frames
  # belonging to this file.
  def raise_not_implemented
    # backtrace filtering blatantly stolen from dependencies.rb in activesupport
    own_file = %r{#{Regexp.escape(__FILE__)}}
    trace    = caller.reject { |frame| frame =~ own_file }

    error = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    error.set_backtrace(trace)

    raise error
  end
end
@@ -0,0 +1,49 @@
1
# Either positive or negative lookahead; never consumes its input.
#
# Example:
#
#   str('foo').present? # matches when the input starts with 'foo', but leaves it
#
class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
  attr_reader :positive
  attr_reader :bound_parslet

  # bound_parslet is the atom to look ahead for; positive selects between
  # positive (true) and negative (false) lookahead.
  def initialize(bound_parslet, positive=true)
    super()

    @bound_parslet = bound_parslet
    # Positive and negative lookahead are modelled by testing this flag.
    @positive = positive

    @error_msgs = {
      :positive => ["Input should start with ", bound_parslet],
      :negative => ["Input should not start with ", bound_parslet]
    }
  end

  # Applies the bound parslet and succeeds (with a nil value) when the
  # outcome agrees with the lookahead polarity; otherwise reports an error at
  # the starting position.
  #
  # This is probably the only parslet that rewinds its input in #try.
  # Lookaheads NEVER consume their input, even on success, hence the ensure.
  def try(source, context)
    pos = source.pos

    matched, _value = bound_parslet.apply(source, context)

    # Positive lookahead wants a match, negative lookahead wants a miss.
    return succ(nil) if positive ? matched : !matched

    polarity = positive ? :positive : :negative
    context.err_at(self, source, @error_msgs[polarity], pos)
  ensure
    source.pos = pos
  end

  precedence LOOKAHEAD

  # Prints as '&' (positive) or '!' (negative) followed by the bound parslet.
  def to_s_inner(prec)
    prefix = positive ? '&' : '!'

    "#{prefix}#{bound_parslet.to_s(prec)}"
  end
end
@@ -0,0 +1,32 @@
1
# Names a match to influence tree construction.
#
# Example:
#
#   str('foo')            # will return 'foo',
#   str('foo').as(:foo)   # will return :foo => 'foo'
#
class Parslet::Atoms::Named < Parslet::Atoms::Base
  attr_reader :parslet, :name

  # parslet is the atom being named, name the key its result is stored under.
  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name    = name
  end

  # Applies the wrapped parslet. A failure is handed back unchanged; a
  # success is re-wrapped so that its value becomes { name => flattened value }.
  def apply(source, context)
    outcome = parslet.apply(source, context)
    matched, value = outcome

    return outcome unless matched

    succ(produce_return_value(value))
  end

  # Prints as "name:" followed by the wrapped parslet.
  def to_s_inner(prec)
    "#{name}:#{parslet.to_s(prec)}"
  end

private

  # Builds the single-key hash for a successful match; the value is
  # flattened with named=true.
  def produce_return_value(val)
    { name => flatten(val, true) }
  end
end