ghazel-parslet 1.4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. data/HISTORY.txt +195 -0
  2. data/LICENSE +23 -0
  3. data/README +70 -0
  4. data/Rakefile +49 -0
  5. data/example/boolean_algebra.rb +70 -0
  6. data/example/calc.rb +153 -0
  7. data/example/comments.rb +35 -0
  8. data/example/deepest_errors.rb +131 -0
  9. data/example/documentation.rb +18 -0
  10. data/example/email_parser.rb +52 -0
  11. data/example/empty.rb +13 -0
  12. data/example/erb.rb +47 -0
  13. data/example/ignore.rb +33 -0
  14. data/example/ip_address.rb +125 -0
  15. data/example/json.rb +128 -0
  16. data/example/local.rb +34 -0
  17. data/example/mathn.rb +44 -0
  18. data/example/minilisp.rb +94 -0
  19. data/example/modularity.rb +47 -0
  20. data/example/nested_errors.rb +132 -0
  21. data/example/output/boolean_algebra.out +4 -0
  22. data/example/output/calc.out +1 -0
  23. data/example/output/comments.out +8 -0
  24. data/example/output/deepest_errors.out +54 -0
  25. data/example/output/documentation.err +4 -0
  26. data/example/output/documentation.out +1 -0
  27. data/example/output/email_parser.out +2 -0
  28. data/example/output/empty.err +1 -0
  29. data/example/output/erb.out +7 -0
  30. data/example/output/ignore.out +1 -0
  31. data/example/output/ignore_whitespace.out +1 -0
  32. data/example/output/ip_address.out +9 -0
  33. data/example/output/json.out +5 -0
  34. data/example/output/local.out +3 -0
  35. data/example/output/mathn.out +4 -0
  36. data/example/output/minilisp.out +5 -0
  37. data/example/output/modularity.out +0 -0
  38. data/example/output/nested_errors.out +54 -0
  39. data/example/output/parens.out +8 -0
  40. data/example/output/readme.out +1 -0
  41. data/example/output/seasons.out +28 -0
  42. data/example/output/sentence.out +1 -0
  43. data/example/output/simple_xml.out +2 -0
  44. data/example/output/string_parser.out +3 -0
  45. data/example/parens.rb +42 -0
  46. data/example/readme.rb +30 -0
  47. data/example/seasons.rb +46 -0
  48. data/example/sentence.rb +36 -0
  49. data/example/simple.lit +3 -0
  50. data/example/simple_xml.rb +54 -0
  51. data/example/string_parser.rb +77 -0
  52. data/example/test.lit +4 -0
  53. data/lib/parslet.rb +254 -0
  54. data/lib/parslet/atoms.rb +32 -0
  55. data/lib/parslet/atoms/alternative.rb +50 -0
  56. data/lib/parslet/atoms/base.rb +124 -0
  57. data/lib/parslet/atoms/can_flatten.rb +137 -0
  58. data/lib/parslet/atoms/context.rb +94 -0
  59. data/lib/parslet/atoms/dsl.rb +98 -0
  60. data/lib/parslet/atoms/entity.rb +41 -0
  61. data/lib/parslet/atoms/lookahead.rb +49 -0
  62. data/lib/parslet/atoms/named.rb +32 -0
  63. data/lib/parslet/atoms/re.rb +38 -0
  64. data/lib/parslet/atoms/repetition.rb +63 -0
  65. data/lib/parslet/atoms/rule.rb +12 -0
  66. data/lib/parslet/atoms/rule/position.rb +143 -0
  67. data/lib/parslet/atoms/sequence.rb +38 -0
  68. data/lib/parslet/atoms/str.rb +37 -0
  69. data/lib/parslet/atoms/visitor.rb +89 -0
  70. data/lib/parslet/cause.rb +94 -0
  71. data/lib/parslet/convenience.rb +35 -0
  72. data/lib/parslet/error_reporter.rb +7 -0
  73. data/lib/parslet/error_reporter/deepest.rb +95 -0
  74. data/lib/parslet/error_reporter/tree.rb +57 -0
  75. data/lib/parslet/export.rb +162 -0
  76. data/lib/parslet/expression.rb +51 -0
  77. data/lib/parslet/expression/treetop.rb +92 -0
  78. data/lib/parslet/parser.rb +67 -0
  79. data/lib/parslet/pattern.rb +114 -0
  80. data/lib/parslet/pattern/binding.rb +49 -0
  81. data/lib/parslet/rig/rspec.rb +51 -0
  82. data/lib/parslet/slice.rb +101 -0
  83. data/lib/parslet/source.rb +62 -0
  84. data/lib/parslet/source/line_cache.rb +95 -0
  85. data/lib/parslet/transform.rb +236 -0
  86. data/lib/parslet/transform/context.rb +32 -0
  87. metadata +264 -0
@@ -0,0 +1,35 @@
1
class Parslet::Atoms::Base
  # Convenience wrapper around #parse that prints a diagnostic instead of
  # letting a parse failure propagate. It packages the idiom
  #
  #   begin
  #     tree = parser.parse('something')
  #   rescue Parslet::ParseFailed => error
  #     puts error.cause.ascii_tree
  #   end
  #
  # into a single call.
  #
  # Usage:
  #
  #   require 'parslet'
  #   require 'parslet/convenience'
  #
  #   class FooParser < Parslet::Parser
  #     rule(:foo) { str('foo') }
  #     root(:foo)
  #   end
  #
  #   FooParser.new.parse_with_debug('bar')
  #
  # On success the result of #parse is returned. On failure the diagnostic
  # is written with +puts+ and the return value is whatever +puts+ returns.
  #
  # @param str  input handed to #parse unchanged
  # @param opts options handed to #parse unchanged
  # @see Parslet::Atoms::Base#parse
  #
  def parse_with_debug(str, opts = {})
    begin
      parse(str, opts)
    rescue Parslet::UnconsumedInput => unconsumed
      # Left-over input: the error itself is printed.
      puts unconsumed
    rescue Parslet::ParseFailed => failure
      # Any other parse failure: print the cause as an ascii tree.
      puts failure.cause.ascii_tree
    end
  end
end
@@ -0,0 +1,7 @@
1
# Namespace module that groups every error reporter implementation.
module Parslet::ErrorReporter; end
5
+
6
+ require 'parslet/error_reporter/tree'
7
+ require 'parslet/error_reporter/deepest'
@@ -0,0 +1,95 @@
1
module Parslet
  module ErrorReporter
    # Instead of reporting the latest error that happens like {Tree} does,
    # this class reports the deepest error. Depth is defined here as how
    # advanced into the input an error happens. The errors close to the
    # greatest depth tend to be more relevant to the end user, since they
    # specify what could be done to make them go away.
    #
    # More specifically, errors produced by this reporter won't be related to
    # the structure of the grammar at all. The positions of the errors will
    # be advanced and convey at every grammar level what the deepest rule
    # was to fail.
    #
    class Deepest
      # Returns the cause that is currently deepest. Mainly for specs.
      #
      attr_reader :deepest_cause

      def initialize
        @deepest_cause = nil
      end

      # Builds an error cause at the source's current position and runs it
      # through #deepest.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed (not used here)
      # @param source [Source] Source that we're using for this parse; its
      #   #pos is taken as the error position.
      # @param message [String, Array] Error message at this level.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] Whatever #deepest returns for the new cause.
      #
      def err(atom, source, message, children=nil)
        deepest(Cause.format(source, source.pos, message, children))
      end

      # Builds an error cause at an explicitly given position and runs it
      # through #deepest.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed (not used here)
      # @param source [Source] Source that we're using for this parse.
      # @param message [String, Array] Error message at this level.
      # @param pos [Fixnum] The real position of the error.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] Whatever #deepest returns for the new cause.
      #
      def err_at(atom, source, message, pos, children=nil)
        deepest(Cause.format(source, pos, message, children))
      end

      # Looks up the most deeply nested leaf of the given cause and compares
      # its position with the stored deepest cause. If nothing is stored yet,
      # or the leaf is at least as far into the input, the leaf becomes the
      # new reference and the given cause is passed through. Otherwise the
      # stored deepest cause is returned instead.
      #
      def deepest(cause)
        # Only the leaf matters here; the rank is needed by the recursion only.
        _rank, leaf = deepest_child(cause)

        if deepest_cause.nil? || leaf.pos >= deepest_cause.pos
          # This error reaches at least as deep into the input, save it as
          # reference.
          @deepest_cause = leaf
          return cause
        end

        deepest_cause
      end

      private

      # Returns the pair [rank, leaf] for the node with the biggest rank
      # (nesting level) in the given error tree. On ties the node found
      # first wins, because the comparison is strict.
      #
      def deepest_child(cause, rank=0)
        max_rank = rank
        max_child = cause

        children = cause.children
        if children && !children.empty?
          children.each do |child|
            c_rank, c_cause = deepest_child(child, rank + 1)

            if c_rank > max_rank
              max_rank = c_rank
              max_child = c_cause
            end
          end
        end

        [max_rank, max_child]
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
module Parslet
  module ErrorReporter
    # Default error reporter. A reporter exposes two entry points: #err for
    # an error at the current parse position and #err_at for an error at an
    # explicitly given position. Each returns an object (a 'cause') that is
    # handed back to the caller together with the fact that the parse failed.
    #
    # Outer levels of a parser pass in the causes collected from inner
    # levels. This implementation attaches them as children of the node it
    # creates, so the result is an error tree that mirrors the grammar
    # structure that failed: one error per parslet atom that produced it.
    #
    # Implementors of other reporters are free to use the return values
    # differently, for example by returning an error state object that is
    # updated as errors cascade up the parse derivation tree.
    #
    class Tree
      # Creates a cause located at the source's current position.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed (not used here)
      # @param source [Source] Source that we're using for this parse; its
      #   #pos is taken as the error position.
      # @param message [String, Array] Error message at this level.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err(atom, source, message, children=nil)
        Cause.format(source, source.pos, message, children)
      end

      # Creates a cause located at the given position.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed (not used here)
      # @param source [Source] Source that we're using for this parse.
      # @param message [String, Array] Error message at this level.
      # @param pos [Fixnum] The real position of the error.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err_at(atom, source, message, pos, children=nil)
        Cause.format(source, pos, message, children)
      end
    end
  end
end
@@ -0,0 +1,162 @@
1
+ # Allows exporting parslet grammars to other lingos.
2
+
3
+ require 'set'
4
+ require 'parslet/atoms/visitor'
5
+
6
class Parslet::Parser
  module Visitors
    # Visitor that renders the atoms it is shown as Citrus grammar text.
    # Every visit_* method returns a freshly built String and never modifies
    # a string received from a child or from the caller.
    #
    # +context+ must respond to #deferred and #mangle_name (PrettyPrinter
    # below does).
    #
    class Citrus
      # NOTE: +output+ is never assigned in this class; the reader is kept
      # so that existing callers keep working.
      attr_reader :context, :output

      def initialize(context)
        @context = context
      end

      def visit_str(str)
        "\"#{str.inspect[1..-2]}\""
      end

      # String#to_s returns the receiver itself, so hand out a copy; a
      # visitor that appends to the result must not alter the caller's
      # string.
      def visit_re(match)
        match.to_s.dup
      end

      # Registers the rule body for later printing and emits a reference to
      # the (mangled) rule name.
      def visit_entity(name, block)
        context.deferred(name, block)

        "(#{context.mangle_name(name)})"
      end

      # Names carry no meaning in the exported grammar; only the wrapped
      # parslet is rendered.
      def visit_named(name, parslet)
        parslet.accept(self)
      end

      def visit_sequence(parslets)
        "(#{parslets.map { |el| el.accept(self) }.join(' ')})"
      end

      # Built by interpolation rather than <<, so the string returned by the
      # child visitor is left untouched.
      def visit_repetition(tag, min, max, parslet)
        "#{parslet.accept(self)}#{min}*#{max}"
      end

      def visit_alternative(alternatives)
        "(#{alternatives.map { |el| el.accept(self) }.join(' | ')})"
      end

      def visit_lookahead(positive, bound_parslet)
        "#{positive ? '&' : '!'}#{bound_parslet.accept(self)}"
      end
    end

    # Treetop dialect: identical to Citrus except for the repetition and
    # alternative syntax.
    #
    class Treetop < Citrus
      def visit_repetition(tag, min, max, parslet)
        "#{parslet.accept(self)}#{min}..#{max}"
      end

      def visit_alternative(alternatives)
        "(#{alternatives.map { |el| el.accept(self) }.join(' / ')})"
      end
    end
  end

  # A helper class that formats Citrus and Treetop grammars as a string.
  #
  class PrettyPrinter
    attr_reader :visitor

    # @param visitor_klass a class whose instances are built with this
    #   printer as their context (see Visitors::Citrus#initialize).
    def initialize(visitor_klass)
      # Queue of [name, block] pairs for rules that still need printing.
      @todo = []
      @visitor = visitor_klass.new(self)
    end

    # Pretty prints the given parslet using the visitor that has been
    # configured in initialize. Returns the string representation of the
    # Citrus or Treetop grammar.
    #
    def pretty_print(name, parslet)
      sections = ["grammar #{name}\n", rule('root', parslet)]

      # The queue is filled by the visitor (see #deferred) while rules are
      # being rendered; keep going until it is empty. It may legitimately be
      # empty right away if the parslet referenced no rules.
      pending = (@todo ||= [])
      seen = Set.new
      until pending.empty?
        rule_name, block = pending.shift

        # Track what rules we've already seen. This breaks loops.
        next unless seen.add?(rule_name)

        sections << rule(rule_name, block.call)
      end

      sections << "end\n"
      sections.join
    end

    # Formats a rule in either dialect.
    #
    def rule(name, parslet)
      " rule #{mangle_name name}\n" \
        " #{parslet.accept(visitor)}\n" \
        " end\n"
    end

    # Whenever the visitor encounters a rule in a parslet, it defers the
    # pretty printing of the rule by calling this method.
    #
    def deferred(name, content)
      (@todo ||= []) << [name, content]
    end

    # Mangles names so that Citrus and Treetop can live with it. This mostly
    # transforms some of the things that Ruby allows into other patterns. If
    # there is collision, we will not detect it for now.
    #
    def mangle_name(str)
      str.to_s.sub(/\?$/, '_p')
    end
  end

  # Exports the current parser instance as a string in the Citrus dialect.
  #
  # Example:
  #
  #   require 'parslet/export'
  #   class MyParser < Parslet::Parser
  #     root(:expression)
  #     rule(:expression) { str('foo') }
  #   end
  #
  #   MyParser.new.to_citrus # => a citrus grammar as a string
  #
  def to_citrus
    PrettyPrinter.new(Visitors::Citrus).
      pretty_print(self.class.name, root)
  end

  # Exports the current parser instance as a string in the Treetop dialect.
  #
  # Example:
  #
  #   require 'parslet/export'
  #   class MyParser < Parslet::Parser
  #     root(:expression)
  #     rule(:expression) { str('foo') }
  #   end
  #
  #   MyParser.new.to_treetop # => a treetop grammar as a string
  #
  def to_treetop
    PrettyPrinter.new(Visitors::Treetop).
      pretty_print(self.class.name, root)
  end
end
162
+
@@ -0,0 +1,51 @@
1
+
2
# Builds parslets from rule strings written in treetop's expression grammar
# (without the actions). It serves both as an example of a fully fledged
# parser and as a potentially useful tool in its own right.
#
# Think of it as an extension to parslet that might one day move into a gem
# of its own.
#
class Parslet::Expression
  include Parslet

  autoload :Treetop, 'parslet/expression/treetop'

  # Creates a parslet from a foreign language expression.
  #
  # Example:
  #
  #   Parslet::Expression.new("'a' 'b'")
  #
  # @param str  the expression text; it is parsed and transformed right away
  # @param opts options hash; +:type+ is stored and defaults to +:treetop+
  # @param context accepted for interface compatibility, not used here
  #
  def initialize(str, opts={}, context=self)
    @type = opts[:type] || :treetop
    @exp = str

    parse_tree = parse(str)
    @parslet = transform(parse_tree)
  end

  # Transforms the parse tree into a parslet expression. When that fails,
  # the offending tree is reported via +warn+ and the error is re-raised.
  #
  def transform(tree)
    Treetop::Transform.new.apply(tree)
  rescue
    warn "Could not transform: #{tree.inspect}"
    raise
  end

  # Parses the string and returns a parse tree.
  #
  def parse(str)
    Treetop::Parser.new.parse(str)
  end

  # Turns this expression into a parslet.
  #
  def to_parslet
    @parslet
  end
end
@@ -0,0 +1,92 @@
1
class Parslet::Expression::Treetop
  # Grammar for the expression syntax: alternatives, sequences, occurrence
  # modifiers, parenthesised groups, '.', quoted strings and character
  # classes.
  #
  class Parser < Parslet::Parser
    root(:expression)

    rule(:expression) { alternatives }

    # alternative 'a' / 'b'
    rule(:alternatives) {
      (simple >> (spaced('/') >> simple).repeat).as(:alt)
    }

    # sequence by simple concatenation 'a' 'b'
    rule(:simple) { occurrence.repeat(1).as(:seq) }

    # occurrence modifiers
    rule(:occurrence) {
      atom.as(:repetition) >> spaced('*').as(:sign) |
        atom.as(:repetition) >> spaced('+').as(:sign) |
        atom.as(:repetition) >> repetition_spec |

        atom.as(:maybe) >> spaced('?') |
        atom
    }

    rule(:atom) {
      spaced('(') >> expression.as(:unwrap) >> spaced(')') |
        dot |
        string |
        char_class
    }

    # a character class
    rule(:char_class) {
      (str('[') >>
        (str('\\') >> any |
        str(']').absent? >> any).repeat(1) >>
      str(']')).as(:match) >> space?
    }

    # anything at all
    rule(:dot) { spaced('.').as(:any) }

    # recognizing strings
    rule(:string) {
      str('\'') >>
      (
        (str('\\') >> any) |
        (str("'").absent? >> any)
      ).repeat.as(:string) >>
      str('\'') >> space?
    }

    # repetition specification like {1, 2}
    rule(:repetition_spec) {
      spaced('{') >>
        integer.maybe.as(:min) >> spaced(',') >>
        integer.maybe.as(:max) >> spaced('}')
    }
    rule(:integer) {
      match['0-9'].repeat(1)
    }

    # whitespace handling
    #
    # The pattern is single-quoted on purpose: in a double-quoted Ruby string
    # "\s" is an escape for one space character, which would make this rule
    # reject tabs and newlines. '\s' reaches the regexp engine as the
    # whitespace character class.
    rule(:space) { match('\s').repeat(1) }
    rule(:space?) { space.maybe }

    # Matches the literal +str+ followed by optional whitespace.
    def spaced(str)
      str(str) >> space?
    end
  end

  # Turns the tree produced by Parser into parslet atoms.
  #
  class Transform < Parslet::Transform

    # 'a'* and 'a'+ : unbounded repetition with a minimum of 0 or 1.
    rule(:repetition => simple(:rep), :sign => simple(:sign)) {
      min = sign=='+' ? 1 : 0
      Parslet::Atoms::Repetition.new(rep, min, nil) }

    # 'a'{min,max} : a missing min counts as 0, a missing max as unbounded.
    rule(:repetition => simple(:rep), :min => simple(:min), :max => simple(:max)) {
      Parslet::Atoms::Repetition.new(rep,
        Integer(min || 0),
        max && Integer(max) || nil) }

    rule(:alt => subtree(:alt)) { Parslet::Atoms::Alternative.new(*alt) }
    rule(:seq => sequence(:s)) { Parslet::Atoms::Sequence.new(*s) }
    rule(:unwrap => simple(:u)) { u }
    rule(:maybe => simple(:m)) { |d| d[:m].maybe }
    rule(:string => simple(:s)) { Parslet::Atoms::Str.new(s) }
    rule(:match => simple(:m)) { Parslet::Atoms::Re.new(m) }
    rule(:any => simple(:a)) { Parslet::Atoms::Re.new('.') }
  end

end
92
+