rubypeg 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/rubypeg.rb ADDED
@@ -0,0 +1,313 @@
1
+ # This file contains all the elements that are required
2
+ # at runtime by a RubyPeg parser.
3
+ #
4
+ # You can either distribute it in your source code
5
+ # or include the rubypeg gem as a dependency for
6
+ # your source
7
+
8
+ # By default all non terminals that
9
+ # are returned by RubyPeg#parse are Arrays
10
+ # that have been extended with the NonTerminalNode
11
+ # module
12
+ #
13
+ # If we consider this example:
14
+ # class BasketPeg < RubyPeg
15
+ # def root
16
+ # node :basket do
17
+ # one_or_more { items }
18
+ # end
19
+ # end
20
+ #
21
+ # def items
22
+ # node :item do
23
+ # number && optional_space && fruit && optional_space
24
+ # end
25
+ # end
26
+ #
27
+ # def number
28
+ # terminal(/\d+/)
29
+ # end
30
+ #
31
+ # def fruit
32
+ # node :fruit do
33
+ # (terminal("apple") || terminal("pear")) && ignore{ optional{ terminal("s") } }
34
+ # end
35
+ # end
36
+ #
37
+ # def optional_space
38
+ # ignore{ optional{ terminal(" ") }}
39
+ # end
40
+ # end
41
+ # Then
42
+ # BasketPeg.parse("1 apple 2 apples 3 pears").should be_kind_of(NonTerminalNode)
43
+ #
44
+ # This is an array of children of this non terminal.
45
+ # The children may be other non-terminals or terminals
46
+ # The array will be empty if there are no children.
47
+ #
48
+ # basket = BasketPeg.parse("1 apple 2 apples 3 pears")
49
+ # basket.class.should == Array
50
+ # basket.size.should == 3
51
+ # basket.first.should be_kind_of(NonTerminalNode)
52
+ # basket.first.type.should == :item
53
+ # basket.first.class.should == Array
54
+ # basket.first.size.should == 2
55
+ # basket.first.first.should be_kind_of(TerminalNode)
56
+ # basket.first.first.should == "1"
57
+ # basket.first.last.should be_kind_of(NonTerminalNode)
58
+ # basket.first.last.type.should == :fruit
59
+ # basket.first.last.class.should == Array
60
+ # basket.first.last.size.should == 1
61
+ # basket.first.last.first.should be_kind_of(TerminalNode)
62
+ # basket.first.last.first.should == "apple"
63
module NonTerminalNode

  # Contains the argument given to RubyPeg#node
  #  BasketPeg.parse("1 apple 2 apples 3 pears").type.should == :basket
  attr_accessor :type

  # This is a quick way of carrying out the visitor pattern on the parsed structure.
  #
  # If no visitor is supplied then a nested array of child nodes is returned, with
  # terminals turned into strings. A node with exactly one child is replaced by
  # the result of visiting that child:
  #  BasketPeg.parse("1 apple 2 apples 3 pears").visit.should == [["1", "apple"], ["2", "apple"], ["3", "pear"]]
  #
  # If a visitor is supplied, then each non terminal node checks if there is a method on the visitor
  # with a name the same as the non terminal's type. If there is, then the method is called with the
  # children of the non terminal as arguments. If there isn't, then the visit methods on the children
  # of this node are recursively called.
  # E.g.,:
  #  class BasketPegBuilderExample
  #    attr_accessor :total
  #
  #    def initialize
  #      @total = 0
  #    end
  #
  #    def item(number,kind)
  #      @total = @total + (number.to_f * kind.visit(self).to_f)
  #    end
  #
  #    def fruit(kind_of_fruit)
  #      case kind_of_fruit
  #      when "apple"; 3.0
  #      when "pear"; 1.0
  #      else 10.0
  #      end
  #    end
  #  end
  #  counter = BasketPegBuilderExample.new
  #  BasketPeg.parse("1 apple 2 apples 3 pears").visit(counter)
  #  counter.total.should == 12.0
  def visit(builder = nil)
    # Only dispatch when a visitor was really supplied: nil itself responds to
    # methods such as to_s or inspect, which may coincide with a node type.
    return builder.send(type, *self) if !builder.nil? && builder.respond_to?(type)
    return first.visit(builder) if size == 1
    map { |child| child.visit(builder) }
  end

  # Returns the node network as an abstract syntax tree
  #
  #  BasketPeg.parse("1 apple 2 apples 3 pears").to_ast.should == [:basket, [:item, "1", [:fruit, "apple"]], [:item, "2", [:fruit, "apple"]], [:item, "3", [:fruit, "pear"]]]
  # Note that the items wrapped in ignore {} in the parser, such as the spaces and the optional 's' in apples and pears do not appear.
  def to_ast
    [type, *map(&:to_ast)]
  end

  # Lists the non-terminal node and its children. Same content as #to_ast but in string form.
  #  BasketPeg.parse("1 apple 2 apples 3 pears").inspect.should == '[:basket, [:item, "1", [:fruit, "apple"]], [:item, "2", [:fruit, "apple"]], [:item, "3", [:fruit, "pear"]]]'
  def inspect
    to_ast.inspect
  end

  # Returns the result of calling to_s on each of its children, joined together.
  #  BasketPeg.parse("1 apple 2 apples 3 pears").to_s.should == "1apple2apple3pear"
  # Note that the items wrapped in ignore {} in the parser, such as the spaces and the optional 's' in apples and pears do not appear.
  def to_s
    map(&:to_s).join
  end
end
126
+
127
# Mixed into the strings that RubyPeg#terminal returns, so that terminals can
# be walked with the same interface as NonTerminalNode.
module TerminalNode

  # A terminal is a leaf, so visiting it returns the terminal itself.
  # The builder is ignored; it is optional so that the call matches
  # NonTerminalNode#visit, which may also be called without a visitor.
  def visit(builder = nil)
    self
  end

  # A terminal is its own abstract syntax tree.
  def to_ast
    self
  end
end
136
+
137
# Base class for packrat-style PEG parsers. Subclasses override #root and
# build their grammar from #terminal, #node and the combinators below.
class RubyPeg

  # See #parse
  def self.parse(text_to_parse)
    new.parse(text_to_parse)
  end

  # Parses the text, prints the memoisation cache (see #pretty_print_cache)
  # and returns the parse result.
  def self.parse_and_dump(text_to_parse, dump_positive_matches_only = false)
    parser = new
    result = parser.parse(text_to_parse)
    parser.pretty_print_cache(dump_positive_matches_only)
    result
  end

  attr_accessor :index, :text_to_parse, :sequences
  # Only a writer: the name +cache+ is taken by the private memoising helper.
  attr_writer :cache

  # Resets the parser state and runs #root against the text.
  # Returns whatever #root returns (nil when the grammar does not match).
  def parse(text_to_parse)
    self.index = 0
    self.text_to_parse = text_to_parse
    self.cache = {}
    self.sequences = [[]]
    root
  end

  # Default grammar: the whole text as a single terminal.
  def root
    terminal(/.*/m)
  end

  # Runs the block as a sequence and discards what it matched.
  # Returns :ignore on success, nil on failure.
  def ignore(&block)
    sequence(&block) ? :ignore : nil
  end

  # Matches one character (as defined by the regexp /./).
  def any_character
    terminal(/./)
  end

  # Always succeeds: returns the block's result, or :ignore if it failed.
  def optional
    yield || :ignore
  end

  # Like #any_number_of, but fails (returns nil) if the block never matched.
  def one_or_more(&block)
    results = any_number_of(&block)
    results.empty? ? nil : results
  end

  # Calls the block until it fails and returns the array of its results
  # (possibly empty, which is still truthy).
  #
  # NOTE: the index is not rewound here when an iteration fails part-way
  # through; #terminal, #node and #sequence leave the index alone on failure.
  def any_number_of
    results = []
    loop do
      start_index = index
      result = yield
      break unless result
      results << result
      # A match that consumed nothing would match again forever, so stop.
      break if index == start_index
    end
    results
  end

  # Runs the block, collecting everything that #terminal and #node matched
  # inside it. Returns the collected results without the :ignore markers, or
  # nil (with the index restored) if the block failed.
  def sequence
    start_index = index
    sequences.push([])
    if yield
      sequences.pop.reject { |r| r == :ignore }
    else
      sequences.pop
      self.index = start_index
      nil
    end
  end

  # Positive lookahead: succeeds (:ignore) if the block matches, but never
  # consumes any input.
  def followed_by(&block)
    start_index = index
    result = sequence(&block)
    self.index = start_index
    result ? :ignore : nil
  end

  # Negative lookahead: succeeds (:ignore) only if the block does not match.
  def not_followed_by(&block)
    followed_by(&block) ? nil : :ignore
  end

  # Matches a string or regexp at the current index. The outcome is memoised
  # per terminal and start index.
  def terminal(t)
    return put_in_sequence(cached(t)) if cached?(t)
    put_in_sequence(cache(t, index, uncached_terminal(t)))
  end

  # Matches the block as a non-terminal of type +t+. The outcome is memoised
  # per type and start index.
  def node(t, &block)
    return put_in_sequence(cached(t)) if cached?(t)
    put_in_sequence(cache(t, index, uncached_node(t, &block)))
  end

  # Prints, for every character of the text, the cache entries recorded at
  # that index. With +only_if_match+ only successful entries are printed.
  def pretty_print_cache(only_if_match = false)
    (0...text_to_parse.size).each do |i|
      print "#{text_to_parse[i].inspect[1...-1]}\t#{i}\t"
      @cache.each do |name, indexes|
        result = indexes[i]
        next unless result
        next if only_if_match && !result.first
        print "[#{name.inspect},#{result.first.inspect}] "
      end
      print "\n"
    end
  end

  private

  def uncached_terminal(t)
    return uncached_terminal_regexp(t) if t.is_a? Regexp
    uncached_terminal_string(t.to_s)
  end

  # Succeeds only if the first match at or after the index starts at the index.
  def uncached_terminal_regexp(t)
    return nil unless index == text_to_parse.index(t, index)
    match = Regexp.last_match
    self.index = match.end(0)
    create_terminal_node match[0]
  end

  # Compares just the slice at the index instead of searching the rest of the
  # text for the string.
  def uncached_terminal_string(t)
    return nil unless text_to_parse[index, t.size] == t
    self.index = index + t.size
    # Extend a copy: the argument is usually a literal owned by the grammar,
    # which may be frozen and is reused on every call.
    create_terminal_node t.dup
  end

  def create_terminal_node(text)
    text.extend(TerminalNode)
  end

  def uncached_node(type, &block)
    start_index = index
    results = sequence(&block)
    return create_non_terminal_node(type, results) if results
    self.index = start_index
    nil
  end

  def create_non_terminal_node(type, children_array)
    children_array.extend(NonTerminalNode)
    children_array.type = type
    children_array
  end

  # Appends a successful result to the sequence being collected.
  def put_in_sequence(result)
    sequences.last.push(result) if result
    result
  end

  def cached?(name)
    @cache.key?(name) && @cache[name].key?(index)
  end

  # Replays a cached outcome: moves the index to where the original attempt
  # ended and returns its result.
  def cached(name)
    result, end_index = @cache[name][index]
    self.index = end_index
    result
  end

  # Stores [result, index after the attempt] under the name and start index +i+.
  def cache(name, i, result)
    (@cache[name] ||= {})[i] = [result, index]
    result
  end

end
data/lib/textpeg.rb ADDED
@@ -0,0 +1,159 @@
1
+ require 'rubypeg'
2
+
3
# Parser for the text form of a PEG grammar (see textpeg.txt).
class TextPeg < RubyPeg

  # The entry point is the text_peg rule.
  def root
    text_peg
  end

  # text_peg := (spacing (node | definition))*
  def text_peg
    node(:text_peg) { any_number_of { spacing && (_node || definition) } }
  end

  # node := identifier assigns expression end_of_line
  def _node
    node(:node) { identifier && assigns && expression && end_of_line }
  end

  # definition := identifier equals expression end_of_line
  def definition
    node(:definition) { identifier && equals && expression && end_of_line }
  end

  def identifier
    node(:identifier) { terminal(/[a-zA-Z_][a-zA-Z0-9_]*/) && spacing }
  end

  def assigns
    ignore { terminal(":=") } && spacing
  end

  def equals
    ignore { terminal("=") } && spacing
  end

  def expression
    alternatives || _sequence
  end

  # sequence := (elements spacing)+
  def _sequence
    node(:sequence) { one_or_more { elements && spacing } }
  end

  # alternatives := elements (divider elements)+
  def alternatives
    node(:alternatives) { elements && one_or_more { divider && elements } }
  end

  def divider
    ignore { terminal("|") } && spacing
  end

  def elements
    prefixed || suffixed || element
  end

  def prefixed
    ignored || _not_followed_by || _followed_by
  end

  def suffixed
    _optional || _any_number_of || _one_or_more
  end

  # Prefix operators: the operator character is dropped, the element is kept.

  def _not_followed_by
    node(:not_followed_by) { ignore { terminal("!") } && element }
  end

  def _followed_by
    node(:followed_by) { ignore { terminal("&") } && element }
  end

  def ignored
    node(:ignored) { ignore { terminal("`") } && element }
  end

  # Suffix operators: the element is kept, the operator character is dropped.

  def _optional
    node(:optional) { element && ignore { terminal("?") } }
  end

  def _any_number_of
    node(:any_number_of) { element && ignore { terminal("*") } }
  end

  def _one_or_more
    node(:one_or_more) { element && ignore { terminal("+") } }
  end

  def element
    bracketed_expression ||
      identifier ||
      terminal_string ||
      terminal_regexp ||
      terminal_character_range ||
      _any_character
  end

  def bracketed_expression
    node(:bracketed_expression) do
      ignore { terminal("(") } && spacing && expression && ignore { terminal(")") } && spacing
    end
  end

  def terminal_string
    node(:terminal_string) { single_quoted_string || double_quoted_string }
  end

  def double_quoted_string
    ignore { terminal("\"") } && terminal(/[^"]*/) && ignore { terminal("\"") } && spacing
  end

  def single_quoted_string
    ignore { terminal("'") } && terminal(/[^']*/) && ignore { terminal("'") } && spacing
  end

  def terminal_character_range
    node(:terminal_character_range) { terminal(/\[[a-zA-Z\-0-9]*\]/) && spacing }
  end

  def terminal_regexp
    node(:terminal_regexp) do
      ignore { terminal("/") } && terminal(/(\\\/|[^\x2f])*/) && ignore { terminal("/") } && spacing
    end
  end

  def _any_character
    node(:any_character) { ignore { terminal(".") } && spacing }
  end

  # One or more line breaks, or the end of the text.
  def end_of_line
    ignore { terminal(/[\n\r]+|\z/) }
  end

  # Any run of spaces and tabs, including none.
  def spacing
    ignore { terminal(/[ \t]*/) }
  end

end
data/lib/textpeg.txt ADDED
@@ -0,0 +1,29 @@
1
+ text_peg := (spacing (node | definition))*
2
+ node := identifier assigns expression end_of_line
3
+ definition := identifier equals expression end_of_line
4
+ identifier := /[a-zA-Z_][a-zA-Z0-9_]*/ spacing
5
+ assigns = `":=" spacing
6
+ equals = `"=" spacing
7
+ expression = alternatives | sequence
8
+ sequence := (elements spacing)+
9
+ alternatives := elements (divider elements)+
10
+ divider = `"|" spacing
11
+ elements = prefixed | suffixed | element
12
+ prefixed = ignored | not_followed_by | followed_by
13
+ suffixed = optional | any_number_of | one_or_more
14
+ not_followed_by := `"!" element
15
+ followed_by := `"&" element
16
+ ignored := `"`" element
17
+ optional := element `"?"
18
+ any_number_of := element `"*"
19
+ one_or_more := element `"+"
20
+ element = bracketed_expression | identifier | terminal_string | terminal_regexp | terminal_character_range | any_character
21
+ bracketed_expression := `"(" spacing expression `")" spacing
22
+ terminal_string := single_quoted_string | double_quoted_string
23
+ double_quoted_string = `'"' /[^"]*/ `'"' spacing
24
+ single_quoted_string = `"'" /[^']*/ `"'" spacing
25
+ terminal_character_range := /\[[a-zA-Z\-0-9]*\]/ spacing
26
+ terminal_regexp := `'/' /(\\\/|[^\x2f])*/ `'/' spacing
27
+ any_character := `'.' spacing
28
+ end_of_line = `/[\n\r]+|\z/
29
+ spacing = `/[ \t]*/
@@ -0,0 +1,182 @@
1
# The TextPeg parser lives in lib/textpeg.rb, so that is the feature to load.
require 'textpeg'
2
+
3
# Name conversions used when generating parser source.
class String

  # "text_peg" => "TextPeg", "foo/bar_baz" => "Foo::BarBaz"
  # (taken from the ActiveSupport inflector)
  def to_class_name
    namespaced = gsub(/\/(.?)/) { "::#{$1.upcase}" }
    namespaced.gsub(/(?:^|_)(.)/) { $1.upcase }
  end

  # "TextPeg" => "text_peg", "Foo::BarBaz" => "foo/bar_baz"
  # (taken from the ActiveSupport inflector)
  def to_method_name
    name = gsub(/::/, '/')
    name = name.gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
    name = name.gsub(/([a-z\d])([A-Z])/,'\1_\2')
    name.tr("-", "_").downcase
  end

end
20
+
21
# Visitor that turns a parsed text grammar (see TextPeg) into the Ruby source
# of a RubyPeg subclass. The first rule of the grammar names the class and
# becomes its root.
class TextPeg2RubyPeg

  # Returns the Ruby source generated from the grammar text.
  def self.parse_to_ruby(text_peg)
    TextPeg.parse(text_peg).visit(TextPeg2RubyPeg.new)
  end

  # Generates the parser class, evaluates it and returns the class.
  # Raises ArgumentError if the grammar defines no rules.
  #
  # NOTE: this evals generated code, so only use it with trusted grammars.
  def self.parse_to_loaded_class(text_peg)
    builder = TextPeg2RubyPeg.new
    ruby = TextPeg.parse(text_peg).visit(builder)
    raise ArgumentError, "grammar defines no rules" unless builder.class_name
    Kernel.eval(ruby)
    Kernel.eval(builder.class_name)
  end

  # As parse_to_loaded_class, reading the grammar from a file.
  def self.parse_file_to_loaded_class(filename)
    parse_to_loaded_class File.read(filename)
  end

  attr_accessor :ruby,:tabs,:class_name #:nodoc:

  RESERVED_WORDS = %w{index text_to_parse cache sequences parse ignore any_character optional one_or_more any_number_of sequence followed_by not_followed_by uncached_terminal uncached_terminal_regexp uncached_terminal_string create_terminal_node create_non_terminal_node uncached_node terminal node put_in_sequence cached? cached cache pretty_print_cache} #:nodoc:

  # Returns the rule name, prefixed with an underscore (and a warning on
  # stderr) when it would clash with a method of the generated parser.
  def identifier(name) #:nodoc:
    return name.to_s unless RESERVED_WORDS.include?(name.to_s)
    $stderr.puts "Identifier #{name} clashes with a reserved word in the parser, replacing with _#{name}"
    "_#{name}"
  end

  # Top of the grammar: emits every rule and returns the generated source.
  # State is reset first so that a builder can be used more than once; an
  # empty grammar produces an empty string.
  def text_peg(*definitions) #:nodoc:
    self.ruby = []
    self.tabs = 0
    self.class_name = nil
    definitions.each { |d| d.visit(self) }
    close_class
    to_ruby
  end

  # A rule written with "=": emitted as a plain method.
  def definition(identifier,expression) #:nodoc:
    non_clashing_name = identifier.visit(self)
    ensure_class_defined non_clashing_name
    line "def #{non_clashing_name.to_method_name}"
    indent
    line expression.visit(self)
    outdent
    line "end"
    line
  end

  # A rule written with ":=": emitted as a method wrapping its body in a node
  # that keeps the original (unprefixed) rule name as its type.
  def node(identifier,expression) #:nodoc:
    original_name = identifier.to_s
    non_clashing_name = identifier.visit(self)
    ensure_class_defined non_clashing_name
    line "def #{non_clashing_name.to_method_name}"
    indent
    line "node :#{original_name.to_method_name} do"
    indent
    line expression.visit(self)
    outdent
    line "end"
    outdent
    line "end"
    line
  end

  def define_class(name) #:nodoc:
    self.class_name = name.to_class_name
    line "require 'rubypeg'"
    line ""
    line "class #{class_name} < RubyPeg"
    indent
    line
  end

  def define_root(name) #:nodoc:
    line "def root"
    indent
    line name.to_method_name
    outdent
    line "end"
    line
  end

  def not_followed_by(element) #:nodoc:
    "not_followed_by { #{element.visit(self)} }"
  end

  def followed_by(element) #:nodoc:
    "followed_by { #{element.visit(self)} }"
  end

  def ignored(element) #:nodoc:
    "ignore { #{element.visit(self)} }"
  end

  def optional(element) #:nodoc:
    "optional { #{element.visit(self)} }"
  end

  def one_or_more(element) #:nodoc:
    "one_or_more { #{element.visit(self)} }"
  end

  def any_number_of(element) #:nodoc:
    "any_number_of { #{element.visit(self)} }"
  end

  def sequence(*elements) #:nodoc:
    elements.map { |e| e.visit(self) }.join(" && ")
  end

  def alternatives(*elements) #:nodoc:
    elements.map { |e| e.visit(self) }.join(" || ")
  end

  def bracketed_expression(expression) #:nodoc:
    "(#{expression.visit(self)})"
  end

  def terminal_string(string) #:nodoc:
    %Q{terminal(#{string.visit(self).inspect})}
  end

  def terminal_regexp(regexp) #:nodoc:
    "terminal(/#{regexp.visit(self)}/)"
  end

  def terminal_character_range(regexp) #:nodoc:
    "terminal(/#{regexp.visit(self)}/)"
  end

  def any_character #:nodoc:
    "any_character"
  end

  # Closes the class opened by define_class; does nothing if no rule was seen.
  def close_class #:nodoc:
    return unless class_name
    outdent
    line "end\n"
  end

  def line(string = "") #:nodoc:
    ruby << "#{" "*tabs}#{string}"
  end

  def indent #:nodoc:
    self.tabs = tabs + 1
  end

  def outdent #:nodoc:
    self.tabs = tabs - 1
  end

  def to_ruby #:nodoc:
    ruby.join("\n")
  end

  private

  # The first rule of a grammar opens the class and becomes its root.
  def ensure_class_defined(name)
    return if class_name
    define_class name
    define_root name
  end

end
@@ -0,0 +1,22 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
2
+ require 'rubypeg'
3
+
4
# Minimal grammar for the spec: a :root node holding whatever
# any_character matches.
class AnyCharacter < RubyPeg
  def root
    node(:root) { any_character }
  end
end
11
+
12
describe AnyCharacter do

  # Only the first character is consumed; the rest of the text is left alone.
  it "matches one of any character" do
    parsed = AnyCharacter.parse("abcd")
    parsed.to_ast.should == [:root,'a']
  end

  # With nothing to consume the parse fails.
  it "doesn't match no character" do
    parsed = AnyCharacter.parse("")
    parsed.should == nil
  end

end