rubypeg 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rubypeg.rb ADDED
@@ -0,0 +1,313 @@
1
+ # This file contains all the elements that are required
2
+ # at runtime by a RubyPeg parser.
3
+ #
4
+ # You can either distribute it in your source code
5
+ # or include the rubypeg gem as a dependency for
6
+ # your source
7
+
8
+ # By default all non terminals that
9
+ # are returned by RubyPeg#parse are Arrays
10
+ # that have been extended with the NonTerminalNode
11
+ # module
12
+ #
13
+ # If we consider this example:
14
+ # class BasketPeg < RubyPeg
15
+ # def root
16
+ # node :basket do
17
+ # one_or_more { items }
18
+ # end
19
+ # end
20
+ #
21
+ # def items
22
+ # node :item do
23
+ # number && optional_space && fruit && optional_space
24
+ # end
25
+ # end
26
+ #
27
+ # def number
28
+ # terminal(/\d+/)
29
+ # end
30
+ #
31
+ # def fruit
32
+ # node :fruit do
33
+ # (terminal("apple") || terminal("pear")) && ignore{ optional{ terminal("s") } }
34
+ # end
35
+ # end
36
+ #
37
+ # def optional_space
38
+ # ignore{ optional{ terminal(" ") }}
39
+ # end
40
+ # end
41
+ # Then
42
+ # BasketPeg.parse("1 apple 2 apples 3 pears").should be_kind_of(NonTerminalNode)
43
+ #
44
+ # This is an array of children of this non terminal.
45
+ # The children may be other non-terminals or terminals
46
+ # The array will be empty if there are no children.
47
+ #
48
+ # basket = BasketPeg.parse("1 apple 2 apples 3 pears")
49
+ # basket.class.should == Array
50
+ # basket.size.should == 3
51
+ # basket.first.should be_kind_of(NonTerminalNode)
52
+ # basket.first.type.should == :item
53
+ # basket.first.class.should == Array
54
+ # basket.first.size.should == 2
55
+ # basket.first.first.should be_kind_of(TerminalNode)
56
+ # basket.first.first.should == "1"
57
+ # basket.first.last.should be_kind_of(NonTerminalNode)
58
+ # basket.first.last.type.should == :fruit
59
+ # basket.first.last.class.should == Array
60
+ # basket.first.last.size.should == 1
61
+ # basket.first.last.first.should be_kind_of(TerminalNode)
62
+ # basket.first.last.first.should == "apple"
63
module NonTerminalNode

  # The symbol that was given to RubyPeg#node when this node was created.
  #   BasketPeg.parse("1 apple 2 apples 3 pears").type.should == :basket
  attr_accessor :type

  # A quick way of carrying out the visitor pattern on the parsed structure.
  #
  # If no visitor is supplied then a nested array of child nodes is returned,
  # with terminals left as strings. A node with exactly one child collapses
  # to the result of visiting that child:
  #   BasketPeg.parse("1 apple 2 apples 3 pears").visit.should == [["1", "apple"], ["2", "apple"], ["3", "pear"]]
  #
  # If a visitor is supplied, then each non terminal node checks whether the
  # visitor responds to a method named after the node's type. If it does, that
  # method is called with the children of the node as arguments and its result
  # is returned. If it doesn't, #visit is called recursively on the children.
  # E.g.:
  #   class BasketPegBuilderExample
  #     attr_accessor :total
  #
  #     def initialize
  #       @total = 0
  #     end
  #
  #     def item(number, kind)
  #       @total = @total + (number.to_f * kind.visit(self).to_f)
  #     end
  #
  #     def fruit(kind_of_fruit)
  #       case kind_of_fruit
  #       when "apple"; 3.0
  #       when "pear"; 1.0
  #       else 10.0
  #       end
  #     end
  #   end
  #   counter = BasketPegBuilderExample.new
  #   BasketPeg.parse("1 apple 2 apples 3 pears").visit(counter)
  #   counter.total.should == 12.0
  def visit(builder = nil)
    # A node whose type was never assigned has no visitor method to look up,
    # and respond_to?(nil) raises TypeError, so only dispatch when type is set.
    return builder.send(type, *self) if type && builder.respond_to?(type)
    return first.visit(builder) if size == 1

    map { |child| child.visit(builder) }
  end

  # Returns the node network as an abstract syntax tree: a plain array whose
  # first element is this node's type, followed by the ast of each child.
  #
  #   BasketPeg.parse("1 apple 2 apples 3 pears").to_ast.should == [:basket, [:item, "1", [:fruit, "apple"]], [:item, "2", [:fruit, "apple"]], [:item, "3", [:fruit, "pear"]]]
  # Note that the items wrapped in ignore {} in the parser, such as the spaces
  # and the optional 's' in apples and pears, do not appear.
  def to_ast
    [type, *map(&:to_ast)]
  end

  # Lists the non-terminal node and its children. Same content as #to_ast but in string form.
  #   BasketPeg.parse("1 apple 2 apples 3 pears").inspect.should == '[:basket, [:item, "1", [:fruit, "apple"]], [:item, "2", [:fruit, "apple"]], [:item, "3", [:fruit, "pear"]]]'
  def inspect
    to_ast.inspect
  end

  # Returns the concatenation of calling to_s on each child. A terminal's to_s
  # is its text value, so:
  #   BasketPeg.parse("1 apple 2 apples 3 pears").to_s.should == "1apple2apple3pear"
  # Note that the items wrapped in ignore {} in the parser, such as the spaces
  # and the optional 's' in apples and pears, do not appear.
  def to_s
    map(&:to_s).join
  end
end
126
+
127
# Mixed into the strings that RubyPeg#terminal returns, so that terminals
# answer the same tree-walking calls as non terminal nodes.
module TerminalNode
  # A terminal is a leaf: visiting it returns the terminal itself.
  # The builder is accepted (and defaults to nil) only so that the call
  # signature is the same as NonTerminalNode#visit; it is never consulted.
  def visit(builder = nil)
    self
  end

  # The abstract syntax tree of a terminal is the terminal itself.
  def to_ast
    self
  end
end
136
+
137
# Base class for parsers. Subclasses override #root and build their grammar
# out of the combinators defined here (#terminal, #node, #sequence, #ignore,
# #optional, #one_or_more, #any_number_of, #followed_by, #not_followed_by).
#
# Results of #terminal and #node are remembered per name and start index, so
# asking for the same thing at the same position again replays the stored
# result instead of matching a second time.
class RubyPeg

  # Creates a new parser and parses the text with it. See #parse
  def self.parse(text_to_parse)
    new.parse(text_to_parse)
  end

  # As RubyPeg.parse, but afterwards prints the cache with #pretty_print_cache.
  # Returns the result of the parse.
  def self.parse_and_dump(text_to_parse, dump_positive_matches_only = false)
    parser = new
    result = parser.parse(text_to_parse)
    parser.pretty_print_cache(dump_positive_matches_only)
    result
  end

  attr_accessor :index, :text_to_parse, :sequences

  # Writer only: the private #cache(name, i, result) method defined further
  # down takes the reader's name, so the cache is read through @cache.
  attr_writer :cache

  # Resets the parser state and returns whatever #root returns for the text:
  # a node when the grammar matches at the start of the text, otherwise nil.
  def parse(text_to_parse)
    self.index = 0
    self.text_to_parse = text_to_parse
    self.cache = {}
    self.sequences = [[]]
    root
  end

  # The start rule. Subclasses override this; by default it matches everything.
  def root
    terminal(/.*/m)
  end

  # Matches the block as a sequence but returns :ignore rather than the
  # results, so that the matched items are dropped by the enclosing sequence.
  # Returns nil if the block does not match.
  def ignore(&block)
    sequence(&block) ? :ignore : nil
  end

  # Matches a single character (as matched by the regexp /./).
  def any_character
    terminal(/./)
  end

  # Always succeeds: returns the block's result, or :ignore if it failed.
  def optional
    yield || :ignore
  end

  # Calls the block repeatedly until it fails. Returns the array of results,
  # or nil if the block did not succeed even once.
  def one_or_more
    results = []
    last_index = index
    while (result = yield)
      results << result
      # A success that consumed no input would succeed forever; stop after it.
      break if index == last_index
      last_index = index
    end
    results.empty? ? nil : results
  end

  # Calls the block repeatedly until it fails. Always returns the array of
  # results, which is empty (but still truthy) if the block never succeeded.
  def any_number_of
    results = []
    last_index = index
    while (result = yield)
      results << result
      # A success that consumed no input would succeed forever; stop after it.
      break if index == last_index
      last_index = index
    end
    results
  end

  # Collects everything that #terminal and #node produce while the block runs.
  # If the block returns a truthy value the collected items, minus any :ignore
  # markers, are returned. Otherwise the index is rewound to where the
  # sequence started and nil is returned.
  def sequence
    start_index = index
    sequences.push([])
    matched = yield
    collected = sequences.pop
    if matched
      collected.delete_if { |item| item == :ignore }
      collected
    else
      self.index = start_index
      nil
    end
  end

  # Positive lookahead: :ignore if the block matches here, nil otherwise.
  # The index is restored either way, so no input is consumed.
  def followed_by(&block)
    start_index = index
    matched = sequence(&block)
    self.index = start_index
    matched ? :ignore : nil
  end

  # Negative lookahead: :ignore if the block does not match here, nil if it does.
  def not_followed_by(&block)
    followed_by(&block) ? nil : :ignore
  end

  # Matches a Regexp, or the to_s of anything else, at the current index.
  # On success the index moves past the match and the matched text (extended
  # with TerminalNode) is added to the current sequence and returned.
  # Returns nil on failure.
  def terminal(t)
    return put_in_sequence(cached(t)) if cached?(t)

    start_index = index # must be read before matching moves the index
    put_in_sequence(cache(t, start_index, uncached_terminal(t)))
  end

  # Matches the block as a sequence and wraps the results in an array extended
  # with NonTerminalNode whose type is t. The node is added to the current
  # sequence and returned. Returns nil if the block does not match.
  def node(t, &block)
    return put_in_sequence(cached(t)) if cached?(t)

    start_index = index # must be read before matching moves the index
    put_in_sequence(cache(t, start_index, uncached_node(t, &block)))
  end

  # Prints one line per character of the parsed text: the character, its index
  # and then every cache entry that starts at that index. If only_if_match is
  # true, entries that recorded a failed match are left out.
  def pretty_print_cache(only_if_match = false)
    (0...text_to_parse.size).each do |i|
      print "#{text_to_parse[i].inspect[1...-1]}\t#{i}\t"
      @cache.each do |name, indexes|
        entry = indexes[i]
        next unless entry
        next if only_if_match && !entry.first

        print "[#{name.inspect},#{entry.first.inspect}] "
      end
      print "\n"
    end
  end

  private

  def uncached_terminal(t)
    t.is_a?(Regexp) ? uncached_terminal_regexp(t) : uncached_terminal_string(t.to_s)
  end

  # Succeeds only if the first match at or after the index starts at the index.
  def uncached_terminal_regexp(t)
    return nil unless index == text_to_parse.index(t, index)

    match = Regexp.last_match
    self.index = match.end(0)
    create_terminal_node(match[0])
  end

  def uncached_terminal_string(t)
    # Compare only the text at the current position rather than searching the
    # rest of the input for t.
    return nil unless text_to_parse[index, t.size] == t

    self.index = index + t.size
    # Extend a copy: the caller's string may be frozen, and should not be
    # turned into a TerminalNode behind the caller's back.
    create_terminal_node(t.dup)
  end

  def create_terminal_node(text)
    text.extend(TerminalNode)
  end

  def uncached_node(type, &block)
    start_index = index
    children = sequence(&block)
    return create_non_terminal_node(type, children) if children

    self.index = start_index
    nil
  end

  def create_non_terminal_node(type, children_array)
    children_array.extend(NonTerminalNode)
    children_array.type = type
    children_array
  end

  # Adds a successful result to the innermost open sequence. Returns the result.
  def put_in_sequence(result)
    sequences.last.push(result) if result
    result
  end

  # True if a result (successful or not) is stored for name at the current index.
  def cached?(name)
    @cache.key?(name) && @cache[name].key?(index)
  end

  # Replays a stored result: moves the index to where the original attempt
  # left it and returns what that attempt returned.
  def cached(name)
    result, end_index = @cache[name][index]
    self.index = end_index
    result
  end

  # Stores result for name at start index i, together with the index the
  # parser has now reached. Returns the result.
  def cache(name, i, result)
    (@cache[name] ||= {})[i] = [result, index]
    result
  end

end
data/lib/textpeg.rb ADDED
@@ -0,0 +1,159 @@
1
+ require 'rubypeg'
2
+
3
# Parser for the text form of a peg grammar. The comment above each method
# gives the grammar rule it implements: rules written with := wrap their
# match in a node, rules written with = do not, and a leading backtick marks
# an element whose match is ignored.
class TextPeg < RubyPeg

  # Parsing starts with the whole grammar.
  def root
    text_peg
  end

  # text_peg := (spacing (node | definition))*
  def text_peg
    node(:text_peg) { any_number_of { spacing && (_node || definition) } }
  end

  # node := identifier assigns expression end_of_line
  def _node
    node(:node) { identifier && assigns && expression && end_of_line }
  end

  # definition := identifier equals expression end_of_line
  def definition
    node(:definition) { identifier && equals && expression && end_of_line }
  end

  # identifier := /[a-zA-Z_][a-zA-Z0-9_]*/ spacing
  def identifier
    node(:identifier) { terminal(/[a-zA-Z_][a-zA-Z0-9_]*/) && spacing }
  end

  # assigns = `":=" spacing
  def assigns
    ignore { terminal(":=") } && spacing
  end

  # equals = `"=" spacing
  def equals
    ignore { terminal("=") } && spacing
  end

  # expression = alternatives | sequence
  def expression
    alternatives || _sequence
  end

  # sequence := (elements spacing)+
  def _sequence
    node(:sequence) { one_or_more { elements && spacing } }
  end

  # alternatives := elements (divider elements)+
  def alternatives
    node(:alternatives) { elements && one_or_more { divider && elements } }
  end

  # divider = `"|" spacing
  def divider
    ignore { terminal("|") } && spacing
  end

  # elements = prefixed | suffixed | element
  def elements
    prefixed || suffixed || element
  end

  # prefixed = ignored | not_followed_by | followed_by
  def prefixed
    ignored || _not_followed_by || _followed_by
  end

  # suffixed = optional | any_number_of | one_or_more
  def suffixed
    _optional || _any_number_of || _one_or_more
  end

  # not_followed_by := `"!" element
  def _not_followed_by
    node(:not_followed_by) { ignore { terminal("!") } && element }
  end

  # followed_by := `"&" element
  def _followed_by
    node(:followed_by) { ignore { terminal("&") } && element }
  end

  # ignored := `"`" element
  def ignored
    node(:ignored) { ignore { terminal("`") } && element }
  end

  # optional := element `"?"
  def _optional
    node(:optional) { element && ignore { terminal("?") } }
  end

  # any_number_of := element `"*"
  def _any_number_of
    node(:any_number_of) { element && ignore { terminal("*") } }
  end

  # one_or_more := element `"+"
  def _one_or_more
    node(:one_or_more) { element && ignore { terminal("+") } }
  end

  # element = bracketed_expression | identifier | terminal_string | terminal_regexp | terminal_character_range | any_character
  def element
    bracketed_expression ||
      identifier ||
      terminal_string ||
      terminal_regexp ||
      terminal_character_range ||
      _any_character
  end

  # bracketed_expression := `"(" spacing expression `")" spacing
  def bracketed_expression
    node(:bracketed_expression) do
      ignore { terminal("(") } && spacing && expression && ignore { terminal(")") } && spacing
    end
  end

  # terminal_string := single_quoted_string | double_quoted_string
  def terminal_string
    node(:terminal_string) { single_quoted_string || double_quoted_string }
  end

  # double_quoted_string = `'"' /[^"]*/ `'"' spacing
  def double_quoted_string
    ignore { terminal("\"") } && terminal(/[^"]*/) && ignore { terminal("\"") } && spacing
  end

  # single_quoted_string = `"'" /[^']*/ `"'" spacing
  def single_quoted_string
    ignore { terminal("'") } && terminal(/[^']*/) && ignore { terminal("'") } && spacing
  end

  # terminal_character_range := /\[[a-zA-Z\-0-9]*\]/ spacing
  def terminal_character_range
    node(:terminal_character_range) { terminal(/\[[a-zA-Z\-0-9]*\]/) && spacing }
  end

  # terminal_regexp := `'/' /(\\\/|[^\x2f])*/ `'/' spacing
  def terminal_regexp
    node(:terminal_regexp) do
      ignore { terminal("/") } && terminal(/(\\\/|[^\x2f])*/) && ignore { terminal("/") } && spacing
    end
  end

  # any_character := `'.' spacing
  def _any_character
    node(:any_character) { ignore { terminal(".") } && spacing }
  end

  # end_of_line = `/[\n\r]+|\z/
  def end_of_line
    ignore { terminal(/[\n\r]+|\z/) }
  end

  # spacing = `/[ \t]*/
  def spacing
    ignore { terminal(/[ \t]*/) }
  end

end
data/lib/textpeg.txt ADDED
@@ -0,0 +1,29 @@
1
+ text_peg := (spacing (node | definition))*
2
+ node := identifier assigns expression end_of_line
3
+ definition := identifier equals expression end_of_line
4
+ identifier := /[a-zA-Z_][a-zA-Z0-9_]*/ spacing
5
+ assigns = `":=" spacing
6
+ equals = `"=" spacing
7
+ expression = alternatives | sequence
8
+ sequence := (elements spacing)+
9
+ alternatives := elements (divider elements)+
10
+ divider = `"|" spacing
11
+ elements = prefixed | suffixed | element
12
+ prefixed = ignored | not_followed_by | followed_by
13
+ suffixed = optional | any_number_of | one_or_more
14
+ not_followed_by := `"!" element
15
+ followed_by := `"&" element
16
+ ignored := `"`" element
17
+ optional := element `"?"
18
+ any_number_of := element `"*"
19
+ one_or_more := element `"+"
20
+ element = bracketed_expression | identifier | terminal_string | terminal_regexp | terminal_character_range | any_character
21
+ bracketed_expression := `"(" spacing expression `")" spacing
22
+ terminal_string := single_quoted_string | double_quoted_string
23
+ double_quoted_string = `'"' /[^"]*/ `'"' spacing
24
+ single_quoted_string = `"'" /[^']*/ `"'" spacing
25
+ terminal_character_range := /\[[a-zA-Z\-0-9]*\]/ spacing
26
+ terminal_regexp := `'/' /(\\\/|[^\x2f])*/ `'/' spacing
27
+ any_character := `'.' spacing
28
+ end_of_line = `/[\n\r]+|\z/
29
+ spacing = `/[ \t]*/
@@ -0,0 +1,182 @@
1
+ require 'text_peg'
2
+
3
# Name conversions used when generating parser source.
class String

  # Turns an underscored, slash separated name into a class name:
  # "/x" becomes "::X", and the character at the start of a line or after an
  # underscore is upcased (the underscore itself is dropped).
  # Taken from ActiveSupport inflector
  def to_class_name
    namespaced = gsub(/\/(.?)/) { "::#{Regexp.last_match(1).upcase}" }
    namespaced.gsub(/(?:^|_)(.)/) { Regexp.last_match(1).upcase }
  end

  # Turns a class name into an underscored, slash separated, lower case name.
  # Taken from ActiveSupport inflector
  def to_method_name
    path = gsub(/::/, '/')
    acronyms_split = path.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
    words_split = acronyms_split.gsub(/([a-z\d])([A-Z])/, '\1_\2')
    words_split.tr("-", "_").downcase
  end

end
20
+
21
# Visitor that turns a parsed text peg grammar into the ruby source of a
# RubyPeg subclass. The first rule in the grammar names the generated class
# and becomes its root.
class TextPeg2RubyPeg

  # Parses the grammar and returns the generated ruby source as a string.
  def self.parse_to_ruby(text_peg)
    TextPeg.parse(text_peg).visit(TextPeg2RubyPeg.new)
  end

  # Parses the grammar, evaluates the generated source and returns whatever
  # the generated class name evaluates to.
  # NOTE(review): this runs eval on source generated from the grammar text,
  # so it should only be given grammars from a trusted origin.
  def self.parse_to_loaded_class(text_peg)
    builder = TextPeg2RubyPeg.new
    ruby = TextPeg.parse(text_peg).visit(builder)
    Kernel.eval(ruby)
    Kernel.eval(builder.class_name)
  end

  # As parse_to_loaded_class, reading the grammar from the named file.
  # File.read is used so that the name is only ever treated as a path.
  def self.parse_file_to_loaded_class(filename)
    parse_to_loaded_class File.read(filename)
  end

  attr_accessor :ruby,:tabs,:class_name #:nodoc:

  # Names a grammar rule may not use as a method name in the generated parser.
  RESERVED_WORDS = %w{index text_to_parse cache sequences parse ignore any_character optional one_or_more any_number_of sequence followed_by not_followed_by uncached_terminal uncached_terminal_regexp uncached_terminal_string create_terminal_node create_non_terminal_node uncached_node terminal node put_in_sequence cached? cached cache pretty_print_cache}.freeze #:nodoc:

  # Returns the name unchanged, or prefixed with an underscore (after a
  # warning on $stderr) if it is one of the RESERVED_WORDS.
  def identifier(name) #:nodoc:
    return name.to_s unless RESERVED_WORDS.include?(name.to_s)
    $stderr.puts "Identifier #{name} clashes with a reserved word in the parser, replacing with _#{name}"
    "_#{name}"
  end

  # Top of the grammar: visits every rule, closes the class and returns the
  # generated source.
  def text_peg(*definitions) #:nodoc:
    self.ruby = []
    self.tabs = 0
    definitions.each { |d| d.visit(self) }
    close_class
    to_ruby
  end

  # A rule written with "=": emits a method whose body is the expression.
  def definition(identifier,expression) #:nodoc:
    non_clashing_name = identifier.visit(self)
    open_class_for_first_rule non_clashing_name
    line "def #{non_clashing_name.to_method_name}"
    indent
    line expression.visit(self)
    outdent
    line "end"
    line
  end

  # A rule written with ":=": emits a method whose body wraps the expression
  # in a node. The node is named after the rule as written in the grammar,
  # even when the method had to be renamed to avoid a reserved word.
  def node(identifier,expression) #:nodoc:
    original_name = identifier.to_s
    non_clashing_name = identifier.visit(self)
    open_class_for_first_rule non_clashing_name
    line "def #{non_clashing_name.to_method_name}"
    indent
    line "node :#{original_name.to_method_name} do"
    indent
    line expression.visit(self)
    outdent
    line "end"
    outdent
    line "end"
    line
  end

  # Records the class name and emits the require and the class header.
  def define_class(name) #:nodoc:
    self.class_name = name.to_class_name
    line "require 'rubypeg'"
    line ""
    line "class #{class_name} < RubyPeg"
    indent
    line
  end

  # Emits a root method that calls the named rule.
  def define_root(name) #:nodoc:
    line "def root"
    indent
    line name.to_method_name
    outdent
    line "end"
    line
  end

  def not_followed_by(element) #:nodoc:
    "not_followed_by { #{element.visit(self)} }"
  end

  def followed_by(element) #:nodoc:
    "followed_by { #{element.visit(self)} }"
  end

  def ignored(element) #:nodoc:
    "ignore { #{element.visit(self)} }"
  end

  def optional(element) #:nodoc:
    "optional { #{element.visit(self)} }"
  end

  def one_or_more(element) #:nodoc:
    "one_or_more { #{element.visit(self)} }"
  end

  def any_number_of(element) #:nodoc:
    "any_number_of { #{element.visit(self)} }"
  end

  def sequence(*elements) #:nodoc:
    elements.map { |e| e.visit(self) }.join(" && ")
  end

  def alternatives(*elements) #:nodoc:
    elements.map { |e| e.visit(self) }.join(" || ")
  end

  def bracketed_expression(expression) #:nodoc:
    "(#{expression.visit(self)})"
  end

  # The string is emitted through inspect, so it is quoted and escaped.
  def terminal_string(string) #:nodoc:
    %Q{terminal(#{string.visit(self).inspect})}
  end

  def terminal_regexp(regexp) #:nodoc:
    "terminal(/#{regexp.visit(self)}/)"
  end

  def terminal_character_range(regexp) #:nodoc:
    "terminal(/#{regexp.visit(self)}/)"
  end

  def any_character #:nodoc:
    "any_character"
  end

  def close_class #:nodoc:
    outdent
    line "end\n"
  end

  # Appends one line of output, indented to the current depth.
  def line(string = "") #:nodoc:
    ruby << "#{" " * tabs}#{string}"
  end

  def indent #:nodoc:
    self.tabs = tabs + 1
  end

  def outdent #:nodoc:
    self.tabs = tabs - 1
  end

  def to_ruby #:nodoc:
    ruby.join("\n")
  end

  private

  # Emits the class header and root method the first time a rule is seen.
  def open_class_for_first_rule(name) #:nodoc:
    return if class_name
    define_class name
    define_root name
  end

end
@@ -0,0 +1,22 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
2
+ require 'rubypeg'
3
+
4
# Smallest possible grammar around any_character: the root node holds
# whatever a single any_character call matches.
class AnyCharacter < RubyPeg
  def root
    node(:root) { any_character }
  end
end
11
+
12
describe AnyCharacter do

  # Only the first character is consumed; the rest of the input is left over.
  it "matches one of any character" do
    ast = AnyCharacter.parse("abcd").to_ast
    ast.should == [:root,'a']
  end

  # With no input there is nothing for any_character to match.
  it "doesn't match no character" do
    result = AnyCharacter.parse("")
    result.should == nil
  end

end