mok-parser 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,392 @@
1
+ # Copyright (C) garin <garin54@gmail.com> 2011
2
+ # See the included file COPYING for details.
3
+ class BlockParser
4
+ token HEADER WHITELINE HEADLINE PLAIN DESCLINE_TITLE DESCLINE PREFORMAT QUOTE INDENT DEDENT ITEMLIST ITEMLISTCONTINUE NUMLIST TABLELINE
5
+ preclow
6
+ nonassoc DUMMY
7
+ prechigh
8
+ options no_result_var
9
+
10
+ rule
11
+ document : blocks{ val[0].compact }
12
+
13
+ blocks : block { val }
14
+ | blocks block { [val[0], val[1]].flatten }
15
+
16
+ block : header
17
+ | paragraph { val[0] }
18
+ | preformat_block
19
+ | quote_block
20
+ | itemlist_blocks { ItemList.new(val[0].flatten) }
21
+ | numlist_blocks { NumList.new(val[0].flatten) }
22
+ | desc_block
23
+ | table_block
24
+ | headline
25
+ | WHITELINE { WhiteLine.new }
26
+
27
+ # ----- header
28
+ header : HEADER {
29
+ name, val = val[0].split(":",2)
30
+ if name.nil? or val.nil?
31
+ else
32
+ @metadata.update({name.strip.to_sym => val.strip })
33
+ end
34
+ nil }
35
+
36
+ # ----- headline
37
+ headline : HEADLINE { # val[0] is like [level, title, index]
38
+ title = val[0][1]
39
+ level = val[0][0]
40
+ if level == 1
41
+ @metadata[:subject] ||= title
42
+ else
43
+ @head_index.update(level)
44
+ end
45
+
46
+ @index[:head] ||= []
47
+ @index[:head] << {:title => title, :level => level, :index => @head_index.to_s}
48
+ HeadLine.new([level, title, @index[:head].size, @head_index.to_s]) }
49
+ # ----- paragraph
50
+ paragraph : plain_texts { Paragraph.new @inline_parser.parse(val) }
51
+
52
+ plain_texts : PLAIN { val[0] }
53
+ | plain_texts PLAIN { val[0] + val[1] }
54
+
55
+ # ----- desc
56
+ desc_block : DESCLINE_TITLE desclines {
57
+ if val[1].nil?
58
+ lines = [Plain.new("")]
59
+ else
60
+ lines = @inline_parser.parse(val[1])
61
+ end
62
+ Desc.new([val[0], lines])
63
+ }
64
+
65
+
66
+ desclines : DESCLINE { val[0] }
67
+ | desclines DESCLINE { val[0] + val[1] }
68
+ |
69
+
70
+
71
+ # ----- preformat
72
+ preformat_block : preformats { pr = val[0].strip ; Preformat.new([pr]) unless pr.empty? }
73
+
74
+ preformats : PREFORMAT { val[0] }
75
+ | preformats PREFORMAT { val[0] + val[1] }
76
+
77
+ # ----- preformat end
78
+ # ----- quote
79
+ quote_block : quotes { qu = val[0].strip ; Quote.new(@inline_parser.parse(qu)) unless qu.empty? }
80
+
81
+ quotes : QUOTE { val[0] }
82
+ | quotes QUOTE { val[0] + val[1] }
83
+ # ----- quote end
84
+
85
+ # ----- itemlist
86
+ itemlist_blocks : itemlist_block { val[0] }
87
+ | itemlist_blocks itemlist_block { val[0] << val[1] }
88
+
89
+ itemlist_block : itemlists { val[0] }
90
+ | itemlist_indent_blocks { val[0] }
91
+
92
+ itemlist_indent_blocks : INDENT itemlist_blocks DEDENT { val }
93
+
94
+ itemlists : itemlistitems {[PlainTextBlock.new(@inline_parser.parse(val[0]))]}
95
+ | itemlists itemlistitems { val[0] << PlainTextBlock.new(@inline_parser.parse(val[1])) }
96
+
97
+ itemlistitems : ITEMLIST { val[0] }
98
+ | ITEMLIST itemlist_continues { val[0] + val[1] }
99
+
100
+ itemlist_continues : ITEMLISTCONTINUE { "\n" + val[0] }
101
+ | itemlist_continues ITEMLISTCONTINUE { val[0] + "\n" + val[1] }
102
+
103
+ # ----- itemlist end
104
+ # ----- numlist
105
+ numlist_blocks : numlist_block { val[0] }
106
+ | numlist_blocks numlist_block { val[0] << val[1] }
107
+
108
+ numlist_block : numlists { val[0] }
109
+ | numlist_indent_blocks { val[0] }
110
+
111
+ numlist_indent_blocks : INDENT numlist_blocks DEDENT { val }
112
+
113
+ numlists : NUMLIST { [PlainTextBlock.new(@inline_parser.parse(val[0]))] }
114
+ | numlists NUMLIST { val[0] << PlainTextBlock.new(@inline_parser.parse(val[1])) }
115
+
116
+ # ----- numlist end
117
+
118
+ # ----- tableblock
119
+ table_block : tablelines { Table.new(val[0]) }
120
+
121
+ tablelines : TABLELINE { val }
122
+ | tablelines TABLELINE { val[0] << val[1] }
123
+
124
+ # ----- tableblock end
125
+ end # end of rule
126
+
127
+ ---- inner
128
+ include ParserUtility
129
+
130
# One raw source line as handed to the block lexer.
#
# +content+ is the line text, or nil once the source is exhausted.
# +type+ and +no+ are exposed for callers but are not assigned by this class.
class Line
  attr_reader :no
  attr_accessor :type, :content

  # @param line [String, nil] the raw line text
  def initialize(line)
    @content = line
  end

  # Number of leading whitespace characters in the line.
  #
  # @return [Integer] 0 when the content is nil, otherwise the length of the
  #   leading whitespace run
  def get_line_indent
    return 0 if @content.nil?

    @content[/\A\s*/].size
  end

  # Both names must be aliased after get_line_indent is defined. Aliasing
  # indent_size earlier bound it to a plain reader for @indent, which is
  # never assigned, so it always returned nil.
  alias indent get_line_indent
  alias indent_size get_line_indent
end
147
+
148
+
149
# Sets up a fresh block parser.
#
# The inline parser is built from the same options and its index is exposed
# through #inline_index; metadata and the block index start out empty.
#
# @param options [Hash] passed through unchanged to InlineParser.new
def initialize(options = {})
  inline = InlineParser.new(options)
  @inline_parser = inline
  @metadata = {}
  @inline_index = inline.index
  @index = {}
  @head_index = HeadIndex.new
end

attr_reader :metadata, :inline_index, :index
157
+
158
# Parses a mok document into a list of block nodes.
#
# The source is first run through ERB, then split into newline-terminated
# lines that next_token consumes one at a time.
#
# NOTE(review): ERB evaluates any Ruby embedded in the document with access
# to this parser's binding. The old call asked for safe level 4 and a tainted
# binding, but that sandbox no longer exists in Ruby (Object#taint was removed
# in 3.2), so the template now runs unsandboxed. Do not feed untrusted
# documents to this method.
#
# @param src [Array<String>, String] the document, as lines or as one string
# @return the value produced by do_parse (the parsed block list)
def parse(src)
  @no = 0
  # Run the source through ERB, then re-split into "\n"-terminated lines.
  rendered = ERB.new(Array(src).join).result(binding)
  @src = rendered.split("\n").map { |s| "#{s}\n" }

  @line = Line.new("")
  @line_pre = @line.dup
  @indent_stack = []
  @current_indent = 0
  @current_type = :header
  @yydebug = true
  @view_token_type = false
  do_parse
end
173
+
174
# Racc error hook: raises a ParseError that names the offending line.
#
# The line number is the count of source lines up to and including index @no.
# The lines are joined before counting: Array#to_s returns the inspect form,
# in which newlines are escaped, so counting on it always yielded 1.
#
# @param token_id [Integer] racc token id (unused)
# @param value [Object] the token value that could not be parsed
# @param stack [Array] racc value stack (unused)
# @raise [Racc::ParseError] always
def on_error(token_id, value, stack)
  lineno = @src[0..@no].join.split("\n").size
  raise Racc::ParseError,
        "mokblockpaser: line #{lineno}: syntax error on #{value.inspect}"
end
179
+
180
# Lexer entry point called by racc: classifies the source line at index @no
# and returns a [token_symbol, value] pair.
#
# Most branches go through if_current_indent_equal, which either consumes the
# line and returns the branch's token, or returns an INDENT/DEDENT token and
# leaves the line to be classified again on the next call. Branches that
# increment @no themselves consume the line unconditionally. At end of input
# the token is [false, false].
#
# The classification depends on @current_type (the type of the previous
# token), e.g. a blank line inside a preformat block stays part of it.
#
# @return [Array] a two element [token, value] pair
def next_token
  @line_pre = @line.dup
  @line = Line.new(@src[@no])
  # puts "line: #{@line.content}" if @view_token_type
  case @line.content
  when nil
    # End of input: unwind any open indentation first, then signal EOF.
    @line.content = ""
    if_current_indent_equal("") do
      puts "b: false: #{@line.content}" if @view_token_type
      [false, false]
    end
  when /^$/
    # Blank line: continues the current preformat/quote/desc block,
    # otherwise it is a block separator.
    @line.content = ""
    if_current_indent_equal("") do
      if @current_type == :preformat
        puts "b: :PREFORMAT: #{@line.content}" if @view_token_type
        [:PREFORMAT, "\n"]
      elsif @current_type == :quote
        puts "b: :QUOTE: #{@line.content}" if @view_token_type
        [:QUOTE, "\n"]
      elsif @current_type == :descline
        puts "b: DESCLINE: #{@line.content}" if @view_token_type
        [:DESCLINE, " "]
      else
        puts "b: WHITELINE: #{@line.content}" if @view_token_type
        @current_type = :whiteline
        [:WHITELINE, :WHITELINE]
      end
    end
  when /^\#(.*)/ # comment line
    # While still in the header section a "#" line is a HEADER token;
    # afterwards it is a comment and is skipped.
    @no += 1
    if @current_type == :header
      puts "b: HEADER: #{@line.content}" if @view_token_type
      [:HEADER, $1.strip]
    else
      puts "b: COMMENT(noop): #{@line.content}" if @view_token_type
      next_token
    end
  when /^(={1,4})(?!=)\s*(?=\S)/, /^(\+{1,2})(?!\+)\s*(?=\S)/
    # Headline: the mark ("=".."====" or "+".."++") selects the level, the
    # rest of the line is the title.
    rest = $'.strip
    mark = $1
    # if_current_indent_equal("") do
    if_current_indent_equal(@line.indent) do
      @current_type = :headline
      puts "b: HEADLINE: #{@line.content}" if @view_token_type
      [:HEADLINE, [mark_to_level(mark), rest]]
    end
  when /^\s\s+(.*)/ # type == preformat
    # A line indented by two or more whitespace characters. What it means
    # depends on the block we are currently in.
    puts "b: 2 WHITE SPACE(#{@current_type}) : #{@line.content}" if @view_token_type
    case @current_type
    when :itemlist
      if @line.content =~ /^(\s*)(\*)(\s+)(.*)/
        # Nested list item.
        line = $4.strip
        if line.empty?
          @no += 1
          next_token
        else
          if_current_indent_equal(@line.indent) do
            puts "b: ITEMLIST: [#{line}]" if @view_token_type
            @current_type = :itemlist
            [:ITEMLIST, line]
          end
        end
      else
        # Indented text without a bullet continues the previous item.
        line = @line.content.strip
        if line.empty?
          @no += 1
          next_token
        else
          puts "b: ITEMLISTCONTINUE: [#{line.empty?}] --" if @view_token_type
          @no += 1
          @current_type = :itemlist
          [:ITEMLISTCONTINUE, line]
        end
      end
    when :numlist
      # Nested numbered item, either "(1) text" or "1. text". The second
      # pattern accepts multi-digit numbers, matching the top-level numlist
      # rule below; with a single \d, "10. text" kept its marker in the text.
      @line.content =~ /^(\s*)(\(\d+\))(\s+)(.*)/
      if $4.nil?
        @line.content =~ /^(\s*)(\d+\.)(\s+)(.*)/
      end
      line = $4
      line ||= @line.content.strip
      if line.empty?
        @no += 1
        next_token
      else
        if_current_indent_equal(@line.indent) do
          puts "b: NUMLIST: [#{line}]" if @view_token_type
          @current_type = :numlist
          [:NUMLIST, line]
        end
      end
    else
      # $1 is still the capture from the enclosing "when" pattern: no other
      # regexp match has run in this method on this path.
      @no += 1
      if @current_type == :descline
        @current_type = :descline
        puts "b: DESCLINE: #{@line.content}" if @view_token_type
        [:DESCLINE, $1 + "\n"]
      else
        @current_type = :preformat
        puts "b: PREFORMAT: #{$1}" if @view_token_type
        [:PREFORMAT, @line.content.sub(" ","")]
      end
    end
  when /^>\s(.*)/ # type == quote
    puts "b: 2 WHITE SPACE(#{@current_type}) : #{@line.content}" if @view_token_type
    @current_type = :quote
    puts "b: QUOTE: #{$1}" if @view_token_type
    if_current_indent_equal(@line.indent) do
      [:QUOTE, @line.content.sub("> ","")]
    end
  when /^(\:)(.*)/ # type = desclist
    if_current_indent_equal(@line.indent) do
      @current_type = :descline
      puts "b: DESCLINE_TILTE: #{$2.strip}" if @view_token_type
      [:DESCLINE_TITLE, $2.strip]
    end
  when /^(\s*)(\*)(\s+)(.*)/ # type = itemlist
    if_current_indent_equal(@line.indent) do
      puts "b: ITEMLIST: #{@line.content}" if @view_token_type
      @current_type = :itemlist
      [:ITEMLIST, $4]
    end
  when /^(\s*)(\(\d+\))(\s+)(.*)/
    if_current_indent_equal(@line.indent) do
      puts "b: NUMLIST: #{@line.content}" if @view_token_type
      @current_type = :numlist
      [:NUMLIST, $4]
    end
  when /^(\s*)(\d+\.)(\s+)(.*)/ # type = numlist
    if_current_indent_equal(@line.indent) do
      puts "b: NUMLIST: #{@line.content}" if @view_token_type
      @current_type = :numlist
      [:NUMLIST, $4]
    end
  when /^\|-.*/ # type = table head
    # Inside a table this line is a header separator and is skipped;
    # anywhere else it is treated as ordinary PLAIN text.
    if @current_type == :table
      @no += 1
      next_token
    else
      @current_type = :plain
      if_current_indent_equal(@line.indent) do
        puts "b: PLAIN: #{@line.content}" if @view_token_type
        [:PLAIN, @line.content]
      end
    end
  when /^\|.*/ # type = table
    # Table row: split on "|" and drop the empty cell before the first bar.
    @no += 1
    @current_type = :table
    lines = @line.content.chomp.split("|")
    lines.shift
    [:TABLELINE, lines]
  when /(.*)/ # type == plain
    @current_type = :plain
    if_current_indent_equal(@line.indent) do
      puts "b: PLAIN: #{@line.content}" if @view_token_type
      [:PLAIN, @line.content]
    end
  else
    # Only reachable when the content is neither nil nor a String.
    puts "raise : #{@line}"
  end
end
344
+
345
# Gate used by next_token to turn indentation changes into tokens.
#
# Compares the indent of the current line with the tracked indent level:
# * within one column of it: the line is consumed (@no advances) and the
#   block's value is returned;
# * deeper: one stack entry is pushed per two columns of extra indent, the
#   tracked level moves to the line's indent and an INDENT token is returned;
# * shallower: one stack entry is popped, the tracked level moves to the
#   line's indent only if that equals two columns per remaining entry, and a
#   DEDENT token is returned.
# In the last two cases the line is not consumed, so it is seen again on the
# next call.
#
# @param ident unused; the indent is read from @line
# @yieldreturn [Array] the token for the current line
# @return [Array] the yielded token, or [:INDENT, :INDENT] / [:DEDENT, :DEDENT]
def if_current_indent_equal(ident)
  step = 2
  line_indent = @line.indent
  puts "current: #{@current_indent}, line: #{line_indent}, stack #{@indent_stack.size}:" if @view_token_type

  delta = @current_indent - line_indent
  if delta.abs <= 1
    @no += 1
    return yield
  end

  if delta < 0
    ((line_indent - @current_indent) / step).times { @indent_stack.push("") }
    @current_indent = line_indent
    puts "b: INDENT" if @view_token_type
    return [:INDENT, :INDENT]
  end

  @indent_stack.pop
  @current_indent = line_indent if line_indent == @indent_stack.size * step
  puts "b: DEDENT" if @view_token_type
  [:DEDENT, :DEDENT]
end
366
+
367
+ ---- header
368
+ require "parserutility"
369
+ require "mokinlineparser.tab"
370
+ require "mokelement"
371
+ require "erb"
372
+
373
+ module Mok
374
+
375
+ ---- footer
376
# Command line entry point: parse a document from stdin and dump the block
# index, the collected metadata and the rendered output of every node.
if __FILE__ == $0
  mok = BlockParser.new
  src = $stdin.readlines
  nodes = mok.parse(src)

  puts "----- index -----"
  mok.index.each do |key, val|
    puts key
    val.each { |v| p v }
  end

  puts "----- info -----"
  # NOTE(review): the parser in this file exposes #metadata, not #info, so a
  # bare mok.info would raise NoMethodError unless a mixin defines it. Keep
  # using #info when it exists and fall back to #metadata otherwise.
  p(mok.respond_to?(:info) ? mok.info : mok.metadata)

  puts "----- output -----"
  nodes.each { |n| puts n.apply }
end
392
+ end # end of module Mok