mok-parser 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,392 @@
1
+ # Copyright (C) garin <garin54@gmail.com> 2011
2
+ # See the included file COPYING for details.
3
+ class BlockParser # Racc grammar for block-level mok markup; tokens are produced by next_token in the inner section
4
+ token HEADER WHITELINE HEADLINE PLAIN DESCLINE_TITLE DESCLINE PREFORMAT QUOTE INDENT DEDENT ITEMLIST ITEMLISTCONTINUE NUMLIST TABLELINE
5
+ preclow
6
+ nonassoc DUMMY
7
+ prechigh
8
+ options no_result_var
9
+
10
+ rule
11
+ document : blocks{ val[0].compact } # compact drops the nil produced by header and by blank preformat/quote blocks
12
+
13
+ blocks : block { val }
14
+ | blocks block { [val[0], val[1]].flatten }
15
+
16
+ block : header
17
+ | paragraph { val[0] }
18
+ | preformat_block
19
+ | quote_block
20
+ | itemlist_blocks { ItemList.new(val[0].flatten) }
21
+ | numlist_blocks { NumList.new(val[0].flatten) }
22
+ | desc_block
23
+ | table_block
24
+ | headline
25
+ | WHITELINE { WhiteLine.new }
26
+
27
+ # ----- header: a "name: value" line split on the first ":"; it is stored in @metadata under a symbol key and yields nil (no node)
28
+ header : HEADER {
29
+ name, val = val[0].split(":",2)
30
+ if name.nil? or val.nil?
31
+ else
32
+ @metadata.update({name.strip.to_sym => val.strip })
33
+ end
34
+ nil }
35
+
36
+ # ----- headline: level 1 supplies the default :subject, other levels advance @head_index; every headline is appended to @index[:head]
37
+ headline : HEADLINE { # val[0] is [level, title], as emitted by next_token
38
+ title = val[0][1]
39
+ level = val[0][0]
40
+ if level == 1
41
+ @metadata[:subject] ||= title
42
+ else
43
+ @head_index.update(level)
44
+ end
45
+
46
+ @index[:head] ||= []
47
+ @index[:head] << {:title => title, :level => level, :index => @head_index.to_s}
48
+ HeadLine.new([level, title, @index[:head].size, @head_index.to_s]) }
49
+ # ----- paragraph: consecutive PLAIN values are concatenated, then handed to the inline parser
50
+ paragraph : plain_texts { Paragraph.new @inline_parser.parse(val) }
51
+
52
+ plain_texts : PLAIN { val[0] }
53
+ | plain_texts PLAIN { val[0] + val[1] }
54
+
55
+ # ----- desc: a DESCLINE_TITLE followed by zero or more DESCLINE values; an empty body becomes a single empty Plain
56
+ desc_block : DESCLINE_TITLE desclines {
57
+ if val[1].nil?
58
+ lines = [Plain.new("")]
59
+ else
60
+ lines = @inline_parser.parse(val[1])
61
+ end
62
+ Desc.new([val[0], lines])
63
+ }
64
+
65
+
66
+ desclines : DESCLINE { val[0] }
67
+ | desclines DESCLINE { val[0] + val[1] }
68
+ |
69
+
70
+
71
+ # ----- preformat: PREFORMAT values are concatenated and stripped; a block that is empty after stripping yields nil
72
+ preformat_block : preformats { pr = val[0].strip ; Preformat.new([pr]) unless pr.empty? }
73
+
74
+ preformats : PREFORMAT { val[0] }
75
+ | preformats PREFORMAT { val[0] + val[1] }
76
+
77
+ # ----- preformat end
78
+ # ----- quote: same shape as preformat, but the stripped text goes through the inline parser
79
+ quote_block : quotes { qu = val[0].strip ; Quote.new(@inline_parser.parse(qu)) unless qu.empty? }
80
+
81
+ quotes : QUOTE { val[0] }
82
+ | quotes QUOTE { val[0] + val[1] }
83
+ # ----- quote end
84
+
85
+ # ----- itemlist: items may nest via INDENT ... DEDENT; ITEMLISTCONTINUE values are appended to the preceding item after a newline
86
+ itemlist_blocks : itemlist_block { val[0] }
87
+ | itemlist_blocks itemlist_block { val[0] << val[1] }
88
+
89
+ itemlist_block : itemlists { val[0] }
90
+ | itemlist_indent_blocks { val[0] }
91
+
92
+ itemlist_indent_blocks : INDENT itemlist_blocks DEDENT { val }
93
+
94
+ itemlists : itemlistitems {[PlainTextBlock.new(@inline_parser.parse(val[0]))]}
95
+ | itemlists itemlistitems { val[0] << PlainTextBlock.new(@inline_parser.parse(val[1])) }
96
+
97
+ itemlistitems : ITEMLIST { val[0] }
98
+ | ITEMLIST itemlist_continues { val[0] + val[1] }
99
+
100
+ itemlist_continues : ITEMLISTCONTINUE { "\n" + val[0] }
101
+ | itemlist_continues ITEMLISTCONTINUE { val[0] + "\n" + val[1] }
102
+
103
+ # ----- itemlist end
104
+ # ----- numlist: same INDENT ... DEDENT nesting scheme as itemlist, without continuation lines
105
+ numlist_blocks : numlist_block { val[0] }
106
+ | numlist_blocks numlist_block { val[0] << val[1] }
107
+
108
+ numlist_block : numlists { val[0] }
109
+ | numlist_indent_blocks { val[0] }
110
+
111
+ numlist_indent_blocks : INDENT numlist_blocks DEDENT { val }
112
+
113
+ numlists : NUMLIST { [PlainTextBlock.new(@inline_parser.parse(val[0]))] }
114
+ | numlists NUMLIST { val[0] << PlainTextBlock.new(@inline_parser.parse(val[1])) }
115
+
116
+ # ----- numlist end
117
+
118
+ # ----- tableblock: each TABLELINE value is collected into one array that is passed to Table.new
119
+ table_block : tablelines { Table.new(val[0]) }
120
+
121
+ tablelines : TABLELINE { val }
122
+ | tablelines TABLELINE { val[0] << val[1] }
123
+
124
+ # ----- tableblock end
125
+ + end # end of rule
126
+
127
+ ---- inner
128
+ include ParserUtility
129
+
130
# One physical line of the source text handed to the tokenizer.
#
# The indent is derived from +content+ on every call instead of being cached,
# so it stays correct after +content+ is reassigned.
class Line
  # @param line [String, nil] raw text of the line
  def initialize(line)
    @content = line
  end

  # +no+ is never assigned inside this class, so this reader returns nil.
  attr_reader :no
  attr_accessor :type, :content

  # Number of leading whitespace characters in +content+.
  # \s also matches line terminators, so a whitespace-only line such as
  # "  \n" reports 3.
  #
  # @return [Integer] 0 when +content+ is nil
  def get_line_indent
    return 0 if @content.nil?

    @content[/\A\s*/].size
  end

  # Both aliases are created after get_line_indent exists so that they bind
  # to it. Aliasing indent_size to a bare attr_reader for @indent (which is
  # never assigned) would make indent_size always return nil.
  alias indent get_line_indent
  alias indent_size get_line_indent
end
147
+
148
+
149
+ def initialize(options = {}) # options is passed through unchanged to InlineParser.new
150
+ @inline_parser = InlineParser.new(options)
151
+ @metadata = {} # filled by the header and headline grammar actions
152
+ @inline_index = @inline_parser.index # index object obtained from the inline parser
153
+ @index = {} # the headline grammar action appends its entries under :head
154
+ @head_index = HeadIndex.new
155
+ end
156
+ attr_reader :metadata, :inline_index, :index
157
+
158
# Expand the source through ERB, reset the tokenizer state and run the
# generated parser.
#
# @param src [Array<String>, String] source lines (e.g. from IO#readlines)
#   or the whole document as one string
# @return whatever do_parse returns for the grammar
def parse(src)
  @no = 0
  # Pre-process the source with ERB.
  # NOTE(review): ERB runs any Ruby embedded in the document with this
  # parser's binding, so only trusted documents should be parsed. The former
  # safe-level argument and Object#taint gave no protection on current Ruby
  # (Object#taint was removed in 3.2) and are therefore no longer passed.
  expanded = ERB.new(Array(src).join).result(binding)

  # Re-split into newline-terminated lines; next_token indexes @src by @no.
  @src = expanded.split("\n").map { |s| "#{s}\n" }
  @line = Line.new("")
  @line_pre = @line.dup
  @indent_stack = []
  @current_indent = 0
  # Lines starting with "#" are HEADER tokens only while this is :header.
  @current_type = :header
  @yydebug = true
  @view_token_type = false
  do_parse
end
173
+
174
# Racc error hook: report a syntax error together with a source line number.
#
# The line number is the count of lines in @src[0..@no]; @no is the index of
# the next line to be read, so the figure is approximate.
#
# @param token_id [Integer] internal id of the offending token (unused)
# @param value [Object] value of the offending token
# @param stack [Array] parser value stack (unused)
# @raise [Racc::ParseError] always
def on_error(token_id, value, stack)
  # Array#to_s is an alias of inspect on Ruby 1.9+, which escapes newlines
  # and would make every error report line 1; join yields the raw text.
  lineno = @src[0..@no].join.split("\n").size
  raise Racc::ParseError,
        "mokblockpaser: line #{lineno}: syntax error on #{value.inspect}"
end
179
+
180
# Tokenizer called by the Racc runtime: classify the line at @src[@no] and
# return a [token, value] pair ([false, false] once the input is exhausted).
#
# @no is advanced either directly or inside if_current_indent_equal, which
# may return an INDENT/DEDENT token instead of running the given block; in
# that case the same line is examined again on the next call. Lines that
# produce no token (comments outside the header, empty list items, table
# rules) are skipped by advancing @no and calling next_token again.
# @current_type records the kind of the previous line and selects how blank
# and indented lines are interpreted.
def next_token
  @line_pre = @line.dup
  @line = Line.new(@src[@no])
  # puts "line: #{@line.content}" if @view_token_type
  case @line.content
  when nil
    # End of input.
    @line.content = ""
    if_current_indent_equal("") do
      puts "b: false: #{@line.content}" if @view_token_type
      [false, false]
    end
  when /^$/
    # A blank line continues a preformat/quote/desc block, otherwise it is a
    # WHITELINE.
    @line.content = ""
    if_current_indent_equal("") do
      if @current_type == :preformat
        puts "b: :PREFORMAT: #{@line.content}" if @view_token_type
        [:PREFORMAT, "\n"]
      elsif @current_type == :quote
        puts "b: :QUOTE: #{@line.content}" if @view_token_type
        [:QUOTE, "\n"]
      elsif @current_type == :descline
        puts "b: DESCLINE: #{@line.content}" if @view_token_type
        [:DESCLINE, " "]
      else
        puts "b: WHITELINE: #{@line.content}" if @view_token_type
        @current_type = :whiteline
        [:WHITELINE, :WHITELINE]
      end
    end
  when /^\#(.*)/ # comment line
    @no += 1
    if @current_type == :header
      puts "b: HEADER: #{@line.content}" if @view_token_type
      [:HEADER, $1.strip]
    else
      puts "b: COMMENT(noop): #{@line.content}" if @view_token_type
      next_token
    end
  when /^(={1,4})(?!=)\s*(?=\S)/, /^(\+{1,2})(?!\+)\s*(?=\S)/
    rest = $' # '
    rest.strip!
    mark = $1
    # if_current_indent_equal("") do
    if_current_indent_equal(@line.indent) do
      @current_type = :headline
      puts "b: HEADLINE: #{@line.content}" if @view_token_type
      [:HEADLINE, [mark_to_level(mark), rest]]
    end
  when /^\s\s+(.*)/ # type == preformat
    puts "b: 2 WHITE SPACE(#{@current_type}) : #{@line.content}" if @view_token_type
    case @current_type
    when :itemlist
      if @line.content =~ /^(\s*)(\*)(\s+)(.*)/
        line = $4.strip
        if line.empty?
          @no += 1
          next_token
        else
          if_current_indent_equal(@line.indent) do
            puts "b: ITEMLIST: [#{line}]" if @view_token_type
            @current_type = :itemlist
            [:ITEMLIST, line]
          end
        end
      else
        # An indented line without a "*" marker continues the previous item.
        line = @line.content.strip
        if line.empty?
          @no += 1
          next_token
        else
          puts "b: ITEMLISTCONTINUE: [#{line.empty?}] --" if @view_token_type
          @no += 1
          @current_type = :itemlist
          [:ITEMLISTCONTINUE, line]
        end
      end
    when :numlist
      # Accept "(12) text" and "12. text". The second pattern allows several
      # digits so that it agrees with the top-level numlist branch below;
      # with a single \d an indented "10. text" kept its marker in the text.
      @line.content =~ /^(\s*)(\(\d+\))(\s+)(.*)/
      @line.content =~ /^(\s*)(\d+\.)(\s+)(.*)/ if $4.nil?
      # No marker at all: use the whole stripped line as the item text.
      line = $4 || @line.content.strip
      if line.empty?
        @no += 1
        next_token
      else
        if_current_indent_equal(@line.indent) do
          puts "b: NUMLIST: [#{line}]" if @view_token_type
          @current_type = :numlist
          [:NUMLIST, line]
        end
      end
    else
      @no += 1
      # $1 is still the capture of the enclosing /^\s\s+(.*)/ match here.
      if @current_type == :descline
        @current_type = :descline
        puts "b: DESCLINE: #{@line.content}" if @view_token_type
        [:DESCLINE, $1 + "\n"]
      else
        @current_type = :preformat
        puts "b: PREFORMAT: #{$1}" if @view_token_type
        [:PREFORMAT, @line.content.sub(" ","")]
      end
    end
  when /^>\s(.*)/ # type == quote
    puts "b: 2 WHITE SPACE(#{@current_type}) : #{@line.content}" if @view_token_type
    @current_type = :quote
    puts "b: QUOTE: #{$1}" if @view_token_type
    if_current_indent_equal(@line.indent) do
      [:QUOTE, @line.content.sub("> ","")]
    end
  when /^(\:)(.*)/ # type = desclist
    if_current_indent_equal(@line.indent) do
      @current_type = :descline
      puts "b: DESCLINE_TILTE: #{$2.strip}" if @view_token_type
      [:DESCLINE_TITLE, $2.strip]
    end
  when /^(\s*)(\*)(\s+)(.*)/ # type = itemlist
    if_current_indent_equal(@line.indent) do
      puts "b: ITEMLIST: #{@line.content}" if @view_token_type
      @current_type = :itemlist
      [:ITEMLIST, $4]
    end
  when /^(\s*)(\(\d+\))(\s+)(.*)/
    if_current_indent_equal(@line.indent) do
      puts "b: NUMLIST: #{@line.content}" if @view_token_type
      @current_type = :numlist
      [:NUMLIST, $4]
    end
  when /^(\s*)(\d+\.)(\s+)(.*)/ # type = numlist
    if_current_indent_equal(@line.indent) do
      puts "b: NUMLIST: #{@line.content}" if @view_token_type
      @current_type = :numlist
      [:NUMLIST, $4]
    end
  when /^\|-.*/ # type = table head
    # Inside a table this line is a header rule and is skipped; anywhere
    # else it is treated as ordinary PLAIN text.
    if @current_type == :table
      @no += 1
      next_token
    else
      @current_type = :plain
      if_current_indent_equal(@line.indent) do
        puts "b: PLAIN: #{@line.content}" if @view_token_type
        [:PLAIN, @line.content]
      end
    end
  when /^\|.*/ # type = table
    @no += 1
    @current_type = :table
    lines = @line.content.chomp.split("|")
    # Drop the empty field in front of the leading "|".
    lines.shift
    [:TABLELINE, lines]
  when /(.*)/ # type == plain
    @current_type = :plain
    if_current_indent_equal(@line.indent) do
      puts "b: PLAIN: #{@line.content}" if @view_token_type
      [:PLAIN, @line.content]
    end
  else
    # Not reachable for String content, since /(.*)/ matches every string.
    puts "raise : #{@line}"
  end
end
344
+
345
# Gate a line on its indentation relative to the current nesting level.
#
# * Within one column of @current_indent: the line is consumed (@no is
#   advanced) and the block's value is returned.
# * Deeper: one entry per indent step is pushed on @indent_stack and
#   [:INDENT, :INDENT] is returned; the line is NOT consumed.
# * Shallower: one entry is popped and [:DEDENT, :DEDENT] is returned; the
#   line is NOT consumed.
#
# @param ident [Object] unused; the indent is always read from @line
# @yieldreturn [Array] the token pair for the line
# @return [Array] the block's value, or an INDENT/DEDENT token pair
def if_current_indent_equal(ident)
  indent_space = 2
  line_indent = @line.indent
  puts "current: #{@current_indent}, line: #{@line.indent}, stack #{@indent_stack.size}:" if @view_token_type
  if (@current_indent - line_indent).abs <= 1
    @no += 1
    yield
  elsif @current_indent < line_indent
    ((line_indent - @current_indent) / indent_space).times { @indent_stack.push("") }
    @current_indent = line_indent
    puts "b: INDENT" if @view_token_type
    [:INDENT, :INDENT]
  else
    @indent_stack.pop
    # Resynchronise when the stack matches the line's indent. Also do so once
    # the stack is empty: there is nothing left to close, and otherwise a
    # line whose indent is not a multiple of indent_space would never match
    # and this method would return DEDENT forever without advancing.
    if @indent_stack.empty? || line_indent == @indent_stack.size * indent_space
      @current_indent = line_indent
    end
    puts "b: DEDENT" if @view_token_type
    [:DEDENT, :DEDENT]
  end
end
366
+
367
+ ---- header
368
+ require "parserutility"
369
+ require "mokinlineparser.tab"
370
+ require "mokelement"
371
+ require "erb"
372
+
373
+ module Mok
374
+
375
+ ---- footer
376
# Command-line driver: parse a document from standard input, then dump the
# collected index, the metadata and the rendered nodes.
if __FILE__ == $0
  mok = BlockParser.new
  src = $stdin.readlines
  nodes = mok.parse(src)
  puts "----- index -----"
  mok.index.each do |key, val|
    puts key
    val.each { |v| p v }
  end
  puts "----- info -----"
  # BlockParser exposes the header data through its metadata reader; it
  # declares no info method, so calling one would raise NoMethodError.
  p mok.metadata
  puts "----- output -----"
  nodes.each do |n|
    puts n.apply
  end
end
392
+ end # end of module Mok