mediacloth 0.0.3 → 0.5

Files changed (115)
  1. data/README.md +36 -0
  2. data/lib/mediacloth/mediawikiast.rb +58 -1
  3. data/lib/mediacloth/mediawikihtmlgenerator.rb +229 -73
  4. data/lib/mediacloth/mediawikilexer.rb +1030 -656
  5. data/lib/mediacloth/mediawikilinkhandler.rb +89 -0
  6. data/lib/mediacloth/mediawikiparams.rb +1 -10
  7. data/lib/mediacloth/mediawikiparser.rb +939 -409
  8. data/lib/mediacloth/mediawikiparser.tab.rb +1357 -0
  9. data/lib/mediacloth/mediawikiparser.y +256 -52
  10. data/lib/mediacloth/mediawikisignedwikigenerator.rb +42 -0
  11. data/lib/mediacloth/mediawikitemplatehandler.rb +8 -0
  12. data/lib/mediacloth/mediawikiwalker.rb +72 -1
  13. data/lib/mediacloth.rb +33 -10
  14. data/test/data/ast1 +68 -0
  15. data/test/data/ast10 +196 -0
  16. data/test/data/ast11 +34 -0
  17. data/test/data/ast12 +39 -0
  18. data/test/data/ast13 +25 -0
  19. data/test/data/ast14 +13 -0
  20. data/test/data/ast15 +25 -0
  21. data/test/data/ast16 +17 -0
  22. data/test/data/ast17 +9 -0
  23. data/test/data/ast18 +21 -0
  24. data/test/data/ast19 +32 -0
  25. data/test/data/ast2 +4 -0
  26. data/test/data/ast20 +10 -0
  27. data/test/data/ast21 +27 -0
  28. data/test/data/ast22 +22 -0
  29. data/test/data/ast23 +5 -0
  30. data/test/data/ast3 +6 -0
  31. data/test/data/ast4 +122 -0
  32. data/test/data/ast5 +122 -0
  33. data/test/data/ast6 +22 -0
  34. data/test/data/ast7 +143 -0
  35. data/test/data/ast8 +3 -0
  36. data/test/data/ast9 +11 -0
  37. data/test/data/html1 +33 -5
  38. data/test/data/html10 +31 -27
  39. data/test/data/html11 +19 -0
  40. data/test/data/html12 +32 -0
  41. data/test/data/html13 +29 -0
  42. data/test/data/html14 +4 -0
  43. data/test/data/html15 +29 -0
  44. data/test/data/html16 +28 -0
  45. data/test/data/html17 +10 -0
  46. data/test/data/html18 +8 -0
  47. data/test/data/html19 +27 -0
  48. data/test/data/html2 +1 -1
  49. data/test/data/html20 +7 -0
  50. data/test/data/html21 +5 -0
  51. data/test/data/html22 +24 -0
  52. data/test/data/html23 +7 -0
  53. data/test/data/html3 +1 -1
  54. data/test/data/html4 +60 -11
  55. data/test/data/html5 +45 -6
  56. data/test/data/html6 +5 -5
  57. data/test/data/html7 +59 -1
  58. data/test/data/html8 +1 -1
  59. data/test/data/html9 +10 -2
  60. data/test/data/input1 +4 -0
  61. data/test/data/input11 +19 -0
  62. data/test/data/input12 +32 -0
  63. data/test/data/input13 +10 -0
  64. data/test/data/input14 +8 -0
  65. data/test/data/input15 +10 -0
  66. data/test/data/input16 +28 -0
  67. data/test/data/input17 +10 -0
  68. data/test/data/input18 +16 -0
  69. data/test/data/input19 +29 -0
  70. data/test/data/input20 +8 -0
  71. data/test/data/input21 +18 -0
  72. data/test/data/input22 +20 -0
  73. data/test/data/input23 +8 -0
  74. data/test/data/input4 +13 -1
  75. data/test/data/input5 +45 -4
  76. data/test/data/input7 +25 -1
  77. data/test/data/lex1 +17 -18
  78. data/test/data/lex10 +57 -87
  79. data/test/data/lex11 +18 -0
  80. data/test/data/lex12 +32 -0
  81. data/test/data/lex13 +3 -0
  82. data/test/data/lex14 +1 -0
  83. data/test/data/lex15 +3 -0
  84. data/test/data/lex16 +27 -0
  85. data/test/data/lex17 +9 -0
  86. data/test/data/lex18 +4 -0
  87. data/test/data/lex19 +27 -0
  88. data/test/data/lex2 +2 -2
  89. data/test/data/lex20 +7 -0
  90. data/test/data/lex21 +4 -0
  91. data/test/data/lex22 +3 -0
  92. data/test/data/lex23 +7 -0
  93. data/test/data/lex3 +1 -1
  94. data/test/data/lex4 +35 -29
  95. data/test/data/lex5 +57 -18
  96. data/test/data/lex6 +7 -7
  97. data/test/data/lex7 +42 -18
  98. data/test/data/lex8 +1 -1
  99. data/test/data/lex9 +6 -6
  100. data/test/dataproducers/ast.rb +24 -0
  101. data/test/dataproducers/html.rb +11 -12
  102. data/test/dataproducers/lex.rb +9 -4
  103. data/test/debugwalker.rb +25 -11
  104. data/test/htmlgenerator.rb +170 -13
  105. data/test/lexer.rb +626 -83
  106. data/test/linkhandler.rb +39 -0
  107. data/test/parser.rb +176 -9
  108. data/test/signedwikigenerator.rb +113 -0
  109. metadata +158 -79
  110. data/README +0 -37
  111. data/lib/mediacloth/mediawikilexer.rb~ +0 -491
  112. data/lib/mediacloth/mediawikiparser.y~ +0 -210
  113. data/test/data/result1 +0 -48
  114. data/test/dataproducers/html.rb~ +0 -24
  115. data/test/dataproducers/lex.rb~ +0 -15
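The shape of the release: the plain-text README gives way to README.md, the lexer and parser are substantially rewritten (the RACC-generated mediawikiparser.tab.rb is now shipped), new MediaWikiLinkHandler and MediaWikiTemplateHandler extension points plus a MediaWikiSignedWikiGenerator appear, and the stray editor backup files (*.rb~, *.y~) that 0.0.3 packaged by mistake are removed. The two large deletions reproduced below are those backup files. For orientation, the pieces combine roughly like this; a minimal sketch in which the MediaWikiHTMLGenerator#parse and #html calls are assumptions inferred from the file names above, not a confirmed 0.5 API:

    require 'mediacloth/mediawikilexer'
    require 'mediacloth/mediawikiparser'
    require 'mediacloth/mediawikihtmlgenerator'

    # Lex + parse wiki markup into an AST, then walk the AST to emit HTML.
    parser = MediaWikiParser.new
    parser.lexer = MediaWikiLexer.new
    ast = parser.parse("== Heading ==\nSome ''italic'' text.")

    generator = MediaWikiHTMLGenerator.new   # a MediaWikiWalker subclass
    generator.parse(ast)                     # assumed walker entry point
    puts generator.html                      # assumed accessor for the result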
@@ -1,491 +0,0 @@
- #The lexer for MediaWiki language.
- #
- #Standalone usage:
- # file = File.new("somefile", "r")
- # input = file.read
- # lexer = MediaWikiLexer.new
- # lexer.tokenize(input)
- #
- #Inside RACC-generated parser:
- # ...
- # ---- inner ----
- # attr_accessor :lexer
- # def parse(input)
- #     lexer.tokenize(input)
- #     return do_parse
- # end
- # def next_token
- #     return @lexer.lex
- # end
- # ...
- # parser = MediaWikiParser.new
- # parser.lexer = MediaWikiLexer.new
- # parser.parse(input)
- class MediaWikiLexer
-
-   #Initializes the lexer with a match table.
-   #
-   #The match table tells the lexer which method to invoke
-   #on a given input char during the "tokenize" phase.
-   def initialize
-     @position = 0
-     @pair_stack = [[false, false]] #stack of tokens for which a pair should be found
-     @list_stack = []
-     @lexer_table = Hash.new(method(:match_other))
-     @lexer_table["'"] = method(:match_italic_or_bold)
-     @lexer_table["="] = method(:match_section)
-     @lexer_table["["] = method(:match_link_start)
-     @lexer_table["]"] = method(:match_link_end)
-     @lexer_table[" "] = method(:match_space)
-     @lexer_table["*"] = method(:match_list)
-     @lexer_table["#"] = method(:match_list)
-     @lexer_table[";"] = method(:match_list)
-     @lexer_table[":"] = method(:match_list)
-     @lexer_table["-"] = method(:match_line)
-     @lexer_table["~"] = method(:match_signature)
-     @lexer_table["h"] = method(:match_inline_link)
-     @lexer_table["\n"] = method(:match_newline)
-   end
-
-   #Transforms input stream (string) into the stream of tokens.
-   #Tokens are collected into an array of type [ [TOKEN_SYMBOL, TOKEN_VALUE], ..., [false, false] ].
-   #This array can be given as input token by token to a RACC-based parser with no
-   #modification. The last token [false, false] indicates EOF.
-   def tokenize(input)
-     @tokens = []
-     start_para
-     @cursor = 0
-     @text = input
-     @next_token = []
-
-     #This tokenizer algorithm assumes that everything that is not
-     #matched by the lexer is going to be a :TEXT token. Otherwise it's the usual
-     #lexer algorithm which calls methods from the match table to define the next tokens.
-     while (@cursor < @text.length)
-       @current_token = [:TEXT, ''] unless @current_token
-       @token_start = @cursor
-       @char = @text[@cursor, 1]
-
-       if @lexer_table[@char].call == :TEXT
-         @current_token[1] += @text[@token_start, 1]
-       else
-         #skip empty :TEXT tokens
-         puts "not a text: #{@next_token[0]}"
-         unless empty_text_token?
-           @tokens << @current_token
-           puts "chance to break para before #{@next_token[0]}"
-           unless para_breaker?(@next_token[0])
-             #if no paragraph was previously started
-             #then we should start it
-             start_para if !@para
-           else
-             #if we already have a paragraph this is the time to close it
-             end_para if @para
-           end
-         end
-
-         if para_breaker?(@next_token[0])
-           if @tokens.last and @tokens.last[0] == :PARA_START
-             #we need to remove para start token because no para end is possible
-             @tokens.pop
-             @para = false
-           end
-         end
-
-         @next_token[1] = @text[@token_start, @cursor - @token_start]
-         @tokens << @next_token
-         #hack to enable sub-lexing!
-         if @sub_tokens
-           @tokens += @sub_tokens
-           @sub_tokens = nil
-         end
-         #end of hack!
-
-         #if the next token can start the paragraph, let's try that
-         start_para if @tokens.last and para_starter?(@tokens.last[0])
-
-         @current_token = nil
-         @next_token = []
-       end
-     end
-     #add the last TEXT token if it exists
-     puts @current_token
-     if @current_token and not empty_text_token?
-       puts "here"
-       if para_breaker?(@current_token[0])
-         #if we already have a paragraph this is the time to close it
-         end_para if @para
-       end
-       @tokens << @current_token
-     end
-
-     #remove empty para start or finish the paragraph if necessary
-     if @tokens.last and @tokens.last[0] == :PARA_START
-       @tokens.pop
-       @para = false
-     else
-       end_para if @para
-     end
-     #RACC wants us to put this to indicate EOF
-     @tokens << [false, false]
-     @tokens
-   end
-
-   #Returns the next token from the stream. Useful for RACC parsers.
-   def lex
-     token = @tokens[@position]
-     @position += 1
-     return token
-   end
-
-
-   private
-   #Returns true if the token breaks the paragraph.
-   def para_breaker?(token)
-     [:SECTION_START, :SECTION_END,
-      :UL_START, :UL_END, :OL_START, :OL_END,
-      :DL_START, :DL_END, :HLINE, :PRE].include?(token)
-   end
-
-   #Returns true if the paragraph can be started after the token
-   def para_starter?(token)
-     [:SECTION_END, :UL_END, :OL_END, :DL_END, :HLINE, :PRE].include?(token)
-   end
-
-   #-- ================== Match methods ================== ++#
-
-   #Matches anything that was not matched. Returns :TEXT to indicate
-   #that matched characters should go into :TEXT token.
-   def match_other
-     @cursor += 1
-     return :TEXT
-   end
-
-   #Matches italic or bold symbols:
-   # "'''" { return :BOLD; }
-   # "''" { return :ITALIC; }
-   def match_italic_or_bold
-     if @text[@cursor, 3] == "'''" and @pair_stack.last[0] != :ITALICSTART
-       matchBold
-       @cursor += 3
-       return
-     end
-     if @text[@cursor, 2] == "''"
-       matchItalic
-       @cursor += 2
-       return
-     end
-     match_other
-   end
-
-   def matchBold
-     if @pair_stack.last[0] == :BOLDSTART
-       @next_token[0] = :BOLDEND
-       @pair_stack.pop
-     else
-       @next_token[0] = :BOLDSTART
-       @pair_stack.push @next_token
-     end
-   end
-
-   def matchItalic
-     if @pair_stack.last[0] == :ITALICSTART
-       @next_token[0] = :ITALICEND
-       @pair_stack.pop
-     else
-       @next_token[0] = :ITALICSTART
-       @pair_stack.push @next_token
-     end
-   end
-
-   #Matches sections
-   def match_section
-     if at_start_of_line? or (@pair_stack.last[0] == :SECTION_START)
-       i = 0
-       i += 1 while @text[@cursor+i, 1] == "="
-       @cursor += i
-
-       if @pair_stack.last[0] == :SECTION_START
-         @next_token[0] = :SECTION_END
-         @pair_stack.pop
-       else
-         @next_token[0] = :SECTION_START
-         @pair_stack.push @next_token
-       end
-     else
-       match_other
-     end
-   end
-
-   #Matches start of the hyperlinks
-   # "[[" { return INTLINKSTART; }
-   # "[" { return LINKSTART; }
-   def match_link_start
-     if @text[@cursor, 2] == "[["
-       @next_token[0] = :INTLINKSTART
-       @pair_stack.push @next_token
-       @cursor += 2
-     elsif @text[@cursor, 1] == "[" and html_link?(@cursor+1)
-       @next_token[0] = :LINKSTART
-       @pair_stack.push @next_token
-       @cursor += 1
-     else
-       match_other
-     end
-   end
-
-   #Matches end of the hyperlinks
-   # "]]" { return INTLINKEND; }
-   # "]" { return LINKEND; }
-   def match_link_end
-     if @text[@cursor, 2] == "]]" and @pair_stack.last[0] == :INTLINKSTART
-       @next_token[0] = :INTLINKEND
-       @pair_stack.pop
-       @cursor += 2
-     elsif @text[@cursor, 1] == "]" and @pair_stack.last[0] == :LINKSTART
-       @next_token[0] = :LINKEND
-       @pair_stack.pop
-       @cursor += 1
-     else
-       match_other
-     end
-   end
-
-   #Matches inlined unformatted html link
-   # "http://[^\s]*" { return [ LINKSTART TEXT LINKEND]; }
-   def match_inline_link
-     #if no link start token was detected and the text starts with http://
-     #then it's the inlined unformatted html link
-     if html_link?(@cursor) and @pair_stack.last[0] != :INTLINKSTART and
-        @pair_stack.last[0] != :LINKSTART
-       @next_token[0] = :LINKSTART
-       linkText = extract_till_whitespace
-       @sub_tokens = []
-       @sub_tokens << [:TEXT, linkText]
-       @sub_tokens << [:LINKEND, ']']
-       @cursor += linkText.length
-       @token_start = @cursor
-     else
-       match_other
-     end
-   end
-
-   #Matches space to find preformatted areas which start with a space after a newline
-   # "\n\s[^\n]*" { return PRE; }
-   def match_space
-     if at_start_of_line?
-       match_untill_eol
-       @next_token[0] = :PRE
-       strip_ws_from_token_start
-     else
-       match_other
-     end
-   end
-
-   #Matches any kind of list by using a sublexing technique. MediaWiki lists are context-sensitive,
-   #therefore we need to do some special processing with lists. The idea here is to strip
-   #the leftmost symbol indicating the list from the group of input lines and use a separate
-   #lexer to process the extracted fragment.
-   def match_list
-     if at_start_of_line?
-       list_id = @text[@cursor, 1]
-       sub_text = extract_list_contents(list_id)
-       extracted = 0
-
-       #hack to tokenize everything inside the list
-       @sub_tokens = []
-       sub_lines = ""
-       @sub_tokens << [:LI_START, ""]
-       sub_text.each do |t|
-         extracted += 1
-         if text_is_list? t
-           sub_lines += t
-         else
-           if not sub_lines.empty?
-             @sub_tokens += sub_lex(sub_lines)
-             sub_lines = ""
-           end
-           if @sub_tokens.last[0] != :LI_START
-             @sub_tokens << [:LI_END, ""]
-             @sub_tokens << [:LI_START, ""]
-           end
-           @sub_tokens += sub_lex(t.lstrip)
-         end
-       end
-       if not sub_lines.empty?
-         @sub_tokens += sub_lex(sub_lines)
-         @sub_tokens << [:LI_END, ""]
-       else
-         @sub_tokens << [:LI_END, ""]
-       end
-
-       #end of hack
-       @cursor += sub_text.length + extracted
-       @token_start = @cursor
-
-       case
-       when list_id == "*"
-         @next_token[0] = :UL_START
-         @sub_tokens << [:UL_END, ""]
-       when list_id == "#"
-         @next_token[0] = :OL_START
-         @sub_tokens << [:OL_END, ""]
-       when list_id == ";", list_id == ":"
-         @next_token[0] = :DL_START
-         @sub_tokens << [:DL_END, ""]
-       end
-
-     else
-       match_other
-     end
-   end
-
-   #Matches the line until \n
-   def match_untill_eol
-     val = @text[@cursor, 1]
-     while (val != "\n") and (!val.nil?)
-       @cursor += 1
-       val = @text[@cursor, 1]
-     end
-     @cursor += 1
-   end
-
-   #Matches the hline tag that starts with "-"
-   # "\n----" { return HLINE; }
-   def match_line
-     if at_start_of_line? and @text[@cursor, 4] == "----"
-       @next_token[0] = :HLINE
-       @cursor += 4
-     else
-       match_other
-     end
-   end
-
-   #Matches signature
-   # "~~~~~" { return SIGNATURE_DATE; }
-   # "~~~~" { return SIGNATURE_FULL; }
-   # "~~~" { return SIGNATURE_NAME; }
-   def match_signature
-     if @text[@cursor, 5] == "~~~~~"
-       @next_token[0] = :SIGNATURE_DATE
-       @cursor += 5
-     elsif @text[@cursor, 4] == "~~~~"
-       @next_token[0] = :SIGNATURE_FULL
-       @cursor += 4
-     elsif @text[@cursor, 3] == "~~~"
-       @next_token[0] = :SIGNATURE_NAME
-       @cursor += 3
-     else
-       match_other
-     end
-   end
-
-   #Matches a new line and breaks the paragraph if two newlines are met
-   def match_newline
-     if @text[@cursor, 2] == "\n\n"
-       if @para
-         @next_token[0] = :PARA_END
-         # @para = false
-         @sub_tokens = [[:PARA_START, ""]]
-         @cursor += 2
-         return
-       end
-     end
-     match_other
-   end
-
-   #-- ================== Helper methods ================== ++#
-
-   #Checks if the token is placed at the start of the line.
-   def at_start_of_line?
-     if @cursor == 0 or @text[@cursor-1, 1] == "\n"
-       true
-     else
-       false
-     end
-   end
-
-   #Checks if the text at the position contains the start of an html link
-   def html_link?(position)
-     return @text[position, 7] == 'http://'
-   end
-
-   #Adjusts @token_start to skip leading whitespace
-   def strip_ws_from_token_start
-     @token_start += 1 while @text[@token_start, 1] == " "
-   end
-
-   #Returns true if the TEXT token is empty or contains a newline only
-   def empty_text_token?
-     @current_token == [:TEXT, ''] or @current_token == [:TEXT, "\n"]
-   end
-
-   #Returns true if the text is a list, i.e. starts with one of the #;*: symbols
-   #that indicate a list
-   def text_is_list?(text)
-     return text =~ /^[#;*:].*/
-   end
-
-   #Runs a sublexer to tokenize sub_text
-   def sub_lex(sub_text, strip_paragraphs=true)
-     sub_lexer = MediaWikiLexer.new
-     sub_tokens = sub_lexer.tokenize(sub_text)
-     sub_tokens.pop #false token
-     if strip_paragraphs
-       #the last PARA_END token
-       sub_tokens.pop if sub_tokens.last[0] == :PARA_END
-       #the first PARA_START token
-       sub_tokens.delete_at(0) if sub_tokens[0][0] == :PARA_START
-     end
-     sub_tokens
-   end
-
-   #Extracts the text from the current cursor position till the next whitespace
-   def extract_till_whitespace
-     i = @cursor
-     text = ""
-     while i < @text.length
-       curr = @text[i, 1]
-       if (curr == "\n") or (curr == "\t") or (curr == " ")
-         break
-       end
-       text += curr
-       i += 1
-     end
-     text
-   end
-
-   #Extracts the list contents of the list type set by the list_id variable.
-   #Example list:
-   # *a
-   # **a
-   #Extracted list with id "*" will look like:
-   # a
-   # *a
-   def extract_list_contents(list_id)
-     i = @cursor+1
-     list = ""
-     while i < @text.length
-       curr = @text[i, 1]
-       if (curr == "\n") and (@text[i+1, 1] != list_id)
-         list += curr
-         break
-       end
-       list += curr unless (curr == list_id) and (@text[i-1, 1] == "\n")
-       i += 1
-     end
-     list
-   end
-
-   def start_para
-     @tokens << [:PARA_START, ""]
-     @para = true
-   end
-
-   def end_para
-     @tokens << [:PARA_END, ""]
-     @para = false
-   end
-
- end
-
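The tokenize method above documents its contract: a flat array of [SYMBOL, VALUE] pairs terminated by [false, false], ready to be replayed to a RACC parser one pair at a time. An illustrative run of this (now deleted) lexer on a tiny input; the expected stream is traced from the code above, not captured from a real session:

    lexer = MediaWikiLexer.new
    lexer.tokenize("''hi''")
    # => [[:PARA_START, ""], [:ITALICSTART, "''"], [:TEXT, "hi"],
    #     [:ITALICEND, "''"], [:PARA_END, ""], [false, false]]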
@@ -1,210 +0,0 @@
- #The parser for the MediaWiki language.
- #
- #Usage together with a lexer:
- # inputFile = File.new("data/input1", "r")
- # input = inputFile.read
- # parser = MediaWikiParser.new
- # parser.lexer = MediaWikiLexer.new
- # parser.parse(input)
- class MediaWikiParser
-
- token BOLDSTART BOLDEND ITALICSTART ITALICEND LINKSTART LINKEND
-       INTLINKSTART INTLINKEND SECTION_START SECTION_END TEXT PRE
-       HLINE SIGNATURE_NAME SIGNATURE_DATE SIGNATURE_FULL
-       UL_START UL_END LI_START LI_END OL_START OL_END
-       PARA_START PARA_END
-
- rule
-
- wiki:
-     repeated_contents
-     {
-         @nodes.push WikiAST.new
-         #@nodes.last.children.insert(0, val[0])
-         #puts val[0]
-         @nodes.last.children += val[0]
-     }
-     ;
-
- contents:
-     text
-     {
-         result = val[0]
-     }
-     | bulleted_list
-     {
-         result = val[0]
-     }
-     | numbered_list
-     {
-         result = val[0]
-     }
-     | preformatted
-     {
-         p = PreformattedAST.new
-         p.contents = val[0]
-         result = p
-     }
-     | section
-     {
-         s = SectionAST.new
-         s.contents = val[0][0]
-         s.level = val[0][1]
-         result = s
-     }
-     | PARA_START para_contents PARA_END
-     {
-         if val[1]
-             p = ParagraphAST.new
-             p.children = val[1]
-             result = p
-         end
-     }
-     | error
-     {
-         puts "ERR"
-         yyerrok
-     }
-     ;
-
- #TODO: remove empty paragraphs in the lexer
- para_contents:
-     {
-         result = nil
-     }
-     | repeated_contents
-     {
-         result = val[0]
-     }
-
- repeated_contents: contents
-     {
-         result = []
-         result << val[0]
-     }
-     | repeated_contents contents
-     {
-         result = []
-         result += val[0]
-         result << val[1]
-     }
-     ;
-
- text: element
-     {
-         p = TextAST.new
-         p.formatting = val[0][0]
-         p.contents = val[0][1]
-         result = p
-     }
-     | formatted_element
-     {
-         result = val[0]
-     }
-     ;
-
- element: LINKSTART TEXT LINKEND
-     { return [:Link, val[1]] }
-     | INTLINKSTART TEXT INTLINKEND
-     { return [:InternalLink, val[1]] }
-     | TEXT
-     { return [:None, val[0]] }
-     | HLINE
-     { return [:HLine, val[0]] }
-     | SIGNATURE_DATE
-     { return [:SignatureDate, val[0]] }
-     | SIGNATURE_NAME
-     { return [:SignatureName, val[0]] }
-     | SIGNATURE_FULL
-     { return [:SignatureFull, val[0]] }
-     ;
-
- formatted_element: BOLDSTART repeated_contents BOLDEND
-     {
-         p = FormattedAST.new
-         p.formatting = :Bold
-         p.children += val[1]
-         result = p
-     }
-     | ITALICSTART repeated_contents ITALICEND
-     {
-         p = FormattedAST.new
-         p.formatting = :Italic
-         p.children += val[1]
-         result = p
-     }
-     ;
-
- bulleted_list: UL_START list_item list_contents UL_END
-     {
-         list = ListAST.new
-         list.list_type = :Bulleted
-         list.children << val[1]
-         list.children += val[2]
-         result = list
-     }
-     ;
-
- numbered_list: OL_START list_item list_contents OL_END
-     {
-         list = ListAST.new
-         list.list_type = :Numbered
-         list.children << val[1]
-         list.children += val[2]
-         result = list
-     }
-     ;
-
- list_contents:
-     { result = [] }
-     list_item list_contents
-     {
-         result << val[1]
-         result += val[2]
-     }
-     |
-     { result = [] }
-     ;
-
- list_item: LI_START repeated_contents LI_END
-     {
-         li = ListItemAST.new
-         li.children += val[1]
-         result = li
-     }
-     ;
-
- preformatted: PRE
-     { result = val[0] }
-     ;
-
- section: SECTION_START TEXT SECTION_END
-     { result = [val[1], val[0].length] }
-     ;
-
- end
-
- ---- header ----
- require 'mediacloth/mediawikiast'
-
- ---- inner ----
-
- attr_accessor :lexer
-
- def initialize
-     @nodes = []
-     super
- end
-
- #Tokenizes the input string and parses it.
- def parse(input)
-     @yydebug = true
-     lexer.tokenize(input)
-     do_parse
-     return @nodes.last
- end
-
- #Asks the lexer to return the next token.
- def next_token
-     return @lexer.lex
- end
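The ---- inner ---- section above is the whole runtime interface of the generated parser: parse buffers the entire token stream up front, and RACC's do_parse then pulls one pair per next_token call until it sees the [false, false] EOF marker. A minimal sketch of that same contract, based on the deleted implementation above, draining the lexer by hand instead of through do_parse:

    lexer = MediaWikiLexer.new
    lexer.tokenize("* one\n* two\n")
    # lex hands out one [SYMBOL, value] pair per call; [false, false] marks EOF.
    while (token = lexer.lex) && token[0]
      symbol, value = token
      puts "#{symbol}: #{value.inspect}"
    end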