mediacloth 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/README +37 -0
  2. data/lib/mediacloth/mediawikiast.rb +50 -0
  3. data/lib/mediacloth/mediawikihtmlgenerator.rb +105 -0
  4. data/lib/mediacloth/mediawikihtmlgenerator.rb~ +105 -0
  5. data/lib/mediacloth/mediawikilexer.rb +407 -0
  6. data/lib/mediacloth/mediawikiparams.rb +33 -0
  7. data/lib/mediacloth/mediawikiparser.rb +429 -0
  8. data/lib/mediacloth/mediawikiparser.y +172 -0
  9. data/lib/mediacloth/mediawikiparser.y~ +172 -0
  10. data/lib/mediacloth/mediawikiwalker.rb +62 -0
  11. data/lib/mediacloth/mediawikiwalker.rb~ +62 -0
  12. data/lib/mediacloth.rb +23 -0
  13. data/lib/mediacloth.rb~ +23 -0
  14. data/test/data/html1 +21 -0
  15. data/test/data/html2 +2 -0
  16. data/test/data/html3 +1 -0
  17. data/test/data/html4 +1 -0
  18. data/test/data/html6 +8 -0
  19. data/test/data/html7 +1 -0
  20. data/test/data/input1 +29 -0
  21. data/test/data/input2 +2 -0
  22. data/test/data/input3 +2 -0
  23. data/test/data/input4 +1 -0
  24. data/test/data/input5 +12 -0
  25. data/test/data/input6 +8 -0
  26. data/test/data/input7 +2 -0
  27. data/test/data/lex1 +23 -0
  28. data/test/data/lex2 +2 -0
  29. data/test/data/lex3 +1 -0
  30. data/test/data/lex4 +1 -0
  31. data/test/data/lex5 +12 -0
  32. data/test/data/lex6 +8 -0
  33. data/test/data/lex7 +2 -0
  34. data/test/data/result1 +48 -0
  35. data/test/dataproducers/html.rb +23 -0
  36. data/test/dataproducers/html.rb~ +23 -0
  37. data/test/dataproducers/lex.rb +15 -0
  38. data/test/debugwalker.rb +63 -0
  39. data/test/debugwalker.rb~ +63 -0
  40. data/test/htmlgenerator.rb +25 -0
  41. data/test/htmlgenerator.rb~ +25 -0
  42. data/test/lexer.rb +57 -0
  43. data/test/lexer.rb~ +57 -0
  44. data/test/parser.rb +23 -0
  45. data/test/parser.rb~ +23 -0
  46. data/test/testhelper.rb +27 -0
  47. data/test/testhelper.rb~ +28 -0
  48. metadata +97 -0
@@ -0,0 +1,33 @@
1
+ require 'singleton'
2
+
3
#MediaWiki parser parameter handler object.
#
#Stores and gives access to various parser settings and
#parser environment variables.
class MediaWikiParams

  #MediaWikiParams is a singleton class: the one shared object is
  #reached through MediaWikiParams.instance.
  include Singleton

  #The name of the wiki page author
  attr_accessor :author

  #Overrides the value reported by #time (useful for testing purposes).
  #Assigning nil or false removes the override again.
  attr_writer :time

  #Starts with the default author name and no time override.
  def initialize
    @author = "Creator"
  end

  #Creation time of the page.
  #
  #Returns the value assigned through time= when one is set,
  #otherwise the current time (Time.now) at the moment of the call.
  def time
    @time || Time.now
  end

end
@@ -0,0 +1,429 @@
1
+ #
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by racc 1.4.5
4
+ # from racc grammer file "mediawikiparser.y".
5
+ #
6
+
7
+ require 'racc/parser'
8
+
9
+
10
+ require 'mediacloth/mediawikiast'
11
+
12
+
13
+ class MediaWikiParser < Racc::Parser
14
+
15
+ module_eval <<'..end mediawikiparser.y modeval..id4f3872d37f', 'mediawikiparser.y', 153
16
+
17
+ attr_accessor :lexer
18
+
19
+ def initialize
20
+ @nodes = []
21
+ super
22
+ end
23
+
24
+ #Tokenizes input string and parses it.
25
+ def parse(input)
26
+ @yydebug=true
27
+ lexer.tokenize(input)
28
+ do_parse
29
+ return @nodes.last
30
+ end
31
+
32
+ #Asks the lexer to return the next token.
33
+ def next_token
34
+ return @lexer.lex
35
+ end
36
+ ..end mediawikiparser.y modeval..id4f3872d37f
37
+
38
+ ##### racc 1.4.5 generates ###
39
+
40
+ racc_reduce_table = [
41
+ 0, 0, :racc_error,
42
+ 1, 24, :_reduce_1,
43
+ 1, 26, :_reduce_2,
44
+ 1, 26, :_reduce_3,
45
+ 1, 26, :_reduce_4,
46
+ 1, 26, :_reduce_5,
47
+ 1, 25, :_reduce_6,
48
+ 2, 25, :_reduce_7,
49
+ 1, 27, :_reduce_8,
50
+ 1, 27, :_reduce_9,
51
+ 3, 31, :_reduce_10,
52
+ 3, 31, :_reduce_11,
53
+ 1, 31, :_reduce_12,
54
+ 1, 31, :_reduce_13,
55
+ 1, 31, :_reduce_14,
56
+ 1, 31, :_reduce_15,
57
+ 1, 31, :_reduce_16,
58
+ 3, 32, :_reduce_17,
59
+ 3, 32, :_reduce_18,
60
+ 4, 28, :_reduce_19,
61
+ 0, 35, :_reduce_20,
62
+ 3, 34, :_reduce_21,
63
+ 0, 34, :_reduce_22,
64
+ 3, 33, :_reduce_23,
65
+ 1, 29, :_reduce_24,
66
+ 3, 30, :_reduce_25 ]
67
+
68
+ racc_reduce_n = 26
69
+
70
+ racc_shift_n = 44
71
+
72
+ racc_action_table = [
73
+ 3, 32, 8, 29, 14, 30, 17, 28, 20, 1,
74
+ 2, 4, 6, 9, 11, 13, -22, 3, 42, 8,
75
+ 27, 14, 37, 17, 38, 20, 1, 2, 4, 6,
76
+ 9, 11, 13, 3, 39, 8, 40, 14, 27, 17,
77
+ 23, 20, 1, 2, 4, 6, 9, 11, 13, 3,
78
+ -22, 8, nil, 14, nil, 17, nil, 20, 1, 2,
79
+ 4, 6, 9, 11, 13, 3, nil, 8, nil, 14,
80
+ nil, 17, nil, 20, 1, 2, 4, 6, 9, 11,
81
+ 13, 3, nil, 8, nil, 14, nil, 17, nil, 20,
82
+ 1, 2, 4, 6, 9, 11, 13, 3, nil, 8,
83
+ 33, 14, nil, 17, nil, 20, 1, 2, 4, 6,
84
+ 9, 11, 13, 3, 31, 8, nil, 14, nil, 17,
85
+ nil, 20, 1, 2, 4, 6, 9, 11, 13 ]
86
+
87
+ racc_action_check = [
88
+ 36, 23, 36, 17, 36, 20, 36, 14, 36, 36,
89
+ 36, 36, 36, 36, 36, 36, 26, 8, 36, 8,
90
+ 13, 8, 28, 8, 29, 8, 8, 8, 8, 8,
91
+ 8, 8, 8, 0, 30, 0, 34, 0, 35, 0,
92
+ 5, 0, 0, 0, 0, 0, 0, 0, 0, 7,
93
+ 41, 7, nil, 7, nil, 7, nil, 7, 7, 7,
94
+ 7, 7, 7, 7, 7, 3, nil, 3, nil, 3,
95
+ nil, 3, nil, 3, 3, 3, 3, 3, 3, 3,
96
+ 3, 27, nil, 27, nil, 27, nil, 27, nil, 27,
97
+ 27, 27, 27, 27, 27, 27, 27, 25, nil, 25,
98
+ 25, 25, nil, 25, nil, 25, 25, 25, 25, 25,
99
+ 25, 25, 25, 22, 22, 22, nil, 22, nil, 22,
100
+ nil, 22, 22, 22, 22, 22, 22, 22, 22 ]
101
+
102
+ racc_action_pointer = [
103
+ 31, nil, nil, 63, nil, 40, nil, 47, 15, nil,
104
+ nil, nil, nil, 1, -4, nil, nil, -8, nil, nil,
105
+ -6, nil, 111, 1, nil, 95, -2, 79, 15, 15,
106
+ 24, nil, nil, nil, 18, 19, -2, nil, nil, nil,
107
+ nil, 32, nil, nil ]
108
+
109
+ racc_action_default = [
110
+ -26, -12, -24, -26, -13, -26, -15, -1, -26, -14,
111
+ -6, -16, -2, -26, -26, -3, -4, -26, -5, -8,
112
+ -26, -9, -26, -26, -7, -26, -20, -26, -26, -26,
113
+ -26, -17, 44, -18, -26, -26, -26, -10, -11, -25,
114
+ -19, -20, -23, -21 ]
115
+
116
+ racc_goto_table = [
117
+ 24, 7, 26, 5, 22, 34, nil, nil, nil, 25,
118
+ nil, nil, nil, nil, nil, 24, nil, nil, 24, nil,
119
+ 43, nil, nil, nil, 41, nil, nil, nil, 36, 24 ]
120
+
121
+ racc_goto_check = [
122
+ 3, 2, 10, 1, 2, 11, nil, nil, nil, 2,
123
+ nil, nil, nil, nil, nil, 3, nil, nil, 3, nil,
124
+ 11, nil, nil, nil, 10, nil, nil, nil, 2, 3 ]
125
+
126
+ racc_goto_pointer = [
127
+ nil, 3, 1, -7, nil, nil, nil, nil, nil, nil,
128
+ -11, -21, nil ]
129
+
130
+ racc_goto_default = [
131
+ nil, nil, nil, 10, 12, 15, 16, 18, 19, 21,
132
+ nil, nil, 35 ]
133
+
134
+ racc_token_table = {
135
+ false => 0,
136
+ Object.new => 1,
137
+ :BOLDSTART => 2,
138
+ :BOLDEND => 3,
139
+ :ITALICSTART => 4,
140
+ :ITALICEND => 5,
141
+ :LINKSTART => 6,
142
+ :LINKEND => 7,
143
+ :INTLINKSTART => 8,
144
+ :INTLINKEND => 9,
145
+ :SECTION => 10,
146
+ :TEXT => 11,
147
+ :PRE => 12,
148
+ :HLINE => 13,
149
+ :SIGNATURE_NAME => 14,
150
+ :SIGNATURE_DATE => 15,
151
+ :SIGNATURE_FULL => 16,
152
+ :UL_START => 17,
153
+ :UL_END => 18,
154
+ :LI_START => 19,
155
+ :LI_END => 20,
156
+ :OL_START => 21,
157
+ :OL_END => 22 }
158
+
159
+ racc_use_result_var = true
160
+
161
+ racc_nt_base = 23
162
+
163
+ Racc_arg = [
164
+ racc_action_table,
165
+ racc_action_check,
166
+ racc_action_default,
167
+ racc_action_pointer,
168
+ racc_goto_table,
169
+ racc_goto_check,
170
+ racc_goto_default,
171
+ racc_goto_pointer,
172
+ racc_nt_base,
173
+ racc_reduce_table,
174
+ racc_token_table,
175
+ racc_shift_n,
176
+ racc_reduce_n,
177
+ racc_use_result_var ]
178
+
179
+ Racc_token_to_s_table = [
180
+ '$end',
181
+ 'error',
182
+ 'BOLDSTART',
183
+ 'BOLDEND',
184
+ 'ITALICSTART',
185
+ 'ITALICEND',
186
+ 'LINKSTART',
187
+ 'LINKEND',
188
+ 'INTLINKSTART',
189
+ 'INTLINKEND',
190
+ 'SECTION',
191
+ 'TEXT',
192
+ 'PRE',
193
+ 'HLINE',
194
+ 'SIGNATURE_NAME',
195
+ 'SIGNATURE_DATE',
196
+ 'SIGNATURE_FULL',
197
+ 'UL_START',
198
+ 'UL_END',
199
+ 'LI_START',
200
+ 'LI_END',
201
+ 'OL_START',
202
+ 'OL_END',
203
+ '$start',
204
+ 'wiki',
205
+ 'repeated_contents',
206
+ 'contents',
207
+ 'text',
208
+ 'bulleted_list',
209
+ 'preformatted',
210
+ 'section',
211
+ 'element',
212
+ 'formatted_element',
213
+ 'list_item',
214
+ 'list_contents',
215
+ '@1']
216
+
217
+ Racc_debug_parser = false
218
+
219
+ ##### racc system variables end #####
220
+
221
+ # reduce 0 omitted
222
+
223
+ module_eval <<'.,.,', 'mediawikiparser.y', 25
224
+ def _reduce_1( val, _values, result )
225
+ @nodes.push WikiAST.new
226
+ #@nodes.last.children.insert(0, val[0])
227
+ #puts val[0]
228
+ @nodes.last.children += val[0]
229
+ result
230
+ end
231
+ .,.,
232
+
233
+ module_eval <<'.,.,', 'mediawikiparser.y', 32
234
+ def _reduce_2( val, _values, result )
235
+ result = val[0]
236
+ result
237
+ end
238
+ .,.,
239
+
240
+ module_eval <<'.,.,', 'mediawikiparser.y', 36
241
+ def _reduce_3( val, _values, result )
242
+ result = val[0]
243
+ result
244
+ end
245
+ .,.,
246
+
247
+ module_eval <<'.,.,', 'mediawikiparser.y', 42
248
+ def _reduce_4( val, _values, result )
249
+ p = PreformattedAST.new
250
+ p.contents = val[0]
251
+ result = p
252
+ result
253
+ end
254
+ .,.,
255
+
256
+ module_eval <<'.,.,', 'mediawikiparser.y', 49
257
+ def _reduce_5( val, _values, result )
258
+ s = SectionAST.new
259
+ s.contents = val[0][0]
260
+ s.level = val[0][1]
261
+ result = s
262
+ result
263
+ end
264
+ .,.,
265
+
266
+ module_eval <<'.,.,', 'mediawikiparser.y', 56
267
+ def _reduce_6( val, _values, result )
268
+ result = []
269
+ result << val[0]
270
+ result
271
+ end
272
+ .,.,
273
+
274
+ module_eval <<'.,.,', 'mediawikiparser.y', 62
275
+ def _reduce_7( val, _values, result )
276
+ result = []
277
+ result += val[0]
278
+ result << val[1]
279
+ result
280
+ end
281
+ .,.,
282
+
283
+ module_eval <<'.,.,', 'mediawikiparser.y', 71
284
+ def _reduce_8( val, _values, result )
285
+ p = TextAST.new
286
+ p.formatting = val[0][0]
287
+ p.contents = val[0][1]
288
+ result = p
289
+ result
290
+ end
291
+ .,.,
292
+
293
+ module_eval <<'.,.,', 'mediawikiparser.y', 75
294
+ def _reduce_9( val, _values, result )
295
+ result = val[0]
296
+ result
297
+ end
298
+ .,.,
299
+
300
+ module_eval <<'.,.,', 'mediawikiparser.y', 78
301
+ def _reduce_10( val, _values, result )
302
+ return [:Link, val[1]]
303
+ result
304
+ end
305
+ .,.,
306
+
307
+ module_eval <<'.,.,', 'mediawikiparser.y', 80
308
+ def _reduce_11( val, _values, result )
309
+ return [:InternalLink, val[1]]
310
+ result
311
+ end
312
+ .,.,
313
+
314
+ module_eval <<'.,.,', 'mediawikiparser.y', 82
315
+ def _reduce_12( val, _values, result )
316
+ return [:None, val[0]]
317
+ result
318
+ end
319
+ .,.,
320
+
321
+ module_eval <<'.,.,', 'mediawikiparser.y', 84
322
+ def _reduce_13( val, _values, result )
323
+ return [:HLine, val[0]]
324
+ result
325
+ end
326
+ .,.,
327
+
328
+ module_eval <<'.,.,', 'mediawikiparser.y', 86
329
+ def _reduce_14( val, _values, result )
330
+ return [:SignatureDate, val[0]]
331
+ result
332
+ end
333
+ .,.,
334
+
335
+ module_eval <<'.,.,', 'mediawikiparser.y', 88
336
+ def _reduce_15( val, _values, result )
337
+ return [:SignatureName, val[0]]
338
+ result
339
+ end
340
+ .,.,
341
+
342
+ module_eval <<'.,.,', 'mediawikiparser.y', 90
343
+ def _reduce_16( val, _values, result )
344
+ return [:SignatureFull, val[0]]
345
+ result
346
+ end
347
+ .,.,
348
+
349
+ module_eval <<'.,.,', 'mediawikiparser.y', 100
350
+ def _reduce_17( val, _values, result )
351
+ p = FormattedAST.new
352
+ p.formatting = :Bold
353
+ p.children += val[1]
354
+ result = p
355
+ result
356
+ end
357
+ .,.,
358
+
359
+ module_eval <<'.,.,', 'mediawikiparser.y', 107
360
+ def _reduce_18( val, _values, result )
361
+ p = FormattedAST.new
362
+ p.formatting = :Italic
363
+ p.children += val[1]
364
+ result = p
365
+ result
366
+ end
367
+ .,.,
368
+
369
+ module_eval <<'.,.,', 'mediawikiparser.y', 117
370
+ def _reduce_19( val, _values, result )
371
+ list = ListAST.new
372
+ list.type = :Bulleted
373
+ list.children << val[1]
374
+ list.children += val[2]
375
+ result = list
376
+ result
377
+ end
378
+ .,.,
379
+
380
+ module_eval <<'.,.,', 'mediawikiparser.y', 120
381
+ def _reduce_20( val, _values, result )
382
+ result = []
383
+ result
384
+ end
385
+ .,.,
386
+
387
+ module_eval <<'.,.,', 'mediawikiparser.y', 126
388
+ def _reduce_21( val, _values, result )
389
+ result << val[1]
390
+ result += val[2]
391
+ result
392
+ end
393
+ .,.,
394
+
395
+ module_eval <<'.,.,', 'mediawikiparser.y', 127
396
+ def _reduce_22( val, _values, result )
397
+ result = []
398
+ result
399
+ end
400
+ .,.,
401
+
402
+ module_eval <<'.,.,', 'mediawikiparser.y', 136
403
+ def _reduce_23( val, _values, result )
404
+ li = ListItemAST.new
405
+ li.children += val[1]
406
+ result = li
407
+ result
408
+ end
409
+ .,.,
410
+
411
+ module_eval <<'.,.,', 'mediawikiparser.y', 139
412
+ def _reduce_24( val, _values, result )
413
+ result = val[0]
414
+ result
415
+ end
416
+ .,.,
417
+
418
+ module_eval <<'.,.,', 'mediawikiparser.y', 143
419
+ def _reduce_25( val, _values, result )
420
+ result = [val[1], val[0].length]
421
+ result
422
+ end
423
+ .,.,
424
+
425
+ def _reduce_none( val, _values, result )
426
+ result
427
+ end
428
+
429
+ end # class MediaWikiParser
@@ -0,0 +1,172 @@
1
#Racc grammar for the MediaWiki markup language.
#
#Typical use together with a lexer:
# input = File.read("data/input1")
# parser = MediaWikiParser.new
# parser.lexer = MediaWikiLexer.new
# parser.parse(input)
class MediaWikiParser

  token BOLDSTART BOLDEND ITALICSTART ITALICEND LINKSTART LINKEND
        INTLINKSTART INTLINKEND SECTION TEXT PRE
        HLINE SIGNATURE_NAME SIGNATURE_DATE SIGNATURE_FULL
        UL_START UL_END LI_START LI_END OL_START OL_END

rule

  #Start symbol: a new WikiAST is pushed onto @nodes and receives
  #every parsed content node as a child.
  wiki:
      repeated_contents
        {
          root = WikiAST.new
          @nodes.push root
          root.children += val[0]
        }
    ;

  #One top-level piece of content.
  contents:
      text
        { result = val[0] }
    | bulleted_list
        { result = val[0] }
    | preformatted
        {
          result = PreformattedAST.new
          result.contents = val[0]
        }
    | section
        {
          #val[0] is the [contents, level] pair built by the section rule.
          heading, depth = val[0]
          result = SectionAST.new
          result.contents = heading
          result.level = depth
        }
    ;

  #One or more contents, collected into an array.
  repeated_contents:
      contents
        { result = [val[0]] }
    | repeated_contents contents
        { result = val[0] + [val[1]] }
    ;

  text:
      element
        {
          #val[0] is the [formatting, contents] pair built by the element rule.
          kind, body = val[0]
          result = TextAST.new
          result.formatting = kind
          result.contents = body
        }
    | formatted_element
        { result = val[0] }
    ;

  #Each alternative yields a [formatting, contents] pair.
  element:
      LINKSTART TEXT LINKEND
        { result = [:Link, val[1]] }
    | INTLINKSTART TEXT INTLINKEND
        { result = [:InternalLink, val[1]] }
    | TEXT
        { result = [:None, val[0]] }
    | HLINE
        { result = [:HLine, val[0]] }
    | SIGNATURE_DATE
        { result = [:SignatureDate, val[0]] }
    | SIGNATURE_NAME
        { result = [:SignatureName, val[0]] }
    | SIGNATURE_FULL
        { result = [:SignatureFull, val[0]] }
    ;

  formatted_element:
      BOLDSTART repeated_contents BOLDEND
        {
          result = FormattedAST.new
          result.formatting = :Bold
          result.children += val[1]
        }
    | ITALICSTART repeated_contents ITALICEND
        {
          result = FormattedAST.new
          result.formatting = :Italic
          result.children += val[1]
        }
    ;

  #A bulleted list holds at least one item.
  bulleted_list:
      UL_START list_item list_contents UL_END
        {
          ul = ListAST.new
          ul.type = :Bulleted
          ul.children << val[1]
          ul.children += val[2]
          result = ul
        }
    ;

  #Remaining items of a list (possibly none). The leading action
  #supplies the empty array that the closing action starts from.
  list_contents:
        { result = [] }
      list_item list_contents
        { result = (result << val[1]) + val[2] }
    |
        { result = [] }
    ;

  list_item:
      LI_START repeated_contents LI_END
        {
          result = ListItemAST.new
          result.children += val[1]
        }
    ;

  preformatted:
      PRE
        { result = val[0] }
    ;

  #Yields [heading text, length of the opening SECTION token].
  section:
      SECTION TEXT SECTION
        { result = [val[1], val[0].length] }
    ;

end
148
+
149
+ ---- header ----
150
+ require 'mediacloth/mediawikiast'
151
+
152
+ ---- inner ----
153
+
154
+ attr_accessor :lexer
155
+
156
#Creates the parser with an empty @nodes list, then defers to the
#superclass initializer.
def initialize
  @nodes = Array.new
  super
end
160
+
161
#Tokenizes input string and parses it.
#
#input:: the text handed to lexer.tokenize.
#
#Returns the last entry of @nodes once do_parse has finished.
#Anything raised by lexer.tokenize or do_parse propagates to the caller.
#
#Debug tracing is left to the caller: @yydebug is not touched here.
def parse(input)
  #Drop nodes left behind by an earlier call so that a reused parser
  #does not hold on to every tree it has built.
  @nodes.clear
  lexer.tokenize(input)
  do_parse
  @nodes.last
end
168
+
169
#Fetches the next token by delegating to the lex method of @lexer
#and hands its return value back unchanged.
def next_token
  @lexer.lex
end