mediacloth 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. data/README +37 -0
  2. data/lib/mediacloth/mediawikiast.rb +50 -0
  3. data/lib/mediacloth/mediawikihtmlgenerator.rb +105 -0
  4. data/lib/mediacloth/mediawikihtmlgenerator.rb~ +105 -0
  5. data/lib/mediacloth/mediawikilexer.rb +407 -0
  6. data/lib/mediacloth/mediawikiparams.rb +33 -0
  7. data/lib/mediacloth/mediawikiparser.rb +429 -0
  8. data/lib/mediacloth/mediawikiparser.y +172 -0
  9. data/lib/mediacloth/mediawikiparser.y~ +172 -0
  10. data/lib/mediacloth/mediawikiwalker.rb +62 -0
  11. data/lib/mediacloth/mediawikiwalker.rb~ +62 -0
  12. data/lib/mediacloth.rb +23 -0
  13. data/lib/mediacloth.rb~ +23 -0
  14. data/test/data/html1 +21 -0
  15. data/test/data/html2 +2 -0
  16. data/test/data/html3 +1 -0
  17. data/test/data/html4 +1 -0
  18. data/test/data/html6 +8 -0
  19. data/test/data/html7 +1 -0
  20. data/test/data/input1 +29 -0
  21. data/test/data/input2 +2 -0
  22. data/test/data/input3 +2 -0
  23. data/test/data/input4 +1 -0
  24. data/test/data/input5 +12 -0
  25. data/test/data/input6 +8 -0
  26. data/test/data/input7 +2 -0
  27. data/test/data/lex1 +23 -0
  28. data/test/data/lex2 +2 -0
  29. data/test/data/lex3 +1 -0
  30. data/test/data/lex4 +1 -0
  31. data/test/data/lex5 +12 -0
  32. data/test/data/lex6 +8 -0
  33. data/test/data/lex7 +2 -0
  34. data/test/data/result1 +48 -0
  35. data/test/dataproducers/html.rb +23 -0
  36. data/test/dataproducers/html.rb~ +23 -0
  37. data/test/dataproducers/lex.rb +15 -0
  38. data/test/debugwalker.rb +63 -0
  39. data/test/debugwalker.rb~ +63 -0
  40. data/test/htmlgenerator.rb +25 -0
  41. data/test/htmlgenerator.rb~ +25 -0
  42. data/test/lexer.rb +57 -0
  43. data/test/lexer.rb~ +57 -0
  44. data/test/parser.rb +23 -0
  45. data/test/parser.rb~ +23 -0
  46. data/test/testhelper.rb +27 -0
  47. data/test/testhelper.rb~ +28 -0
  48. metadata +97 -0
@@ -0,0 +1,33 @@
1
+ require 'singleton'
2
+
3
# MediaWiki parser parameter handler object.
#
# Stores and gives access to various parser settings and
# parser environment variables.
class MediaWikiParams
  # MediaWikiParams is a singleton class: obtain the shared object with
  # MediaWikiParams.instance (Singleton makes MediaWikiParams.new private).
  include Singleton

  # The name of the wiki page author.
  attr_accessor :author

  # Pins the value returned by #time (useful for testing purposes).
  # Assigning nil (or false) makes #time fall back to the current time again.
  attr_writer :time

  # Starts with the author name "Creator" and no pinned time.
  def initialize
    @author = "Creator"
    # Set explicitly so that #time never reads an uninitialized instance variable.
    @time = nil
  end

  # Creation time of the page.
  #
  # Returns the value assigned through #time= when one is set,
  # otherwise Time.now evaluated at the moment of the call.
  def time
    @time || Time.now
  end
end
@@ -0,0 +1,429 @@
1
+ #
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by racc 1.4.5
4
+ # from racc grammer file "mediawikiparser.y".
5
+ #
6
+
7
+ require 'racc/parser'
8
+
9
+
10
+ require 'mediacloth/mediawikiast'
11
+
12
+
13
+ class MediaWikiParser < Racc::Parser
14
+
15
+ module_eval <<'..end mediawikiparser.y modeval..id4f3872d37f', 'mediawikiparser.y', 153
16
+
17
+ attr_accessor :lexer
18
+
19
# Creates the list that receives finished syntax trees (the grammar's
# +wiki+ rule pushes a WikiAST onto @nodes) and then runs the superclass
# initializer.
def initialize
  @nodes = []
  super
end

# Tokenizes input string and parses it.
#
# input:: the wiki markup; handed unchanged to lexer.tokenize.
#
# Returns the last element of @nodes once do_parse has finished, i.e. the
# tree pushed while parsing this input. The lexer must have been assigned
# through the +lexer+ accessor beforehand.
def parse(input)
  # Start from an empty list on every run so a reused parser does not keep
  # the trees of all earlier inputs alive.
  @nodes = []
  # @yydebug is intentionally not forced on here; a caller that wants racc
  # tracing can set it before parsing (presumably only honored by parsers
  # generated with racc debugging support - confirm against the racc docs).
  lexer.tokenize(input)
  do_parse
  @nodes.last
end

# Asks the lexer to return the next token (the result of its +lex+ method).
def next_token
  lexer.lex
end
36
+ ..end mediawikiparser.y modeval..id4f3872d37f
37
+
38
+ ##### racc 1.4.5 generates ###
39
+
40
+ racc_reduce_table = [
41
+ 0, 0, :racc_error,
42
+ 1, 24, :_reduce_1,
43
+ 1, 26, :_reduce_2,
44
+ 1, 26, :_reduce_3,
45
+ 1, 26, :_reduce_4,
46
+ 1, 26, :_reduce_5,
47
+ 1, 25, :_reduce_6,
48
+ 2, 25, :_reduce_7,
49
+ 1, 27, :_reduce_8,
50
+ 1, 27, :_reduce_9,
51
+ 3, 31, :_reduce_10,
52
+ 3, 31, :_reduce_11,
53
+ 1, 31, :_reduce_12,
54
+ 1, 31, :_reduce_13,
55
+ 1, 31, :_reduce_14,
56
+ 1, 31, :_reduce_15,
57
+ 1, 31, :_reduce_16,
58
+ 3, 32, :_reduce_17,
59
+ 3, 32, :_reduce_18,
60
+ 4, 28, :_reduce_19,
61
+ 0, 35, :_reduce_20,
62
+ 3, 34, :_reduce_21,
63
+ 0, 34, :_reduce_22,
64
+ 3, 33, :_reduce_23,
65
+ 1, 29, :_reduce_24,
66
+ 3, 30, :_reduce_25 ]
67
+
68
+ racc_reduce_n = 26
69
+
70
+ racc_shift_n = 44
71
+
72
+ racc_action_table = [
73
+ 3, 32, 8, 29, 14, 30, 17, 28, 20, 1,
74
+ 2, 4, 6, 9, 11, 13, -22, 3, 42, 8,
75
+ 27, 14, 37, 17, 38, 20, 1, 2, 4, 6,
76
+ 9, 11, 13, 3, 39, 8, 40, 14, 27, 17,
77
+ 23, 20, 1, 2, 4, 6, 9, 11, 13, 3,
78
+ -22, 8, nil, 14, nil, 17, nil, 20, 1, 2,
79
+ 4, 6, 9, 11, 13, 3, nil, 8, nil, 14,
80
+ nil, 17, nil, 20, 1, 2, 4, 6, 9, 11,
81
+ 13, 3, nil, 8, nil, 14, nil, 17, nil, 20,
82
+ 1, 2, 4, 6, 9, 11, 13, 3, nil, 8,
83
+ 33, 14, nil, 17, nil, 20, 1, 2, 4, 6,
84
+ 9, 11, 13, 3, 31, 8, nil, 14, nil, 17,
85
+ nil, 20, 1, 2, 4, 6, 9, 11, 13 ]
86
+
87
+ racc_action_check = [
88
+ 36, 23, 36, 17, 36, 20, 36, 14, 36, 36,
89
+ 36, 36, 36, 36, 36, 36, 26, 8, 36, 8,
90
+ 13, 8, 28, 8, 29, 8, 8, 8, 8, 8,
91
+ 8, 8, 8, 0, 30, 0, 34, 0, 35, 0,
92
+ 5, 0, 0, 0, 0, 0, 0, 0, 0, 7,
93
+ 41, 7, nil, 7, nil, 7, nil, 7, 7, 7,
94
+ 7, 7, 7, 7, 7, 3, nil, 3, nil, 3,
95
+ nil, 3, nil, 3, 3, 3, 3, 3, 3, 3,
96
+ 3, 27, nil, 27, nil, 27, nil, 27, nil, 27,
97
+ 27, 27, 27, 27, 27, 27, 27, 25, nil, 25,
98
+ 25, 25, nil, 25, nil, 25, 25, 25, 25, 25,
99
+ 25, 25, 25, 22, 22, 22, nil, 22, nil, 22,
100
+ nil, 22, 22, 22, 22, 22, 22, 22, 22 ]
101
+
102
+ racc_action_pointer = [
103
+ 31, nil, nil, 63, nil, 40, nil, 47, 15, nil,
104
+ nil, nil, nil, 1, -4, nil, nil, -8, nil, nil,
105
+ -6, nil, 111, 1, nil, 95, -2, 79, 15, 15,
106
+ 24, nil, nil, nil, 18, 19, -2, nil, nil, nil,
107
+ nil, 32, nil, nil ]
108
+
109
+ racc_action_default = [
110
+ -26, -12, -24, -26, -13, -26, -15, -1, -26, -14,
111
+ -6, -16, -2, -26, -26, -3, -4, -26, -5, -8,
112
+ -26, -9, -26, -26, -7, -26, -20, -26, -26, -26,
113
+ -26, -17, 44, -18, -26, -26, -26, -10, -11, -25,
114
+ -19, -20, -23, -21 ]
115
+
116
+ racc_goto_table = [
117
+ 24, 7, 26, 5, 22, 34, nil, nil, nil, 25,
118
+ nil, nil, nil, nil, nil, 24, nil, nil, 24, nil,
119
+ 43, nil, nil, nil, 41, nil, nil, nil, 36, 24 ]
120
+
121
+ racc_goto_check = [
122
+ 3, 2, 10, 1, 2, 11, nil, nil, nil, 2,
123
+ nil, nil, nil, nil, nil, 3, nil, nil, 3, nil,
124
+ 11, nil, nil, nil, 10, nil, nil, nil, 2, 3 ]
125
+
126
+ racc_goto_pointer = [
127
+ nil, 3, 1, -7, nil, nil, nil, nil, nil, nil,
128
+ -11, -21, nil ]
129
+
130
+ racc_goto_default = [
131
+ nil, nil, nil, 10, 12, 15, 16, 18, 19, 21,
132
+ nil, nil, 35 ]
133
+
134
+ racc_token_table = {
135
+ false => 0,
136
+ Object.new => 1,
137
+ :BOLDSTART => 2,
138
+ :BOLDEND => 3,
139
+ :ITALICSTART => 4,
140
+ :ITALICEND => 5,
141
+ :LINKSTART => 6,
142
+ :LINKEND => 7,
143
+ :INTLINKSTART => 8,
144
+ :INTLINKEND => 9,
145
+ :SECTION => 10,
146
+ :TEXT => 11,
147
+ :PRE => 12,
148
+ :HLINE => 13,
149
+ :SIGNATURE_NAME => 14,
150
+ :SIGNATURE_DATE => 15,
151
+ :SIGNATURE_FULL => 16,
152
+ :UL_START => 17,
153
+ :UL_END => 18,
154
+ :LI_START => 19,
155
+ :LI_END => 20,
156
+ :OL_START => 21,
157
+ :OL_END => 22 }
158
+
159
+ racc_use_result_var = true
160
+
161
+ racc_nt_base = 23
162
+
163
+ Racc_arg = [
164
+ racc_action_table,
165
+ racc_action_check,
166
+ racc_action_default,
167
+ racc_action_pointer,
168
+ racc_goto_table,
169
+ racc_goto_check,
170
+ racc_goto_default,
171
+ racc_goto_pointer,
172
+ racc_nt_base,
173
+ racc_reduce_table,
174
+ racc_token_table,
175
+ racc_shift_n,
176
+ racc_reduce_n,
177
+ racc_use_result_var ]
178
+
179
+ Racc_token_to_s_table = [
180
+ '$end',
181
+ 'error',
182
+ 'BOLDSTART',
183
+ 'BOLDEND',
184
+ 'ITALICSTART',
185
+ 'ITALICEND',
186
+ 'LINKSTART',
187
+ 'LINKEND',
188
+ 'INTLINKSTART',
189
+ 'INTLINKEND',
190
+ 'SECTION',
191
+ 'TEXT',
192
+ 'PRE',
193
+ 'HLINE',
194
+ 'SIGNATURE_NAME',
195
+ 'SIGNATURE_DATE',
196
+ 'SIGNATURE_FULL',
197
+ 'UL_START',
198
+ 'UL_END',
199
+ 'LI_START',
200
+ 'LI_END',
201
+ 'OL_START',
202
+ 'OL_END',
203
+ '$start',
204
+ 'wiki',
205
+ 'repeated_contents',
206
+ 'contents',
207
+ 'text',
208
+ 'bulleted_list',
209
+ 'preformatted',
210
+ 'section',
211
+ 'element',
212
+ 'formatted_element',
213
+ 'list_item',
214
+ 'list_contents',
215
+ '@1']
216
+
217
+ Racc_debug_parser = false
218
+
219
+ ##### racc system variables end #####
220
+
221
+ # reduce 0 omitted
222
+
223
+ module_eval <<'.,.,', 'mediawikiparser.y', 25
224
+ def _reduce_1( val, _values, result )
225
+ @nodes.push WikiAST.new
226
+ #@nodes.last.children.insert(0, val[0])
227
+ #puts val[0]
228
+ @nodes.last.children += val[0]
229
+ result
230
+ end
231
+ .,.,
232
+
233
+ module_eval <<'.,.,', 'mediawikiparser.y', 32
234
+ def _reduce_2( val, _values, result )
235
+ result = val[0]
236
+ result
237
+ end
238
+ .,.,
239
+
240
+ module_eval <<'.,.,', 'mediawikiparser.y', 36
241
+ def _reduce_3( val, _values, result )
242
+ result = val[0]
243
+ result
244
+ end
245
+ .,.,
246
+
247
+ module_eval <<'.,.,', 'mediawikiparser.y', 42
248
+ def _reduce_4( val, _values, result )
249
+ p = PreformattedAST.new
250
+ p.contents = val[0]
251
+ result = p
252
+ result
253
+ end
254
+ .,.,
255
+
256
+ module_eval <<'.,.,', 'mediawikiparser.y', 49
257
+ def _reduce_5( val, _values, result )
258
+ s = SectionAST.new
259
+ s.contents = val[0][0]
260
+ s.level = val[0][1]
261
+ result = s
262
+ result
263
+ end
264
+ .,.,
265
+
266
+ module_eval <<'.,.,', 'mediawikiparser.y', 56
267
+ def _reduce_6( val, _values, result )
268
+ result = []
269
+ result << val[0]
270
+ result
271
+ end
272
+ .,.,
273
+
274
+ module_eval <<'.,.,', 'mediawikiparser.y', 62
275
+ def _reduce_7( val, _values, result )
276
+ result = []
277
+ result += val[0]
278
+ result << val[1]
279
+ result
280
+ end
281
+ .,.,
282
+
283
+ module_eval <<'.,.,', 'mediawikiparser.y', 71
284
+ def _reduce_8( val, _values, result )
285
+ p = TextAST.new
286
+ p.formatting = val[0][0]
287
+ p.contents = val[0][1]
288
+ result = p
289
+ result
290
+ end
291
+ .,.,
292
+
293
+ module_eval <<'.,.,', 'mediawikiparser.y', 75
294
+ def _reduce_9( val, _values, result )
295
+ result = val[0]
296
+ result
297
+ end
298
+ .,.,
299
+
300
+ module_eval <<'.,.,', 'mediawikiparser.y', 78
301
+ def _reduce_10( val, _values, result )
302
+ return [:Link, val[1]]
303
+ result
304
+ end
305
+ .,.,
306
+
307
+ module_eval <<'.,.,', 'mediawikiparser.y', 80
308
+ def _reduce_11( val, _values, result )
309
+ return [:InternalLink, val[1]]
310
+ result
311
+ end
312
+ .,.,
313
+
314
+ module_eval <<'.,.,', 'mediawikiparser.y', 82
315
+ def _reduce_12( val, _values, result )
316
+ return [:None, val[0]]
317
+ result
318
+ end
319
+ .,.,
320
+
321
+ module_eval <<'.,.,', 'mediawikiparser.y', 84
322
+ def _reduce_13( val, _values, result )
323
+ return [:HLine, val[0]]
324
+ result
325
+ end
326
+ .,.,
327
+
328
+ module_eval <<'.,.,', 'mediawikiparser.y', 86
329
+ def _reduce_14( val, _values, result )
330
+ return [:SignatureDate, val[0]]
331
+ result
332
+ end
333
+ .,.,
334
+
335
+ module_eval <<'.,.,', 'mediawikiparser.y', 88
336
+ def _reduce_15( val, _values, result )
337
+ return [:SignatureName, val[0]]
338
+ result
339
+ end
340
+ .,.,
341
+
342
+ module_eval <<'.,.,', 'mediawikiparser.y', 90
343
+ def _reduce_16( val, _values, result )
344
+ return [:SignatureFull, val[0]]
345
+ result
346
+ end
347
+ .,.,
348
+
349
+ module_eval <<'.,.,', 'mediawikiparser.y', 100
350
+ def _reduce_17( val, _values, result )
351
+ p = FormattedAST.new
352
+ p.formatting = :Bold
353
+ p.children += val[1]
354
+ result = p
355
+ result
356
+ end
357
+ .,.,
358
+
359
+ module_eval <<'.,.,', 'mediawikiparser.y', 107
360
+ def _reduce_18( val, _values, result )
361
+ p = FormattedAST.new
362
+ p.formatting = :Italic
363
+ p.children += val[1]
364
+ result = p
365
+ result
366
+ end
367
+ .,.,
368
+
369
+ module_eval <<'.,.,', 'mediawikiparser.y', 117
370
+ def _reduce_19( val, _values, result )
371
+ list = ListAST.new
372
+ list.type = :Bulleted
373
+ list.children << val[1]
374
+ list.children += val[2]
375
+ result = list
376
+ result
377
+ end
378
+ .,.,
379
+
380
+ module_eval <<'.,.,', 'mediawikiparser.y', 120
381
+ def _reduce_20( val, _values, result )
382
+ result = []
383
+ result
384
+ end
385
+ .,.,
386
+
387
+ module_eval <<'.,.,', 'mediawikiparser.y', 126
388
+ def _reduce_21( val, _values, result )
389
+ result << val[1]
390
+ result += val[2]
391
+ result
392
+ end
393
+ .,.,
394
+
395
+ module_eval <<'.,.,', 'mediawikiparser.y', 127
396
+ def _reduce_22( val, _values, result )
397
+ result = []
398
+ result
399
+ end
400
+ .,.,
401
+
402
+ module_eval <<'.,.,', 'mediawikiparser.y', 136
403
+ def _reduce_23( val, _values, result )
404
+ li = ListItemAST.new
405
+ li.children += val[1]
406
+ result = li
407
+ result
408
+ end
409
+ .,.,
410
+
411
+ module_eval <<'.,.,', 'mediawikiparser.y', 139
412
+ def _reduce_24( val, _values, result )
413
+ result = val[0]
414
+ result
415
+ end
416
+ .,.,
417
+
418
+ module_eval <<'.,.,', 'mediawikiparser.y', 143
419
+ def _reduce_25( val, _values, result )
420
+ result = [val[1], val[0].length]
421
+ result
422
+ end
423
+ .,.,
424
+
425
+ def _reduce_none( val, _values, result )
426
+ result
427
+ end
428
+
429
+ end # class MediaWikiParser
@@ -0,0 +1,172 @@
1
+ #The parser for the MediaWiki language.
2
+ #
3
+ #Usage together with a lexer:
4
+ # inputFile = File.new("data/input1", "r")
5
+ # input = inputFile.read
6
+ # parser = MediaWikiParser.new
7
+ # parser.lexer = MediaWikiLexer.new
8
+ # parser.parse(input)
9
+ class MediaWikiParser
10
+
11
+ token BOLDSTART BOLDEND ITALICSTART ITALICEND LINKSTART LINKEND
12
+ INTLINKSTART INTLINKEND SECTION TEXT PRE
13
+ HLINE SIGNATURE_NAME SIGNATURE_DATE SIGNATURE_FULL
14
+ UL_START UL_END LI_START LI_END OL_START OL_END
15
+
16
+ rule
17
+
18
+ wiki:
19
+ repeated_contents
20
+ {
21
+ @nodes.push WikiAST.new
22
+ #@nodes.last.children.insert(0, val[0])
23
+ #puts val[0]
24
+ @nodes.last.children += val[0]
25
+ }
26
+ ;
27
+
28
+ contents:
29
+ text
30
+ {
31
+ result = val[0]
32
+ }
33
+ | bulleted_list
34
+ {
35
+ result = val[0]
36
+ }
37
+ | preformatted
38
+ {
39
+ p = PreformattedAST.new
40
+ p.contents = val[0]
41
+ result = p
42
+ }
43
+ | section
44
+ {
45
+ s = SectionAST.new
46
+ s.contents = val[0][0]
47
+ s.level = val[0][1]
48
+ result = s
49
+ }
50
+ ;
51
+
52
+ repeated_contents: contents
53
+ {
54
+ result = []
55
+ result << val[0]
56
+ }
57
+ | repeated_contents contents
58
+ {
59
+ result = []
60
+ result += val[0]
61
+ result << val[1]
62
+ }
63
+ ;
64
+
65
+ text: element
66
+ {
67
+ p = TextAST.new
68
+ p.formatting = val[0][0]
69
+ p.contents = val[0][1]
70
+ result = p
71
+ }
72
+ | formatted_element
73
+ {
74
+ result = val[0]
75
+ }
76
+ ;
77
+
78
+ element: LINKSTART TEXT LINKEND
79
+ { return [:Link, val[1]] }
80
+ | INTLINKSTART TEXT INTLINKEND
81
+ { return [:InternalLink, val[1]] }
82
+ | TEXT
83
+ { return [:None, val[0]] }
84
+ | HLINE
85
+ { return [:HLine, val[0]] }
86
+ | SIGNATURE_DATE
87
+ { return [:SignatureDate, val[0]] }
88
+ | SIGNATURE_NAME
89
+ { return [:SignatureName, val[0]] }
90
+ | SIGNATURE_FULL
91
+ { return [:SignatureFull, val[0]] }
92
+ ;
93
+
94
+ formatted_element: BOLDSTART repeated_contents BOLDEND
95
+ {
96
+ p = FormattedAST.new
97
+ p.formatting = :Bold
98
+ p.children += val[1]
99
+ result = p
100
+ }
101
+ | ITALICSTART repeated_contents ITALICEND
102
+ {
103
+ p = FormattedAST.new
104
+ p.formatting = :Italic
105
+ p.children += val[1]
106
+ result = p
107
+ }
108
+ ;
109
+
110
+ bulleted_list: UL_START list_item list_contents UL_END
111
+ {
112
+ list = ListAST.new
113
+ list.type = :Bulleted
114
+ list.children << val[1]
115
+ list.children += val[2]
116
+ result = list
117
+ }
118
+ ;
119
+
120
+ list_contents:
121
+ { result = [] }
122
+ list_item list_contents
123
+ {
124
+ result << val[1]
125
+ result += val[2]
126
+ }
127
+ |
128
+ { result = [] }
129
+ ;
130
+
131
+ list_item: LI_START repeated_contents LI_END
132
+ {
133
+ li = ListItemAST.new
134
+ li.children += val[1]
135
+ result = li
136
+ }
137
+ ;
138
+
139
+ preformatted: PRE
140
+ { result = val[0] }
141
+ ;
142
+
143
+ section: SECTION TEXT SECTION
144
+ { result = [val[1], val[0].length] }
145
+ ;
146
+
147
+ end
148
+
149
+ ---- header ----
150
+ require 'mediacloth/mediawikiast'
151
+
152
+ ---- inner ----
153
+
154
+ attr_accessor :lexer
155
+
156
# Creates the list that receives finished syntax trees (the +wiki+ rule
# pushes a WikiAST onto @nodes) and then runs the superclass initializer.
def initialize
  @nodes = []
  super
end

# Tokenizes input string and parses it.
#
# input:: the wiki markup; handed unchanged to lexer.tokenize.
#
# Returns the last element of @nodes once do_parse has finished, i.e. the
# tree pushed while parsing this input. The lexer must have been assigned
# through the +lexer+ accessor beforehand.
def parse(input)
  # Start from an empty list on every run so a reused parser does not keep
  # the trees of all earlier inputs alive.
  @nodes = []
  # @yydebug is intentionally not forced on here; a caller that wants racc
  # tracing can set it before parsing (presumably only honored by parsers
  # generated with racc debugging support - confirm against the racc docs).
  lexer.tokenize(input)
  do_parse
  @nodes.last
end

# Asks the lexer to return the next token (the result of its +lex+ method).
def next_token
  lexer.lex
end