mediacloth 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. data/README +37 -0
  2. data/lib/mediacloth/mediawikiast.rb +50 -0
  3. data/lib/mediacloth/mediawikihtmlgenerator.rb +105 -0
  4. data/lib/mediacloth/mediawikihtmlgenerator.rb~ +105 -0
  5. data/lib/mediacloth/mediawikilexer.rb +407 -0
  6. data/lib/mediacloth/mediawikiparams.rb +33 -0
  7. data/lib/mediacloth/mediawikiparser.rb +429 -0
  8. data/lib/mediacloth/mediawikiparser.y +172 -0
  9. data/lib/mediacloth/mediawikiparser.y~ +172 -0
  10. data/lib/mediacloth/mediawikiwalker.rb +62 -0
  11. data/lib/mediacloth/mediawikiwalker.rb~ +62 -0
  12. data/lib/mediacloth.rb +23 -0
  13. data/lib/mediacloth.rb~ +23 -0
  14. data/test/data/html1 +21 -0
  15. data/test/data/html2 +2 -0
  16. data/test/data/html3 +1 -0
  17. data/test/data/html4 +1 -0
  18. data/test/data/html6 +8 -0
  19. data/test/data/html7 +1 -0
  20. data/test/data/input1 +29 -0
  21. data/test/data/input2 +2 -0
  22. data/test/data/input3 +2 -0
  23. data/test/data/input4 +1 -0
  24. data/test/data/input5 +12 -0
  25. data/test/data/input6 +8 -0
  26. data/test/data/input7 +2 -0
  27. data/test/data/lex1 +23 -0
  28. data/test/data/lex2 +2 -0
  29. data/test/data/lex3 +1 -0
  30. data/test/data/lex4 +1 -0
  31. data/test/data/lex5 +12 -0
  32. data/test/data/lex6 +8 -0
  33. data/test/data/lex7 +2 -0
  34. data/test/data/result1 +48 -0
  35. data/test/dataproducers/html.rb +23 -0
  36. data/test/dataproducers/html.rb~ +23 -0
  37. data/test/dataproducers/lex.rb +15 -0
  38. data/test/debugwalker.rb +63 -0
  39. data/test/debugwalker.rb~ +63 -0
  40. data/test/htmlgenerator.rb +25 -0
  41. data/test/htmlgenerator.rb~ +25 -0
  42. data/test/lexer.rb +57 -0
  43. data/test/lexer.rb~ +57 -0
  44. data/test/parser.rb +23 -0
  45. data/test/parser.rb~ +23 -0
  46. data/test/testhelper.rb +27 -0
  47. data/test/testhelper.rb~ +28 -0
  48. metadata +97 -0
@@ -0,0 +1,172 @@
1
+ #The parser for the MediaWiki language.
2
+ #
3
+ #Usage together with a lexer:
4
+ # inputFile = File.new("data/input1", "r")
5
+ # input = inputFile.read
6
+ # parser = MediaWikiParser.new
7
+ # parser.lexer = MediaWikiLexer.new
8
+ # parser.parse(input)
9
+ class MediaWikiParser
10
+ # Terminal symbols delivered by the lexer. OL_START and OL_END are declared but no rule below uses them.
11
+ token BOLDSTART BOLDEND ITALICSTART ITALICEND LINKSTART LINKEND
12
+ INTLINKSTART INTLINKEND SECTION TEXT PRE
13
+ HLINE SIGNATURE_NAME SIGNATURE_DATE SIGNATURE_FULL
14
+ UL_START UL_END LI_START LI_END OL_START OL_END
15
+
16
+ rule
17
+ # wiki is the first rule (racc's default start symbol): it pushes a new WikiAST onto @nodes and appends all parsed top-level nodes to its children.
18
+ wiki:
19
+ repeated_contents
20
+ {
21
+ @nodes.push WikiAST.new
22
+ #@nodes.last.children.insert(0, val[0])
23
+ #puts val[0]
24
+ @nodes.last.children += val[0]
25
+ }
26
+ ;
27
+ # contents is one construct: text and bulleted_list values pass through; PRE and section values are wrapped in PreformattedAST / SectionAST here.
28
+ contents:
29
+ text
30
+ {
31
+ result = val[0]
32
+ }
33
+ | bulleted_list
34
+ {
35
+ result = val[0]
36
+ }
37
+ | preformatted
38
+ {
39
+ p = PreformattedAST.new
40
+ p.contents = val[0]
41
+ result = p
42
+ }
43
+ | section
44
+ {
45
+ s = SectionAST.new
46
+ s.contents = val[0][0]
47
+ s.level = val[0][1]
48
+ result = s
49
+ }
50
+ ;
51
+ # repeated_contents is one or more contents, collected left-recursively into a flat array.
52
+ repeated_contents: contents
53
+ {
54
+ result = []
55
+ result << val[0]
56
+ }
57
+ | repeated_contents contents
58
+ {
59
+ result = []
60
+ result += val[0]
61
+ result << val[1]
62
+ }
63
+ ;
64
+ # text wraps the [formatting, contents] pair produced by element in a TextAST; a formatted_element value is passed through unchanged.
65
+ text: element
66
+ {
67
+ p = TextAST.new
68
+ p.formatting = val[0][0]
69
+ p.contents = val[0][1]
70
+ result = p
71
+ }
72
+ | formatted_element
73
+ {
74
+ result = val[0]
75
+ }
76
+ ;
77
+ # element: every alternative returns a [formatting symbol, token value] pair; for links the value is the TEXT between the delimiters.
78
+ element: LINKSTART TEXT LINKEND
79
+ { return [:Link, val[1]] }
80
+ | INTLINKSTART TEXT INTLINKEND
81
+ { return [:InternalLink, val[1]] }
82
+ | TEXT
83
+ { return [:None, val[0]] }
84
+ | HLINE
85
+ { return [:HLine, val[0]] }
86
+ | SIGNATURE_DATE
87
+ { return [:SignatureDate, val[0]] }
88
+ | SIGNATURE_NAME
89
+ { return [:SignatureName, val[0]] }
90
+ | SIGNATURE_FULL
91
+ { return [:SignatureFull, val[0]] }
92
+ ;
93
+ # formatted_element is a bold or italic span: the enclosed repeated_contents become the children of a FormattedAST tagged :Bold or :Italic.
94
+ formatted_element: BOLDSTART repeated_contents BOLDEND
95
+ {
96
+ p = FormattedAST.new
97
+ p.formatting = :Bold
98
+ p.children += val[1]
99
+ result = p
100
+ }
101
+ | ITALICSTART repeated_contents ITALICEND
102
+ {
103
+ p = FormattedAST.new
104
+ p.formatting = :Italic
105
+ p.children += val[1]
106
+ result = p
107
+ }
108
+ ;
109
+ # bulleted_list requires at least one list_item; any further items are supplied by list_contents. The result is a ListAST of type :Bulleted.
110
+ bulleted_list: UL_START list_item list_contents UL_END
111
+ {
112
+ list = ListAST.new
113
+ list.type = :Bulleted
114
+ list.children << val[1]
115
+ list.children += val[2]
116
+ result = list
117
+ }
118
+ ;
119
+ # list_contents is zero or more further items. The action written before the symbols takes the val[0] slot, so list_item is val[1] and the nested list_contents is val[2].
120
+ list_contents:
121
+ { result = [] }
122
+ list_item list_contents
123
+ {
124
+ result << val[1]
125
+ result += val[2]
126
+ }
127
+ |
128
+ { result = [] }
129
+ ;
130
+ # list_item: the contents between LI_START and LI_END become the children of a ListItemAST.
131
+ list_item: LI_START repeated_contents LI_END
132
+ {
133
+ li = ListItemAST.new
134
+ li.children += val[1]
135
+ result = li
136
+ }
137
+ ;
138
+ # preformatted passes the PRE token value through; the contents rule wraps it in a PreformattedAST.
139
+ preformatted: PRE
140
+ { result = val[0] }
141
+ ;
142
+ # section yields [heading TEXT, length of the opening SECTION token value]; the contents rule stores these as SectionAST contents and level.
143
+ section: SECTION TEXT SECTION
144
+ { result = [val[1], val[0].length] }
145
+ ;
146
+
147
+ end
148
+
149
+ ---- header ----
150
+ require 'mediawikiast'
151
+
152
+ ---- inner ----
153
+
154
+ attr_accessor :lexer
155
+
156
# Starts with an empty @nodes stack (the wiki rule pushes the finished
# WikiAST onto it) and then runs the superclass initializer.
def initialize
  @nodes = Array.new
  super()
end
160
+
161
# Tokenizes +input+ with the assigned lexer, then runs the generated parser.
#
# @yydebug (racc's debug flag) is switched on for every call, as before.
# Returns the last entry of @nodes, i.e. the node most recently pushed
# while parsing.
def parse(input)
  @yydebug = true
  lexer.tokenize(input)
  do_parse
  @nodes.last
end
168
+
169
# Token source for the parser: hands back whatever @lexer.lex returns.
def next_token
  @lexer.lex
end
@@ -0,0 +1,62 @@
1
+ require 'mediacloth/mediawikiast'
2
+
3
+ #Default walker to traverse the parse tree.
4
+ #
5
+ #The walker traverses the entire parse tree and does nothing.
6
+ #To implement some functionality during this process, reimplement
7
+ #<i>parse...</i> methods and don't forget to call super() to not
8
+ #break the walk.
9
+ #
10
+ #Current implementations: MediaWikiHTMLGenerator, DebugWalker
11
class MediaWikiWalker

  # Public entry point: starts the traversal at the root node +ast+.
  def parse(ast)
    parse_wiki_ast(ast)
  end

  protected

  #===== hooks for subclasses: override them and call super() to keep walking ====#

  # Visits each child of +ast+ and passes it to the hook registered for its
  # exact class. A child whose class is not listed (including subclasses of
  # the listed ones) is skipped.
  def parse_wiki_ast(ast)
    ast.children.each do |node|
      [
        [FormattedAST,    :parse_formatted],
        [TextAST,         :parse_text],
        [ListAST,         :parse_list],
        [PreformattedAST, :parse_preformatted],
        [SectionAST,      :parse_section]
      ].each do |node_class, hook|
        __send__(hook, node) if node.class == node_class
      end
    end
  end

  # Hook for FormattedAST nodes; descends into the node's children.
  def parse_formatted(ast)
    parse_wiki_ast(ast)
  end

  # Hook for TextAST nodes; nothing to descend into.
  def parse_text(ast)
  end

  # Hook for ListAST nodes; only children that are exactly ListItemAST
  # are visited.
  def parse_list(ast)
    ast.children.each do |item|
      next unless item.class == ListItemAST
      parse_list_item(item)
    end
  end

  # Hook for ListItemAST nodes; descends into the item's children.
  def parse_list_item(ast)
    parse_wiki_ast(ast)
  end

  # Hook for PreformattedAST nodes; nothing to descend into.
  def parse_preformatted(ast)
  end

  # Hook for SectionAST nodes; nothing to descend into.
  def parse_section(ast)
  end

end
@@ -0,0 +1,62 @@
1
+ require 'mediawikiast'
2
+
3
+ #Default walker to traverse the parse tree.
4
+ #
5
+ #The walker traverses the entire parse tree and does nothing.
6
+ #To implement some functionality during this process, reimplement
7
+ #<i>parse...</i> methods and don't forget to call super() to not
8
+ #break the walk.
9
+ #
10
+ #Current implementations: MediaWikiHTMLGenerator, DebugWalker
11
class MediaWikiWalker

  # Public entry point: starts the traversal at the root node +ast+.
  def parse(ast)
    parse_wiki_ast(ast)
  end

  protected

  #===== hooks for subclasses: override them and call super() to keep walking ====#

  # Visits each child of +ast+ and passes it to the hook registered for its
  # exact class. A child whose class is not listed (including subclasses of
  # the listed ones) is skipped.
  def parse_wiki_ast(ast)
    ast.children.each do |node|
      [
        [FormattedAST,    :parse_formatted],
        [TextAST,         :parse_text],
        [ListAST,         :parse_list],
        [PreformattedAST, :parse_preformatted],
        [SectionAST,      :parse_section]
      ].each do |node_class, hook|
        __send__(hook, node) if node.class == node_class
      end
    end
  end

  # Hook for FormattedAST nodes; descends into the node's children.
  def parse_formatted(ast)
    parse_wiki_ast(ast)
  end

  # Hook for TextAST nodes; nothing to descend into.
  def parse_text(ast)
  end

  # Hook for ListAST nodes; only children that are exactly ListItemAST
  # are visited.
  def parse_list(ast)
    ast.children.each do |item|
      next unless item.class == ListItemAST
      parse_list_item(item)
    end
  end

  # Hook for ListItemAST nodes; descends into the item's children.
  def parse_list_item(ast)
    parse_wiki_ast(ast)
  end

  # Hook for PreformattedAST nodes; nothing to descend into.
  def parse_preformatted(ast)
  end

  # Hook for SectionAST nodes; nothing to descend into.
  def parse_section(ast)
  end

end
data/lib/mediacloth.rb ADDED
@@ -0,0 +1,23 @@
1
+ require 'mediacloth/mediawikilexer'
2
+ require 'mediacloth/mediawikiparser'
3
+ require 'mediacloth/mediawikiast'
4
+ require 'mediacloth/mediawikiparams'
5
+ require 'mediacloth/mediawikiwalker'
6
+ require 'mediacloth/mediawikihtmlgenerator'
7
+
8
+ #Helper module to facilitate MediaCloth usage.
9
module MediaCloth

  module_function

  # Converts wiki markup in +input+ to HTML: the text is parsed into an AST
  # by MediaWikiParser (fed by a MediaWikiLexer), the AST is walked by a
  # MediaWikiHTMLGenerator, and the generator's html is returned.
  def wiki_to_html(input)
    wiki_parser = MediaWikiParser.new
    wiki_parser.lexer = MediaWikiLexer.new
    tree = wiki_parser.parse(input)

    generator = MediaWikiHTMLGenerator.new
    generator.parse(tree)
    generator.html
  end

end
@@ -0,0 +1,23 @@
1
+ require 'mediacloth/mediawikilexer'
2
+ require 'mediacloth/mediawikiparser'
3
+ require 'mediacloth/mediawikiast'
4
+ require 'mediacloth/mediawikiparams'
5
+ require 'mediacloth/mediawikiwalker'
6
+ require 'mediacloth/mediawikihtmlgenerator'
7
+
8
+ #Helper module to facilitate MediaCloth usage.
9
module MediaCloth

  module_function

  # Converts wiki markup in +input+ to HTML: the text is parsed into an AST
  # by MediaWikiParser (fed by a MediaWikiLexer), the AST is walked by a
  # MediaWikiHTMLGenerator, and the generator's html is returned.
  def wiki_to_html(input)
    wiki_parser = MediaWikiParser.new
    wiki_parser.lexer = MediaWikiLexer.new
    tree = wiki_parser.parse(input)

    generator = MediaWikiHTMLGenerator.new
    generator.parse(tree)
    generator.html
  end

end
data/test/data/html1 ADDED
@@ -0,0 +1,21 @@
1
+ This is a simple text with <b>Bold text</b> and <i>Italic text</i> inside.
2
+ One paragraph can be written in several lines.
3
+
4
+ Another paragraph starts after a blank line.
5
+
6
+
7
+ Another one.
8
+
9
+ This is text with Internal Link and <a href="http://www.example.com" rel="nofollow">external link</a>.
10
+
11
+ We can have headlines:
12
+
13
+
14
+ <h1>Headline1</h1><h2>Headline2</h2><h3>Headline3</h3><h4>Headline4</h4><h5>Headline5</h5><h6>Headline6</h6><h7>Headline7</h7>
15
+
16
+ <hr></hr>
17
+ This is a text after the line.
18
+
19
+ <ul><li>foo
20
+ </li><li>foo2
21
+ </li></ul>
data/test/data/html2 ADDED
@@ -0,0 +1,2 @@
1
+ <hr></hr>
2
+ This is a text
data/test/data/html3 ADDED
@@ -0,0 +1 @@
1
+ --Sat Jan 01 01:01:01 EET 2000CreatorCreator Sat Jan 01 01:01:01 EET 2000
data/test/data/html4 ADDED
@@ -0,0 +1 @@
1
+ <b><i>Foo</i></b>
data/test/data/html6 ADDED
@@ -0,0 +1,8 @@
1
+ This is some text with <b>bold</b> and <i>italic</i> formating.
2
+ The list is also here:
3
+ <ul><li>List Item 1
4
+ <ul><li>Sub list item 1
5
+ </li><li>Sub list item 2
6
+ </li></ul></li><li>List Item 2
7
+ </li></ul>The end
8
+ <hr></hr>
data/test/data/html7 ADDED
@@ -0,0 +1 @@
1
+ <a href="http://www.example.com" rel="nofollow">external link</a><a href="http://www.example.com" rel="nofollow">http://www.example.com</a> [foo]
data/test/data/input1 ADDED
@@ -0,0 +1,29 @@
1
+ This is a simple text with '''Bold text''' and ''Italic text'' inside.
2
+ One paragraph can be written in several lines.
3
+
4
+ Another paragraph starts after a blank line.
5
+
6
+
7
+ Another one.
8
+
9
+ This is text with [[Internal Link]] and [http://www.example.com external link].
10
+
11
+ We can have headlines:
12
+
13
+
14
+ = Headline1 =
15
+ == Headline2 ==
16
+ === Headline3 ===
17
+ ==== Headline4 ====
18
+ ===== Headline5 =====
19
+ ====== Headline6 ======
20
+ ======= Headline7 =======
21
+
22
+ This is a preformatted ''' ''' << '' '' text
23
+ yes
24
+
25
+ ----
26
+ This is a text after the line.
27
+
28
+ *foo
29
+ * foo2
data/test/data/input2 ADDED
@@ -0,0 +1,2 @@
1
+ ----
2
+ This is a text
data/test/data/input3 ADDED
@@ -0,0 +1,2 @@
1
+ --~~~~~~~~
2
+ ~~~~
data/test/data/input4 ADDED
@@ -0,0 +1 @@
1
+ '''''Foo'''''
data/test/data/input5 ADDED
@@ -0,0 +1,12 @@
1
+ *A
2
+ **A
3
+ ***a
4
+ ***b
5
+ ****a
6
+ *B
7
+ **b
8
+
9
+ #a
10
+ ##a
11
+ ###a
12
+ ####a
data/test/data/input6 ADDED
@@ -0,0 +1,8 @@
1
+ This is some text with '''bold''' and ''italic'' formating.
2
+ The list is also here:
3
+ * List Item 1
4
+ ** Sub list item 1
5
+ ** Sub list item 2
6
+ * List Item 2
7
+ The end
8
+ ----
data/test/data/input7 ADDED
@@ -0,0 +1,2 @@
1
+ [http://www.example.com external link]
2
+ http://www.example.com [foo]
data/test/data/lex1 ADDED
@@ -0,0 +1,23 @@
1
+ TEXTThis is a simple text with BOLDSTART'''TEXTBold textBOLDEND'''TEXT and ITALICSTART''TEXTItalic textITALICEND''TEXT inside.
2
+ One paragraph can be written in several lines.
3
+
4
+ Another paragraph starts after a blank line.
5
+
6
+
7
+ Another one.
8
+
9
+ This is text with INTLINKSTART[[TEXTInternal LinkINTLINKEND]]TEXT and LINKSTART[TEXThttp://www.example.com external linkLINKEND]TEXT.
10
+
11
+ We can have headlines:
12
+
13
+
14
+ SECTION=TEXT Headline1 SECTION=SECTION==TEXT Headline2 SECTION==SECTION===TEXT Headline3 SECTION===SECTION====TEXT Headline4 SECTION====SECTION=====TEXT Headline5 SECTION=====SECTION======TEXT Headline6 SECTION======SECTION=======TEXT Headline7 SECTION=======TEXT
15
+
16
+ PREThis is a preformatted ''' ''' << '' '' text
17
+ PREyes
18
+ HLINE----TEXT
19
+ This is a text after the line.
20
+
21
+ UL_STARTLI_STARTTEXTfoo
22
+ LI_ENDLI_STARTTEXTfoo2
23
+ LI_ENDUL_ENDfalsefalse
data/test/data/lex2 ADDED
@@ -0,0 +1,2 @@
1
+ HLINE----TEXT
2
+ This is a textfalsefalse
data/test/data/lex3 ADDED
@@ -0,0 +1 @@
1
+ TEXT--SIGNATURE_DATE~~~~~SIGNATURE_NAME~~~SIGNATURE_FULL~~~~falsefalse
data/test/data/lex4 ADDED
@@ -0,0 +1 @@
1
+ BOLDSTART'''ITALICSTART''TEXTFooITALICEND''BOLDEND'''falsefalse
data/test/data/lex5 ADDED
@@ -0,0 +1,12 @@
1
+ UL_STARTLI_STARTTEXTA
2
+ UL_STARTLI_STARTTEXTA
3
+ UL_STARTLI_STARTTEXTa
4
+ LI_ENDLI_STARTTEXTb
5
+ UL_STARTLI_STARTTEXTa
6
+ LI_ENDUL_ENDLI_ENDUL_ENDLI_ENDUL_ENDLI_ENDLI_STARTTEXTB
7
+ UL_STARTLI_STARTTEXTb
8
+ LI_ENDUL_ENDLI_ENDUL_ENDOL_STARTLI_STARTTEXTa
9
+ OL_STARTLI_STARTTEXTa
10
+ OL_STARTLI_STARTTEXTa
11
+ OL_STARTLI_STARTTEXTa
12
+ LI_ENDOL_ENDLI_ENDOL_ENDLI_ENDOL_ENDLI_ENDOL_ENDfalsefalse
data/test/data/lex6 ADDED
@@ -0,0 +1,8 @@
1
+ TEXTThis is some text with BOLDSTART'''TEXTboldBOLDEND'''TEXT and ITALICSTART''TEXTitalicITALICEND''TEXT formating.
2
+ The list is also here:
3
+ UL_STARTLI_STARTTEXTList Item 1
4
+ UL_STARTLI_STARTTEXTSub list item 1
5
+ LI_ENDLI_STARTTEXTSub list item 2
6
+ LI_ENDUL_ENDLI_ENDLI_STARTTEXTList Item 2
7
+ LI_ENDUL_ENDTEXTThe end
8
+ HLINE----falsefalse
data/test/data/lex7 ADDED
@@ -0,0 +1,2 @@
1
+ LINKSTART[TEXThttp://www.example.com external linkLINKEND]LINKSTARTTEXThttp://www.example.comLINKEND]TEXT [foo]
2
+ falsefalse
data/test/data/result1 ADDED
@@ -0,0 +1,48 @@
1
+ <p>This is a simple text with <b>Bold text</b> and <i>Italic text</i> inside.
2
+ One paragraph can be written in several lines.
3
+
4
+ </p><p>Another paragraph starts after a blank line.
5
+ </p><p><br>
6
+ Another one.
7
+ </p><p>This is text with <a href="/wiki/index.php?title=Internal_Link&amp;action=edit" class="new" title="Internal Link">Internal Link</a> and <a href="http://www.example.com" class="external text" title="http://www.example.com" rel="nofollow">external link</a>.
8
+ </p><p>We can have headlines:
9
+ </p><p><br>
10
+ </p>
11
+ <table id="toc" class="toc" summary="Contents"><tbody><tr><td><div id="toctitle"><h2>Contents</h2> <span class="toctoggle">[<a href="javascript:toggleToc()" class="internal" id="togglelink">hide</a>]</span></div>
12
+
13
+ <ul style="display: block;">
14
+ <li class="toclevel-1"><a href="#Headline1"><span class="tocnumber">1</span> <span class="toctext">Headline1</span></a>
15
+ <ul>
16
+ <li class="toclevel-2"><a href="#Headline2"><span class="tocnumber">1.1</span> <span class="toctext">Headline2</span></a>
17
+ <ul>
18
+ <li class="toclevel-3"><a href="#Headline3"><span class="tocnumber">1.1.1</span> <span class="toctext">Headline3</span></a>
19
+ <ul>
20
+ <li class="toclevel-4"><a href="#Headline4"><span class="tocnumber">1.1.1.1</span> <span class="toctext">Headline4</span></a></li>
21
+
22
+ </ul>
23
+ </li>
24
+ </ul>
25
+ </li>
26
+ </ul>
27
+ </li>
28
+ </ul>
29
+ </td></tr></tbody></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script>
30
+ <div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&amp;action=edit&amp;section=1" title="Edit section: Headline1">edit</a>]</div><a name="Headline1"></a><h1> Headline1 </h1>
31
+ <div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&amp;action=edit&amp;section=2" title="Edit section: Headline2">edit</a>]</div><a name="Headline2"></a><h2> Headline2 </h2>
32
+
33
+ <div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&amp;action=edit&amp;section=3" title="Edit section: Headline3">edit</a>]</div><a name="Headline3"></a><h3> Headline3 </h3>
34
+ <div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&amp;action=edit&amp;section=4" title="Edit section: Headline4">edit</a>]</div><a name="Headline4"></a><h4> Headline4 </h4>
35
+ <div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&amp;action=edit&amp;section=5" title="Edit section: Headline5">edit</a>]</div><a name="Headline5"></a><h5> Headline5 </h5>
36
+ <div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&amp;action=edit&amp;section=6" title="Edit section: Headline6">edit</a>]</div><a name="Headline6"></a><h6> Headline6 </h6>
37
+ <div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&amp;action=edit&amp;section=7" title="Edit section: = Headline7 =">edit</a>]</div><a name=".3D_Headline7_.3D"></a><h6>= Headline7 =</h6>
38
+ <pre>This is a preformatted <b> </b> &lt;&lt; <i> </i> text
39
+ yes
40
+
41
+
42
+ </pre>
43
+ <hr>
44
+ <p>This is a text after the line.
45
+ </p>
46
+ <ul><li>foo
47
+ </li><li> foo2
48
+ </li></ul>
@@ -0,0 +1,23 @@
1
+ require 'mediawikilexer'
2
+ require 'mediawikiparser'
3
+ require 'mediawikiparams'
4
+ require 'mediawikihtmlgenerator'
5
+
6
# Regenerates the expected-HTML fixture for test case +index+.
#
# Reads ../data/input<index>, parses it, renders the AST with
# MediaWikiHTMLGenerator and writes the HTML to ../data/html<index>.
# Both paths are relative to the current working directory.
#
# The input is read with File.read and the output is written inside a
# File.open block, so no handle is left open. The output file is opened
# only after the HTML exists; if parsing or rendering raises, an existing
# fixture is left untouched instead of being truncated.
#
# Returns nil.
def produce(index)
  input = File.read("../data/input#{index}")

  parser = MediaWikiParser.new
  parser.lexer = MediaWikiLexer.new
  ast = parser.parse(input)
  # Fixed timestamp so the output is reproducible; presumably consumed by
  # the generator for signature tokens (compare the html3 fixture).
  MediaWikiParams.instance.time = Time.mktime(2000, 1, 1, 1, 1, 1, 1)
  generator = MediaWikiHTMLGenerator.new
  generator.parse(ast)

  File.open("../data/html#{index}", "w") { |out| out.write(generator.html) }
  nil
end
21
+
22
+ # Only fixture 6 is regenerated below; disabled batch alternative: (3..5).each { |i| produce(i) }
23
+ produce(6)
@@ -0,0 +1,23 @@
1
+ require 'mediawikilexer'
2
+ require 'mediawikiparser'
3
+ require 'mediawikiparams'
4
+ require 'mediawikihtmlgenerator'
5
+
6
# Regenerates the expected-HTML fixture for test case +index+.
#
# Reads ../data/input<index>, parses it, renders the AST with
# MediaWikiHTMLGenerator and writes the HTML to ../data/html<index>.
# Both paths are relative to the current working directory.
#
# The input is read with File.read and the output is written inside a
# File.open block, so no handle is left open. The output file is opened
# only after the HTML exists; if parsing or rendering raises, an existing
# fixture is left untouched instead of being truncated.
#
# Returns nil.
def produce(index)
  input = File.read("../data/input#{index}")

  parser = MediaWikiParser.new
  parser.lexer = MediaWikiLexer.new
  ast = parser.parse(input)
  # Fixed timestamp so the output is reproducible; presumably consumed by
  # the generator for signature tokens (compare the html3 fixture).
  MediaWikiParams.instance.time = Time.mktime(2000, 1, 1, 1, 1, 1, 1)
  generator = MediaWikiHTMLGenerator.new
  generator.parse(ast)

  File.open("../data/html#{index}", "w") { |out| out.write(generator.html) }
  nil
end
21
+
22
+ # Only fixture 1 is regenerated below; disabled batch alternative: (3..5).each { |i| produce(i) }
23
+ produce(1)
@@ -0,0 +1,15 @@
1
+ require 'mediawikilexer'
2
+
3
# Regenerates the expected-token fixture for test case +index+.
#
# Reads ../data/input<index>, tokenizes it with MediaWikiLexer and writes
# the string form of the token list to ../data/lex<index>. Both paths are
# relative to the current working directory.
#
# The input is read with File.read and the output is written inside a
# File.open block, so no handle is left open. The output file is opened
# only after tokenizing succeeded; if the lexer raises, an existing
# fixture is left untouched instead of being truncated.
#
# Returns nil.
def produce(index)
  input = File.read("../data/input#{index}")

  lexer = MediaWikiLexer.new
  tokens = lexer.tokenize(input)

  File.open("../data/lex#{index}", "w") { |out| out.write(tokens.to_s) }
  nil
end
13
+
14
+ # Only fixture 7 is regenerated below; disabled batch alternative: (1..5).each { |i| produce(i) }
15
+ produce(7)