mediacloth 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +37 -0
- data/lib/mediacloth/mediawikiast.rb +50 -0
- data/lib/mediacloth/mediawikihtmlgenerator.rb +105 -0
- data/lib/mediacloth/mediawikihtmlgenerator.rb~ +105 -0
- data/lib/mediacloth/mediawikilexer.rb +407 -0
- data/lib/mediacloth/mediawikiparams.rb +33 -0
- data/lib/mediacloth/mediawikiparser.rb +429 -0
- data/lib/mediacloth/mediawikiparser.y +172 -0
- data/lib/mediacloth/mediawikiparser.y~ +172 -0
- data/lib/mediacloth/mediawikiwalker.rb +62 -0
- data/lib/mediacloth/mediawikiwalker.rb~ +62 -0
- data/lib/mediacloth.rb +23 -0
- data/lib/mediacloth.rb~ +23 -0
- data/test/data/html1 +21 -0
- data/test/data/html2 +2 -0
- data/test/data/html3 +1 -0
- data/test/data/html4 +1 -0
- data/test/data/html6 +8 -0
- data/test/data/html7 +1 -0
- data/test/data/input1 +29 -0
- data/test/data/input2 +2 -0
- data/test/data/input3 +2 -0
- data/test/data/input4 +1 -0
- data/test/data/input5 +12 -0
- data/test/data/input6 +8 -0
- data/test/data/input7 +2 -0
- data/test/data/lex1 +23 -0
- data/test/data/lex2 +2 -0
- data/test/data/lex3 +1 -0
- data/test/data/lex4 +1 -0
- data/test/data/lex5 +12 -0
- data/test/data/lex6 +8 -0
- data/test/data/lex7 +2 -0
- data/test/data/result1 +48 -0
- data/test/dataproducers/html.rb +23 -0
- data/test/dataproducers/html.rb~ +23 -0
- data/test/dataproducers/lex.rb +15 -0
- data/test/debugwalker.rb +63 -0
- data/test/debugwalker.rb~ +63 -0
- data/test/htmlgenerator.rb +25 -0
- data/test/htmlgenerator.rb~ +25 -0
- data/test/lexer.rb +57 -0
- data/test/lexer.rb~ +57 -0
- data/test/parser.rb +23 -0
- data/test/parser.rb~ +23 -0
- data/test/testhelper.rb +27 -0
- data/test/testhelper.rb~ +28 -0
- metadata +97 -0
@@ -0,0 +1,172 @@
|
|
1
|
+
#The parser for the MediaWiki language.
|
2
|
+
#
|
3
|
+
#Usage together with a lexer:
|
4
|
+
# inputFile = File.new("data/input1", "r")
|
5
|
+
# input = inputFile.read
|
6
|
+
# parser = MediaWikiParser.new
|
7
|
+
# parser.lexer = MediaWikiLexer.new
|
8
|
+
# parser.parse(input)
|
9
|
+
class MediaWikiParser

token BOLDSTART BOLDEND ITALICSTART ITALICEND LINKSTART LINKEND
      INTLINKSTART INTLINKEND SECTION TEXT PRE
      HLINE SIGNATURE_NAME SIGNATURE_DATE SIGNATURE_FULL
      UL_START UL_END LI_START LI_END OL_START OL_END

rule

# A whole document: every parsed content node becomes a child of a fresh
# WikiAST, which is pushed onto @nodes.
wiki:
    repeated_contents
        {
            root = WikiAST.new
            @nodes.push(root)
            root.children += val[0]
        }
    ;

# One top-level piece of content.
contents:
      text
        { result = val[0] }
    | bulleted_list
        { result = val[0] }
    | preformatted
        {
            pre = PreformattedAST.new
            pre.contents = val[0]
            result = pre
        }
    | section
        {
            # val[0] is the [contents, level] pair built by the section rule.
            heading = SectionAST.new
            heading.contents = val[0][0]
            heading.level = val[0][1]
            result = heading
        }
    ;

# One or more contents, collected into a new array in source order.
repeated_contents:
      contents
        { result = [val[0]] }
    | repeated_contents contents
        { result = val[0] + [val[1]] }
    ;

# Plain or formatted text.
text:
      element
        {
            # val[0] is the [formatting, contents] pair built by the element rule.
            node = TextAST.new
            node.formatting = val[0][0]
            node.contents = val[0][1]
            result = node
        }
    | formatted_element
        { result = val[0] }
    ;

# Leaf elements, each reduced to a [formatting, contents] pair.
element:
      LINKSTART TEXT LINKEND
        { result = [:Link, val[1]] }
    | INTLINKSTART TEXT INTLINKEND
        { result = [:InternalLink, val[1]] }
    | TEXT
        { result = [:None, val[0]] }
    | HLINE
        { result = [:HLine, val[0]] }
    | SIGNATURE_DATE
        { result = [:SignatureDate, val[0]] }
    | SIGNATURE_NAME
        { result = [:SignatureName, val[0]] }
    | SIGNATURE_FULL
        { result = [:SignatureFull, val[0]] }
    ;

# Bold / italic spans wrapping nested contents.
formatted_element:
      BOLDSTART repeated_contents BOLDEND
        {
            bold = FormattedAST.new
            bold.formatting = :Bold
            bold.children += val[1]
            result = bold
        }
    | ITALICSTART repeated_contents ITALICEND
        {
            italic = FormattedAST.new
            italic.formatting = :Italic
            italic.children += val[1]
            result = italic
        }
    ;

# A bulleted list: the first item followed by the remaining items.
bulleted_list:
    UL_START list_item list_contents UL_END
        {
            bullets = ListAST.new
            bullets.type = :Bulleted
            bullets.children << val[1]
            bullets.children += val[2]
            result = bullets
        }
    ;

# Zero or more further list items. The embedded action supplies the empty
# array that the final action starts from (it arrives as val[0] / result).
list_contents:
        { result = [] }
    list_item list_contents
        {
            result.push(val[1])
            result.concat(val[2])
        }
    |
        { result = [] }
    ;

list_item:
    LI_START repeated_contents LI_END
        {
            item = ListItemAST.new
            item.children += val[1]
            result = item
        }
    ;

preformatted:
    PRE
        { result = val[0] }
    ;

# A heading is reduced to [title text, length of the opening SECTION token value].
section:
    SECTION TEXT SECTION
        { result = [val[1], val[0].length] }
    ;

end
|
148
|
+
|
149
|
+
---- header ----
|
150
|
+
require 'mediawikiast'
|
151
|
+
|
152
|
+
---- inner ----
|
153
|
+
|
154
|
+
attr_accessor :lexer
|
155
|
+
|
156
|
+
#Creates the parser with an empty list of parsed root nodes, then runs
#the superclass initializer.
def initialize
  @nodes = Array.new
  super
end
|
160
|
+
|
161
|
+
#Tokenizes the +input+ string with the configured lexer and parses it.
#
#The lexer must have been assigned through +lexer=+ beforehand.
#Returns the last node stored in @nodes once parsing has finished.
def parse(input)
  #Parser tracing used to be switched on unconditionally; only request it
  #when Ruby itself runs in debug mode so that normal use stays quiet.
  @yydebug = true if $DEBUG
  lexer.tokenize(input)
  do_parse
  @nodes.last
end
|
168
|
+
|
169
|
+
#Hands back whatever the lexer's +lex+ call yields as the next token.
def next_token
  @lexer.lex
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'mediacloth/mediawikiast'
|
2
|
+
|
3
|
+
#Default walker to traverse the parse tree.
|
4
|
+
#
|
5
|
+
#The walker traverses the entire parse tree and does nothing.
|
6
|
+
#To implement some functionality during this process, reimplement
|
7
|
+
#<i>parse...</i> methods and don't forget to call super() to not
|
8
|
+
#break the walk.
|
9
|
+
#
|
10
|
+
#Current implementations: MediaWikiHTMLGenerator, DebugWalker
|
11
|
+
class MediaWikiWalker

  #Entry point: walks the tree rooted at +ast+.
  def parse(ast)
    parse_wiki_ast(ast)
  end

  protected

  #===== reimplement these methods and don't forget to call super() ====#

  #Visits each direct child of +ast+ and passes it to the handler registered
  #for the child's exact class; children of any other class are skipped.
  def parse_wiki_ast(ast)
    handlers = {
      FormattedAST    => :parse_formatted,
      TextAST         => :parse_text,
      ListAST         => :parse_list,
      PreformattedAST => :parse_preformatted,
      SectionAST      => :parse_section
    }
    ast.children.each do |node|
      handler = handlers[node.class]
      __send__(handler, node) if handler
    end
  end

  #Formatted nodes carry children, so the walk descends into them.
  def parse_formatted(ast)
    parse_wiki_ast(ast)
  end

  #Text nodes: nothing to do by default.
  def parse_text(ast)
  end

  #Visits the list's children that are exactly ListItemAST instances.
  def parse_list(ast)
    ast.children.each do |item|
      next unless item.class == ListItemAST
      parse_list_item(item)
    end
  end

  #List items carry children, so the walk descends into them.
  def parse_list_item(ast)
    parse_wiki_ast(ast)
  end

  #Preformatted nodes: nothing to do by default.
  def parse_preformatted(ast)
  end

  #Section nodes: nothing to do by default.
  def parse_section(ast)
  end

end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'mediawikiast'
|
2
|
+
|
3
|
+
#Default walker to traverse the parse tree.
|
4
|
+
#
|
5
|
+
#The walker traverses the entire parse tree and does nothing.
|
6
|
+
#To implement some functionality during this process, reimplement
|
7
|
+
#<i>parse...</i> methods and don't forget to call super() to not
|
8
|
+
#break the walk.
|
9
|
+
#
|
10
|
+
#Current implementations: MediaWikiHTMLGenerator, DebugWalker
|
11
|
+
class MediaWikiWalker

  #Entry point: walks the tree rooted at +ast+.
  def parse(ast)
    parse_wiki_ast(ast)
  end

  protected

  #===== reimplement these methods and don't forget to call super() ====#

  #Visits each direct child of +ast+ and passes it to the handler registered
  #for the child's exact class; children of any other class are skipped.
  def parse_wiki_ast(ast)
    handlers = {
      FormattedAST    => :parse_formatted,
      TextAST         => :parse_text,
      ListAST         => :parse_list,
      PreformattedAST => :parse_preformatted,
      SectionAST      => :parse_section
    }
    ast.children.each do |node|
      handler = handlers[node.class]
      __send__(handler, node) if handler
    end
  end

  #Formatted nodes carry children, so the walk descends into them.
  def parse_formatted(ast)
    parse_wiki_ast(ast)
  end

  #Text nodes: nothing to do by default.
  def parse_text(ast)
  end

  #Visits the list's children that are exactly ListItemAST instances.
  def parse_list(ast)
    ast.children.each do |item|
      next unless item.class == ListItemAST
      parse_list_item(item)
    end
  end

  #List items carry children, so the walk descends into them.
  def parse_list_item(ast)
    parse_wiki_ast(ast)
  end

  #Preformatted nodes: nothing to do by default.
  def parse_preformatted(ast)
  end

  #Section nodes: nothing to do by default.
  def parse_section(ast)
  end

end
|
data/lib/mediacloth.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mediacloth/mediawikilexer'
|
2
|
+
require 'mediacloth/mediawikiparser'
|
3
|
+
require 'mediacloth/mediawikiast'
|
4
|
+
require 'mediacloth/mediawikiparams'
|
5
|
+
require 'mediacloth/mediawikiwalker'
|
6
|
+
require 'mediacloth/mediawikihtmlgenerator'
|
7
|
+
|
8
|
+
#Helper module to facilitate MediaCloth usage.
|
9
|
+
module MediaCloth

  module_function

  #Converts wiki markup in +input+ to HTML: the text is parsed into a tree,
  #the tree is walked by the HTML generator, and the generator's html is returned.
  def wiki_to_html(input)
    wiki_parser = MediaWikiParser.new
    wiki_parser.lexer = MediaWikiLexer.new
    tree = wiki_parser.parse(input)
    html_generator = MediaWikiHTMLGenerator.new
    html_generator.parse(tree)
    html_generator.html
  end

end
|
data/lib/mediacloth.rb~
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mediacloth/mediawikilexer'
|
2
|
+
require 'mediacloth/mediawikiparser'
|
3
|
+
require 'mediacloth/mediawikiast'
|
4
|
+
require 'mediacloth/mediawikiparams'
|
5
|
+
require 'mediacloth/mediawikiwalker'
|
6
|
+
require 'mediacloth/mediawikihtmlgenerator'
|
7
|
+
|
8
|
+
#Helper module to facilitate MediaCloth usage.
|
9
|
+
module MediaCloth

  module_function

  #Converts wiki markup in +input+ to HTML: the text is parsed into a tree,
  #the tree is walked by the HTML generator, and the generator's html is returned.
  def wiki_to_html(input)
    wiki_parser = MediaWikiParser.new
    wiki_parser.lexer = MediaWikiLexer.new
    tree = wiki_parser.parse(input)
    html_generator = MediaWikiHTMLGenerator.new
    html_generator.parse(tree)
    html_generator.html
  end

end
|
data/test/data/html1
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
This is a simple text with <b>Bold text</b> and <i>Italic text</i> inside.
|
2
|
+
One paragraph can be written in several lines.
|
3
|
+
|
4
|
+
Another paragraph starts after a blank line.
|
5
|
+
|
6
|
+
|
7
|
+
Another one.
|
8
|
+
|
9
|
+
This is text with Internal Link and <a href="http://www.example.com" rel="nofollow">external link</a>.
|
10
|
+
|
11
|
+
We can have headlines:
|
12
|
+
|
13
|
+
|
14
|
+
<h1>Headline1</h1><h2>Headline2</h2><h3>Headline3</h3><h4>Headline4</h4><h5>Headline5</h5><h6>Headline6</h6><h7>Headline7</h7>
|
15
|
+
|
16
|
+
<hr></hr>
|
17
|
+
This is a text after the line.
|
18
|
+
|
19
|
+
<ul><li>foo
|
20
|
+
</li><li>foo2
|
21
|
+
</li></ul>
|
data/test/data/html2
ADDED
data/test/data/html3
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--Sat Jan 01 01:01:01 EET 2000CreatorCreator Sat Jan 01 01:01:01 EET 2000
|
data/test/data/html4
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<b><i>Foo</i></b>
|
data/test/data/html6
ADDED
data/test/data/html7
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<a href="http://www.example.com" rel="nofollow">external link</a><a href="http://www.example.com" rel="nofollow">http://www.example.com</a> [foo]
|
data/test/data/input1
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
This is a simple text with '''Bold text''' and ''Italic text'' inside.
|
2
|
+
One paragraph can be written in several lines.
|
3
|
+
|
4
|
+
Another paragraph starts after a blank line.
|
5
|
+
|
6
|
+
|
7
|
+
Another one.
|
8
|
+
|
9
|
+
This is text with [[Internal Link]] and [http://www.example.com external link].
|
10
|
+
|
11
|
+
We can have headlines:
|
12
|
+
|
13
|
+
|
14
|
+
= Headline1 =
|
15
|
+
== Headline2 ==
|
16
|
+
=== Headline3 ===
|
17
|
+
==== Headline4 ====
|
18
|
+
===== Headline5 =====
|
19
|
+
====== Headline6 ======
|
20
|
+
======= Headline7 =======
|
21
|
+
|
22
|
+
This is a preformatted ''' ''' << '' '' text
|
23
|
+
yes
|
24
|
+
|
25
|
+
----
|
26
|
+
This is a text after the line.
|
27
|
+
|
28
|
+
*foo
|
29
|
+
* foo2
|
data/test/data/input2
ADDED
data/test/data/input3
ADDED
data/test/data/input4
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
'''''Foo'''''
|
data/test/data/input5
ADDED
data/test/data/input6
ADDED
data/test/data/input7
ADDED
data/test/data/lex1
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
TEXTThis is a simple text with BOLDSTART'''TEXTBold textBOLDEND'''TEXT and ITALICSTART''TEXTItalic textITALICEND''TEXT inside.
|
2
|
+
One paragraph can be written in several lines.
|
3
|
+
|
4
|
+
Another paragraph starts after a blank line.
|
5
|
+
|
6
|
+
|
7
|
+
Another one.
|
8
|
+
|
9
|
+
This is text with INTLINKSTART[[TEXTInternal LinkINTLINKEND]]TEXT and LINKSTART[TEXThttp://www.example.com external linkLINKEND]TEXT.
|
10
|
+
|
11
|
+
We can have headlines:
|
12
|
+
|
13
|
+
|
14
|
+
SECTION=TEXT Headline1 SECTION=SECTION==TEXT Headline2 SECTION==SECTION===TEXT Headline3 SECTION===SECTION====TEXT Headline4 SECTION====SECTION=====TEXT Headline5 SECTION=====SECTION======TEXT Headline6 SECTION======SECTION=======TEXT Headline7 SECTION=======TEXT
|
15
|
+
|
16
|
+
PREThis is a preformatted ''' ''' << '' '' text
|
17
|
+
PREyes
|
18
|
+
HLINE----TEXT
|
19
|
+
This is a text after the line.
|
20
|
+
|
21
|
+
UL_STARTLI_STARTTEXTfoo
|
22
|
+
LI_ENDLI_STARTTEXTfoo2
|
23
|
+
LI_ENDUL_ENDfalsefalse
|
data/test/data/lex2
ADDED
data/test/data/lex3
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
TEXT--SIGNATURE_DATE~~~~~SIGNATURE_NAME~~~SIGNATURE_FULL~~~~falsefalse
|
data/test/data/lex4
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
BOLDSTART'''ITALICSTART''TEXTFooITALICEND''BOLDEND'''falsefalse
|
data/test/data/lex5
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
UL_STARTLI_STARTTEXTA
|
2
|
+
UL_STARTLI_STARTTEXTA
|
3
|
+
UL_STARTLI_STARTTEXTa
|
4
|
+
LI_ENDLI_STARTTEXTb
|
5
|
+
UL_STARTLI_STARTTEXTa
|
6
|
+
LI_ENDUL_ENDLI_ENDUL_ENDLI_ENDUL_ENDLI_ENDLI_STARTTEXTB
|
7
|
+
UL_STARTLI_STARTTEXTb
|
8
|
+
LI_ENDUL_ENDLI_ENDUL_ENDOL_STARTLI_STARTTEXTa
|
9
|
+
OL_STARTLI_STARTTEXTa
|
10
|
+
OL_STARTLI_STARTTEXTa
|
11
|
+
OL_STARTLI_STARTTEXTa
|
12
|
+
LI_ENDOL_ENDLI_ENDOL_ENDLI_ENDOL_ENDLI_ENDOL_ENDfalsefalse
|
data/test/data/lex6
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
TEXTThis is some text with BOLDSTART'''TEXTboldBOLDEND'''TEXT and ITALICSTART''TEXTitalicITALICEND''TEXT formating.
|
2
|
+
The list is also here:
|
3
|
+
UL_STARTLI_STARTTEXTList Item 1
|
4
|
+
UL_STARTLI_STARTTEXTSub list item 1
|
5
|
+
LI_ENDLI_STARTTEXTSub list item 2
|
6
|
+
LI_ENDUL_ENDLI_ENDLI_STARTTEXTList Item 2
|
7
|
+
LI_ENDUL_ENDTEXTThe end
|
8
|
+
HLINE----falsefalse
|
data/test/data/lex7
ADDED
data/test/data/result1
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
<p>This is a simple text with <b>Bold text</b> and <i>Italic text</i> inside.
|
2
|
+
One paragraph can be written in several lines.
|
3
|
+
|
4
|
+
</p><p>Another paragraph starts after a blank line.
|
5
|
+
</p><p><br>
|
6
|
+
Another one.
|
7
|
+
</p><p>This is text with <a href="/wiki/index.php?title=Internal_Link&action=edit" class="new" title="Internal Link">Internal Link</a> and <a href="http://www.example.com" class="external text" title="http://www.example.com" rel="nofollow">external link</a>.
|
8
|
+
</p><p>We can have headlines:
|
9
|
+
</p><p><br>
|
10
|
+
</p>
|
11
|
+
<table id="toc" class="toc" summary="Contents"><tbody><tr><td><div id="toctitle"><h2>Contents</h2> <span class="toctoggle">[<a href="javascript:toggleToc()" class="internal" id="togglelink">hide</a>]</span></div>
|
12
|
+
|
13
|
+
<ul style="display: block;">
|
14
|
+
<li class="toclevel-1"><a href="#Headline1"><span class="tocnumber">1</span> <span class="toctext">Headline1</span></a>
|
15
|
+
<ul>
|
16
|
+
<li class="toclevel-2"><a href="#Headline2"><span class="tocnumber">1.1</span> <span class="toctext">Headline2</span></a>
|
17
|
+
<ul>
|
18
|
+
<li class="toclevel-3"><a href="#Headline3"><span class="tocnumber">1.1.1</span> <span class="toctext">Headline3</span></a>
|
19
|
+
<ul>
|
20
|
+
<li class="toclevel-4"><a href="#Headline4"><span class="tocnumber">1.1.1.1</span> <span class="toctext">Headline4</span></a></li>
|
21
|
+
|
22
|
+
</ul>
|
23
|
+
</li>
|
24
|
+
</ul>
|
25
|
+
</li>
|
26
|
+
</ul>
|
27
|
+
</li>
|
28
|
+
</ul>
|
29
|
+
</td></tr></tbody></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script>
|
30
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=1" title="Edit section: Headline1">edit</a>]</div><a name="Headline1"></a><h1> Headline1 </h1>
|
31
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=2" title="Edit section: Headline2">edit</a>]</div><a name="Headline2"></a><h2> Headline2 </h2>
|
32
|
+
|
33
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=3" title="Edit section: Headline3">edit</a>]</div><a name="Headline3"></a><h3> Headline3 </h3>
|
34
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=4" title="Edit section: Headline4">edit</a>]</div><a name="Headline4"></a><h4> Headline4 </h4>
|
35
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=5" title="Edit section: Headline5">edit</a>]</div><a name="Headline5"></a><h5> Headline5 </h5>
|
36
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=6" title="Edit section: Headline6">edit</a>]</div><a name="Headline6"></a><h6> Headline6 </h6>
|
37
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=7" title="Edit section: = Headline7 =">edit</a>]</div><a name=".3D_Headline7_.3D"></a><h6>= Headline7 =</h6>
|
38
|
+
<pre>This is a preformatted <b> </b> << <i> </i> text
|
39
|
+
yes
|
40
|
+
|
41
|
+
|
42
|
+
</pre>
|
43
|
+
<hr>
|
44
|
+
<p>This is a text after the line.
|
45
|
+
</p>
|
46
|
+
<ul><li>foo
|
47
|
+
</li><li> foo2
|
48
|
+
</li></ul>
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mediawikilexer'
|
2
|
+
require 'mediawikiparser'
|
3
|
+
require 'mediawikiparams'
|
4
|
+
require 'mediawikihtmlgenerator'
|
5
|
+
|
6
|
+
#Writes the HTML fixture "../data/html<index>" generated from the wiki
#source in "../data/input<index>" (paths are relative to the working directory).
#
#The input is read and converted before the output file is opened, so a
#missing input or a parse failure no longer leaves a truncated fixture
#behind, and both files are closed even when an error is raised.
#Returns nil.
def produce(index)
  input = File.read("../data/input#{index}")

  parser = MediaWikiParser.new
  parser.lexer = MediaWikiLexer.new
  ast = parser.parse(input)
  #Fixed timestamp; presumably keeps time-dependent output (e.g. signatures)
  #reproducible - confirm against MediaWikiParams.
  MediaWikiParams.instance.time = Time.mktime(2000, 1, 1, 1, 1, 1, 1)
  generator = MediaWikiHTMLGenerator.new
  generator.parse(ast)
  html = generator.html

  File.open("../data/html#{index}", "w") { |file| file.write(html) }
  nil
end
|
21
|
+
|
22
|
+
# (3..5).each { |i| produce(i) }
|
23
|
+
produce(6)
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mediawikilexer'
|
2
|
+
require 'mediawikiparser'
|
3
|
+
require 'mediawikiparams'
|
4
|
+
require 'mediawikihtmlgenerator'
|
5
|
+
|
6
|
+
#Writes the HTML fixture "../data/html<index>" generated from the wiki
#source in "../data/input<index>" (paths are relative to the working directory).
#
#The input is read and converted before the output file is opened, so a
#missing input or a parse failure no longer leaves a truncated fixture
#behind, and both files are closed even when an error is raised.
#Returns nil.
def produce(index)
  input = File.read("../data/input#{index}")

  parser = MediaWikiParser.new
  parser.lexer = MediaWikiLexer.new
  ast = parser.parse(input)
  #Fixed timestamp; presumably keeps time-dependent output (e.g. signatures)
  #reproducible - confirm against MediaWikiParams.
  MediaWikiParams.instance.time = Time.mktime(2000, 1, 1, 1, 1, 1, 1)
  generator = MediaWikiHTMLGenerator.new
  generator.parse(ast)
  html = generator.html

  File.open("../data/html#{index}", "w") { |file| file.write(html) }
  nil
end
|
21
|
+
|
22
|
+
# (3..5).each { |i| produce(i) }
|
23
|
+
produce(1)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'mediawikilexer'
|
2
|
+
|
3
|
+
#Writes the lexer fixture "../data/lex<index>": the string form of the
#tokens the lexer returns for "../data/input<index>" (paths are relative
#to the working directory).
#
#The input is read and tokenized before the output file is opened, so a
#missing input or a lexer failure no longer leaves a truncated fixture
#behind, and both files are closed even when an error is raised.
#Returns nil.
def produce(index)
  input = File.read("../data/input#{index}")

  lexer = MediaWikiLexer.new
  tokens = lexer.tokenize(input)

  File.open("../data/lex#{index}", "w") { |file| file.write(tokens.to_s) }
  nil
end
|
13
|
+
|
14
|
+
#1..5.each { |i| produce(1) }
|
15
|
+
produce(7)
|