mediacloth 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +37 -0
- data/lib/mediacloth/mediawikiast.rb +50 -0
- data/lib/mediacloth/mediawikihtmlgenerator.rb +105 -0
- data/lib/mediacloth/mediawikihtmlgenerator.rb~ +105 -0
- data/lib/mediacloth/mediawikilexer.rb +407 -0
- data/lib/mediacloth/mediawikiparams.rb +33 -0
- data/lib/mediacloth/mediawikiparser.rb +429 -0
- data/lib/mediacloth/mediawikiparser.y +172 -0
- data/lib/mediacloth/mediawikiparser.y~ +172 -0
- data/lib/mediacloth/mediawikiwalker.rb +62 -0
- data/lib/mediacloth/mediawikiwalker.rb~ +62 -0
- data/lib/mediacloth.rb +23 -0
- data/lib/mediacloth.rb~ +23 -0
- data/test/data/html1 +21 -0
- data/test/data/html2 +2 -0
- data/test/data/html3 +1 -0
- data/test/data/html4 +1 -0
- data/test/data/html6 +8 -0
- data/test/data/html7 +1 -0
- data/test/data/input1 +29 -0
- data/test/data/input2 +2 -0
- data/test/data/input3 +2 -0
- data/test/data/input4 +1 -0
- data/test/data/input5 +12 -0
- data/test/data/input6 +8 -0
- data/test/data/input7 +2 -0
- data/test/data/lex1 +23 -0
- data/test/data/lex2 +2 -0
- data/test/data/lex3 +1 -0
- data/test/data/lex4 +1 -0
- data/test/data/lex5 +12 -0
- data/test/data/lex6 +8 -0
- data/test/data/lex7 +2 -0
- data/test/data/result1 +48 -0
- data/test/dataproducers/html.rb +23 -0
- data/test/dataproducers/html.rb~ +23 -0
- data/test/dataproducers/lex.rb +15 -0
- data/test/debugwalker.rb +63 -0
- data/test/debugwalker.rb~ +63 -0
- data/test/htmlgenerator.rb +25 -0
- data/test/htmlgenerator.rb~ +25 -0
- data/test/lexer.rb +57 -0
- data/test/lexer.rb~ +57 -0
- data/test/parser.rb +23 -0
- data/test/parser.rb~ +23 -0
- data/test/testhelper.rb +27 -0
- data/test/testhelper.rb~ +28 -0
- metadata +97 -0
@@ -0,0 +1,172 @@
|
|
1
|
+
#The parser for the MediaWiki language.
|
2
|
+
#
|
3
|
+
#Usage together with a lexer:
|
4
|
+
# inputFile = File.new("data/input1", "r")
|
5
|
+
# input = inputFile.read
|
6
|
+
# parser = MediaWikiParser.new
|
7
|
+
# parser.lexer = MediaWikiLexer.new
|
8
|
+
# parser.parse(input)
|
9
|
+
class MediaWikiParser

# Terminal symbols the parser accepts from the lexer.
# NOTE(review): OL_START and OL_END are declared here, but no rule below
# consumes them.
token BOLDSTART BOLDEND ITALICSTART ITALICEND LINKSTART LINKEND
    INTLINKSTART INTLINKEND SECTION TEXT PRE
    HLINE SIGNATURE_NAME SIGNATURE_DATE SIGNATURE_FULL
    UL_START UL_END LI_START LI_END OL_START OL_END

rule

# Start symbol: pushes a new WikiAST onto @nodes and appends every parsed
# top-level item to its children.
wiki:
      repeated_contents
        {
            @nodes.push WikiAST.new
            #@nodes.last.children.insert(0, val[0])
            #puts val[0]
            @nodes.last.children += val[0]
        }
    ;

# One item: text, a bulleted list, a preformatted block or a section.
# text and bulleted_list are passed through unchanged; the other two are
# wrapped in their AST node here.
contents:
      text
        {
            result = val[0]
        }
    | bulleted_list
        {
            result = val[0]
        }
    | preformatted
        {
            p = PreformattedAST.new
            p.contents = val[0]
            result = p
        }
    | section
        {
            # val[0] is the [contents, level] pair built by the section rule.
            s = SectionAST.new
            s.contents = val[0][0]
            s.level = val[0][1]
            result = s
        }
    ;

# One or more items, collected into a new array in source order.
repeated_contents: contents
        {
            result = []
            result << val[0]
        }
    | repeated_contents contents
        {
            result = []
            result += val[0]
            result << val[1]
        }
    ;

# Either a single element, turned into a TextAST from its
# [formatting, contents] pair, or a bold/italic span passed through as is.
text: element
        {
            p = TextAST.new
            p.formatting = val[0][0]
            p.contents = val[0][1]
            result = p
        }
    | formatted_element
        {
            result = val[0]
        }
    ;

# Leaf elements. Each action yields a two-item array: a formatting symbol
# and the value of the relevant token.
element: LINKSTART TEXT LINKEND
        { return [:Link, val[1]] }
    | INTLINKSTART TEXT INTLINKEND
        { return [:InternalLink, val[1]] }
    | TEXT
        { return [:None, val[0]] }
    | HLINE
        { return [:HLine, val[0]] }
    | SIGNATURE_DATE
        { return [:SignatureDate, val[0]] }
    | SIGNATURE_NAME
        { return [:SignatureName, val[0]] }
    | SIGNATURE_FULL
        { return [:SignatureFull, val[0]] }
    ;

# Bold or italic span: a FormattedAST whose children are the enclosed items.
formatted_element: BOLDSTART repeated_contents BOLDEND
        {
            p = FormattedAST.new
            p.formatting = :Bold
            p.children += val[1]
            result = p
        }
    | ITALICSTART repeated_contents ITALICEND
        {
            p = FormattedAST.new
            p.formatting = :Italic
            p.children += val[1]
            result = p
        }
    ;

# Bulleted list: a ListAST of type :Bulleted holding the first item followed
# by the remaining items.
bulleted_list: UL_START list_item list_contents UL_END
        {
            list = ListAST.new
            list.type = :Bulleted
            list.children << val[1]
            list.children += val[2]
            result = list
        }
    ;

# Zero or more further list items.
# NOTE(review): the first alternative opens with an embedded action;
# presumably its value occupies val[0], which is why the closing action reads
# the item from val[1] and the rest from val[2] - confirm against racc docs.
list_contents:
        { result = [] }
      list_item list_contents
        {
            result << val[1]
            result += val[2]
        }
    |
        { result = [] }
    ;

# A single list item: a ListItemAST whose children are the enclosed items.
list_item: LI_START repeated_contents LI_END
        {
            li = ListItemAST.new
            li.children += val[1]
            result = li
        }
    ;

# Preformatted text: the value of the PRE token, unchanged.
preformatted: PRE
        { result = val[0] }
    ;

# Section heading: yields [title text, level], where level is the length of
# the opening SECTION token's value.
section: SECTION TEXT SECTION
        { result = [val[1], val[0].length] }
    ;

end
|
148
|
+
|
149
|
+
---- header ----
|
150
|
+
require 'mediawikiast'
|
151
|
+
|
152
|
+
---- inner ----
|
153
|
+
|
154
|
+
attr_accessor :lexer
|
155
|
+
|
156
|
+
# Sets up an empty list of parsed root nodes, then runs the inherited
# initializer.
def initialize
  @nodes = []
  super()
end
|
160
|
+
|
161
|
+
#Tokenizes input string and parses it.
#
#+input+ is handed to the attached lexer's +tokenize+, then the generated
#parser is run via +do_parse+. Returns the last node in @nodes, which the
#grammar's start rule pushes once parsing succeeds.
def parse(input)
  # Tracing is opt-in (ruby -d sets $DEBUG) rather than forced on for every
  # caller of the library.
  @yydebug = true if $DEBUG
  # Start from a clean slate so a reused parser does not keep the trees of
  # earlier documents alive.
  @nodes = []
  lexer.tokenize(input)
  do_parse
  @nodes.last
end
|
168
|
+
|
169
|
+
#Token source for the generated parser: hands back whatever the attached
#lexer's +lex+ returns.
def next_token
  @lexer.lex
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'mediacloth/mediawikiast'
|
2
|
+
|
3
|
+
#Default walker to traverse the parse tree.
|
4
|
+
#
|
5
|
+
#The walker traverses the entire parse tree and does nothing.
|
6
|
+
#To implement some functionality during this process, reimplement
|
7
|
+
#<i>parse...</i> methods and don't forget to call super() to not
|
8
|
+
#break the walk.
|
9
|
+
#
|
10
|
+
#Current implementations: MediaWikiHTMLGenerator, DebugWalker
|
11
|
+
class MediaWikiWalker

  #Walks through the AST, starting at the root node +ast+.
  def parse(ast)
    parse_wiki_ast(ast)
  end

  protected

  #===== reimplement these methods and don't forget to call super() ====#

  #Reimplement this.
  #Passes each child of +ast+ to the hook for its exact class; subclasses of
  #the listed node classes and any other objects are skipped.
  def parse_wiki_ast(ast)
    ast.children.each do |node|
      node_class = node.class
      if node_class == FormattedAST
        parse_formatted(node)
      elsif node_class == TextAST
        parse_text(node)
      elsif node_class == ListAST
        parse_list(node)
      elsif node_class == PreformattedAST
        parse_preformatted(node)
      elsif node_class == SectionAST
        parse_section(node)
      end
    end
  end

  #Reimplement this.
  #A formatted node is a container, so its children are walked like a root's.
  def parse_formatted(ast)
    parse_wiki_ast(ast)
  end

  #Reimplement this.
  #Text nodes are leaves: nothing to walk.
  def parse_text(ast)
  end

  #Reimplement this.
  #Visits only the children whose class is exactly ListItemAST.
  def parse_list(ast)
    ast.children.each do |item|
      next unless item.class == ListItemAST

      parse_list_item(item)
    end
  end

  #Reimplement this.
  #A list item is a container, so its children are walked like a root's.
  def parse_list_item(ast)
    parse_wiki_ast(ast)
  end

  #Reimplement this.
  #Preformatted nodes are leaves: nothing to walk.
  def parse_preformatted(ast)
  end

  #Reimplement this.
  #Section nodes are leaves: nothing to walk.
  def parse_section(ast)
  end

end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'mediawikiast'
|
2
|
+
|
3
|
+
#Default walker to traverse the parse tree.
|
4
|
+
#
|
5
|
+
#The walker traverses the entire parse tree and does nothing.
|
6
|
+
#To implement some functionality during this process, reimplement
|
7
|
+
#<i>parse...</i> methods and don't forget to call super() to not
|
8
|
+
#break the walk.
|
9
|
+
#
|
10
|
+
#Current implementations: MediaWikiHTMLGenerator, DebugWalker
|
11
|
+
class MediaWikiWalker

  #Walks through the AST, starting at the root node +ast+.
  def parse(ast)
    parse_wiki_ast(ast)
  end

  protected

  #===== reimplement these methods and don't forget to call super() ====#

  #Reimplement this.
  #Passes each child of +ast+ to the hook for its exact class; subclasses of
  #the listed node classes and any other objects are skipped.
  def parse_wiki_ast(ast)
    ast.children.each do |node|
      node_class = node.class
      if node_class == FormattedAST
        parse_formatted(node)
      elsif node_class == TextAST
        parse_text(node)
      elsif node_class == ListAST
        parse_list(node)
      elsif node_class == PreformattedAST
        parse_preformatted(node)
      elsif node_class == SectionAST
        parse_section(node)
      end
    end
  end

  #Reimplement this.
  #A formatted node is a container, so its children are walked like a root's.
  def parse_formatted(ast)
    parse_wiki_ast(ast)
  end

  #Reimplement this.
  #Text nodes are leaves: nothing to walk.
  def parse_text(ast)
  end

  #Reimplement this.
  #Visits only the children whose class is exactly ListItemAST.
  def parse_list(ast)
    ast.children.each do |item|
      next unless item.class == ListItemAST

      parse_list_item(item)
    end
  end

  #Reimplement this.
  #A list item is a container, so its children are walked like a root's.
  def parse_list_item(ast)
    parse_wiki_ast(ast)
  end

  #Reimplement this.
  #Preformatted nodes are leaves: nothing to walk.
  def parse_preformatted(ast)
  end

  #Reimplement this.
  #Section nodes are leaves: nothing to walk.
  def parse_section(ast)
  end

end
|
data/lib/mediacloth.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mediacloth/mediawikilexer'
|
2
|
+
require 'mediacloth/mediawikiparser'
|
3
|
+
require 'mediacloth/mediawikiast'
|
4
|
+
require 'mediacloth/mediawikiparams'
|
5
|
+
require 'mediacloth/mediawikiwalker'
|
6
|
+
require 'mediacloth/mediawikihtmlgenerator'
|
7
|
+
|
8
|
+
#Helper module to facilitate MediaCloth usage.
|
9
|
+
module MediaCloth

  module_function

  #Parses wiki formatted +input+ and generates its html representation.
  #
  #A fresh parser (with its own lexer) and a fresh HTML generator are built
  #on every call; the return value is the generator's +html+.
  def wiki_to_html(input)
    wiki_parser = MediaWikiParser.new
    wiki_parser.lexer = MediaWikiLexer.new
    tree = wiki_parser.parse(input)

    html_generator = MediaWikiHTMLGenerator.new
    html_generator.parse(tree)
    html_generator.html
  end

end
|
data/lib/mediacloth.rb~
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mediacloth/mediawikilexer'
|
2
|
+
require 'mediacloth/mediawikiparser'
|
3
|
+
require 'mediacloth/mediawikiast'
|
4
|
+
require 'mediacloth/mediawikiparams'
|
5
|
+
require 'mediacloth/mediawikiwalker'
|
6
|
+
require 'mediacloth/mediawikihtmlgenerator'
|
7
|
+
|
8
|
+
#Helper module to facilitate MediaCloth usage.
|
9
|
+
module MediaCloth

  module_function

  #Converts wiki markup in +input+ to HTML.
  #
  #Builds a new parser with its own lexer, parses +input+, walks the result
  #with a new HTML generator and returns that generator's +html+.
  def wiki_to_html(input)
    wiki_parser = MediaWikiParser.new
    wiki_parser.lexer = MediaWikiLexer.new
    tree = wiki_parser.parse(input)

    html_generator = MediaWikiHTMLGenerator.new
    html_generator.parse(tree)
    html_generator.html
  end

end
|
data/test/data/html1
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
This is a simple text with <b>Bold text</b> and <i>Italic text</i> inside.
|
2
|
+
One paragraph can be written in several lines.
|
3
|
+
|
4
|
+
Another paragraph starts after a blank line.
|
5
|
+
|
6
|
+
|
7
|
+
Another one.
|
8
|
+
|
9
|
+
This is text with Internal Link and <a href="http://www.example.com" rel="nofollow">external link</a>.
|
10
|
+
|
11
|
+
We can have headlines:
|
12
|
+
|
13
|
+
|
14
|
+
<h1>Headline1</h1><h2>Headline2</h2><h3>Headline3</h3><h4>Headline4</h4><h5>Headline5</h5><h6>Headline6</h6><h7>Headline7</h7>
|
15
|
+
|
16
|
+
<hr></hr>
|
17
|
+
This is a text after the line.
|
18
|
+
|
19
|
+
<ul><li>foo
|
20
|
+
</li><li>foo2
|
21
|
+
</li></ul>
|
data/test/data/html2
ADDED
data/test/data/html3
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--Sat Jan 01 01:01:01 EET 2000CreatorCreator Sat Jan 01 01:01:01 EET 2000
|
data/test/data/html4
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<b><i>Foo</i></b>
|
data/test/data/html6
ADDED
data/test/data/html7
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<a href="http://www.example.com" rel="nofollow">external link</a><a href="http://www.example.com" rel="nofollow">http://www.example.com</a> [foo]
|
data/test/data/input1
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
This is a simple text with '''Bold text''' and ''Italic text'' inside.
|
2
|
+
One paragraph can be written in several lines.
|
3
|
+
|
4
|
+
Another paragraph starts after a blank line.
|
5
|
+
|
6
|
+
|
7
|
+
Another one.
|
8
|
+
|
9
|
+
This is text with [[Internal Link]] and [http://www.example.com external link].
|
10
|
+
|
11
|
+
We can have headlines:
|
12
|
+
|
13
|
+
|
14
|
+
= Headline1 =
|
15
|
+
== Headline2 ==
|
16
|
+
=== Headline3 ===
|
17
|
+
==== Headline4 ====
|
18
|
+
===== Headline5 =====
|
19
|
+
====== Headline6 ======
|
20
|
+
======= Headline7 =======
|
21
|
+
|
22
|
+
This is a preformatted ''' ''' << '' '' text
|
23
|
+
yes
|
24
|
+
|
25
|
+
----
|
26
|
+
This is a text after the line.
|
27
|
+
|
28
|
+
*foo
|
29
|
+
* foo2
|
data/test/data/input2
ADDED
data/test/data/input3
ADDED
data/test/data/input4
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
'''''Foo'''''
|
data/test/data/input5
ADDED
data/test/data/input6
ADDED
data/test/data/input7
ADDED
data/test/data/lex1
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
TEXTThis is a simple text with BOLDSTART'''TEXTBold textBOLDEND'''TEXT and ITALICSTART''TEXTItalic textITALICEND''TEXT inside.
|
2
|
+
One paragraph can be written in several lines.
|
3
|
+
|
4
|
+
Another paragraph starts after a blank line.
|
5
|
+
|
6
|
+
|
7
|
+
Another one.
|
8
|
+
|
9
|
+
This is text with INTLINKSTART[[TEXTInternal LinkINTLINKEND]]TEXT and LINKSTART[TEXThttp://www.example.com external linkLINKEND]TEXT.
|
10
|
+
|
11
|
+
We can have headlines:
|
12
|
+
|
13
|
+
|
14
|
+
SECTION=TEXT Headline1 SECTION=SECTION==TEXT Headline2 SECTION==SECTION===TEXT Headline3 SECTION===SECTION====TEXT Headline4 SECTION====SECTION=====TEXT Headline5 SECTION=====SECTION======TEXT Headline6 SECTION======SECTION=======TEXT Headline7 SECTION=======TEXT
|
15
|
+
|
16
|
+
PREThis is a preformatted ''' ''' << '' '' text
|
17
|
+
PREyes
|
18
|
+
HLINE----TEXT
|
19
|
+
This is a text after the line.
|
20
|
+
|
21
|
+
UL_STARTLI_STARTTEXTfoo
|
22
|
+
LI_ENDLI_STARTTEXTfoo2
|
23
|
+
LI_ENDUL_ENDfalsefalse
|
data/test/data/lex2
ADDED
data/test/data/lex3
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
TEXT--SIGNATURE_DATE~~~~~SIGNATURE_NAME~~~SIGNATURE_FULL~~~~falsefalse
|
data/test/data/lex4
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
BOLDSTART'''ITALICSTART''TEXTFooITALICEND''BOLDEND'''falsefalse
|
data/test/data/lex5
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
UL_STARTLI_STARTTEXTA
|
2
|
+
UL_STARTLI_STARTTEXTA
|
3
|
+
UL_STARTLI_STARTTEXTa
|
4
|
+
LI_ENDLI_STARTTEXTb
|
5
|
+
UL_STARTLI_STARTTEXTa
|
6
|
+
LI_ENDUL_ENDLI_ENDUL_ENDLI_ENDUL_ENDLI_ENDLI_STARTTEXTB
|
7
|
+
UL_STARTLI_STARTTEXTb
|
8
|
+
LI_ENDUL_ENDLI_ENDUL_ENDOL_STARTLI_STARTTEXTa
|
9
|
+
OL_STARTLI_STARTTEXTa
|
10
|
+
OL_STARTLI_STARTTEXTa
|
11
|
+
OL_STARTLI_STARTTEXTa
|
12
|
+
LI_ENDOL_ENDLI_ENDOL_ENDLI_ENDOL_ENDLI_ENDOL_ENDfalsefalse
|
data/test/data/lex6
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
TEXTThis is some text with BOLDSTART'''TEXTboldBOLDEND'''TEXT and ITALICSTART''TEXTitalicITALICEND''TEXT formating.
|
2
|
+
The list is also here:
|
3
|
+
UL_STARTLI_STARTTEXTList Item 1
|
4
|
+
UL_STARTLI_STARTTEXTSub list item 1
|
5
|
+
LI_ENDLI_STARTTEXTSub list item 2
|
6
|
+
LI_ENDUL_ENDLI_ENDLI_STARTTEXTList Item 2
|
7
|
+
LI_ENDUL_ENDTEXTThe end
|
8
|
+
HLINE----falsefalse
|
data/test/data/lex7
ADDED
data/test/data/result1
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
<p>This is a simple text with <b>Bold text</b> and <i>Italic text</i> inside.
|
2
|
+
One paragraph can be written in several lines.
|
3
|
+
|
4
|
+
</p><p>Another paragraph starts after a blank line.
|
5
|
+
</p><p><br>
|
6
|
+
Another one.
|
7
|
+
</p><p>This is text with <a href="/wiki/index.php?title=Internal_Link&action=edit" class="new" title="Internal Link">Internal Link</a> and <a href="http://www.example.com" class="external text" title="http://www.example.com" rel="nofollow">external link</a>.
|
8
|
+
</p><p>We can have headlines:
|
9
|
+
</p><p><br>
|
10
|
+
</p>
|
11
|
+
<table id="toc" class="toc" summary="Contents"><tbody><tr><td><div id="toctitle"><h2>Contents</h2> <span class="toctoggle">[<a href="javascript:toggleToc()" class="internal" id="togglelink">hide</a>]</span></div>
|
12
|
+
|
13
|
+
<ul style="display: block;">
|
14
|
+
<li class="toclevel-1"><a href="#Headline1"><span class="tocnumber">1</span> <span class="toctext">Headline1</span></a>
|
15
|
+
<ul>
|
16
|
+
<li class="toclevel-2"><a href="#Headline2"><span class="tocnumber">1.1</span> <span class="toctext">Headline2</span></a>
|
17
|
+
<ul>
|
18
|
+
<li class="toclevel-3"><a href="#Headline3"><span class="tocnumber">1.1.1</span> <span class="toctext">Headline3</span></a>
|
19
|
+
<ul>
|
20
|
+
<li class="toclevel-4"><a href="#Headline4"><span class="tocnumber">1.1.1.1</span> <span class="toctext">Headline4</span></a></li>
|
21
|
+
|
22
|
+
</ul>
|
23
|
+
</li>
|
24
|
+
</ul>
|
25
|
+
</li>
|
26
|
+
</ul>
|
27
|
+
</li>
|
28
|
+
</ul>
|
29
|
+
</td></tr></tbody></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script>
|
30
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=1" title="Edit section: Headline1">edit</a>]</div><a name="Headline1"></a><h1> Headline1 </h1>
|
31
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=2" title="Edit section: Headline2">edit</a>]</div><a name="Headline2"></a><h2> Headline2 </h2>
|
32
|
+
|
33
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=3" title="Edit section: Headline3">edit</a>]</div><a name="Headline3"></a><h3> Headline3 </h3>
|
34
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=4" title="Edit section: Headline4">edit</a>]</div><a name="Headline4"></a><h4> Headline4 </h4>
|
35
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=5" title="Edit section: Headline5">edit</a>]</div><a name="Headline5"></a><h5> Headline5 </h5>
|
36
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=6" title="Edit section: Headline6">edit</a>]</div><a name="Headline6"></a><h6> Headline6 </h6>
|
37
|
+
<div class="editsection" style="float: right; margin-left: 5px;">[<a href="/wiki/index.php?title=Test&action=edit&section=7" title="Edit section: = Headline7 =">edit</a>]</div><a name=".3D_Headline7_.3D"></a><h6>= Headline7 =</h6>
|
38
|
+
<pre>This is a preformatted <b> </b> << <i> </i> text
|
39
|
+
yes
|
40
|
+
|
41
|
+
|
42
|
+
</pre>
|
43
|
+
<hr>
|
44
|
+
<p>This is a text after the line.
|
45
|
+
</p>
|
46
|
+
<ul><li>foo
|
47
|
+
</li><li> foo2
|
48
|
+
</li></ul>
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mediawikilexer'
|
2
|
+
require 'mediawikiparser'
|
3
|
+
require 'mediawikiparams'
|
4
|
+
require 'mediawikihtmlgenerator'
|
5
|
+
|
6
|
+
# Regenerates the expected-HTML fixture for one test case.
#
# Reads ../data/input<index>, parses it, renders it with the HTML generator
# (after setting MediaWikiParams.instance.time to a fixed timestamp) and
# writes the result to ../data/html<index>. Paths are relative to the
# current working directory.
#
# Both files are opened in block form so their handles are always closed.
# The output file is opened only after rendering succeeded, so a failing
# parse no longer truncates an existing fixture.
#
# index:: suffix of the input/html fixture files.
# Returns nil.
def produce(index)
  input = File.open("../data/input#{index}", "r") { |source| source.read }

  parser = MediaWikiParser.new
  parser.lexer = MediaWikiLexer.new
  ast = parser.parse(input)
  MediaWikiParams.instance.time = Time.mktime(2000, 1, 1, 1, 1, 1, 1)
  generator = MediaWikiHTMLGenerator.new
  generator.parse(ast)

  File.open("../data/html#{index}", "w") { |target| target.write(generator.html) }
  nil
end
|
21
|
+
|
22
|
+
# (3..5).each { |i| produce(i) }
|
23
|
+
produce(6)
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mediawikilexer'
|
2
|
+
require 'mediawikiparser'
|
3
|
+
require 'mediawikiparams'
|
4
|
+
require 'mediawikihtmlgenerator'
|
5
|
+
|
6
|
+
# Regenerates the expected-HTML fixture for one test case.
#
# Reads ../data/input<index>, parses it, renders it with the HTML generator
# (after setting MediaWikiParams.instance.time to a fixed timestamp) and
# writes the result to ../data/html<index>. Paths are relative to the
# current working directory.
#
# Both files are opened in block form so their handles are always closed.
# The output file is opened only after rendering succeeded, so a failing
# parse no longer truncates an existing fixture.
#
# index:: suffix of the input/html fixture files.
# Returns nil.
def produce(index)
  input = File.open("../data/input#{index}", "r") { |source| source.read }

  parser = MediaWikiParser.new
  parser.lexer = MediaWikiLexer.new
  ast = parser.parse(input)
  MediaWikiParams.instance.time = Time.mktime(2000, 1, 1, 1, 1, 1, 1)
  generator = MediaWikiHTMLGenerator.new
  generator.parse(ast)

  File.open("../data/html#{index}", "w") { |target| target.write(generator.html) }
  nil
end
|
21
|
+
|
22
|
+
# (3..5).each { |i| produce(i) }
|
23
|
+
produce(1)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'mediawikilexer'
|
2
|
+
|
3
|
+
# Regenerates the expected-token fixture for one test case.
#
# Reads ../data/input<index>, tokenizes it with MediaWikiLexer and writes
# the tokens to ../data/lex<index>. Paths are relative to the current
# working directory.
#
# Both files are opened in block form so their handles are always closed,
# and the output file is opened only after tokenizing succeeded, so a lexer
# failure does not truncate an existing fixture.
#
# index:: suffix of the input/lex fixture files.
# Returns nil.
def produce(index)
  input = File.open("../data/input#{index}", "r") { |source| source.read }
  tokens = MediaWikiLexer.new.tokenize(input)

  # The fixtures hold every token name and value concatenated without
  # separators. Array#to_s only produced that on Ruby 1.8; join gives the
  # same text on every Ruby version (nested arrays are joined recursively).
  serialized = Array(tokens).join

  File.open("../data/lex#{index}", "w") { |target| target.write(serialized) }
  nil
end
|
13
|
+
|
14
|
+
# (1..5).each { |i| produce(i) }
|
15
|
+
produce(7)
|