mediacloth 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/mediacloth/mediawikiast.rb +5 -1
- data/lib/mediacloth/mediawikihtmlgenerator.rb +8 -2
- data/lib/mediacloth/mediawikilexer.rb +82 -7
- data/lib/mediacloth/mediawikiparser.rb +211 -139
- data/lib/mediacloth/mediawikiparser.y +36 -3
- data/lib/mediacloth/mediawikiwalker.rb +6 -0
- data/test/data/html1 +4 -19
- data/test/data/html2 +2 -2
- data/test/data/html3 +1 -1
- data/test/data/html4 +1 -1
- data/test/data/html5 +14 -0
- data/test/data/html6 +4 -4
- data/test/data/html7 +2 -1
- data/test/data/html8 +1 -0
- data/test/data/input5 +2 -0
- data/test/data/input8 +1 -0
- data/test/data/lex1 +10 -11
- data/test/data/lex2 +2 -2
- data/test/data/lex3 +1 -1
- data/test/data/lex4 +1 -1
- data/test/data/lex6 +4 -4
- data/test/data/lex7 +2 -2
- data/test/data/lex8 +1 -0
- data/test/dataproducers/html.rb +6 -5
- data/test/dataproducers/lex.rb +2 -2
- data/test/debugwalker.rb +5 -0
- data/test/lexer.rb +12 -3
- data/test/testhelper.rb +1 -1
- metadata +6 -12
- data/lib/mediacloth/mediawikihtmlgenerator.rb~ +0 -105
- data/lib/mediacloth/mediawikiparser.y~ +0 -172
- data/lib/mediacloth/mediawikiwalker.rb~ +0 -62
- data/lib/mediacloth.rb~ +0 -23
- data/test/dataproducers/html.rb~ +0 -23
- data/test/debugwalker.rb~ +0 -63
- data/test/htmlgenerator.rb~ +0 -25
- data/test/lexer.rb~ +0 -57
- data/test/parser.rb~ +0 -23
- data/test/testhelper.rb~ +0 -28
data/test/data/html1
CHANGED
@@ -1,21 +1,6 @@
|
|
1
|
-
This is a simple text with <b>Bold text</b> and <i>Italic text</i> inside.
|
2
|
-
One paragraph can be written in several lines
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
Another one.
|
8
|
-
|
9
|
-
This is text with Internal Link and <a href="http://www.example.com" rel="nofollow">external link</a>.
|
10
|
-
|
11
|
-
We can have headlines:
|
12
|
-
|
13
|
-
|
14
|
-
<h1>Headline1</h1><h2>Headline2</h2><h3>Headline3</h3><h4>Headline4</h4><h5>Headline5</h5><h6>Headline6</h6><h7>Headline7</h7>
|
15
|
-
|
16
|
-
<hr></hr>
|
17
|
-
This is a text after the line.
|
18
|
-
|
19
|
-
<ul><li>foo
|
1
|
+
<p>This is a simple text with <b>Bold text</b> and <i>Italic text</i> inside.
|
2
|
+
One paragraph can be written in several lines.</p><p>Another paragraph starts after a blank line.</p><p>
|
3
|
+
Another one.</p><p>This is text with Internal Link and <a href="http://www.example.com" rel="nofollow">external link</a>.</p><p>We can have headlines:</p><h1>Headline1</h1><h2>Headline2</h2><h3>Headline3</h3><h4>Headline4</h4><h5>Headline5</h5><h6>Headline6</h6><h7>Headline7</h7><hr></hr><p>
|
4
|
+
This is a text after the line.</p><ul><li>foo
|
20
5
|
</li><li>foo2
|
21
6
|
</li></ul>
|
data/test/data/html2
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
<hr></hr>
|
2
|
-
This is a text
|
1
|
+
<hr></hr><p>
|
2
|
+
This is a text</p>
|
data/test/data/html3
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
<p>--Sat Jan 01 01:01:01 EET 2000CreatorCreator Sat Jan 01 01:01:01 EET 2000</p>
|
data/test/data/html4
CHANGED
@@ -1 +1 @@
|
|
1
|
-
<b><i>Foo</i></b>
|
1
|
+
<p><b><i>Foo</i></b></p>
|
data/test/data/html5
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
<ul><li>A
|
2
|
+
<ul><li>A
|
3
|
+
<ul><li>a
|
4
|
+
</li><li>b
|
5
|
+
<ul><li>a
|
6
|
+
</li></ul></li></ul></li></ul></li><li>B
|
7
|
+
<ul><li>b
|
8
|
+
</li></ul></li></ul><ol><li>a
|
9
|
+
<ol><li>a
|
10
|
+
<ol><li>a
|
11
|
+
<ol><li>a
|
12
|
+
</li></ol></li></ol></li></ol></li><li>a
|
13
|
+
<ol><li>aa
|
14
|
+
</li></ol></li></ol>
|
data/test/data/html6
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
This is some text with <b>bold</b> and <i>italic</i> formating.
|
1
|
+
<p>This is some text with <b>bold</b> and <i>italic</i> formating.
|
2
2
|
The list is also here:
|
3
|
-
|
3
|
+
</p><ul><li>List Item 1
|
4
4
|
<ul><li>Sub list item 1
|
5
5
|
</li><li>Sub list item 2
|
6
6
|
</li></ul></li><li>List Item 2
|
7
|
-
</li></ul>The end
|
8
|
-
|
7
|
+
</li></ul><p>The end
|
8
|
+
</p><hr></hr>
|
data/test/data/html7
CHANGED
@@ -1 +1,2 @@
|
|
1
|
-
<a href="http://www.example.com" rel="nofollow">external link</a><a href="http://www.example.com" rel="nofollow">http://www.example.com</a> [foo]
|
1
|
+
<p><a href="http://www.example.com" rel="nofollow">external link</a><a href="http://www.example.com" rel="nofollow">http://www.example.com</a> [foo]
|
2
|
+
</p>
|
data/test/data/html8
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<p>=== foo ===</p>
|
data/test/data/input8
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
=== foo ===
|
data/test/data/lex1
CHANGED
@@ -1,22 +1,21 @@
|
|
1
|
-
|
2
|
-
One paragraph can be written in several lines.
|
1
|
+
PARA_STARTTEXTThis is a simple text with BOLDSTART'''TEXTBold textBOLDEND'''TEXT and ITALICSTART''TEXTItalic textITALICEND''TEXT inside.
|
2
|
+
One paragraph can be written in several lines.PARA_END
|
3
3
|
|
4
|
-
|
4
|
+
PARA_STARTTEXTAnother paragraph starts after a blank line.PARA_END
|
5
5
|
|
6
|
+
PARA_STARTTEXT
|
7
|
+
Another one.PARA_END
|
6
8
|
|
7
|
-
|
9
|
+
PARA_STARTTEXTThis is text with INTLINKSTART[[TEXTInternal LinkINTLINKEND]]TEXT and LINKSTART[TEXThttp://www.example.com external linkLINKEND]TEXT.PARA_END
|
8
10
|
|
9
|
-
|
11
|
+
PARA_STARTTEXTWe can have headlines:PARA_END
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
SECTION=TEXT Headline1 SECTION=SECTION==TEXT Headline2 SECTION==SECTION===TEXT Headline3 SECTION===SECTION====TEXT Headline4 SECTION====SECTION=====TEXT Headline5 SECTION=====SECTION======TEXT Headline6 SECTION======SECTION=======TEXT Headline7 SECTION=======TEXT
|
13
|
+
SECTION_START=TEXT Headline1 SECTION_END=SECTION_START==TEXT Headline2 SECTION_END==SECTION_START===TEXT Headline3 SECTION_END===SECTION_START====TEXT Headline4 SECTION_END====SECTION_START=====TEXT Headline5 SECTION_END=====SECTION_START======TEXT Headline6 SECTION_END======SECTION_START=======TEXT Headline7 SECTION_END=======PARA_STARTPARA_END
|
15
14
|
|
16
15
|
PREThis is a preformatted ''' ''' << '' '' text
|
17
16
|
PREyes
|
18
|
-
HLINE----
|
19
|
-
This is a text after the line.
|
17
|
+
HLINE----PARA_STARTTEXT
|
18
|
+
This is a text after the line.PARA_END
|
20
19
|
|
21
20
|
UL_STARTLI_STARTTEXTfoo
|
22
21
|
LI_ENDLI_STARTTEXTfoo2
|
data/test/data/lex2
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
HLINE----
|
2
|
-
This is a
|
1
|
+
HLINE----PARA_STARTTEXT
|
2
|
+
This is a textPARA_ENDfalsefalse
|
data/test/data/lex3
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
PARA_STARTTEXT--SIGNATURE_DATE~~~~~SIGNATURE_NAME~~~SIGNATURE_FULL~~~~PARA_ENDfalsefalse
|
data/test/data/lex4
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
PARA_STARTBOLDSTART'''ITALICSTART''TEXTFooITALICEND''BOLDEND'''PARA_ENDfalsefalse
|
data/test/data/lex6
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
|
1
|
+
PARA_STARTTEXTThis is some text with BOLDSTART'''TEXTboldBOLDEND'''TEXT and ITALICSTART''TEXTitalicITALICEND''TEXT formating.
|
2
2
|
The list is also here:
|
3
|
-
|
3
|
+
PARA_ENDUL_STARTLI_STARTTEXTList Item 1
|
4
4
|
UL_STARTLI_STARTTEXTSub list item 1
|
5
5
|
LI_ENDLI_STARTTEXTSub list item 2
|
6
6
|
LI_ENDUL_ENDLI_ENDLI_STARTTEXTList Item 2
|
7
|
-
|
8
|
-
|
7
|
+
LI_ENDUL_ENDPARA_STARTTEXTThe end
|
8
|
+
PARA_ENDHLINE----falsefalse
|
data/test/data/lex7
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
PARA_STARTLINKSTART[TEXThttp://www.example.com external linkLINKEND]LINKSTARTTEXThttp://www.example.comLINKEND]TEXT [foo]
|
2
|
+
PARA_ENDfalsefalse
|
data/test/data/lex8
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
SECTION_START===TEXT foo SECTION_END===falsefalse
|
data/test/dataproducers/html.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
require 'mediawikilexer'
|
2
|
-
require 'mediawikiparser'
|
3
|
-
require 'mediawikiparams'
|
4
|
-
require 'mediawikihtmlgenerator'
|
1
|
+
require 'mediacloth/mediawikilexer'
|
2
|
+
require 'mediacloth/mediawikiparser'
|
3
|
+
require 'mediacloth/mediawikiparams'
|
4
|
+
require 'mediacloth/mediawikihtmlgenerator'
|
5
5
|
|
6
6
|
def produce(index)
|
7
7
|
file = File.new("../data/html#{index}", "w")
|
@@ -15,9 +15,10 @@ def produce(index)
|
|
15
15
|
generator = MediaWikiHTMLGenerator.new
|
16
16
|
generator.parse(ast)
|
17
17
|
|
18
|
+
# puts generator.html
|
18
19
|
file.write(generator.html)
|
19
20
|
file.close
|
20
21
|
end
|
21
22
|
|
22
23
|
# (3..5).each { |i| produce(i) }
|
23
|
-
produce(
|
24
|
+
produce(5)
|
data/test/dataproducers/lex.rb
CHANGED
data/test/debugwalker.rb
CHANGED
data/test/lexer.rb
CHANGED
@@ -7,13 +7,19 @@ class Lexer_Test < Test::Unit::TestCase
|
|
7
7
|
include TestHelper
|
8
8
|
|
9
9
|
def test_input
|
10
|
-
test_files("lex") { |input,result|
|
10
|
+
test_files("lex") { |input,result,resultname|
|
11
11
|
lexer = MediaWikiLexer.new
|
12
12
|
tokens = lexer.tokenize(input)
|
13
13
|
assert_equal(tokens.to_s, result)
|
14
14
|
}
|
15
15
|
end
|
16
16
|
|
17
|
+
def test_paragraphs
|
18
|
+
assert_equal(lex("Before\n\n\n=Headline="),
|
19
|
+
[[:PARA_START, ""], [:TEXT, "Before"], [:PARA_END, "\n\n"],
|
20
|
+
[:SECTION_START, "="], [:TEXT, "Headline"], [:SECTION_END, "="], [false,false]])
|
21
|
+
end
|
22
|
+
|
17
23
|
def test_empty
|
18
24
|
assert_equal(lex(""), [[false,false]])
|
19
25
|
end
|
@@ -32,9 +38,12 @@ class Lexer_Test < Test::Unit::TestCase
|
|
32
38
|
|
33
39
|
def test_ending_text_token
|
34
40
|
#check for a problem when the last token is TEXT and it's not included
|
35
|
-
assert_equal(lex("\n----\nfoo\n"),
|
41
|
+
assert_equal(lex("\n----\nfoo\n"),
|
42
|
+
[[:HLINE, "----"], [:PARA_START, ""],
|
43
|
+
[:TEXT, "\nfoo\n"], [:PARA_END, ""], [false, false]])
|
36
44
|
assert_equal(lex("\n----\nfoo\n Hehe"),
|
37
|
-
[[:HLINE, "----"], [:
|
45
|
+
[[:HLINE, "----"], [:PARA_START, ""], [:TEXT, "\nfoo\n"],
|
46
|
+
[:PARA_END, ""], [:PRE, "Hehe"], [false, false]])
|
38
47
|
end
|
39
48
|
|
40
49
|
def test_bullets
|
data/test/testhelper.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: mediacloth
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.
|
7
|
-
date: 2006-
|
6
|
+
version: 0.0.2
|
7
|
+
date: 2006-08-22 00:00:00 +03:00
|
8
8
|
summary: A MediaWiki syntax parser and HTML generator.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -28,29 +28,20 @@ cert_chain:
|
|
28
28
|
authors:
|
29
29
|
- Pluron Inc.
|
30
30
|
files:
|
31
|
-
- lib/mediacloth.rb~
|
32
31
|
- lib/mediacloth
|
33
32
|
- lib/mediacloth.rb
|
34
33
|
- lib/mediacloth/mediawikihtmlgenerator.rb
|
35
34
|
- lib/mediacloth/mediawikiparams.rb
|
36
35
|
- lib/mediacloth/mediawikiwalker.rb
|
37
36
|
- lib/mediacloth/mediawikiparser.rb
|
38
|
-
- lib/mediacloth/mediawikiparser.y~
|
39
|
-
- lib/mediacloth/mediawikihtmlgenerator.rb~
|
40
37
|
- lib/mediacloth/mediawikiparser.y
|
41
|
-
- lib/mediacloth/mediawikiwalker.rb~
|
42
38
|
- lib/mediacloth/mediawikilexer.rb
|
43
39
|
- lib/mediacloth/mediawikiast.rb
|
44
40
|
- test/data
|
45
|
-
- test/testhelper.rb~
|
46
41
|
- test/parser.rb
|
47
|
-
- test/parser.rb~
|
48
42
|
- test/testhelper.rb
|
49
|
-
- test/htmlgenerator.rb~
|
50
|
-
- test/debugwalker.rb~
|
51
43
|
- test/lexer.rb
|
52
44
|
- test/debugwalker.rb
|
53
|
-
- test/lexer.rb~
|
54
45
|
- test/dataproducers
|
55
46
|
- test/htmlgenerator.rb
|
56
47
|
- test/data/lex1
|
@@ -60,13 +51,16 @@ files:
|
|
60
51
|
- test/data/lex5
|
61
52
|
- test/data/lex6
|
62
53
|
- test/data/lex7
|
54
|
+
- test/data/lex8
|
63
55
|
- test/data/result1
|
64
56
|
- test/data/html1
|
65
57
|
- test/data/html2
|
66
58
|
- test/data/html3
|
67
59
|
- test/data/html4
|
60
|
+
- test/data/html5
|
68
61
|
- test/data/html6
|
69
62
|
- test/data/html7
|
63
|
+
- test/data/html8
|
70
64
|
- test/data/input1
|
71
65
|
- test/data/input2
|
72
66
|
- test/data/input3
|
@@ -74,7 +68,7 @@ files:
|
|
74
68
|
- test/data/input5
|
75
69
|
- test/data/input6
|
76
70
|
- test/data/input7
|
77
|
-
- test/
|
71
|
+
- test/data/input8
|
78
72
|
- test/dataproducers/lex.rb
|
79
73
|
- test/dataproducers/html.rb
|
80
74
|
- README
|
@@ -1,105 +0,0 @@
|
|
1
|
-
require 'mediawikiwalker'
|
2
|
-
require 'mediawikiparams'
|
3
|
-
|
4
|
-
#HTML generator for a MediaWiki parse tree
|
5
|
-
#
|
6
|
-
#Typical use case:
|
7
|
-
# parser = MediaWikiParser.new
|
8
|
-
# parser.lexer = MediaWikiLexer.new
|
9
|
-
# ast = parser.parse(input)
|
10
|
-
# walker = MediaWikiHTMLGenerator.new
|
11
|
-
# walker.parse(ast)
|
12
|
-
# puts walker.html
|
13
|
-
class MediaWikiHTMLGenerator < MediaWikiWalker
|
14
|
-
attr_reader :html
|
15
|
-
|
16
|
-
def initialize
|
17
|
-
@html = ""
|
18
|
-
end
|
19
|
-
|
20
|
-
protected
|
21
|
-
|
22
|
-
def parse_wiki_ast(ast)
|
23
|
-
super(ast)
|
24
|
-
end
|
25
|
-
|
26
|
-
def parse_text(ast)
|
27
|
-
tag = formatting_to_tag(ast)
|
28
|
-
if tag[0].empty?
|
29
|
-
@html += ast.contents
|
30
|
-
else
|
31
|
-
@html += "<#{tag[0]}#{tag[1]}>#{ast.contents}</#{tag[0]}>"
|
32
|
-
end
|
33
|
-
super(ast)
|
34
|
-
end
|
35
|
-
|
36
|
-
def parse_formatted(ast)
|
37
|
-
tag = formatting_to_tag(ast)
|
38
|
-
@html += "<#{tag}>"
|
39
|
-
super(ast)
|
40
|
-
@html += "</#{tag}>"
|
41
|
-
end
|
42
|
-
|
43
|
-
def parse_list(ast)
|
44
|
-
tag = list_tag(ast)
|
45
|
-
@html += "<#{tag}>"
|
46
|
-
super(ast)
|
47
|
-
@html += "</#{tag}>"
|
48
|
-
end
|
49
|
-
|
50
|
-
def parse_list_item(ast)
|
51
|
-
@html += "<li>"
|
52
|
-
super(ast)
|
53
|
-
@html += "</li>"
|
54
|
-
end
|
55
|
-
|
56
|
-
def parse_preformatted(ast)
|
57
|
-
super(ast)
|
58
|
-
end
|
59
|
-
|
60
|
-
def parse_section(ast)
|
61
|
-
@html += "<h#{ast.level}>"
|
62
|
-
@html += ast.contents.strip
|
63
|
-
@html += "</h#{ast.level}>"
|
64
|
-
super(ast)
|
65
|
-
end
|
66
|
-
|
67
|
-
private
|
68
|
-
|
69
|
-
#returns an array with a tag name and tag attributes
|
70
|
-
def formatting_to_tag(ast)
|
71
|
-
tag = ["", ""]
|
72
|
-
if ast.formatting == :Bold
|
73
|
-
tag = ["b", ""]
|
74
|
-
elsif ast.formatting == :Italic
|
75
|
-
tag = ["i", ""]
|
76
|
-
elsif ast.formatting == :Link or ast.formatting == :ExternalLink
|
77
|
-
links = ast.contents.split
|
78
|
-
link = links[0]
|
79
|
-
link_name = links[1, links.length-1].join(" ")
|
80
|
-
link_name = link if link_name.empty?
|
81
|
-
ast.contents = link_name
|
82
|
-
tag = ["a", " href=\"#{link}\" rel=\"nofollow\""]
|
83
|
-
elsif ast.formatting == :HLine
|
84
|
-
ast.contents = ""
|
85
|
-
tag = ["hr", ""]
|
86
|
-
elsif ast.formatting == :SignatureDate
|
87
|
-
ast.contents = MediaWikiParams.instance.time.to_s
|
88
|
-
elsif ast.formatting == :SignatureName
|
89
|
-
ast.contents = MediaWikiParams.instance.author
|
90
|
-
elsif ast.formatting == :SignatureFull
|
91
|
-
ast.contents = MediaWikiParams.instance.author + " " + MediaWikiParams.instance.time.to_s
|
92
|
-
end
|
93
|
-
tag
|
94
|
-
end
|
95
|
-
|
96
|
-
#returns a tag name of the list in ast node
|
97
|
-
def list_tag(ast)
|
98
|
-
if ast.type == :Bulleted
|
99
|
-
return "ul"
|
100
|
-
elsif ast.type == :Numbered
|
101
|
-
return "ol"
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
end
|
@@ -1,172 +0,0 @@
|
|
1
|
-
#The parser for the MediaWiki language.
|
2
|
-
#
|
3
|
-
#Usage together with a lexer:
|
4
|
-
# inputFile = File.new("data/input1", "r")
|
5
|
-
# input = inputFile.read
|
6
|
-
# parser = MediaWikiParser.new
|
7
|
-
# parser.lexer = MediaWikiLexer.new
|
8
|
-
# parser.parse(input)
|
9
|
-
class MediaWikiParser
|
10
|
-
|
11
|
-
token BOLDSTART BOLDEND ITALICSTART ITALICEND LINKSTART LINKEND
|
12
|
-
INTLINKSTART INTLINKEND SECTION TEXT PRE
|
13
|
-
HLINE SIGNATURE_NAME SIGNATURE_DATE SIGNATURE_FULL
|
14
|
-
UL_START UL_END LI_START LI_END OL_START OL_END
|
15
|
-
|
16
|
-
rule
|
17
|
-
|
18
|
-
wiki:
|
19
|
-
repeated_contents
|
20
|
-
{
|
21
|
-
@nodes.push WikiAST.new
|
22
|
-
#@nodes.last.children.insert(0, val[0])
|
23
|
-
#puts val[0]
|
24
|
-
@nodes.last.children += val[0]
|
25
|
-
}
|
26
|
-
;
|
27
|
-
|
28
|
-
contents:
|
29
|
-
text
|
30
|
-
{
|
31
|
-
result = val[0]
|
32
|
-
}
|
33
|
-
| bulleted_list
|
34
|
-
{
|
35
|
-
result = val[0]
|
36
|
-
}
|
37
|
-
| preformatted
|
38
|
-
{
|
39
|
-
p = PreformattedAST.new
|
40
|
-
p.contents = val[0]
|
41
|
-
result = p
|
42
|
-
}
|
43
|
-
| section
|
44
|
-
{
|
45
|
-
s = SectionAST.new
|
46
|
-
s.contents = val[0][0]
|
47
|
-
s.level = val[0][1]
|
48
|
-
result = s
|
49
|
-
}
|
50
|
-
;
|
51
|
-
|
52
|
-
repeated_contents: contents
|
53
|
-
{
|
54
|
-
result = []
|
55
|
-
result << val[0]
|
56
|
-
}
|
57
|
-
| repeated_contents contents
|
58
|
-
{
|
59
|
-
result = []
|
60
|
-
result += val[0]
|
61
|
-
result << val[1]
|
62
|
-
}
|
63
|
-
;
|
64
|
-
|
65
|
-
text: element
|
66
|
-
{
|
67
|
-
p = TextAST.new
|
68
|
-
p.formatting = val[0][0]
|
69
|
-
p.contents = val[0][1]
|
70
|
-
result = p
|
71
|
-
}
|
72
|
-
| formatted_element
|
73
|
-
{
|
74
|
-
result = val[0]
|
75
|
-
}
|
76
|
-
;
|
77
|
-
|
78
|
-
element: LINKSTART TEXT LINKEND
|
79
|
-
{ return [:Link, val[1]] }
|
80
|
-
| INTLINKSTART TEXT INTLINKEND
|
81
|
-
{ return [:InternalLink, val[1]] }
|
82
|
-
| TEXT
|
83
|
-
{ return [:None, val[0]] }
|
84
|
-
| HLINE
|
85
|
-
{ return [:HLine, val[0]] }
|
86
|
-
| SIGNATURE_DATE
|
87
|
-
{ return [:SignatureDate, val[0]] }
|
88
|
-
| SIGNATURE_NAME
|
89
|
-
{ return [:SignatureName, val[0]] }
|
90
|
-
| SIGNATURE_FULL
|
91
|
-
{ return [:SignatureFull, val[0]] }
|
92
|
-
;
|
93
|
-
|
94
|
-
formatted_element: BOLDSTART repeated_contents BOLDEND
|
95
|
-
{
|
96
|
-
p = FormattedAST.new
|
97
|
-
p.formatting = :Bold
|
98
|
-
p.children += val[1]
|
99
|
-
result = p
|
100
|
-
}
|
101
|
-
| ITALICSTART repeated_contents ITALICEND
|
102
|
-
{
|
103
|
-
p = FormattedAST.new
|
104
|
-
p.formatting = :Italic
|
105
|
-
p.children += val[1]
|
106
|
-
result = p
|
107
|
-
}
|
108
|
-
;
|
109
|
-
|
110
|
-
bulleted_list: UL_START list_item list_contents UL_END
|
111
|
-
{
|
112
|
-
list = ListAST.new
|
113
|
-
list.type = :Bulleted
|
114
|
-
list.children << val[1]
|
115
|
-
list.children += val[2]
|
116
|
-
result = list
|
117
|
-
}
|
118
|
-
;
|
119
|
-
|
120
|
-
list_contents:
|
121
|
-
{ result = [] }
|
122
|
-
list_item list_contents
|
123
|
-
{
|
124
|
-
result << val[1]
|
125
|
-
result += val[2]
|
126
|
-
}
|
127
|
-
|
|
128
|
-
{ result = [] }
|
129
|
-
;
|
130
|
-
|
131
|
-
list_item: LI_START repeated_contents LI_END
|
132
|
-
{
|
133
|
-
li = ListItemAST.new
|
134
|
-
li.children += val[1]
|
135
|
-
result = li
|
136
|
-
}
|
137
|
-
;
|
138
|
-
|
139
|
-
preformatted: PRE
|
140
|
-
{ result = val[0] }
|
141
|
-
;
|
142
|
-
|
143
|
-
section: SECTION TEXT SECTION
|
144
|
-
{ result = [val[1], val[0].length] }
|
145
|
-
;
|
146
|
-
|
147
|
-
end
|
148
|
-
|
149
|
-
---- header ----
|
150
|
-
require 'mediawikiast'
|
151
|
-
|
152
|
-
---- inner ----
|
153
|
-
|
154
|
-
attr_accessor :lexer
|
155
|
-
|
156
|
-
def initialize
|
157
|
-
@nodes = []
|
158
|
-
super
|
159
|
-
end
|
160
|
-
|
161
|
-
#Tokenizes input string and parses it.
|
162
|
-
def parse(input)
|
163
|
-
@yydebug=true
|
164
|
-
lexer.tokenize(input)
|
165
|
-
do_parse
|
166
|
-
return @nodes.last
|
167
|
-
end
|
168
|
-
|
169
|
-
#Asks the lexer to return the next token.
|
170
|
-
def next_token
|
171
|
-
return @lexer.lex
|
172
|
-
end
|
@@ -1,62 +0,0 @@
|
|
1
|
-
require 'mediawikiast'
|
2
|
-
|
3
|
-
#Default walker to traverse the parse tree.
|
4
|
-
#
|
5
|
-
#The walker traverses the entire parse tree and does nothing.
|
6
|
-
#To implement some functionality during this process, reimplement
|
7
|
-
#<i>parse...</i> methods and don't forget to call super() to not
|
8
|
-
#break the walk.
|
9
|
-
#
|
10
|
-
#Current implementations: MediaWikiHTMLGenerator, DebugWalker
|
11
|
-
class MediaWikiWalker
|
12
|
-
|
13
|
-
#Walks through the AST
|
14
|
-
def parse(ast)
|
15
|
-
parse_wiki_ast(ast)
|
16
|
-
end
|
17
|
-
|
18
|
-
protected
|
19
|
-
|
20
|
-
#===== reimplement these methods and don't forget to call super() ====#
|
21
|
-
|
22
|
-
#Reimplement this
|
23
|
-
def parse_wiki_ast(ast)
|
24
|
-
ast.children.each do |c|
|
25
|
-
parse_formatted(c) if c.class == FormattedAST
|
26
|
-
parse_text(c) if c.class == TextAST
|
27
|
-
parse_list(c) if c.class == ListAST
|
28
|
-
parse_preformatted(c) if c.class == PreformattedAST
|
29
|
-
parse_section(c) if c.class == SectionAST
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
#Reimplement this
|
34
|
-
def parse_formatted(ast)
|
35
|
-
parse_wiki_ast(ast)
|
36
|
-
end
|
37
|
-
|
38
|
-
#Reimplement this
|
39
|
-
def parse_text(ast)
|
40
|
-
end
|
41
|
-
|
42
|
-
#Reimplement this
|
43
|
-
def parse_list(ast)
|
44
|
-
ast.children.each do |c|
|
45
|
-
parse_list_item(c) if c.class == ListItemAST
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
#Reimplement this
|
50
|
-
def parse_list_item(ast)
|
51
|
-
parse_wiki_ast(ast)
|
52
|
-
end
|
53
|
-
|
54
|
-
#Reimplement this
|
55
|
-
def parse_preformatted(ast)
|
56
|
-
end
|
57
|
-
|
58
|
-
#Reimplement this
|
59
|
-
def parse_section(ast)
|
60
|
-
end
|
61
|
-
|
62
|
-
end
|