mediacloth 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/mediacloth/mediawikiast.rb +5 -1
- data/lib/mediacloth/mediawikihtmlgenerator.rb +8 -2
- data/lib/mediacloth/mediawikilexer.rb +82 -7
- data/lib/mediacloth/mediawikiparser.rb +211 -139
- data/lib/mediacloth/mediawikiparser.y +36 -3
- data/lib/mediacloth/mediawikiwalker.rb +6 -0
- data/test/data/html1 +4 -19
- data/test/data/html2 +2 -2
- data/test/data/html3 +1 -1
- data/test/data/html4 +1 -1
- data/test/data/html5 +14 -0
- data/test/data/html6 +4 -4
- data/test/data/html7 +2 -1
- data/test/data/html8 +1 -0
- data/test/data/input5 +2 -0
- data/test/data/input8 +1 -0
- data/test/data/lex1 +10 -11
- data/test/data/lex2 +2 -2
- data/test/data/lex3 +1 -1
- data/test/data/lex4 +1 -1
- data/test/data/lex6 +4 -4
- data/test/data/lex7 +2 -2
- data/test/data/lex8 +1 -0
- data/test/dataproducers/html.rb +6 -5
- data/test/dataproducers/lex.rb +2 -2
- data/test/debugwalker.rb +5 -0
- data/test/lexer.rb +12 -3
- data/test/testhelper.rb +1 -1
- metadata +6 -12
- data/lib/mediacloth/mediawikihtmlgenerator.rb~ +0 -105
- data/lib/mediacloth/mediawikiparser.y~ +0 -172
- data/lib/mediacloth/mediawikiwalker.rb~ +0 -62
- data/lib/mediacloth.rb~ +0 -23
- data/test/dataproducers/html.rb~ +0 -23
- data/test/debugwalker.rb~ +0 -63
- data/test/htmlgenerator.rb~ +0 -25
- data/test/lexer.rb~ +0 -57
- data/test/parser.rb~ +0 -23
- data/test/testhelper.rb~ +0 -28
data/test/data/html1
CHANGED
@@ -1,21 +1,6 @@
|
|
1
|
-
This is a simple text with <b>Bold text</b> and <i>Italic text</i> inside.
|
2
|
-
One paragraph can be written in several lines
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
Another one.
|
8
|
-
|
9
|
-
This is text with Internal Link and <a href="http://www.example.com" rel="nofollow">external link</a>.
|
10
|
-
|
11
|
-
We can have headlines:
|
12
|
-
|
13
|
-
|
14
|
-
<h1>Headline1</h1><h2>Headline2</h2><h3>Headline3</h3><h4>Headline4</h4><h5>Headline5</h5><h6>Headline6</h6><h7>Headline7</h7>
|
15
|
-
|
16
|
-
<hr></hr>
|
17
|
-
This is a text after the line.
|
18
|
-
|
19
|
-
<ul><li>foo
|
1
|
+
<p>This is a simple text with <b>Bold text</b> and <i>Italic text</i> inside.
|
2
|
+
One paragraph can be written in several lines.</p><p>Another paragraph starts after a blank line.</p><p>
|
3
|
+
Another one.</p><p>This is text with Internal Link and <a href="http://www.example.com" rel="nofollow">external link</a>.</p><p>We can have headlines:</p><h1>Headline1</h1><h2>Headline2</h2><h3>Headline3</h3><h4>Headline4</h4><h5>Headline5</h5><h6>Headline6</h6><h7>Headline7</h7><hr></hr><p>
|
4
|
+
This is a text after the line.</p><ul><li>foo
|
20
5
|
</li><li>foo2
|
21
6
|
</li></ul>
|
data/test/data/html2
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
<hr></hr>
|
2
|
-
This is a text
|
1
|
+
<hr></hr><p>
|
2
|
+
This is a text</p>
|
data/test/data/html3
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
<p>--Sat Jan 01 01:01:01 EET 2000CreatorCreator Sat Jan 01 01:01:01 EET 2000</p>
|
data/test/data/html4
CHANGED
@@ -1 +1 @@
|
|
1
|
-
<b><i>Foo</i></b>
|
1
|
+
<p><b><i>Foo</i></b></p>
|
data/test/data/html5
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
<ul><li>A
|
2
|
+
<ul><li>A
|
3
|
+
<ul><li>a
|
4
|
+
</li><li>b
|
5
|
+
<ul><li>a
|
6
|
+
</li></ul></li></ul></li></ul></li><li>B
|
7
|
+
<ul><li>b
|
8
|
+
</li></ul></li></ul><ol><li>a
|
9
|
+
<ol><li>a
|
10
|
+
<ol><li>a
|
11
|
+
<ol><li>a
|
12
|
+
</li></ol></li></ol></li></ol></li><li>a
|
13
|
+
<ol><li>aa
|
14
|
+
</li></ol></li></ol>
|
data/test/data/html6
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
This is some text with <b>bold</b> and <i>italic</i> formating.
|
1
|
+
<p>This is some text with <b>bold</b> and <i>italic</i> formating.
|
2
2
|
The list is also here:
|
3
|
-
|
3
|
+
</p><ul><li>List Item 1
|
4
4
|
<ul><li>Sub list item 1
|
5
5
|
</li><li>Sub list item 2
|
6
6
|
</li></ul></li><li>List Item 2
|
7
|
-
</li></ul>The end
|
8
|
-
|
7
|
+
</li></ul><p>The end
|
8
|
+
</p><hr></hr>
|
data/test/data/html7
CHANGED
@@ -1 +1,2 @@
|
|
1
|
-
<a href="http://www.example.com" rel="nofollow">external link</a><a href="http://www.example.com" rel="nofollow">http://www.example.com</a> [foo]
|
1
|
+
<p><a href="http://www.example.com" rel="nofollow">external link</a><a href="http://www.example.com" rel="nofollow">http://www.example.com</a> [foo]
|
2
|
+
</p>
|
data/test/data/html8
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<p>=== foo ===</p>
|
data/test/data/input8
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
=== foo ===
|
data/test/data/lex1
CHANGED
@@ -1,22 +1,21 @@
|
|
1
|
-
|
2
|
-
One paragraph can be written in several lines.
|
1
|
+
PARA_STARTTEXTThis is a simple text with BOLDSTART'''TEXTBold textBOLDEND'''TEXT and ITALICSTART''TEXTItalic textITALICEND''TEXT inside.
|
2
|
+
One paragraph can be written in several lines.PARA_END
|
3
3
|
|
4
|
-
|
4
|
+
PARA_STARTTEXTAnother paragraph starts after a blank line.PARA_END
|
5
5
|
|
6
|
+
PARA_STARTTEXT
|
7
|
+
Another one.PARA_END
|
6
8
|
|
7
|
-
|
9
|
+
PARA_STARTTEXTThis is text with INTLINKSTART[[TEXTInternal LinkINTLINKEND]]TEXT and LINKSTART[TEXThttp://www.example.com external linkLINKEND]TEXT.PARA_END
|
8
10
|
|
9
|
-
|
11
|
+
PARA_STARTTEXTWe can have headlines:PARA_END
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
SECTION=TEXT Headline1 SECTION=SECTION==TEXT Headline2 SECTION==SECTION===TEXT Headline3 SECTION===SECTION====TEXT Headline4 SECTION====SECTION=====TEXT Headline5 SECTION=====SECTION======TEXT Headline6 SECTION======SECTION=======TEXT Headline7 SECTION=======TEXT
|
13
|
+
SECTION_START=TEXT Headline1 SECTION_END=SECTION_START==TEXT Headline2 SECTION_END==SECTION_START===TEXT Headline3 SECTION_END===SECTION_START====TEXT Headline4 SECTION_END====SECTION_START=====TEXT Headline5 SECTION_END=====SECTION_START======TEXT Headline6 SECTION_END======SECTION_START=======TEXT Headline7 SECTION_END=======PARA_STARTPARA_END
|
15
14
|
|
16
15
|
PREThis is a preformatted ''' ''' << '' '' text
|
17
16
|
PREyes
|
18
|
-
HLINE----
|
19
|
-
This is a text after the line.
|
17
|
+
HLINE----PARA_STARTTEXT
|
18
|
+
This is a text after the line.PARA_END
|
20
19
|
|
21
20
|
UL_STARTLI_STARTTEXTfoo
|
22
21
|
LI_ENDLI_STARTTEXTfoo2
|
data/test/data/lex2
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
HLINE----
|
2
|
-
This is a
|
1
|
+
HLINE----PARA_STARTTEXT
|
2
|
+
This is a textPARA_ENDfalsefalse
|
data/test/data/lex3
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
PARA_STARTTEXT--SIGNATURE_DATE~~~~~SIGNATURE_NAME~~~SIGNATURE_FULL~~~~PARA_ENDfalsefalse
|
data/test/data/lex4
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
PARA_STARTBOLDSTART'''ITALICSTART''TEXTFooITALICEND''BOLDEND'''PARA_ENDfalsefalse
|
data/test/data/lex6
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
|
1
|
+
PARA_STARTTEXTThis is some text with BOLDSTART'''TEXTboldBOLDEND'''TEXT and ITALICSTART''TEXTitalicITALICEND''TEXT formating.
|
2
2
|
The list is also here:
|
3
|
-
|
3
|
+
PARA_ENDUL_STARTLI_STARTTEXTList Item 1
|
4
4
|
UL_STARTLI_STARTTEXTSub list item 1
|
5
5
|
LI_ENDLI_STARTTEXTSub list item 2
|
6
6
|
LI_ENDUL_ENDLI_ENDLI_STARTTEXTList Item 2
|
7
|
-
|
8
|
-
|
7
|
+
LI_ENDUL_ENDPARA_STARTTEXTThe end
|
8
|
+
PARA_ENDHLINE----falsefalse
|
data/test/data/lex7
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
PARA_STARTLINKSTART[TEXThttp://www.example.com external linkLINKEND]LINKSTARTTEXThttp://www.example.comLINKEND]TEXT [foo]
|
2
|
+
PARA_ENDfalsefalse
|
data/test/data/lex8
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
SECTION_START===TEXT foo SECTION_END===falsefalse
|
data/test/dataproducers/html.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
require 'mediawikilexer'
|
2
|
-
require 'mediawikiparser'
|
3
|
-
require 'mediawikiparams'
|
4
|
-
require 'mediawikihtmlgenerator'
|
1
|
+
require 'mediacloth/mediawikilexer'
|
2
|
+
require 'mediacloth/mediawikiparser'
|
3
|
+
require 'mediacloth/mediawikiparams'
|
4
|
+
require 'mediacloth/mediawikihtmlgenerator'
|
5
5
|
|
6
6
|
def produce(index)
|
7
7
|
file = File.new("../data/html#{index}", "w")
|
@@ -15,9 +15,10 @@ def produce(index)
|
|
15
15
|
generator = MediaWikiHTMLGenerator.new
|
16
16
|
generator.parse(ast)
|
17
17
|
|
18
|
+
# puts generator.html
|
18
19
|
file.write(generator.html)
|
19
20
|
file.close
|
20
21
|
end
|
21
22
|
|
22
23
|
# (3..5).each { |i| produce(i) }
|
23
|
-
produce(
|
24
|
+
produce(5)
|
data/test/dataproducers/lex.rb
CHANGED
data/test/debugwalker.rb
CHANGED
data/test/lexer.rb
CHANGED
@@ -7,13 +7,19 @@ class Lexer_Test < Test::Unit::TestCase
|
|
7
7
|
include TestHelper
|
8
8
|
|
9
9
|
def test_input
|
10
|
-
test_files("lex") { |input,result|
|
10
|
+
test_files("lex") { |input,result,resultname|
|
11
11
|
lexer = MediaWikiLexer.new
|
12
12
|
tokens = lexer.tokenize(input)
|
13
13
|
assert_equal(tokens.to_s, result)
|
14
14
|
}
|
15
15
|
end
|
16
16
|
|
17
|
+
def test_paragraphs
|
18
|
+
assert_equal(lex("Before\n\n\n=Headline="),
|
19
|
+
[[:PARA_START, ""], [:TEXT, "Before"], [:PARA_END, "\n\n"],
|
20
|
+
[:SECTION_START, "="], [:TEXT, "Headline"], [:SECTION_END, "="], [false,false]])
|
21
|
+
end
|
22
|
+
|
17
23
|
def test_empty
|
18
24
|
assert_equal(lex(""), [[false,false]])
|
19
25
|
end
|
@@ -32,9 +38,12 @@ class Lexer_Test < Test::Unit::TestCase
|
|
32
38
|
|
33
39
|
def test_ending_text_token
|
34
40
|
#check for a problem when the last token is TEXT and it's not included
|
35
|
-
assert_equal(lex("\n----\nfoo\n"),
|
41
|
+
assert_equal(lex("\n----\nfoo\n"),
|
42
|
+
[[:HLINE, "----"], [:PARA_START, ""],
|
43
|
+
[:TEXT, "\nfoo\n"], [:PARA_END, ""], [false, false]])
|
36
44
|
assert_equal(lex("\n----\nfoo\n Hehe"),
|
37
|
-
[[:HLINE, "----"], [:
|
45
|
+
[[:HLINE, "----"], [:PARA_START, ""], [:TEXT, "\nfoo\n"],
|
46
|
+
[:PARA_END, ""], [:PRE, "Hehe"], [false, false]])
|
38
47
|
end
|
39
48
|
|
40
49
|
def test_bullets
|
data/test/testhelper.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: mediacloth
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.
|
7
|
-
date: 2006-
|
6
|
+
version: 0.0.2
|
7
|
+
date: 2006-08-22 00:00:00 +03:00
|
8
8
|
summary: A MediaWiki syntax parser and HTML generator.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -28,29 +28,20 @@ cert_chain:
|
|
28
28
|
authors:
|
29
29
|
- Pluron Inc.
|
30
30
|
files:
|
31
|
-
- lib/mediacloth.rb~
|
32
31
|
- lib/mediacloth
|
33
32
|
- lib/mediacloth.rb
|
34
33
|
- lib/mediacloth/mediawikihtmlgenerator.rb
|
35
34
|
- lib/mediacloth/mediawikiparams.rb
|
36
35
|
- lib/mediacloth/mediawikiwalker.rb
|
37
36
|
- lib/mediacloth/mediawikiparser.rb
|
38
|
-
- lib/mediacloth/mediawikiparser.y~
|
39
|
-
- lib/mediacloth/mediawikihtmlgenerator.rb~
|
40
37
|
- lib/mediacloth/mediawikiparser.y
|
41
|
-
- lib/mediacloth/mediawikiwalker.rb~
|
42
38
|
- lib/mediacloth/mediawikilexer.rb
|
43
39
|
- lib/mediacloth/mediawikiast.rb
|
44
40
|
- test/data
|
45
|
-
- test/testhelper.rb~
|
46
41
|
- test/parser.rb
|
47
|
-
- test/parser.rb~
|
48
42
|
- test/testhelper.rb
|
49
|
-
- test/htmlgenerator.rb~
|
50
|
-
- test/debugwalker.rb~
|
51
43
|
- test/lexer.rb
|
52
44
|
- test/debugwalker.rb
|
53
|
-
- test/lexer.rb~
|
54
45
|
- test/dataproducers
|
55
46
|
- test/htmlgenerator.rb
|
56
47
|
- test/data/lex1
|
@@ -60,13 +51,16 @@ files:
|
|
60
51
|
- test/data/lex5
|
61
52
|
- test/data/lex6
|
62
53
|
- test/data/lex7
|
54
|
+
- test/data/lex8
|
63
55
|
- test/data/result1
|
64
56
|
- test/data/html1
|
65
57
|
- test/data/html2
|
66
58
|
- test/data/html3
|
67
59
|
- test/data/html4
|
60
|
+
- test/data/html5
|
68
61
|
- test/data/html6
|
69
62
|
- test/data/html7
|
63
|
+
- test/data/html8
|
70
64
|
- test/data/input1
|
71
65
|
- test/data/input2
|
72
66
|
- test/data/input3
|
@@ -74,7 +68,7 @@ files:
|
|
74
68
|
- test/data/input5
|
75
69
|
- test/data/input6
|
76
70
|
- test/data/input7
|
77
|
-
- test/
|
71
|
+
- test/data/input8
|
78
72
|
- test/dataproducers/lex.rb
|
79
73
|
- test/dataproducers/html.rb
|
80
74
|
- README
|
@@ -1,105 +0,0 @@
|
|
1
|
-
require 'mediawikiwalker'
|
2
|
-
require 'mediawikiparams'
|
3
|
-
|
4
|
-
#HTML generator for a MediaWiki parse tree
|
5
|
-
#
|
6
|
-
#Typical use case:
|
7
|
-
# parser = MediaWikiParser.new
|
8
|
-
# parser.lexer = MediaWikiLexer.new
|
9
|
-
# ast = parser.parse(input)
|
10
|
-
# walker = MediaWikiHTMLGenerator.new
|
11
|
-
# walker.parse(ast)
|
12
|
-
# puts walker.html
|
13
|
-
class MediaWikiHTMLGenerator < MediaWikiWalker
|
14
|
-
attr_reader :html
|
15
|
-
|
16
|
-
def initialize
|
17
|
-
@html = ""
|
18
|
-
end
|
19
|
-
|
20
|
-
protected
|
21
|
-
|
22
|
-
def parse_wiki_ast(ast)
|
23
|
-
super(ast)
|
24
|
-
end
|
25
|
-
|
26
|
-
def parse_text(ast)
|
27
|
-
tag = formatting_to_tag(ast)
|
28
|
-
if tag[0].empty?
|
29
|
-
@html += ast.contents
|
30
|
-
else
|
31
|
-
@html += "<#{tag[0]}#{tag[1]}>#{ast.contents}</#{tag[0]}>"
|
32
|
-
end
|
33
|
-
super(ast)
|
34
|
-
end
|
35
|
-
|
36
|
-
def parse_formatted(ast)
|
37
|
-
tag = formatting_to_tag(ast)
|
38
|
-
@html += "<#{tag}>"
|
39
|
-
super(ast)
|
40
|
-
@html += "</#{tag}>"
|
41
|
-
end
|
42
|
-
|
43
|
-
def parse_list(ast)
|
44
|
-
tag = list_tag(ast)
|
45
|
-
@html += "<#{tag}>"
|
46
|
-
super(ast)
|
47
|
-
@html += "</#{tag}>"
|
48
|
-
end
|
49
|
-
|
50
|
-
def parse_list_item(ast)
|
51
|
-
@html += "<li>"
|
52
|
-
super(ast)
|
53
|
-
@html += "</li>"
|
54
|
-
end
|
55
|
-
|
56
|
-
def parse_preformatted(ast)
|
57
|
-
super(ast)
|
58
|
-
end
|
59
|
-
|
60
|
-
def parse_section(ast)
|
61
|
-
@html += "<h#{ast.level}>"
|
62
|
-
@html += ast.contents.strip
|
63
|
-
@html += "</h#{ast.level}>"
|
64
|
-
super(ast)
|
65
|
-
end
|
66
|
-
|
67
|
-
private
|
68
|
-
|
69
|
-
#returns an array with a tag name and tag attributes
|
70
|
-
def formatting_to_tag(ast)
|
71
|
-
tag = ["", ""]
|
72
|
-
if ast.formatting == :Bold
|
73
|
-
tag = ["b", ""]
|
74
|
-
elsif ast.formatting == :Italic
|
75
|
-
tag = ["i", ""]
|
76
|
-
elsif ast.formatting == :Link or ast.formatting == :ExternalLink
|
77
|
-
links = ast.contents.split
|
78
|
-
link = links[0]
|
79
|
-
link_name = links[1, links.length-1].join(" ")
|
80
|
-
link_name = link if link_name.empty?
|
81
|
-
ast.contents = link_name
|
82
|
-
tag = ["a", " href=\"#{link}\" rel=\"nofollow\""]
|
83
|
-
elsif ast.formatting == :HLine
|
84
|
-
ast.contents = ""
|
85
|
-
tag = ["hr", ""]
|
86
|
-
elsif ast.formatting == :SignatureDate
|
87
|
-
ast.contents = MediaWikiParams.instance.time.to_s
|
88
|
-
elsif ast.formatting == :SignatureName
|
89
|
-
ast.contents = MediaWikiParams.instance.author
|
90
|
-
elsif ast.formatting == :SignatureFull
|
91
|
-
ast.contents = MediaWikiParams.instance.author + " " + MediaWikiParams.instance.time.to_s
|
92
|
-
end
|
93
|
-
tag
|
94
|
-
end
|
95
|
-
|
96
|
-
#returns a tag name of the list in ast node
|
97
|
-
def list_tag(ast)
|
98
|
-
if ast.type == :Bulleted
|
99
|
-
return "ul"
|
100
|
-
elsif ast.type == :Numbered
|
101
|
-
return "ol"
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
end
|
@@ -1,172 +0,0 @@
|
|
1
|
-
#The parser for the MediaWiki language.
|
2
|
-
#
|
3
|
-
#Usage together with a lexer:
|
4
|
-
# inputFile = File.new("data/input1", "r")
|
5
|
-
# input = inputFile.read
|
6
|
-
# parser = MediaWikiParser.new
|
7
|
-
# parser.lexer = MediaWikiLexer.new
|
8
|
-
# parser.parse(input)
|
9
|
-
class MediaWikiParser
|
10
|
-
|
11
|
-
token BOLDSTART BOLDEND ITALICSTART ITALICEND LINKSTART LINKEND
|
12
|
-
INTLINKSTART INTLINKEND SECTION TEXT PRE
|
13
|
-
HLINE SIGNATURE_NAME SIGNATURE_DATE SIGNATURE_FULL
|
14
|
-
UL_START UL_END LI_START LI_END OL_START OL_END
|
15
|
-
|
16
|
-
rule
|
17
|
-
|
18
|
-
wiki:
|
19
|
-
repeated_contents
|
20
|
-
{
|
21
|
-
@nodes.push WikiAST.new
|
22
|
-
#@nodes.last.children.insert(0, val[0])
|
23
|
-
#puts val[0]
|
24
|
-
@nodes.last.children += val[0]
|
25
|
-
}
|
26
|
-
;
|
27
|
-
|
28
|
-
contents:
|
29
|
-
text
|
30
|
-
{
|
31
|
-
result = val[0]
|
32
|
-
}
|
33
|
-
| bulleted_list
|
34
|
-
{
|
35
|
-
result = val[0]
|
36
|
-
}
|
37
|
-
| preformatted
|
38
|
-
{
|
39
|
-
p = PreformattedAST.new
|
40
|
-
p.contents = val[0]
|
41
|
-
result = p
|
42
|
-
}
|
43
|
-
| section
|
44
|
-
{
|
45
|
-
s = SectionAST.new
|
46
|
-
s.contents = val[0][0]
|
47
|
-
s.level = val[0][1]
|
48
|
-
result = s
|
49
|
-
}
|
50
|
-
;
|
51
|
-
|
52
|
-
repeated_contents: contents
|
53
|
-
{
|
54
|
-
result = []
|
55
|
-
result << val[0]
|
56
|
-
}
|
57
|
-
| repeated_contents contents
|
58
|
-
{
|
59
|
-
result = []
|
60
|
-
result += val[0]
|
61
|
-
result << val[1]
|
62
|
-
}
|
63
|
-
;
|
64
|
-
|
65
|
-
text: element
|
66
|
-
{
|
67
|
-
p = TextAST.new
|
68
|
-
p.formatting = val[0][0]
|
69
|
-
p.contents = val[0][1]
|
70
|
-
result = p
|
71
|
-
}
|
72
|
-
| formatted_element
|
73
|
-
{
|
74
|
-
result = val[0]
|
75
|
-
}
|
76
|
-
;
|
77
|
-
|
78
|
-
element: LINKSTART TEXT LINKEND
|
79
|
-
{ return [:Link, val[1]] }
|
80
|
-
| INTLINKSTART TEXT INTLINKEND
|
81
|
-
{ return [:InternalLink, val[1]] }
|
82
|
-
| TEXT
|
83
|
-
{ return [:None, val[0]] }
|
84
|
-
| HLINE
|
85
|
-
{ return [:HLine, val[0]] }
|
86
|
-
| SIGNATURE_DATE
|
87
|
-
{ return [:SignatureDate, val[0]] }
|
88
|
-
| SIGNATURE_NAME
|
89
|
-
{ return [:SignatureName, val[0]] }
|
90
|
-
| SIGNATURE_FULL
|
91
|
-
{ return [:SignatureFull, val[0]] }
|
92
|
-
;
|
93
|
-
|
94
|
-
formatted_element: BOLDSTART repeated_contents BOLDEND
|
95
|
-
{
|
96
|
-
p = FormattedAST.new
|
97
|
-
p.formatting = :Bold
|
98
|
-
p.children += val[1]
|
99
|
-
result = p
|
100
|
-
}
|
101
|
-
| ITALICSTART repeated_contents ITALICEND
|
102
|
-
{
|
103
|
-
p = FormattedAST.new
|
104
|
-
p.formatting = :Italic
|
105
|
-
p.children += val[1]
|
106
|
-
result = p
|
107
|
-
}
|
108
|
-
;
|
109
|
-
|
110
|
-
bulleted_list: UL_START list_item list_contents UL_END
|
111
|
-
{
|
112
|
-
list = ListAST.new
|
113
|
-
list.type = :Bulleted
|
114
|
-
list.children << val[1]
|
115
|
-
list.children += val[2]
|
116
|
-
result = list
|
117
|
-
}
|
118
|
-
;
|
119
|
-
|
120
|
-
list_contents:
|
121
|
-
{ result = [] }
|
122
|
-
list_item list_contents
|
123
|
-
{
|
124
|
-
result << val[1]
|
125
|
-
result += val[2]
|
126
|
-
}
|
127
|
-
|
|
128
|
-
{ result = [] }
|
129
|
-
;
|
130
|
-
|
131
|
-
list_item: LI_START repeated_contents LI_END
|
132
|
-
{
|
133
|
-
li = ListItemAST.new
|
134
|
-
li.children += val[1]
|
135
|
-
result = li
|
136
|
-
}
|
137
|
-
;
|
138
|
-
|
139
|
-
preformatted: PRE
|
140
|
-
{ result = val[0] }
|
141
|
-
;
|
142
|
-
|
143
|
-
section: SECTION TEXT SECTION
|
144
|
-
{ result = [val[1], val[0].length] }
|
145
|
-
;
|
146
|
-
|
147
|
-
end
|
148
|
-
|
149
|
-
---- header ----
|
150
|
-
require 'mediawikiast'
|
151
|
-
|
152
|
-
---- inner ----
|
153
|
-
|
154
|
-
attr_accessor :lexer
|
155
|
-
|
156
|
-
def initialize
|
157
|
-
@nodes = []
|
158
|
-
super
|
159
|
-
end
|
160
|
-
|
161
|
-
#Tokenizes input string and parses it.
|
162
|
-
def parse(input)
|
163
|
-
@yydebug=true
|
164
|
-
lexer.tokenize(input)
|
165
|
-
do_parse
|
166
|
-
return @nodes.last
|
167
|
-
end
|
168
|
-
|
169
|
-
#Asks the lexer to return the next token.
|
170
|
-
def next_token
|
171
|
-
return @lexer.lex
|
172
|
-
end
|
@@ -1,62 +0,0 @@
|
|
1
|
-
require 'mediawikiast'
|
2
|
-
|
3
|
-
#Default walker to traverse the parse tree.
|
4
|
-
#
|
5
|
-
#The walker traverses the entire parse tree and does nothing.
|
6
|
-
#To implement some functionality during this process, reimplement
|
7
|
-
#<i>parse...</i> methods and don't forget to call super() to not
|
8
|
-
#break the walk.
|
9
|
-
#
|
10
|
-
#Current implementations: MediaWikiHTMLGenerator, DebugWalker
|
11
|
-
class MediaWikiWalker
|
12
|
-
|
13
|
-
#Walks through the AST
|
14
|
-
def parse(ast)
|
15
|
-
parse_wiki_ast(ast)
|
16
|
-
end
|
17
|
-
|
18
|
-
protected
|
19
|
-
|
20
|
-
#===== reimplement these methods and don't forget to call super() ====#
|
21
|
-
|
22
|
-
#Reimplement this
|
23
|
-
def parse_wiki_ast(ast)
|
24
|
-
ast.children.each do |c|
|
25
|
-
parse_formatted(c) if c.class == FormattedAST
|
26
|
-
parse_text(c) if c.class == TextAST
|
27
|
-
parse_list(c) if c.class == ListAST
|
28
|
-
parse_preformatted(c) if c.class == PreformattedAST
|
29
|
-
parse_section(c) if c.class == SectionAST
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
#Reimplement this
|
34
|
-
def parse_formatted(ast)
|
35
|
-
parse_wiki_ast(ast)
|
36
|
-
end
|
37
|
-
|
38
|
-
#Reimplement this
|
39
|
-
def parse_text(ast)
|
40
|
-
end
|
41
|
-
|
42
|
-
#Reimplement this
|
43
|
-
def parse_list(ast)
|
44
|
-
ast.children.each do |c|
|
45
|
-
parse_list_item(c) if c.class == ListItemAST
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
#Reimplement this
|
50
|
-
def parse_list_item(ast)
|
51
|
-
parse_wiki_ast(ast)
|
52
|
-
end
|
53
|
-
|
54
|
-
#Reimplement this
|
55
|
-
def parse_preformatted(ast)
|
56
|
-
end
|
57
|
-
|
58
|
-
#Reimplement this
|
59
|
-
def parse_section(ast)
|
60
|
-
end
|
61
|
-
|
62
|
-
end
|