mediacloth 0.0.3 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +36 -0
- data/lib/mediacloth/mediawikiast.rb +58 -1
- data/lib/mediacloth/mediawikihtmlgenerator.rb +229 -73
- data/lib/mediacloth/mediawikilexer.rb +1030 -656
- data/lib/mediacloth/mediawikilinkhandler.rb +89 -0
- data/lib/mediacloth/mediawikiparams.rb +1 -10
- data/lib/mediacloth/mediawikiparser.rb +939 -409
- data/lib/mediacloth/mediawikiparser.tab.rb +1357 -0
- data/lib/mediacloth/mediawikiparser.y +256 -52
- data/lib/mediacloth/mediawikisignedwikigenerator.rb +42 -0
- data/lib/mediacloth/mediawikitemplatehandler.rb +8 -0
- data/lib/mediacloth/mediawikiwalker.rb +72 -1
- data/lib/mediacloth.rb +33 -10
- data/test/data/ast1 +68 -0
- data/test/data/ast10 +196 -0
- data/test/data/ast11 +34 -0
- data/test/data/ast12 +39 -0
- data/test/data/ast13 +25 -0
- data/test/data/ast14 +13 -0
- data/test/data/ast15 +25 -0
- data/test/data/ast16 +17 -0
- data/test/data/ast17 +9 -0
- data/test/data/ast18 +21 -0
- data/test/data/ast19 +32 -0
- data/test/data/ast2 +4 -0
- data/test/data/ast20 +10 -0
- data/test/data/ast21 +27 -0
- data/test/data/ast22 +22 -0
- data/test/data/ast23 +5 -0
- data/test/data/ast3 +6 -0
- data/test/data/ast4 +122 -0
- data/test/data/ast5 +122 -0
- data/test/data/ast6 +22 -0
- data/test/data/ast7 +143 -0
- data/test/data/ast8 +3 -0
- data/test/data/ast9 +11 -0
- data/test/data/html1 +33 -5
- data/test/data/html10 +31 -27
- data/test/data/html11 +19 -0
- data/test/data/html12 +32 -0
- data/test/data/html13 +29 -0
- data/test/data/html14 +4 -0
- data/test/data/html15 +29 -0
- data/test/data/html16 +28 -0
- data/test/data/html17 +10 -0
- data/test/data/html18 +8 -0
- data/test/data/html19 +27 -0
- data/test/data/html2 +1 -1
- data/test/data/html20 +7 -0
- data/test/data/html21 +5 -0
- data/test/data/html22 +24 -0
- data/test/data/html23 +7 -0
- data/test/data/html3 +1 -1
- data/test/data/html4 +60 -11
- data/test/data/html5 +45 -6
- data/test/data/html6 +5 -5
- data/test/data/html7 +59 -1
- data/test/data/html8 +1 -1
- data/test/data/html9 +10 -2
- data/test/data/input1 +4 -0
- data/test/data/input11 +19 -0
- data/test/data/input12 +32 -0
- data/test/data/input13 +10 -0
- data/test/data/input14 +8 -0
- data/test/data/input15 +10 -0
- data/test/data/input16 +28 -0
- data/test/data/input17 +10 -0
- data/test/data/input18 +16 -0
- data/test/data/input19 +29 -0
- data/test/data/input20 +8 -0
- data/test/data/input21 +18 -0
- data/test/data/input22 +20 -0
- data/test/data/input23 +8 -0
- data/test/data/input4 +13 -1
- data/test/data/input5 +45 -4
- data/test/data/input7 +25 -1
- data/test/data/lex1 +17 -18
- data/test/data/lex10 +57 -87
- data/test/data/lex11 +18 -0
- data/test/data/lex12 +32 -0
- data/test/data/lex13 +3 -0
- data/test/data/lex14 +1 -0
- data/test/data/lex15 +3 -0
- data/test/data/lex16 +27 -0
- data/test/data/lex17 +9 -0
- data/test/data/lex18 +4 -0
- data/test/data/lex19 +27 -0
- data/test/data/lex2 +2 -2
- data/test/data/lex20 +7 -0
- data/test/data/lex21 +4 -0
- data/test/data/lex22 +3 -0
- data/test/data/lex23 +7 -0
- data/test/data/lex3 +1 -1
- data/test/data/lex4 +35 -29
- data/test/data/lex5 +57 -18
- data/test/data/lex6 +7 -7
- data/test/data/lex7 +42 -18
- data/test/data/lex8 +1 -1
- data/test/data/lex9 +6 -6
- data/test/dataproducers/ast.rb +24 -0
- data/test/dataproducers/html.rb +11 -12
- data/test/dataproducers/lex.rb +9 -4
- data/test/debugwalker.rb +25 -11
- data/test/htmlgenerator.rb +170 -13
- data/test/lexer.rb +626 -83
- data/test/linkhandler.rb +39 -0
- data/test/parser.rb +176 -9
- data/test/signedwikigenerator.rb +113 -0
- metadata +158 -79
- data/README +0 -37
- data/lib/mediacloth/mediawikilexer.rb~ +0 -491
- data/lib/mediacloth/mediawikiparser.y~ +0 -210
- data/test/data/result1 +0 -48
- data/test/dataproducers/html.rb~ +0 -24
- data/test/dataproducers/lex.rb~ +0 -15
data/README.md
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
MediaCloth is a MediaWiki syntax parser and HTML generator written in Ruby. It's small, fast, and aims to recognize the complete MediaWiki language.
|
|
2
|
+
|
|
3
|
+
## Installation
|
|
4
|
+
To install the library run:
|
|
5
|
+
|
|
6
|
+
ruby setup.rb
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
## Usage
|
|
10
|
+
The quickest way to parse your input and produce HTML-formatted text is:
|
|
11
|
+
|
|
12
|
+
require 'mediacloth'
|
|
13
|
+
puts MediaCloth::wiki_to_html("'''Hello'''''World''!")
|
|
14
|
+
|
|
15
|
+
You can also provide a hash with custom options if you want to use another generator or link handler:
|
|
16
|
+
|
|
17
|
+
require 'mediacloth'
|
|
18
|
+
puts MediaCloth::wiki_to_html("'''Hello'''''World''!", :link_handler => MyLinkHandler.new)
|
|
19
|
+
|
|
20
|
+
Both examples should produce
|
|
21
|
+
|
|
22
|
+
<b>Hello</b><i>World</i>!
|
|
23
|
+
|
|
24
|
+
## API Docs
|
|
25
|
+
To generate API documentation run:
|
|
26
|
+
|
|
27
|
+
rake rdoc
|
|
28
|
+
|
|
29
|
+
## Development
|
|
30
|
+
To run the tests, execute:
|
|
31
|
+
|
|
32
|
+
rake test
|
|
33
|
+
|
|
34
|
+
To regenerate test data (html and lex files from wiki input), run:
|
|
35
|
+
|
|
36
|
+
rake test:regenerate
|
|
@@ -3,11 +3,15 @@ class AST
|
|
|
3
3
|
attr_accessor :contents
|
|
4
4
|
attr_accessor :parent
|
|
5
5
|
attr_accessor :children
|
|
6
|
+
attr_accessor :index
|
|
7
|
+
attr_accessor :length
|
|
6
8
|
|
|
7
|
-
def initialize
|
|
9
|
+
def initialize(index = 0,length = 0)
|
|
8
10
|
@children = []
|
|
9
11
|
@parent = nil
|
|
10
12
|
@contents = ""
|
|
13
|
+
@index = index
|
|
14
|
+
@length = length
|
|
11
15
|
end
|
|
12
16
|
end
|
|
13
17
|
|
|
@@ -20,6 +24,10 @@ end
|
|
|
20
24
|
class ParagraphAST < AST
|
|
21
25
|
end
|
|
22
26
|
|
|
27
|
+
#The node to represent paragraph with text pasted into wiki
|
|
28
|
+
class PasteAST < AST
|
|
29
|
+
end
|
|
30
|
+
|
|
23
31
|
#The node to represent a simple or formatted text
|
|
24
32
|
#with more AST nodes inside.
|
|
25
33
|
class FormattedAST < AST
|
|
@@ -36,6 +44,7 @@ end
|
|
|
36
44
|
class LinkAST < AST
|
|
37
45
|
#The link's URL
|
|
38
46
|
attr_accessor :url
|
|
47
|
+
attr_accessor :link_type
|
|
39
48
|
end
|
|
40
49
|
|
|
41
50
|
#The node to represent a Mediawiki internal link
|
|
@@ -45,6 +54,13 @@ class InternalLinkAST < AST
|
|
|
45
54
|
attr_accessor :locator
|
|
46
55
|
end
|
|
47
56
|
|
|
57
|
+
#The node to represent a Mediawiki category link
|
|
58
|
+
class CategoryLinkAST < AST
|
|
59
|
+
#Holds the category locator, which is composed of a category name only
|
|
60
|
+
#(e.g. the name of the category)
|
|
61
|
+
attr_accessor :locator
|
|
62
|
+
end
|
|
63
|
+
|
|
48
64
|
#The node to represent a MediaWiki resource reference (embedded images, videos,
|
|
49
65
|
#etc.)
|
|
50
66
|
class ResourceLinkAST < AST
|
|
@@ -72,6 +88,7 @@ end
|
|
|
72
88
|
class TableCellAST < AST
|
|
73
89
|
#the type of cell, :head or :body
|
|
74
90
|
attr_accessor :type
|
|
91
|
+
attr_accessor :attributes
|
|
75
92
|
end
|
|
76
93
|
|
|
77
94
|
#The node to represent a list
|
|
@@ -84,6 +101,14 @@ end
|
|
|
84
101
|
class ListItemAST < AST
|
|
85
102
|
end
|
|
86
103
|
|
|
104
|
+
# The node to represent a leading term in a dictionary list
|
|
105
|
+
class ListTermAST < AST
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# The node to represent a definition in a dictionary list
|
|
109
|
+
class ListDefinitionAST < AST
|
|
110
|
+
end
|
|
111
|
+
|
|
87
112
|
#The node to represent a section
|
|
88
113
|
class SectionAST < AST
|
|
89
114
|
#The level of the section (1,2,3...) that would correspond to
|
|
@@ -93,4 +118,36 @@ end
|
|
|
93
118
|
|
|
94
119
|
#The node to represent a preformatted contents
|
|
95
120
|
class PreformattedAST < AST
|
|
121
|
+
attr_accessor :indented
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
#The node to represent an XHTML element and its contents
|
|
125
|
+
class ElementAST < AST
|
|
126
|
+
attr_accessor :name, :attributes
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
# The node to represent special Mediawiki keywords, such as __TOC__. The text
|
|
130
|
+
# attribute contains the entire string in between '__' and '__'.
|
|
131
|
+
class KeywordAST < AST
|
|
132
|
+
attr_accessor :text
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# The node to represent templates and pre-defined (or user-defined) variables, such as
|
|
136
|
+
# {{Date}}.
|
|
137
|
+
class TemplateAST < AST
|
|
138
|
+
attr_accessor :template_name
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
# The node to represent a template parameter
|
|
142
|
+
class TemplateParameterAST < AST
|
|
143
|
+
attr_accessor :parameter_name #not used atm
|
|
144
|
+
attr_accessor :parameter_value
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
#The node to represent categories to which this page belongs
|
|
148
|
+
class CategoryAST < AST
|
|
149
|
+
#Holds the name of the category
|
|
150
|
+
attr_accessor :locator
|
|
151
|
+
#Holds the string the page is to be sorted as
|
|
152
|
+
attr_accessor :sort_as
|
|
96
153
|
end
|
|
@@ -11,55 +11,37 @@ require 'mediacloth/mediawikiparams'
|
|
|
11
11
|
# walker.parse(ast)
|
|
12
12
|
# puts walker.html
|
|
13
13
|
class MediaWikiHTMLGenerator < MediaWikiWalker
|
|
14
|
+
|
|
14
15
|
attr_reader :html
|
|
15
16
|
|
|
16
|
-
def initialize
|
|
17
|
-
@html = ""
|
|
18
|
-
end
|
|
19
|
-
|
|
20
17
|
def parse(ast)
|
|
18
|
+
@html = ""
|
|
19
|
+
@ast = ast
|
|
21
20
|
@html = super(ast)
|
|
22
21
|
end
|
|
23
22
|
|
|
24
|
-
#
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
#Method invoked to resolve references to resources of unknown types. The
|
|
39
|
-
#type is indicated by the resource prefix. Examples of inline links to
|
|
40
|
-
#unknown references include:
|
|
41
|
-
#* <tt>[[Media:video.mpg]]</tt> (prefix <tt>Media</tt>, resource <tt>video.mpg</tt>)
|
|
42
|
-
#* <tt>[[Image:pretty.png|100px|A ''pretty'' picture]]</tt> (prefix <tt>Image</tt>,
|
|
43
|
-
# resource <tt>pretty.png</tt>, and options <tt>100px</tt> and <tt>A
|
|
44
|
-
# <i>pretty</i> picture</tt>.
|
|
45
|
-
#The return value should be a well-formed hyperlink, image, object or
|
|
46
|
-
#applet tag.
|
|
47
|
-
def link_for(prefix, resource, options=[])
|
|
48
|
-
"<a href=\"javascript:void(0)\">#{prefix}:#{resource}(#{options.join(', ')})</a>"
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
#Set this generator's URL handler.
|
|
53
|
-
def link_handler=(handler)
|
|
54
|
-
@link_handler = handler
|
|
23
|
+
# Utility method that returns the string with '<', '>', '&' and '"' escaped as
|
|
24
|
+
# XHTML character entities
|
|
25
|
+
def MediaWikiHTMLGenerator.escape(str)
|
|
26
|
+
r = str.gsub(%r{[<>&"]}) do
|
|
27
|
+
|match|
|
|
28
|
+
case match
|
|
29
|
+
when '<' then '&lt;'
|
|
30
|
+
when '>' then '&gt;'
|
|
31
|
+
when '&' then '&amp;'
|
|
32
|
+
when '"' then '&quot;'
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
r
|
|
55
36
|
end
|
|
56
37
|
|
|
57
|
-
#
|
|
58
|
-
#
|
|
59
|
-
def
|
|
60
|
-
|
|
38
|
+
# Utility method that converts the string specified into a specially formatted text
|
|
39
|
+
# string which can be used as an XHTML link anchor name.
|
|
40
|
+
def MediaWikiHTMLGenerator.anchor_for(str)
|
|
41
|
+
str.strip.squeeze(' ').gsub(' ', '_').gsub('\'', '_')
|
|
61
42
|
end
|
|
62
43
|
|
|
44
|
+
|
|
63
45
|
protected
|
|
64
46
|
|
|
65
47
|
def parse_wiki_ast(ast)
|
|
@@ -67,20 +49,39 @@ protected
|
|
|
67
49
|
end
|
|
68
50
|
|
|
69
51
|
def parse_paragraph(ast)
|
|
70
|
-
|
|
52
|
+
if (children = ast.children)
|
|
53
|
+
if children.size == 1 and ((text = children.first.contents) == "\n\n" || text == "\r\n\r\n")
|
|
54
|
+
"<p><br />#{text}</p>"
|
|
55
|
+
else
|
|
56
|
+
"<p>#{super(ast)}</p>"
|
|
57
|
+
end
|
|
58
|
+
else
|
|
59
|
+
"<p><br /></p>"
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def parse_paste(ast)
|
|
64
|
+
return '' unless ast.children
|
|
65
|
+
"<div class=\"paste\" style=\"white-space: pre-wrap;\">#{super(ast)}</div>"
|
|
71
66
|
end
|
|
72
67
|
|
|
73
68
|
def parse_text(ast)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
ast.contents
|
|
69
|
+
if ast.formatting
|
|
70
|
+
case(ast.formatting)
|
|
71
|
+
when :None then MediaWikiHTMLGenerator.escape(ast.contents)
|
|
72
|
+
when :CharacterEntity then "&#{ast.contents};"
|
|
73
|
+
when :HLine then "<hr/>"
|
|
74
|
+
when :SignatureDate then @params.time.to_s
|
|
75
|
+
when :SignatureName then link_handler.link_for("User:#{@params.author}", @params.author)
|
|
76
|
+
when :SignatureFull then "#{link_handler.link_for("User:#{@params.author}", @params.author)} #{@params.time.to_s}"
|
|
77
|
+
end
|
|
77
78
|
else
|
|
78
|
-
|
|
79
|
+
escape(ast.contents)
|
|
79
80
|
end
|
|
80
81
|
end
|
|
81
82
|
|
|
82
83
|
def parse_formatted(ast)
|
|
83
|
-
tag =
|
|
84
|
+
tag = ast.formatting == :Bold ? 'b' : 'i'
|
|
84
85
|
"<#{tag}>" + super(ast) + "</#{tag}>"
|
|
85
86
|
end
|
|
86
87
|
|
|
@@ -95,28 +96,74 @@ protected
|
|
|
95
96
|
"<li>" + super(ast) + "</li>"
|
|
96
97
|
end
|
|
97
98
|
|
|
99
|
+
def parse_list_term(ast)
|
|
100
|
+
"<dt>" + super(ast) + "</dt>"
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def parse_list_definition(ast)
|
|
104
|
+
"<dd>" + super(ast) + "</dd>"
|
|
105
|
+
end
|
|
106
|
+
|
|
98
107
|
def parse_preformatted(ast)
|
|
108
|
+
if ast.indented
|
|
109
|
+
original_text = super(ast)
|
|
110
|
+
lines = original_text.split("\n").sort
|
|
111
|
+
shortest_space = lines.last.scan(/^\s+/)[0]
|
|
112
|
+
contents = ""
|
|
113
|
+
if shortest_space
|
|
114
|
+
original_text.each_line do |line|
|
|
115
|
+
contents << line.sub(shortest_space, "")
|
|
116
|
+
end
|
|
117
|
+
else
|
|
118
|
+
contents = original_text
|
|
119
|
+
end
|
|
120
|
+
"<pre class=\"indent\">" + contents + "</pre>"
|
|
121
|
+
else
|
|
122
|
+
"<pre>" + super(ast) + "</pre>"
|
|
123
|
+
end
|
|
99
124
|
end
|
|
100
125
|
|
|
101
126
|
def parse_section(ast)
|
|
102
|
-
|
|
127
|
+
generator = TextGenerator.new
|
|
128
|
+
anchor = MediaWikiHTMLGenerator.anchor_for(generator.parse(ast).join(' '))
|
|
129
|
+
"<h#{ast.level}><a name='#{anchor}'></a>" + super(ast) + "</h#{ast.level}>\n"
|
|
103
130
|
end
|
|
104
131
|
|
|
105
132
|
def parse_internal_link(ast)
|
|
106
133
|
text = parse_wiki_ast(ast)
|
|
107
|
-
text = ast.locator if text.length == 0
|
|
108
|
-
|
|
109
|
-
"<a href=\"#{href}\">#{text}</a>"
|
|
134
|
+
text = MediaWikiHTMLGenerator.escape(ast.locator) if text.length == 0
|
|
135
|
+
link_handler.link_for(ast.locator, text)
|
|
110
136
|
end
|
|
111
|
-
|
|
137
|
+
|
|
112
138
|
def parse_resource_link(ast)
|
|
113
139
|
options = ast.children.map do |node|
|
|
114
140
|
parse_internal_link_item(node)
|
|
115
141
|
end
|
|
116
|
-
link_handler.
|
|
142
|
+
link_handler.link_for_resource(ast.prefix, ast.locator, options)
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
def parse_template(ast)
|
|
146
|
+
parameters = ast.children.map do |node|
|
|
147
|
+
if node.parameter_value
|
|
148
|
+
node.parameter_value
|
|
149
|
+
else
|
|
150
|
+
parse_template(node.children.first)
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
template_handler.included_template(ast.template_name, parameters)
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
def parse_category_link(ast)
|
|
157
|
+
text = parse_wiki_ast(ast)
|
|
158
|
+
text = MediaWikiHTMLGenerator.escape(ast.locator) if text.length == 0
|
|
159
|
+
link_handler.link_for_category(ast.locator, text)
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
def parse_category(ast)
|
|
163
|
+
text = parse_wiki_ast(ast)
|
|
164
|
+
link_handler.category_add(ast.locator, ast.sort_as)
|
|
117
165
|
end
|
|
118
166
|
|
|
119
|
-
#Reimplement this
|
|
120
167
|
def parse_internal_link_item(ast)
|
|
121
168
|
text = super(ast)
|
|
122
169
|
text.strip
|
|
@@ -125,13 +172,15 @@ protected
|
|
|
125
172
|
def parse_link(ast)
|
|
126
173
|
text = super(ast)
|
|
127
174
|
href = ast.url
|
|
128
|
-
text = href if text.length == 0
|
|
129
|
-
|
|
175
|
+
text = MediaWikiHTMLGenerator.escape(href) if text.length == 0
|
|
176
|
+
link_handler.absolute_link_for(href, text, ast.link_type)
|
|
130
177
|
end
|
|
131
178
|
|
|
132
179
|
#Reimplement this
|
|
133
180
|
def parse_table(ast)
|
|
134
181
|
options = ast.options ? ' ' + ast.options.strip : ''
|
|
182
|
+
options << ' cellpadding="5"' unless options.include?('cellpadding')
|
|
183
|
+
options << ' border="1"' unless options.include?('border')
|
|
135
184
|
"<table#{options}>" + super(ast) + "</table>\n"
|
|
136
185
|
end
|
|
137
186
|
|
|
@@ -146,28 +195,33 @@ protected
|
|
|
146
195
|
if ast.type == :head
|
|
147
196
|
"<th>" + super(ast) + "</th>"
|
|
148
197
|
else
|
|
149
|
-
|
|
198
|
+
if ast.attributes
|
|
199
|
+
"<td #{ast.attributes.first.contents}>" + super(ast) + "</td>"
|
|
200
|
+
else
|
|
201
|
+
"<td>" + super(ast) + "</td>"
|
|
202
|
+
end
|
|
150
203
|
end
|
|
151
204
|
end
|
|
152
205
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
206
|
+
def parse_element(ast)
|
|
207
|
+
attr = ''
|
|
208
|
+
if ast.attributes
|
|
209
|
+
attr = ' ' + ast.attributes.collect{ |name, value|
|
|
210
|
+
name + '="' + MediaWikiHTMLGenerator.escape(value) + '"' }.join(' ')
|
|
211
|
+
end
|
|
212
|
+
if ast.children.size == 0
|
|
213
|
+
"<#{ast.name}#{attr} />"
|
|
214
|
+
else
|
|
215
|
+
"<#{ast.name}#{attr}>" + super(ast) + "</#{ast.name}>"
|
|
216
|
+
end
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
def parse_keyword(ast)
|
|
220
|
+
if ast.text == 'TOC'
|
|
221
|
+
generator = TocGenerator.new
|
|
222
|
+
generator.parse(@ast)
|
|
223
|
+
generator.html
|
|
224
|
+
end
|
|
171
225
|
end
|
|
172
226
|
|
|
173
227
|
#returns a tag name of the list in ast node
|
|
@@ -176,7 +230,109 @@ protected
|
|
|
176
230
|
return "ul"
|
|
177
231
|
elsif ast.list_type == :Numbered
|
|
178
232
|
return "ol"
|
|
233
|
+
elsif ast.list_type == :Dictionary
|
|
234
|
+
return "dl"
|
|
235
|
+
end
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
# AST walker that generates a table of contents, containing links to all
|
|
239
|
+
# section headings in the page.
|
|
240
|
+
class TocGenerator < MediaWikiHTMLGenerator
|
|
241
|
+
|
|
242
|
+
class TocNode
|
|
243
|
+
attr_accessor :children
|
|
244
|
+
attr_accessor :parent
|
|
245
|
+
attr_accessor :section
|
|
246
|
+
def initialize
|
|
247
|
+
@children = []
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
def add_child(child)
|
|
251
|
+
@children << child
|
|
252
|
+
child.parent = self
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
def level
|
|
256
|
+
res = 0
|
|
257
|
+
node = self
|
|
258
|
+
while p = node.parent
|
|
259
|
+
res += 1
|
|
260
|
+
node = p
|
|
261
|
+
end
|
|
262
|
+
res
|
|
263
|
+
end
|
|
264
|
+
|
|
265
|
+
def number
|
|
266
|
+
res = ''
|
|
267
|
+
node = self
|
|
268
|
+
while p = node.parent
|
|
269
|
+
res = "#{p.children.index(node)+1}." + res
|
|
270
|
+
node = p
|
|
271
|
+
end
|
|
272
|
+
res
|
|
273
|
+
end
|
|
179
274
|
end
|
|
275
|
+
|
|
276
|
+
def parse(ast)
|
|
277
|
+
@html = ''
|
|
278
|
+
@text_generator = TextGenerator.new
|
|
279
|
+
|
|
280
|
+
root = TocNode.new
|
|
281
|
+
root_stack = [root]
|
|
282
|
+
|
|
283
|
+
parse_branch = lambda do |ast|
|
|
284
|
+
ast.children.each do |child|
|
|
285
|
+
if child.class == SectionAST
|
|
286
|
+
root_stack.pop while child.level <= ((sec = root_stack.last.section) ? sec.level : 0)
|
|
287
|
+
|
|
288
|
+
node = TocNode.new
|
|
289
|
+
node.section = child
|
|
290
|
+
root_stack.last.add_child(node)
|
|
291
|
+
|
|
292
|
+
root_stack.push node
|
|
293
|
+
end
|
|
294
|
+
parse_branch.call(child)
|
|
295
|
+
end
|
|
296
|
+
end
|
|
297
|
+
parse_branch.call(ast)
|
|
298
|
+
|
|
299
|
+
@html += parse_section(root)
|
|
300
|
+
@html = "<div class=\"wikitoc\">\n<div class=\"wikitoctitle\">Contents</div>#{@html}\n</div>\n" if @html != ''
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
protected
|
|
304
|
+
|
|
305
|
+
def parse_section(toc_node)
|
|
306
|
+
html = ''
|
|
307
|
+
if toc_node.section
|
|
308
|
+
anchor = MediaWikiHTMLGenerator.anchor_for(@text_generator.parse(toc_node.section).join(' '))
|
|
309
|
+
html += "\n<li><a href='##{anchor}'><span class=\"wikitocnumber\">#{toc_node.number}</span><span class=\"wikitoctext\">#{parse_wiki_ast(toc_node.section).strip}</span></a>"
|
|
310
|
+
end
|
|
311
|
+
|
|
312
|
+
unless toc_node.children.empty?
|
|
313
|
+
html += "\n<ul>"
|
|
314
|
+
toc_node.children.each do |child_node|
|
|
315
|
+
html += parse_section(child_node)
|
|
316
|
+
end
|
|
317
|
+
html += "\n</ul>"
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
html += "</li>" if html[0,4] == "<li>"
|
|
321
|
+
html
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
end
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
# AST walker that outputs just the text portions of a page.
|
|
328
|
+
class TextGenerator < MediaWikiWalker
|
|
329
|
+
|
|
330
|
+
protected
|
|
331
|
+
|
|
332
|
+
def parse_text(ast)
|
|
333
|
+
MediaWikiHTMLGenerator.escape(ast.contents)
|
|
334
|
+
end
|
|
335
|
+
|
|
180
336
|
end
|
|
181
337
|
|
|
182
338
|
end
|