mediacloth 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/mediacloth/mediawikiast.rb +42 -0
- data/lib/mediacloth/mediawikihtmlgenerator.rb +100 -29
- data/lib/mediacloth/mediawikilexer.rb +292 -37
- data/lib/mediacloth/mediawikilexer.rb~ +491 -0
- data/lib/mediacloth/mediawikiparser.rb +535 -173
- data/lib/mediacloth/mediawikiparser.y +183 -15
- data/lib/mediacloth/mediawikiparser.y~ +210 -0
- data/lib/mediacloth/mediawikiwalker.rb +56 -8
- data/test/data/html1 +1 -1
- data/test/data/html10 +98 -0
- data/test/data/html3 +1 -1
- data/test/data/html4 +11 -1
- data/test/data/html5 +5 -1
- data/test/data/html7 +1 -2
- data/test/data/html8 +1 -1
- data/test/data/html9 +6 -0
- data/test/data/input1 +5 -0
- data/test/data/input10 +124 -0
- data/test/data/input4 +50 -1
- data/test/data/input5 +8 -0
- data/test/data/input7 +35 -2
- data/test/data/input9 +14 -0
- data/test/data/lex1 +5 -1
- data/test/data/lex10 +87 -0
- data/test/data/lex4 +47 -1
- data/test/data/lex5 +7 -1
- data/test/data/lex7 +35 -2
- data/test/data/lex9 +14 -0
- data/test/dataproducers/html.rb +2 -2
- data/test/dataproducers/html.rb~ +24 -0
- data/test/dataproducers/lex.rb +3 -3
- data/test/dataproducers/lex.rb~ +15 -0
- data/test/debugwalker.rb +1 -1
- data/test/htmlgenerator.rb +5 -4
- data/test/lexer.rb +40 -3
- data/test/parser.rb +0 -1
- metadata +14 -3
@@ -32,6 +32,48 @@ class TextAST < FormattedAST
|
|
32
32
|
#Currently recognized formatting: :Link, :InternalLink, :HLine
|
33
33
|
end
|
34
34
|
|
35
|
+
#The node to represent a simple Mediawiki link.
|
36
|
+
class LinkAST < AST
|
37
|
+
#The link's URL
|
38
|
+
attr_accessor :url
|
39
|
+
end
|
40
|
+
|
41
|
+
#The node to represent a Mediawiki internal link
|
42
|
+
class InternalLinkAST < AST
|
43
|
+
#Holds the link locator, which is composed of a resource name only (e.g. the
|
44
|
+
#name of a wiki page)
|
45
|
+
attr_accessor :locator
|
46
|
+
end
|
47
|
+
|
48
|
+
#The node to represent a MediaWiki resource reference (embedded images, videos,
|
49
|
+
#etc.)
|
50
|
+
class ResourceLinkAST < AST
|
51
|
+
#The resource prefix that indicates the type of resource (e.g. an image
|
52
|
+
#resource is prefixed by "Image")
|
53
|
+
attr_accessor :prefix
|
54
|
+
#The resource locator
|
55
|
+
attr_accessor :locator
|
56
|
+
end
|
57
|
+
|
58
|
+
class InternalLinkItemAST < AST
|
59
|
+
end
|
60
|
+
|
61
|
+
#The node to represent a table
|
62
|
+
class TableAST < AST
|
63
|
+
attr_accessor :options
|
64
|
+
end
|
65
|
+
|
66
|
+
#The node to represent a table row
|
67
|
+
class TableRowAST < AST
|
68
|
+
attr_accessor :options
|
69
|
+
end
|
70
|
+
|
71
|
+
#The node to represent a table cell
|
72
|
+
class TableCellAST < AST
|
73
|
+
#the type of cell, :head or :body
|
74
|
+
attr_accessor :type
|
75
|
+
end
|
76
|
+
|
35
77
|
#The node to represent a list
|
36
78
|
class ListAST < AST
|
37
79
|
#Currently recognized types: :Bulleted, :Numbered
|
@@ -17,60 +17,138 @@ class MediaWikiHTMLGenerator < MediaWikiWalker
|
|
17
17
|
@html = ""
|
18
18
|
end
|
19
19
|
|
20
|
+
def parse(ast)
|
21
|
+
@html = super(ast)
|
22
|
+
end
|
23
|
+
|
24
|
+
#The default link handler. A custom link handler may extend this class.
|
25
|
+
class MediaWikiLinkHandler
|
26
|
+
|
27
|
+
#Method invoked to resolve references to wiki pages when they occur in an
|
28
|
+
#internal link. In all the following internal links, the page name is
|
29
|
+
#<tt>My Page</tt>:
|
30
|
+
#* <tt>[[My Page]]</tt>
|
31
|
+
#* <tt>[[My Page|Click here to view my page]]</tt>
|
32
|
+
#* <tt>[[My Page|Click ''here'' to view my page]]</tt>
|
33
|
+
#The return value should be a URL that references the page resource.
|
34
|
+
def url_for(resource)
|
35
|
+
"javascript:void(0)"
|
36
|
+
end
|
37
|
+
|
38
|
+
#Method invoked to resolve references to resources of unknown types. The
|
39
|
+
#type is indicated by the resource prefix. Examples of inline links to
|
40
|
+
#unknown references include:
|
41
|
+
#* <tt>[[Media:video.mpg]]</tt> (prefix <tt>Media</tt>, resource <tt>video.mpg</tt>)
|
42
|
+
#* <tt>[[Image:pretty.png|100px|A ''pretty'' picture]]</tt> (prefix <tt>Image</tt>,
|
43
|
+
# resource <tt>pretty.png</tt>, and options <tt>100px</tt> and <tt>A
|
44
|
+
# <i>pretty</i> picture</tt>).
|
45
|
+
#The return value should be a well-formed hyperlink, image, object or
|
46
|
+
#applet tag.
|
47
|
+
def link_for(prefix, resource, options=[])
|
48
|
+
"<a href=\"javascript:void(0)\">#{prefix}:#{resource}(#{options.join(', ')})</a>"
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
#Sets this generator's link handler.
|
53
|
+
def link_handler=(handler)
|
54
|
+
@link_handler = handler
|
55
|
+
end
|
56
|
+
|
57
|
+
#Returns this generator's link handler. If no handler was set, returns the default
|
58
|
+
#handler.
|
59
|
+
def link_handler
|
60
|
+
@link_handler ||= MediaWikiLinkHandler.new
|
61
|
+
end
|
62
|
+
|
20
63
|
protected
|
21
64
|
|
22
65
|
def parse_wiki_ast(ast)
|
23
|
-
super(ast)
|
66
|
+
super(ast).join
|
24
67
|
end
|
25
68
|
|
26
69
|
def parse_paragraph(ast)
|
27
|
-
|
28
|
-
super(ast)
|
29
|
-
@html += "</p>"
|
70
|
+
"<p>" + super(ast) + "</p>"
|
30
71
|
end
|
31
72
|
|
32
73
|
def parse_text(ast)
|
33
74
|
tag = formatting_to_tag(ast)
|
34
75
|
if tag[0].empty?
|
35
|
-
|
76
|
+
ast.contents
|
36
77
|
else
|
37
|
-
|
78
|
+
"<#{tag[0]}#{tag[1]}>#{ast.contents}</#{tag[0]}>"
|
38
79
|
end
|
39
|
-
super(ast)
|
40
80
|
end
|
41
81
|
|
42
82
|
def parse_formatted(ast)
|
43
83
|
tag = formatting_to_tag(ast)
|
44
|
-
|
45
|
-
super(ast)
|
46
|
-
@html += "</#{tag}>"
|
84
|
+
"<#{tag}>" + super(ast) + "</#{tag}>"
|
47
85
|
end
|
48
86
|
|
49
87
|
def parse_list(ast)
|
50
88
|
tag = list_tag(ast)
|
51
|
-
|
52
|
-
|
53
|
-
|
89
|
+
(["<#{tag}>"] +
|
90
|
+
super(ast) +
|
91
|
+
["</#{tag}>"]).join
|
54
92
|
end
|
55
93
|
|
56
94
|
def parse_list_item(ast)
|
57
|
-
|
58
|
-
super(ast)
|
59
|
-
@html += "</li>"
|
95
|
+
"<li>" + super(ast) + "</li>"
|
60
96
|
end
|
61
97
|
|
62
98
|
def parse_preformatted(ast)
|
63
|
-
super(ast)
|
64
99
|
end
|
65
100
|
|
66
101
|
def parse_section(ast)
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
102
|
+
"<h#{ast.level}>" + super(ast) + "</h#{ast.level}>"
|
103
|
+
end
|
104
|
+
|
105
|
+
def parse_internal_link(ast)
|
106
|
+
text = parse_wiki_ast(ast)
|
107
|
+
text = ast.locator if text.length == 0
|
108
|
+
href = link_handler.url_for(ast.locator)
|
109
|
+
"<a href=\"#{href}\">#{text}</a>"
|
110
|
+
end
|
111
|
+
|
112
|
+
def parse_resource_link(ast)
|
113
|
+
options = ast.children.map do |node|
|
114
|
+
parse_internal_link_item(node)
|
115
|
+
end
|
116
|
+
link_handler.link_for(ast.prefix, ast.locator, options)
|
117
|
+
end
|
118
|
+
|
119
|
+
#Renders an internal link item through the parent walker and strips surrounding whitespace.
|
120
|
+
def parse_internal_link_item(ast)
|
121
|
+
text = super(ast)
|
122
|
+
text.strip
|
123
|
+
end
|
124
|
+
|
125
|
+
def parse_link(ast)
|
126
|
+
text = super(ast)
|
127
|
+
href = ast.url
|
128
|
+
text = href if text.length == 0
|
129
|
+
"<a href=\"#{href}\">#{text}</a>"
|
71
130
|
end
|
72
131
|
|
73
|
-
|
132
|
+
#Renders a table as an HTML table element, adding the table's options (if any) as attributes.
|
133
|
+
def parse_table(ast)
|
134
|
+
options = ast.options ? ' ' + ast.options.strip : ''
|
135
|
+
"<table#{options}>" + super(ast) + "</table>\n"
|
136
|
+
end
|
137
|
+
|
138
|
+
#Renders a table row as an HTML tr element, adding the row's options (if any) as attributes.
|
139
|
+
def parse_table_row(ast)
|
140
|
+
options = ast.options ? ' ' + ast.options.strip : ''
|
141
|
+
"<tr#{options}>" + super(ast) + "</tr>\n"
|
142
|
+
end
|
143
|
+
|
144
|
+
#Renders a table cell as a th element when its type is :head, and as a td element otherwise.
|
145
|
+
def parse_table_cell(ast)
|
146
|
+
if ast.type == :head
|
147
|
+
"<th>" + super(ast) + "</th>"
|
148
|
+
else
|
149
|
+
"<td>" + super(ast) + "</td>"
|
150
|
+
end
|
151
|
+
end
|
74
152
|
|
75
153
|
#returns an array with a tag name and tag attributes
|
76
154
|
def formatting_to_tag(ast)
|
@@ -79,13 +157,6 @@ private
|
|
79
157
|
tag = ["b", ""]
|
80
158
|
elsif ast.formatting == :Italic
|
81
159
|
tag = ["i", ""]
|
82
|
-
elsif ast.formatting == :Link or ast.formatting == :ExternalLink
|
83
|
-
links = ast.contents.split
|
84
|
-
link = links[0]
|
85
|
-
link_name = links[1, links.length-1].join(" ")
|
86
|
-
link_name = link if link_name.empty?
|
87
|
-
ast.contents = link_name
|
88
|
-
tag = ["a", " href=\"#{link}\" rel=\"nofollow\""]
|
89
160
|
elsif ast.formatting == :HLine
|
90
161
|
ast.contents = ""
|
91
162
|
tag = ["hr", ""]
|