mediacloth 0.0.2 → 0.0.3

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -8,12 +8,15 @@
8
8
  # parser.parse(input)
9
9
  class MediaWikiParser
10
10
 
11
- token BOLDSTART BOLDEND ITALICSTART ITALICEND LINKSTART LINKEND
12
- INTLINKSTART INTLINKEND SECTION_START SECTION_END TEXT PRE
11
+ token BOLDSTART BOLDEND ITALICSTART ITALICEND LINKSTART LINKEND LINKSEP
12
+ INTLINKSTART INTLINKEND INTLINKSEP RESOURCE_SEP
13
+ SECTION_START SECTION_END TEXT PRE
13
14
  HLINE SIGNATURE_NAME SIGNATURE_DATE SIGNATURE_FULL
14
15
  UL_START UL_END LI_START LI_END OL_START OL_END
16
+ TABLE_START TABLE_END ROW_START ROW_END HEAD_START HEAD_END CELL_START CELL_END
15
17
  PARA_START PARA_END
16
18
 
19
+
17
20
  rule
18
21
 
19
22
  wiki:
@@ -47,10 +50,7 @@ contents:
47
50
  }
48
51
  | section
49
52
  {
50
- s = SectionAST.new
51
- s.contents = val[0][0]
52
- s.level = val[0][1]
53
- result = s
53
+ result = val[0]
54
54
  }
55
55
  | PARA_START para_contents PARA_END
56
56
  {
@@ -60,6 +60,29 @@ contents:
60
60
  result = p
61
61
  end
62
62
  }
63
+ | LINKSTART link_contents LINKEND
64
+ {
65
+ l = LinkAST.new
66
+ l.url = val[1][0]
67
+ l.children += val[1][1..-1] if val[1].length > 1
68
+ result = l
69
+ }
70
+ | INTLINKSTART TEXT RESOURCE_SEP TEXT reslink_repeated_contents INTLINKEND
71
+ {
72
+ l = ResourceLinkAST.new
73
+ l.prefix = val[1]
74
+ l.locator = val[3]
75
+ l.children = val[4] unless val[4].nil? or val[4].empty?
76
+ result = l
77
+ }
78
+ | INTLINKSTART TEXT intlink_repeated_contents INTLINKEND
79
+ {
80
+ l = InternalLinkAST.new
81
+ l.locator = val[1]
82
+ l.children = val[2] unless val[2].nil? or val[2].empty?
83
+ result = l
84
+ }
85
+ | table
63
86
  ;
64
87
 
65
88
  #TODO: remove empty paragraphs in lexer
@@ -71,6 +94,60 @@ para_contents:
71
94
  {
72
95
  result = val[0]
73
96
  }
97
+ ;
98
+
99
+ link_contents:
100
+ TEXT
101
+ {
102
+ result = val
103
+ }
104
+ | TEXT LINKSEP link_repeated_contents
105
+ {
106
+ result = [val[0]]
107
+ result += val[2]
108
+ }
109
+ ;
110
+
111
+
112
+ link_repeated_contents:
113
+ repeated_contents
114
+ {
115
+ result = val[0]
116
+ }
117
+ | repeated_contents LINKSEP link_repeated_contents
118
+ {
119
+ result = val[0]
120
+ result += val[2] if val[2]
121
+ }
122
+ ;
123
+
124
+
125
+ intlink_repeated_contents:
126
+ {
127
+ result = nil
128
+ }
129
+ | INTLINKSEP repeated_contents
130
+ {
131
+ result = val[1]
132
+ }
133
+ ;
134
+
135
+ reslink_repeated_contents:
136
+ {
137
+ result = nil
138
+ }
139
+ | INTLINKSEP reslink_repeated_contents
140
+ {
141
+ result = val[1]
142
+ }
143
+ | INTLINKSEP repeated_contents reslink_repeated_contents
144
+ {
145
+ i = InternalLinkItemAST.new
146
+ i.children = val[1]
147
+ result = [i]
148
+ result += val[2] if val[2]
149
+ }
150
+ ;
74
151
 
75
152
  repeated_contents: contents
76
153
  {
@@ -98,11 +175,79 @@ text: element
98
175
  }
99
176
  ;
100
177
 
101
- element: LINKSTART TEXT LINKEND
102
- { return [:Link, val[1]] }
103
- | INTLINKSTART TEXT INTLINKEND
104
- { return [:InternalLink, val[1]] }
105
- | TEXT
178
+ table:
179
+ TABLE_START table_contents TABLE_END
180
+ {
181
+ table = TableAST.new
182
+ table.children = val[1] unless val[1].nil? or val[1].empty?
183
+ result = table
184
+ }
185
+ | TABLE_START TEXT table_contents TABLE_END
186
+ {
187
+ table = TableAST.new
188
+ table.options = val[1]
189
+ table.children = val[2] unless val[2].nil? or val[2].empty?
190
+ result = table
191
+ }
192
+
193
+ table_contents:
194
+ {
195
+ result = nil
196
+ }
197
+ | ROW_START row_contents ROW_END table_contents
198
+ {
199
+ row = TableRowAST.new
200
+ row.children = val[1] unless val[1].nil? or val[1].empty?
201
+ result = [row]
202
+ result += val[3] unless val[3].nil? or val[3].empty?
203
+ }
204
+ | ROW_START TEXT row_contents ROW_END table_contents
205
+ {
206
+ row = TableRowAST.new
207
+ row.children = val[2] unless val[2].nil? or val[2].empty?
208
+ row.options = val[1]
209
+ result = [row]
210
+ result += val[4] unless val[4].nil? or val[4].empty?
211
+ }
212
+
213
+ row_contents:
214
+ {
215
+ result = nil
216
+ }
217
+ | HEAD_START HEAD_END row_contents
218
+ {
219
+ cell = TableCellAST.new
220
+ cell.type = :head
221
+ result = [cell]
222
+ result += val[2] unless val[2].nil? or val[2].empty?
223
+ }
224
+ | HEAD_START repeated_contents HEAD_END row_contents
225
+ {
226
+ cell = TableCellAST.new
227
+ cell.children = val[1] unless val[1].nil? or val[1].empty?
228
+ cell.type = :head
229
+ result = [cell]
230
+ result += val[3] unless val[3].nil? or val[3].empty?
231
+ }
232
+ | CELL_START CELL_END row_contents
233
+ {
234
+ cell = TableCellAST.new
235
+ cell.type = :body
236
+ result = [cell]
237
+ result += val[2] unless val[2].nil? or val[2].empty?
238
+ }
239
+ | CELL_START repeated_contents CELL_END row_contents
240
+ {
241
+ cell = TableCellAST.new
242
+ cell.children = val[1] unless val[1].nil? or val[1].empty?
243
+ cell.type = :body
244
+ result = [cell]
245
+ result += val[3] unless val[3].nil? or val[3].empty?
246
+ }
247
+
248
+
249
+ element:
250
+ TEXT
106
251
  { return [:None, val[0]] }
107
252
  | HLINE
108
253
  { return [:HLine, val[0]] }
@@ -114,7 +259,20 @@ element: LINKSTART TEXT LINKEND
114
259
  { return [:SignatureFull, val[0]] }
115
260
  ;
116
261
 
117
- formatted_element: BOLDSTART repeated_contents BOLDEND
262
+ formatted_element:
263
+ BOLDSTART BOLDEND
264
+ {
265
+ result = FormattedAST.new
266
+ result.formatting = :Bold
267
+ result
268
+ }
269
+ | ITALICSTART ITALICEND
270
+ {
271
+ result = FormattedAST.new
272
+ result.formatting = :Italic
273
+ result
274
+ }
275
+ | BOLDSTART repeated_contents BOLDEND
118
276
  {
119
277
  p = FormattedAST.new
120
278
  p.formatting = :Bold
@@ -161,7 +319,12 @@ list_contents:
161
319
  { result = [] }
162
320
  ;
163
321
 
164
- list_item: LI_START repeated_contents LI_END
322
+ list_item:
323
+ LI_START LI_END
324
+ {
325
+ result = ListItemAST.new
326
+ }
327
+ | LI_START repeated_contents LI_END
165
328
  {
166
329
  li = ListItemAST.new
167
330
  li.children += val[1]
@@ -173,8 +336,13 @@ preformatted: PRE
173
336
  { result = val[0] }
174
337
  ;
175
338
 
176
- section: SECTION_START TEXT SECTION_END
177
- { result = [val[1], val[0].length] }
339
+ section: SECTION_START repeated_contents SECTION_END
340
+ { result = [val[1], val[0].length]
341
+ s = SectionAST.new
342
+ s.children = val[1]
343
+ s.level = val[0].length
344
+ result = s
345
+ }
178
346
  ;
179
347
 
180
348
  end
@@ -0,0 +1,210 @@
1
+ #The parser for the MediaWiki language.
2
+ #
3
+ #Usage together with a lexer:
4
+ # inputFile = File.new("data/input1", "r")
5
+ # input = inputFile.read
6
+ # parser = MediaWikiParser.new
7
+ # parser.lexer = MediaWikiLexer.new
8
+ # parser.parse(input)
9
+ class MediaWikiParser
10
+
11
+ token BOLDSTART BOLDEND ITALICSTART ITALICEND LINKSTART LINKEND
12
+ INTLINKSTART INTLINKEND SECTION_START SECTION_END TEXT PRE
13
+ HLINE SIGNATURE_NAME SIGNATURE_DATE SIGNATURE_FULL
14
+ UL_START UL_END LI_START LI_END OL_START OL_END
15
+ PARA_START PARA_END
16
+
17
+ rule
18
+
19
+ wiki:
20
+ repeated_contents
21
+ {
22
+ @nodes.push WikiAST.new
23
+ #@nodes.last.children.insert(0, val[0])
24
+ #puts val[0]
25
+ @nodes.last.children += val[0]
26
+ }
27
+ ;
28
+
29
+ contents:
30
+ text
31
+ {
32
+ result = val[0]
33
+ }
34
+ | bulleted_list
35
+ {
36
+ result = val[0]
37
+ }
38
+ | numbered_list
39
+ {
40
+ result = val[0]
41
+ }
42
+ | preformatted
43
+ {
44
+ p = PreformattedAST.new
45
+ p.contents = val[0]
46
+ result = p
47
+ }
48
+ | section
49
+ {
50
+ s = SectionAST.new
51
+ s.contents = val[0][0]
52
+ s.level = val[0][1]
53
+ result = s
54
+ }
55
+ | PARA_START para_contents PARA_END
56
+ {
57
+ if val[1]
58
+ p = ParagraphAST.new
59
+ p.children = val[1]
60
+ result = p
61
+ end
62
+ }
63
+ | error
64
+ {
65
+ puts "ERR"
66
+ yyerrok
67
+ }
68
+ ;
69
+
70
+ #TODO: remove empty paragraphs in lexer
71
+ para_contents:
72
+ {
73
+ result = nil
74
+ }
75
+ | repeated_contents
76
+ {
77
+ result = val[0]
78
+ }
79
+
80
+ repeated_contents: contents
81
+ {
82
+ result = []
83
+ result << val[0]
84
+ }
85
+ | repeated_contents contents
86
+ {
87
+ result = []
88
+ result += val[0]
89
+ result << val[1]
90
+ }
91
+ ;
92
+
93
+ text: element
94
+ {
95
+ p = TextAST.new
96
+ p.formatting = val[0][0]
97
+ p.contents = val[0][1]
98
+ result = p
99
+ }
100
+ | formatted_element
101
+ {
102
+ result = val[0]
103
+ }
104
+ ;
105
+
106
+ element: LINKSTART TEXT LINKEND
107
+ { return [:Link, val[1]] }
108
+ | INTLINKSTART TEXT INTLINKEND
109
+ { return [:InternalLink, val[1]] }
110
+ | TEXT
111
+ { return [:None, val[0]] }
112
+ | HLINE
113
+ { return [:HLine, val[0]] }
114
+ | SIGNATURE_DATE
115
+ { return [:SignatureDate, val[0]] }
116
+ | SIGNATURE_NAME
117
+ { return [:SignatureName, val[0]] }
118
+ | SIGNATURE_FULL
119
+ { return [:SignatureFull, val[0]] }
120
+ ;
121
+
122
+ formatted_element: BOLDSTART repeated_contents BOLDEND
123
+ {
124
+ p = FormattedAST.new
125
+ p.formatting = :Bold
126
+ p.children += val[1]
127
+ result = p
128
+ }
129
+ | ITALICSTART repeated_contents ITALICEND
130
+ {
131
+ p = FormattedAST.new
132
+ p.formatting = :Italic
133
+ p.children += val[1]
134
+ result = p
135
+ }
136
+ ;
137
+
138
+ bulleted_list: UL_START list_item list_contents UL_END
139
+ {
140
+ list = ListAST.new
141
+ list.list_type = :Bulleted
142
+ list.children << val[1]
143
+ list.children += val[2]
144
+ result = list
145
+ }
146
+ ;
147
+
148
+ numbered_list: OL_START list_item list_contents OL_END
149
+ {
150
+ list = ListAST.new
151
+ list.list_type = :Numbered
152
+ list.children << val[1]
153
+ list.children += val[2]
154
+ result = list
155
+ }
156
+ ;
157
+
158
+ list_contents:
159
+ { result = [] }
160
+ list_item list_contents
161
+ {
162
+ result << val[1]
163
+ result += val[2]
164
+ }
165
+ |
166
+ { result = [] }
167
+ ;
168
+
169
+ list_item: LI_START repeated_contents LI_END
170
+ {
171
+ li = ListItemAST.new
172
+ li.children += val[1]
173
+ result = li
174
+ }
175
+ ;
176
+
177
+ preformatted: PRE
178
+ { result = val[0] }
179
+ ;
180
+
181
+ section: SECTION_START TEXT SECTION_END
182
+ { result = [val[1], val[0].length] }
183
+ ;
184
+
185
+ end
186
+
187
+ ---- header ----
188
+ require 'mediacloth/mediawikiast'
189
+
190
+ ---- inner ----
191
+
192
+ attr_accessor :lexer
193
+
194
+ def initialize
195
+ @nodes = []
196
+ super
197
+ end
198
+
199
+ #Tokenizes input string and parses it.
200
+ def parse(input)
201
+ @yydebug=true
202
+ lexer.tokenize(input)
203
+ do_parse
204
+ return @nodes.last
205
+ end
206
+
207
+ #Asks the lexer to return the next token.
208
+ def next_token
209
+ return @lexer.lex
210
+ end
@@ -21,13 +21,21 @@ protected
21
21
 
22
22
  #Reimplement this
23
23
  def parse_wiki_ast(ast)
24
- ast.children.each do |c|
25
- parse_formatted(c) if c.class == FormattedAST
26
- parse_text(c) if c.class == TextAST
27
- parse_list(c) if c.class == ListAST
28
- parse_preformatted(c) if c.class == PreformattedAST
29
- parse_section(c) if c.class == SectionAST
30
- parse_paragraph(c) if c.class == ParagraphAST
24
+ ast.children.map do |c|
25
+ r = parse_formatted(c) if c.class == FormattedAST
26
+ r = parse_text(c) if c.class == TextAST
27
+ r = parse_list(c) if c.class == ListAST
28
+ r = parse_preformatted(c) if c.class == PreformattedAST
29
+ r = parse_section(c) if c.class == SectionAST
30
+ r = parse_paragraph(c) if c.class == ParagraphAST
31
+ r = parse_link(c) if c.class == LinkAST
32
+ r = parse_internal_link(c) if c.class == InternalLinkAST
33
+ r = parse_resource_link(c) if c.class == ResourceLinkAST
34
+ r = parse_internal_link_item(c) if c.class == InternalLinkItemAST
35
+ r = parse_table(c) if c.class == TableAST
36
+ r = parse_table_row(c) if c.class == TableRowAST
37
+ r = parse_table_cell(c) if c.class == TableCellAST
38
+ r
31
39
  end
32
40
  end
33
41
 
@@ -47,7 +55,7 @@ protected
47
55
 
48
56
  #Reimplement this
49
57
  def parse_list(ast)
50
- ast.children.each do |c|
58
+ ast.children.map do |c|
51
59
  parse_list_item(c) if c.class == ListItemAST
52
60
  end
53
61
  end
@@ -63,6 +71,46 @@ protected
63
71
 
64
72
  #Reimplement this
65
73
  def parse_section(ast)
74
+ parse_wiki_ast(ast)
75
+ end
76
+
77
+ #Reimplement this
78
+ def parse_link(ast)
79
+ parse_wiki_ast(ast)
80
+ end
81
+
82
+ #Reimplement this
83
+ def parse_internal_link(ast)
84
+ ast.children.map do |c|
85
+ parse_internal_link_item(c) if c.class == InternalLinkItemAST
86
+ end
87
+ end
88
+
89
+ #Reimplement this
90
+ def parse_resource_link(ast)
91
+ ast.children.map do |c|
92
+ parse_internal_link_item(c) if c.class == InternalLinkItemAST
93
+ end
94
+ end
95
+
96
+ #Reimplement this
97
+ def parse_internal_link_item(ast)
98
+ parse_wiki_ast(ast)
99
+ end
100
+
101
+ #Reimplement this
102
+ def parse_table(ast)
103
+ parse_wiki_ast(ast)
104
+ end
105
+
106
+ #Reimplement this
107
+ def parse_table_row(ast)
108
+ parse_wiki_ast(ast)
109
+ end
110
+
111
+ #Reimplement this
112
+ def parse_table_cell(ast)
113
+ parse_wiki_ast(ast)
66
114
  end
67
115
 
68
116
  end
data/test/data/html1 CHANGED
@@ -1,6 +1,6 @@
1
1
  <p>This is a simple text with <b>Bold text</b> and <i>Italic text</i> inside.
2
2
  One paragraph can be written in several lines.</p><p>Another paragraph starts after a blank line.</p><p>
3
- Another one.</p><p>This is text with Internal Link and <a href="http://www.example.com" rel="nofollow">external link</a>.</p><p>We can have headlines:</p><h1>Headline1</h1><h2>Headline2</h2><h3>Headline3</h3><h4>Headline4</h4><h5>Headline5</h5><h6>Headline6</h6><h7>Headline7</h7><hr></hr><p>
3
+ Another one.</p><p>This is text with <a href="javascript:void(0)">Internal Link</a> and <a href="http://www.example.com">external link</a>.</p><p>We can have headlines:</p><h1> Headline1 </h1><h2> Headline2 </h2><h3> Headline3 </h3><h4> Headline4 </h4><h5> Headline5 </h5><h6> Headline6 </h6><h7> Headline7 </h7><p>Headlines may have formatting:</p><h1> See <a href="javascript:void(0)">Internal Link</a> for more info </h1><h2> This is an <b>important</b> heading </h2><hr></hr><p>
4
4
  This is a text after the line.</p><ul><li>foo
5
5
  </li><li>foo2
6
6
  </li></ul>
data/test/data/html10 ADDED
@@ -0,0 +1,98 @@
1
+ <p>Some examples of tables.</p><table></table>
2
+ <table><tr></tr>
3
+ </table>
4
+ <table><tr><td> a
5
+ </td><td> b
6
+ </td></tr>
7
+ </table>
8
+ <table><tr><td> a
9
+ </td><td> b
10
+ </td></tr>
11
+ <tr><td> 1
12
+ </td><td> 2
13
+ </td></tr>
14
+ </table>
15
+ <table><tr><td> a </td><td> b
16
+ </td></tr>
17
+ <tr><td> 1 </td><td> 2
18
+ </td></tr>
19
+ </table>
20
+ <table><tr><th> a
21
+ </th><th> b
22
+ </th></tr>
23
+ <tr><td> 1
24
+ </td><td> 2
25
+ </td></tr>
26
+ </table>
27
+ <table><tr><th> a </th><th> b
28
+ </th></tr>
29
+ <tr><td> 1 </td><td> 2
30
+ </td></tr>
31
+ </table>
32
+ <table><tr><td> a
33
+ </td></tr>
34
+ <tr><td> 1
35
+ </td><td> 2
36
+ </td></tr>
37
+ </table>
38
+ <table><tr><td> a
39
+ </td><td> b
40
+ </td></tr>
41
+ <tr><td> 1
42
+ </td></tr>
43
+ </table>
44
+ <table><tr><td> a
45
+ </td><td> b
46
+ </td></tr>
47
+ <tr><td></td><td> 2
48
+ </td></tr>
49
+ </table>
50
+ <table><tr><td> <a href="http://example.com">Example</a></td><td> <b>bold</b></td></tr>
51
+ <tr><td> <a href="javascript:void(0)">Example</a></td><td> <a href="javascript:void(0)">image:example.jpg(1, 2, 3)</a></td></tr>
52
+ </table>
53
+ <table><tr><td> <a href="http://example.com">Example
54
+ </a></td><td> <b>bold
55
+ </b></td></tr>
56
+ <tr><td> <a href="javascript:void(0)">Example
57
+ </a></td><td> <b>bold <i>italic
58
+ </i></b></td></tr>
59
+ </table>
60
+ <table><tr><td> a
61
+ </td><td> b
62
+ </td></tr>
63
+ <tr><td> A list in a cell:
64
+ <ul><li>one
65
+ </li><li>two
66
+ </li></ul></td></tr>
67
+ </table>
68
+ <table><tr><td> a
69
+ </td><td> b
70
+ </td></tr>
71
+ <tr><td> A table in a cell:
72
+ <table><tr><td> 1
73
+ </td><td> 2
74
+ </td></tr>
75
+ <tr><td> one
76
+ </td><td> two
77
+ </td></tr>
78
+ </table>
79
+ </td></tr>
80
+ </table>
81
+ <table border="1"><tr><th> a
82
+ </th><th> b
83
+ </th></tr>
84
+ <tr align="left"><td> 1
85
+ </td><td> 2
86
+ </td></tr>
87
+ </table>
88
+ <p>
89
+ Text before
90
+ </p><table><tr><td> a
91
+ </td><td> b
92
+ </td></tr>
93
+ </table>
94
+ <p>Text before with <i>italic
95
+ </i></p><table><tr><td> a
96
+ </td><td> b
97
+ </td></tr>
98
+ </table>
data/test/data/html3 CHANGED
@@ -1 +1 @@
1
- <p>--Sat Jan 01 01:01:01 EET 2000CreatorCreator Sat Jan 01 01:01:01 EET 2000</p>
1
+ <p>--Sat Jan 01 01:01:01 UTC 2000CreatorCreator Sat Jan 01 01:01:01 UTC 2000</p>
data/test/data/html4 CHANGED
@@ -1 +1,11 @@
1
- <p><b><i>Foo</i></b></p>
1
+ <p>Test mixing of bold and italic formatting:</p><p><i>italic<b>bold</b>italic</i></p><p><b>bold<i>italic</i>bold</b></p><p><i><b>boldalic</b></i></p><p><i>italic<b>bold</b>italic</i><b>bold<i>italic</i>bold</b></p><p>
2
+ Test bold and italic wrapped around inline links:</p><p><i><a href="http://example.com">http://example.com</a></i></p><p><b><a href="http://example.com">http://example.com</a></b></p><p><i><a href="http://example.com'">http://example.com'</a></i></p><p><b><a href="http://example.com'">http://example.com'</a> is good</b></p><p><i><a href="http://example.com'">http://example.com'</a> is good</i></p><p>
3
+ Test unclosed bold and italic formatting:</p><p><i>Some italic and now </i>bold
4
+ </p><h2>Heading</h2><p>
5
+ Text</p><p><i>Some italic and now <b>bold
6
+ </b></i></p><h2>Heading</h2><p>
7
+ Text</p><p><i>Some italic and now <b>bold</b></i></p><h2>Heading</h2><p>
8
+ Text</p><p><i>Some italic and now <b>bold
9
+ </b></i></p><ul><li>one
10
+ </li><li>two
11
+ </li></ul><p><i>Some italic and now <b>bold</b></i></p><p>Text</p>
data/test/data/html5 CHANGED
@@ -11,4 +11,8 @@
11
11
  <ol><li>a
12
12
  </li></ol></li></ol></li></ol></li><li>a
13
13
  <ol><li>aa
14
- </li></ol></li></ol>
14
+ </li></ol></li></ol><ul><li>A
15
+ </li><li>B
16
+ </li><li></li></ul><ol><li>A
17
+ </li><li>B
18
+ </li><li></li></ol>
data/test/data/html7 CHANGED
@@ -1,2 +1 @@
1
- <p><a href="http://www.example.com" rel="nofollow">external link</a><a href="http://www.example.com" rel="nofollow">http://www.example.com</a> [foo]
2
- </p>
1
+ <p><a href="http://sun.com">http://sun.com</a></p><p><a href="http://sun.com">http://sun.com</a></p><p><a href="mailto:joe@sun.com">mailto:joe@sun.com</a></p><p><a href="http://sun.com">stars</a></p><p><a href="http://sun.com">stars and moon</a></p><p><a href="http://sun.com">stars and <i>moon</i>and <b>trees</b>and birds</a></p><p><a href="javascript:void(0)">sun</a></p><p><a href="javascript:void(0)">All about Sun</a></p><p><a href="javascript:void(0)">image:sun(All about Sun)</a></p><p><a href="javascript:void(0)">nofollow|All about Sun</a></p><p><a href="javascript:void(0)">image:sun(nofollow, All about Sun)</a></p><p><a href="javascript:void(0)">image:sun(All about <a href="javascript:void(0)">Sun</a>)</a></p><p><a href="javascript:void(0)">image:sun(All about <a href="javascript:void(0)">More about</a>)</a></p><p><a href="javascript:void(0)">image:sun(one, two, three)</a></p><p>[]</p><p>[ ]</p><p>[[]]</p><p>[[ ]]</p>
data/test/data/html8 CHANGED
@@ -1 +1 @@
1
- <p>=== foo ===</p>
1
+ <h3> foo </h3>