parsehtml 1.12.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +19 -0
- data/PostInstall.txt +2 -0
- data/README.rdoc +52 -0
- data/Rakefile +28 -0
- data/config/website.yml +2 -0
- data/lib/parsehtml.rb +452 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +71 -0
- data/test/test_helper.rb +3 -0
- data/test/test_parse.rb +40 -0
- data/test/test_parsehtml.rb +11 -0
- data/website/index.html +90 -0
- data/website/index.txt +62 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +159 -0
- data/website/template.html.erb +50 -0
- metadata +98 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
PostInstall.txt
|
4
|
+
README.rdoc
|
5
|
+
Rakefile
|
6
|
+
config/website.yml
|
7
|
+
lib/parsehtml.rb
|
8
|
+
script/console
|
9
|
+
script/destroy
|
10
|
+
script/generate
|
11
|
+
script/txt2html
|
12
|
+
test/test_helper.rb
|
13
|
+
test/test_parsehtml.rb
|
14
|
+
test/test_parse.rb
|
15
|
+
website/index.html
|
16
|
+
website/index.txt
|
17
|
+
website/javascripts/rounded_corners_lite.inc.js
|
18
|
+
website/stylesheets/screen.css
|
19
|
+
website/template.html.erb
|
data/PostInstall.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
= parsehtml
|
2
|
+
|
3
|
+
http://parsehtml.rubyforge.org
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
ParseHTML is an HTML parser which works with Ruby 1.8 and above. ParseHTML will even try to handle invalid HTML to some degree.
|
8
|
+
|
9
|
+
== SYNOPSIS:
|
10
|
+
|
11
|
+
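  require 'parsehtml'

  # walk the document node by node
  parser = ParseHTML.new('<p>Hello <b>world</b></p>')
  while parser.next_node
    puts "#{parser.node_type}: #{parser.node}"
  end

  # re-indent an HTML fragment
  puts ParseHTML.indent_html('<div><p>Hello</p></div>')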
|
12
|
+
|
13
|
+
== REQUIREMENTS:
|
14
|
+
|
15
|
+
Ruby 1.8
|
16
|
+
|
17
|
+
== INSTALL:
|
18
|
+
|
19
|
+
sudo gem install parsehtml
|
20
|
+
|
21
|
+
== DEVELOPERS:
|
22
|
+
|
23
|
+
Craig P Jolicoeur - http://github.com/cpjolicoeur
|
24
|
+
|
25
|
+
== ACKNOWLEDGEMENTS:
|
26
|
+
|
27
|
+
ParseHTML is heavily based on the ParseHTML PHP library by Milian Wolff (http://milianw.de)
|
28
|
+
|
29
|
+
== LICENSE:
|
30
|
+
|
31
|
+
(The MIT License)
|
32
|
+
|
33
|
+
Copyright (c) 2008 Craig P Jolicoeur
|
34
|
+
|
35
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
36
|
+
a copy of this software and associated documentation files (the
|
37
|
+
'Software'), to deal in the Software without restriction, including
|
38
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
39
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
40
|
+
permit persons to whom the Software is furnished to do so, subject to
|
41
|
+
the following conditions:
|
42
|
+
|
43
|
+
The above copyright notice and this permission notice shall be
|
44
|
+
included in all copies or substantial portions of the Software.
|
45
|
+
|
46
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
47
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
48
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
49
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
50
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
51
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
52
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Rakefile for the parsehtml gem: loads the build tooling, describes the gem
# through Hoe and then pulls in the generated and project-local rake tasks.
%w[rubygems rake rake/clean fileutils newgem rubigen].each { |f| require f }
require File.dirname(__FILE__) + '/lib/parsehtml'

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.new('parsehtml', ParseHTML::VERSION) do |p|
  p.developer('ParseHTML', 'cpjolicoeur@gmail.com')
  p.changes              = p.paragraphs_of("History.txt", 0..1).join("\n\n")
  p.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
  p.rubyforge_name       = p.name # TODO this is default value
  # p.extra_deps         = [
  #   ['activesupport','>= 2.0.2'],
  # ]
  p.extra_dev_deps = [
    ['newgem', ">= #{::Newgem::VERSION}"]
  ]

  p.clean_globs |= %w[**/.DS_Store tmp *.log]
  # The interpolation must not be escaped here: with "\#{...}" the path was the
  # literal text '#{p.rubyforge_name}/#{p.name}' whenever the two names differ.
  path = (p.rubyforge_name == p.name) ? p.rubyforge_name : "#{p.rubyforge_name}/#{p.name}"
  # Strip the leading project name; it is escaped so that characters such as
  # '.' or '+' in a project name are matched literally.
  p.remote_rdoc_dir = File.join(path.gsub(/^#{Regexp.escape(p.rubyforge_name)}\/?/, ''), 'rdoc')
  p.rsync_args = '-av --delete --ignore-errors'
end

require 'newgem/tasks' # load /tasks/*.rake
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# task :default => [:spec, :features]
|
data/config/website.yml
ADDED
data/lib/parsehtml.rb
ADDED
@@ -0,0 +1,452 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)) unless
|
2
|
+
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
|
+
|
4
|
+
# A small pull parser for HTML that tolerates invalid markup to some degree.
#
# Call #next_node repeatedly; after every call that returns true the readers
# below (node_type, node, tag_name, ...) describe the node just consumed.
# Markup that cannot be parsed as a known tag is not dropped: its leading '<'
# is escaped to '&lt;' and it is returned as text.
class ParseHTML
  VERSION = '1.12.0'

  # tags which are always empty (<br />, etc.)
  EMPTY_TAGS = %w(br hr input img area link meta param)

  # tags with preformatted text - whitespace won't be touched in them
  PREFORMATTED_TAGS = %w(script style pre code)

  # list of known elements
  # - tag_name => bool (is block level)
  # Tags that are not listed here are treated as invalid by parse_tag.
  # 'del' and 'ins' used to be listed twice (block and inline); the later,
  # inline entry was the effective one and is the only one kept.
  BLOCK_ELEMENTS = {}
  %w(address blockquote center dir div dl fieldset form h1 h2 h3 h4 h5 h6 hr
     isindex menu noframes noscript ol p pre table ul
     thead tbody tfoot td tr th li dd dt
     html body head meta link style title
     map object param embed area).each { |tag| BLOCK_ELEMENTS[tag] = true }
  %w(a abbr acronym applet b basefont bdo big br button cite code del dfn em
     font i img ins input iframe kbd label q samp script select small span
     strong sub sup textarea tt var).each { |tag| BLOCK_ELEMENTS[tag] = false }

  # characters that may separate attributes inside a tag
  TAG_WHITESPACE = [' ', "\t", "\n", "\r"]

  # characters that may delimit an attribute value
  ATTRIBUTE_QUOTES = ['"', "'"]

  # html to be parsed (shrinks as nodes are consumed)
  attr_accessor :html

  # node type:
  # - tag (see is_start_tag)
  # - text (includes cdata)
  # - comment
  # - doctype
  # - pi (processing instruction)
  attr_reader :node_type

  # current node content
  # - either a simple string (text node) or something like
  # - <tag attrib="value"...>
  attr_accessor :node

  # suppress HTML tags inside <code> tags
  attr_accessor :no_tags_in_code

  # whether the current node is an opening tag (<a>) or not (</a>)
  # - false if the current node is not a tag
  # - NOTE: empty tags (<br />) set this to true as well!
  attr_reader :is_start_tag

  # whether current node is an empty tag (<br />) or not (<a></a>)
  attr_reader :is_empty_tag

  # whether the current tag is a block level element
  attr_reader :is_block_element

  # tag name (lower case), nil if the current node is not a tag
  attr_reader :tag_name

  # attributes of the current tag (hash of name => value)
  attr_reader :tag_attributes

  # number of currently open preformatted tags; whitespace is kept verbatim
  # while this is greater than zero
  attr_reader :keep_whitespace

  # list of open tags (array)
  # - count this to get current depth
  attr_reader :open_tags

  # html:: the markup to parse
  def initialize(html = '')
    @html = html
    @open_tags = []
    @node_type = ''
    @node = ''
    @tag_name = ''
    @is_start_tag = false
    @is_empty_tag = false
    @is_block_element = false
    @no_tags_in_code = false
    @tag_attributes = nil
    @keep_whitespace = 0
    # Must survive between calls: whether a whitespace-only text node that
    # follows the previous node may be dropped.
    @skip_whitespace = true
  end

  # Consume the next node from #html.
  #
  # Returns true when a node was read and false once the input is exhausted.
  def next_node
    return false if @html.nil? || @html.empty?

    # The previous node was an opening, non-empty tag: register it as open now.
    if @is_start_tag && !@is_empty_tag
      @open_tags << @tag_name
      @keep_whitespace += 1 if PREFORMATTED_TAGS.include?(@tag_name)
    end

    if @html[0, 1] == '<'
      token = @html[0, 9]

      if token[0, 2] == '<?'
        # xml, prolog, or other pi's
        # TODO: trigger error (this might need some work)
        set_node('pi', offset_after('>'))
        return true
      end

      if token[0, 4] == '<!--'
        # HTML comment; without a closing --> fall back to the next '>'
        # (this is what firefox does with its parsing)
        set_node('comment', offset_after('-->', '>'))
        return true
      end

      if token == '<!DOCTYPE'
        set_node('doctype', offset_after('>'))
        @skip_whitespace = true
        return true
      end

      if token == '<![CDATA['
        # cdata, use text mode: drop the leading <![CDATA[ ...
        @html = @html[9, @html.size - 9]
        closing = @html.index(']]>')
        if closing
          set_node('text', closing + 3)
          # ... and the trailing ]]>
          @node = @node[0, closing]
        else
          # unterminated section: everything that is left is its content
          set_node('text', @html.size)
        end
        handle_whitespaces
        @skip_whitespace = true
        return true
      end

      if parse_tag
        # whitespace directly after a block level tag carries no meaning
        @skip_whitespace = @is_block_element ? true : false
        return true
      end
    end

    @skip_whitespace = false if @keep_whitespace > 0

    # when we get here it seems to be a text node; it runs up to the next '<'
    # (a '<' rejected by parse_tag has been escaped by invalid_tag already)
    text_end = @html.index('<') || @html.size
    set_node('text', text_end)
    handle_whitespaces
    return next_node if @skip_whitespace && @node == ' '

    @skip_whitespace = false
    true
  end

  # Rebuild #node for the current tag in a canonical form: lower case tag
  # name, double quoted attribute values ('"' escaped as &quot;) and ' />'
  # for empty tags. Does nothing unless the current node is a tag.
  def normalize_node
    return unless @node_type == 'tag'

    unless @is_start_tag
      @node = "</#{@tag_name}>"
      return
    end

    normalized = "<#{@tag_name}"
    @tag_attributes.each do |name, value|
      normalized += " #{name}=\"#{value.gsub('"', '&quot;')}\""
    end
    normalized += ' /' if @is_empty_tag
    @node = normalized + '>'
  end

  # Re-indent HTML: every block level element goes on its own line, indented
  # by one copy of +indent+ per open block level element.
  #
  # html::            markup to format
  # indent::          string inserted once per nesting level
  # no_tags_in_code:: treat markup inside <code> as plain text
  #
  # Returns the formatted markup as a new string.
  def self.indent_html(html, indent = ' ', no_tags_in_code = false)
    parser = ParseHTML.new(html)
    parser.no_tags_in_code = no_tags_in_code
    output = ''.dup
    after_block = true # last emitted node ended a line
    depth = 0          # number of open block level elements

    while parser.next_node
      is_tag = (parser.node_type == 'tag')
      parser.normalize_node if is_tag

      if is_tag && parser.is_block_element
        is_pre_or_code = %w(code pre).include?(parser.tag_name)
        collapsing = parser.keep_whitespace.zero?
        # finish the running line of inline content first
        output = output.rstrip + "\n" if collapsing && !after_block && !is_pre_or_code

        if parser.is_start_tag
          output << indent * depth
          depth += 1 unless parser.is_empty_tag
        else
          depth -= 1 if depth > 0
          # the closing tag of preformatted content must stay attached to it
          output << indent * depth unless is_pre_or_code
        end

        output << parser.node
        output << "\n" if collapsing && !(is_pre_or_code && parser.is_start_tag)
        after_block = true
      elsif is_tag && parser.tag_name == 'br'
        output << parser.node << "\n"
        after_block = true
      else
        if after_block && parser.keep_whitespace.zero?
          output << indent * depth
          parser.node = parser.node.lstrip
        end
        output << parser.node

        if %w(comment pi doctype).include?(parser.node_type)
          output << "\n"
        else
          after_block = false
        end
      end
    end

    output
  end

  private

  # Parse the tag at the start of @html: set tag name and attributes, check
  # closing tags against the open tag list, etc.
  #
  # Returns true and consumes the tag on success. On failure the tag's '<' is
  # escaped (see invalid_tag), nothing else changes and false is returned.
  def parse_tag
    pos = 1
    is_start_tag = (@html[pos, 1] != '/')
    pos += 1 unless is_start_tag

    # get tag name: letters, plus digits after the first character (h1..h6)
    name_start = pos
    while pos < @html.size
      char = @html[pos, 1]
      break unless ascii_letter?(char) || (pos > name_start && is_numeric?(char))
      pos += 1
    end
    tag_name = @html[name_start, pos - name_start].downcase

    # something went wrong, invalid or unknown tag
    return reject_tag if tag_name.empty? || !BLOCK_ELEMENTS.key?(tag_name)

    # suppress all HTML tags inside code tags (except the closing </code>)
    if @no_tags_in_code && @open_tags.last == 'code' && !(tag_name == 'code' && !is_start_tag)
      return reject_tag
    end

    # get tag attributes
    # TODO: in HTML 4 attributes dont need to be quoted
    is_empty_tag = false
    attributes = {}
    attr_name = ''
    name_complete = false # whitespace was seen after attr_name
    while pos < @html.size
      char = @html[pos, 1]
      break if char == '>'
      if @html[pos, 2] == '/>'
        is_empty_tag = true
        pos += 1
        break
      end

      if attribute_name_char?(char)
        if name_complete
          # the previous name had no value (<input checked name="x">)
          attributes[attr_name] = attr_name
          attr_name = ''
          name_complete = false
        end
        attr_name += char.downcase
      elsif TAG_WHITESPACE.include?(char)
        name_complete = !attr_name.empty?
      elsif char == '=' && ATTRIBUTE_QUOTES.include?(@html[pos + 1, 1])
        # quoted attribute value: runs up to the matching quote
        quote = @html[pos + 1, 1]
        value_start = pos + 2
        value_end = @html.index(quote, value_start)
        return reject_tag if value_end.nil?
        attributes[attr_name] = @html[value_start, value_end - value_start]
        attr_name = ''
        name_complete = false
        pos = value_end
      else
        return reject_tag
      end
      pos += 1
    end

    # the input ended before the tag was closed
    return reject_tag if @html[pos, 1] != '>'

    # html4 allows something like <option selected> instead of <option selected="selected">
    attributes[attr_name] = attr_name unless attr_name.empty?

    unless is_start_tag
      # end tags must not contain any attributes
      # or maybe we did not expect a different tag to be closed
      return reject_tag if !attributes.empty? || tag_name != @open_tags.last
      @open_tags.pop
      @keep_whitespace -= 1 if PREFORMATTED_TAGS.include?(tag_name)
    end

    pos += 1
    @node = @html[0, pos]
    @html = @html[pos, @html.size - pos]
    @tag_name = tag_name
    @tag_attributes = attributes
    @is_start_tag = is_start_tag
    @is_empty_tag = is_empty_tag || EMPTY_TAGS.include?(tag_name)
    # might not be well formed: force the ' />' ending
    @node = @node.sub(/ *\/? *>\z/, ' />') if @is_empty_tag
    @node_type = 'tag'
    @is_block_element = BLOCK_ELEMENTS[tag_name]
    true
  end

  # Mark the tag at the start of @html as invalid and report the failure.
  def reject_tag
    invalid_tag
    false
  end

  # handle invalid tags: escape the leading '<' so that the markup is read as
  # text and the parser cannot see the same '<' again
  def invalid_tag
    @html = '&lt;' + @html[1, @html.size - 1]
  end

  # update all variables and make @html shorter
  # - param type => @node_type
  # - param pos => which position to cut at
  def set_node(type, pos)
    if @node_type == 'tag'
      # the previous node was a tag: clear the tag specific state
      # (type == 'tag' is never passed here, see parse_tag)
      @tag_name = nil
      @tag_attributes = nil
      @is_start_tag = false
      @is_empty_tag = false
      @is_block_element = false
    end
    @node_type = type
    @node = @html[0, pos]
    @html = @html[pos, @html.size - pos]
  end

  # Position just behind the first terminator that occurs in @html; the
  # terminators are tried in the given order. Falls back to the end of the
  # input when none occurs, so unterminated constructs swallow the rest.
  def offset_after(*terminators)
    terminators.each do |terminator|
      found = @html.index(terminator)
      return found + terminator.size if found
    end
    @html.size
  end

  # check if @html begins with a specific string
  def match?(str)
    @html.slice(0, str.size) == str
  end

  # truncate runs of whitespace to a single blank, except inside
  # preformatted tags
  def handle_whitespaces
    return if @node.nil? || @keep_whitespace > 0
    @node = @node.gsub(/\s+/, ' ')
  end

  # check if a string is a valid numeric value
  def is_numeric?(val)
    Float(val)
    true
  rescue ArgumentError, TypeError
    false
  end

  # whether the single character string is an ASCII letter
  def ascii_letter?(char)
    (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z')
  end

  # whether the character may be part of an attribute name
  # (':' and '-' are needed for xml:lang and http-equiv)
  def attribute_name_char?(char)
    ascii_letter?(char) || char == ':' || char == '-'
  end
end # end class ParseHTML
|