html5small 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/LICENSE +674 -0
- data/README.md +7 -0
- data/Rakefile +38 -0
- data/VERSION +1 -0
- data/bin/html5small +3 -0
- data/fixtures/attribute-value-ampersand.html +4 -0
- data/fixtures/attribute-value-ampersand.html.min +1 -0
- data/fixtures/attribute-value-quot.html +4 -0
- data/fixtures/attribute-value-quot.html.min +1 -0
- data/fixtures/dl.html +10 -0
- data/fixtures/dl.html.min +1 -0
- data/fixtures/entities-expand.html +4 -0
- data/fixtures/entities-expand.html.min +1 -0
- data/fixtures/entities-no-expand.html +3 -0
- data/fixtures/entities-no-expand.html.min +1 -0
- data/fixtures/ie.html +8 -0
- data/fixtures/ie.html.min +3 -0
- data/fixtures/lists.html +8 -0
- data/fixtures/lists.html.min +1 -0
- data/fixtures/newlines.html +6 -0
- data/fixtures/newlines.html.min +1 -0
- data/fixtures/normalise-attribute-name.html +4 -0
- data/fixtures/normalise-attribute-name.html.min +1 -0
- data/fixtures/normalise-tag-name.html +5 -0
- data/fixtures/normalise-tag-name.html.min +1 -0
- data/fixtures/pre-entities.html +7 -0
- data/fixtures/pre-entities.html.min +4 -0
- data/fixtures/pre.html +9 -0
- data/fixtures/pre.html.min +6 -0
- data/fixtures/quot-entity.html +5 -0
- data/fixtures/quot-entity.html.min +1 -0
- data/fixtures/skeleton.html +12 -0
- data/fixtures/skeleton.html.min +1 -0
- data/fixtures/sort-attributes.html +4 -0
- data/fixtures/sort-attributes.html.min +1 -0
- data/fixtures/table.html +21 -0
- data/fixtures/table.html.min +1 -0
- data/fixtures/tabs.html +4 -0
- data/fixtures/tabs.html.min +1 -0
- data/fixtures/whitespace-complex.html +9 -0
- data/fixtures/whitespace-complex.html.min +1 -0
- data/fixtures/whitespace-p.html +4 -0
- data/fixtures/whitespace-p.html.min +1 -0
- data/lib/html5small.rb +10 -0
- data/lib/html5small/Minifier.rb +156 -0
- data/lib/html5small/optional.rb +134 -0
- data/spec/h5-min_spec.rb +37 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- metadata +162 -0
data/README.md
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it cannot be loaded, a hint
# is printed and the remaining tasks are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "html5small"
    gem.summary = %Q{HTML5small}
    gem.description = %Q{Minifier for HTML5 documents}
    gem.email = "ruben.verborgh@gmail.com"
    gem.homepage = "http://github.com/RubenVerborgh/HTML5small"
    gem.authors = ["Run Paint Run Run", "Ruben Verborgh"]
    gem.add_dependency "htmlentities", ">= 4.1.0"
    gem.add_dependency "nokogiri", ">= 1.5.0"
    gem.add_development_dependency "rspec", ">= 2.0.0"
    gem.add_development_dependency "yard", ">= 0"
    gem.executables << 'html5small'
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Spec task. Guarded like the Jeweler and YARD blocks so that a missing RSpec
# does not make the whole Rakefile unloadable.
begin
  require 'rspec/core/rake_task'
  RSpec::Core::RakeTask.new(:spec)
rescue LoadError
  task :spec do
    abort "RSpec is not available. In order to run the specs, you must: gem install rspec"
  end
end

# Only add the prerequisite when a :check_dependencies task actually exists
# (e.g. Jeweler was not loaded above); otherwise `rake spec` would abort with
# "Don't know how to build task 'check_dependencies'".
task :spec => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

# Documentation task. Falls back to a stub that explains how to install YARD.
begin
  require 'yard'
  YARD::Rake::YardocTask.new
rescue LoadError
  task :yardoc do
    abort "YARD is not available. In order to run yardoc, you must: sudo gem install yard"
  end
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
data/bin/html5small
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p><a href=foo&bar>a</a>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p><a href=a"b>a</a>
|
data/fixtures/dl.html
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><dl><dt>Term<dd>Desc<dt>Term<dt>Term<dd>Desc</dl>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>⋔ Ь
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>1 & 2 > ?
|
data/fixtures/ie.html
ADDED
data/fixtures/lists.html
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><ul><li><i>one</i><li><i>two</i></ul>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>Two lines
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p><dfn title="a b">d</dfn>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>p<p>p
|
data/fixtures/pre.html
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>"q'<pre>&"'</pre>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><title>Untitled</title><meta charset=utf-8><link href=//creativecommons.org/licenses/by-sa/3.0/ rel=license><h1>Title</h1><p>Test
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p><a class=c href=/ hreflang=az-Latn-x-latn style=s title=t>a</a>
|
data/fixtures/table.html
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<meta charset=utf-8>
|
3
|
+
<title>Untitled</title>
|
4
|
+
<table class=c>
|
5
|
+
<thead class=h>
|
6
|
+
<tr>
|
7
|
+
<th>H</th>
|
8
|
+
<th>H</th>
|
9
|
+
</tr>
|
10
|
+
</thead>
|
11
|
+
<tbody class=b>
|
12
|
+
<tr>
|
13
|
+
<td>D</td>
|
14
|
+
<td>D</td>
|
15
|
+
</tr>
|
16
|
+
<tr>
|
17
|
+
<td>D</td>
|
18
|
+
<td>D</td>
|
19
|
+
</tr>
|
20
|
+
</tbody>
|
21
|
+
</table>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><table class=c><thead class=h><tr><th>H<th>H<tbody class=b><tr><td>D<td>D<tr><td>D<td>D</table>
|
data/fixtures/tabs.html
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>Tab tab
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><section><p>A <code>b</code> <i>c</i> d <dfn>e <code>f</code> .</dfn></section>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>This is a<code>string</code>.
|
data/lib/html5small.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# The gem ships this file as lib/html5small/Minifier.rb (capital M), so the
# path must match exactly to load on case-sensitive file systems.
require_relative 'html5small/Minifier'
require_relative 'html5small/optional'

module HTML5
  # Minifies an HTML document.
  #
  # The markup is fed through Nokogiri's HTML SAX parser with an
  # HTML5::Minifier as the event handler; the handler's accumulated buffer is
  # stripped of surrounding whitespace and handed to OptionalTags.remove.
  #
  # html - the HTML source to minify (passed straight to the SAX parser).
  #
  # Returns whatever OptionalTags.remove returns for the stripped buffer.
  def self.minify html
    minifier = HTML5::Minifier.new
    Nokogiri::HTML::SAX::Parser.new(minifier).parse(html)
    OptionalTags.remove minifier.buf.strip
  end
end
|
@@ -0,0 +1,156 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'htmlentities'
|
3
|
+
|
4
|
+
module HTML5
|
5
|
+
# SAX event handler that re-serialises an HTML document in minified form.
#
# Parser callbacks append markup to +buf+; character data is collected in
# +text_node+ and flushed (entity-normalised and whitespace-collapsed) whenever
# an element starts or ends, or a retained comment is emitted.
class Minifier < Nokogiri::XML::SAX::Document
  # Elements in which whitespace is significant, so can't be normalised
  PRE_TAGS = [:pre, :style, :script, :textarea].freeze

  # Raw text elements: character references are not recognised inside them,
  # so their content must be copied verbatim (no entity decoding/encoding).
  RAW_TEXT_TAGS = [:script, :style].freeze

  # Matches the content of conditional comments ("[if ...]") as well as the
  # "<![endif]" closer used by the downlevel-revealed form.
  CONDITIONAL_COMMENT = /\[if\s[^\]]+\]|<!\[endif\]/i

  # Elements representing flow content
  FLOW_ELEMENTS = %w{a abbr address area article aside audio b bdo blockquote br
                     button canvas cite code command datalist del details dfn div
                     dl em embed fieldset figure footer form h1 h2 h3 h4 h5 h6 header
                     hgroup hr i iframe img input ins kbd keygen label link
                     map mark math menu meta meter nav noscript object ol output
                     p pre progress q ruby samp script section select small span
                     strong style sub sup svg table textarea time ul var video wbr
                    }.map(&:to_sym).freeze

  # Boolean attributes per element; the :_ entry applies to every element.
  BOOL_ATTR = {
    _: [:itemscope, :hidden],
    audio: [:loop, :autoplay, :controls],
    button: [:formnovalidate, :disabled, :autofocus],
    command: [:disabled, :checked],
    details: [:open],
    fieldset: [:disabled],
    form: [:novalidate],
    iframe: [:seamless],
    img: [:ismap],
    input: [:autocomplete, :autofocus, :defaultchecked,
            :checked, :disabled, :formnovalidate, :indeterminate,
            :multiple, :readonly, :required],
    keygen: [:disabled, :autofocus],
    optgroup: [:disabled],
    option: [:disabled, :defaultselected, :selected],
    ol: [:reversed],
    select: [:autofocus, :disabled, :multiple],
    script: [:async, :defer],
    style: [:scoped],
    textarea: [:autofocus, :disabled, :readonly, :required],
    time: [:pubdate],
    video: [:loop, :autoplay, :controls],
  }.freeze

  attr_accessor :buf, :text_node, :entities

  # Sets up an empty output buffer, an empty pending text node, an empty
  # open-element stack and the entity coder.
  def initialize
    @buf, @text_node = '', ''
    @stack = []
    @entities = HTMLEntities.new :expanded
  end

  # HTML5 documents begin with the doctype
  def start_document
    buf << "<!doctype html>"
  end

  # Flushes pending text, records the element as open and emits its start
  # tag with normalised, sorted attributes.
  #
  # name  - the element name as reported by the parser.
  # attrs - attribute list; treated as [name, value] pairs.
  def start_element name, attrs = []
    name = normalise_name name
    dump_text_node
    @stack.push name
    buf << "<#{name}" + format_attributes(attrs, name) + ">"
  end

  # Flushes pending text, trims trailing whitespace from the buffer (unless
  # inside a whitespace-preserving element) and emits the end tag.
  def end_element name
    name = normalise_name name
    dump_text_node
    buf.rstrip! unless in_pre_element?
    @stack.pop
    buf << "</#{name}>"
  end

  # Drops ordinary comments; I.E. "conditional comments" are retained as-is.
  def comment string
    return unless string =~ CONDITIONAL_COMMENT
    # Flush text collected so far first, so that it stays in front of the
    # comment instead of being emitted after it.
    dump_text_node
    buf << "<!--#{string}-->"
  end

  # CDATA content is queued as part of the pending text node.
  def cdata_block string
    text_node << string
  end

  # Character data is queued as part of the pending text node.
  def characters chars
    text_node << chars
  end

  private

  # Entity-normalises an attribute value and wraps it in double quotes when
  # it cannot legally appear unquoted.
  def format_attribute_value value
    value = format_entities value
    value_needs_quoting?(value) ? %Q{"#{value}"} : value
  end

  # Element and attribute names are handled as lower-case symbols.
  def normalise_name name
    name.downcase.to_sym
  end

  # Builds the attribute string for a start tag: names normalised, sorted by
  # name, boolean attributes reduced to the bare name. The result carries a
  # leading space, or is empty when there are no attributes.
  def format_attributes attrs, element
    return '' if attrs.empty?
    normalised = Hash[attrs].map do |name, value|
      [normalise_name(name), format_attribute_value(value)]
    end
    rendered = normalised.sort_by { |name, _value| name }.map do |name, value|
      boolean_attribute?(element, name) ? name.to_s : "#{name}=#{value}"
    end
    rendered.join(' ').insert(0, ' ')
  end

  # Does the given value have to be quoted as per
  # http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
  # ?
  def value_needs_quoting? value
    # Unquoted values must be non-empty and must not contain whitespace,
    # '"', "'", ">", "<", "=" or "`" characters.
    value =~ /[[:space:]"'><=`]/ || value.empty?
  end

  # True when the attribute is boolean either globally (the :_ entry) or for
  # the given element.
  def boolean_attribute? element, attribute
    e, a = [element, attribute].map(&:to_sym)
    BOOL_ATTR[:_].include?(a) || BOOL_ATTR.fetch(e, []).include?(a)
  end

  # Decodes all entities in +html+ and re-encodes only the basic ones.
  # Each name/replacement pair in +except+ turns "&name;" back into the
  # given replacement text.
  def format_entities html, except={}
    html = entities.encode(entities.decode(html), :basic)
    except.each { |name, replace| html.gsub!(/&#{name};/, replace) }
    html
  end

  # Returns the pending text node in its output form.
  def format_text_node
    # Script and style content is not entity-aware: re-encoding it would turn
    # e.g. "a < b && c" into "a &lt; b &amp;&amp; c" and break the code.
    return text_node.dup if in_raw_text_element?
    text = format_entities text_node, {quot: ?", apos: ?'}
    return text if in_pre_element?
    # Newline, tab, carriage return and form feed all collapse like spaces.
    text.gsub!(/[\n\t\r\f]/, ' ')
    # Don't strip inter-element white space for flow elements
    text.lstrip! unless buf =~ %r{</\w+>\s*\Z} && in_flow_element?
    text.squeeze(' ')
  end

  # Is any currently open element a flow element?
  def in_flow_element?
    not (FLOW_ELEMENTS & @stack).empty?
  end

  # Is any currently open element whitespace-preserving?
  def in_pre_element?
    not (PRE_TAGS & @stack).empty?
  end

  # Is any currently open element a raw text element (script/style)?
  def in_raw_text_element?
    not (RAW_TEXT_TAGS & @stack).empty?
  end

  # Appends the formatted pending text to the buffer and resets it.
  def dump_text_node
    buf << format_text_node
    text_node.clear
  end
end
|
156
|
+
end
|