html5small 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/LICENSE +674 -0
- data/README.md +7 -0
- data/Rakefile +38 -0
- data/VERSION +1 -0
- data/bin/html5small +3 -0
- data/fixtures/attribute-value-ampersand.html +4 -0
- data/fixtures/attribute-value-ampersand.html.min +1 -0
- data/fixtures/attribute-value-quot.html +4 -0
- data/fixtures/attribute-value-quot.html.min +1 -0
- data/fixtures/dl.html +10 -0
- data/fixtures/dl.html.min +1 -0
- data/fixtures/entities-expand.html +4 -0
- data/fixtures/entities-expand.html.min +1 -0
- data/fixtures/entities-no-expand.html +3 -0
- data/fixtures/entities-no-expand.html.min +1 -0
- data/fixtures/ie.html +8 -0
- data/fixtures/ie.html.min +3 -0
- data/fixtures/lists.html +8 -0
- data/fixtures/lists.html.min +1 -0
- data/fixtures/newlines.html +6 -0
- data/fixtures/newlines.html.min +1 -0
- data/fixtures/normalise-attribute-name.html +4 -0
- data/fixtures/normalise-attribute-name.html.min +1 -0
- data/fixtures/normalise-tag-name.html +5 -0
- data/fixtures/normalise-tag-name.html.min +1 -0
- data/fixtures/pre-entities.html +7 -0
- data/fixtures/pre-entities.html.min +4 -0
- data/fixtures/pre.html +9 -0
- data/fixtures/pre.html.min +6 -0
- data/fixtures/quot-entity.html +5 -0
- data/fixtures/quot-entity.html.min +1 -0
- data/fixtures/skeleton.html +12 -0
- data/fixtures/skeleton.html.min +1 -0
- data/fixtures/sort-attributes.html +4 -0
- data/fixtures/sort-attributes.html.min +1 -0
- data/fixtures/table.html +21 -0
- data/fixtures/table.html.min +1 -0
- data/fixtures/tabs.html +4 -0
- data/fixtures/tabs.html.min +1 -0
- data/fixtures/whitespace-complex.html +9 -0
- data/fixtures/whitespace-complex.html.min +1 -0
- data/fixtures/whitespace-p.html +4 -0
- data/fixtures/whitespace-p.html.min +1 -0
- data/lib/html5small.rb +10 -0
- data/lib/html5small/Minifier.rb +156 -0
- data/lib/html5small/optional.rb +134 -0
- data/spec/h5-min_spec.rb +37 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- metadata +162 -0
data/README.md
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# Rake tasks for the html5small gem: packaging (Jeweler), specs (RSpec) and
# API documentation (YARD). Each tool is optional; when one is missing the
# remaining tasks are still defined.
require 'rubygems'
require 'rake'

# Gem specification and release tasks.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "html5small"
    gem.summary = %Q{HTML5small}
    gem.description = %Q{Minifier for HTML5 documents}
    gem.email = "ruben.verborgh@gmail.com"
    gem.homepage = "http://github.com/RubenVerborgh/HTML5small"
    gem.authors = ["Run Paint Run Run", "Ruben Verborgh"]
    gem.add_dependency "htmlentities", ">= 4.1.0"
    gem.add_dependency "nokogiri", ">= 1.5.0"
    gem.add_development_dependency "rspec", ">= 2.0.0"
    gem.add_development_dependency "yard", ">= 0"
    gem.executables << 'html5small'
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Spec task, with the same "explain what to install" fallback as yardoc below.
begin
  require 'rspec/core/rake_task'
  RSpec::Core::RakeTask.new(:spec)
rescue LoadError
  task :spec do
    abort "RSpec is not available. In order to run the specs, you must: gem install rspec"
  end
end

# :check_dependencies is not defined in this Rakefile; it can only come from
# Jeweler. Hook it in only when it exists, otherwise `rake spec` would abort
# with "Don't know how to build task 'check_dependencies'".
task :spec => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

# Documentation task.
begin
  require 'yard'
  YARD::Rake::YardocTask.new
rescue LoadError
  task :yardoc do
    abort "YARD is not available. In order to run yardoc, you must: sudo gem install yard"
  end
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
data/bin/html5small
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p><a href=foo&bar>a</a>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p><a href=a"b>a</a>
|
data/fixtures/dl.html
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><dl><dt>Term<dd>Desc<dt>Term<dt>Term<dd>Desc</dl>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>⋔ Ь
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>1 & 2 > ?
|
data/fixtures/ie.html
ADDED
data/fixtures/lists.html
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><ul><li><i>one</i><li><i>two</i></ul>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>Two lines
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p><dfn title="a b">d</dfn>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>p<p>p
|
data/fixtures/pre.html
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>"q'<pre>&"'</pre>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><title>Untitled</title><meta charset=utf-8><link href=//creativecommons.org/licenses/by-sa/3.0/ rel=license><h1>Title</h1><p>Test
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p><a class=c href=/ hreflang=az-Latn-x-latn style=s title=t>a</a>
|
data/fixtures/table.html
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<meta charset=utf-8>
|
3
|
+
<title>Untitled</title>
|
4
|
+
<table class=c>
|
5
|
+
<thead class=h>
|
6
|
+
<tr>
|
7
|
+
<th>H</th>
|
8
|
+
<th>H</th>
|
9
|
+
</tr>
|
10
|
+
</thead>
|
11
|
+
<tbody class=b>
|
12
|
+
<tr>
|
13
|
+
<td>D</td>
|
14
|
+
<td>D</td>
|
15
|
+
</tr>
|
16
|
+
<tr>
|
17
|
+
<td>D</td>
|
18
|
+
<td>D</td>
|
19
|
+
</tr>
|
20
|
+
</tbody>
|
21
|
+
</table>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><table class=c><thead class=h><tr><th>H<th>H<tbody class=b><tr><td>D<td>D<tr><td>D<td>D</table>
|
data/fixtures/tabs.html
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>Tab tab
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><section><p>A <code>b</code> <i>c</i> d <dfn>e <code>f</code> .</dfn></section>
|
@@ -0,0 +1 @@
|
|
1
|
+
<!doctype html><meta charset=utf-8><title>Untitled</title><p>This is a<code>string</code>.
|
data/lib/html5small.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# Entry point of the html5small library.
#
# The minifier ships as lib/html5small/Minifier.rb (capital "M"), so the
# required path has to use exactly that case to load on case-sensitive
# file systems.
require_relative 'html5small/Minifier'
require_relative 'html5small/optional'

module HTML5
  # Minifies an HTML document.
  #
  # The markup is fed through Nokogiri's HTML SAX parser with an
  # HTML5::Minifier as the event handler; the handler's buffer is then
  # stripped of surrounding whitespace and handed to OptionalTags.remove.
  #
  # @param html the markup to minify (passed as-is to the SAX parser's #parse)
  # @return the result of OptionalTags.remove for the minified markup
  def self.minify html
    minifier = HTML5::Minifier.new
    Nokogiri::HTML::SAX::Parser.new(minifier).parse(html)
    OptionalTags.remove minifier.buf.strip
  end
end
|
@@ -0,0 +1,156 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'htmlentities'
|
3
|
+
|
4
|
+
module HTML5
|
5
|
+
class Minifier < Nokogiri::XML::SAX::Document
|
6
|
+
# Elements in which whitespace is significant, so can't be normalised
PRE_TAGS = [:pre, :style, :script, :textarea].freeze

# Elements representing flow content
FLOW_ELEMENTS = %w{a abbr address area article aside audio b bdo blockquote br
                   button canvas cite code command datalist del details dfn div
                   dl em embed fieldset figure footer form h1 h2 h3 h4 h5 h6 header
                   hgroup hr i iframe img input ins kbd keygen label link
                   map mark math menu meta meter nav noscript object ol output
                   p pre progress q ruby samp script section select small span
                   strong style sub sup svg table textarea time ul var video wbr
                  }.map(&:to_sym).freeze

# Boolean attributes, keyed by element name. The :_ entry lists attributes
# that are boolean on every element. Attributes found here are written as a
# bare name, without a value.
#
# "autocomplete" is deliberately NOT listed for input: it is an enumerated
# attribute ("on"/"off"), so dropping its value would turn autocomplete=off
# into a bare "autocomplete" and change its meaning.
BOOL_ATTR = {
  _: [:itemscope, :hidden],
  audio: [:loop, :autoplay, :controls],
  button: [:formnovalidate, :disabled, :autofocus],
  command: [:disabled, :checked],
  details: [:open],
  fieldset: [:disabled],
  form: [:novalidate],
  iframe: [:seamless],
  img: [:ismap],
  input: [:autofocus, :defaultchecked,
          :checked, :disabled, :formnovalidate, :indeterminate,
          :multiple, :readonly, :required],
  keygen: [:disabled, :autofocus],
  optgroup: [:disabled],
  option: [:disabled, :defaultselected, :selected],
  ol: [:reversed],
  select: [:autofocus, :disabled, :multiple],
  script: [:async, :defer],
  style: [:scoped],
  textarea: [:autofocus, :disabled, :readonly, :required],
  time: [:pubdate],
  video: [:loop, :autoplay, :controls],
}.each_value(&:freeze).freeze
|
43
|
+
|
44
|
+
# Minifier state:
# buf       - the minified output assembled so far
# text_node - character data collected until it is flushed into buf
# entities  - the HTMLEntities coder created in #initialize
attr_accessor :buf, :text_node, :entities
|
45
|
+
|
46
|
+
# Starts with an empty output buffer, an empty pending text node, an empty
# stack of open elements and an HTMLEntities coder using the :expanded
# flavour.
def initialize
  @buf = ''
  @text_node = ''
  @stack = []
  @entities = HTMLEntities.new(:expanded)
end
|
51
|
+
|
52
|
+
# SAX callback fired when parsing begins: every minified document opens
# with the HTML5 doctype.
def start_document
  buf.concat("<!doctype html>")
end
|
56
|
+
|
57
|
+
# SAX callback for an opening tag.
#
# Flushes any pending text, records the (normalised) element on the stack of
# open elements and writes the start tag with its formatted attributes.
def start_element(name, attrs = [])
  tag = normalise_name(name)
  dump_text_node
  @stack << tag
  buf << "<#{tag}#{format_attributes(attrs, tag)}>"
end
|
63
|
+
|
64
|
+
# SAX callback for a closing tag.
#
# Flushes pending text, trims trailing whitespace from the output unless a
# whitespace-sensitive element is open, pops the element stack and writes
# the end tag.
def end_element(name)
  tag = normalise_name(name)
  dump_text_node
  unless in_pre_element?
    buf.rstrip!
  end
  @stack.pop
  buf << "</#{tag}>"
end
|
71
|
+
|
72
|
+
# SAX callback for a comment.
#
# Ordinary comments are dropped. Internet Explorer "conditional comments"
# are retained as-is: any opener of the form "[if ... IE ...]" (for example
# "[if IE]", "[if lt IE 9]", "[if gte IE 8]", "[if !IE]") and the
# "<![endif]" comment that closes the downlevel-revealed variant.
#
# Text collected before a retained comment is flushed first, so that it is
# written ahead of the comment rather than after it.
def comment string
  return unless string =~ /\[if\s[^\]]*\bIE\b[^\]]*\]|<!\[endif\]/i

  dump_text_node
  buf << "<!--#{string}-->"
end
|
78
|
+
|
79
|
+
# SAX callback for a CDATA block: its content joins the pending text node.
def cdata_block(string)
  text_node.concat(string)
end
|
82
|
+
|
83
|
+
# SAX callback for character data: it is appended to the pending text node
# and only written out when the text node is flushed.
def characters(chars)
  text_node.concat(chars)
end
|
86
|
+
|
87
|
+
private
|
88
|
+
# Normalises the entities of an attribute value and wraps the result in
# double quotes only when it cannot legally be written unquoted.
def format_attribute_value(value)
  encoded = format_entities(value)
  return encoded unless value_needs_quoting?(encoded)

  %Q{"#{encoded}"}
end
|
92
|
+
|
93
|
+
# Canonical form of an element or attribute name: lower-cased, as a Symbol.
def normalise_name(name)
  lowercase = name.downcase
  lowercase.to_sym
end
|
96
|
+
|
97
|
+
# Renders the attribute list of +element+ as it appears inside a start tag.
#
# Names are normalised, values formatted, and the attributes sorted by name.
# Boolean attributes are written as a bare name, all others as name=value.
# Returns '' when there are no attributes, otherwise the attributes joined
# by single spaces and prefixed with one space.
def format_attributes(attrs, element)
  return '' if attrs.empty?

  pairs = Hash[attrs].map do |attr, raw|
    [normalise_name(attr), format_attribute_value(raw)]
  end
  rendered = pairs.sort_by(&:first).map do |attr, formatted|
    boolean_attribute?(element, attr) ? attr.to_s : "#{attr}=#{formatted}"
  end
  " #{rendered.join(' ')}"
end
|
111
|
+
|
112
|
+
# Tells whether an attribute value has to be quoted, following
# http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
#
# A value may only stay unquoted when it is non-empty and contains no
# whitespace, '"', "'", '>', '<', '=' or '`'. The result is truthy when
# quoting is required: true for an empty value, otherwise the index of the
# first offending character; it is false when the value can stay unquoted.
def value_needs_quoting?(value)
  return true if value.empty?

  (value =~ /[[:space:]"'><=`]/) || false
end
|
119
|
+
|
120
|
+
# Is +attribute+ a boolean attribute on +element+?
#
# True when the attribute is listed under the :_ key of BOOL_ATTR (boolean
# on every element) or under the entry for this particular element.
def boolean_attribute?(element, attribute)
  tag = element.to_sym
  attr = attribute.to_sym
  return true if BOOL_ATTR[:_].include?(attr)

  BOOL_ATTR.fetch(tag, []).include?(attr)
end
|
125
|
+
|
126
|
+
# Normalises the entities in +html+: everything is decoded first and then
# re-encoded with the coder's :basic instruction. Each entry of +except+
# maps an entity name to the text that replaces "&name;" in the encoded
# result.
def format_entities(html, except = {})
  decoded = entities.decode(html)
  encoded = entities.encode(decoded, :basic)
  except.each_pair do |entity, literal|
    encoded.gsub!(Regexp.new("&#{entity};"), literal)
  end
  encoded
end
|
131
|
+
|
132
|
+
# Returns the pending text node formatted for output.
#
# * Inside script and style (raw text elements) the text is returned
#   untouched: their content is not entity-decoded by HTML parsers, so
#   re-encoding "<" or "&" there would corrupt inline JavaScript/CSS.
# * Otherwise entities are normalised, with &quot; and &apos; written as
#   literal quote characters.
# * Inside whitespace-sensitive elements the text is returned at that point.
# * Everywhere else carriage returns, newlines and tabs become spaces,
#   leading whitespace is removed (unless the text directly follows an end
#   tag inside a flow element) and runs of spaces collapse into one.
def format_text_node
  return text_node.dup if [:script, :style].include?(@stack.last)

  text = format_entities text_node, {quot: '"', apos: "'"}
  return text if in_pre_element?

  text.gsub!(/[\r\n\t]/, ' ')
  # Don't strip inter-element white space for flow elements
  keep_leading_space = buf =~ %r{</\w+>\s*\Z} && in_flow_element?
  text.lstrip! unless keep_leading_space
  text.squeeze(' ')
end
|
142
|
+
|
143
|
+
# Is at least one of the currently open elements a flow element?
def in_flow_element?
  @stack.any? { |open_tag| FLOW_ELEMENTS.include?(open_tag) }
end
|
146
|
+
|
147
|
+
# Is at least one of the currently open elements whitespace-sensitive?
def in_pre_element?
  @stack.any? { |open_tag| PRE_TAGS.include?(open_tag) }
end
|
150
|
+
|
151
|
+
# Flushes the pending text: its formatted form is appended to the output
# buffer and the text node is emptied for the next run of character data.
def dump_text_node
  formatted = format_text_node
  buf.concat(formatted)
  text_node.clear
end
|
155
|
+
end
|
156
|
+
end
|