html5small 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. data/.document +5 -0
  2. data/LICENSE +674 -0
  3. data/README.md +7 -0
  4. data/Rakefile +38 -0
  5. data/VERSION +1 -0
  6. data/bin/html5small +3 -0
  7. data/fixtures/attribute-value-ampersand.html +4 -0
  8. data/fixtures/attribute-value-ampersand.html.min +1 -0
  9. data/fixtures/attribute-value-quot.html +4 -0
  10. data/fixtures/attribute-value-quot.html.min +1 -0
  11. data/fixtures/dl.html +10 -0
  12. data/fixtures/dl.html.min +1 -0
  13. data/fixtures/entities-expand.html +4 -0
  14. data/fixtures/entities-expand.html.min +1 -0
  15. data/fixtures/entities-no-expand.html +3 -0
  16. data/fixtures/entities-no-expand.html.min +1 -0
  17. data/fixtures/ie.html +8 -0
  18. data/fixtures/ie.html.min +3 -0
  19. data/fixtures/lists.html +8 -0
  20. data/fixtures/lists.html.min +1 -0
  21. data/fixtures/newlines.html +6 -0
  22. data/fixtures/newlines.html.min +1 -0
  23. data/fixtures/normalise-attribute-name.html +4 -0
  24. data/fixtures/normalise-attribute-name.html.min +1 -0
  25. data/fixtures/normalise-tag-name.html +5 -0
  26. data/fixtures/normalise-tag-name.html.min +1 -0
  27. data/fixtures/pre-entities.html +7 -0
  28. data/fixtures/pre-entities.html.min +4 -0
  29. data/fixtures/pre.html +9 -0
  30. data/fixtures/pre.html.min +6 -0
  31. data/fixtures/quot-entity.html +5 -0
  32. data/fixtures/quot-entity.html.min +1 -0
  33. data/fixtures/skeleton.html +12 -0
  34. data/fixtures/skeleton.html.min +1 -0
  35. data/fixtures/sort-attributes.html +4 -0
  36. data/fixtures/sort-attributes.html.min +1 -0
  37. data/fixtures/table.html +21 -0
  38. data/fixtures/table.html.min +1 -0
  39. data/fixtures/tabs.html +4 -0
  40. data/fixtures/tabs.html.min +1 -0
  41. data/fixtures/whitespace-complex.html +9 -0
  42. data/fixtures/whitespace-complex.html.min +1 -0
  43. data/fixtures/whitespace-p.html +4 -0
  44. data/fixtures/whitespace-p.html.min +1 -0
  45. data/lib/html5small.rb +10 -0
  46. data/lib/html5small/Minifier.rb +156 -0
  47. data/lib/html5small/optional.rb +134 -0
  48. data/spec/h5-min_spec.rb +37 -0
  49. data/spec/spec.opts +1 -0
  50. data/spec/spec_helper.rb +9 -0
  51. metadata +162 -0
data/README.md ADDED
@@ -0,0 +1,7 @@
1
+ # HTML5small
2
+
3
+ HTML5small is a general-purpose minifier for HTML5 documents.
4
+
5
+ ## Origin
6
+ HTML5small is based on [h5-min](https://github.com/runpaint/h5-min),
7
+ which is currently [unmaintained](https://github.com/runpaint/h5-min/issues).
data/Rakefile ADDED
@@ -0,0 +1,38 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "html5small"
8
+ gem.summary = %Q{HTML5small}
9
+ gem.description = %Q{Minifier for HTML5 documents}
10
+ gem.email = "ruben.verborgh@gmail.com"
11
+ gem.homepage = "http://github.com/RubenVerborgh/HTML5small"
12
+ gem.authors = ["Run Paint Run Run", "Ruben Verborgh"]
13
+ gem.add_dependency "htmlentities", ">= 4.1.0"
14
+ gem.add_dependency "nokogiri", ">= 1.5.0"
15
+ gem.add_development_dependency "rspec", ">= 2.0.0"
16
+ gem.add_development_dependency "yard", ">= 0"
17
+ gem.executables << 'html5small'
18
+ end
19
+ Jeweler::GemcutterTasks.new
20
+ rescue LoadError
21
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
22
+ end
23
+
24
+ require 'rspec/core/rake_task'
25
+ RSpec::Core::RakeTask.new(:spec)
26
+
27
+ task :spec => :check_dependencies
28
+
29
+ task :default => :spec
30
+
31
+ begin
32
+ require 'yard'
33
+ YARD::Rake::YardocTask.new
34
+ rescue LoadError
35
+ task :yardoc do
36
+ abort "YARD is not available. In order to run yardoc, you must: sudo gem install yard"
37
+ end
38
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
data/bin/html5small ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../lib/html5small'
3
+ print HTML5.minify ARGF.read
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p><a href=foo&amp;bar>a</a>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p><a href=foo&amp;bar>a</a>
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p><a href=a"b>a</a>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p><a href=a&quot;b>a</a>
data/fixtures/dl.html ADDED
@@ -0,0 +1,10 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <dl>
5
+ <dt>Term</dt>
6
+ <dd>Desc</dd>
7
+ <dt>Term</dt>
8
+ <dt>Term</dt>
9
+ <dd>Desc</dt>
10
+ </dl>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><dl><dt>Term<dd>Desc<dt>Term<dt>Term<dd>Desc</dl>
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p>&fork; &SOFTcy;
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>⋔ Ь
@@ -0,0 +1,3 @@
1
+ <!DOCTYPE html><meta charset=utf-8><title>Untitled</title>
2
+ <p>1 &amp; 2
3
+ &gt; ?
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>1 &amp; 2 &gt; ?
data/fixtures/ie.html ADDED
@@ -0,0 +1,8 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <!--[if lt IE 9]>
5
+ <script src=//html5shiv.googlecode.com/svn/trunk/html5.js></script>
6
+ <![endif]-->
7
+
8
+ <h1>I.E!?!</h1>
@@ -0,0 +1,3 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><!--[if lt IE 9]>
2
+ <script src=//html5shiv.googlecode.com/svn/trunk/html5.js></script>
3
+ <![endif]--><h1>I.E!?!</h1>
@@ -0,0 +1,8 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <ul>
5
+ <li> <i>one</i><li>
6
+ <i> two
7
+ </i>
8
+
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><ul><li><i>one</i><li><i>two</i></ul>
@@ -0,0 +1,6 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p>Two
5
+ lines
6
+ </p>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>Two lines
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta Charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p><dfn TITLE='a b'>d</dfn></p>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p><dfn title="a b">d</dfn>
@@ -0,0 +1,5 @@
1
+ <!DOCTYPE html>
2
+ <Meta charset=utf-8>
3
+ <title>Untitled</Title>
4
+ <p>p
5
+ <P>p
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>p<p>p
@@ -0,0 +1,7 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <pre>
5
+ &lt; - &gt;
6
+ &amp;
7
+ </pre>
@@ -0,0 +1,4 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><pre>
2
+ &lt; - &gt;
3
+ &amp;
4
+ </pre>
data/fixtures/pre.html ADDED
@@ -0,0 +1,9 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <pre>
5
+ This
6
+ <b>is</b>
7
+ pre-formatted .
8
+ and in<code>code</code>
9
+ </pre>
@@ -0,0 +1,6 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><pre>
2
+ This
3
+ <b>is</b>
4
+ pre-formatted .
5
+ and in<code>code</code>
6
+ </pre>
@@ -0,0 +1,5 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p>&quot;q&apos;
5
+ <pre>&amp;&quot;&apos;</pre>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>"q'<pre>&amp;"'</pre>
@@ -0,0 +1,12 @@
1
+ <html>
2
+ <head>
3
+ <title>Untitled</title>
4
+ <meta charset="utf-8">
5
+ <link rel='license' href='//creativecommons.org/licenses/by-sa/3.0/'>
6
+ </head>
7
+ <body>
8
+ <h1>Title</h1> <!-- title -->
9
+
10
+ <p>Test</p>
11
+ </body>
12
+ </html>
@@ -0,0 +1 @@
1
+ <!doctype html><title>Untitled</title><meta charset=utf-8><link href=//creativecommons.org/licenses/by-sa/3.0/ rel=license><h1>Title</h1><p>Test
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p><a title=t href=/ class=c stylE=s HREFLANG="az-Latn-x-latn" >a</a></p>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p><a class=c href=/ hreflang=az-Latn-x-latn style=s title=t>a</a>
@@ -0,0 +1,21 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <table class=c>
5
+ <thead class=h>
6
+ <tr>
7
+ <th>H</th>
8
+ <th>H</th>
9
+ </tr>
10
+ </thead>
11
+ <tbody class=b>
12
+ <tr>
13
+ <td>D</td>
14
+ <td>D</td>
15
+ </tr>
16
+ <tr>
17
+ <td>D</td>
18
+ <td>D</td>
19
+ </tr>
20
+ </tbody>
21
+ </table>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><table class=c><thead class=h><tr><th>H<th>H<tbody class=b><tr><td>D<td>D<tr><td>D<td>D</table>
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p>Tab tab</p>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>Tab tab
@@ -0,0 +1,9 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <section>
5
+ <p>
6
+ A <code>b</code> <i>c</i> d <dfn>e <code> f
7
+ </code>
8
+
9
+ .
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><section><p>A <code>b</code> <i>c</i> d <dfn>e <code>f</code> .</dfn></section>
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p>This is a<code>string </code>.
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>This is a<code>string</code>.
data/lib/html5small.rb ADDED
@@ -0,0 +1,10 @@
1
+ require_relative 'html5small/minifier'
2
+ require_relative 'html5small/optional'
3
+
4
+ module HTML5
5
+ def self.minify html
6
+ minifier = HTML5::Minifier.new
7
+ Nokogiri::HTML::SAX::Parser.new(minifier).parse(html)
8
+ OptionalTags.remove minifier.buf.strip
9
+ end
10
+ end
@@ -0,0 +1,156 @@
1
+ require 'nokogiri'
2
+ require 'htmlentities'
3
+
4
+ module HTML5
5
+ class Minifier < Nokogiri::XML::SAX::Document
6
+ # Elements in which whitespace is significant, so can't be normalised
7
+ PRE_TAGS = [:pre, :style, :script, :textarea]
8
+
9
+ # Elements representing flow content
10
+ FLOW_ELEMENTS = %w{a abbr address area article aside audio b bdo blockquote br
11
+ button canvas cite code command datalist del details dfn div
12
+ dl em embed fieldset figure footer form h1 h2 h3 h4 h5 h6 header
13
+ hgroup hr i iframe img input ins kbd keygen label link
14
+ map mark math menu meta meter nav noscript object ol output
15
+ p pre progress q ruby samp script section select small span
16
+ strong style sub sup svg table textarea time ul var video wbr
17
+ }.map(&:to_sym)
18
+
19
+ BOOL_ATTR = {
20
+ _: [:itemscope, :hidden],
21
+ audio: [:loop, :autoplay, :controls],
22
+ button: [:formnovalidate, :disabled, :autofocus],
23
+ command: [:disabled, :checked],
24
+ details: [:open],
25
+ fieldset: [:disabled],
26
+ form: [:novalidate],
27
+ iframe: [:seamless],
28
+ img: [:ismap],
29
+ input: [:autocomplete, :autofocus, :defaultchecked,
30
+ :checked, :disabled, :formnovalidate, :indeterminate,
31
+ :multiple, :readonly, :required],
32
+ keygen: [:disabled, :autofocus],
33
+ optgroup: [:disabled],
34
+ option: [:disabled, :defaultselected, :selected],
35
+ ol: [:reversed],
36
+ select: [:autofocus, :disabled, :multiple],
37
+ script: [:async, :defer],
38
+ style: [:scoped],
39
+ textarea: [:autofocus, :disabled, :readonly, :required],
40
+ time: [:pubdate],
41
+ video: [:loop, :autoplay, :controls],
42
+ }
43
+
44
+ attr_accessor :buf, :text_node, :entities
45
+
46
+ def initialize
47
+ @buf, @text_node = '', ''
48
+ @stack = []
49
+ @entities = HTMLEntities.new :expanded
50
+ end
51
+
52
+ # HTML5 documents begin with the doctype
53
+ def start_document
54
+ buf << "<!doctype html>"
55
+ end
56
+
57
+ def start_element name, attrs = []
58
+ name = normalise_name name
59
+ dump_text_node
60
+ @stack.push name
61
+ buf << "<#{name}" + format_attributes(attrs, name) + ">"
62
+ end
63
+
64
+ def end_element name
65
+ name = normalise_name name
66
+ dump_text_node
67
+ buf.rstrip! unless in_pre_element?
68
+ @stack.pop
69
+ buf << "</#{name}>"
70
+ end
71
+
72
+ def comment string
73
+ # Internet Explorer "conditional comments" of the form [if lt IE N] are retained as-is; all other comments are dropped
74
+ if string =~ /\[if\s+lt\s+IE\s+\d+\]/i
75
+ buf << "<!--#{string}-->"
76
+ end
77
+ end
78
+
79
+ def cdata_block string
80
+ text_node << string
81
+ end
82
+
83
+ def characters chars
84
+ text_node << chars
85
+ end
86
+
87
+ private
88
+ def format_attribute_value value
89
+ value = format_entities value
90
+ value_needs_quoting?(value) ? %Q{"#{value}"} : value
91
+ end
92
+
93
+ def normalise_name name
94
+ name.downcase.to_sym
95
+ end
96
+
97
+ def format_attributes attrs, element
98
+ return '' if attrs.empty?
99
+ Hash[attrs].map do |name, value|
100
+ [normalise_name(name), format_attribute_value(value)]
101
+ end.sort_by do |name, value|
102
+ name
103
+ end.map do |name, value|
104
+ if boolean_attribute?(element, name)
105
+ name.to_s
106
+ else
107
+ "#{name}=#{value}"
108
+ end
109
+ end.join(' ').insert(0, ' ')
110
+ end
111
+
112
+ # Does the given value need quoting, i.e. can it NOT be legally left unquoted as per
113
+ # http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
114
+ # ?
115
+ def value_needs_quoting? value
116
+ # an unquoted value must be non-empty and must not contain any whitespace, """, "'", "<", ">", "=", or "`" characters
117
+ value =~ /[[:space:]"'><=`]/ or value.empty?
118
+ end
119
+
120
+ def boolean_attribute? element, attribute
121
+ e, a = [element, attribute].map(&:to_sym)
122
+ BOOL_ATTR[:_].include?(a) or
123
+ (BOOL_ATTR.key?(e) and BOOL_ATTR[e].include?(a))
124
+ end
125
+
126
+ def format_entities html, except={}
127
+ html = entities.encode(entities.decode(html), :basic)
128
+ except.each{|name, replace| html.gsub!(/&#{name};/, replace)}
129
+ html
130
+ end
131
+
132
+ def format_text_node
133
+ text = format_entities text_node, {quot: ?", apos: ?'}
134
+ return text if in_pre_element?
135
+ text.gsub!(/[\n\t]/,' ')
136
+ # Don't strip inter-element white space for flow elements
137
+ unless buf =~ %r{</\w+>\s*\Z} and in_flow_element?
138
+ text.lstrip!
139
+ end
140
+ text.squeeze(' ')
141
+ end
142
+
143
+ def in_flow_element?
144
+ not (FLOW_ELEMENTS & @stack).empty?
145
+ end
146
+
147
+ def in_pre_element?
148
+ not (PRE_TAGS & @stack).empty?
149
+ end
150
+
151
+ def dump_text_node
152
+ buf << format_text_node
153
+ text_node.clear
154
+ end
155
+ end
156
+ end