html5small 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. data/.document +5 -0
  2. data/LICENSE +674 -0
  3. data/README.md +7 -0
  4. data/Rakefile +38 -0
  5. data/VERSION +1 -0
  6. data/bin/html5small +3 -0
  7. data/fixtures/attribute-value-ampersand.html +4 -0
  8. data/fixtures/attribute-value-ampersand.html.min +1 -0
  9. data/fixtures/attribute-value-quot.html +4 -0
  10. data/fixtures/attribute-value-quot.html.min +1 -0
  11. data/fixtures/dl.html +10 -0
  12. data/fixtures/dl.html.min +1 -0
  13. data/fixtures/entities-expand.html +4 -0
  14. data/fixtures/entities-expand.html.min +1 -0
  15. data/fixtures/entities-no-expand.html +3 -0
  16. data/fixtures/entities-no-expand.html.min +1 -0
  17. data/fixtures/ie.html +8 -0
  18. data/fixtures/ie.html.min +3 -0
  19. data/fixtures/lists.html +8 -0
  20. data/fixtures/lists.html.min +1 -0
  21. data/fixtures/newlines.html +6 -0
  22. data/fixtures/newlines.html.min +1 -0
  23. data/fixtures/normalise-attribute-name.html +4 -0
  24. data/fixtures/normalise-attribute-name.html.min +1 -0
  25. data/fixtures/normalise-tag-name.html +5 -0
  26. data/fixtures/normalise-tag-name.html.min +1 -0
  27. data/fixtures/pre-entities.html +7 -0
  28. data/fixtures/pre-entities.html.min +4 -0
  29. data/fixtures/pre.html +9 -0
  30. data/fixtures/pre.html.min +6 -0
  31. data/fixtures/quot-entity.html +5 -0
  32. data/fixtures/quot-entity.html.min +1 -0
  33. data/fixtures/skeleton.html +12 -0
  34. data/fixtures/skeleton.html.min +1 -0
  35. data/fixtures/sort-attributes.html +4 -0
  36. data/fixtures/sort-attributes.html.min +1 -0
  37. data/fixtures/table.html +21 -0
  38. data/fixtures/table.html.min +1 -0
  39. data/fixtures/tabs.html +4 -0
  40. data/fixtures/tabs.html.min +1 -0
  41. data/fixtures/whitespace-complex.html +9 -0
  42. data/fixtures/whitespace-complex.html.min +1 -0
  43. data/fixtures/whitespace-p.html +4 -0
  44. data/fixtures/whitespace-p.html.min +1 -0
  45. data/lib/html5small.rb +10 -0
  46. data/lib/html5small/Minifier.rb +156 -0
  47. data/lib/html5small/optional.rb +134 -0
  48. data/spec/h5-min_spec.rb +37 -0
  49. data/spec/spec.opts +1 -0
  50. data/spec/spec_helper.rb +9 -0
  51. metadata +162 -0
data/README.md ADDED
@@ -0,0 +1,7 @@
1
+ # HTML5small
2
+
3
+ HTML5small is a general-purpose minifier for HTML5 documents.
4
+
5
+ ## Origin
6
+ HTML5small is based on [h5-min](https://github.com/runpaint/h5-min),
7
+ which is currently [unmaintained](https://github.com/runpaint/h5-min/issues).
data/Rakefile ADDED
@@ -0,0 +1,38 @@
1
# Rake tasks for the html5small gem: packaging (via Jeweler), specs (via RSpec)
# and API documentation (via YARD).
require 'rubygems'
require 'rake'

# Packaging tasks are optional: when Jeweler cannot be loaded a hint is printed
# and the remaining tasks are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "html5small"
    gem.summary = %Q{HTML5small}
    gem.description = %Q{Minifier for HTML5 documents}
    gem.email = "ruben.verborgh@gmail.com"
    gem.homepage = "http://github.com/RubenVerborgh/HTML5small"
    gem.authors = ["Run Paint Run Run", "Ruben Verborgh"]
    gem.add_dependency "htmlentities", ">= 4.1.0"
    gem.add_dependency "nokogiri", ">= 1.5.0"
    gem.add_development_dependency "rspec", ">= 2.0.0"
    gem.add_development_dependency "yard", ">= 0"
    gem.executables << 'html5small'
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)

# This Rakefile never defines :check_dependencies itself; it is expected to come
# from the Jeweler tasks above. Only make it a prerequisite when it actually
# exists, otherwise `rake spec` (and the default task) would abort with an
# unknown-task error whenever Jeweler failed to load.
task(:spec => :check_dependencies) if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

# Documentation task: falls back to a stub that explains how to install YARD.
begin
  require 'yard'
  YARD::Rake::YardocTask.new
rescue LoadError
  task :yardoc do
    abort "YARD is not available. In order to run yardoc, you must: sudo gem install yard"
  end
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
data/bin/html5small ADDED
@@ -0,0 +1,3 @@
1
#!/usr/bin/env ruby
# Command-line front end: reads the whole of ARGF, minifies it with
# HTML5.minify and prints the result.
require_relative '../lib/html5small'

minified = HTML5.minify(ARGF.read)
print minified
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p><a href=foo&amp;bar>a</a>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p><a href=foo&amp;bar>a</a>
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p><a href=a"b>a</a>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p><a href=a&quot;b>a</a>
data/fixtures/dl.html ADDED
@@ -0,0 +1,10 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <dl>
5
+ <dt>Term</dt>
6
+ <dd>Desc</dd>
7
+ <dt>Term</dt>
8
+ <dt>Term</dt>
9
+ <dd>Desc</dt>
10
+ </dl>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><dl><dt>Term<dd>Desc<dt>Term<dt>Term<dd>Desc</dl>
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p>&fork; &SOFTcy;
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>⋔ Ь
@@ -0,0 +1,3 @@
1
+ <!DOCTYPE html><meta charset=utf-8><title>Untitled</title>
2
+ <p>1 &amp; 2
3
+ &gt; ?
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>1 &amp; 2 &gt; ?
data/fixtures/ie.html ADDED
@@ -0,0 +1,8 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <!--[if lt IE 9]>
5
+ <script src=//html5shiv.googlecode.com/svn/trunk/html5.js></script>
6
+ <![endif]-->
7
+
8
+ <h1>I.E!?!</h1>
@@ -0,0 +1,3 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><!--[if lt IE 9]>
2
+ <script src=//html5shiv.googlecode.com/svn/trunk/html5.js></script>
3
+ <![endif]--><h1>I.E!?!</h1>
@@ -0,0 +1,8 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <ul>
5
+ <li> <i>one</i><li>
6
+ <i> two
7
+ </i>
8
+
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><ul><li><i>one</i><li><i>two</i></ul>
@@ -0,0 +1,6 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p>Two
5
+ lines
6
+ </p>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>Two lines
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta Charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p><dfn TITLE='a b'>d</dfn></p>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p><dfn title="a b">d</dfn>
@@ -0,0 +1,5 @@
1
+ <!DOCTYPE html>
2
+ <Meta charset=utf-8>
3
+ <title>Untitled</Title>
4
+ <p>p
5
+ <P>p
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>p<p>p
@@ -0,0 +1,7 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <pre>
5
+ &lt; - &gt;
6
+ &amp;
7
+ </pre>
@@ -0,0 +1,4 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><pre>
2
+ &lt; - &gt;
3
+ &amp;
4
+ </pre>
data/fixtures/pre.html ADDED
@@ -0,0 +1,9 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <pre>
5
+ This
6
+ <b>is</b>
7
+ pre-formatted .
8
+ and in<code>code</code>
9
+ </pre>
@@ -0,0 +1,6 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><pre>
2
+ This
3
+ <b>is</b>
4
+ pre-formatted .
5
+ and in<code>code</code>
6
+ </pre>
@@ -0,0 +1,5 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p>&quot;q&apos;
5
+ <pre>&amp;&quot;&apos;</pre>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>"q'<pre>&amp;"'</pre>
@@ -0,0 +1,12 @@
1
+ <html>
2
+ <head>
3
+ <title>Untitled</title>
4
+ <meta charset="utf-8">
5
+ <link rel='license' href='//creativecommons.org/licenses/by-sa/3.0/'>
6
+ </head>
7
+ <body>
8
+ <h1>Title</h1> <!-- title -->
9
+
10
+ <p>Test</p>
11
+ </body>
12
+ </html>
@@ -0,0 +1 @@
1
+ <!doctype html><title>Untitled</title><meta charset=utf-8><link href=//creativecommons.org/licenses/by-sa/3.0/ rel=license><h1>Title</h1><p>Test
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p><a title=t href=/ class=c stylE=s HREFLANG="az-Latn-x-latn" >a</a></p>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p><a class=c href=/ hreflang=az-Latn-x-latn style=s title=t>a</a>
@@ -0,0 +1,21 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <table class=c>
5
+ <thead class=h>
6
+ <tr>
7
+ <th>H</th>
8
+ <th>H</th>
9
+ </tr>
10
+ </thead>
11
+ <tbody class=b>
12
+ <tr>
13
+ <td>D</td>
14
+ <td>D</td>
15
+ </tr>
16
+ <tr>
17
+ <td>D</td>
18
+ <td>D</td>
19
+ </tr>
20
+ </tbody>
21
+ </table>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><table class=c><thead class=h><tr><th>H<th>H<tbody class=b><tr><td>D<td>D<tr><td>D<td>D</table>
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p>Tab tab</p>
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>Tab tab
@@ -0,0 +1,9 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <section>
5
+ <p>
6
+ A <code>b</code> <i>c</i> d <dfn>e <code> f
7
+ </code>
8
+
9
+ .
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><section><p>A <code>b</code> <i>c</i> d <dfn>e <code>f</code> .</dfn></section>
@@ -0,0 +1,4 @@
1
+ <!DOCTYPE html>
2
+ <meta charset=utf-8>
3
+ <title>Untitled</title>
4
+ <p>This is a<code>string </code>.
@@ -0,0 +1 @@
1
+ <!doctype html><meta charset=utf-8><title>Untitled</title><p>This is a<code>string</code>.
data/lib/html5small.rb ADDED
@@ -0,0 +1,10 @@
1
# Entry point of the html5small library.
#
# The minifier lives in html5small/Minifier.rb (capital "M"), so the required
# path has to use the same case or loading fails on case-sensitive file systems.
require_relative 'html5small/Minifier'
require_relative 'html5small/optional'

module HTML5
  # Minifies an HTML document.
  #
  # The markup is fed through Nokogiri's HTML SAX parser with an
  # HTML5::Minifier as the event handler; the handler's buffer is then
  # stripped of surrounding whitespace and passed to OptionalTags.remove.
  #
  # @param html [String] the HTML source to minify
  # @return whatever OptionalTags.remove returns for the stripped buffer
  def self.minify(html)
    minifier = HTML5::Minifier.new
    Nokogiri::HTML::SAX::Parser.new(minifier).parse(html)
    OptionalTags.remove(minifier.buf.strip)
  end
end
@@ -0,0 +1,156 @@
1
+ require 'nokogiri'
2
+ require 'htmlentities'
3
+
4
module HTML5
  # SAX event handler that re-serialises a parsed HTML document in compact form.
  #
  # The parser drives the public callbacks below. Minified markup accumulates
  # in #buf; character data is collected in #text_node and is only formatted
  # and flushed when the next tag (or retained comment) arrives, so that
  # adjacent chunks of text are normalised as one unit.
  class Minifier < Nokogiri::XML::SAX::Document
    # Elements in which whitespace is significant, so can't be normalised
    PRE_TAGS = [:pre, :style, :script, :textarea].freeze

    # Raw text elements: HTML does not recognise character references inside
    # them, so their content must be copied through untouched.
    RAW_TEXT_TAGS = [:script, :style].freeze

    # Elements representing flow content
    FLOW_ELEMENTS = %w{a abbr address area article aside audio b bdo blockquote br
                       button canvas cite code command datalist del details dfn div
                       dl em embed fieldset figure footer form h1 h2 h3 h4 h5 h6 header
                       hgroup hr i iframe img input ins kbd keygen label link
                       map mark math menu meta meter nav noscript object ol output
                       p pre progress q ruby samp script section select small span
                       strong style sub sup svg table textarea time ul var video wbr
                      }.map(&:to_sym).freeze

    # Boolean attributes per element; the :_ entry applies to every element.
    BOOL_ATTR = {
      _: [:itemscope, :hidden],
      audio: [:loop, :autoplay, :controls],
      button: [:formnovalidate, :disabled, :autofocus],
      command: [:disabled, :checked],
      details: [:open],
      fieldset: [:disabled],
      form: [:novalidate],
      iframe: [:seamless],
      img: [:ismap],
      input: [:autocomplete, :autofocus, :defaultchecked,
              :checked, :disabled, :formnovalidate, :indeterminate,
              :multiple, :readonly, :required],
      keygen: [:disabled, :autofocus],
      optgroup: [:disabled],
      option: [:disabled, :defaultselected, :selected],
      ol: [:reversed],
      select: [:autofocus, :disabled, :multiple],
      script: [:async, :defer],
      style: [:scoped],
      textarea: [:autofocus, :disabled, :readonly, :required],
      time: [:pubdate],
      video: [:loop, :autoplay, :controls],
    }.freeze

    # Matches the opening of an I.E "conditional comment" such as
    # "[if IE]", "[if lt IE 9]" or "[if !IE]".
    CONDITIONAL_COMMENT = /\[if\s+[^\]]*\bIE\b[^\]]*\]/i

    # Characters that force an attribute value to be quoted.
    UNQUOTED_UNSAFE = /[[:space:]"'><=`]/

    # Matches a buffer that ends with a closing tag plus optional whitespace.
    CLOSING_TAG_TAIL = %r{</\w+>\s*\Z}

    # Entities that are written literally inside text nodes.
    TEXT_LITERALS = { quot: '"', apos: "'" }.freeze

    attr_accessor :buf, :text_node, :entities

    # Sets up an empty output buffer, an empty pending text node, an empty
    # open-element stack and the entity coder.
    def initialize
      @buf, @text_node = '', ''
      @stack = []
      @entities = HTMLEntities.new :expanded
    end

    # HTML5 documents begin with the doctype
    def start_document
      buf << "<!doctype html>"
    end

    # Flushes pending text, records the element as open and writes its start
    # tag with normalised, sorted attributes.
    #
    # @param name [String] element name as reported by the parser
    # @param attrs [Array] attribute name/value pairs
    def start_element(name, attrs = [])
      name = normalise_name(name)
      dump_text_node
      @stack.push(name)
      buf << "<#{name}#{format_attributes(attrs, name)}>"
    end

    # Flushes pending text, trims trailing whitespace from the buffer (unless
    # whitespace is significant here) and writes the end tag.
    #
    # @param name [String] element name as reported by the parser
    def end_element(name)
      name = normalise_name(name)
      dump_text_node
      # The element being closed is still on the stack at this point, so
      # content of e.g. <pre> keeps its trailing whitespace.
      buf.rstrip! unless in_pre_element?
      @stack.pop
      buf << "</#{name}>"
    end

    # Drops ordinary comments; I.E "conditional comments" are retained as-is.
    #
    # @param string [String] the comment text without its delimiters
    def comment(string)
      return unless string =~ CONDITIONAL_COMMENT
      # Flush text seen so far first, otherwise it would be written after
      # the comment and the document order would change.
      dump_text_node
      buf << "<!--#{string}-->"
    end

    # Collects CDATA content as part of the pending text node.
    def cdata_block(string)
      text_node << string
    end

    # Collects character data as part of the pending text node.
    def characters(chars)
      text_node << chars
    end

    private

    # Normalises the entities of an attribute value and wraps it in double
    # quotes when it cannot be written unquoted.
    def format_attribute_value(value)
      # to_s guards against a missing value; presumably the parser reports
      # nil for attributes written without one - TODO confirm.
      value = format_entities(value.to_s)
      value_needs_quoting?(value) ? %Q{"#{value}"} : value
    end

    # Element and attribute names are compared and emitted as lower-case symbols.
    def normalise_name(name)
      name.downcase.to_sym
    end

    # Builds the attribute part of a start tag: names normalised, attributes
    # sorted by name, boolean attributes reduced to their bare name.
    #
    # @return [String] '' when there are no attributes, otherwise the
    #   attributes preceded by a single space
    def format_attributes(attrs, element)
      return '' if attrs.empty?
      pairs = Hash[attrs].map { |name, value| [normalise_name(name), value] }
      formatted = pairs.sort_by { |name, _value| name }.map do |name, value|
        if boolean_attribute?(element, name)
          name.to_s
        else
          "#{name}=#{format_attribute_value(value)}"
        end
      end
      " #{formatted.join(' ')}"
    end

    # Must the given value be quoted? It may only be written unquoted, as per
    # http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
    # when it is non-empty and contains no whitespace, '"', "'", ">", "<",
    # "=" or "`" characters.
    def value_needs_quoting?(value)
      value.empty? || !!(value =~ UNQUOTED_UNSAFE)
    end

    # Is +attribute+ a boolean attribute, either globally or on +element+?
    def boolean_attribute?(element, attribute)
      e, a = [element, attribute].map(&:to_sym)
      BOOL_ATTR[:_].include?(a) || BOOL_ATTR.fetch(e, []).include?(a)
    end

    # Decodes every entity in +html+ and re-encodes only the basic ones.
    #
    # @param except [Hash] entity name => literal replacement, applied after
    #   encoding to entities that should appear as plain characters
    def format_entities(html, except = {})
      html = entities.encode(entities.decode(html), :basic)
      except.each { |name, replace| html.gsub!("&#{name};", replace) }
      html
    end

    # Returns the pending text node in its output form.
    def format_text_node
      # Re-encoding "<", ">" or "&" inside <script>/<style> would change the
      # script or stylesheet, so raw text is passed through verbatim.
      return text_node.dup if in_raw_text_element?
      text = format_entities(text_node, TEXT_LITERALS)
      return text if in_pre_element?
      # Every kind of HTML line break and tab collapses to a space.
      text.tr!("\t\n\f\r", ' ')
      # Don't strip inter-element white space for flow elements
      text.lstrip! unless buf =~ CLOSING_TAG_TAIL && in_flow_element?
      text.squeeze(' ')
    end

    # Is any currently open element a flow element?
    def in_flow_element?
      @stack.any? { |tag| FLOW_ELEMENTS.include?(tag) }
    end

    # Is any currently open element one in which whitespace is significant?
    def in_pre_element?
      @stack.any? { |tag| PRE_TAGS.include?(tag) }
    end

    # Is the innermost open element a raw text element?
    def in_raw_text_element?
      RAW_TEXT_TAGS.include?(@stack.last)
    end

    # Appends the formatted pending text to the buffer and resets it.
    def dump_text_node
      buf << format_text_node
      text_node.clear
    end
  end
end