html5small 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. data/.document +5 -0
  2. data/LICENSE +674 -0
  3. data/README.md +7 -0
  4. data/Rakefile +38 -0
  5. data/VERSION +1 -0
  6. data/bin/html5small +3 -0
  7. data/fixtures/attribute-value-ampersand.html +4 -0
  8. data/fixtures/attribute-value-ampersand.html.min +1 -0
  9. data/fixtures/attribute-value-quot.html +4 -0
  10. data/fixtures/attribute-value-quot.html.min +1 -0
  11. data/fixtures/dl.html +10 -0
  12. data/fixtures/dl.html.min +1 -0
  13. data/fixtures/entities-expand.html +4 -0
  14. data/fixtures/entities-expand.html.min +1 -0
  15. data/fixtures/entities-no-expand.html +3 -0
  16. data/fixtures/entities-no-expand.html.min +1 -0
  17. data/fixtures/ie.html +8 -0
  18. data/fixtures/ie.html.min +3 -0
  19. data/fixtures/lists.html +8 -0
  20. data/fixtures/lists.html.min +1 -0
  21. data/fixtures/newlines.html +6 -0
  22. data/fixtures/newlines.html.min +1 -0
  23. data/fixtures/normalise-attribute-name.html +4 -0
  24. data/fixtures/normalise-attribute-name.html.min +1 -0
  25. data/fixtures/normalise-tag-name.html +5 -0
  26. data/fixtures/normalise-tag-name.html.min +1 -0
  27. data/fixtures/pre-entities.html +7 -0
  28. data/fixtures/pre-entities.html.min +4 -0
  29. data/fixtures/pre.html +9 -0
  30. data/fixtures/pre.html.min +6 -0
  31. data/fixtures/quot-entity.html +5 -0
  32. data/fixtures/quot-entity.html.min +1 -0
  33. data/fixtures/skeleton.html +12 -0
  34. data/fixtures/skeleton.html.min +1 -0
  35. data/fixtures/sort-attributes.html +4 -0
  36. data/fixtures/sort-attributes.html.min +1 -0
  37. data/fixtures/table.html +21 -0
  38. data/fixtures/table.html.min +1 -0
  39. data/fixtures/tabs.html +4 -0
  40. data/fixtures/tabs.html.min +1 -0
  41. data/fixtures/whitespace-complex.html +9 -0
  42. data/fixtures/whitespace-complex.html.min +1 -0
  43. data/fixtures/whitespace-p.html +4 -0
  44. data/fixtures/whitespace-p.html.min +1 -0
  45. data/lib/html5small.rb +10 -0
  46. data/lib/html5small/Minifier.rb +156 -0
  47. data/lib/html5small/optional.rb +134 -0
  48. data/spec/h5-min_spec.rb +37 -0
  49. data/spec/spec.opts +1 -0
  50. data/spec/spec_helper.rb +9 -0
  51. metadata +162 -0
@@ -0,0 +1,134 @@
1
+ # coding: utf-8
2
module HTML5
  # Removes start and end tags that HTML5 allows authors to leave out.
  module OptionalTags
    # Patterns matching tags that may be dropped, as per
    # http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#optional-tags
    #
    # The patterns are applied in the order listed; some later entries exist
    # only because an earlier one may already have removed a neighbouring tag.
    OPTIONAL = [
      # An html element's start tag may be omitted if the first thing inside
      # the html element is not a comment.
      %r{<html>},
      # An html element's end tag may be omitted if the html element is not
      # immediately followed by a comment.
      %r{</html>},
      # A head element's start tag may be omitted if the element is empty, or
      # if the first thing inside the head element is an element.
      %r{<head>},
      # A head element's end tag may be omitted if the head element is not
      # immediately followed by a space character or a comment.
      %r{</head>},
      # A body element's start tag may be omitted if the element is empty, or
      # if the first thing inside the body element is not a space character or
      # a comment, except if the first thing inside the body element is a
      # script or style element.
      %r{<body>\s?(?!<(script|style))},
      # A body element's end tag may be omitted if the body element is not
      # immediately followed by a comment.
      %r{</body>},
      # A li element's end tag may be omitted if the li element is immediately
      # followed by another li element or if there is no more content in the
      # parent element.
      %r{</li>\s?(?=(<li|</[uo]l))},
      # A dt element's end tag may be omitted if the dt element is immediately
      # followed by another dt element or a dd element.
      %r{</dt>\s?(?=<d[td])},
      # A dd element's end tag may be omitted if the dd element is immediately
      # followed by another dd element or a dt element, or if there is no more
      # content in the parent element.
      %r{</dd>\s?(?=(<d[dt]|</dl))},
      # A p element's end tag may be omitted if the p element is immediately
      # followed by an address, article, aside, blockquote, dir, div, dl,
      # fieldset, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr,
      # menu, nav, ol, p, pre, section, table, or ul, element, or if there is
      # no more content in the parent element and the parent element is not an
      # a element.
      %r{</p>\s?(?=(<(address|article|aside|blockquote|dir|div|dl|fieldset|footer|
        form|h\d|header|hgroup|hr|menu|nav|ol|p|pre|section|table|ul)|</))}x,
      %r{</p>\s?\Z},
      # An rt element's end tag may be omitted if the rt element is
      # immediately followed by an rt or rp element, or if there is no more
      # content in the parent element.
      %r{</rt>\s?(?=(<r[tp]|</))},
      # An rp element's end tag may be omitted if the rp element is
      # immediately followed by an rt or rp element, or if there is no more
      # content in the parent element.
      %r{</rp>\s?(?=(<r[tp]|</))},
      # An optgroup element's end tag may be omitted if the optgroup element
      # is immediately followed by another optgroup element, or if there is no
      # more content in the parent element.
      %r{</optgroup>\s?(?=(<optgroup|</))},
      # An option element's end tag may be omitted if the option element is
      # immediately followed by another option element, or if it is
      # immediately followed by an optgroup element, or if there is no more
      # content in the parent element.
      %r{</option>\s?(?=(<(option|optgroup)|</))},
      # A colgroup element's start tag may be omitted if the first thing
      # inside the colgroup element is a col element, and if the element is
      # not immediately preceded by another colgroup element whose end tag has
      # been omitted. (It can't be omitted if the element is empty.)
      %r{<colgroup>\s?(?=<col)}, # FIXME: Incomplete
      # A colgroup element's end tag may be omitted if the colgroup element is
      # not immediately followed by a space character or a comment.
      %r{</colgroup>},
      # A thead element's end tag may be omitted if the thead element is
      # immediately followed by a tbody or tfoot element.
      %r{</thead>\s?(?=<t(body|foot))},
      # A tbody element's start tag may be omitted if the first thing inside
      # the tbody element is a tr element, and if the element is not
      # immediately preceded by a tbody, thead, or tfoot element whose end tag
      # has been omitted. (It can't be omitted if the element is empty.)
      %r{(?<=</t(head|body|foot)>)\s?<tbody>\s?(?=<tr)}x, # TODO: Look again
      # A tbody element's end tag may be omitted if the tbody element is
      # immediately followed by a tbody or tfoot element, or if there is no
      # more content in the parent element.
      %r{</tbody>\s?(?=(<t(body|foot)|</))},
      # A tfoot element's end tag may be omitted if the tfoot element is
      # immediately followed by a tbody element, or if there is no more
      # content in the parent element.
      %r{</tfoot>\s?(?=(<tbody|</))},
      # A tr element's end tag may be omitted if the tr element is immediately
      # followed by another tr element, or if there is no more content in the
      # parent element.
      %r{</tr>\s?(?=(<tr|</))},
      # We may have already removed the parent's end tag.
      %r{</tr>\s?(?=(<t(body|foot|head)))},
      # A td element's end tag may be omitted if the td element is immediately
      # followed by a td or th element, or if there is no more content in the
      # parent element.
      %r{</td>\s?(?=(<t[dhr]|</))},
      # A th element's end tag may be omitted if the th element is immediately
      # followed by a td or th element, or if there is no more content in the
      # parent element.
      %r{</th>\s?(?=(<t[dhr]|</))},
      %r{</th>\s?(?=((<t(body|foot))|</))},

      # However, a start tag must never be omitted if it has any attributes.

      # The following are void elements
      # (http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#void-elements)
      # Therefore, their end tag must always be omitted
      %r{</area>},
      %r{</base>},
      %r{</br>},
      %r{</col>},
      %r{</command>},
      %r{</embed>},
      %r{</hr>},
      %r{</img>},
      %r{</input>},
      %r{</keygen>},
      %r{</link>},
      %r{</meta>},
      %r{</param>},
      %r{</source>},
      %r{</track>},
      %r{</wbr>},
    ].freeze

    # Case-insensitive versions of the OPTIONAL patterns, built once.
    #
    # Interpolating a Regexp into another literal (/#{regex}/i) embeds it as
    # "(?-mix:...)", which switches the outer /i flag back off, so each pattern
    # is rebuilt from its source with IGNORECASE added to its own options
    # (keeping /x where it was set).
    CASE_INSENSITIVE = OPTIONAL.map do |regex|
      Regexp.new(regex.source, regex.options | Regexp::IGNORECASE)
    end.freeze

    # Deletes every match of the OPTIONAL patterns from the given markup,
    # ignoring the case of tag names.
    #
    # @param html [String] markup to strip; it is modified in place
    # @return [String] the same string object that was passed in
    def self.remove(html)
      CASE_INSENSITIVE.each { |regex| html.gsub!(regex, '') }
      html
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Minifies a fixture and loads the output it is expected to produce.
#
# The fixture is looked up in the fixtures/ directory that sits next to this
# spec directory, so the result does not depend on the current working
# directory.
#
# @param source [String] fixture file name, e.g. "table.html"
# @return [Array] the value returned by HTML5.minify for the fixture,
#   followed by the contents of the matching ".min" file with its trailing
#   newline removed
def minify(source)
  path = File.expand_path(File.join('..', 'fixtures', source), File.dirname(__FILE__))
  [HTML5.minify(File.read(path)), File.read("#{path}.min").chomp]
end
7
+
8
# Maps each fixture (underscored base name) to the description of the example
# that checks it.
SPECS = {
  skeleton: "can minify a skeleton HTML document",
  pre: "doesn't collapse whitespace inside <pre> tags",
  whitespace_p: "collapses whitespace inside <p> tags",
  whitespace_complex: "collapses complex whitespace inside <p> tags",
  lists: "collapses whitespace inside lists",
  ie: "doesn't remove I.E conditional comments",
  table: "removes optional elements in tables",
  dl: "removes optional elements in definition lists",
  normalise_tag_name: "normalises case of element names",
  normalise_attribute_name: "normalises case of attribute names",
  entities_no_expand: "doesn't decode unsafe HTML entities",
  pre_entities: "doesn't decode unsafe HTML entities in preformatted elements",
  attribute_value_ampersand: "doesn't decode ampersand entity in attribute values",
  attribute_value_quot: "encodes quotation marks in attribute values",
  entities_expand: "decodes safe HTML entities",
  sort_attributes: "sorts attribute names alphabetically",
  quot_entity: "expands &quot; entity in text nodes",
  newlines: "should treat newlines in text as a space",
  tabs: "should treat tabs in text as a space",
}.freeze
29
+
30
# Defines one example per SPECS entry: the fixture named after the key (with
# underscores turned into hyphens) must minify to its ".min" counterpart.
describe HTML5, '.minify' do
  SPECS.each do |fixture, description|
    it description do
      fixture_file = "#{fixture.to_s.tr('_', '-')}.html"
      actual, expected = minify(fixture_file)
      actual.to_s.should == expected
    end
  end
end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
# Put the spec directory and the sibling lib directory at the front of the
# load path before loading the library under test and RSpec.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(spec_dir)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))

require 'html5small'
require 'rspec'
require 'rspec/autorun'

# No custom RSpec settings are applied.
RSpec.configure do |_config|
end
metadata ADDED
@@ -0,0 +1,162 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: html5small
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Run Paint Run Run
9
+ - Ruben Verborgh
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+ date: 2012-11-04 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: htmlentities
17
+ requirement: !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: 4.1.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ! '>='
29
+ - !ruby/object:Gem::Version
30
+ version: 4.1.0
31
+ - !ruby/object:Gem::Dependency
32
+ name: nokogiri
33
+ requirement: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: 1.5.0
39
+ type: :runtime
40
+ prerelease: false
41
+ version_requirements: !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: 1.5.0
47
+ - !ruby/object:Gem::Dependency
48
+ name: rspec
49
+ requirement: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 2.0.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: 2.0.0
63
+ - !ruby/object:Gem::Dependency
64
+ name: yard
65
+ requirement: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ type: :development
72
+ prerelease: false
73
+ version_requirements: !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ! '>='
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ description: Minifier for HTML5 documents
80
+ email: ruben.verborgh@gmail.com
81
+ executables:
82
+ - html5small
83
+ extensions: []
84
+ extra_rdoc_files:
85
+ - LICENSE
86
+ - README.md
87
+ files:
88
+ - .document
89
+ - LICENSE
90
+ - README.md
91
+ - Rakefile
92
+ - VERSION
93
+ - bin/html5small
94
+ - fixtures/attribute-value-ampersand.html
95
+ - fixtures/attribute-value-ampersand.html.min
96
+ - fixtures/attribute-value-quot.html
97
+ - fixtures/attribute-value-quot.html.min
98
+ - fixtures/dl.html
99
+ - fixtures/dl.html.min
100
+ - fixtures/entities-expand.html
101
+ - fixtures/entities-expand.html.min
102
+ - fixtures/entities-no-expand.html
103
+ - fixtures/entities-no-expand.html.min
104
+ - fixtures/ie.html
105
+ - fixtures/ie.html.min
106
+ - fixtures/lists.html
107
+ - fixtures/lists.html.min
108
+ - fixtures/newlines.html
109
+ - fixtures/newlines.html.min
110
+ - fixtures/normalise-attribute-name.html
111
+ - fixtures/normalise-attribute-name.html.min
112
+ - fixtures/normalise-tag-name.html
113
+ - fixtures/normalise-tag-name.html.min
114
+ - fixtures/pre-entities.html
115
+ - fixtures/pre-entities.html.min
116
+ - fixtures/pre.html
117
+ - fixtures/pre.html.min
118
+ - fixtures/quot-entity.html
119
+ - fixtures/quot-entity.html.min
120
+ - fixtures/skeleton.html
121
+ - fixtures/skeleton.html.min
122
+ - fixtures/sort-attributes.html
123
+ - fixtures/sort-attributes.html.min
124
+ - fixtures/table.html
125
+ - fixtures/table.html.min
126
+ - fixtures/tabs.html
127
+ - fixtures/tabs.html.min
128
+ - fixtures/whitespace-complex.html
129
+ - fixtures/whitespace-complex.html.min
130
+ - fixtures/whitespace-p.html
131
+ - fixtures/whitespace-p.html.min
132
+ - lib/html5small.rb
133
+ - lib/html5small/Minifier.rb
134
+ - lib/html5small/optional.rb
135
+ - spec/h5-min_spec.rb
136
+ - spec/spec.opts
137
+ - spec/spec_helper.rb
138
+ homepage: http://github.com/RubenVerborgh/HTML5small
139
+ licenses: []
140
+ post_install_message:
141
+ rdoc_options: []
142
+ require_paths:
143
+ - lib
144
+ required_ruby_version: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ! '>='
148
+ - !ruby/object:Gem::Version
149
+ version: '0'
150
+ required_rubygems_version: !ruby/object:Gem::Requirement
151
+ none: false
152
+ requirements:
153
+ - - ! '>='
154
+ - !ruby/object:Gem::Version
155
+ version: '0'
156
+ requirements: []
157
+ rubyforge_project:
158
+ rubygems_version: 1.8.23
159
+ signing_key:
160
+ specification_version: 3
161
+ summary: HTML5small
162
+ test_files: []