html_press 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -6
- data/.travis.yml +8 -8
- data/Gemfile +6 -6
- data/Readme.md +73 -71
- data/html_press.gemspec +26 -26
- data/lib/html_press.rb +24 -23
- data/lib/html_press/html.rb +325 -325
- data/lib/html_press/html_entities.rb +36 -36
- data/lib/html_press/version.rb +3 -3
- data/profile/index.html +37689 -37689
- data/profile/profile.rb +28 -28
- data/spec/html_press_spec.rb +258 -256
- metadata +19 -37
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: efbc2fa52751f8f1f80c58c759d884e26bfd11bd
|
4
|
+
data.tar.gz: 458a9c3dddb8c1af510d222c5e629952374c1908
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b30dcd0a49adda32a97912a24f6e7a1ee68a3d860d3bca4ac4ed06545be1bcad7a59808754a7d337e936b1b8a2efc0f5f47ee1e047dbe140123792862dcb529a
|
7
|
+
data.tar.gz: 4e46df1cbc717db5294c9474006dd70bc113659c9730dbe8dfa353f554fa009cd5978e1f88e3a9368640dee4be998527774fcce4f831f8f177d733f683212db5
|
data/.gitignore
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
*.gem
|
2
|
-
.bundle
|
3
|
-
Gemfile.lock
|
4
|
-
pkg/*
|
5
|
-
profile/reports/*
|
6
|
-
/.project
|
1
|
+
*.gem
|
2
|
+
.bundle
|
3
|
+
Gemfile.lock
|
4
|
+
pkg/*
|
5
|
+
profile/reports/*
|
6
|
+
/.project
|
data/.travis.yml
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
language: ruby
|
2
|
-
rvm:
|
3
|
-
- 1.8.7
|
4
|
-
- 1.9.2
|
5
|
-
- 1.9.3
|
6
|
-
- jruby-18mode
|
7
|
-
- rbx-18mode
|
8
|
-
# - jruby-19mode
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.8.7
|
4
|
+
- 1.9.2
|
5
|
+
- 1.9.3
|
6
|
+
- jruby-18mode
|
7
|
+
- rbx-18mode
|
8
|
+
# - jruby-19mode
|
9
9
|
# - rbx-19mode
|
data/Gemfile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
source
|
2
|
-
|
3
|
-
# Specify your gem's dependencies in html_press.gemspec
|
4
|
-
gemspec
|
5
|
-
|
6
|
-
|
1
|
+
source "http://rubygems.org"
|
2
|
+
|
3
|
+
# Specify your gem's dependencies in html_press.gemspec
|
4
|
+
gemspec
|
5
|
+
|
6
|
+
gem "ruby-prof", :platforms => :mri
|
data/Readme.md
CHANGED
@@ -1,71 +1,73 @@
|
|
1
|
-
# HtmlPress [
|
23
|
-
|
24
|
-
### Rails
|
25
|
-
TODO :exclamation:
|
26
|
-
|
27
|
-
### Sinatra
|
28
|
-
TODO :exclamation:
|
29
|
-
|
30
|
-
### Command line
|
31
|
-
TODO :exclamation:
|
32
|
-
|
33
|
-
## TODO
|
34
|
-
- use a parser ([鋸](https://github.com/tenderlove/nokogiri)) instead of regexps
|
35
|
-
- add option to convert relative urls to absolute urls (for SEO)
|
36
|
-
- [ambiguous ampersands](http://mathiasbynens.be/notes/ambiguous-ampersands) for compression?
|
37
|
-
- Support other js/css minifiers (Closure, YUI compressor)
|
38
|
-
- htmlTidy
|
39
|
-
- Rack plugin
|
40
|
-
- add script to benchmark real projects like amazon or stackoverflow
|
41
|
-
- support html5 tags
|
42
|
-
- add more options
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
- [
|
47
|
-
-
|
48
|
-
- [
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
- [
|
55
|
-
-
|
56
|
-
- [
|
57
|
-
-
|
58
|
-
- [
|
59
|
-
- [
|
60
|
-
- [
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
1
|
+
# HtmlPress [Build Status](https://travis-ci.org/stereobooster/html_press) [Dependency Status](https://gemnasium.com/stereobooster/html_press) [Code Climate](https://codeclimate.com/github/stereobooster/html_press)
|
2
|
+
|
3
|
+
## How it works
|
4
|
+
|
5
|
+
Remove all whitespace junk. Leave only HTML
|
6
|
+
|
7
|
+
```
|
8
|
+
1. ┌――――――――――――――――――╖ 2. ┌――――――――――――――――――╖
|
9
|
+
●――――――――――――――├―╢ <html> ws junk ║ ●――――――――├―――――――――╢ <html> ║
|
10
|
+
└――――――――――――――――――╜ └――――――――――――――――――╜
|
11
|
+
```
|
12
|
+
|
13
|
+
## Usage
|
14
|
+
|
15
|
+
### Ruby
|
16
|
+
```ruby
|
17
|
+
require 'html_press'
|
18
|
+
compressed_html = HtmlPress.press html
|
19
|
+
```
|
20
|
+
|
21
|
+
### Jekyll
|
22
|
+
see [jekyll_press](https://github.com/stereobooster/jekyll_press)
|
23
|
+
|
24
|
+
### Rails
|
25
|
+
TODO :exclamation:
|
26
|
+
|
27
|
+
### Sinatra
|
28
|
+
TODO :exclamation:
|
29
|
+
|
30
|
+
### Command line
|
31
|
+
TODO :exclamation:
|
32
|
+
|
33
|
+
## TODO
|
34
|
+
- use a parser ([鋸](https://github.com/tenderlove/nokogiri)) instead of regexps
|
35
|
+
- add option to convert relative urls to absolute urls (for SEO)
|
36
|
+
- [ambiguous ampersands](http://mathiasbynens.be/notes/ambiguous-ampersands) for compression?
|
37
|
+
- Support other js/css minifiers (Closure, YUI compressor)
|
38
|
+
- htmlTidy
|
39
|
+
- Rack plugin
|
40
|
+
- add script to benchmark real projects like amazon or stackoverflow
|
41
|
+
- support html5 tags
|
42
|
+
- add more options
|
43
|
+
- Optimization: perform substring replacement based on the substring's length and its position in the initial string
|
44
|
+
|
45
|
+
## Alternatives
|
46
|
+
- [html-minifier](https://github.com/kangax/html-minifier) (js), [test suite](https://github.com/kangax/html-minifier/blob/gh-pages/tests/index.html), ruby wrapper - [html_minifier](https://github.com/stereobooster/html_minifier)
|
47
|
+
- [htmlcompressor](http://code.google.com/p/htmlcompressor/) (java), [test suite](http://code.google.com/p/htmlcompressor/source/browse/#svn%2Ftrunk%2Fsrc%2Ftest%2Fresources%2Fhtml%253Fstate%253Dclosed)
|
48
|
+
- PHPTal compress (php), [test suite](https://svn.motion-twin.com/phptal/trunk/tests/CompressTest.php)
|
49
|
+
- [W3 total cache](http://wordpress.org/extend/plugins/w3-total-cache/) - WP plugin from smashingmagazine contains html minifier (php)
|
50
|
+
|
51
|
+
## Additional tools
|
52
|
+
- [jeanny](https://github.com/gfranco/jeanny) - rename css classes and ids in css and html files
|
53
|
+
- shorten the paths to images in css
|
54
|
+
- [deadweight](https://github.com/aanand/deadweight) - remove unused css rules from css files
|
55
|
+
- [csscss](http://zmoazeni.github.com/csscss/) will parse any CSS files you give it and let you know which rulesets have duplicated declarations.
|
56
|
+
- [css-spriter](https://github.com/aberant/css-spriter), [sprite-factory](https://github.com/jakesgordon/sprite-factory) - combine images in sprites
|
57
|
+
- resize images to the size defined in html and, vice versa, embed the size of images in html
|
58
|
+
- [#1](http://habrahabr.ru/post/90761/), [#2](http://ap-project.org/English/Article/View/53/) - inline small images in css
|
59
|
+
- [smusher](https://github.com/grosser/smusher), jpegtran, optipng - losslessly minify images
|
60
|
+
- [sprockets](https://github.com/sstephenson/sprockets), [jammit](https://github.com/documentcloud/jammit) - asset bundlers
|
61
|
+
- [w3c_validators](https://github.com/alexdunae/w3c_validators)
|
62
|
+
- [reduce](https://github.com/grosser/reduce)
|
63
|
+
|
64
|
+
## Resources
|
65
|
+
|
66
|
+
### Minimize HTML
|
67
|
+
- http://perfectionkills.com/experimenting-with-html-minifier
|
68
|
+
- http://perfectionkills.com/optimizing-html
|
69
|
+
- https://developers.google.com/speed/articles/optimizing-html
|
70
|
+
|
71
|
+
### Front-end optimization
|
72
|
+
- https://developers.google.com/speed/docs/insights/rules
|
73
|
+
- http://developer.yahoo.com/performance/rules.html
|
data/html_press.gemspec
CHANGED
@@ -1,26 +1,26 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
$:.push File.expand_path("../lib", __FILE__)
|
3
|
-
require "html_press/version"
|
4
|
-
|
5
|
-
Gem::Specification.new do |s|
|
6
|
-
s.name = "html_press"
|
7
|
-
s.version = HtmlPress::VERSION
|
8
|
-
s.authors = ["stereobooster"]
|
9
|
-
s.email = ["stereobooster@gmail.com"]
|
10
|
-
s.homepage = "https://github.com/stereobooster/html_press"
|
11
|
-
s.summary = %q{Compress html}
|
12
|
-
s.description = %q{Ruby gem for compressing html}
|
13
|
-
s.license = "MIT"
|
14
|
-
|
15
|
-
s.files = `git ls-files`.split("\n")
|
16
|
-
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
-
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
|
-
s.require_paths = ["lib"]
|
19
|
-
|
20
|
-
s.add_development_dependency "rspec"
|
21
|
-
s.add_development_dependency "rake"
|
22
|
-
|
23
|
-
s.add_dependency "multi_css", ">= 0.1.0"
|
24
|
-
s.add_dependency "multi_js"
|
25
|
-
s.add_dependency "htmlentities"
|
26
|
-
end
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "html_press/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "html_press"
|
7
|
+
s.version = HtmlPress::VERSION
|
8
|
+
s.authors = ["stereobooster"]
|
9
|
+
s.email = ["stereobooster@gmail.com"]
|
10
|
+
s.homepage = "https://github.com/stereobooster/html_press"
|
11
|
+
s.summary = %q{Compress html}
|
12
|
+
s.description = %q{Ruby gem for compressing html}
|
13
|
+
s.license = "MIT"
|
14
|
+
|
15
|
+
s.files = `git ls-files`.split("\n")
|
16
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
|
+
s.require_paths = ["lib"]
|
19
|
+
|
20
|
+
s.add_development_dependency "rspec"
|
21
|
+
s.add_development_dependency "rake"
|
22
|
+
|
23
|
+
s.add_dependency "multi_css", ">= 0.1.0"
|
24
|
+
s.add_dependency "multi_js", ">= 0.1.0"
|
25
|
+
s.add_dependency "htmlentities"
|
26
|
+
end
|
data/lib/html_press.rb
CHANGED
@@ -1,23 +1,24 @@
|
|
1
|
-
require "html_press/version"
|
2
|
-
require "html_press/html_entities"
|
3
|
-
require "html_press/html"
|
4
|
-
|
5
|
-
require 'multi_css'
|
6
|
-
require 'multi_js'
|
7
|
-
|
8
|
-
module HtmlPress
|
9
|
-
def self.press(text, options = {})
|
10
|
-
HtmlPress::Html.new(options).press text
|
11
|
-
end
|
12
|
-
|
13
|
-
# for backward compatibility
|
14
|
-
def self.compress(text, options = {})
|
15
|
-
HtmlPress::Html.new(options).press text
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.js_compressor (text, options = nil)
|
19
|
-
options ||= {}
|
20
|
-
options[:
|
21
|
-
|
22
|
-
|
23
|
-
end
|
1
|
+
require "html_press/version"
|
2
|
+
require "html_press/html_entities"
|
3
|
+
require "html_press/html"
|
4
|
+
|
5
|
+
require 'multi_css'
|
6
|
+
require 'multi_js'
|
7
|
+
|
8
|
+
module HtmlPress
|
9
|
+
def self.press(text, options = {})
|
10
|
+
HtmlPress::Html.new(options).press text
|
11
|
+
end
|
12
|
+
|
13
|
+
# for backward compatibility
|
14
|
+
def self.compress(text, options = {})
|
15
|
+
HtmlPress::Html.new(options).press text
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.js_compressor (text, options = nil)
|
19
|
+
options ||= {}
|
20
|
+
options[:output] ||= {}
|
21
|
+
options[:output][:inline_script] = true
|
22
|
+
MultiJs.compile(text, options).gsub(/;$/,'')
|
23
|
+
end
|
24
|
+
end
|
data/lib/html_press/html.rb
CHANGED
@@ -1,325 +1,325 @@
|
|
1
|
-
module HtmlPress
|
2
|
-
class Html
|
3
|
-
|
4
|
-
DEFAULTS = {
|
5
|
-
:logger => false,
|
6
|
-
:unquoted_attributes => false,
|
7
|
-
:drop_empty_values => false,
|
8
|
-
:strip_crlf => false,
|
9
|
-
:js_minifier_options => false
|
10
|
-
}
|
11
|
-
|
12
|
-
def initialize (options = {})
|
13
|
-
@options = DEFAULTS.merge(options)
|
14
|
-
if @options.keys.include? :dump_empty_values
|
15
|
-
@options[:drop_empty_values] = @options.delete(:dump_empty_values)
|
16
|
-
warn "dump_empty_values deprecated use drop_empty_values"
|
17
|
-
end
|
18
|
-
if @options[:logger] && !@options[:logger].respond_to?(:error)
|
19
|
-
raise ArgumentError, 'Logger has no error method'
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
def press (html)
|
24
|
-
out = html.respond_to?(:read) ? html.read : html.dup
|
25
|
-
|
26
|
-
@replacement_hash = 'MINIFYHTML' + Time.now.to_i.to_s
|
27
|
-
@placeholders = []
|
28
|
-
|
29
|
-
out = process_ie_conditional_comments out
|
30
|
-
out = process_scripts out
|
31
|
-
out = process_styles out
|
32
|
-
out = process_html_comments out
|
33
|
-
out = process_pres out
|
34
|
-
|
35
|
-
out = HtmlPress.entities_compressor out
|
36
|
-
|
37
|
-
out = trim_lines out
|
38
|
-
out = process_block_elements out
|
39
|
-
out = process_textareas out
|
40
|
-
|
41
|
-
# use newlines before 1st attribute in open tags (to limit line lengths)
|
42
|
-
# out.gsub!(/(<[a-z\-:]+)\s+([^>]+>)/i, "\\1\n\\2")
|
43
|
-
|
44
|
-
out = process_attributes out
|
45
|
-
out = process_whitespaces out
|
46
|
-
out = fill_placeholders out
|
47
|
-
|
48
|
-
out
|
49
|
-
end
|
50
|
-
|
51
|
-
# for backward compatibility
|
52
|
-
alias :compile :press
|
53
|
-
|
54
|
-
protected
|
55
|
-
|
56
|
-
# IE conditional comments
|
57
|
-
def process_ie_conditional_comments (out)
|
58
|
-
out.gsub /(<!--\[[^\]]+\]>([\s\S]*?)<!\[[^\]]+\]-->)\s*/ do
|
59
|
-
m = $1
|
60
|
-
comment = $2
|
61
|
-
comment_compressed = Html.new.press(comment)
|
62
|
-
m.gsub!(comment, comment_compressed)
|
63
|
-
reserve m
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
# replace SCRIPTs (and minify) with placeholders
|
68
|
-
def process_scripts (out)
|
69
|
-
out.gsub /(<script\b[^>]*?>([\s\S]*?)<\/script>)\s*/i do
|
70
|
-
js = $2
|
71
|
-
m = $1.gsub /^<script\s([^>]+)>/i do |m|
|
72
|
-
attrs(m, 'script', true)
|
73
|
-
end
|
74
|
-
begin
|
75
|
-
js_compressed = HtmlPress.js_compressor js, @options[:js_minifier_options]
|
76
|
-
m.gsub!(js, js_compressed)
|
77
|
-
rescue MultiJs::ParseError => e
|
78
|
-
log e.message
|
79
|
-
end
|
80
|
-
reserve m
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
# replace STYLEs (and minify) with placeholders
|
85
|
-
def process_styles (out)
|
86
|
-
out.gsub /(<style\b[^>]*?>([\s\S]*?)<\/style>)\s*/i do
|
87
|
-
css = $2
|
88
|
-
m = $1.gsub /^<style\s([^>]+)>/i do |m|
|
89
|
-
attrs(m, 'style', true)
|
90
|
-
end
|
91
|
-
begin
|
92
|
-
css_compressed = MultiCss.min css
|
93
|
-
m.gsub!(css, css_compressed)
|
94
|
-
rescue Exception => e
|
95
|
-
log e.message
|
96
|
-
end
|
97
|
-
reserve m
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
# remove html comments (not IE conditional comments)
|
102
|
-
def process_html_comments (out)
|
103
|
-
out.gsub /<!--([\s\S]*?)-->/, ''
|
104
|
-
end
|
105
|
-
|
106
|
-
# replace PREs with placeholders
|
107
|
-
def process_pres (out)
|
108
|
-
out.gsub /(<pre\b[^>]*?>([\s\S]*?)<\/pre>)\s*/i do
|
109
|
-
pre = $2
|
110
|
-
m = $1
|
111
|
-
pre_compressed = pre.lines.map{ |l| l.gsub(/\s+$/, '') }.join("\n")
|
112
|
-
pre_compressed = HtmlPress.entities_compressor pre_compressed
|
113
|
-
m.gsub!(pre, pre_compressed)
|
114
|
-
reserve m
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
# trim each line
|
119
|
-
def trim_lines (out)
|
120
|
-
out.gsub(/^\s+|\s+$/m, '')
|
121
|
-
end
|
122
|
-
|
123
|
-
# remove whitespaces outside of block elements
|
124
|
-
def process_block_elements (out)
|
125
|
-
re = '\\s+(<\\/?(?:area|base(?:font)?|blockquote|body' +
|
126
|
-
'|caption|center|cite|col(?:group)?|dd|dir|div|dl|dt|fieldset|form' +
|
127
|
-
'|frame(?:set)?|h[1-6]|head|hr|html|legend|li|link|map|menu|meta' +
|
128
|
-
'|ol|opt(?:group|ion)|p|param|t(?:able|body|head|d|h|r|foot|itle)' +
|
129
|
-
'|ul)\\b[^>]*>)'
|
130
|
-
|
131
|
-
re = Regexp.new(re)
|
132
|
-
out.gsub!(re, '\\1')
|
133
|
-
|
134
|
-
# remove whitespaces outside of all elements
|
135
|
-
out.gsub! />([^<]+)</ do |m|
|
136
|
-
m.gsub(/^\s+|\s+$/, ' ')
|
137
|
-
end
|
138
|
-
|
139
|
-
out
|
140
|
-
end
|
141
|
-
|
142
|
-
# replace TEXTAREAs with placeholders
|
143
|
-
def process_textareas (out)
|
144
|
-
out.gsub /(<textarea\b[^>]*?>[\s\S]*?<\/textarea>)\s*/i do |m|
|
145
|
-
reserve m
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
# attributes
|
150
|
-
def process_attributes (out)
|
151
|
-
out.gsub /<[a-z\-:]+\s([^>]+)>/i do |m|
|
152
|
-
reserve attrs(m, '[a-z\-:]+', true)
|
153
|
-
end
|
154
|
-
end
|
155
|
-
|
156
|
-
# replace two or more whitespaces with one
|
157
|
-
def process_whitespaces (out)
|
158
|
-
out.gsub!(/[\r\n]+/, @options[:strip_crlf] ? ' ' : "\n")
|
159
|
-
out.gsub!(/\s+/, ' ')
|
160
|
-
out
|
161
|
-
end
|
162
|
-
|
163
|
-
# fill placeholders
|
164
|
-
def fill_placeholders (out)
|
165
|
-
re = Regexp.new('%' + @replacement_hash + '%(\d+)%')
|
166
|
-
out.gsub re do |m|
|
167
|
-
m.gsub!(re, "\\1")
|
168
|
-
@placeholders[m.to_i]
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
def log (text)
|
173
|
-
@options[:logger].error text if @options[:logger]
|
174
|
-
end
|
175
|
-
|
176
|
-
def reserve (content)
|
177
|
-
@placeholders.push content
|
178
|
-
'%' + @replacement_hash + '%' + (@placeholders.size - 1).to_s + '%'
|
179
|
-
end
|
180
|
-
|
181
|
-
def attrs (m, tag_name, r)
|
182
|
-
re = "<(" + tag_name + ")(\s[^>]+)?>"
|
183
|
-
re = Regexp.new(re, true)
|
184
|
-
attributes = m.gsub(re, "\\2")
|
185
|
-
if r
|
186
|
-
tag = m.gsub(re, "\\1")
|
187
|
-
else
|
188
|
-
tag = tag_name
|
189
|
-
end
|
190
|
-
|
191
|
-
if attributes.size > 0
|
192
|
-
attributes_compressed = attributes.gsub(/([a-z\-_:]+(="[^"]*")?(='[^']*')?)\s*/i, " \\1")
|
193
|
-
|
194
|
-
attributes_compressed.gsub! /([a-z\-_:]+="[^"]*")/i do |k|
|
195
|
-
attr k, "\"", tag
|
196
|
-
end
|
197
|
-
|
198
|
-
attributes_compressed.gsub! /([a-z\-_:]+='[^']*')/i do |k|
|
199
|
-
attr k, "'", tag
|
200
|
-
end
|
201
|
-
|
202
|
-
attributes_compressed = " " + attributes_compressed.strip
|
203
|
-
|
204
|
-
if attributes_compressed == " /"
|
205
|
-
attributes_compressed = "/"
|
206
|
-
elsif attributes_compressed == " "
|
207
|
-
attributes_compressed = ""
|
208
|
-
end
|
209
|
-
m.gsub(attributes, attributes_compressed)
|
210
|
-
else
|
211
|
-
m
|
212
|
-
end
|
213
|
-
end
|
214
|
-
|
215
|
-
def attr(attribute, delimiter, tag)
|
216
|
-
re = "([a-z\\-_:]+)(=" + delimiter + "[^" + delimiter + "]*" + delimiter + ")?"
|
217
|
-
re = Regexp.new re, true
|
218
|
-
value_original = attribute.gsub(re, "\\2")
|
219
|
-
value = value_original.downcase
|
220
|
-
name_original = attribute.gsub(re, "\\1")
|
221
|
-
name = name_original.downcase
|
222
|
-
tag_name = tag.downcase
|
223
|
-
|
224
|
-
if value.size > 0
|
225
|
-
re = "^=" + delimiter + "|" + delimiter + "$"
|
226
|
-
re = Regexp.new re
|
227
|
-
value_original.gsub!(re, "")
|
228
|
-
end
|
229
|
-
|
230
|
-
case tag_name
|
231
|
-
when "script"
|
232
|
-
if (name == "type" && value_original == "text/javascript") || (name == "language" && value_original == "JavaScript")
|
233
|
-
return ""
|
234
|
-
elsif name == "async" || name == "defer"
|
235
|
-
return name_original
|
236
|
-
end
|
237
|
-
when "form"
|
238
|
-
if name == "method" && value_original == "get"
|
239
|
-
return ""
|
240
|
-
end
|
241
|
-
when /link|style/
|
242
|
-
if name == "type" && value_original == "text/stylesheet"
|
243
|
-
return ""
|
244
|
-
end
|
245
|
-
when /input|textarea|button|select|option|optgroup/
|
246
|
-
if name == "disabled"
|
247
|
-
return name_original
|
248
|
-
end
|
249
|
-
if (tag_name == "input" || tag_name == "textarea") && name == "readonly"
|
250
|
-
return name_original
|
251
|
-
end
|
252
|
-
if tag_name == "option" && name == "selected"
|
253
|
-
return name_original
|
254
|
-
end
|
255
|
-
if tag_name == "input"
|
256
|
-
if name == "type" && value_original == "text"
|
257
|
-
return ""
|
258
|
-
end
|
259
|
-
if name == "checked"
|
260
|
-
return name_original
|
261
|
-
end
|
262
|
-
# if name == "value" && (value == "=\"\"" || value == "=''")
|
263
|
-
# return ''
|
264
|
-
# end
|
265
|
-
end
|
266
|
-
end
|
267
|
-
|
268
|
-
if value.size > 0
|
269
|
-
|
270
|
-
if name == "style"
|
271
|
-
begin
|
272
|
-
value_original = MultiCss.min_attr value_original
|
273
|
-
# TODO what about escaped attribute values?
|
274
|
-
if delimiter == "\""
|
275
|
-
value_original.gsub!("\"", "'")
|
276
|
-
else
|
277
|
-
value_original.gsub!("'", "\"")
|
278
|
-
end
|
279
|
-
rescue MultiCss::ParseError => e
|
280
|
-
log e.message
|
281
|
-
end
|
282
|
-
end
|
283
|
-
|
284
|
-
if name == "class"
|
285
|
-
value_original.gsub!(/\s+/, " ")
|
286
|
-
value_original.gsub!(/^\s+|\s+$/, "")
|
287
|
-
end
|
288
|
-
|
289
|
-
events = %w[onfocus onblur onselect onchange onclick
|
290
|
-
ondblclick onmousedown onmouseup onmouseover onmousemove
|
291
|
-
onmouseout onkeypress onkeydown onkeyup]
|
292
|
-
|
293
|
-
if events.include? name
|
294
|
-
value_original.gsub! /^javascript:\s+|;$/, ''
|
295
|
-
begin
|
296
|
-
value_original = HtmlPress.js_compressor value_original, @options[:js_minifier_options]
|
297
|
-
# TODO what about escaped attribute values?
|
298
|
-
if delimiter == "\""
|
299
|
-
value_original.gsub! "\"", "'"
|
300
|
-
end
|
301
|
-
rescue MultiJs::ParseError => e
|
302
|
-
log e.message
|
303
|
-
end
|
304
|
-
end
|
305
|
-
|
306
|
-
if value_original.size == 0
|
307
|
-
#attribute without value may be dropped by IE7
|
308
|
-
if @options[:drop_empty_values]
|
309
|
-
attribute = name_original
|
310
|
-
else
|
311
|
-
attribute = name_original + "=" + delimiter + delimiter
|
312
|
-
end
|
313
|
-
elsif @options[:unquoted_attributes] && !(value_original =~ /[ \t\r\n\f"'`=<>]/)
|
314
|
-
attribute = name_original + "=" + value_original
|
315
|
-
else
|
316
|
-
attribute = name_original + "=" + delimiter + value_original + delimiter
|
317
|
-
end
|
318
|
-
|
319
|
-
end
|
320
|
-
|
321
|
-
attribute
|
322
|
-
end
|
323
|
-
|
324
|
-
end
|
325
|
-
end
|
1
|
+
module HtmlPress
|
2
|
+
class Html
|
3
|
+
|
4
|
+
DEFAULTS = {
|
5
|
+
:logger => false,
|
6
|
+
:unquoted_attributes => false,
|
7
|
+
:drop_empty_values => false,
|
8
|
+
:strip_crlf => false,
|
9
|
+
:js_minifier_options => false
|
10
|
+
}
|
11
|
+
|
12
|
+
def initialize (options = {})
|
13
|
+
@options = DEFAULTS.merge(options)
|
14
|
+
if @options.keys.include? :dump_empty_values
|
15
|
+
@options[:drop_empty_values] = @options.delete(:dump_empty_values)
|
16
|
+
warn "dump_empty_values deprecated use drop_empty_values"
|
17
|
+
end
|
18
|
+
if @options[:logger] && !@options[:logger].respond_to?(:error)
|
19
|
+
raise ArgumentError, 'Logger has no error method'
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def press (html)
|
24
|
+
out = html.respond_to?(:read) ? html.read : html.dup
|
25
|
+
|
26
|
+
@replacement_hash = 'MINIFYHTML' + Time.now.to_i.to_s
|
27
|
+
@placeholders = []
|
28
|
+
|
29
|
+
out = process_ie_conditional_comments out
|
30
|
+
out = process_scripts out
|
31
|
+
out = process_styles out
|
32
|
+
out = process_html_comments out
|
33
|
+
out = process_pres out
|
34
|
+
|
35
|
+
out = HtmlPress.entities_compressor out
|
36
|
+
|
37
|
+
out = trim_lines out
|
38
|
+
out = process_block_elements out
|
39
|
+
out = process_textareas out
|
40
|
+
|
41
|
+
# use newlines before 1st attribute in open tags (to limit line lengths)
|
42
|
+
# out.gsub!(/(<[a-z\-:]+)\s+([^>]+>)/i, "\\1\n\\2")
|
43
|
+
|
44
|
+
out = process_attributes out
|
45
|
+
out = process_whitespaces out
|
46
|
+
out = fill_placeholders out
|
47
|
+
|
48
|
+
out
|
49
|
+
end
|
50
|
+
|
51
|
+
# for backward compatibility
|
52
|
+
alias :compile :press
|
53
|
+
|
54
|
+
protected
|
55
|
+
|
56
|
+
# IE conditional comments
|
57
|
+
def process_ie_conditional_comments (out)
|
58
|
+
out.gsub /(<!--\[[^\]]+\]>([\s\S]*?)<!\[[^\]]+\]-->)\s*/ do
|
59
|
+
m = $1
|
60
|
+
comment = $2
|
61
|
+
comment_compressed = Html.new.press(comment)
|
62
|
+
m.gsub!(comment, comment_compressed)
|
63
|
+
reserve m
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# replace SCRIPTs (and minify) with placeholders
|
68
|
+
def process_scripts (out)
|
69
|
+
out.gsub /(<script\b[^>]*?>([\s\S]*?)<\/script>)\s*/i do
|
70
|
+
js = $2
|
71
|
+
m = $1.gsub /^<script\s([^>]+)>/i do |m|
|
72
|
+
attrs(m, 'script', true)
|
73
|
+
end
|
74
|
+
begin
|
75
|
+
js_compressed = HtmlPress.js_compressor js, @options[:js_minifier_options]
|
76
|
+
m.gsub!(">#{js}<\/script>", ">#{js_compressed}<\/script>")
|
77
|
+
rescue MultiJs::ParseError => e
|
78
|
+
log e.message
|
79
|
+
end
|
80
|
+
reserve m
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
# replace STYLEs (and minify) with placeholders
|
85
|
+
def process_styles (out)
|
86
|
+
out.gsub /(<style\b[^>]*?>([\s\S]*?)<\/style>)\s*/i do
|
87
|
+
css = $2
|
88
|
+
m = $1.gsub /^<style\s([^>]+)>/i do |m|
|
89
|
+
attrs(m, 'style', true)
|
90
|
+
end
|
91
|
+
begin
|
92
|
+
css_compressed = MultiCss.min css
|
93
|
+
m.gsub!(css, css_compressed)
|
94
|
+
rescue Exception => e
|
95
|
+
log e.message
|
96
|
+
end
|
97
|
+
reserve m
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
# remove html comments (not IE conditional comments)
|
102
|
+
def process_html_comments (out)
|
103
|
+
out.gsub /<!--([\s\S]*?)-->/, ''
|
104
|
+
end
|
105
|
+
|
106
|
+
# replace PREs with placeholders
|
107
|
+
def process_pres (out)
|
108
|
+
out.gsub /(<pre\b[^>]*?>([\s\S]*?)<\/pre>)\s*/i do
|
109
|
+
pre = $2
|
110
|
+
m = $1
|
111
|
+
pre_compressed = pre.lines.map{ |l| l.gsub(/\s+$/, '') }.join("\n")
|
112
|
+
pre_compressed = HtmlPress.entities_compressor pre_compressed
|
113
|
+
m.gsub!(pre, pre_compressed)
|
114
|
+
reserve m
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
# trim each line
|
119
|
+
def trim_lines (out)
|
120
|
+
out.gsub(/^\s+|\s+$/m, '')
|
121
|
+
end
|
122
|
+
|
123
|
+
# remove whitespaces outside of block elements
|
124
|
+
def process_block_elements (out)
|
125
|
+
re = '\\s+(<\\/?(?:area|base(?:font)?|blockquote|body' +
|
126
|
+
'|caption|center|cite|col(?:group)?|dd|dir|div|dl|dt|fieldset|form' +
|
127
|
+
'|frame(?:set)?|h[1-6]|head|hr|html|legend|li|link|map|menu|meta' +
|
128
|
+
'|ol|opt(?:group|ion)|p|param|t(?:able|body|head|d|h|r|foot|itle)' +
|
129
|
+
'|ul)\\b[^>]*>)'
|
130
|
+
|
131
|
+
re = Regexp.new(re)
|
132
|
+
out.gsub!(re, '\\1')
|
133
|
+
|
134
|
+
# remove whitespaces outside of all elements
|
135
|
+
out.gsub! />([^<]+)</ do |m|
|
136
|
+
m.gsub(/^\s+|\s+$/, ' ')
|
137
|
+
end
|
138
|
+
|
139
|
+
out
|
140
|
+
end
|
141
|
+
|
142
|
+
# replace TEXTAREAs with placeholders
|
143
|
+
def process_textareas (out)
|
144
|
+
out.gsub /(<textarea\b[^>]*?>[\s\S]*?<\/textarea>)\s*/i do |m|
|
145
|
+
reserve m
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
# attributes
|
150
|
+
def process_attributes (out)
|
151
|
+
out.gsub /<[a-z\-:]+\s([^>]+)>/i do |m|
|
152
|
+
reserve attrs(m, '[a-z\-:]+', true)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
# replace two or more whitespaces with one
|
157
|
+
def process_whitespaces (out)
|
158
|
+
out.gsub!(/[\r\n]+/, @options[:strip_crlf] ? ' ' : "\n")
|
159
|
+
out.gsub!(/\s+/, ' ')
|
160
|
+
out
|
161
|
+
end
|
162
|
+
|
163
|
+
# fill placeholders
|
164
|
+
def fill_placeholders (out)
|
165
|
+
re = Regexp.new('%' + @replacement_hash + '%(\d+)%')
|
166
|
+
out.gsub re do |m|
|
167
|
+
m.gsub!(re, "\\1")
|
168
|
+
@placeholders[m.to_i]
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
def log (text)
|
173
|
+
@options[:logger].error text if @options[:logger]
|
174
|
+
end
|
175
|
+
|
176
|
+
def reserve (content)
|
177
|
+
@placeholders.push content
|
178
|
+
'%' + @replacement_hash + '%' + (@placeholders.size - 1).to_s + '%'
|
179
|
+
end
|
180
|
+
|
181
|
+
def attrs (m, tag_name, r)
|
182
|
+
re = "<(" + tag_name + ")(\s[^>]+)?>"
|
183
|
+
re = Regexp.new(re, true)
|
184
|
+
attributes = m.gsub(re, "\\2")
|
185
|
+
if r
|
186
|
+
tag = m.gsub(re, "\\1")
|
187
|
+
else
|
188
|
+
tag = tag_name
|
189
|
+
end
|
190
|
+
|
191
|
+
if attributes.size > 0
|
192
|
+
attributes_compressed = attributes.gsub(/([a-z\-_:]+(="[^"]*")?(='[^']*')?)\s*/i, " \\1")
|
193
|
+
|
194
|
+
attributes_compressed.gsub! /([a-z\-_:]+="[^"]*")/i do |k|
|
195
|
+
attr k, "\"", tag
|
196
|
+
end
|
197
|
+
|
198
|
+
attributes_compressed.gsub! /([a-z\-_:]+='[^']*')/i do |k|
|
199
|
+
attr k, "'", tag
|
200
|
+
end
|
201
|
+
|
202
|
+
attributes_compressed = " " + attributes_compressed.strip
|
203
|
+
|
204
|
+
if attributes_compressed == " /"
|
205
|
+
attributes_compressed = "/"
|
206
|
+
elsif attributes_compressed == " "
|
207
|
+
attributes_compressed = ""
|
208
|
+
end
|
209
|
+
m.gsub(attributes, attributes_compressed)
|
210
|
+
else
|
211
|
+
m
|
212
|
+
end
|
213
|
+
end
|
214
|
+
|
215
|
+
def attr(attribute, delimiter, tag)
|
216
|
+
re = "([a-z\\-_:]+)(=" + delimiter + "[^" + delimiter + "]*" + delimiter + ")?"
|
217
|
+
re = Regexp.new re, true
|
218
|
+
value_original = attribute.gsub(re, "\\2")
|
219
|
+
value = value_original.downcase
|
220
|
+
name_original = attribute.gsub(re, "\\1")
|
221
|
+
name = name_original.downcase
|
222
|
+
tag_name = tag.downcase
|
223
|
+
|
224
|
+
if value.size > 0
|
225
|
+
re = "^=" + delimiter + "|" + delimiter + "$"
|
226
|
+
re = Regexp.new re
|
227
|
+
value_original.gsub!(re, "")
|
228
|
+
end
|
229
|
+
|
230
|
+
case tag_name
|
231
|
+
when "script"
|
232
|
+
if (name == "type" && value_original == "text/javascript") || (name == "language" && value_original == "JavaScript")
|
233
|
+
return ""
|
234
|
+
elsif name == "async" || name == "defer"
|
235
|
+
return name_original
|
236
|
+
end
|
237
|
+
when "form"
|
238
|
+
if name == "method" && value_original == "get"
|
239
|
+
return ""
|
240
|
+
end
|
241
|
+
when /link|style/
|
242
|
+
if name == "type" && value_original == "text/stylesheet"
|
243
|
+
return ""
|
244
|
+
end
|
245
|
+
when /input|textarea|button|select|option|optgroup/
|
246
|
+
if name == "disabled"
|
247
|
+
return name_original
|
248
|
+
end
|
249
|
+
if (tag_name == "input" || tag_name == "textarea") && name == "readonly"
|
250
|
+
return name_original
|
251
|
+
end
|
252
|
+
if tag_name == "option" && name == "selected"
|
253
|
+
return name_original
|
254
|
+
end
|
255
|
+
if tag_name == "input"
|
256
|
+
if name == "type" && value_original == "text"
|
257
|
+
return ""
|
258
|
+
end
|
259
|
+
if name == "checked"
|
260
|
+
return name_original
|
261
|
+
end
|
262
|
+
# if name == "value" && (value == "=\"\"" || value == "=''")
|
263
|
+
# return ''
|
264
|
+
# end
|
265
|
+
end
|
266
|
+
end
|
267
|
+
|
268
|
+
if value.size > 0
|
269
|
+
|
270
|
+
if name == "style"
|
271
|
+
begin
|
272
|
+
value_original = MultiCss.min_attr value_original
|
273
|
+
# TODO what about escaped attribute values?
|
274
|
+
if delimiter == "\""
|
275
|
+
value_original.gsub!("\"", "'")
|
276
|
+
else
|
277
|
+
value_original.gsub!("'", "\"")
|
278
|
+
end
|
279
|
+
rescue MultiCss::ParseError => e
|
280
|
+
log e.message
|
281
|
+
end
|
282
|
+
end
|
283
|
+
|
284
|
+
if name == "class"
|
285
|
+
value_original.gsub!(/\s+/, " ")
|
286
|
+
value_original.gsub!(/^\s+|\s+$/, "")
|
287
|
+
end
|
288
|
+
|
289
|
+
events = %w[onfocus onblur onselect onchange onclick
|
290
|
+
ondblclick onmousedown onmouseup onmouseover onmousemove
|
291
|
+
onmouseout onkeypress onkeydown onkeyup]
|
292
|
+
|
293
|
+
if events.include? name
|
294
|
+
value_original.gsub! /^javascript:\s+|;$/, ''
|
295
|
+
begin
|
296
|
+
value_original = HtmlPress.js_compressor value_original, @options[:js_minifier_options]
|
297
|
+
# TODO what about escaped attribute values?
|
298
|
+
if delimiter == "\""
|
299
|
+
value_original.gsub! "\"", "'"
|
300
|
+
end
|
301
|
+
rescue MultiJs::ParseError => e
|
302
|
+
log e.message
|
303
|
+
end
|
304
|
+
end
|
305
|
+
|
306
|
+
if value_original.size == 0
|
307
|
+
#attribute without value may be dropped by IE7
|
308
|
+
if @options[:drop_empty_values]
|
309
|
+
attribute = name_original
|
310
|
+
else
|
311
|
+
attribute = name_original + "=" + delimiter + delimiter
|
312
|
+
end
|
313
|
+
elsif @options[:unquoted_attributes] && !(value_original =~ /[ \t\r\n\f"'`=<>]/)
|
314
|
+
attribute = name_original + "=" + value_original
|
315
|
+
else
|
316
|
+
attribute = name_original + "=" + delimiter + value_original + delimiter
|
317
|
+
end
|
318
|
+
|
319
|
+
end
|
320
|
+
|
321
|
+
attribute
|
322
|
+
end
|
323
|
+
|
324
|
+
end
|
325
|
+
end
|