html_press 0.8.1 → 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -6
- data/.travis.yml +8 -8
- data/Gemfile +6 -6
- data/Readme.md +73 -71
- data/html_press.gemspec +26 -26
- data/lib/html_press.rb +24 -23
- data/lib/html_press/html.rb +325 -325
- data/lib/html_press/html_entities.rb +36 -36
- data/lib/html_press/version.rb +3 -3
- data/profile/index.html +37689 -37689
- data/profile/profile.rb +28 -28
- data/spec/html_press_spec.rb +258 -256
- metadata +19 -37
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: efbc2fa52751f8f1f80c58c759d884e26bfd11bd
|
4
|
+
data.tar.gz: 458a9c3dddb8c1af510d222c5e629952374c1908
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b30dcd0a49adda32a97912a24f6e7a1ee68a3d860d3bca4ac4ed06545be1bcad7a59808754a7d337e936b1b8a2efc0f5f47ee1e047dbe140123792862dcb529a
|
7
|
+
data.tar.gz: 4e46df1cbc717db5294c9474006dd70bc113659c9730dbe8dfa353f554fa009cd5978e1f88e3a9368640dee4be998527774fcce4f831f8f177d733f683212db5
|
data/.gitignore
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
*.gem
|
2
|
-
.bundle
|
3
|
-
Gemfile.lock
|
4
|
-
pkg/*
|
5
|
-
profile/reports/*
|
6
|
-
/.project
|
1
|
+
*.gem
|
2
|
+
.bundle
|
3
|
+
Gemfile.lock
|
4
|
+
pkg/*
|
5
|
+
profile/reports/*
|
6
|
+
/.project
|
data/.travis.yml
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
language: ruby
|
2
|
-
rvm:
|
3
|
-
- 1.8.7
|
4
|
-
- 1.9.2
|
5
|
-
- 1.9.3
|
6
|
-
- jruby-18mode
|
7
|
-
- rbx-18mode
|
8
|
-
# - jruby-19mode
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.8.7
|
4
|
+
- 1.9.2
|
5
|
+
- 1.9.3
|
6
|
+
- jruby-18mode
|
7
|
+
- rbx-18mode
|
8
|
+
# - jruby-19mode
|
9
9
|
# - rbx-19mode
|
data/Gemfile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
source
|
2
|
-
|
3
|
-
# Specify your gem's dependencies in html_press.gemspec
|
4
|
-
gemspec
|
5
|
-
|
6
|
-
|
1
|
+
source "http://rubygems.org"
|
2
|
+
|
3
|
+
# Specify your gem's dependencies in html_press.gemspec
|
4
|
+
gemspec
|
5
|
+
|
6
|
+
gem "ruby-prof", :platforms => :mri
|
data/Readme.md
CHANGED
@@ -1,71 +1,73 @@
|
|
1
|
-
# HtmlPress [![Build Status](https://
|
2
|
-
|
3
|
-
## How it works
|
4
|
-
|
5
|
-
Remove all whitespace junk. Leave only HTML
|
6
|
-
|
7
|
-
```
|
8
|
-
1. ┌――――――――――――――――――╖ 2. ┌――――――――――――――――――╖
|
9
|
-
●――――――――――――――├―╢ <html> ws junk ║ ●――――――――├―――――――――╢ <html> ║
|
10
|
-
└――――――――――――――――――╜ └――――――――――――――――――╜
|
11
|
-
```
|
12
|
-
|
13
|
-
## Usage
|
14
|
-
|
15
|
-
### Ruby
|
16
|
-
```ruby
|
17
|
-
require 'html_press'
|
18
|
-
compressed_html = HtmlPress.press html
|
19
|
-
```
|
20
|
-
|
21
|
-
### Jekyll
|
22
|
-
see [jekyll_press](https://github.com/stereobooster/jekyll_press)
|
23
|
-
|
24
|
-
### Rails
|
25
|
-
TODO :exclamation:
|
26
|
-
|
27
|
-
### Sinatra
|
28
|
-
TODO :exclamation:
|
29
|
-
|
30
|
-
### Command line
|
31
|
-
TODO :exclamation:
|
32
|
-
|
33
|
-
## TODO
|
34
|
-
- use parser ([鋸](https://github.com/tenderlove/nokogiri)) instead of regexp's
|
35
|
-
- add option to convert relative urls to absolute urls (for SEO)
|
36
|
-
- [ambigious ampersands](http://mathiasbynens.be/notes/ambiguous-ampersands) for compression?
|
37
|
-
- Support other js/css minifiers (Closure, YUI compressor)
|
38
|
-
- htmlTydi
|
39
|
-
- Rack plugin
|
40
|
-
- add script to benchmark real projects like amazon or stackoverflow
|
41
|
-
- support html5 tags
|
42
|
-
- add more options
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
- [
|
47
|
-
-
|
48
|
-
- [
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
- [
|
55
|
-
-
|
56
|
-
- [
|
57
|
-
-
|
58
|
-
- [
|
59
|
-
- [
|
60
|
-
- [
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
1
|
+
# HtmlPress ![Gem Version](https://fury-badge.herokuapp.com/rb/html_press.png) [![Build Status](https://travis-ci.org/stereobooster/html_press.png?branch=master)](https://travis-ci.org/stereobooster/html_press) [![Dependency Status](https://gemnasium.com/stereobooster/html_press.png?travis)](https://gemnasium.com/stereobooster/html_press) [![Code Climate](https://codeclimate.com/badge.png)](https://codeclimate.com/github/stereobooster/html_press)
|
2
|
+
|
3
|
+
## How it works
|
4
|
+
|
5
|
+
Remove all whitespace junk. Leave only HTML
|
6
|
+
|
7
|
+
```
|
8
|
+
1. ┌――――――――――――――――――╖ 2. ┌――――――――――――――――――╖
|
9
|
+
●――――――――――――――├―╢ <html> ws junk ║ ●――――――――├―――――――――╢ <html> ║
|
10
|
+
└――――――――――――――――――╜ └――――――――――――――――――╜
|
11
|
+
```
|
12
|
+
|
13
|
+
## Usage
|
14
|
+
|
15
|
+
### Ruby
|
16
|
+
```ruby
|
17
|
+
require 'html_press'
|
18
|
+
compressed_html = HtmlPress.press html
|
19
|
+
```
|
20
|
+
|
21
|
+
### Jekyll
|
22
|
+
see [jekyll_press](https://github.com/stereobooster/jekyll_press)
|
23
|
+
|
24
|
+
### Rails
|
25
|
+
TODO :exclamation:
|
26
|
+
|
27
|
+
### Sinatra
|
28
|
+
TODO :exclamation:
|
29
|
+
|
30
|
+
### Command line
|
31
|
+
TODO :exclamation:
|
32
|
+
|
33
|
+
## TODO
|
34
|
+
- use parser ([鋸](https://github.com/tenderlove/nokogiri)) instead of regexp's
|
35
|
+
- add option to convert relative urls to absolute urls (for SEO)
|
36
|
+
- [ambigious ampersands](http://mathiasbynens.be/notes/ambiguous-ampersands) for compression?
|
37
|
+
- Support other js/css minifiers (Closure, YUI compressor)
|
38
|
+
- htmlTydi
|
39
|
+
- Rack plugin
|
40
|
+
- add script to benchmark real projects like amazon or stackoverflow
|
41
|
+
- support html5 tags
|
42
|
+
- add more options
|
43
|
+
- Optimization: make substring replace based on substring length and its position in initial string
|
44
|
+
|
45
|
+
## Alternatives
|
46
|
+
- [html-minifier](https://github.com/kangax/html-minifier) (js), [test suite](https://github.com/kangax/html-minifier/blob/gh-pages/tests/index.html), ruby wrapper - [html_minifier](https://github.com/stereobooster/html_minifier)
|
47
|
+
- [htmlcompressor](http://code.google.com/p/htmlcompressor/) (java), [test suite](http://code.google.com/p/htmlcompressor/source/browse/#svn%2Ftrunk%2Fsrc%2Ftest%2Fresources%2Fhtml%253Fstate%253Dclosed)
|
48
|
+
- PHPTal compress (php), [test suite](https://svn.motion-twin.com/phptal/trunk/tests/CompressTest.php)
|
49
|
+
- [W3 total cache](http://wordpress.org/extend/plugins/w3-total-cache/) - WP plugin from smashingmagazine contains html minifier (php)
|
50
|
+
|
51
|
+
## Additional tools
|
52
|
+
- [jeanny](https://github.com/gfranco/jeanny) - rename css classes and ids in css and html files
|
53
|
+
- make shorter pathes for images in css
|
54
|
+
- [deadweight](https://github.com/aanand/deadweight) - remove unused css rules from css files
|
55
|
+
- [csscss](http://zmoazeni.github.com/csscss/) will parse any CSS files you give it and let you know which rulesets have duplicated declarations.
|
56
|
+
- [css-spriter](https://github.com/aberant/css-spriter), [sprite-factory](https://github.com/jakesgordon/sprite-factory) - combine images in sprites
|
57
|
+
- resize images by size defined in html and vice versa embed size of images in html
|
58
|
+
- [#1](http://habrahabr.ru/post/90761/), [#2](http://ap-project.org/English/Article/View/53/) - inline small images in css
|
59
|
+
- [smusher](https://github.com/grosser/smusher), jpegtran, optipng - losslessly minify images
|
60
|
+
- [sprockets](https://github.com/sstephenson/sprockets), [jammit](https://github.com/documentcloud/jammit) - asset bundlers
|
61
|
+
- [w3c_validators](https://github.com/alexdunae/w3c_validators)
|
62
|
+
- [reduce](https://github.com/grosser/reduce)
|
63
|
+
|
64
|
+
## Resources
|
65
|
+
|
66
|
+
### Minimize HTML
|
67
|
+
- http://perfectionkills.com/experimenting-with-html-minifier
|
68
|
+
- http://perfectionkills.com/optimizing-html
|
69
|
+
- https://developers.google.com/speed/articles/optimizing-html
|
70
|
+
|
71
|
+
### Front-end optimization
|
72
|
+
- https://developers.google.com/speed/docs/insights/rules
|
73
|
+
- http://developer.yahoo.com/performance/rules.html
|
data/html_press.gemspec
CHANGED
@@ -1,26 +1,26 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
$:.push File.expand_path("../lib", __FILE__)
|
3
|
-
require "html_press/version"
|
4
|
-
|
5
|
-
Gem::Specification.new do |s|
|
6
|
-
s.name = "html_press"
|
7
|
-
s.version = HtmlPress::VERSION
|
8
|
-
s.authors = ["stereobooster"]
|
9
|
-
s.email = ["stereobooster@gmail.com"]
|
10
|
-
s.homepage = "https://github.com/stereobooster/html_press"
|
11
|
-
s.summary = %q{Compress html}
|
12
|
-
s.description = %q{Ruby gem for compressing html}
|
13
|
-
s.license = "MIT"
|
14
|
-
|
15
|
-
s.files = `git ls-files`.split("\n")
|
16
|
-
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
-
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
|
-
s.require_paths = ["lib"]
|
19
|
-
|
20
|
-
s.add_development_dependency "rspec"
|
21
|
-
s.add_development_dependency "rake"
|
22
|
-
|
23
|
-
s.add_dependency "multi_css", ">= 0.1.0"
|
24
|
-
s.add_dependency "multi_js"
|
25
|
-
s.add_dependency "htmlentities"
|
26
|
-
end
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "html_press/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "html_press"
|
7
|
+
s.version = HtmlPress::VERSION
|
8
|
+
s.authors = ["stereobooster"]
|
9
|
+
s.email = ["stereobooster@gmail.com"]
|
10
|
+
s.homepage = "https://github.com/stereobooster/html_press"
|
11
|
+
s.summary = %q{Compress html}
|
12
|
+
s.description = %q{Ruby gem for compressing html}
|
13
|
+
s.license = "MIT"
|
14
|
+
|
15
|
+
s.files = `git ls-files`.split("\n")
|
16
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
|
+
s.require_paths = ["lib"]
|
19
|
+
|
20
|
+
s.add_development_dependency "rspec"
|
21
|
+
s.add_development_dependency "rake"
|
22
|
+
|
23
|
+
s.add_dependency "multi_css", ">= 0.1.0"
|
24
|
+
s.add_dependency "multi_js", ">= 0.1.0"
|
25
|
+
s.add_dependency "htmlentities"
|
26
|
+
end
|
data/lib/html_press.rb
CHANGED
@@ -1,23 +1,24 @@
|
|
1
|
-
require "html_press/version"
|
2
|
-
require "html_press/html_entities"
|
3
|
-
require "html_press/html"
|
4
|
-
|
5
|
-
require 'multi_css'
|
6
|
-
require 'multi_js'
|
7
|
-
|
8
|
-
module HtmlPress
|
9
|
-
def self.press(text, options = {})
|
10
|
-
HtmlPress::Html.new(options).press text
|
11
|
-
end
|
12
|
-
|
13
|
-
# for backward compatibility
|
14
|
-
def self.compress(text, options = {})
|
15
|
-
HtmlPress::Html.new(options).press text
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.js_compressor (text, options = nil)
|
19
|
-
options ||= {}
|
20
|
-
options[:
|
21
|
-
|
22
|
-
|
23
|
-
end
|
1
|
+
require "html_press/version"
|
2
|
+
require "html_press/html_entities"
|
3
|
+
require "html_press/html"
|
4
|
+
|
5
|
+
require 'multi_css'
|
6
|
+
require 'multi_js'
|
7
|
+
|
8
|
+
module HtmlPress
|
9
|
+
def self.press(text, options = {})
|
10
|
+
HtmlPress::Html.new(options).press text
|
11
|
+
end
|
12
|
+
|
13
|
+
# for backward compatibility
|
14
|
+
def self.compress(text, options = {})
|
15
|
+
HtmlPress::Html.new(options).press text
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.js_compressor (text, options = nil)
|
19
|
+
options ||= {}
|
20
|
+
options[:output] ||= {}
|
21
|
+
options[:output][:inline_script] = true
|
22
|
+
MultiJs.compile(text, options).gsub(/;$/,'')
|
23
|
+
end
|
24
|
+
end
|
data/lib/html_press/html.rb
CHANGED
@@ -1,325 +1,325 @@
|
|
1
|
-
module HtmlPress
|
2
|
-
class Html
|
3
|
-
|
4
|
-
DEFAULTS = {
|
5
|
-
:logger => false,
|
6
|
-
:unquoted_attributes => false,
|
7
|
-
:drop_empty_values => false,
|
8
|
-
:strip_crlf => false,
|
9
|
-
:js_minifier_options => false
|
10
|
-
}
|
11
|
-
|
12
|
-
def initialize (options = {})
|
13
|
-
@options = DEFAULTS.merge(options)
|
14
|
-
if @options.keys.include? :dump_empty_values
|
15
|
-
@options[:drop_empty_values] = @options.delete(:dump_empty_values)
|
16
|
-
warn "dump_empty_values deprecated use drop_empty_values"
|
17
|
-
end
|
18
|
-
if @options[:logger] && !@options[:logger].respond_to?(:error)
|
19
|
-
raise ArgumentError, 'Logger has no error method'
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
def press (html)
|
24
|
-
out = html.respond_to?(:read) ? html.read : html.dup
|
25
|
-
|
26
|
-
@replacement_hash = 'MINIFYHTML' + Time.now.to_i.to_s
|
27
|
-
@placeholders = []
|
28
|
-
|
29
|
-
out = process_ie_conditional_comments out
|
30
|
-
out = process_scripts out
|
31
|
-
out = process_styles out
|
32
|
-
out = process_html_comments out
|
33
|
-
out = process_pres out
|
34
|
-
|
35
|
-
out = HtmlPress.entities_compressor out
|
36
|
-
|
37
|
-
out = trim_lines out
|
38
|
-
out = process_block_elements out
|
39
|
-
out = process_textareas out
|
40
|
-
|
41
|
-
# use newlines before 1st attribute in open tags (to limit line lengths)
|
42
|
-
# out.gsub!(/(<[a-z\-:]+)\s+([^>]+>)/i, "\\1\n\\2")
|
43
|
-
|
44
|
-
out = process_attributes out
|
45
|
-
out = process_whitespaces out
|
46
|
-
out = fill_placeholders out
|
47
|
-
|
48
|
-
out
|
49
|
-
end
|
50
|
-
|
51
|
-
# for backward compatibility
|
52
|
-
alias :compile :press
|
53
|
-
|
54
|
-
protected
|
55
|
-
|
56
|
-
# IE conditional comments
|
57
|
-
def process_ie_conditional_comments (out)
|
58
|
-
out.gsub /(<!--\[[^\]]+\]>([\s\S]*?)<!\[[^\]]+\]-->)\s*/ do
|
59
|
-
m = $1
|
60
|
-
comment = $2
|
61
|
-
comment_compressed = Html.new.press(comment)
|
62
|
-
m.gsub!(comment, comment_compressed)
|
63
|
-
reserve m
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
# replace SCRIPTs (and minify) with placeholders
|
68
|
-
def process_scripts (out)
|
69
|
-
out.gsub /(<script\b[^>]*?>([\s\S]*?)<\/script>)\s*/i do
|
70
|
-
js = $2
|
71
|
-
m = $1.gsub /^<script\s([^>]+)>/i do |m|
|
72
|
-
attrs(m, 'script', true)
|
73
|
-
end
|
74
|
-
begin
|
75
|
-
js_compressed = HtmlPress.js_compressor js, @options[:js_minifier_options]
|
76
|
-
m.gsub!(js, js_compressed)
|
77
|
-
rescue MultiJs::ParseError => e
|
78
|
-
log e.message
|
79
|
-
end
|
80
|
-
reserve m
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
# replace STYLEs (and minify) with placeholders
|
85
|
-
def process_styles (out)
|
86
|
-
out.gsub /(<style\b[^>]*?>([\s\S]*?)<\/style>)\s*/i do
|
87
|
-
css = $2
|
88
|
-
m = $1.gsub /^<style\s([^>]+)>/i do |m|
|
89
|
-
attrs(m, 'style', true)
|
90
|
-
end
|
91
|
-
begin
|
92
|
-
css_compressed = MultiCss.min css
|
93
|
-
m.gsub!(css, css_compressed)
|
94
|
-
rescue Exception => e
|
95
|
-
log e.message
|
96
|
-
end
|
97
|
-
reserve m
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
# remove html comments (not IE conditional comments)
|
102
|
-
def process_html_comments (out)
|
103
|
-
out.gsub /<!--([\s\S]*?)-->/, ''
|
104
|
-
end
|
105
|
-
|
106
|
-
# replace PREs with placeholders
|
107
|
-
def process_pres (out)
|
108
|
-
out.gsub /(<pre\b[^>]*?>([\s\S]*?)<\/pre>)\s*/i do
|
109
|
-
pre = $2
|
110
|
-
m = $1
|
111
|
-
pre_compressed = pre.lines.map{ |l| l.gsub(/\s+$/, '') }.join("\n")
|
112
|
-
pre_compressed = HtmlPress.entities_compressor pre_compressed
|
113
|
-
m.gsub!(pre, pre_compressed)
|
114
|
-
reserve m
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
# trim each line
|
119
|
-
def trim_lines (out)
|
120
|
-
out.gsub(/^\s+|\s+$/m, '')
|
121
|
-
end
|
122
|
-
|
123
|
-
# remove whitespaces outside of block elements
|
124
|
-
def process_block_elements (out)
|
125
|
-
re = '\\s+(<\\/?(?:area|base(?:font)?|blockquote|body' +
|
126
|
-
'|caption|center|cite|col(?:group)?|dd|dir|div|dl|dt|fieldset|form' +
|
127
|
-
'|frame(?:set)?|h[1-6]|head|hr|html|legend|li|link|map|menu|meta' +
|
128
|
-
'|ol|opt(?:group|ion)|p|param|t(?:able|body|head|d|h|r|foot|itle)' +
|
129
|
-
'|ul)\\b[^>]*>)'
|
130
|
-
|
131
|
-
re = Regexp.new(re)
|
132
|
-
out.gsub!(re, '\\1')
|
133
|
-
|
134
|
-
# remove whitespaces outside of all elements
|
135
|
-
out.gsub! />([^<]+)</ do |m|
|
136
|
-
m.gsub(/^\s+|\s+$/, ' ')
|
137
|
-
end
|
138
|
-
|
139
|
-
out
|
140
|
-
end
|
141
|
-
|
142
|
-
# replace TEXTAREAs with placeholders
|
143
|
-
def process_textareas (out)
|
144
|
-
out.gsub /(<textarea\b[^>]*?>[\s\S]*?<\/textarea>)\s*/i do |m|
|
145
|
-
reserve m
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
# attributes
|
150
|
-
def process_attributes (out)
|
151
|
-
out.gsub /<[a-z\-:]+\s([^>]+)>/i do |m|
|
152
|
-
reserve attrs(m, '[a-z\-:]+', true)
|
153
|
-
end
|
154
|
-
end
|
155
|
-
|
156
|
-
# replace two or more whitespaces with one
|
157
|
-
def process_whitespaces (out)
|
158
|
-
out.gsub!(/[\r\n]+/, @options[:strip_crlf] ? ' ' : "\n")
|
159
|
-
out.gsub!(/\s+/, ' ')
|
160
|
-
out
|
161
|
-
end
|
162
|
-
|
163
|
-
# fill placeholders
|
164
|
-
def fill_placeholders (out)
|
165
|
-
re = Regexp.new('%' + @replacement_hash + '%(\d+)%')
|
166
|
-
out.gsub re do |m|
|
167
|
-
m.gsub!(re, "\\1")
|
168
|
-
@placeholders[m.to_i]
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
def log (text)
|
173
|
-
@options[:logger].error text if @options[:logger]
|
174
|
-
end
|
175
|
-
|
176
|
-
def reserve (content)
|
177
|
-
@placeholders.push content
|
178
|
-
'%' + @replacement_hash + '%' + (@placeholders.size - 1).to_s + '%'
|
179
|
-
end
|
180
|
-
|
181
|
-
def attrs (m, tag_name, r)
|
182
|
-
re = "<(" + tag_name + ")(\s[^>]+)?>"
|
183
|
-
re = Regexp.new(re, true)
|
184
|
-
attributes = m.gsub(re, "\\2")
|
185
|
-
if r
|
186
|
-
tag = m.gsub(re, "\\1")
|
187
|
-
else
|
188
|
-
tag = tag_name
|
189
|
-
end
|
190
|
-
|
191
|
-
if attributes.size > 0
|
192
|
-
attributes_compressed = attributes.gsub(/([a-z\-_:]+(="[^"]*")?(='[^']*')?)\s*/i, " \\1")
|
193
|
-
|
194
|
-
attributes_compressed.gsub! /([a-z\-_:]+="[^"]*")/i do |k|
|
195
|
-
attr k, "\"", tag
|
196
|
-
end
|
197
|
-
|
198
|
-
attributes_compressed.gsub! /([a-z\-_:]+='[^']*')/i do |k|
|
199
|
-
attr k, "'", tag
|
200
|
-
end
|
201
|
-
|
202
|
-
attributes_compressed = " " + attributes_compressed.strip
|
203
|
-
|
204
|
-
if attributes_compressed == " /"
|
205
|
-
attributes_compressed = "/"
|
206
|
-
elsif attributes_compressed == " "
|
207
|
-
attributes_compressed = ""
|
208
|
-
end
|
209
|
-
m.gsub(attributes, attributes_compressed)
|
210
|
-
else
|
211
|
-
m
|
212
|
-
end
|
213
|
-
end
|
214
|
-
|
215
|
-
def attr(attribute, delimiter, tag)
|
216
|
-
re = "([a-z\\-_:]+)(=" + delimiter + "[^" + delimiter + "]*" + delimiter + ")?"
|
217
|
-
re = Regexp.new re, true
|
218
|
-
value_original = attribute.gsub(re, "\\2")
|
219
|
-
value = value_original.downcase
|
220
|
-
name_original = attribute.gsub(re, "\\1")
|
221
|
-
name = name_original.downcase
|
222
|
-
tag_name = tag.downcase
|
223
|
-
|
224
|
-
if value.size > 0
|
225
|
-
re = "^=" + delimiter + "|" + delimiter + "$"
|
226
|
-
re = Regexp.new re
|
227
|
-
value_original.gsub!(re, "")
|
228
|
-
end
|
229
|
-
|
230
|
-
case tag_name
|
231
|
-
when "script"
|
232
|
-
if (name == "type" && value_original == "text/javascript") || (name == "language" && value_original == "JavaScript")
|
233
|
-
return ""
|
234
|
-
elsif name == "async" || name == "defer"
|
235
|
-
return name_original
|
236
|
-
end
|
237
|
-
when "form"
|
238
|
-
if name == "method" && value_original == "get"
|
239
|
-
return ""
|
240
|
-
end
|
241
|
-
when /link|style/
|
242
|
-
if name == "type" && value_original == "text/stylesheet"
|
243
|
-
return ""
|
244
|
-
end
|
245
|
-
when /input|textarea|button|select|option|optgroup/
|
246
|
-
if name == "disabled"
|
247
|
-
return name_original
|
248
|
-
end
|
249
|
-
if (tag_name == "input" || tag_name == "textarea") && name == "readonly"
|
250
|
-
return name_original
|
251
|
-
end
|
252
|
-
if tag_name == "option" && name == "selected"
|
253
|
-
return name_original
|
254
|
-
end
|
255
|
-
if tag_name == "input"
|
256
|
-
if name == "type" && value_original == "text"
|
257
|
-
return ""
|
258
|
-
end
|
259
|
-
if name == "checked"
|
260
|
-
return name_original
|
261
|
-
end
|
262
|
-
# if name == "value" && (value == "=\"\"" || value == "=''")
|
263
|
-
# return ''
|
264
|
-
# end
|
265
|
-
end
|
266
|
-
end
|
267
|
-
|
268
|
-
if value.size > 0
|
269
|
-
|
270
|
-
if name == "style"
|
271
|
-
begin
|
272
|
-
value_original = MultiCss.min_attr value_original
|
273
|
-
# TODO what about escaped attribute values?
|
274
|
-
if delimiter == "\""
|
275
|
-
value_original.gsub!("\"", "'")
|
276
|
-
else
|
277
|
-
value_original.gsub!("'", "\"")
|
278
|
-
end
|
279
|
-
rescue MultiCss::ParseError => e
|
280
|
-
log e.message
|
281
|
-
end
|
282
|
-
end
|
283
|
-
|
284
|
-
if name == "class"
|
285
|
-
value_original.gsub!(/\s+/, " ")
|
286
|
-
value_original.gsub!(/^\s+|\s+$/, "")
|
287
|
-
end
|
288
|
-
|
289
|
-
events = %w[onfocus onblur onselect onchange onclick
|
290
|
-
ondblclick onmousedown onmouseup onmouseover onmousemove
|
291
|
-
onmouseout onkeypress onkeydown onkeyup]
|
292
|
-
|
293
|
-
if events.include? name
|
294
|
-
value_original.gsub! /^javascript:\s+|;$/, ''
|
295
|
-
begin
|
296
|
-
value_original = HtmlPress.js_compressor value_original, @options[:js_minifier_options]
|
297
|
-
# TODO what about escaped attribute values?
|
298
|
-
if delimiter == "\""
|
299
|
-
value_original.gsub! "\"", "'"
|
300
|
-
end
|
301
|
-
rescue MultiJs::ParseError => e
|
302
|
-
log e.message
|
303
|
-
end
|
304
|
-
end
|
305
|
-
|
306
|
-
if value_original.size == 0
|
307
|
-
#attribute without value may be dropped by IE7
|
308
|
-
if @options[:drop_empty_values]
|
309
|
-
attribute = name_original
|
310
|
-
else
|
311
|
-
attribute = name_original + "=" + delimiter + delimiter
|
312
|
-
end
|
313
|
-
elsif @options[:unquoted_attributes] && !(value_original =~ /[ \t\r\n\f"'`=<>]/)
|
314
|
-
attribute = name_original + "=" + value_original
|
315
|
-
else
|
316
|
-
attribute = name_original + "=" + delimiter + value_original + delimiter
|
317
|
-
end
|
318
|
-
|
319
|
-
end
|
320
|
-
|
321
|
-
attribute
|
322
|
-
end
|
323
|
-
|
324
|
-
end
|
325
|
-
end
|
1
|
+
module HtmlPress
|
2
|
+
class Html
|
3
|
+
|
4
|
+
DEFAULTS = {
|
5
|
+
:logger => false,
|
6
|
+
:unquoted_attributes => false,
|
7
|
+
:drop_empty_values => false,
|
8
|
+
:strip_crlf => false,
|
9
|
+
:js_minifier_options => false
|
10
|
+
}
|
11
|
+
|
12
|
+
def initialize (options = {})
|
13
|
+
@options = DEFAULTS.merge(options)
|
14
|
+
if @options.keys.include? :dump_empty_values
|
15
|
+
@options[:drop_empty_values] = @options.delete(:dump_empty_values)
|
16
|
+
warn "dump_empty_values deprecated use drop_empty_values"
|
17
|
+
end
|
18
|
+
if @options[:logger] && !@options[:logger].respond_to?(:error)
|
19
|
+
raise ArgumentError, 'Logger has no error method'
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def press (html)
|
24
|
+
out = html.respond_to?(:read) ? html.read : html.dup
|
25
|
+
|
26
|
+
@replacement_hash = 'MINIFYHTML' + Time.now.to_i.to_s
|
27
|
+
@placeholders = []
|
28
|
+
|
29
|
+
out = process_ie_conditional_comments out
|
30
|
+
out = process_scripts out
|
31
|
+
out = process_styles out
|
32
|
+
out = process_html_comments out
|
33
|
+
out = process_pres out
|
34
|
+
|
35
|
+
out = HtmlPress.entities_compressor out
|
36
|
+
|
37
|
+
out = trim_lines out
|
38
|
+
out = process_block_elements out
|
39
|
+
out = process_textareas out
|
40
|
+
|
41
|
+
# use newlines before 1st attribute in open tags (to limit line lengths)
|
42
|
+
# out.gsub!(/(<[a-z\-:]+)\s+([^>]+>)/i, "\\1\n\\2")
|
43
|
+
|
44
|
+
out = process_attributes out
|
45
|
+
out = process_whitespaces out
|
46
|
+
out = fill_placeholders out
|
47
|
+
|
48
|
+
out
|
49
|
+
end
|
50
|
+
|
51
|
+
# for backward compatibility
|
52
|
+
alias :compile :press
|
53
|
+
|
54
|
+
protected
|
55
|
+
|
56
|
+
# IE conditional comments
|
57
|
+
def process_ie_conditional_comments (out)
|
58
|
+
out.gsub /(<!--\[[^\]]+\]>([\s\S]*?)<!\[[^\]]+\]-->)\s*/ do
|
59
|
+
m = $1
|
60
|
+
comment = $2
|
61
|
+
comment_compressed = Html.new.press(comment)
|
62
|
+
m.gsub!(comment, comment_compressed)
|
63
|
+
reserve m
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# replace SCRIPTs (and minify) with placeholders
|
68
|
+
def process_scripts (out)
|
69
|
+
out.gsub /(<script\b[^>]*?>([\s\S]*?)<\/script>)\s*/i do
|
70
|
+
js = $2
|
71
|
+
m = $1.gsub /^<script\s([^>]+)>/i do |m|
|
72
|
+
attrs(m, 'script', true)
|
73
|
+
end
|
74
|
+
begin
|
75
|
+
js_compressed = HtmlPress.js_compressor js, @options[:js_minifier_options]
|
76
|
+
m.gsub!(">#{js}<\/script>", ">#{js_compressed}<\/script>")
|
77
|
+
rescue MultiJs::ParseError => e
|
78
|
+
log e.message
|
79
|
+
end
|
80
|
+
reserve m
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
# replace STYLEs (and minify) with placeholders
|
85
|
+
def process_styles (out)
|
86
|
+
out.gsub /(<style\b[^>]*?>([\s\S]*?)<\/style>)\s*/i do
|
87
|
+
css = $2
|
88
|
+
m = $1.gsub /^<style\s([^>]+)>/i do |m|
|
89
|
+
attrs(m, 'style', true)
|
90
|
+
end
|
91
|
+
begin
|
92
|
+
css_compressed = MultiCss.min css
|
93
|
+
m.gsub!(css, css_compressed)
|
94
|
+
rescue Exception => e
|
95
|
+
log e.message
|
96
|
+
end
|
97
|
+
reserve m
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
# remove html comments (not IE conditional comments)
|
102
|
+
def process_html_comments (out)
|
103
|
+
out.gsub /<!--([\s\S]*?)-->/, ''
|
104
|
+
end
|
105
|
+
|
106
|
+
# replace PREs with placeholders
|
107
|
+
def process_pres (out)
|
108
|
+
out.gsub /(<pre\b[^>]*?>([\s\S]*?)<\/pre>)\s*/i do
|
109
|
+
pre = $2
|
110
|
+
m = $1
|
111
|
+
pre_compressed = pre.lines.map{ |l| l.gsub(/\s+$/, '') }.join("\n")
|
112
|
+
pre_compressed = HtmlPress.entities_compressor pre_compressed
|
113
|
+
m.gsub!(pre, pre_compressed)
|
114
|
+
reserve m
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
# trim each line
|
119
|
+
def trim_lines (out)
|
120
|
+
out.gsub(/^\s+|\s+$/m, '')
|
121
|
+
end
|
122
|
+
|
123
|
+
# remove whitespaces outside of block elements
|
124
|
+
def process_block_elements (out)
|
125
|
+
re = '\\s+(<\\/?(?:area|base(?:font)?|blockquote|body' +
|
126
|
+
'|caption|center|cite|col(?:group)?|dd|dir|div|dl|dt|fieldset|form' +
|
127
|
+
'|frame(?:set)?|h[1-6]|head|hr|html|legend|li|link|map|menu|meta' +
|
128
|
+
'|ol|opt(?:group|ion)|p|param|t(?:able|body|head|d|h|r|foot|itle)' +
|
129
|
+
'|ul)\\b[^>]*>)'
|
130
|
+
|
131
|
+
re = Regexp.new(re)
|
132
|
+
out.gsub!(re, '\\1')
|
133
|
+
|
134
|
+
# remove whitespaces outside of all elements
|
135
|
+
out.gsub! />([^<]+)</ do |m|
|
136
|
+
m.gsub(/^\s+|\s+$/, ' ')
|
137
|
+
end
|
138
|
+
|
139
|
+
out
|
140
|
+
end
|
141
|
+
|
142
|
+
# replace TEXTAREAs with placeholders
|
143
|
+
def process_textareas (out)
|
144
|
+
out.gsub /(<textarea\b[^>]*?>[\s\S]*?<\/textarea>)\s*/i do |m|
|
145
|
+
reserve m
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
# attributes
|
150
|
+
def process_attributes (out)
|
151
|
+
out.gsub /<[a-z\-:]+\s([^>]+)>/i do |m|
|
152
|
+
reserve attrs(m, '[a-z\-:]+', true)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
# replace two or more whitespaces with one
|
157
|
+
def process_whitespaces (out)
|
158
|
+
out.gsub!(/[\r\n]+/, @options[:strip_crlf] ? ' ' : "\n")
|
159
|
+
out.gsub!(/\s+/, ' ')
|
160
|
+
out
|
161
|
+
end
|
162
|
+
|
163
|
+
# fill placeholders
|
164
|
+
def fill_placeholders (out)
|
165
|
+
re = Regexp.new('%' + @replacement_hash + '%(\d+)%')
|
166
|
+
out.gsub re do |m|
|
167
|
+
m.gsub!(re, "\\1")
|
168
|
+
@placeholders[m.to_i]
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
def log (text)
|
173
|
+
@options[:logger].error text if @options[:logger]
|
174
|
+
end
|
175
|
+
|
176
|
+
def reserve (content)
|
177
|
+
@placeholders.push content
|
178
|
+
'%' + @replacement_hash + '%' + (@placeholders.size - 1).to_s + '%'
|
179
|
+
end
|
180
|
+
|
181
|
+
def attrs (m, tag_name, r)
|
182
|
+
re = "<(" + tag_name + ")(\s[^>]+)?>"
|
183
|
+
re = Regexp.new(re, true)
|
184
|
+
attributes = m.gsub(re, "\\2")
|
185
|
+
if r
|
186
|
+
tag = m.gsub(re, "\\1")
|
187
|
+
else
|
188
|
+
tag = tag_name
|
189
|
+
end
|
190
|
+
|
191
|
+
if attributes.size > 0
|
192
|
+
attributes_compressed = attributes.gsub(/([a-z\-_:]+(="[^"]*")?(='[^']*')?)\s*/i, " \\1")
|
193
|
+
|
194
|
+
attributes_compressed.gsub! /([a-z\-_:]+="[^"]*")/i do |k|
|
195
|
+
attr k, "\"", tag
|
196
|
+
end
|
197
|
+
|
198
|
+
attributes_compressed.gsub! /([a-z\-_:]+='[^']*')/i do |k|
|
199
|
+
attr k, "'", tag
|
200
|
+
end
|
201
|
+
|
202
|
+
attributes_compressed = " " + attributes_compressed.strip
|
203
|
+
|
204
|
+
if attributes_compressed == " /"
|
205
|
+
attributes_compressed = "/"
|
206
|
+
elsif attributes_compressed == " "
|
207
|
+
attributes_compressed = ""
|
208
|
+
end
|
209
|
+
m.gsub(attributes, attributes_compressed)
|
210
|
+
else
|
211
|
+
m
|
212
|
+
end
|
213
|
+
end
|
214
|
+
|
215
|
+
def attr(attribute, delimiter, tag)
|
216
|
+
re = "([a-z\\-_:]+)(=" + delimiter + "[^" + delimiter + "]*" + delimiter + ")?"
|
217
|
+
re = Regexp.new re, true
|
218
|
+
value_original = attribute.gsub(re, "\\2")
|
219
|
+
value = value_original.downcase
|
220
|
+
name_original = attribute.gsub(re, "\\1")
|
221
|
+
name = name_original.downcase
|
222
|
+
tag_name = tag.downcase
|
223
|
+
|
224
|
+
if value.size > 0
|
225
|
+
re = "^=" + delimiter + "|" + delimiter + "$"
|
226
|
+
re = Regexp.new re
|
227
|
+
value_original.gsub!(re, "")
|
228
|
+
end
|
229
|
+
|
230
|
+
case tag_name
|
231
|
+
when "script"
|
232
|
+
if (name == "type" && value_original == "text/javascript") || (name == "language" && value_original == "JavaScript")
|
233
|
+
return ""
|
234
|
+
elsif name == "async" || name == "defer"
|
235
|
+
return name_original
|
236
|
+
end
|
237
|
+
when "form"
|
238
|
+
if name == "method" && value_original == "get"
|
239
|
+
return ""
|
240
|
+
end
|
241
|
+
when /link|style/
|
242
|
+
if name == "type" && value_original == "text/stylesheet"
|
243
|
+
return ""
|
244
|
+
end
|
245
|
+
when /input|textarea|button|select|option|optgroup/
|
246
|
+
if name == "disabled"
|
247
|
+
return name_original
|
248
|
+
end
|
249
|
+
if (tag_name == "input" || tag_name == "textarea") && name == "readonly"
|
250
|
+
return name_original
|
251
|
+
end
|
252
|
+
if tag_name == "option" && name == "selected"
|
253
|
+
return name_original
|
254
|
+
end
|
255
|
+
if tag_name == "input"
|
256
|
+
if name == "type" && value_original == "text"
|
257
|
+
return ""
|
258
|
+
end
|
259
|
+
if name == "checked"
|
260
|
+
return name_original
|
261
|
+
end
|
262
|
+
# if name == "value" && (value == "=\"\"" || value == "=''")
|
263
|
+
# return ''
|
264
|
+
# end
|
265
|
+
end
|
266
|
+
end
|
267
|
+
|
268
|
+
if value.size > 0
|
269
|
+
|
270
|
+
if name == "style"
|
271
|
+
begin
|
272
|
+
value_original = MultiCss.min_attr value_original
|
273
|
+
# TODO what about escaped attribute values?
|
274
|
+
if delimiter == "\""
|
275
|
+
value_original.gsub!("\"", "'")
|
276
|
+
else
|
277
|
+
value_original.gsub!("'", "\"")
|
278
|
+
end
|
279
|
+
rescue MultiCss::ParseError => e
|
280
|
+
log e.message
|
281
|
+
end
|
282
|
+
end
|
283
|
+
|
284
|
+
if name == "class"
|
285
|
+
value_original.gsub!(/\s+/, " ")
|
286
|
+
value_original.gsub!(/^\s+|\s+$/, "")
|
287
|
+
end
|
288
|
+
|
289
|
+
events = %w[onfocus onblur onselect onchange onclick
|
290
|
+
ondblclick onmousedown onmouseup onmouseover onmousemove
|
291
|
+
onmouseout onkeypress onkeydown onkeyup]
|
292
|
+
|
293
|
+
if events.include? name
|
294
|
+
value_original.gsub! /^javascript:\s+|;$/, ''
|
295
|
+
begin
|
296
|
+
value_original = HtmlPress.js_compressor value_original, @options[:js_minifier_options]
|
297
|
+
# TODO what about escaped attribute values?
|
298
|
+
if delimiter == "\""
|
299
|
+
value_original.gsub! "\"", "'"
|
300
|
+
end
|
301
|
+
rescue MultiJs::ParseError => e
|
302
|
+
log e.message
|
303
|
+
end
|
304
|
+
end
|
305
|
+
|
306
|
+
if value_original.size == 0
|
307
|
+
#attribute without value may be dropped by IE7
|
308
|
+
if @options[:drop_empty_values]
|
309
|
+
attribute = name_original
|
310
|
+
else
|
311
|
+
attribute = name_original + "=" + delimiter + delimiter
|
312
|
+
end
|
313
|
+
elsif @options[:unquoted_attributes] && !(value_original =~ /[ \t\r\n\f"'`=<>]/)
|
314
|
+
attribute = name_original + "=" + value_original
|
315
|
+
else
|
316
|
+
attribute = name_original + "=" + delimiter + value_original + delimiter
|
317
|
+
end
|
318
|
+
|
319
|
+
end
|
320
|
+
|
321
|
+
attribute
|
322
|
+
end
|
323
|
+
|
324
|
+
end
|
325
|
+
end
|