color_parser 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +43 -0
- data/Rakefile +10 -0
- data/color_parser.gemspec +26 -0
- data/lib/color_parser/image.rb +20 -0
- data/lib/color_parser/page.rb +88 -0
- data/lib/color_parser/stylesheet.rb +197 -0
- data/lib/color_parser/version.rb +3 -0
- data/lib/color_parser.rb +142 -0
- data/test/color_parser_test.rb +83 -0
- data/test/fixtures/css/absolute.html +15 -0
- data/test/fixtures/css/inline.html +34 -0
- data/test/fixtures/css/inline_import.html +16 -0
- data/test/fixtures/css/invalid.html +15 -0
- data/test/fixtures/css/relative.html +15 -0
- data/test/fixtures/css/relative_root.html +15 -0
- data/test/fixtures/css/stylesheets/colors.css +0 -0
- data/test/fixtures/css/stylesheets/fonts.css +0 -0
- data/test/fixtures/css/stylesheets/print.css +3 -0
- data/test/fixtures/css/stylesheets/screen.css +16 -0
- data/test/fixtures/css_color/frequency.html +22 -0
- data/test/fixtures/css_color/stylesheets/color_styles.css +34 -0
- data/test/fixtures/css_color/stylesheets/css_elements.css +24 -0
- data/test/fixtures/css_color/stylesheets/frequency.css +35 -0
- data/test/fixtures/css_color/stylesheets/imported_selectors.css +3 -0
- data/test/fixtures/css_color/stylesheets/properties.css +18 -0
- data/test/fixtures/css_images/images/apple.png +0 -0
- data/test/fixtures/css_images/images/cantaloupe.png +0 -0
- data/test/fixtures/css_images/images/kiwi.jpg +0 -0
- data/test/fixtures/css_images/images/mango.png +0 -0
- data/test/fixtures/css_images/images/pineapple.png +0 -0
- data/test/fixtures/css_images/paths.html +14 -0
- data/test/fixtures/css_images/stylesheets/import_paths.css +4 -0
- data/test/fixtures/css_images/stylesheets/paths.css +17 -0
- data/test/fixtures/css_images/stylesheets/quotes.css +14 -0
- data/test/fixtures/css_import/index.html +15 -0
- data/test/fixtures/css_import/stylesheets/borders.css +0 -0
- data/test/fixtures/css_import/stylesheets/colors.css +0 -0
- data/test/fixtures/css_import/stylesheets/fonts.css +3 -0
- data/test/fixtures/css_import/stylesheets/ie.css +3 -0
- data/test/fixtures/css_import/stylesheets/images.css +0 -0
- data/test/fixtures/css_import/stylesheets/master.css +12 -0
- data/test/fixtures/css_import/stylesheets/print.css +3 -0
- data/test/fixtures/css_import/stylesheets/screen.css +12 -0
- data/test/fixtures/inline_images/absolute.html +14 -0
- data/test/fixtures/inline_images/images/apple.png +0 -0
- data/test/fixtures/inline_images/images/kiwi.jpg +0 -0
- data/test/fixtures/inline_images/relative.html +14 -0
- data/test/fixtures/inline_images/relative_root.html +14 -0
- data/test/image_test.rb +27 -0
- data/test/page_test.rb +194 -0
- data/test/stylesheet_test.rb +257 -0
- data/test/test_helper.rb +6 -0
- data/test/test_request.rb +19 -0
- data/test/version_test.rb +7 -0
- metadata +184 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Derek DeVries
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
## ColorParser
|
2
|
+
|
3
|
+
The ColorParser gem provides a simple way to parse the colors from an HTML page or CSS file. It works with both local and remote resources.
|
4
|
+
|
5
|
+
## Example
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
page = ColorParser::Page.new("http://sportspyder.com/")
|
9
|
+
colors = page.colors
|
10
|
+
```
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
```
|
15
|
+
gem install color_parser
|
16
|
+
```
|
17
|
+
```
|
18
|
+
gem "color_parser"
|
19
|
+
```
|
20
|
+
|
21
|
+
## LICENSE
|
22
|
+
|
23
|
+
(The MIT License)
|
24
|
+
|
25
|
+
Copyright © 2012 [Derek DeVries](https://github.com/devrieda/)
|
26
|
+
|
27
|
+
Permission is hereby granted, free of charge, to any person obtaining a
|
28
|
+
copy of this software and associated documentation files (the "Software"),
|
29
|
+
to deal in the Software without restriction, including without
|
30
|
+
limitation the rights to use, copy, modify, merge, publish, distribute,
|
31
|
+
sublicense, and/or sell copies of the Software, and to permit persons
|
32
|
+
to whom the Software is furnished to do so, subject to the following conditions:
|
33
|
+
|
34
|
+
The above copyright notice and this permission notice shall be included
|
35
|
+
in all copies or substantial portions of the Software.
|
36
|
+
|
37
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
38
|
+
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
39
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
40
|
+
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
41
|
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
42
|
+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
43
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for color_parser.

# Put lib/ on the load path so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'color_parser/version'

Gem::Specification.new do |gem|
  gem.name        = "color_parser"
  gem.version     = ColorParser::VERSION
  gem.summary     = %q{Color Parser finds the colors on a given webpage}
  gem.description = gem.summary

  gem.required_ruby_version = '>= 1.9.3'
  gem.license = "MIT"

  gem.authors  = ["Derek DeVries"]
  gem.email    = ["derek@sportspyder.com"]
  gem.homepage = "https://github.com/devrieda/color_parser"

  # The packaged file list is whatever git tracks; executables and test files
  # are picked out of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_runtime_dependency("nokogiri", "~> 1.5")
  gem.add_development_dependency("rake")
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module ColorParser
  # An image referenced by a page or a stylesheet, identified by its URL.
  class Image
    attr_reader :url, :host, :path, :query

    # url - String URL of the image. It is split into host, path and query
    #       by ColorParser.parse_url.
    def initialize(url)
      @url = url
      @host, @path, @query = ColorParser.parse_url(url)
    end

    # Last segment of the path (the file name), or nil when the path has no
    # segments. A nil path is treated as empty instead of raising.
    def name
      path.to_s.split("/").last
    end

    # TODO - find colors in the image
    # Currently always returns an empty list.
    def colors
      []
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module ColorParser
|
2
|
+
# a webpage
|
3
|
+
class Page
|
4
|
+
attr_reader :url, :host, :path, :query, :text, :doc
|
5
|
+
|
6
|
+
def initialize(url)
|
7
|
+
@url = url
|
8
|
+
@host, @path, @query = ColorParser.parse_url(url)
|
9
|
+
|
10
|
+
@text ||= ColorParser.request.get(url)
|
11
|
+
@doc ||= Nokogiri::HTML(@text)
|
12
|
+
end
|
13
|
+
|
14
|
+
def colors
|
15
|
+
unless @colors
|
16
|
+
@colors = {}
|
17
|
+
stylesheets.each do |style|
|
18
|
+
style.colors.each do |color, freq|
|
19
|
+
@colors[color] ? @colors[color] += freq : @colors[color] = freq
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
@colors
|
25
|
+
end
|
26
|
+
|
27
|
+
def colors_by_frequency
|
28
|
+
colors.sort {|a,b| b[1]<=>a[1] }.map {|clr| clr.first }
|
29
|
+
end
|
30
|
+
|
31
|
+
def images
|
32
|
+
@images ||= inline_images + stylesheet_images
|
33
|
+
end
|
34
|
+
|
35
|
+
def stylesheets
|
36
|
+
@stylesheets ||= inline_styles + external_styles
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
# find all inline styles and build new stylesheet from them
|
43
|
+
def inline_styles
|
44
|
+
doc.css("style").map do |style|
|
45
|
+
Stylesheet.new(text: style.inner_html,
|
46
|
+
type: "inline",
|
47
|
+
url: "http://#{host}#{path}")
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def external_styles
|
52
|
+
styles = []
|
53
|
+
|
54
|
+
doc.css("link[rel='stylesheet']").each do |style|
|
55
|
+
next unless href = style["href"]
|
56
|
+
|
57
|
+
asset_url = ColorParser.parse_asset(url, href)
|
58
|
+
next unless text = ColorParser.request.get(asset_url)
|
59
|
+
|
60
|
+
css = Stylesheet.new(text: text,
|
61
|
+
type: "external",
|
62
|
+
url: asset_url)
|
63
|
+
styles << css
|
64
|
+
end
|
65
|
+
|
66
|
+
styles
|
67
|
+
end
|
68
|
+
|
69
|
+
def inline_images
|
70
|
+
images = []
|
71
|
+
|
72
|
+
doc.css("img").map do |image|
|
73
|
+
next unless src = image["src"]
|
74
|
+
next unless src.match(/gif|jpg|jpeg|png|bmp/)
|
75
|
+
|
76
|
+
asset_url = ColorParser.parse_asset(url, src)
|
77
|
+
images << Image.new(asset_url)
|
78
|
+
end
|
79
|
+
|
80
|
+
images
|
81
|
+
end
|
82
|
+
|
83
|
+
def stylesheet_images
|
84
|
+
[stylesheets.map {|style| style.images }].flatten
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
@@ -0,0 +1,197 @@
|
|
1
|
+
module ColorParser
|
2
|
+
# a set of css selectors
|
3
|
+
class Stylesheet
|
4
|
+
TEXT_COLORS = {
|
5
|
+
aqua: "00ffff", black: "000000", blue: "0000ff",
|
6
|
+
fuchsia: "ff00ff", gray: "808080", green: "008000",
|
7
|
+
lime: "00ff00", maroon: "800000", navy: "000080",
|
8
|
+
olive: "808000", purple: "800080", red: "ff0000",
|
9
|
+
silver: "c0c0c0", teal: "008080", white: "ffffff",
|
10
|
+
yellow: "ffff00"
|
11
|
+
}
|
12
|
+
|
13
|
+
attr_reader :url, :type, :host, :path, :query, :text
|
14
|
+
|
15
|
+
def initialize(options)
|
16
|
+
@type = options[:type]
|
17
|
+
@text = options[:text]
|
18
|
+
@url = options[:url]
|
19
|
+
|
20
|
+
@host, @path, @query = ColorParser.parse_url(url)
|
21
|
+
end
|
22
|
+
|
23
|
+
def name
|
24
|
+
path.split("/").last
|
25
|
+
end
|
26
|
+
|
27
|
+
# get imported stylesheets
|
28
|
+
def stylesheets
|
29
|
+
@stylesheets ||= imported_stylesheets
|
30
|
+
end
|
31
|
+
|
32
|
+
# gst list of colors from styles
|
33
|
+
def colors
|
34
|
+
@colors ||= parse_colors(color_properties)
|
35
|
+
end
|
36
|
+
|
37
|
+
def bg_colors
|
38
|
+
@bg_colors ||= parse_colors(bg_properties)
|
39
|
+
end
|
40
|
+
|
41
|
+
def text_colors
|
42
|
+
@text_colors ||= parse_colors(text_properties)
|
43
|
+
end
|
44
|
+
|
45
|
+
def border_colors
|
46
|
+
@border_colors ||= parse_colors(border_properties)
|
47
|
+
end
|
48
|
+
|
49
|
+
|
50
|
+
def images
|
51
|
+
images = []
|
52
|
+
|
53
|
+
image_properties.each do |key, value|
|
54
|
+
if value.include?("url") && match = value.match(/url\(['"]?([^'")]+)/)
|
55
|
+
asset_url = ColorParser.parse_asset(url, match[1])
|
56
|
+
images << Image.new(asset_url)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
images
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
# groups of css selectors (including imported styles)
|
65
|
+
def selectors
|
66
|
+
selectors = {}
|
67
|
+
|
68
|
+
text.scan(/([^\s\}]+)[\s]*?\{(.*?)\}/m).each do |match|
|
69
|
+
selector, rule = match
|
70
|
+
selectors[selector] ||= []
|
71
|
+
selectors[selector] << rule.strip
|
72
|
+
end
|
73
|
+
|
74
|
+
# imported styles
|
75
|
+
stylesheets.each do |style|
|
76
|
+
style.selectors.each do |selector, rule|
|
77
|
+
selectors[selector] ||= []
|
78
|
+
selectors[selector] += rule
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
selectors
|
83
|
+
end
|
84
|
+
|
85
|
+
# split up selectors into properties, and return property key/value pairs
|
86
|
+
def properties
|
87
|
+
properties = []
|
88
|
+
|
89
|
+
selectors.each do |selector, rules|
|
90
|
+
rules.each do |rule|
|
91
|
+
rule.split(";").each do |property|
|
92
|
+
props = property.split(":", 2).map {|v| v.strip }
|
93
|
+
properties << props if props.size == 2
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
properties
|
99
|
+
end
|
100
|
+
|
101
|
+
|
102
|
+
private
|
103
|
+
|
104
|
+
def imported_stylesheets
|
105
|
+
return [] unless text.include?("@import")
|
106
|
+
|
107
|
+
styles = []
|
108
|
+
text.scan(/@import(?:\surl|\s)(.*?)[;\n]+/).each do |style|
|
109
|
+
style_path = style.first.gsub(/['"\(\);]/, "")
|
110
|
+
|
111
|
+
asset_url = ColorParser.parse_asset(url, style_path)
|
112
|
+
next unless text = ColorParser.request.get(asset_url)
|
113
|
+
|
114
|
+
css = Stylesheet.new(text: text,
|
115
|
+
type: "imported",
|
116
|
+
url: asset_url)
|
117
|
+
styles << css
|
118
|
+
end
|
119
|
+
|
120
|
+
styles
|
121
|
+
end
|
122
|
+
|
123
|
+
# find properties that might have a color
|
124
|
+
def color_properties
|
125
|
+
properties.select do |key, value|
|
126
|
+
["background-color", "background", "border-color", "border",
|
127
|
+
"border-top-color", "border-right-color", "border-bottom-color",
|
128
|
+
"border-left-color", "color", "outline-color"].include?(key)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# properties with bg colors
|
133
|
+
def bg_properties
|
134
|
+
color_properties.select {|key, value| key.include?("background") }
|
135
|
+
end
|
136
|
+
|
137
|
+
# properties with textual color
|
138
|
+
def text_properties
|
139
|
+
color_properties.select {|key, value| key == "color" }
|
140
|
+
end
|
141
|
+
|
142
|
+
# properties with borders
|
143
|
+
def border_properties
|
144
|
+
color_properties.select do |key, value|
|
145
|
+
key.include?("border") || key.include?("outline")
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
# find properties that might have an image
|
150
|
+
def image_properties
|
151
|
+
color_properties.select {|key, value| key.include?("background") }
|
152
|
+
end
|
153
|
+
|
154
|
+
def parse_colors(property_list)
|
155
|
+
colors = {}
|
156
|
+
|
157
|
+
property_list.each do |key, value|
|
158
|
+
# hex
|
159
|
+
hex = if matches = value.match(/#([0-9a-f]{3,6})/i)
|
160
|
+
normalize_hex(matches[1])
|
161
|
+
|
162
|
+
# rgb/rgba
|
163
|
+
elsif matches = value.match(/rgba?\((\d{1,3}[,\s]+\d{1,3}[,\s]+\d{1,3})/)
|
164
|
+
rgb_to_hex(matches[1])
|
165
|
+
|
166
|
+
# textual
|
167
|
+
elsif matches = value.match(/(#{TEXT_COLORS.map {|k,v| k }.join("|")})/)
|
168
|
+
text_to_hex(matches[1])
|
169
|
+
end
|
170
|
+
|
171
|
+
next unless hex
|
172
|
+
|
173
|
+
colors[hex] ? colors[hex] += 1 : colors[hex] = 1
|
174
|
+
end
|
175
|
+
|
176
|
+
# sort by colors with most occurrances
|
177
|
+
colors
|
178
|
+
end
|
179
|
+
|
180
|
+
# convert rgb to hex
|
181
|
+
def rgb_to_hex(rgb)
|
182
|
+
r, g, b = rgb.split(",").map {|color| color.strip }
|
183
|
+
"%02x" % r + "%02x" % g + "%02x" % b
|
184
|
+
end
|
185
|
+
|
186
|
+
# find hex for textual color
|
187
|
+
def text_to_hex(color)
|
188
|
+
TEXT_COLORS[color.intern]
|
189
|
+
end
|
190
|
+
|
191
|
+
# convert 3 digit hex to 6
|
192
|
+
def normalize_hex(hex)
|
193
|
+
(hex.length == 3 ? hex[0,1]*2 + hex[1,1]*2 + hex[2,1]*2: hex).downcase
|
194
|
+
end
|
195
|
+
|
196
|
+
end
|
197
|
+
end
|
data/lib/color_parser.rb
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
require 'color_parser/version'
|
6
|
+
require 'color_parser/page'
|
7
|
+
require 'color_parser/stylesheet'
|
8
|
+
require 'color_parser/image'
|
9
|
+
|
10
|
+
module ColorParser
  # Raised by Request#get for unusable responses and redirect loops.
  # Defined only when no HTTPError constant is visible yet, so a definition
  # made elsewhere keeps precedence.
  HTTPError = Class.new(StandardError) unless const_defined?(:HTTPError)

  # Build url of an asset based on the relative/absolute url
  #
  # doc_url   - String URL of the document that references the asset.
  # asset_url - String reference as written in the document: absolute
  #             ("http://host/a.css"), protocol relative ("//host/a.css"),
  #             root relative ("/a.css") or relative ("a.css", "../a.css").
  #
  # Returns the asset URL as a String. The scheme is always written as http.
  def self.parse_asset(doc_url, asset_url)
    doc_host, doc_path, _doc_query = parse_url(doc_url)
    asset_host, asset_path, asset_query = parse_url(asset_url)

    host, path =
      if asset_host
        # absolute or protocol relative: the reference names its own host.
        # Deciding on the parsed host (not on the text "http") keeps relative
        # paths such as "images/http_logo.png" relative.
        [asset_host, asset_path]
      elsif asset_url.strip[0, 1] == "/"
        # root relative
        [doc_host, asset_path]
      else
        # relative: resolve against the directory of the document
        doc_dir = doc_path.gsub(/[^\/]*$/, "")
        [doc_host, File.expand_path("#{doc_dir}#{asset_path}", "/")]
      end

    "http://#{host}#{path}#{"?" + asset_query if asset_query}"
  end

  # parse url parts
  #
  # Returns [host, path, query]; the path is "/" when the url has none.
  def self.parse_url(url)
    uri  = parse_uri(url)
    path = uri.path.to_s

    [uri.host, (path.empty? ? "/" : path), uri.query]
  end

  # Parse a url into a URI, escaping unsafe characters (e.g. spaces) and
  # retrying once when the raw text is rejected. Shared by parse_url and
  # Request#get. URI.escape was removed in Ruby 3.0, so the escaping goes
  # through the default parser object instead.
  def self.parse_uri(url)
    cleaned = url.strip
    URI.parse(cleaned)
  rescue URI::InvalidURIError
    URI.parse(URI::DEFAULT_PARSER.escape(cleaned))
  end

  # Request

  # Replace the object used to fetch pages and assets. It must respond to
  # get(url) and return the body text.
  def self.request=(request)
    @request = request
  end

  # The object used to fetch pages and assets; a Request unless one was set.
  def self.request
    @request ||= Request.new
  end

  # Request an asset
  #
  class Request
    # Upper bound on the number of redirects followed for one get.
    MAX_REDIRECTS = 10

    # Time of the most recent request made by any Request instance. Starts at
    # the epoch so that the very first request is not delayed.
    @@last_request = Time.at(0)

    # default throttle requests 1 per sec
    #
    # params - Hash; :throttle is the minimum number of seconds between
    #          two requests.
    def initialize(params={})
      @throttle = params[:throttle] || 1
    end

    # Fetch a url, following redirects, and return the response body
    # (transcoded to UTF-8 by fix_encoding).
    #
    # Raises HTTPError on a 400 response, on a redirect to a url that was
    # already visited during this call, or after MAX_REDIRECTS redirects.
    def get(url)
      current = url.strip
      visited = [current]

      loop do
        throttle

        uri      = ColorParser.parse_uri(current)
        response = get_response(uri)

        # bad req
        if response.nil? || response.is_a?(Net::HTTPBadRequest)
          raise HTTPError, "invalid HTTP request #{current}"
        end

        # redirect
        location = response["location"]
        return fix_encoding(response).body unless location

        target = redirect_target(uri, location)

        # make sure we're not in an infinite loop
        if visited.include?(target) || visited.size > MAX_REDIRECTS
          raise HTTPError, "Recursive redirect: #{target}"
        end

        visited << target
        current = target
      end
    end

    def user_agent
      "ColorParser Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.0.7) Gecko/20060909 Firefox/1.5.0.7"
    end

    private

    # Resolve a Location header against the url that produced it, so that
    # relative redirects work. Falls back to the raw header value when it
    # cannot be joined.
    def redirect_target(uri, location)
      URI.join(uri.to_s, location).to_s
    rescue URI::Error
      location
    end

    # Use charset in content-type, default to UTF-8 if absent
    #
    #   text/html; charset=UTF-8
    #   - or -
    #   text/html; charset=iso-8859-1
    #   - or -
    #   text/html
    #
    # The body is re-tagged with that charset and transcoded to UTF-8 in
    # place; characters that cannot be converted are replaced. An unknown
    # charset name is treated as UTF-8. Returns the response.
    def fix_encoding(response)
      body = response.body
      return response if body.nil?

      declared = response["Content-Type"].to_s[/charset\s*=\s*["']?([^\s;"']+)/i, 1]
      source =
        begin
          Encoding.find(declared || "UTF-8")
        rescue ArgumentError
          Encoding::UTF_8
        end

      body.force_encoding(source)
      body.encode!("UTF-8", invalid: :replace, undef: :replace)
      response
    end

    # build http request object
    def get_response(uri)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true if uri.scheme == "https" # redirects often point at https
      http.open_timeout = 15
      http.read_timeout = 30

      request = Net::HTTP::Get.new(uri.request_uri)
      request["User-Agent"] = user_agent

      http.request(request)
    end

    # throttle requests to 1 per sec
    #
    # Sleeps only for the part of the interval that has not elapsed yet.
    def throttle
      wait = (@@last_request + @throttle) - Time.now
      sleep(wait) if wait > 0
      @@last_request = Time.now
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require_relative "test_helper"
|
2
|
+
|
3
|
+
# Specs for the module-level helpers of ColorParser. Requests are served by
# ColorParser::TestRequest, so no network access is needed.
describe ColorParser do
  before do
    ColorParser.request = ColorParser::TestRequest.new
  end

  it "should retrieve fixture" do
    url = "http://example.com/css/absolute.html?foo=bar"
    result = ColorParser.request.get(url)
    refute_nil result
  end

  # parse_url

  it "should parse url" do
    url = "http://example.com/test/something/"
    assert_equal ["example.com", "/test/something/", nil], ColorParser.parse_url(url)
  end

  it "should parse url with no trailing slash" do
    url = "http://example.com"
    assert_equal ["example.com", "/", nil], ColorParser.parse_url(url)
  end

  it "should parse url with query params" do
    url = "http://example.com?foo=bar&baz=bar"
    assert_equal ["example.com", "/", "foo=bar&baz=bar"], ColorParser.parse_url(url)
  end

  # parse_asset

  it "should parse asset absolute path" do
    doc   = "http://example.com/stylesheets/base.css"
    asset = "http://asset.example.com/stylesheets/style.css"

    parsed = ColorParser.parse_asset(doc, asset)
    assert_equal "http://asset.example.com/stylesheets/style.css", parsed
  end

  it "should parse asset absolute path with query string" do
    doc   = "http://example.com/stylesheets/base.css?foo=bar"
    asset = "http://asset.example.com/stylesheets/style.css?baz=bar"

    parsed = ColorParser.parse_asset(doc, asset)
    assert_equal "http://asset.example.com/stylesheets/style.css?baz=bar", parsed
  end

  it "should parse relative root path" do
    doc   = "http://example.com/stylesheets/base.css"
    asset = "/styles/style.css"

    parsed = ColorParser.parse_asset(doc, asset)
    assert_equal "http://example.com/styles/style.css", parsed
  end

  it "should parse relative root path with query string" do
    doc   = "http://example.com/stylesheets/base.css?foo=bar"
    asset = "/styles/style.css?baz=bar"

    parsed = ColorParser.parse_asset(doc, asset)
    assert_equal "http://example.com/styles/style.css?baz=bar", parsed
  end

  it "should parse relative path" do
    doc   = "http://example.com/stylesheets/base.css"
    asset = "ie.css"

    parsed = ColorParser.parse_asset(doc, asset)
    assert_equal "http://example.com/stylesheets/ie.css", parsed
  end

  it "should parse relative path with query string" do
    doc   = "http://example.com/stylesheets/base.css?foo=bar"
    asset = "ie.css?baz=bar"

    parsed = ColorParser.parse_asset(doc, asset)
    assert_equal "http://example.com/stylesheets/ie.css?baz=bar", parsed
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
2
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml" dir="ltr" lang="en-US">
|
4
|
+
|
5
|
+
<head>
|
6
|
+
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
|
7
|
+
<title>Color Parser</title>
|
8
|
+
|
9
|
+
<link rel="stylesheet" href="http://example.com/css/stylesheets/screen.css" media="screen" type="text/css" />
|
10
|
+
<link rel="stylesheet" href="http://example.com/css/stylesheets/print.css" media="print" type="text/css" />
|
11
|
+
</head>
|
12
|
+
<body>
|
13
|
+
<div></div>
|
14
|
+
</body>
|
15
|
+
</html>
|