color_parser 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +43 -0
- data/Rakefile +10 -0
- data/color_parser.gemspec +26 -0
- data/lib/color_parser/image.rb +20 -0
- data/lib/color_parser/page.rb +88 -0
- data/lib/color_parser/stylesheet.rb +197 -0
- data/lib/color_parser/version.rb +3 -0
- data/lib/color_parser.rb +142 -0
- data/test/color_parser_test.rb +83 -0
- data/test/fixtures/css/absolute.html +15 -0
- data/test/fixtures/css/inline.html +34 -0
- data/test/fixtures/css/inline_import.html +16 -0
- data/test/fixtures/css/invalid.html +15 -0
- data/test/fixtures/css/relative.html +15 -0
- data/test/fixtures/css/relative_root.html +15 -0
- data/test/fixtures/css/stylesheets/colors.css +0 -0
- data/test/fixtures/css/stylesheets/fonts.css +0 -0
- data/test/fixtures/css/stylesheets/print.css +3 -0
- data/test/fixtures/css/stylesheets/screen.css +16 -0
- data/test/fixtures/css_color/frequency.html +22 -0
- data/test/fixtures/css_color/stylesheets/color_styles.css +34 -0
- data/test/fixtures/css_color/stylesheets/css_elements.css +24 -0
- data/test/fixtures/css_color/stylesheets/frequency.css +35 -0
- data/test/fixtures/css_color/stylesheets/imported_selectors.css +3 -0
- data/test/fixtures/css_color/stylesheets/properties.css +18 -0
- data/test/fixtures/css_images/images/apple.png +0 -0
- data/test/fixtures/css_images/images/cantaloupe.png +0 -0
- data/test/fixtures/css_images/images/kiwi.jpg +0 -0
- data/test/fixtures/css_images/images/mango.png +0 -0
- data/test/fixtures/css_images/images/pineapple.png +0 -0
- data/test/fixtures/css_images/paths.html +14 -0
- data/test/fixtures/css_images/stylesheets/import_paths.css +4 -0
- data/test/fixtures/css_images/stylesheets/paths.css +17 -0
- data/test/fixtures/css_images/stylesheets/quotes.css +14 -0
- data/test/fixtures/css_import/index.html +15 -0
- data/test/fixtures/css_import/stylesheets/borders.css +0 -0
- data/test/fixtures/css_import/stylesheets/colors.css +0 -0
- data/test/fixtures/css_import/stylesheets/fonts.css +3 -0
- data/test/fixtures/css_import/stylesheets/ie.css +3 -0
- data/test/fixtures/css_import/stylesheets/images.css +0 -0
- data/test/fixtures/css_import/stylesheets/master.css +12 -0
- data/test/fixtures/css_import/stylesheets/print.css +3 -0
- data/test/fixtures/css_import/stylesheets/screen.css +12 -0
- data/test/fixtures/inline_images/absolute.html +14 -0
- data/test/fixtures/inline_images/images/apple.png +0 -0
- data/test/fixtures/inline_images/images/kiwi.jpg +0 -0
- data/test/fixtures/inline_images/relative.html +14 -0
- data/test/fixtures/inline_images/relative_root.html +14 -0
- data/test/image_test.rb +27 -0
- data/test/page_test.rb +194 -0
- data/test/stylesheet_test.rb +257 -0
- data/test/test_helper.rb +6 -0
- data/test/test_request.rb +19 -0
- data/test/version_test.rb +7 -0
- metadata +184 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Derek DeVries
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
## ColorParser
|
2
|
+
|
3
|
+
The ColorParser gem provides a simple way to parse the colors from an HTML page or a CSS file. It works with both local and remote resources.
|
4
|
+
|
5
|
+
## Example
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
page = ColorParser::Page.new("http://sportspyder.com/")
|
9
|
+
colors = page.colors
|
10
|
+
```
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
```
|
15
|
+
gem install color_parser
|
16
|
+
```
|
17
|
+
```
|
18
|
+
gem "color_parser"
|
19
|
+
```
|
20
|
+
|
21
|
+
## LICENSE
|
22
|
+
|
23
|
+
(The MIT License)
|
24
|
+
|
25
|
+
Copyright © 2012 [Derek DeVries](https://github.com/devrieda/)
|
26
|
+
|
27
|
+
Permission is hereby granted, free of charge, to any person obtaining a
|
28
|
+
copy of this software and associated documentation files (the "Software"),
|
29
|
+
to deal in the Software without restriction, including without
|
30
|
+
limitation the rights to use, copy, modify, merge, publish, distribute,
|
31
|
+
sublicense, and/or sell copies of the Software, and to permit persons
|
32
|
+
to whom the Software is furnished to do so, subject to the following conditions:
|
33
|
+
|
34
|
+
The above copyright notice and this permission notice shall be included
|
35
|
+
in all copies or substantial portions of the Software.
|
36
|
+
|
37
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
38
|
+
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
39
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
40
|
+
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
41
|
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
42
|
+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
43
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'color_parser/version'
|
5
|
+
|
6
|
+
# Gem specification for color_parser: package metadata, the packaged file
# lists (taken from the files tracked by git) and the gem's dependencies.
Gem::Specification.new do |spec|
  spec.name        = "color_parser"
  spec.version     = ColorParser::VERSION
  spec.summary     = "Color Parser finds the colors on a given webpage"
  spec.description = spec.summary

  spec.required_ruby_version = ">= 1.9.3"
  spec.license               = "MIT"

  spec.authors  = ["Derek DeVries"]
  spec.email    = ["derek@sportspyder.com"]
  spec.homepage = "https://github.com/devrieda/color_parser"

  # Everything tracked by git is packaged; executables and test files are
  # derived from that same list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |file| File.basename(file) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency("nokogiri", "~> 1.5")
  spec.add_development_dependency("rake")
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module ColorParser
  # An image referenced by a page or a stylesheet, identified by its URL.
  class Image
    attr_reader :url, :host, :path, :query

    # url - address of the image. It is split into host, path and query
    #       by ColorParser.parse_url.
    def initialize(url)
      @url = url
      parts = ColorParser.parse_url(url)
      @host, @path, @query = parts
    end

    # Returns the last "/"-separated segment of the path.
    def name
      segments = path.split("/")
      segments[-1]
    end

    # TODO - find colors in the image
    #
    # Currently always returns an empty list.
    def colors
      []
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module ColorParser
  # A web page. The HTML is fetched through ColorParser.request and parsed
  # with Nokogiri; the page then exposes the stylesheets, images and colors
  # found in it.
  class Page
    attr_reader :url, :host, :path, :query, :text, :doc

    # url - address of the page to fetch and parse.
    def initialize(url)
      @url = url
      @host, @path, @query = ColorParser.parse_url(url)

      @text ||= ColorParser.request.get(url)
      @doc  ||= Nokogiri::HTML(@text)
    end

    # Hash of color => number of occurrences, summed over every stylesheet
    # of the page. The result is memoized.
    def colors
      return @colors if @colors

      @colors = {}
      stylesheets.each do |sheet|
        sheet.colors.each do |color, count|
          if @colors[color]
            @colors[color] += count
          else
            @colors[color] = count
          end
        end
      end

      @colors
    end

    # Colors ordered from most to least frequent.
    def colors_by_frequency
      ranked = colors.sort { |left, right| right[1] <=> left[1] }
      ranked.map { |pair| pair.first }
    end

    # Images from <img> tags followed by images referenced by the stylesheets.
    def images
      @images ||= inline_images + stylesheet_images
    end

    # Inline <style> blocks followed by linked stylesheets.
    def stylesheets
      @stylesheets ||= inline_styles + external_styles
    end


    private

    # Build one Stylesheet per <style> element, using the page's own
    # address as the stylesheet url.
    def inline_styles
      doc.css("style").map do |node|
        Stylesheet.new(url:  "http://#{host}#{path}",
                       type: "inline",
                       text: node.inner_html)
      end
    end

    # Fetch every <link rel="stylesheet"> that has an href and wrap the
    # response in a Stylesheet. Links whose request returns nothing are skipped.
    def external_styles
      doc.css("link[rel='stylesheet']").each_with_object([]) do |link, sheets|
        href = link["href"]
        next unless href

        asset_url = ColorParser.parse_asset(url, href)
        body = ColorParser.request.get(asset_url)
        next unless body

        sheets << Stylesheet.new(text: body, type: "external", url: asset_url)
      end
    end

    # Build an Image for every <img> whose src mentions a known image extension.
    def inline_images
      doc.css("img").each_with_object([]) do |node, found|
        src = node["src"]
        next unless src
        next unless src.match(/gif|jpg|jpeg|png|bmp/)

        found << Image.new(ColorParser.parse_asset(url, src))
      end
    end

    # Images referenced from the page's stylesheets, as one flat list.
    def stylesheet_images
      stylesheets.map { |sheet| sheet.images }.flatten
    end
  end

end
|
@@ -0,0 +1,197 @@
|
|
1
|
+
module ColorParser
  # A set of CSS selectors taken from an inline <style> block, an external
  # file or an @import, together with helpers that pull colors and images
  # out of its rules.
  class Stylesheet
    # The named colors recognised by the parser, mapped to 6-digit hex.
    TEXT_COLORS = {
      aqua:    "00ffff", black:  "000000", blue:   "0000ff",
      fuchsia: "ff00ff", gray:   "808080", green:  "008000",
      lime:    "00ff00", maroon: "800000", navy:   "000080",
      olive:   "808000", purple: "800080", red:    "ff0000",
      silver:  "c0c0c0", teal:   "008080", white:  "ffffff",
      yellow:  "ffff00"
    }.freeze

    # Properties whose value may carry a color.
    COLOR_PROPERTIES = [
      "background-color", "background", "border-color", "border",
      "border-top-color", "border-right-color", "border-bottom-color",
      "border-left-color", "color", "outline-color"
    ].freeze

    # "#" followed by a run of hex digits; the length is validated in normalize_hex.
    HEX_PATTERN = /#([0-9a-f]+)/i

    # rgb()/rgba() with three integer channels separated by commas and/or whitespace.
    RGB_PATTERN = /rgba?\(\s*(\d{1,3}[,\s]+\d{1,3}[,\s]+\d{1,3})/i

    # A named color standing on its own, so "darkred" or "light-blue" do not
    # count as "red" / "blue".
    NAMED_PATTERN = /(?<![\w-])(#{TEXT_COLORS.keys.join("|")})(?![\w-])/i

    # url(...) tokens; removed before color matching so file names such as
    # "blue.png" are not mistaken for colors.
    URL_PATTERN = /url\([^)]*\)/i

    # @import target, with or without url(...) and quotes. Anything after
    # the target (e.g. a media query) is ignored.
    IMPORT_PATTERN = /@import\s*(?:url\(\s*)?['"]?([^'"\)\s;]+)/

    attr_reader :url, :type, :host, :path, :query, :text

    # options - Hash with :text (the CSS source), :type (the callers in this
    #           gem pass "inline", "external" or "imported") and :url (the
    #           address against which relative assets are resolved).
    def initialize(options)
      @type = options[:type]
      @text = options[:text]
      @url  = options[:url]

      @host, @path, @query = ColorParser.parse_url(url)
    end

    # Last "/"-separated segment of the stylesheet path.
    def name
      path.split("/").last
    end

    # get imported stylesheets (memoized)
    def stylesheets
      @stylesheets ||= imported_stylesheets
    end

    # get list of colors from styles, as a Hash of hex => occurrences
    def colors
      @colors ||= parse_colors(color_properties)
    end

    # colors used by background properties
    def bg_colors
      @bg_colors ||= parse_colors(bg_properties)
    end

    # colors used by the "color" property
    def text_colors
      @text_colors ||= parse_colors(text_properties)
    end

    # colors used by border and outline properties
    def border_colors
      @border_colors ||= parse_colors(border_properties)
    end

    # Image objects for every url(...) found in a background property.
    # Relative paths are resolved against the stylesheet's own url.
    def images
      image_properties.each_with_object([]) do |(_key, value), found|
        match = value.match(/url\(\s*['"]?([^'")]+)/)
        next unless match

        found << Image.new(ColorParser.parse_asset(url, match[1].strip))
      end
    end

    # groups of css selectors (including imported styles)
    #
    # Returns a Hash of selector => Array of rule bodies. The pattern keeps
    # only the last whitespace-free token in front of "{" as the selector.
    def selectors
      selectors = {}

      text.scan(/([^\s\}]+)[\s]*?\{(.*?)\}/m).each do |selector, rule|
        selectors[selector] ||= []
        selectors[selector] << rule.strip
      end

      # imported styles
      stylesheets.each do |style|
        style.selectors.each do |selector, rules|
          selectors[selector] ||= []
          selectors[selector] += rules
        end
      end

      selectors
    end

    # split up selectors into properties, and return property key/value pairs
    #
    # Returns an Array of [name, value] pairs; declarations without a ":"
    # are dropped.
    def properties
      properties = []

      selectors.each do |_selector, rules|
        rules.each do |rule|
          rule.split(";").each do |property|
            props = property.split(":", 2).map { |v| v.strip }
            properties << props if props.size == 2
          end
        end
      end

      properties
    end


    private

    # Fetch and wrap every stylesheet referenced by an @import rule.
    # Imports whose request returns nothing are skipped.
    def imported_stylesheets
      return [] unless text.include?("@import")

      text.scan(IMPORT_PATTERN).each_with_object([]) do |(style_path), sheets|
        asset_url = ColorParser.parse_asset(url, style_path)
        css_text  = ColorParser.request.get(asset_url)
        next unless css_text

        sheets << Stylesheet.new(text: css_text,
                                 type: "imported",
                                 url:  asset_url)
      end
    end

    # find properties that might have a color
    def color_properties
      properties.select { |key, _value| COLOR_PROPERTIES.include?(key) }
    end

    # properties with bg colors
    def bg_properties
      color_properties.select { |key, _value| key.include?("background") }
    end

    # properties with textual color
    def text_properties
      color_properties.select { |key, _value| key == "color" }
    end

    # properties with borders
    def border_properties
      color_properties.select do |key, _value|
        key.include?("border") || key.include?("outline")
      end
    end

    # find properties that might have an image
    #
    # Selected from all properties rather than from the color properties,
    # so that "background-image" is covered as well as "background".
    def image_properties
      properties.select { |key, _value| key.include?("background") }
    end

    # Count the colors found in a list of [name, value] pairs. At most one
    # color is taken from each value.
    #
    # Returns a Hash of 6-digit lowercase hex => number of occurrences.
    def parse_colors(property_list)
      property_list.each_with_object({}) do |(_key, value), counts|
        hex = extract_color(value)
        next unless hex

        counts[hex] = counts.fetch(hex, 0) + 1
      end
    end

    # First color in a property value, tried in the order hex, rgb()/rgba(),
    # named color. Returns nil when none is found.
    def extract_color(value)
      css = value.gsub(URL_PATTERN, " ")

      if (found = css.match(HEX_PATTERN)) && (hex = normalize_hex(found[1]))
        hex
      elsif (found = css.match(RGB_PATTERN))
        rgb_to_hex(found[1])
      elsif (found = css.match(NAMED_PATTERN))
        text_to_hex(found[1])
      end
    end

    # convert rgb to hex
    #
    # rgb - the three channels as matched by RGB_PATTERN, separated by
    #       commas and/or whitespace. Each channel is capped at 255 so the
    #       result is always six digits.
    def rgb_to_hex(rgb)
      channels = rgb.strip.split(/[,\s]+/).first(3).map { |c| [c.to_i, 255].min }
      format("%02x%02x%02x", *channels)
    end

    # find hex for textual color (case-insensitive)
    def text_to_hex(color)
      TEXT_COLORS[color.downcase.to_sym]
    end

    # Normalise a run of hex digits to 6 lowercase digits. 3- and 4-digit
    # forms are expanded, and the alpha digits of the 4- and 8-digit forms
    # are dropped. Returns nil for any other length.
    def normalize_hex(hex)
      digits = hex.downcase

      case digits.length
      when 3, 4 then digits[0, 3].chars.map { |c| c * 2 }.join
      when 6, 8 then digits[0, 6]
      end
    end

  end
end
|
data/lib/color_parser.rb
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
require 'color_parser/version'
|
6
|
+
require 'color_parser/page'
|
7
|
+
require 'color_parser/stylesheet'
|
8
|
+
require 'color_parser/image'
|
9
|
+
|
10
|
+
module ColorParser

  # Raised by Request#get for unusable responses and redirect loops.
  class HTTPError < StandardError; end

  # Build url of an asset based on the relative/absolute url
  #
  # doc_url   - url of the document that references the asset.
  # asset_url - reference as written in the document: a full url, a
  #             protocol-relative url ("//host/path"), a root-relative path
  #             ("/path") or a path relative to the document.
  #
  # Returns the asset url as a String. The result always uses the "http"
  # scheme; the scheme of either input is not carried over.
  def self.parse_asset(doc_url, asset_url)
    doc_host, doc_path, _doc_query = parse_url(doc_url)
    asset_host, asset_path, asset_query = parse_url(asset_url)

    host, path =
      if asset_host
        # absolute (or protocol-relative): the asset names its own host
        [asset_host, asset_path]
      elsif asset_url.strip.start_with?("/")
        # root relative
        [doc_host, asset_path]
      else
        # relative: resolve against the directory of the document
        base_dir = doc_path.sub(%r{[^/]*\z}, "")
        [doc_host, File.expand_path("#{base_dir}#{asset_path}", "/")]
      end

    "http://#{host}#{path}#{"?" + asset_query if asset_query}"
  end

  # parse url parts
  #
  # Returns [host, path, query]. An empty path is reported as "/".
  def self.parse_url(url)
    uri  = build_uri(url)
    path = uri.path
    path = "/" if path == ""

    [uri.host, path, uri.query]
  end

  # Parse a url into a URI object. If the url is rejected as invalid, it is
  # escaped and parsed once more.
  def self.build_uri(url)
    cleaned = url.strip
    URI.parse(cleaned)
  rescue URI::InvalidURIError
    # URI.escape was removed in Ruby 3.0; the parser object still offers it.
    URI.parse(URI::DEFAULT_PARSER.escape(cleaned))
  end


  # Request

  # Replace the object used to fetch pages and assets (it must respond to
  # #get(url)); the specs use this to install a test double.
  def self.request=(request)
    @request = request
  end

  # The object used to fetch pages and assets; a throttled Request by default.
  def self.request
    @request ||= Request.new
  end

  # Request an asset
  #
  class Request
    # Upper bound on the number of redirects followed for a single #get.
    MAX_REDIRECTS = 10

    # Time of the most recent request, shared by every Request instance.
    @@last_request = Time.now

    # default throttle requests 1 per sec
    #
    # params - Hash; :throttle is the minimum number of seconds between
    #          two requests.
    def initialize(params={})
      @throttle = params[:throttle] || 1
    end

    # Fetch +url+, following redirects, and return the response body.
    #
    # Raises HTTPError when there is no response, when the server answers
    # 400 Bad Request, or when the redirects do not settle.
    def get(url)
      response = follow(url, MAX_REDIRECTS)
      fix_encoding(response).body
    end

    def user_agent
      "ColorParser Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.0.7) Gecko/20060909 Firefox/1.5.0.7"
    end


    private

    # Perform one throttled request and recurse while the response carries
    # a Location header. The hop budget is per call, so an earlier request
    # cannot trigger a false "Recursive redirect" for a later one.
    def follow(url, redirects_left)
      throttle

      uri      = ColorParser.build_uri(url)
      response = get_response(uri)

      # bad req
      if response.nil? || response.is_a?(Net::HTTPBadRequest)
        raise HTTPError, "invalid HTTP request #{url}"
      end

      # redirect
      location = response["location"]
      return response unless location

      target = absolute_location(uri, location)
      if redirects_left <= 0 || target == uri.to_s
        raise HTTPError, "Recursive redirect: #{location}"
      end

      follow(target, redirects_left - 1)
    end

    # Resolve a Location header against the url that produced it, so that
    # relative redirects keep their host. Falls back to the raw header value
    # when it cannot be joined.
    def absolute_location(base_uri, location)
      URI.join(base_uri.to_s, location.strip).to_s
    rescue URI::Error
      location
    end

    # Use charset in content-type, default to UTF-8 if absent
    #
    #   text/html; charset=UTF-8
    #   - or -
    #   text/html; charset=iso-8859-1
    #   - or -
    #   text/html
    #
    # The body is re-tagged with that charset, transcoded to UTF-8 and
    # stored back on the response. An unknown charset is treated as UTF-8,
    # and bytes that cannot be converted are replaced rather than raising.
    #
    # Returns the response.
    def fix_encoding(response)
      body = response.body
      return response if body.nil?

      charset  = response["Content-Type"].to_s[/charset\s*=\s*["']?([^\s;"']+)/i, 1] || "UTF-8"
      encoding = begin
        Encoding.find(charset)
      rescue ArgumentError
        Encoding::UTF_8
      end

      response.body = body.dup.force_encoding(encoding)
                          .encode("UTF-8", invalid: :replace, undef: :replace)
      response
    end

    # build http request object
    #
    # Sends a GET for the uri with the gem's User-Agent and returns the
    # response. The connection is opened without TLS.
    def get_response(uri)
      http = Net::HTTP.new(uri.host, uri.port)
      http.open_timeout = 15
      http.read_timeout = 30

      request = Net::HTTP::Get.new(uri.request_uri)
      request["User-Agent"] = user_agent

      http.request(request)
    end

    # throttle requests to 1 per sec
    #
    # Sleeps only for what is left of the interval since the last request.
    def throttle
      remaining = (@@last_request + @throttle) - Time.now
      sleep(remaining) if remaining > 0
      @@last_request = Time.now
    end
  end

end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require_relative "test_helper"
|
2
|
+
|
3
|
+
# Specs for the module-level helpers: the request accessor, parse_url and
# parse_asset. Assertions are used throughout instead of object-level
# expectations (must_equal / wont_be_nil), matching the assert_equal calls
# already present in this file.
describe ColorParser do
  # Install ColorParser::TestRequest so the examples do not go through the
  # default ColorParser::Request.
  def setup
    ColorParser.request = ColorParser::TestRequest.new
  end

  it "should retrieve fixture" do
    url = "http://example.com/css/absolute.html?foo=bar"
    result = ColorParser.request.get(url)
    refute_nil result
  end


  # parse_url

  it "should parse url" do
    url = "http://example.com/test/something/"
    assert_equal ["example.com", "/test/something/", nil], ColorParser.parse_url(url)
  end

  it "should parse url with no trailing slash" do
    url = "http://example.com"
    assert_equal ["example.com", "/", nil], ColorParser.parse_url(url)
  end

  it "should parse url with query params" do
    url = "http://example.com?foo=bar&baz=bar"
    assert_equal ["example.com", "/", "foo=bar&baz=bar"], ColorParser.parse_url(url)
  end


  # parse_asset

  it "should parse asset absolute path" do
    doc   = "http://example.com/stylesheets/base.css"
    asset = "http://asset.example.com/stylesheets/style.css"

    parsed = ColorParser.parse_asset(doc, asset)
    assert_equal "http://asset.example.com/stylesheets/style.css", parsed
  end

  it "should parse asset absolute path with query string" do
    doc   = "http://example.com/stylesheets/base.css?foo=bar"
    asset = "http://asset.example.com/stylesheets/style.css?baz=bar"

    parsed = ColorParser.parse_asset(doc, asset)
    assert_equal "http://asset.example.com/stylesheets/style.css?baz=bar", parsed
  end

  it "should parse relative root path" do
    doc   = "http://example.com/stylesheets/base.css"
    asset = "/styles/style.css"

    parsed = ColorParser.parse_asset(doc, asset)
    assert_equal "http://example.com/styles/style.css", parsed
  end

  it "should parse relative root path with query string" do
    doc   = "http://example.com/stylesheets/base.css?foo=bar"
    asset = "/styles/style.css?baz=bar"

    parsed = ColorParser.parse_asset(doc, asset)
    assert_equal "http://example.com/styles/style.css?baz=bar", parsed
  end

  it "should parse relative path" do
    doc   = "http://example.com/stylesheets/base.css"
    asset = "ie.css"

    parsed = ColorParser.parse_asset(doc, asset)
    assert_equal "http://example.com/stylesheets/ie.css", parsed
  end

  it "should parse relative path with query string" do
    doc   = "http://example.com/stylesheets/base.css?foo=bar"
    asset = "ie.css?baz=bar"

    parsed = ColorParser.parse_asset(doc, asset)
    assert_equal "http://example.com/stylesheets/ie.css?baz=bar", parsed
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
2
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml" dir="ltr" lang="en-US">
|
4
|
+
|
5
|
+
<head>
|
6
|
+
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
|
7
|
+
<title>Color Parser</title>
|
8
|
+
|
9
|
+
<link rel="stylesheet" href="http://example.com/css/stylesheets/screen.css" media="screen" type="text/css" />
|
10
|
+
<link rel="stylesheet" href="http://example.com/css/stylesheets/print.css" media="print" type="text/css" />
|
11
|
+
</head>
|
12
|
+
<body>
|
13
|
+
<div></div>
|
14
|
+
</body>
|
15
|
+
</html>
|