varnisher 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/varnisher.rb +1 -0
- data/lib/varnisher/pagepurger.rb +32 -18
- data/lib/varnisher/purger.rb +3 -1
- data/lib/varnisher/spider.rb +20 -75
- data/lib/varnisher/urls.rb +92 -0
- data/lib/varnisher/version.rb +1 -1
- metadata +27 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53a788e751893e32d605e1221b9cc42012a3bda3
|
4
|
+
data.tar.gz: 79b9cdeb072f45e6042bfdad7f68f4c60ec5d574
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 974f483c163d0ead6adb56c18ee672ed8ac586618412e9614366069a8a54d763a1f2d6b1bf65ae0ab08a2d3090dbf9dc3f4fe39ebf3bdee97e94ef471713c851
|
7
|
+
data.tar.gz: de2a2698bf9e9aaa992884a0512a40df21c668763d0aa2241be6cb24fb4b70acda32821b2a73db7db818640484c640783c39d02aebd8d1c0c8254cb1af302586
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# varnisher
|
2
2
|
|
3
|
-
[![Build Status](https://travis-ci.org/robmiller/varnisher.png?branch=master)](https://travis-ci.org/robmiller/varnisher)
|
3
|
+
[![Build Status](https://travis-ci.org/robmiller/varnisher.png?branch=master)](https://travis-ci.org/robmiller/varnisher) [![Dependency Status](https://gemnasium.com/robmiller/varnisher.png)](https://gemnasium.com/robmiller/varnisher)
|
4
4
|
|
5
5
|
Administering Varnish is generally a breeze, but sometimes you want to
|
6
6
|
do one of the few things that aren't painless out of the box. Hopefully,
|
data/lib/varnisher.rb
CHANGED
data/lib/varnisher/pagepurger.rb
CHANGED
@@ -29,8 +29,6 @@ module Varnisher
|
|
29
29
|
def initialize(url)
|
30
30
|
@url = url
|
31
31
|
@uri = URI.parse(url)
|
32
|
-
|
33
|
-
@urls = []
|
34
32
|
end
|
35
33
|
|
36
34
|
# Sends a PURGE request to the Varnish server, asking it to purge
|
@@ -69,6 +67,8 @@ module Varnisher
|
|
69
67
|
# Purges all the resources on the given page.
|
70
68
|
def purge_resources
|
71
69
|
fetch_page
|
70
|
+
parse_page
|
71
|
+
@urls = find_resources
|
72
72
|
|
73
73
|
return if @urls.empty?
|
74
74
|
|
@@ -84,16 +84,18 @@ module Varnisher
|
|
84
84
|
Varnisher.log.info "Looking for external resources on #{@url}..."
|
85
85
|
|
86
86
|
begin
|
87
|
-
@
|
87
|
+
@html = Net::HTTP.get_response(@uri).body
|
88
88
|
rescue
|
89
89
|
Varnisher.log.info "Hmm, I couldn't fetch that URL. Sure it's right?\n"
|
90
90
|
return
|
91
91
|
end
|
92
|
+
end
|
92
93
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
94
|
+
# Parses the raw HTML we've fetched into a Nokogiri document.
|
95
|
+
#
|
96
|
+
# @api private
|
97
|
+
def parse_page
|
98
|
+
@doc = Nokogiri::HTML(@html)
|
97
99
|
end
|
98
100
|
|
99
101
|
# Returns an array of resources contained within the current page.
|
@@ -107,18 +109,31 @@ module Varnisher
|
|
107
109
|
# @return [Array] An array of strings, each representing a URL
|
108
110
|
#
|
109
111
|
# @api private
|
110
|
-
def find_resources
|
111
|
-
found =
|
112
|
+
def find_resources(&block)
|
113
|
+
found = Varnisher::Urls.new
|
114
|
+
|
115
|
+
self.class.resources.each do |resource|
|
116
|
+
found += find_resource(resource, &block)
|
117
|
+
end
|
118
|
+
|
119
|
+
Varnisher.log.debug ''
|
120
|
+
Varnisher.log.info "#{found.length} total resources found.\n"
|
121
|
+
|
122
|
+
found
|
123
|
+
end
|
124
|
+
|
125
|
+
# Given a resource, will return instances of that resource found in
|
126
|
+
# the current document.
|
127
|
+
def find_resource(resource, &block)
|
128
|
+
found = Varnisher::Urls.new
|
112
129
|
|
113
|
-
|
114
|
-
|
115
|
-
attribute = e[res.attribute]
|
130
|
+
@doc.css(resource.selector).each do |e|
|
131
|
+
attribute = e[resource.attribute]
|
116
132
|
|
117
|
-
|
133
|
+
Varnisher.log.debug("Found resource: #{attribute}")
|
118
134
|
|
119
|
-
|
120
|
-
|
121
|
-
end
|
135
|
+
yield attribute if block_given?
|
136
|
+
found << attribute
|
122
137
|
end
|
123
138
|
|
124
139
|
found
|
@@ -133,8 +148,7 @@ module Varnisher
|
|
133
148
|
def tidy_resources
|
134
149
|
Varnisher.log.info 'Tidying resources...'
|
135
150
|
|
136
|
-
@urls = @urls.
|
137
|
-
.select { |uri| uri.scheme == 'http' && uri.host == @uri.host }
|
151
|
+
@urls = @urls.make_absolute(@uri).with_hostname(@uri.host)
|
138
152
|
|
139
153
|
Varnisher.log.info "#{@urls.length} purgeable resources found.\n"
|
140
154
|
end
|
data/lib/varnisher/purger.rb
CHANGED
@@ -81,7 +81,9 @@ module Varnisher
|
|
81
81
|
|
82
82
|
begin
|
83
83
|
http = Net::HTTP.new(hostname, port)
|
84
|
-
|
84
|
+
request = @request_method.new(@path)
|
85
|
+
request.add_field("Host", @host)
|
86
|
+
response = http.request(request)
|
85
87
|
rescue Timeout::Error
|
86
88
|
return false
|
87
89
|
end
|
data/lib/varnisher/spider.rb
CHANGED
@@ -43,6 +43,9 @@ module Varnisher
|
|
43
43
|
|
44
44
|
@visited = []
|
45
45
|
@to_visit = []
|
46
|
+
|
47
|
+
@threads = Varnisher.options['threads']
|
48
|
+
@num_pages = Varnisher.options['num-pages']
|
46
49
|
end
|
47
50
|
|
48
51
|
# Adds a link to the queue of pages to be visited.
|
@@ -103,16 +106,14 @@ module Varnisher
|
|
103
106
|
#
|
104
107
|
# @api private
|
105
108
|
def find_links(doc, uri)
|
106
|
-
|
109
|
+
urls = Varnisher::Urls.new(get_anchors(doc) + get_commented_urls(doc))
|
107
110
|
|
108
|
-
|
109
|
-
hrefs += get_commented_urls(doc)
|
111
|
+
urls = urls.make_absolute(uri).with_hostname(uri.host)
|
110
112
|
|
111
|
-
|
112
|
-
|
113
|
-
hrefs = remove_query_strings(hrefs)
|
113
|
+
urls = urls.without_hashes if Varnisher.options['ignore-hashes']
|
114
|
+
urls = urls.without_query_strings if Varnisher.options['ignore-query-strings']
|
114
115
|
|
115
|
-
|
116
|
+
urls
|
116
117
|
end
|
117
118
|
|
118
119
|
# Given an HTML document, will return all the URLs that exist as
|
@@ -131,67 +132,6 @@ module Varnisher
|
|
131
132
|
doc.xpath('//comment()').flat_map { |e| URI.extract(e.to_html, 'http') }
|
132
133
|
end
|
133
134
|
|
134
|
-
# Given a set of URLs, will return only the ones that are valid for
|
135
|
-
# spidering.
|
136
|
-
#
|
137
|
-
# That means URLs that have the same hostname as the hostname we
|
138
|
-
# started from, and that are on the HTTP scheme rather than HTTPS
|
139
|
-
# (since Varnish doesn't support HTTPS).
|
140
|
-
#
|
141
|
-
# Additionally, some normalisation will be performed, so that the
|
142
|
-
# URLs are absolute (using the page that they were fetched from as
|
143
|
-
# the base, just like a browser would).
|
144
|
-
#
|
145
|
-
# @return [Array] An array of URIs
|
146
|
-
def valid_urls(hrefs, uri)
|
147
|
-
hrefs.map { |u| URI.join(uri, URI.escape(u)) }
|
148
|
-
.select { |u| u.scheme == 'http' && u.host == @uri.host }
|
149
|
-
end
|
150
|
-
|
151
|
-
# Given a set of URLs, will normalise them according to their URL
|
152
|
-
# minus the hash; that is, normalise them so that:
|
153
|
-
#
|
154
|
-
# foo#bar
|
155
|
-
#
|
156
|
-
# and:
|
157
|
-
#
|
158
|
-
# foo#baz
|
159
|
-
#
|
160
|
-
# Are considered the same.
|
161
|
-
#
|
162
|
-
# @return [Array] An array of URIs
|
163
|
-
def remove_hashes(hrefs)
|
164
|
-
return hrefs unless Varnisher.options['ignore-hashes']
|
165
|
-
|
166
|
-
hrefs = hrefs.group_by do |h|
|
167
|
-
URI.parse(h.scheme + '://' + h.host + h.path.to_s + h.query.to_s)
|
168
|
-
end
|
169
|
-
|
170
|
-
hrefs.keys
|
171
|
-
end
|
172
|
-
|
173
|
-
# Given a set of URLs, will normalise them according to their URL
|
174
|
-
# minus the query string; that is, normalise them so that:
|
175
|
-
#
|
176
|
-
# foo?foo=bar
|
177
|
-
#
|
178
|
-
# and:
|
179
|
-
#
|
180
|
-
# foo?foo=baz
|
181
|
-
#
|
182
|
-
# Are considered the same.
|
183
|
-
#
|
184
|
-
# @return [Array] An array of URIs
|
185
|
-
def remove_query_strings(hrefs)
|
186
|
-
return hrefs unless Varnisher.options['ignore-query-strings']
|
187
|
-
|
188
|
-
hrefs = hrefs.group_by do |h|
|
189
|
-
URI.parse(h.scheme + '://' + h.host + h.path.to_s)
|
190
|
-
end
|
191
|
-
|
192
|
-
hrefs.keys
|
193
|
-
end
|
194
|
-
|
195
135
|
# Pops a URL from the queue of yet-to-be-visited URLs, ensuring that
|
196
136
|
# it's not one that we've visited before.
|
197
137
|
#
|
@@ -207,6 +147,16 @@ module Varnisher
|
|
207
147
|
url
|
208
148
|
end
|
209
149
|
|
150
|
+
# Returns true if the spider has visited the maximum number of pages
|
151
|
+
# it's allowed to.
|
152
|
+
def limit_reached?
|
153
|
+
@visited.length > @num_pages && @num_pages >= 0
|
154
|
+
end
|
155
|
+
|
156
|
+
def pages_remaining?
|
157
|
+
@to_visit.length > 0
|
158
|
+
end
|
159
|
+
|
210
160
|
# Kicks off the spidering process.
|
211
161
|
#
|
212
162
|
# Fires up Parallel in as many threads as have been configured, and
|
@@ -221,13 +171,8 @@ module Varnisher
|
|
221
171
|
|
222
172
|
crawl_page(@uri)
|
223
173
|
|
224
|
-
threads
|
225
|
-
|
226
|
-
|
227
|
-
Parallel.in_threads(threads) do |thread_number|
|
228
|
-
next if @visited.length > num_pages && num_pages >= 0
|
229
|
-
|
230
|
-
crawl_page(pop_url) while @to_visit.length > 0
|
174
|
+
Parallel.in_threads(@threads) do |_|
|
175
|
+
crawl_page(pop_url) while pages_remaining? and !limit_reached?
|
231
176
|
end
|
232
177
|
|
233
178
|
Varnisher.log.info "Done; #{@visited.length} pages hit."
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module Varnisher
|
2
|
+
# A collection for URLs, that exposes some useful behaviour (like
|
3
|
+
# selecting only URLs that have a given hostname, or converting
|
4
|
+
# relative URLs to absolute).
|
5
|
+
class Urls
|
6
|
+
include Enumerable
|
7
|
+
extend Forwardable
|
8
|
+
|
9
|
+
# Given an array of URLs (either strings or URI objects), store them
|
10
|
+
# in the collection.
|
11
|
+
def initialize(urls = [])
|
12
|
+
@urls = Array(urls)
|
13
|
+
@urls = make_uris
|
14
|
+
end
|
15
|
+
|
16
|
+
# Coerces the values of the current collection into being URI
|
17
|
+
# objects, which allows strings to be passed initially.
|
18
|
+
def make_uris
|
19
|
+
coerced = urls.map do |url|
|
20
|
+
begin
|
21
|
+
URI(url)
|
22
|
+
rescue
|
23
|
+
nil
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
coerced.compact
|
28
|
+
end
|
29
|
+
|
30
|
+
# Given a relative URL and a base to work from, will return the
|
31
|
+
# absolute form of that URL.
|
32
|
+
#
|
33
|
+
# For example:
|
34
|
+
#
|
35
|
+
# absolute_url('http://www.example.com', '/foo')
|
36
|
+
# # => "http://www.example.com/foo"
|
37
|
+
#
|
38
|
+
# absolute_url('http://www.example.com/foo', 'bar')
|
39
|
+
# # => "http://www.example.com/bar"
|
40
|
+
#
|
41
|
+
# absolute_url('http://www.example.com/foo/bar', 'baz')
|
42
|
+
# # => "http://www.example.com/foo/baz"
|
43
|
+
def absolute_url(base, url)
|
44
|
+
URI.join(base, URI.escape(url.to_s))
|
45
|
+
end
|
46
|
+
|
47
|
+
# Returns a new collection containing absolute versions of all the
|
48
|
+
# URLs in the current collection.
|
49
|
+
def make_absolute(base)
|
50
|
+
Urls.new(urls.map { |uri| absolute_url(base, uri) })
|
51
|
+
end
|
52
|
+
|
53
|
+
# Returns a new collection containing only the URLs in this
|
54
|
+
# collection that match the given hostname.
|
55
|
+
def with_hostname(hostname)
|
56
|
+
Urls.new(urls.select { |uri| uri.scheme == 'http' && uri.host == hostname })
|
57
|
+
end
|
58
|
+
|
59
|
+
# Returns a new collection containing the URLs in the current
|
60
|
+
# collection, normalised according to their hash.
|
61
|
+
def without_hashes
|
62
|
+
normalised = urls.group_by do |url|
|
63
|
+
url.fragment = nil
|
64
|
+
url
|
65
|
+
end
|
66
|
+
|
67
|
+
Urls.new(normalised.keys)
|
68
|
+
end
|
69
|
+
|
70
|
+
# Returns a new collection containing the URLs in the current
|
71
|
+
# collection without their query string values.
|
72
|
+
def without_query_strings
|
73
|
+
normalised = urls.group_by do |h|
|
74
|
+
url.query = nil
|
75
|
+
url
|
76
|
+
end
|
77
|
+
|
78
|
+
Urls.new(normalised.keys)
|
79
|
+
end
|
80
|
+
|
81
|
+
# Allows the addition of two collections by accessing the underlying
|
82
|
+
# array.
|
83
|
+
def +(other)
|
84
|
+
Urls.new(urls + other.urls)
|
85
|
+
end
|
86
|
+
|
87
|
+
def_delegators :urls, :each, :<<, :length, :empty?, :include?
|
88
|
+
|
89
|
+
protected
|
90
|
+
attr_reader :urls
|
91
|
+
end
|
92
|
+
end
|
data/lib/varnisher/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: varnisher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Miller
|
@@ -14,112 +14,112 @@ dependencies:
|
|
14
14
|
name: main
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 5.2.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 5.2.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - ~>
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 1.6.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - ~>
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 1.6.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: parallel
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - ~>
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.
|
47
|
+
version: 0.8.1
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - ~>
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.
|
54
|
+
version: 0.8.1
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - ~>
|
59
|
+
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: 10.1.0
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - ~>
|
66
|
+
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: 10.1.0
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: minitest
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - ~>
|
73
|
+
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: 5.0.6
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - ~>
|
80
|
+
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 5.0.6
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: webmock
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - ~>
|
87
|
+
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: 1.13.0
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - ~>
|
94
|
+
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: 1.13.0
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: letters
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- - ~>
|
101
|
+
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: 0.4.1
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- - ~>
|
108
|
+
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 0.4.1
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: rubygems-tasks
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
|
-
- - ~>
|
115
|
+
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
117
|
version: 0.2.4
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
|
-
- - ~>
|
122
|
+
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: 0.2.4
|
125
125
|
description: Some tools that make working with the Varnish HTTP cache easier, including
|
@@ -130,16 +130,17 @@ executables:
|
|
130
130
|
extensions: []
|
131
131
|
extra_rdoc_files: []
|
132
132
|
files:
|
133
|
+
- Gemfile
|
134
|
+
- LICENSE
|
135
|
+
- README.md
|
133
136
|
- bin/varnisher
|
137
|
+
- lib/varnisher.rb
|
134
138
|
- lib/varnisher/domainpurger.rb
|
135
139
|
- lib/varnisher/pagepurger.rb
|
136
140
|
- lib/varnisher/purger.rb
|
137
141
|
- lib/varnisher/spider.rb
|
142
|
+
- lib/varnisher/urls.rb
|
138
143
|
- lib/varnisher/version.rb
|
139
|
-
- lib/varnisher.rb
|
140
|
-
- LICENSE
|
141
|
-
- README.md
|
142
|
-
- Gemfile
|
143
144
|
homepage: http://github.com/robmiller/varnisher
|
144
145
|
licenses:
|
145
146
|
- MIT
|
@@ -150,17 +151,17 @@ require_paths:
|
|
150
151
|
- lib
|
151
152
|
required_ruby_version: !ruby/object:Gem::Requirement
|
152
153
|
requirements:
|
153
|
-
- -
|
154
|
+
- - ">="
|
154
155
|
- !ruby/object:Gem::Version
|
155
156
|
version: '0'
|
156
157
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
157
158
|
requirements:
|
158
|
-
- -
|
159
|
+
- - ">="
|
159
160
|
- !ruby/object:Gem::Version
|
160
161
|
version: '0'
|
161
162
|
requirements: []
|
162
163
|
rubyforge_project:
|
163
|
-
rubygems_version: 2.
|
164
|
+
rubygems_version: 2.2.2
|
164
165
|
signing_key:
|
165
166
|
specification_version: 4
|
166
167
|
summary: Helpful tools for working with Varnish caches
|