varnisher 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/varnisher.rb +1 -0
- data/lib/varnisher/pagepurger.rb +32 -18
- data/lib/varnisher/purger.rb +3 -1
- data/lib/varnisher/spider.rb +20 -75
- data/lib/varnisher/urls.rb +92 -0
- data/lib/varnisher/version.rb +1 -1
- metadata +27 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53a788e751893e32d605e1221b9cc42012a3bda3
|
4
|
+
data.tar.gz: 79b9cdeb072f45e6042bfdad7f68f4c60ec5d574
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 974f483c163d0ead6adb56c18ee672ed8ac586618412e9614366069a8a54d763a1f2d6b1bf65ae0ab08a2d3090dbf9dc3f4fe39ebf3bdee97e94ef471713c851
|
7
|
+
data.tar.gz: de2a2698bf9e9aaa992884a0512a40df21c668763d0aa2241be6cb24fb4b70acda32821b2a73db7db818640484c640783c39d02aebd8d1c0c8254cb1af302586
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# varnisher
|
2
2
|
|
3
|
-
[](https://travis-ci.org/robmiller/varnisher)
|
3
|
+
[](https://travis-ci.org/robmiller/varnisher) [](https://gemnasium.com/robmiller/varnisher)
|
4
4
|
|
5
5
|
Administering Varnish is generally a breeze, but sometimes you want to
|
6
6
|
do one of the few things that aren't painless out of the box. Hopefully,
|
data/lib/varnisher.rb
CHANGED
data/lib/varnisher/pagepurger.rb
CHANGED
@@ -29,8 +29,6 @@ module Varnisher
|
|
29
29
|
def initialize(url)
|
30
30
|
@url = url
|
31
31
|
@uri = URI.parse(url)
|
32
|
-
|
33
|
-
@urls = []
|
34
32
|
end
|
35
33
|
|
36
34
|
# Sends a PURGE request to the Varnish server, asking it to purge
|
@@ -69,6 +67,8 @@ module Varnisher
|
|
69
67
|
# Purges all the resources on the given page.
|
70
68
|
def purge_resources
|
71
69
|
fetch_page
|
70
|
+
parse_page
|
71
|
+
@urls = find_resources
|
72
72
|
|
73
73
|
return if @urls.empty?
|
74
74
|
|
@@ -84,16 +84,18 @@ module Varnisher
|
|
84
84
|
Varnisher.log.info "Looking for external resources on #{@url}..."
|
85
85
|
|
86
86
|
begin
|
87
|
-
@
|
87
|
+
@html = Net::HTTP.get_response(@uri).body
|
88
88
|
rescue
|
89
89
|
Varnisher.log.info "Hmm, I couldn't fetch that URL. Sure it's right?\n"
|
90
90
|
return
|
91
91
|
end
|
92
|
+
end
|
92
93
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
94
|
+
# Parses the raw HTML we've fetched into a Nokogiri document.
|
95
|
+
#
|
96
|
+
# @api private
|
97
|
+
def parse_page
|
98
|
+
@doc = Nokogiri::HTML(@html)
|
97
99
|
end
|
98
100
|
|
99
101
|
# Returns an array of resources contained within the current page.
|
@@ -107,18 +109,31 @@ module Varnisher
|
|
107
109
|
# @return [Array] An array of strings, each representing a URL
|
108
110
|
#
|
109
111
|
# @api private
|
110
|
-
def find_resources
|
111
|
-
found =
|
112
|
+
def find_resources(&block)
  # Start from an empty collection and fold in whatever find_resource
  # returns for each entry of the class-level resource list; the
  # caller's block (if any) is handed straight through to find_resource.
  collected = Varnisher::Urls.new
  self.class.resources.each { |res| collected = collected + find_resource(res, &block) }

  Varnisher.log.debug ''
  Varnisher.log.info "#{collected.length} total resources found.\n"

  collected
end
|
124
|
+
|
125
|
+
# Given a resource, will return instances of that resource found in
|
126
|
+
# the current document.
|
127
|
+
def find_resource(resource, &block)
|
128
|
+
found = Varnisher::Urls.new
|
112
129
|
|
113
|
-
|
114
|
-
|
115
|
-
attribute = e[res.attribute]
|
130
|
+
@doc.css(resource.selector).each do |e|
|
131
|
+
attribute = e[resource.attribute]
|
116
132
|
|
117
|
-
|
133
|
+
Varnisher.log.debug("Found resource: #{attribute}")
|
118
134
|
|
119
|
-
|
120
|
-
|
121
|
-
end
|
135
|
+
yield attribute if block_given?
|
136
|
+
found << attribute
|
122
137
|
end
|
123
138
|
|
124
139
|
found
|
@@ -133,8 +148,7 @@ module Varnisher
|
|
133
148
|
def tidy_resources
  Varnisher.log.info 'Tidying resources...'

  # Resolve every URL against the page's own URI first, then keep only
  # those that Urls#with_hostname accepts for this page's host.
  resolved = @urls.make_absolute(@uri)
  @urls = resolved.with_hostname(@uri.host)

  Varnisher.log.info "#{@urls.length} purgeable resources found.\n"
end
|
data/lib/varnisher/purger.rb
CHANGED
@@ -81,7 +81,9 @@ module Varnisher
|
|
81
81
|
|
82
82
|
begin
|
83
83
|
http = Net::HTTP.new(hostname, port)
|
84
|
-
|
84
|
+
request = @request_method.new(@path)
|
85
|
+
request.add_field("Host", @host)
|
86
|
+
response = http.request(request)
|
85
87
|
rescue Timeout::Error
|
86
88
|
return false
|
87
89
|
end
|
data/lib/varnisher/spider.rb
CHANGED
@@ -43,6 +43,9 @@ module Varnisher
|
|
43
43
|
|
44
44
|
@visited = []
|
45
45
|
@to_visit = []
|
46
|
+
|
47
|
+
@threads = Varnisher.options['threads']
|
48
|
+
@num_pages = Varnisher.options['num-pages']
|
46
49
|
end
|
47
50
|
|
48
51
|
# Adds a link to the queue of pages to be visited.
|
@@ -103,16 +106,14 @@ module Varnisher
|
|
103
106
|
#
|
104
107
|
# @api private
|
105
108
|
def find_links(doc, uri)
  # Candidate links come from two places: anchors and URLs found in
  # HTML comments.
  candidates = get_anchors(doc) + get_commented_urls(doc)

  # Make everything absolute relative to the page it was found on, then
  # keep only links on that page's host.
  links = Varnisher::Urls.new(candidates)
  links = links.make_absolute(uri).with_hostname(uri.host)

  # Optional normalisation, driven by the command-line options.
  links = links.without_hashes if Varnisher.options['ignore-hashes']
  links = links.without_query_strings if Varnisher.options['ignore-query-strings']

  links
end
|
117
118
|
|
118
119
|
# Given an HTML document, will return all the URLs that exist as
|
@@ -131,67 +132,6 @@ module Varnisher
|
|
131
132
|
doc.xpath('//comment()').flat_map { |e| URI.extract(e.to_html, 'http') }
|
132
133
|
end
|
133
134
|
|
134
|
-
# Given a set of URLs, will return only the ones that are valid for
|
135
|
-
# spidering.
|
136
|
-
#
|
137
|
-
# That means URLs that have the same hostname as the hostname we
|
138
|
-
# started from, and that are on the HTTP scheme rather than HTTPS
|
139
|
-
# (since Varnish doesn't support HTTPS).
|
140
|
-
#
|
141
|
-
# Additionally, some normalisation will be performed, so that the
|
142
|
-
# URLs are absolute (using the page that they were fetched from as
|
143
|
-
# the base, just like a browser would).
|
144
|
-
#
|
145
|
-
# @return [Array] An array of URIs
|
146
|
-
def valid_urls(hrefs, uri)
|
147
|
-
hrefs.map { |u| URI.join(uri, URI.escape(u)) }
|
148
|
-
.select { |u| u.scheme == 'http' && u.host == @uri.host }
|
149
|
-
end
|
150
|
-
|
151
|
-
# Given a set of URLs, will normalise them according to their URL
|
152
|
-
# minus the hash; that is, normalise them so that:
|
153
|
-
#
|
154
|
-
# foo#bar
|
155
|
-
#
|
156
|
-
# and:
|
157
|
-
#
|
158
|
-
# foo#baz
|
159
|
-
#
|
160
|
-
# Are considered the same.
|
161
|
-
#
|
162
|
-
# @return [Array] An array of URIs
|
163
|
-
def remove_hashes(hrefs)
|
164
|
-
return hrefs unless Varnisher.options['ignore-hashes']
|
165
|
-
|
166
|
-
hrefs = hrefs.group_by do |h|
|
167
|
-
URI.parse(h.scheme + '://' + h.host + h.path.to_s + h.query.to_s)
|
168
|
-
end
|
169
|
-
|
170
|
-
hrefs.keys
|
171
|
-
end
|
172
|
-
|
173
|
-
# Given a set of URLs, will normalise them according to their URL
|
174
|
-
# minus the query string; that is, normalise them so that:
|
175
|
-
#
|
176
|
-
# foo?foo=bar
|
177
|
-
#
|
178
|
-
# and:
|
179
|
-
#
|
180
|
-
# foo?foo=baz
|
181
|
-
#
|
182
|
-
# Are considered the same.
|
183
|
-
#
|
184
|
-
# @return [Array] An array of URIs
|
185
|
-
def remove_query_strings(hrefs)
|
186
|
-
return hrefs unless Varnisher.options['ignore-query-strings']
|
187
|
-
|
188
|
-
hrefs = hrefs.group_by do |h|
|
189
|
-
URI.parse(h.scheme + '://' + h.host + h.path.to_s)
|
190
|
-
end
|
191
|
-
|
192
|
-
hrefs.keys
|
193
|
-
end
|
194
|
-
|
195
135
|
# Pops a URL from the queue of yet-to-be-visited URLs, ensuring that
|
196
136
|
# it's not one that we've visited before.
|
197
137
|
#
|
@@ -207,6 +147,16 @@ module Varnisher
|
|
207
147
|
url
|
208
148
|
end
|
209
149
|
|
150
|
+
# Returns true if the spider has visited the maximum number of pages
|
151
|
+
# it's allowed to.
|
152
|
+
def limit_reached?
  # A negative limit means "no limit"; an unset (nil) limit is treated
  # the same way instead of raising a comparison error.
  return false if @num_pages.nil? || @num_pages < 0

  # Use >= rather than >: once the number of visited pages equals the
  # limit, the maximum has been reached. With > the spider would crawl
  # one page more than it was allowed to.
  @visited.length >= @num_pages
end
|
155
|
+
|
156
|
+
def pages_remaining?
  # True while the queue of yet-to-be-visited URLs holds at least one
  # entry.
  !@to_visit.empty?
end
|
159
|
+
|
210
160
|
# Kicks off the spidering process.
|
211
161
|
#
|
212
162
|
# Fires up Parallel in as many threads as have been configured, and
|
@@ -221,13 +171,8 @@ module Varnisher
|
|
221
171
|
|
222
172
|
crawl_page(@uri)
|
223
173
|
|
224
|
-
threads
|
225
|
-
|
226
|
-
|
227
|
-
Parallel.in_threads(threads) do |thread_number|
|
228
|
-
next if @visited.length > num_pages && num_pages >= 0
|
229
|
-
|
230
|
-
crawl_page(pop_url) while @to_visit.length > 0
|
174
|
+
Parallel.in_threads(@threads) do |_|
|
175
|
+
crawl_page(pop_url) while pages_remaining? and !limit_reached?
|
231
176
|
end
|
232
177
|
|
233
178
|
Varnisher.log.info "Done; #{@visited.length} pages hit."
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'forwardable'
require 'uri'

module Varnisher
  # A collection of URLs, stored as URI objects, that exposes some
  # useful behaviour (like selecting only URLs that have a given
  # hostname, or converting relative URLs to absolute).
  #
  # The filtering/normalising methods (make_absolute, with_hostname,
  # without_hashes, without_query_strings, +) all return a new
  # collection and leave the receiver and its URI objects untouched.
  class Urls
    include Enumerable
    extend Forwardable

    # Read-only collection behaviour is forwarded to the underlying
    # array; Enumerable builds on the delegated #each.
    def_delegators :urls, :each, :length, :empty?, :include?

    # Given an array of URLs (either strings or URI objects), store them
    # in the collection. Anything that cannot be turned into a URI is
    # dropped.
    def initialize(urls = [])
      @urls = Array(urls)
      @urls = make_uris
    end

    # Coerces the values of the current collection into being URI
    # objects, which allows strings to be passed initially.
    #
    # @return [Array<URI>] the coercible entries, in their original order
    def make_uris
      urls.map { |url| coerce(url) }.compact
    end

    # Given a relative URL and a base to work from, will return the
    # absolute form of that URL.
    #
    # For example:
    #
    #   absolute_url('http://www.example.com', '/foo')
    #   # => "http://www.example.com/foo"
    #
    #   absolute_url('http://www.example.com/foo', 'bar')
    #   # => "http://www.example.com/bar"
    #
    #   absolute_url('http://www.example.com/foo/bar', 'baz')
    #   # => "http://www.example.com/foo/baz"
    #
    # @param base [String, URI] the URL to resolve against
    # @param url [String, URI] the (possibly relative) URL to resolve
    # @return [URI] the absolute URL
    # @raise [URI::InvalidURIError] if the URL cannot be joined to the
    #   base even after escaping
    def absolute_url(base, url)
      target = url.is_a?(URI::Generic) ? url : url.to_s

      begin
        # A URL that is already valid is joined as-is. Escaping it first
        # would turn "%20" into "%2520" and "#frag" into "%23frag",
        # which corrupts the link and hides its fragment.
        URI.join(base, target)
      rescue URI::InvalidURIError
        # Only URLs with illegal characters (e.g. spaces) are escaped.
        # URI.escape was removed in Ruby 3.0, so go through the parser.
        URI.join(base, URI::DEFAULT_PARSER.escape(target.to_s))
      end
    end

    # Returns a new collection containing absolute versions of all the
    # URLs in the current collection.
    def make_absolute(base)
      Urls.new(urls.map { |uri| absolute_url(base, uri) })
    end

    # Returns a new collection containing only the URLs in this
    # collection that use the http scheme and match the given hostname.
    def with_hostname(hostname)
      Urls.new(urls.select { |uri| uri.scheme == 'http' && uri.host == hostname })
    end

    # Returns a new collection containing the URLs in the current
    # collection with their fragment ("#hash") removed; URLs that only
    # differed by fragment collapse into a single entry.
    def without_hashes
      without_component(:fragment=)
    end

    # Returns a new collection containing the URLs in the current
    # collection without their query string values; URLs that only
    # differed by query string collapse into a single entry.
    def without_query_strings
      without_component(:query=)
    end

    # Allows the addition of two collections by accessing the underlying
    # array.
    def +(other)
      Urls.new(urls + other.urls)
    end

    # Appends a URL (string or URI object) to the collection, coercing
    # it to a URI first so the collection only ever holds URI objects.
    # Values that cannot be coerced are ignored.
    #
    # @return [Urls] self, so that appends can be chained
    def <<(url)
      uri = coerce(url)
      urls << uri if uri
      self
    end

    protected

    attr_reader :urls

    private

    # Turns a single value into a URI, or nil if that is not possible.
    # Rescuing here is a deliberate best-effort: unparseable links are
    # skipped rather than aborting the whole run.
    def coerce(url)
      URI(url)
    rescue StandardError
      nil
    end

    # Builds a new collection in which the given component (named by its
    # setter, e.g. :fragment=) is cleared on every URL. The change is
    # made on copies so the receiver's own URI objects are not mutated;
    # duplicates are removed, keeping the first occurrence.
    def without_component(setter)
      stripped = urls.map do |url|
        copy = url.dup
        copy.public_send(setter, nil)
        copy
      end

      Urls.new(stripped.uniq)
    end
  end
end
|
data/lib/varnisher/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: varnisher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Miller
|
@@ -14,112 +14,112 @@ dependencies:
|
|
14
14
|
name: main
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 5.2.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 5.2.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - ~>
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 1.6.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - ~>
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 1.6.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: parallel
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - ~>
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.
|
47
|
+
version: 0.8.1
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - ~>
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.
|
54
|
+
version: 0.8.1
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - ~>
|
59
|
+
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: 10.1.0
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - ~>
|
66
|
+
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: 10.1.0
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: minitest
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - ~>
|
73
|
+
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: 5.0.6
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - ~>
|
80
|
+
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 5.0.6
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: webmock
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - ~>
|
87
|
+
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: 1.13.0
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - ~>
|
94
|
+
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: 1.13.0
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: letters
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- - ~>
|
101
|
+
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: 0.4.1
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- - ~>
|
108
|
+
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 0.4.1
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: rubygems-tasks
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
|
-
- - ~>
|
115
|
+
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
117
|
version: 0.2.4
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
|
-
- - ~>
|
122
|
+
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: 0.2.4
|
125
125
|
description: Some tools that make working with the Varnish HTTP cache easier, including
|
@@ -130,16 +130,17 @@ executables:
|
|
130
130
|
extensions: []
|
131
131
|
extra_rdoc_files: []
|
132
132
|
files:
|
133
|
+
- Gemfile
|
134
|
+
- LICENSE
|
135
|
+
- README.md
|
133
136
|
- bin/varnisher
|
137
|
+
- lib/varnisher.rb
|
134
138
|
- lib/varnisher/domainpurger.rb
|
135
139
|
- lib/varnisher/pagepurger.rb
|
136
140
|
- lib/varnisher/purger.rb
|
137
141
|
- lib/varnisher/spider.rb
|
142
|
+
- lib/varnisher/urls.rb
|
138
143
|
- lib/varnisher/version.rb
|
139
|
-
- lib/varnisher.rb
|
140
|
-
- LICENSE
|
141
|
-
- README.md
|
142
|
-
- Gemfile
|
143
144
|
homepage: http://github.com/robmiller/varnisher
|
144
145
|
licenses:
|
145
146
|
- MIT
|
@@ -150,17 +151,17 @@ require_paths:
|
|
150
151
|
- lib
|
151
152
|
required_ruby_version: !ruby/object:Gem::Requirement
|
152
153
|
requirements:
|
153
|
-
- -
|
154
|
+
- - ">="
|
154
155
|
- !ruby/object:Gem::Version
|
155
156
|
version: '0'
|
156
157
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
157
158
|
requirements:
|
158
|
-
- -
|
159
|
+
- - ">="
|
159
160
|
- !ruby/object:Gem::Version
|
160
161
|
version: '0'
|
161
162
|
requirements: []
|
162
163
|
rubyforge_project:
|
163
|
-
rubygems_version: 2.
|
164
|
+
rubygems_version: 2.2.2
|
164
165
|
signing_key:
|
165
166
|
specification_version: 4
|
166
167
|
summary: Helpful tools for working with Varnish caches
|