varnisher 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1f28d1ab92622c71c86bf45e3762de993496f2e7
4
- data.tar.gz: f59a59018cef6491a2684c3646f293a2aae80141
3
+ metadata.gz: 53a788e751893e32d605e1221b9cc42012a3bda3
4
+ data.tar.gz: 79b9cdeb072f45e6042bfdad7f68f4c60ec5d574
5
5
  SHA512:
6
- metadata.gz: 1f05dac54745831b650f7b809408778866f23a5bdd5176b0c46e10ce329d258d4567f3f45ebce80bb01a1e7a99359b56616de3f05d5095c0c6872f9e6f4429b6
7
- data.tar.gz: d87c226e4a485a47c29864c7d67972bb64916d1e33e2da93d546fcd4aae2627d468d885f98dc4aacd866d9e94a3039eedeb81083ccc90196f843074a2783183c
6
+ metadata.gz: 974f483c163d0ead6adb56c18ee672ed8ac586618412e9614366069a8a54d763a1f2d6b1bf65ae0ab08a2d3090dbf9dc3f4fe39ebf3bdee97e94ef471713c851
7
+ data.tar.gz: de2a2698bf9e9aaa992884a0512a40df21c668763d0aa2241be6cb24fb4b70acda32821b2a73db7db818640484c640783c39d02aebd8d1c0c8254cb1af302586
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # varnisher
2
2
 
3
- [![Build Status](https://travis-ci.org/robmiller/varnisher.png?branch=master)](https://travis-ci.org/robmiller/varnisher)
3
+ [![Build Status](https://travis-ci.org/robmiller/varnisher.png?branch=master)](https://travis-ci.org/robmiller/varnisher) [![Dependency Status](https://gemnasium.com/robmiller/varnisher.png)](https://gemnasium.com/robmiller/varnisher)
4
4
 
5
5
  Administering Varnish is generally a breeze, but sometimes you want to
6
6
  do one of the few things that aren't painless out of the box. Hopefully,
@@ -1,5 +1,6 @@
1
1
  require 'net/http'
2
2
 
3
+ require_relative 'varnisher/urls'
3
4
  require_relative 'varnisher/spider'
4
5
  require_relative 'varnisher/purger'
5
6
  require_relative 'varnisher/domainpurger'
@@ -29,8 +29,6 @@ module Varnisher
29
29
  def initialize(url)
30
30
  @url = url
31
31
  @uri = URI.parse(url)
32
-
33
- @urls = []
34
32
  end
35
33
 
36
34
  # Sends a PURGE request to the Varnish server, asking it to purge
@@ -69,6 +67,8 @@ module Varnisher
69
67
  # Purges all the resources on the given page.
70
68
  def purge_resources
71
69
  fetch_page
70
+ parse_page
71
+ @urls = find_resources
72
72
 
73
73
  return if @urls.empty?
74
74
 
@@ -84,16 +84,18 @@ module Varnisher
84
84
  Varnisher.log.info "Looking for external resources on #{@url}..."
85
85
 
86
86
  begin
87
- @doc = Nokogiri::HTML(Net::HTTP.get_response(@uri).body)
87
+ @html = Net::HTTP.get_response(@uri).body
88
88
  rescue
89
89
  Varnisher.log.info "Hmm, I couldn't fetch that URL. Sure it's right?\n"
90
90
  return
91
91
  end
92
+ end
92
93
 
93
- @urls = find_resources
94
-
95
- Varnisher.log.debug ''
96
- Varnisher.log.info "#{@urls.length} total resources found.\n"
94
+ # Parses the raw HTML we've fetched into a Nokogiri document.
95
+ #
96
+ # @api private
97
+ def parse_page
98
+ @doc = Nokogiri::HTML(@html)
97
99
  end
98
100
 
99
101
  # Returns an array of resources contained within the current page.
@@ -107,18 +109,31 @@ module Varnisher
107
109
  # @return [Array] An array of strings, each representing a URL
108
110
  #
109
111
  # @api private
110
- def find_resources
111
- found = []
def find_resources(&block)
  # Fold every configured resource type into a single collection,
  # passing the caller's block down so it is yielded each match.
  resources = self.class.resources.inject(Varnisher::Urls.new) do |collected, resource|
    collected + find_resource(resource, &block)
  end

  Varnisher.log.debug ''
  Varnisher.log.info "#{resources.length} total resources found.\n"

  resources
end
124
+
125
+ # Given a resource, will return instances of that resource found in
126
+ # the current document.
127
+ def find_resource(resource, &block)
128
+ found = Varnisher::Urls.new
112
129
 
113
- self.class.resources.each do |res|
114
- @doc.css(res.selector).each do |e|
115
- attribute = e[res.attribute]
130
+ @doc.css(resource.selector).each do |e|
131
+ attribute = e[resource.attribute]
116
132
 
117
- Varnisher.log.debug("Found resource: #{attribute}")
133
+ Varnisher.log.debug("Found resource: #{attribute}")
118
134
 
119
- yield attribute if block_given?
120
- found << attribute
121
- end
135
+ yield attribute if block_given?
136
+ found << attribute
122
137
  end
123
138
 
124
139
  found
@@ -133,8 +148,7 @@ module Varnisher
133
148
  def tidy_resources
134
149
  Varnisher.log.info 'Tidying resources...'
135
150
 
136
- @urls = @urls.map { |url| URI.join(@uri, url) }
137
- .select { |uri| uri.scheme == 'http' && uri.host == @uri.host }
151
+ @urls = @urls.make_absolute(@uri).with_hostname(@uri.host)
138
152
 
139
153
  Varnisher.log.info "#{@urls.length} purgeable resources found.\n"
140
154
  end
@@ -81,7 +81,9 @@ module Varnisher
81
81
 
82
82
  begin
83
83
  http = Net::HTTP.new(hostname, port)
84
- response = http.request(@request_method.new(@path))
84
+ request = @request_method.new(@path)
85
+ request.add_field("Host", @host)
86
+ response = http.request(request)
85
87
  rescue Timeout::Error
86
88
  return false
87
89
  end
@@ -43,6 +43,9 @@ module Varnisher
43
43
 
44
44
  @visited = []
45
45
  @to_visit = []
46
+
47
+ @threads = Varnisher.options['threads']
48
+ @num_pages = Varnisher.options['num-pages']
46
49
  end
47
50
 
48
51
  # Adds a link to the queue of pages to be visited.
@@ -103,16 +106,14 @@ module Varnisher
103
106
  #
104
107
  # @api private
105
108
  def find_links(doc, uri)
106
- hrefs = []
109
+ urls = Varnisher::Urls.new(get_anchors(doc) + get_commented_urls(doc))
107
110
 
108
- hrefs = get_anchors(doc)
109
- hrefs += get_commented_urls(doc)
111
+ urls = urls.make_absolute(uri).with_hostname(uri.host)
110
112
 
111
- hrefs = valid_urls(hrefs, uri)
112
- hrefs = remove_hashes(hrefs)
113
- hrefs = remove_query_strings(hrefs)
113
+ urls = urls.without_hashes if Varnisher.options['ignore-hashes']
114
+ urls = urls.without_query_strings if Varnisher.options['ignore-query-strings']
114
115
 
115
- hrefs
116
+ urls
116
117
  end
117
118
 
118
119
  # Given an HTML document, will return all the URLs that exist as
@@ -131,67 +132,6 @@ module Varnisher
131
132
  doc.xpath('//comment()').flat_map { |e| URI.extract(e.to_html, 'http') }
132
133
  end
133
134
 
134
- # Given a set of URLs, will return only the ones that are valid for
135
- # spidering.
136
- #
137
- # That means URLs that have the same hostname as the hostname we
138
- # started from, and that are on the HTTP scheme rather than HTTPS
139
- # (since Varnish doesn't support HTTPS).
140
- #
141
- # Additionally, some normalisation will be performed, so that the
142
- # URLs are absolute (using the page that they were fetched from as
143
- # the base, just like a browser would).
144
- #
145
- # @return [Array] An array of URIs
146
- def valid_urls(hrefs, uri)
147
- hrefs.map { |u| URI.join(uri, URI.escape(u)) }
148
- .select { |u| u.scheme == 'http' && u.host == @uri.host }
149
- end
150
-
151
- # Given a set of URLs, will normalise them according to their URL
152
- # minus the hash; that is, normalise them so that:
153
- #
154
- # foo#bar
155
- #
156
- # and:
157
- #
158
- # foo#baz
159
- #
160
- # Are considered the same.
161
- #
162
- # @return [Array] An array of URIs
163
- def remove_hashes(hrefs)
164
- return hrefs unless Varnisher.options['ignore-hashes']
165
-
166
- hrefs = hrefs.group_by do |h|
167
- URI.parse(h.scheme + '://' + h.host + h.path.to_s + h.query.to_s)
168
- end
169
-
170
- hrefs.keys
171
- end
172
-
173
- # Given a set of URLs, will normalise them according to their URL
174
- # minus the query string; that is, normalise them so that:
175
- #
176
- # foo?foo=bar
177
- #
178
- # and:
179
- #
180
- # foo?foo=baz
181
- #
182
- # Are considered the same.
183
- #
184
- # @return [Array] An array of URIs
185
- def remove_query_strings(hrefs)
186
- return hrefs unless Varnisher.options['ignore-query-strings']
187
-
188
- hrefs = hrefs.group_by do |h|
189
- URI.parse(h.scheme + '://' + h.host + h.path.to_s)
190
- end
191
-
192
- hrefs.keys
193
- end
194
-
195
135
  # Pops a URL from the queue of yet-to-be-visited URLs, ensuring that
196
136
  # it's not one that we've visited before.
197
137
  #
@@ -207,6 +147,16 @@ module Varnisher
207
147
  url
208
148
  end
209
149
 
# Returns true if the spider has visited the maximum number of pages
# it's allowed to.
#
# A negative @num_pages disables the limit; a nil value (no limit
# configured) is treated the same way instead of raising on the
# comparison. Otherwise the limit counts as reached once more than
# @num_pages pages have been visited.
#
# @return [Boolean]
def limit_reached?
  return false if @num_pages.nil? || @num_pages < 0

  @visited.length > @num_pages
end
155
+
# Returns true while there are still URLs queued in @to_visit.
#
# @return [Boolean]
def pages_remaining?
  !@to_visit.empty?
end
159
+
210
160
  # Kicks off the spidering process.
211
161
  #
212
162
  # Fires up Parallel in as many threads as have been configured, and
@@ -221,13 +171,8 @@ module Varnisher
221
171
 
222
172
  crawl_page(@uri)
223
173
 
224
- threads = Varnisher.options['threads']
225
- num_pages = Varnisher.options['num-pages']
226
-
227
- Parallel.in_threads(threads) do |thread_number|
228
- next if @visited.length > num_pages && num_pages >= 0
229
-
230
- crawl_page(pop_url) while @to_visit.length > 0
174
+ Parallel.in_threads(@threads) do |_|
175
+ crawl_page(pop_url) while pages_remaining? and !limit_reached?
231
176
  end
232
177
 
233
178
  Varnisher.log.info "Done; #{@visited.length} pages hit."
@@ -0,0 +1,92 @@
require 'forwardable'
require 'uri'

module Varnisher
  # A collection for URLs, that exposes some useful behaviour (like
  # selecting only URLs that have a given hostname, or converting
  # relative URLs to absolute).
  #
  # Items given to the constructor are coerced into URI objects and
  # anything that cannot be parsed is dropped. Note that `<<` appends
  # straight to the underlying array without coercion; such items are
  # only coerced when a new collection is built from this one (for
  # example via `+`).
  class Urls
    include Enumerable
    extend Forwardable

    # Parser used to escape raw strings. URI.escape itself is obsolete
    # (and removed in Ruby 3.0), but the RFC 2396 parser still provides
    # the same escaping rules.
    ESCAPE_PARSER =
      defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER

    def_delegators :@urls, :each, :<<, :length, :empty?, :include?

    # Given an array of URLs (either strings or URI objects), store them
    # in the collection.
    #
    # @param urls [Array<String, URI>, String, URI, nil]
    def initialize(urls = [])
      @urls = Array(urls)
      @urls = make_uris
    end

    # Coerces the values of the current collection into being URI
    # objects, which allows strings to be passed initially.
    #
    # Values that cannot be turned into a URI (nil, malformed strings)
    # are silently discarded.
    #
    # @return [Array<URI>]
    def make_uris
      coerced = urls.map do |url|
        begin
          URI(url)
        rescue StandardError
          nil
        end
      end

      coerced.compact
    end

    # Given a relative URL and a base to work from, will return the
    # absolute form of that URL as a URI object.
    #
    # For example:
    #
    #     absolute_url('http://www.example.com', '/foo')
    #     # => #<URI::HTTP http://www.example.com/foo>
    #
    #     absolute_url('http://www.example.com/foo', 'bar')
    #     # => #<URI::HTTP http://www.example.com/bar>
    #
    #     absolute_url('http://www.example.com/foo/bar', 'baz')
    #     # => #<URI::HTTP http://www.example.com/foo/baz>
    #
    # @param base [String, URI] the URL to resolve against
    # @param url [String, URI] the (possibly relative) URL
    # @return [URI]
    def absolute_url(base, url)
      # A URI object is already well-formed; escaping its string form
      # would turn "#" into "%23" and double-encode existing "%xx"
      # sequences. Only raw strings need escaping.
      reference = url.is_a?(URI::Generic) ? url : ESCAPE_PARSER.escape(url.to_s)

      URI.join(base, reference)
    end

    # Returns a new collection containing absolute versions of all the
    # URLs in the current collection.
    #
    # @param base [String, URI] the URL to resolve against
    # @return [Urls]
    def make_absolute(base)
      Urls.new(urls.map { |uri| absolute_url(base, uri) })
    end

    # Returns a new collection containing only the URLs in this
    # collection that use the http scheme and match the given hostname.
    #
    # @param hostname [String]
    # @return [Urls]
    def with_hostname(hostname)
      Urls.new(urls.select { |uri| uri.scheme == 'http' && uri.host == hostname })
    end

    # Returns a new collection containing the URLs in the current
    # collection with their fragment ("hash") removed; URLs that only
    # differed by fragment are collapsed into one entry.
    #
    # The URLs in the current collection are left untouched.
    #
    # @return [Urls]
    def without_hashes
      normalised { |uri| uri.fragment = nil }
    end

    # Returns a new collection containing the URLs in the current
    # collection without their query string values; URLs that only
    # differed by query string are collapsed into one entry.
    #
    # The URLs in the current collection are left untouched.
    #
    # @return [Urls]
    def without_query_strings
      normalised { |uri| uri.query = nil }
    end

    # Allows the addition of two collections by accessing the underlying
    # array.
    #
    # @param other [Urls]
    # @return [Urls]
    def +(other)
      Urls.new(urls + other.urls)
    end

    protected

    attr_reader :urls

    private

    # Yields a copy of every URL so the caller can strip a component
    # from it, then returns the de-duplicated copies as a new
    # collection. Working on copies keeps this collection (and any
    # other collection sharing the same URI objects) unchanged.
    def normalised
      copies = urls.map do |uri|
        copy = uri.dup
        yield copy
        copy
      end

      Urls.new(copies.uniq)
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Varnisher
2
- VERSION = '1.0.1'
2
+ VERSION = '1.0.2'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: varnisher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Miller
@@ -14,112 +14,112 @@ dependencies:
14
14
  name: main
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: 5.2.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 5.2.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: nokogiri
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ~>
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
33
  version: 1.6.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ~>
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 1.6.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: parallel
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ~>
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 0.7.1
47
+ version: 0.8.1
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ~>
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 0.7.1
54
+ version: 0.8.1
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rake
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ~>
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
61
  version: 10.1.0
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ~>
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: 10.1.0
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: minitest
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ~>
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
75
  version: 5.0.6
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ~>
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: 5.0.6
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: webmock
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ~>
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
89
  version: 1.13.0
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ~>
94
+ - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: 1.13.0
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: letters
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - ~>
101
+ - - "~>"
102
102
  - !ruby/object:Gem::Version
103
103
  version: 0.4.1
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ~>
108
+ - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: 0.4.1
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: rubygems-tasks
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - ~>
115
+ - - "~>"
116
116
  - !ruby/object:Gem::Version
117
117
  version: 0.2.4
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - ~>
122
+ - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: 0.2.4
125
125
  description: Some tools that make working with the Varnish HTTP cache easier, including
@@ -130,16 +130,17 @@ executables:
130
130
  extensions: []
131
131
  extra_rdoc_files: []
132
132
  files:
133
+ - Gemfile
134
+ - LICENSE
135
+ - README.md
133
136
  - bin/varnisher
137
+ - lib/varnisher.rb
134
138
  - lib/varnisher/domainpurger.rb
135
139
  - lib/varnisher/pagepurger.rb
136
140
  - lib/varnisher/purger.rb
137
141
  - lib/varnisher/spider.rb
142
+ - lib/varnisher/urls.rb
138
143
  - lib/varnisher/version.rb
139
- - lib/varnisher.rb
140
- - LICENSE
141
- - README.md
142
- - Gemfile
143
144
  homepage: http://github.com/robmiller/varnisher
144
145
  licenses:
145
146
  - MIT
@@ -150,17 +151,17 @@ require_paths:
150
151
  - lib
151
152
  required_ruby_version: !ruby/object:Gem::Requirement
152
153
  requirements:
153
- - - '>='
154
+ - - ">="
154
155
  - !ruby/object:Gem::Version
155
156
  version: '0'
156
157
  required_rubygems_version: !ruby/object:Gem::Requirement
157
158
  requirements:
158
- - - '>='
159
+ - - ">="
159
160
  - !ruby/object:Gem::Version
160
161
  version: '0'
161
162
  requirements: []
162
163
  rubyforge_project:
163
- rubygems_version: 2.0.3
164
+ rubygems_version: 2.2.2
164
165
  signing_key:
165
166
  specification_version: 4
166
167
  summary: Helpful tools for working with Varnish caches