varnisher 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1f28d1ab92622c71c86bf45e3762de993496f2e7
4
- data.tar.gz: f59a59018cef6491a2684c3646f293a2aae80141
3
+ metadata.gz: 53a788e751893e32d605e1221b9cc42012a3bda3
4
+ data.tar.gz: 79b9cdeb072f45e6042bfdad7f68f4c60ec5d574
5
5
  SHA512:
6
- metadata.gz: 1f05dac54745831b650f7b809408778866f23a5bdd5176b0c46e10ce329d258d4567f3f45ebce80bb01a1e7a99359b56616de3f05d5095c0c6872f9e6f4429b6
7
- data.tar.gz: d87c226e4a485a47c29864c7d67972bb64916d1e33e2da93d546fcd4aae2627d468d885f98dc4aacd866d9e94a3039eedeb81083ccc90196f843074a2783183c
6
+ metadata.gz: 974f483c163d0ead6adb56c18ee672ed8ac586618412e9614366069a8a54d763a1f2d6b1bf65ae0ab08a2d3090dbf9dc3f4fe39ebf3bdee97e94ef471713c851
7
+ data.tar.gz: de2a2698bf9e9aaa992884a0512a40df21c668763d0aa2241be6cb24fb4b70acda32821b2a73db7db818640484c640783c39d02aebd8d1c0c8254cb1af302586
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # varnisher
2
2
 
3
- [![Build Status](https://travis-ci.org/robmiller/varnisher.png?branch=master)](https://travis-ci.org/robmiller/varnisher)
3
+ [![Build Status](https://travis-ci.org/robmiller/varnisher.png?branch=master)](https://travis-ci.org/robmiller/varnisher) [![Dependency Status](https://gemnasium.com/robmiller/varnisher.png)](https://gemnasium.com/robmiller/varnisher)
4
4
 
5
5
  Administering Varnish is generally a breeze, but sometimes you want to
6
6
  do one of the few things that aren't painless out of the box. Hopefully,
@@ -1,5 +1,6 @@
1
1
  require 'net/http'
2
2
 
3
+ require_relative 'varnisher/urls'
3
4
  require_relative 'varnisher/spider'
4
5
  require_relative 'varnisher/purger'
5
6
  require_relative 'varnisher/domainpurger'
@@ -29,8 +29,6 @@ module Varnisher
29
29
  def initialize(url)
30
30
  @url = url
31
31
  @uri = URI.parse(url)
32
-
33
- @urls = []
34
32
  end
35
33
 
36
34
  # Sends a PURGE request to the Varnish server, asking it to purge
@@ -69,6 +67,8 @@ module Varnisher
69
67
  # Purges all the resources on the given page.
70
68
  def purge_resources
71
69
  fetch_page
70
+ parse_page
71
+ @urls = find_resources
72
72
 
73
73
  return if @urls.empty?
74
74
 
@@ -84,16 +84,18 @@ module Varnisher
84
84
  Varnisher.log.info "Looking for external resources on #{@url}..."
85
85
 
86
86
  begin
87
- @doc = Nokogiri::HTML(Net::HTTP.get_response(@uri).body)
87
+ @html = Net::HTTP.get_response(@uri).body
88
88
  rescue
89
89
  Varnisher.log.info "Hmm, I couldn't fetch that URL. Sure it's right?\n"
90
90
  return
91
91
  end
92
+ end
92
93
 
93
- @urls = find_resources
94
-
95
- Varnisher.log.debug ''
96
- Varnisher.log.info "#{@urls.length} total resources found.\n"
94
+ # Parses the raw HTML we've fetched into a Nokogiri document.
95
+ #
96
+ # @api private
97
+ def parse_page
98
+ @doc = Nokogiri::HTML(@html)
97
99
  end
98
100
 
99
101
  # Returns an array of resources contained within the current page.
@@ -107,18 +109,31 @@ module Varnisher
107
109
  # @return [Array] An array of strings, each representing a URL
108
110
  #
109
111
  # @api private
110
- def find_resources
111
- found = []
112
+ def find_resources(&block)
113
+ found = Varnisher::Urls.new
114
+
115
+ self.class.resources.each do |resource|
116
+ found += find_resource(resource, &block)
117
+ end
118
+
119
+ Varnisher.log.debug ''
120
+ Varnisher.log.info "#{found.length} total resources found.\n"
121
+
122
+ found
123
+ end
124
+
125
+ # Given a resource, will return instances of that resource found in
126
+ # the current document.
127
+ def find_resource(resource, &block)
128
+ found = Varnisher::Urls.new
112
129
 
113
- self.class.resources.each do |res|
114
- @doc.css(res.selector).each do |e|
115
- attribute = e[res.attribute]
130
+ @doc.css(resource.selector).each do |e|
131
+ attribute = e[resource.attribute]
116
132
 
117
- Varnisher.log.debug("Found resource: #{attribute}")
133
+ Varnisher.log.debug("Found resource: #{attribute}")
118
134
 
119
- yield attribute if block_given?
120
- found << attribute
121
- end
135
+ yield attribute if block_given?
136
+ found << attribute
122
137
  end
123
138
 
124
139
  found
@@ -133,8 +148,7 @@ module Varnisher
133
148
  def tidy_resources
134
149
  Varnisher.log.info 'Tidying resources...'
135
150
 
136
- @urls = @urls.map { |url| URI.join(@uri, url) }
137
- .select { |uri| uri.scheme == 'http' && uri.host == @uri.host }
151
+ @urls = @urls.make_absolute(@uri).with_hostname(@uri.host)
138
152
 
139
153
  Varnisher.log.info "#{@urls.length} purgeable resources found.\n"
140
154
  end
@@ -81,7 +81,9 @@ module Varnisher
81
81
 
82
82
  begin
83
83
  http = Net::HTTP.new(hostname, port)
84
- response = http.request(@request_method.new(@path))
84
+ request = @request_method.new(@path)
85
+ request.add_field("Host", @host)
86
+ response = http.request(request)
85
87
  rescue Timeout::Error
86
88
  return false
87
89
  end
@@ -43,6 +43,9 @@ module Varnisher
43
43
 
44
44
  @visited = []
45
45
  @to_visit = []
46
+
47
+ @threads = Varnisher.options['threads']
48
+ @num_pages = Varnisher.options['num-pages']
46
49
  end
47
50
 
48
51
  # Adds a link to the queue of pages to be visited.
@@ -103,16 +106,14 @@ module Varnisher
103
106
  #
104
107
  # @api private
105
108
  def find_links(doc, uri)
106
- hrefs = []
109
+ urls = Varnisher::Urls.new(get_anchors(doc) + get_commented_urls(doc))
107
110
 
108
- hrefs = get_anchors(doc)
109
- hrefs += get_commented_urls(doc)
111
+ urls = urls.make_absolute(uri).with_hostname(uri.host)
110
112
 
111
- hrefs = valid_urls(hrefs, uri)
112
- hrefs = remove_hashes(hrefs)
113
- hrefs = remove_query_strings(hrefs)
113
+ urls = urls.without_hashes if Varnisher.options['ignore-hashes']
114
+ urls = urls.without_query_strings if Varnisher.options['ignore-query-strings']
114
115
 
115
- hrefs
116
+ urls
116
117
  end
117
118
 
118
119
  # Given an HTML document, will return all the URLs that exist as
@@ -131,67 +132,6 @@ module Varnisher
131
132
  doc.xpath('//comment()').flat_map { |e| URI.extract(e.to_html, 'http') }
132
133
  end
133
134
 
134
- # Given a set of URLs, will return only the ones that are valid for
135
- # spidering.
136
- #
137
- # That means URLs that have the same hostname as the hostname we
138
- # started from, and that are on the HTTP scheme rather than HTTPS
139
- # (since Varnish doesn't support HTTPS).
140
- #
141
- # Additionally, some normalisation will be performed, so that the
142
- # URLs are absolute (using the page that they were fetched from as
143
- # the base, just like a browser would).
144
- #
145
- # @return [Array] An array of URIs
146
- def valid_urls(hrefs, uri)
147
- hrefs.map { |u| URI.join(uri, URI.escape(u)) }
148
- .select { |u| u.scheme == 'http' && u.host == @uri.host }
149
- end
150
-
151
- # Given a set of URLs, will normalise them according to their URL
152
- # minus the hash; that is, normalise them so that:
153
- #
154
- # foo#bar
155
- #
156
- # and:
157
- #
158
- # foo#baz
159
- #
160
- # Are considered the same.
161
- #
162
- # @return [Array] An array of URIs
163
- def remove_hashes(hrefs)
164
- return hrefs unless Varnisher.options['ignore-hashes']
165
-
166
- hrefs = hrefs.group_by do |h|
167
- URI.parse(h.scheme + '://' + h.host + h.path.to_s + h.query.to_s)
168
- end
169
-
170
- hrefs.keys
171
- end
172
-
173
- # Given a set of URLs, will normalise them according to their URL
174
- # minus the query string; that is, normalise them so that:
175
- #
176
- # foo?foo=bar
177
- #
178
- # and:
179
- #
180
- # foo?foo=baz
181
- #
182
- # Are considered the same.
183
- #
184
- # @return [Array] An array of URIs
185
- def remove_query_strings(hrefs)
186
- return hrefs unless Varnisher.options['ignore-query-strings']
187
-
188
- hrefs = hrefs.group_by do |h|
189
- URI.parse(h.scheme + '://' + h.host + h.path.to_s)
190
- end
191
-
192
- hrefs.keys
193
- end
194
-
195
135
  # Pops a URL from the queue of yet-to-be-visited URLs, ensuring that
196
136
  # it's not one that we've visited before.
197
137
  #
@@ -207,6 +147,16 @@ module Varnisher
207
147
  url
208
148
  end
209
149
 
150
+ # Returns true if the spider has visited the maximum number of pages
151
+ # it's allowed to.
152
+ def limit_reached?
153
+ @visited.length > @num_pages && @num_pages >= 0
154
+ end
155
+
156
+ def pages_remaining?
157
+ @to_visit.length > 0
158
+ end
159
+
210
160
  # Kicks off the spidering process.
211
161
  #
212
162
  # Fires up Parallel in as many threads as have been configured, and
@@ -221,13 +171,8 @@ module Varnisher
221
171
 
222
172
  crawl_page(@uri)
223
173
 
224
- threads = Varnisher.options['threads']
225
- num_pages = Varnisher.options['num-pages']
226
-
227
- Parallel.in_threads(threads) do |thread_number|
228
- next if @visited.length > num_pages && num_pages >= 0
229
-
230
- crawl_page(pop_url) while @to_visit.length > 0
174
+ Parallel.in_threads(@threads) do |_|
175
+ crawl_page(pop_url) while pages_remaining? and !limit_reached?
231
176
  end
232
177
 
233
178
  Varnisher.log.info "Done; #{@visited.length} pages hit."
@@ -0,0 +1,92 @@
1
require 'forwardable'
require 'uri'

module Varnisher
  # A collection for URLs, that exposes some useful behaviour (like
  # selecting only URLs that have a given hostname, or converting
  # relative URLs to absolute).
  #
  # Every method that filters or normalises returns a *new* collection;
  # the receiver and the URI objects it holds are left untouched.
  class Urls
    include Enumerable
    extend Forwardable

    # Parser used to percent-encode raw strings. `URI.escape` was removed
    # in Ruby 3.0, but the RFC 2396 parser still provides the same
    # escaping; fall back to the default parser on older Rubies where the
    # dedicated constant does not exist.
    ESCAPER = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
    private_constant :ESCAPER

    # NOTE: `<<` appends the given value to the underlying array as-is;
    # values are only coerced to URI objects when a collection is built
    # through `new` (which `+` and the filtering methods all use).
    def_delegators :urls, :each, :<<, :length, :empty?, :include?

    # Given an array of URLs (either strings or URI objects), store them
    # in the collection. Values that cannot be parsed as URIs are
    # dropped.
    #
    # @param urls [Array<String, URI>, String, URI, nil]
    def initialize(urls = [])
      @urls = Array(urls)
      @urls = make_uris
    end

    # Coerces the values of the current collection into being URI
    # objects, which allows strings to be passed initially.
    #
    # Anything `URI()` rejects (malformed strings, nil, non-string
    # objects) is silently discarded rather than raising, since one bad
    # link on a page should not abort processing of the others.
    #
    # @return [Array<URI>]
    def make_uris
      coerced = urls.map do |url|
        begin
          URI(url)
        rescue URI::InvalidURIError, ArgumentError
          nil
        end
      end

      coerced.compact
    end

    # Given a relative URL and a base to work from, will return the
    # absolute form of that URL.
    #
    # For example:
    #
    #     absolute_url('http://www.example.com', '/foo')
    #     # => #<URI::HTTP http://www.example.com/foo>
    #
    #     absolute_url('http://www.example.com/foo', 'bar')
    #     # => #<URI::HTTP http://www.example.com/bar>
    #
    #     absolute_url('http://www.example.com/foo/bar', 'baz')
    #     # => #<URI::HTTP http://www.example.com/foo/baz>
    #
    # Raw strings are percent-encoded first so that things like spaces
    # don't make the join fail. URI objects are already valid, so they
    # are joined untouched: escaping them again would double-encode `%`
    # and turn a `#fragment` into `%23fragment`.
    #
    # @param base [String, URI] the URL to resolve against
    # @param url [String, URI] the (possibly relative) URL
    # @return [URI]
    def absolute_url(base, url)
      relative = url.is_a?(URI::Generic) ? url : ESCAPER.escape(url.to_s)
      URI.join(base, relative)
    end

    # Returns a new collection containing absolute versions of all the
    # URLs in the current collection.
    #
    # @param base [String, URI]
    # @return [Urls]
    def make_absolute(base)
      Urls.new(urls.map { |uri| absolute_url(base, uri) })
    end

    # Returns a new collection containing only the URLs in this
    # collection that use the plain `http` scheme and match the given
    # hostname.
    #
    # @param hostname [String]
    # @return [Urls]
    def with_hostname(hostname)
      Urls.new(urls.select { |uri| uri.scheme == 'http' && uri.host == hostname })
    end

    # Returns a new collection containing the URLs in the current
    # collection with their fragment (`#foo`) removed; URLs that become
    # identical as a result are collapsed into one.
    #
    # @return [Urls]
    def without_hashes
      normalised { |url| url.fragment = nil }
    end

    # Returns a new collection containing the URLs in the current
    # collection with their query string removed; URLs that become
    # identical as a result are collapsed into one.
    #
    # @return [Urls]
    def without_query_strings
      normalised { |url| url.query = nil }
    end

    # Allows the addition of two collections by accessing the underlying
    # array.
    #
    # @param other [Urls]
    # @return [Urls]
    def +(other)
      Urls.new(urls + other.urls)
    end

    protected

    attr_reader :urls

    private

    # Yields a copy of each URL so the caller can strip a component from
    # it, then returns a de-duplicated collection of the results. Working
    # on copies keeps the URI objects in this collection unchanged.
    def normalised
      stripped = urls.map do |url|
        copy = url.dup
        yield copy
        copy
      end

      Urls.new(stripped.uniq)
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Varnisher
2
- VERSION = '1.0.1'
2
+ VERSION = '1.0.2'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: varnisher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Miller
@@ -14,112 +14,112 @@ dependencies:
14
14
  name: main
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: 5.2.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 5.2.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: nokogiri
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ~>
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
33
  version: 1.6.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ~>
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 1.6.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: parallel
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ~>
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 0.7.1
47
+ version: 0.8.1
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ~>
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 0.7.1
54
+ version: 0.8.1
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rake
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ~>
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
61
  version: 10.1.0
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ~>
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: 10.1.0
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: minitest
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ~>
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
75
  version: 5.0.6
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ~>
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: 5.0.6
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: webmock
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ~>
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
89
  version: 1.13.0
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ~>
94
+ - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: 1.13.0
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: letters
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - ~>
101
+ - - "~>"
102
102
  - !ruby/object:Gem::Version
103
103
  version: 0.4.1
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ~>
108
+ - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: 0.4.1
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: rubygems-tasks
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - ~>
115
+ - - "~>"
116
116
  - !ruby/object:Gem::Version
117
117
  version: 0.2.4
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - ~>
122
+ - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: 0.2.4
125
125
  description: Some tools that make working with the Varnish HTTP cache easier, including
@@ -130,16 +130,17 @@ executables:
130
130
  extensions: []
131
131
  extra_rdoc_files: []
132
132
  files:
133
+ - Gemfile
134
+ - LICENSE
135
+ - README.md
133
136
  - bin/varnisher
137
+ - lib/varnisher.rb
134
138
  - lib/varnisher/domainpurger.rb
135
139
  - lib/varnisher/pagepurger.rb
136
140
  - lib/varnisher/purger.rb
137
141
  - lib/varnisher/spider.rb
142
+ - lib/varnisher/urls.rb
138
143
  - lib/varnisher/version.rb
139
- - lib/varnisher.rb
140
- - LICENSE
141
- - README.md
142
- - Gemfile
143
144
  homepage: http://github.com/robmiller/varnisher
144
145
  licenses:
145
146
  - MIT
@@ -150,17 +151,17 @@ require_paths:
150
151
  - lib
151
152
  required_ruby_version: !ruby/object:Gem::Requirement
152
153
  requirements:
153
- - - '>='
154
+ - - ">="
154
155
  - !ruby/object:Gem::Version
155
156
  version: '0'
156
157
  required_rubygems_version: !ruby/object:Gem::Requirement
157
158
  requirements:
158
- - - '>='
159
+ - - ">="
159
160
  - !ruby/object:Gem::Version
160
161
  version: '0'
161
162
  requirements: []
162
163
  rubyforge_project:
163
- rubygems_version: 2.0.3
164
+ rubygems_version: 2.2.2
164
165
  signing_key:
165
166
  specification_version: 4
166
167
  summary: Helpful tools for working with Varnish caches