rack-cache 0.2.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rack-cache might be problematic. Click here for more details.

Files changed (44) hide show
  1. data/CHANGES +27 -0
  2. data/COPYING +18 -0
  3. data/README +96 -0
  4. data/Rakefile +144 -0
  5. data/TODO +40 -0
  6. data/doc/configuration.markdown +224 -0
  7. data/doc/events.dot +27 -0
  8. data/doc/faq.markdown +133 -0
  9. data/doc/index.markdown +113 -0
  10. data/doc/layout.html.erb +33 -0
  11. data/doc/license.markdown +24 -0
  12. data/doc/rack-cache.css +362 -0
  13. data/doc/storage.markdown +162 -0
  14. data/lib/rack/cache.rb +51 -0
  15. data/lib/rack/cache/config.rb +65 -0
  16. data/lib/rack/cache/config/busters.rb +16 -0
  17. data/lib/rack/cache/config/default.rb +134 -0
  18. data/lib/rack/cache/config/no-cache.rb +13 -0
  19. data/lib/rack/cache/context.rb +95 -0
  20. data/lib/rack/cache/core.rb +271 -0
  21. data/lib/rack/cache/entitystore.rb +224 -0
  22. data/lib/rack/cache/headers.rb +237 -0
  23. data/lib/rack/cache/metastore.rb +309 -0
  24. data/lib/rack/cache/options.rb +119 -0
  25. data/lib/rack/cache/request.rb +37 -0
  26. data/lib/rack/cache/response.rb +76 -0
  27. data/lib/rack/cache/storage.rb +50 -0
  28. data/lib/rack/utils/environment_headers.rb +78 -0
  29. data/rack-cache.gemspec +74 -0
  30. data/test/cache_test.rb +35 -0
  31. data/test/config_test.rb +66 -0
  32. data/test/context_test.rb +465 -0
  33. data/test/core_test.rb +84 -0
  34. data/test/entitystore_test.rb +176 -0
  35. data/test/environment_headers_test.rb +71 -0
  36. data/test/headers_test.rb +215 -0
  37. data/test/logging_test.rb +45 -0
  38. data/test/metastore_test.rb +210 -0
  39. data/test/options_test.rb +64 -0
  40. data/test/pony.jpg +0 -0
  41. data/test/response_test.rb +37 -0
  42. data/test/spec_setup.rb +189 -0
  43. data/test/storage_test.rb +94 -0
  44. metadata +120 -0
@@ -0,0 +1,237 @@
1
+ require 'set'
2
+ require 'rack/utils/environment_headers'
3
+
4
+ module Rack::Cache
5
+ # Generic HTTP header helper methods. Provides access to headers that can be
6
+ # included in requests and responses. This can be mixed into any object that
7
+ # responds to #headers by returning a Hash.
8
+
9
+ module Headers
10
+ # Determine if any of the header names exist:
11
+ # if header?('Authorization', 'Cookie')
12
+ # ...
13
+ # end
14
+ def header?(*names)
15
+ names.any? { |name| headers.include?(name) }
16
+ end
17
+
18
# A Hash of name=value pairs that correspond to the Cache-Control header.
# Directive names are downcased. Valueless parameters (e.g., must-revalidate,
# no-store) have a Hash value of true. The Hash is built once, frozen, and
# memoized in @cache_control. This method always returns a Hash, empty if no
# Cache-Control header is present.
#
# Whitespace around each directive is ignored, and empty list elements (as
# produced by a doubled, leading, or trailing comma) are skipped.
def cache_control
  @cache_control ||=
    (headers['Cache-Control'] || '').split(',').inject({}) { |directives, token|
      name, value = token.strip.split(/\s*=\s*/, 2)
      # "".split returns [], which leaves name nil for an empty list element.
      directives[name.downcase] = (value || true) unless name.nil? || name.empty?
      directives
    }.freeze
end
30
+
31
# Set the Cache-Control header to the values specified by the Hash. See
# the #cache_control method for information on expected Hash structure.
#
# Entries whose value is nil or false are omitted; a value of true emits the
# bare directive name; any other value is emitted as name=value. When no
# directive remains, the Cache-Control header is deleted. The memoized
# #cache_control Hash is replaced with one holding only the directives that
# were actually written to the header.
def cache_control=(hash)
  # Drop disabled directives first so the header and the memoized Hash agree.
  directives = hash.reject { |_name, setting| !setting }
  header_value =
    directives.collect { |name, setting|
      setting == true ? name : "#{name}=#{setting}"
    }.join(', ')
  if header_value.empty?
    headers.delete('Cache-Control')
    @cache_control = {}
  else
    headers['Cache-Control'] = header_value
    @cache_control = directives.freeze
  end
end
48
+
49
+ # The literal value of the ETag HTTP header or nil if no ETag is specified.
50
+ def etag
51
+ headers['Etag']
52
+ end
53
+ end
54
+
55
+ # HTTP request header helpers. When included in Rack::Cache::Request, headers
56
+ # may be accessed by their standard RFC 2616 names using the #headers Hash.
57
+ module RequestHeaders
58
+ include Rack::Cache::Headers
59
+
60
+ # A Hash-like object providing access to HTTP request headers.
61
+ def headers
62
+ @headers ||= Rack::Utils::EnvironmentHeaders.new(env)
63
+ end
64
+
65
+ # The literal value of the If-Modified-Since request header or nil when
66
+ # no If-Modified-Since header is present.
67
+ def if_modified_since
68
+ headers['If-Modified-Since']
69
+ end
70
+
71
+ # The literal value of the If-None-Match request header or nil when
72
+ # no If-None-Match header is present.
73
+ def if_none_match
74
+ headers['If-None-Match']
75
+ end
76
+ end
77
+
78
+ # HTTP response header helper methods.
79
+ module ResponseHeaders
80
+ include Rack::Cache::Headers
81
+
82
+ # Set of HTTP response codes of messages that can be cached, per
83
+ # RFC 2616.
84
+ CACHEABLE_RESPONSE_CODES = Set.new([200, 203, 300, 301, 302, 404, 410])
85
+
86
# Determine if the response is "fresh". Fresh responses may be served from
# cache without any interaction with the origin. A response is considered
# fresh when it includes a Cache-Control/max-age indicator or Expires
# header and the calculated age is less than the freshness lifetime.
#
# Always returns true or false: false when #ttl is nil or not greater than
# zero.
def fresh?
  # Read #ttl once; it is recomputed on every call.
  remaining = ttl
  !remaining.nil? && remaining > 0
end
93
+
94
+ # Determine if the response is "stale". Stale responses must be validated
95
+ # with the origin before use. This is the inverse of #fresh?.
96
+ def stale?
97
+ !fresh?
98
+ end
99
+
100
+ # Determine if the response is worth caching under any circumstance. An
101
+ # object that is cacheable may not necessarily be served from cache without
102
+ # first validating the response with the origin.
103
+ #
104
+ # An object that includes no freshness lifetime (Expires, max-age) and that
105
+ # does not include a validator (Last-Modified, Etag) serves no purpose in a
106
+ # cache that only serves fresh or valid objects.
107
+ def cacheable?
108
+ return false unless CACHEABLE_RESPONSE_CODES.include?(status)
109
+ return false if no_store?
110
+ validateable? || fresh?
111
+ end
112
+
113
+ # The response includes specific information about its freshness. True when
114
+ # a +Cache-Control+ header with +max-age+ value is present or when the
115
+ # +Expires+ header is set.
116
+ def freshness_information?
117
+ header?('Expires') || !cache_control['max-age'].nil?
118
+ end
119
+
120
+ # Determine if the response includes headers that can be used to validate
121
+ # the response with the origin using a conditional GET request.
122
+ def validateable?
123
+ header?('Last-Modified') || header?('Etag')
124
+ end
125
+
126
+ # Indicates that the response should not be served from cache without first
127
+ # revalidating with the origin. Note that this does not necessarily imply that
128
+ # a caching agent ought not store the response in its cache.
129
+ def no_cache?
130
+ !cache_control['no-cache'].nil?
131
+ end
132
+
133
# Indicates that the response should not be stored under any circumstances.
# Returns true when #cache_control holds a truthy no-store entry and false
# otherwise.
def no_store?
  cache_control['no-store'] ? true : false
end
137
+
138
# The date, as specified by the Date header, as a Time. The result is
# memoized in @date. When no Date header is present, or its value is not a
# valid HTTP date, the current time (#now) is returned instead and written
# back to the Date header unless the headers Hash is frozen.
def date
  return @date if @date

  raw = headers['Date']
  parsed =
    begin
      Time.httpdate(raw) if raw
    rescue ArgumentError
      nil # malformed Date header: handle it like a missing one
    end

  unless parsed
    headers['Date'] = now.httpdate unless headers.frozen?
    parsed = now
  end
  @date = parsed
end
149
+
150
+ # The age of the response.
151
+ def age
152
+ [(now - date).to_i, 0].max
153
+ end
154
+
155
# The number of seconds after the time specified in the response's Date
# header when the response should no longer be considered fresh. First
# check for a Cache-Control max-age value, and fall back on an Expires
# header; return nil when no maximum age can be established.
#
# An Expires value that is not a valid HTTP date (e.g., "0" or "-1") is
# treated as already expired, per RFC 2616 section 14.21, and yields 0.
def max_age
  if (seconds = cache_control['max-age'])
    seconds.to_i
  elsif (expires = headers['Expires'])
    # Only the Expires parse is guarded; errors raised by #date still surface.
    expiry =
      begin
        Time.httpdate(expires)
      rescue ArgumentError
        nil
      end
    expiry ? expiry - date : 0
  end
end
166
+
167
+ # Sets the number of seconds after which the response should no longer
168
+ # be considered fresh. This sets the Cache-Control max-age value.
169
+ def max_age=(value)
170
+ self.cache_control = cache_control.merge('max-age' => value.to_s)
171
+ end
172
+
173
# The Time when the response should be considered stale. When a
# Cache-Control/max-age value is present, this is calculated by adding the
# number of seconds specified to the response's #date value. Falls back to
# the time specified in the Expires header or returns nil if neither is
# present.
#
# An Expires value that is not a valid HTTP date (e.g., "0") is treated as
# already expired, per RFC 2616 section 14.21: the response's #date is
# returned.
def expires_at
  if (seconds = cache_control['max-age'])
    date + seconds.to_i
  elsif (expires = headers['Expires'])
    begin
      Time.httpdate(expires)
    rescue ArgumentError
      date
    end
  end
end
185
+
186
+ # The response's time-to-live in seconds, or nil when no freshness
187
+ # information is present in the response. When the response's #ttl
188
+ # is <= 0, the response may not be served from cache without first
189
+ # revalidating with the origin.
190
+ def ttl
191
+ max_age - age if max_age
192
+ end
193
+
194
+ # Set the response's time-to-live to the specified number of seconds. This
195
+ # adjusts the Cache-Control/max-age value.
196
+ def ttl=(seconds)
197
+ self.max_age = age + seconds
198
+ end
199
+
200
+ # The String value of the Last-Modified header exactly as it appears
201
+ # in the response (i.e., no date parsing / conversion is performed).
202
+ def last_modified
203
+ headers['Last-Modified']
204
+ end
205
+
206
+ # Determine if the response was last modified at the time provided.
207
+ # time_value is the exact string provided in an origin response's
208
+ # Last-Modified header.
209
+ def last_modified_at?(time_value)
210
+ time_value && last_modified == time_value
211
+ end
212
+
213
+ # The literal value of the Vary header, or nil when no Vary header is
214
+ # present.
215
+ def vary
216
+ headers['Vary']
217
+ end
218
+
219
+ # Does the response include a Vary header?
220
+ def vary?
221
+ ! vary.nil?
222
+ end
223
+
224
# An array of header names given in the Vary header or an empty
# array when no Vary header is present. Names are split on commas and
# whitespace; empty names (from a leading separator) are discarded.
def vary_header_names
  value = headers['Vary']
  return [] unless value
  # String#split keeps a leading "" when the value starts with a separator.
  value.split(/[\s,]+/).reject { |name| name.empty? }
end
230
+
231
+ private
232
+ def now
233
+ @now ||= Time.now
234
+ end
235
+ end
236
+
237
+ end
@@ -0,0 +1,309 @@
1
+ require 'rack'
2
+ require 'fileutils'
3
+ require 'digest/sha1'
4
+
5
+ module Rack::Cache
6
+
7
+ # The MetaStore is responsible for storing meta information about a
8
+ # request/response pair keyed by the request's URL.
9
+ #
10
+ # The meta store keeps a list of request/response pairs for each canonical
11
+ # request URL. A request/response pair is a two element Array of the form:
12
+ # [request, response]
13
+ #
14
+ # The +request+ element is a Hash of Rack environment keys. Only keys made up
15
+ # solely of uppercase letters, digits, and underscores (e.g., "HTTP_ACCEPT") are stored. The +response+
16
+ # element is a Hash of cached HTTP response headers for the paired request.
17
+ #
18
+ # The MetaStore class is abstract and should not be instantiated
19
+ # directly. Concrete subclasses should implement the protected #read,
20
+ # #write, and #purge methods. Care has been taken to keep these low-level
21
+ # methods dumb and straight-forward to implement.
22
+ class MetaStore
23
+
24
+ # Headers that should not be stored in cache (from RFC 2616).
25
+ HEADER_BLACKLIST = Set.new(%w[
26
+ Connection
27
+ Keep-Alive
28
+ Proxy-Authenticate
29
+ Proxy-Authorization
30
+ TE
31
+ Trailers
32
+ Transfer-Encoding
33
+ Upgrade
34
+ ])
35
+
36
+ # Locate a cached response for the request provided. Returns a
37
+ # Rack::Cache::Response object if the cache hits or nil if no cache entry
38
+ # was found.
39
+ def lookup(request, entity_store)
40
+ entries = read(request.fullpath)
41
+
42
+ # bail out if we have nothing cached
43
+ return nil if entries.empty?
44
+
45
+ # find a cached entry that matches the request.
46
+ env = request.env
47
+ match = entries.detect{ |req,res| requests_match?(res['Vary'], env, req)}
48
+ if match
49
+ # TODO what if body doesn't exist in entity store?
50
+ # reconstruct response object
51
+ req, res = match
52
+ status = res['X-Status']
53
+ body = entity_store.open(res['X-Content-Digest'])
54
+ response = Rack::Cache::Response.new(status.to_i, res, body)
55
+ response.activate!
56
+
57
+ # Return the cached response
58
+ response
59
+ end
60
+ end
61
+
62
+ # Write a cache entry to the store under the given key. Existing
63
+ # entries are read and any that match the response are removed.
64
+ # This method calls #write with the new list of cache entries.
65
+ #--
66
+ # TODO canonicalize URL key
67
+ def store(request, response, entity_store)
68
+ key = request.fullpath
69
+ stored_env = persist_request(request)
70
+ stored_response = persist_response(response)
71
+
72
+ # write the response body to the entity store if this is the
73
+ # original response.
74
+ if stored_response['X-Content-Digest'].nil?
75
+ digest, size = entity_store.write(response.body)
76
+ stored_response['X-Content-Digest'] = digest
77
+ stored_response['Content-Length'] = size.to_s
78
+ response.body = entity_store.open(digest)
79
+ end
80
+
81
+ # read existing cache entries, remove non-varying, and add this one to
82
+ # the list
83
+ vary = stored_response['Vary']
84
+ entries =
85
+ read(key).reject do |env,res|
86
+ (vary == res['Vary']) && requests_match?(vary, env, stored_env)
87
+ end
88
+ entries.unshift [stored_env, stored_response]
89
+ write key, entries
90
+ end
91
+
92
+ private
93
+ # Extract the environment Hash from +request+ while making any
94
+ # necessary modifications in preparation for persistence. The Hash
95
+ # returned must be marshalable.
96
+ def persist_request(request)
97
+ env = request.env.dup
98
+ env.reject! { |key,val| key =~ /[^0-9A-Z_]/ }
99
+ env
100
+ end
101
+
102
+ # Extract the headers Hash from +response+ while making any
103
+ # necessary modifications in preparation for persistence. The Hash
104
+ # returned must be marshalable.
105
+ def persist_response(response)
106
+ headers = response.headers.reject { |k,v| HEADER_BLACKLIST.include?(k) }
107
+ headers['X-Status'] = response.status.to_s
108
+ headers
109
+ end
110
+
111
+ # Determine whether the two environment hashes are non-varying based on
112
+ # the vary response header value provided.
113
+ def requests_match?(vary, env1, env2)
114
+ return true if vary.nil? || vary == ''
115
+ vary.split(/[\s,]+/).all? do |header|
116
+ key = "HTTP_#{header.upcase.tr('-', '_')}"
117
+ env1[key] == env2[key]
118
+ end
119
+ end
120
+
121
+ protected
122
# Locate all cached request/response pairs that match the specified
# URL key. The result must be an Array of all cached request/response
# pairs. An empty Array must be returned if nothing is cached for
# the specified key.
#
# Abstract: this base implementation always raises NotImplementedError and
# must be overridden by concrete stores.
def read(key)
  raise NotImplementedError, "#{self.class.name} must implement #read"
end
129
+
130
# Store an Array of request/response pairs for the given key. Concrete
# implementations should not attempt to filter or concatenate the
# list in any way.
#
# Abstract: this base implementation always raises NotImplementedError and
# must be overridden by concrete stores.
def write(key, negotiations)
  raise NotImplementedError, "#{self.class.name} must implement #write"
end
136
+
137
# Remove all cached entries at the key specified. No error is raised
# when the key does not exist.
#
# Abstract: this base implementation always raises NotImplementedError and
# must be overridden by concrete stores.
def purge(key)
  raise NotImplementedError, "#{self.class.name} must implement #purge"
end
142
+
143
+ private
144
+
145
+ # Generate a SHA1 hex digest for the specified string. This is a
146
+ # simple utility method for meta store implementations.
147
+ def hexdigest(data)
148
+ Digest::SHA1.hexdigest(data)
149
+ end
150
+
151
+ public
152
+
153
+ # Concrete MetaStore implementation that uses a simple Hash to store
154
+ # request/response pairs on the heap.
155
+ class Heap < MetaStore
156
+ def initialize(hash={})
157
+ @hash = hash
158
+ end
159
+
160
+ def read(key)
161
+ @hash.fetch(key, [])
162
+ end
163
+
164
+ def write(key, entries)
165
+ @hash[key] = entries
166
+ end
167
+
168
+ def purge(key)
169
+ @hash.delete(key)
170
+ nil
171
+ end
172
+
173
+ def to_hash
174
+ @hash
175
+ end
176
+
177
+ def self.resolve(uri)
178
+ new
179
+ end
180
+ end
181
+
182
+ HEAP = Heap
183
+ MEM = HEAP
184
+
185
+ # Concrete MetaStore implementation that stores request/response
186
+ # pairs on disk.
187
+ class Disk < MetaStore
188
+ attr_reader :root
189
+
190
# Create a disk-backed meta store rooted at +root+. The path is expanded to
# an absolute path (available via #root) and that directory, along with any
# missing parents, is created with mode 0755.
def initialize(root="/tmp/rack-cache/meta-#{ARGV[0]}")
  @root = File.expand_path(root)
  # Create the expanded path: FileUtils.mkdir_p does not expand "~", so
  # creating the raw argument could produce a directory other than #root.
  FileUtils.mkdir_p(@root, :mode => 0755)
end
194
+
195
+ def read(key)
196
+ path = key_path(key)
197
+ File.open(path, 'rb') { |io| Marshal.load(io) }
198
+ rescue Errno::ENOENT
199
+ []
200
+ end
201
+
202
# Marshal +entries+ to the file that #key_path derives from +key+,
# overwriting any existing file. The write is attempted first; if it fails
# because the parent directory is missing, the directory is created with
# mode 0755 and the write is retried exactly once. A second ENOENT is
# re-raised to the caller.
def write(key, entries)
  path = key_path(key)
  created_dir = false
  begin
    File.open(path, 'wb') { |io| Marshal.dump(entries, io, -1) }
  rescue Errno::ENOENT
    raise if created_dir
    # mkdir_p does not fail when another writer created the directory first.
    FileUtils.mkdir_p(File.dirname(path), :mode => 0755)
    created_dir = true
    retry
  end
end
209
+
210
+ def purge(key)
211
+ path = key_path(key)
212
+ File.unlink(path)
213
+ nil
214
+ rescue Errno::ENOENT
215
+ nil
216
+ end
217
+
218
+ private
219
+ def key_path(key)
220
+ File.join(root, spread(hexdigest(key)))
221
+ end
222
+
223
+ def spread(sha, n=2)
224
+ sha = sha.dup
225
+ sha[n,0] = '/'
226
+ sha
227
+ end
228
+
229
+ public
230
+ def self.resolve(uri)
231
+ path = File.expand_path(uri.opaque || uri.path)
232
+ new path
233
+ end
234
+
235
+ end
236
+
237
+ DISK = Disk
238
+ FILE = Disk
239
+
240
+ # Stores request/response pairs in memcached. Keys are not stored
241
+ # directly since memcached has a 250-byte limit on key names. Instead,
242
+ # the SHA1 hexdigest of the key is used.
243
+ class MemCache < MetaStore
244
+
245
+ # The Memcached instance used to communicate with the memcached
246
+ # daemon.
247
+ attr_reader :cache
248
+
249
+ def initialize(server="localhost:11211", options={})
250
+ @cache =
251
+ if server.respond_to?(:stats)
252
+ server
253
+ else
254
+ require 'memcached'
255
+ Memcached.new(server, options)
256
+ end
257
+ end
258
+
259
+ def read(key)
260
+ key = hexdigest(key)
261
+ cache.get(key)
262
+ rescue Memcached::NotFound
263
+ []
264
+ end
265
+
266
+ def write(key, entries)
267
+ key = hexdigest(key)
268
+ cache.set(key, entries)
269
+ end
270
+
271
+ def purge(key)
272
+ key = hexdigest(key)
273
+ cache.delete(key)
274
+ nil
275
+ rescue Memcached::NotFound
276
+ nil
277
+ end
278
+
279
+ extend Rack::Utils
280
+
281
# Create MemCache store for the given URI. The URI must specify
# a host and may specify a port, namespace, and options:
#
# memcached://example.com:11211/namespace?opt1=val1&opt2=val2
#
# The port defaults to 11211. Query parameter names become Symbol option
# keys; the values 'true' and 'false' become booleans and every other value
# is converted to a Symbol. The URI path, minus its leading slash, is passed
# as the :namespace option.
#
# Query parameter names and values are documented with the memcached
# library: http://tinyurl.com/4upqnd
def self.resolve(uri)
  server = "#{uri.host}:#{uri.port || '11211'}"
  options =
    parse_query(uri.query).inject({}) { |opts, (name, value)|
      opts[name.to_sym] =
        case value
        when 'true'  then true
        when 'false' then false
        else value.to_sym
        end
      opts
    }
  options[:namespace] = uri.path.sub(/^\//, '')
  new server, options
end
303
+ end
304
+
305
+ MEMCACHE = MemCache
306
+ MEMCACHED = MemCache
307
+ end
308
+
309
+ end