rack-cache 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rack-cache might be problematic. Click here for more details.

Files changed (44) hide show
  1. data/CHANGES +27 -0
  2. data/COPYING +18 -0
  3. data/README +96 -0
  4. data/Rakefile +144 -0
  5. data/TODO +40 -0
  6. data/doc/configuration.markdown +224 -0
  7. data/doc/events.dot +27 -0
  8. data/doc/faq.markdown +133 -0
  9. data/doc/index.markdown +113 -0
  10. data/doc/layout.html.erb +33 -0
  11. data/doc/license.markdown +24 -0
  12. data/doc/rack-cache.css +362 -0
  13. data/doc/storage.markdown +162 -0
  14. data/lib/rack/cache.rb +51 -0
  15. data/lib/rack/cache/config.rb +65 -0
  16. data/lib/rack/cache/config/busters.rb +16 -0
  17. data/lib/rack/cache/config/default.rb +134 -0
  18. data/lib/rack/cache/config/no-cache.rb +13 -0
  19. data/lib/rack/cache/context.rb +95 -0
  20. data/lib/rack/cache/core.rb +271 -0
  21. data/lib/rack/cache/entitystore.rb +224 -0
  22. data/lib/rack/cache/headers.rb +237 -0
  23. data/lib/rack/cache/metastore.rb +309 -0
  24. data/lib/rack/cache/options.rb +119 -0
  25. data/lib/rack/cache/request.rb +37 -0
  26. data/lib/rack/cache/response.rb +76 -0
  27. data/lib/rack/cache/storage.rb +50 -0
  28. data/lib/rack/utils/environment_headers.rb +78 -0
  29. data/rack-cache.gemspec +74 -0
  30. data/test/cache_test.rb +35 -0
  31. data/test/config_test.rb +66 -0
  32. data/test/context_test.rb +465 -0
  33. data/test/core_test.rb +84 -0
  34. data/test/entitystore_test.rb +176 -0
  35. data/test/environment_headers_test.rb +71 -0
  36. data/test/headers_test.rb +215 -0
  37. data/test/logging_test.rb +45 -0
  38. data/test/metastore_test.rb +210 -0
  39. data/test/options_test.rb +64 -0
  40. data/test/pony.jpg +0 -0
  41. data/test/response_test.rb +37 -0
  42. data/test/spec_setup.rb +189 -0
  43. data/test/storage_test.rb +94 -0
  44. metadata +120 -0
@@ -0,0 +1,237 @@
1
+ require 'set'
2
+ require 'rack/utils/environment_headers'
3
+
4
+ module Rack::Cache
5
# Generic HTTP header helper methods. Provides access to headers that can be
# included in requests and responses. This can be mixed into any object that
# responds to #headers by returning a Hash.
module Headers
  # Determine if any of the header names exist:
  #   if header?('Authorization', 'Cookie')
  #     ...
  #   end
  def header?(*names)
    names.any? { |name| headers.include?(name) }
  end

  # A Hash of name=value pairs that correspond to the Cache-Control header.
  # Directive names are downcased. Valueless parameters (e.g.,
  # must-revalidate, no-store) have a Hash value of true. Whitespace around
  # each directive is ignored and empty list elements (as produced by a
  # leading or doubled comma) are skipped. This method always returns a
  # frozen Hash, empty if no Cache-Control header is present. The parsed
  # result is memoized in @cache_control.
  def cache_control
    @cache_control ||=
      (headers['Cache-Control'] || '').split(',').inject({}) { |directives, token|
        name, param = token.strip.split(/\s*=\s*/, 2)
        # An empty token splits to [], which leaves name nil; skip it instead
        # of calling String methods on nil.
        directives[name.downcase] = (param || true) unless name.nil? || name.empty?
        directives
      }.freeze
  end

  # Set the Cache-Control header to the values specified by the Hash. See
  # the #cache_control method for information on expected Hash structure.
  # Entries whose value is nil or false are left out of the header, entries
  # whose value is true are written as a bare directive name, and all others
  # are written as name=value. When nothing is left to write, the header is
  # deleted.
  def cache_control=(hash)
    header_value =
      hash.collect { |name, param|
        next nil unless param
        next name if param == true
        "#{name}=#{param}"
      }.compact.join(', ')
    if header_value.empty?
      headers.delete('Cache-Control')
      @cache_control = {}
    else
      headers['Cache-Control'] = header_value
      @cache_control = hash.dup.freeze
    end
  end

  # The literal value of the ETag HTTP header or nil if no ETag is specified.
  def etag
    headers['Etag']
  end
end
54
+
55
+ # HTTP request header helpers. When included in Rack::Cache::Request, headers
56
+ # may be accessed by their standard RFC 2616 names using the #headers Hash.
57
+ module RequestHeaders
58
+ include Rack::Cache::Headers
59
+
60
# Hash-like view of the HTTP request headers, built from the Rack
# environment (#env) on first use and reused afterwards.
def headers
  @headers = Rack::Utils::EnvironmentHeaders.new(env) unless @headers
  @headers
end
64
+
65
# Raw value of the If-Modified-Since request header, exactly as found in
# #headers; nil when the header is absent.
def if_modified_since
  field = 'If-Modified-Since'
  headers[field]
end
70
+
71
# Raw value of the If-None-Match request header, exactly as found in
# #headers; nil when the header is absent.
def if_none_match
  field = 'If-None-Match'
  headers[field]
end
76
+ end
77
+
78
+ # HTTP response header helper methods.
79
+ module ResponseHeaders
80
+ include Rack::Cache::Headers
81
+
82
# HTTP status codes of responses that may be cached, per RFC 2616.
# Consulted by #cacheable?.
CACHEABLE_RESPONSE_CODES = Set[200, 203, 300, 301, 302, 404, 410]
85
+
86
# Determine if the response is "fresh". Fresh responses may be served from
# cache without any interaction with the origin. The response is fresh when
# #ttl is available and greater than zero; when #ttl is nil (no freshness
# information) the result is nil.
def fresh?
  remaining = ttl
  remaining && remaining > 0
end
93
+
94
# Determine if the response is "stale". Stale responses must be validated
# with the origin before use. Always the boolean negation of #fresh?.
def stale?
  fresh? ? false : true
end
99
+
100
# Determine if the response is worth caching under any circumstance. An
# object that is cacheable may not necessarily be served from cache without
# first validating the response with the origin.
#
# The response is rejected outright when its status is not one of
# CACHEABLE_RESPONSE_CODES or when it is marked no-store. Otherwise it must
# carry a validator (#validateable?) or be fresh (#fresh?): an object with
# neither serves no purpose in a cache that only serves fresh or valid
# objects.
def cacheable?
  if CACHEABLE_RESPONSE_CODES.include?(status) && !no_store?
    validateable? || fresh?
  else
    false
  end
end
112
+
113
# The response includes specific information about its freshness. True when
# the +Expires+ header is set or when the +Cache-Control+ header carries a
# +max-age+ value.
def freshness_information?
  return true if header?('Expires')
  !cache_control['max-age'].nil?
end
119
+
120
# Determine if the response includes headers that can be used to validate
# the response with the origin using a conditional GET request, i.e. a
# Last-Modified or an Etag header.
def validateable?
  header?('Last-Modified', 'Etag')
end
125
+
126
# Indicates that the response should not be served from cache without first
# revalidating with the origin. Note that this does not necessarily imply
# that a caching agent ought not store the response in its cache. True
# whenever the Cache-Control hash has a non-nil 'no-cache' entry.
def no_cache?
  cache_control['no-cache'].nil? ? false : true
end
132
+
133
# Indicates that the response should not be stored under any circumstances.
# Returns the raw 'no-store' entry of the Cache-Control hash, so the result
# is nil (not false) when the directive is absent.
def no_store?
  directives = cache_control
  directives['no-store']
end
137
+
138
# The date, as specified by the Date header, parsed into a Time. When no
# Date header is present -- or its value cannot be parsed as an HTTP date --
# the Date header is set to the current time (#now), unless the headers
# Hash is frozen, and that time is returned. The result is memoized in
# @date.
def date
  return @date if @date

  raw = headers['Date']
  parsed =
    begin
      raw && Time.httpdate(raw)
    rescue ArgumentError
      # Time.httpdate rejects malformed values; treat them like a missing
      # header instead of failing the whole response.
      nil
    end

  unless parsed
    headers['Date'] = now.httpdate unless headers.frozen?
    parsed = now
  end
  @date = parsed
end
149
+
150
# The age of the response: whole seconds elapsed between #date and #now,
# never less than zero.
def age
  elapsed = (now - date).to_i
  elapsed > 0 ? elapsed : 0
end
154
+
155
# The number of seconds after the time specified in the response's Date
# header when the response should no longer be considered fresh. First
# check for a Cache-Control max-age value (converted with #to_i), and fall
# back on the Expires header (its time minus #date). Returns nil when no
# maximum age can be established, which includes an Expires value that is
# not a valid HTTP date (e.g. "0" or "-1"): callers then see no freshness
# lifetime and treat the response as not fresh.
def max_age
  if seconds = cache_control['max-age']
    seconds.to_i
  elsif raw = headers['Expires']
    expiry =
      begin
        Time.httpdate(raw)
      rescue ArgumentError
        # Malformed Expires: nothing usable to compute a lifetime from.
        nil
      end
    expiry - date if expiry
  end
end
166
+
167
# Sets the number of seconds after which the response should no longer
# be considered fresh. The value is stringified and merged into the current
# Cache-Control directives as max-age, then written back through
# #cache_control=.
def max_age=(value)
  directives = cache_control.merge('max-age' => value.to_s)
  self.cache_control = directives
end
172
+
173
# The Time when the response should be considered stale. When a
# Cache-Control/max-age value is present, this is calculated by adding the
# number of seconds specified to the response's #date value. Otherwise falls
# back to the time specified in the Expires header. Returns nil if neither
# is present, or if the Expires value is not a valid HTTP date (e.g. "0").
def expires_at
  if seconds = cache_control['max-age']
    date + seconds.to_i
  elsif raw = headers['Expires']
    begin
      Time.httpdate(raw)
    rescue ArgumentError
      # Malformed Expires: no usable expiration time.
      nil
    end
  end
end
185
+
186
# The response's time-to-live in seconds (#max_age minus #age), or nil when
# #max_age is nil, i.e. no freshness information is present in the
# response. When the response's #ttl is <= 0, the response may not be
# served from cache without first revalidating with the origin.
def ttl
  lifetime = max_age
  return nil unless lifetime
  lifetime - age
end
193
+
194
# Set the response's time-to-live to the specified number of seconds. This
# adjusts the Cache-Control/max-age value to the current #age plus
# +seconds+.
def ttl=(seconds)
  lifetime = age + seconds
  self.max_age = lifetime
end
199
+
200
# The value of the Last-Modified header exactly as it appears in #headers
# (no date parsing / conversion is performed); nil when absent.
def last_modified
  field = 'Last-Modified'
  headers[field]
end
205
+
206
# Determine if the response was last modified at the time provided.
# time_value is the exact string provided in an origin response's
# Last-Modified header; it is compared to #last_modified without any
# parsing. A nil or false time_value is returned as-is.
def last_modified_at?(time_value)
  return time_value unless time_value
  last_modified == time_value
end
212
+
213
# The literal value of the Vary header as found in #headers, or nil when no
# Vary header is present.
def vary
  field = 'Vary'
  headers[field]
end
218
+
219
# Does the response include a Vary header? True whenever #vary is not nil.
def vary?
  vary.nil? ? false : true
end
223
+
224
# An array of the header names given in the Vary header, split on runs of
# whitespace and/or commas, or an empty array when no Vary header is
# present.
def vary_header_names
  raw = headers['Vary']
  raw ? raw.split(/[\s,]+/) : []
end
230
+
231
+ private
232
# The current time, captured on first use and reused afterwards so that
# repeated calls on the same object return the same Time.
def now
  @now = Time.now unless @now
  @now
end
235
+ end
236
+
237
+ end
@@ -0,0 +1,309 @@
1
+ require 'rack'
2
+ require 'fileutils'
3
+ require 'digest/sha1'
4
+
5
+ module Rack::Cache
6
+
7
+ # The MetaStore is responsible for storing meta information about a
8
+ # request/response pair keyed by the request's URL.
9
+ #
10
+ # The meta store keeps a list of request/response pairs for each canonical
11
+ # request URL. A request/response pair is a two element Array of the form:
12
+ # [request, response]
13
+ #
14
+ # The +request+ element is a Hash of Rack environment keys. Only protocol
15
+ # keys (i.e., those that start with "HTTP_") are stored. The +response+
16
+ # element is a Hash of cached HTTP response headers for the paired request.
17
+ #
18
+ # The MetaStore class is abstract and should not be instanstiated
19
+ # directly. Concrete subclasses should implement the protected #read,
20
+ # #write, and #purge methods. Care has been taken to keep these low-level
21
+ # methods dumb and straight-forward to implement.
22
+ class MetaStore
23
+
24
# Names of headers that should not be stored in cache (from RFC 2616).
# Consulted by #persist_response when building the stored header Hash.
HEADER_BLACKLIST = Set.new([
  'Connection',
  'Keep-Alive',
  'Proxy-Authenticate',
  'Proxy-Authorization',
  'TE',
  'Trailers',
  'Transfer-Encoding',
  'Upgrade'
])
35
+
36
# Locate a cached response for the request provided. Reads the entries
# stored under the request's fullpath and picks the first one whose stored
# request matches the incoming request on the headers named by the stored
# response's Vary value. Returns an activated Rack::Cache::Response built
# from the stored headers and the body opened from +entity_store+, or nil if
# nothing is cached or no entry matches.
def lookup(request, entity_store)
  candidates = read(request.fullpath)

  # nothing cached under this key
  return nil if candidates.empty?

  request_env = request.env
  hit = candidates.find do |stored_env, stored_headers|
    requests_match?(stored_headers['Vary'], request_env, stored_env)
  end
  return nil unless hit

  # TODO what if body doesn't exist in entity store?
  # rebuild the response object from the stored headers and entity body
  stored_headers = hit[1]
  status = stored_headers['X-Status']
  body = entity_store.open(stored_headers['X-Content-Digest'])
  cached = Rack::Cache::Response.new(status.to_i, stored_headers, body)
  cached.activate!
  cached
end
61
+
62
# Write a cache entry to the store under the request's fullpath. Existing
# entries are read, and any that carry the same Vary value and match the
# new request on it are dropped; the new entry is placed first. When the
# persisted response headers carry no X-Content-Digest, the response body
# is first written to +entity_store+, the digest and Content-Length are
# recorded in the stored headers, and response.body is replaced with the
# body re-opened from the entity store. Returns the result of #write.
#--
# TODO canonicalize URL key
def store(request, response, entity_store)
  cache_key = request.fullpath
  request_env = persist_request(request)
  response_headers = persist_response(response)

  # only an original response (no digest yet) needs its body stored
  if response_headers['X-Content-Digest'].nil?
    digest, size = entity_store.write(response.body)
    response_headers['X-Content-Digest'] = digest
    response_headers['Content-Length'] = size.to_s
    response.body = entity_store.open(digest)
  end

  # keep the existing entries this one does not supersede
  vary = response_headers['Vary']
  survivors = read(cache_key).reject do |env, res|
    (vary == res['Vary']) && requests_match?(vary, env, request_env)
  end
  write(cache_key, [[request_env, response_headers], *survivors])
end
91
+
92
+ private
93
# Extract the environment Hash from +request+ in preparation for
# persistence. Works on a copy of request.env and drops every key that
# contains a character other than a digit, an uppercase ASCII letter, or an
# underscore (so keys such as "rack.input" are removed). The Hash returned
# must be marshalable.
def persist_request(request)
  request.env.dup.delete_if { |name, _value| name =~ /[^0-9A-Z_]/ }
end
101
+
102
# Extract the headers Hash from +response+ in preparation for persistence.
# Headers named in HEADER_BLACKLIST are left out; names are compared
# without regard to case, since HTTP header names are case-insensitive and
# a header such as "transfer-encoding" must not slip into the cache. The
# response status is recorded as a String under 'X-Status'. The response's
# own headers are not modified. The Hash returned must be marshalable.
def persist_response(response)
  blacklisted = HEADER_BLACKLIST.map { |name| name.downcase }
  headers = response.headers.reject { |name, _value| blacklisted.include?(name.to_s.downcase) }
  headers['X-Status'] = response.status.to_s
  headers
end
110
+
111
# Determine whether the two environment hashes are non-varying based on
# the vary response header value provided. A nil or empty +vary+ always
# matches. Otherwise each name listed in +vary+ (separated by whitespace
# and/or commas) is mapped to its Rack environment key -- "HTTP_" plus the
# upcased name with dashes turned into underscores -- and both hashes must
# hold equal values under every such key.
def requests_match?(vary, env1, env2)
  return true if vary.nil? || vary == ''

  vary.split(/[\s,]+/).each do |field|
    env_key = "HTTP_#{field.upcase.tr('-', '_')}"
    return false unless env1[env_key] == env2[env_key]
  end
  true
end
120
+
121
+ protected
122
# Locate all cached request/response pairs that match the specified
# URL key. The result must be an Array of all cached request/response
# pairs. An empty Array must be returned if nothing is cached for
# the specified key.
#
# Abstract: this base implementation always raises NotImplementedError;
# concrete stores override it.
def read(key)
  raise NotImplementedError, 'MetaStore subclasses must implement #read'
end
129
+
130
# Store an Array of request/response pairs for the given key. Concrete
# implementations should not attempt to filter or concatenate the
# list in any way.
#
# Abstract: this base implementation always raises NotImplementedError;
# concrete stores override it.
def write(key, negotiations)
  raise NotImplementedError, 'MetaStore subclasses must implement #write'
end
136
+
137
# Remove all cached entries at the key specified. No error is raised
# when the key does not exist.
#
# Abstract: this base implementation always raises NotImplementedError;
# concrete stores override it.
def purge(key)
  raise NotImplementedError, 'MetaStore subclasses must implement #purge'
end
142
+
143
+ private
144
+
145
# Generate the SHA1 digest of +data+ as a hexadecimal String. This is a
# simple utility method for meta store implementations.
def hexdigest(data)
  Digest::SHA1.new.update(data).hexdigest
end
150
+
151
+ public
152
+
153
# Concrete MetaStore implementation that keeps request/response pairs in a
# plain Hash on the heap.
class Heap < MetaStore
  class << self
    # Build a store for +uri+. The URI is not inspected; a store backed by a
    # new, empty Hash is returned.
    def resolve(uri)
      new
    end
  end

  # hash:: the Hash used as backing storage (a new empty Hash by default).
  def initialize(hash={})
    @hash = hash
  end

  # The entries stored under +key+, or an empty Array when there are none.
  def read(key)
    @hash.fetch(key) { [] }
  end

  # Store +entries+ under +key+, replacing whatever was there.
  def write(key, entries)
    @hash.store(key, entries)
  end

  # Forget everything stored under +key+. Always returns nil.
  def purge(key)
    @hash.delete(key)
    nil
  end

  # The backing Hash itself (not a copy).
  def to_hash
    @hash
  end
end

HEAP = Heap
MEM = HEAP
184
+
185
# Concrete MetaStore implementation that stores request/response
# pairs on disk. Each key's entry list is marshaled into one file whose path
# is derived from the SHA1 hexdigest of the key: the first two hex
# characters name a subdirectory of #root and the rest name the file.
class Disk < MetaStore
  # Absolute path of the directory that holds the cache files.
  attr_reader :root

  # root:: directory to store entries under; expanded to an absolute path
  #        and created (including parents) if missing.
  #
  # NOTE(review): the default path interpolates ARGV[0], which looks
  # accidental -- confirm the intended per-process suffix before relying on
  # the default.
  def initialize(root="/tmp/rack-cache/meta-#{ARGV[0]}")
    @root = File.expand_path(root)
    # Create the expanded path, not the raw argument: they differ for
    # inputs such as "~/cache", which mkdir_p would otherwise create
    # literally under the working directory.
    FileUtils.mkdir_p(@root, :mode => 0755)
  end

  # Load the entries stored for +key+; an empty Array when no file exists.
  #
  # NOTE(review): Marshal.load is only safe on trusted data; the cache
  # directory must not be writable by untrusted users.
  def read(key)
    path = key_path(key)
    File.open(path, 'rb') { |io| Marshal.load(io) }
  rescue Errno::ENOENT
    []
  end

  # Marshal +entries+ into the file for +key+, creating the containing
  # subdirectory on demand.
  def write(key, entries)
    path = key_path(key)
    created_dir = false
    begin
      File.open(path, 'wb') { |io| Marshal.dump(entries, io, -1) }
    rescue Errno::ENOENT
      # Retry at most once; if the directory exists and the open still
      # fails, surface the error instead of looping.
      raise if created_dir
      created_dir = true
      # mkdir_p tolerates the directory having been created concurrently,
      # where Dir.mkdir would raise Errno::EEXIST.
      FileUtils.mkdir_p(File.dirname(path), :mode => 0755)
      retry
    end
  end

  # Delete the file for +key+. Returns nil whether or not the file existed.
  def purge(key)
    path = key_path(key)
    File.unlink(path)
    nil
  rescue Errno::ENOENT
    nil
  end

  private

  # Path of the file that holds the entries for +key+.
  def key_path(key)
    File.join(root, spread(hexdigest(key)))
  end

  # Copy of +sha+ with a "/" inserted after the first +n+ characters.
  def spread(sha, n=2)
    sha = sha.dup
    sha[n,0] = '/'
    sha
  end

  public

  # Build a store rooted at the path named by +uri+ (its opaque part when
  # present, otherwise its path).
  def self.resolve(uri)
    path = File.expand_path(uri.opaque || uri.path)
    new path
  end
end

DISK = Disk
FILE = Disk
239
+
240
# Stores request/response pairs in memcached. Keys are not stored
# directly since memcached has a 250-byte limit on key names. Instead,
# the SHA1 hexdigest of the key is used.
class MemCache < MetaStore
  # Makes Rack::Utils helpers callable on the class itself; .resolve uses
  # parse_query (presumably Rack::Utils.parse_query -- confirm).
  extend Rack::Utils

  # The Memcached instance used to communicate with the memcached
  # daemon.
  attr_reader :cache

  # server::  either an object that responds to #stats, which is used as
  #           the client as-is, or a server address handed to Memcached.new.
  # options:: passed to Memcached.new when a client has to be created.
  #
  # The 'memcached' library is only required when a client must be created.
  def initialize(server="localhost:11211", options={})
    @cache =
      if server.respond_to?(:stats)
        server
      else
        require 'memcached'
        Memcached.new(server, options)
      end
  end

  # Entries stored for +key+; an empty Array when memcached reports the key
  # as not found.
  def read(key)
    key = hexdigest(key)
    cache.get(key)
  rescue Memcached::NotFound
    []
  end

  # Store +entries+ under the digest of +key+.
  def write(key, entries)
    key = hexdigest(key)
    cache.set(key, entries)
  end

  # Delete the entries for +key+. Returns nil whether or not the key existed.
  def purge(key)
    key = hexdigest(key)
    cache.delete(key)
    nil
  rescue Memcached::NotFound
    nil
  end

  # Create MemCache store for the given URI. The URI must specify
  # a host and may specify a port, namespace, and options:
  #
  # memcached://example.com:11211/namespace?opt1=val1&opt2=val2
  #
  # Query parameter names are converted to Symbols. The values 'true' and
  # 'false' become booleans and every other value becomes a Symbol. The URI
  # path, minus its leading slash, is passed as the :namespace option.
  #
  # Query parameter names and values are documented with the memcached
  # library: http://tinyurl.com/4upqnd
  def self.resolve(uri)
    server = "#{uri.host}:#{uri.port || '11211'}"
    options = parse_query(uri.query)
    # Iterate over a snapshot of the keys because each String key is
    # replaced by its Symbol form inside the loop.
    options.keys.each do |key|
      raw = options.delete(key)
      coerced =
        case raw
        when 'true'  then true
        when 'false' then false
        else raw.to_sym
        end
      options[key.to_sym] = coerced
    end
    options[:namespace] = uri.path.sub(/^\//, '')
    new server, options
  end
end

MEMCACHE = MemCache
MEMCACHED = MemCache
307
+ end
308
+
309
+ end