hishel 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hishel/__init__.py +41 -1
- hishel/_async/_client.py +1 -1
- hishel/_async/_storages.py +10 -14
- hishel/_async/_transports.py +9 -4
- hishel/_controller.py +2 -3
- hishel/_lmdb_types_.pyi +53 -0
- hishel/_serializers.py +2 -2
- hishel/_sync/_client.py +1 -1
- hishel/_sync/_storages.py +10 -14
- hishel/_sync/_transports.py +9 -4
- hishel/_utils.py +340 -0
- hishel/beta/__init__.py +59 -0
- hishel/beta/_async_cache.py +167 -0
- hishel/beta/_core/__init__.py +0 -0
- hishel/beta/_core/_async/_storages/_sqlite.py +411 -0
- hishel/beta/_core/_base/_storages/_base.py +260 -0
- hishel/beta/_core/_base/_storages/_packing.py +165 -0
- hishel/beta/_core/_headers.py +301 -0
- hishel/beta/_core/_spec.py +2291 -0
- hishel/beta/_core/_sync/_storages/_sqlite.py +411 -0
- hishel/beta/_core/models.py +176 -0
- hishel/beta/_sync_cache.py +167 -0
- hishel/beta/httpx.py +317 -0
- hishel/beta/requests.py +193 -0
- {hishel-0.1.3.dist-info → hishel-0.1.4.dist-info}/METADATA +41 -4
- hishel-0.1.4.dist-info/RECORD +41 -0
- hishel-0.1.3.dist-info/RECORD +0 -27
- {hishel-0.1.3.dist-info → hishel-0.1.4.dist-info}/WHEEL +0 -0
- {hishel-0.1.3.dist-info → hishel-0.1.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,2291 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
import uuid
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from dataclasses import dataclass, field, replace
|
|
8
|
+
from typing import (
|
|
9
|
+
TYPE_CHECKING,
|
|
10
|
+
Any,
|
|
11
|
+
Dict,
|
|
12
|
+
Literal,
|
|
13
|
+
Optional,
|
|
14
|
+
TypeVar,
|
|
15
|
+
Union,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
from hishel._utils import parse_date, partition
|
|
19
|
+
from hishel.beta._core._headers import Headers, Range, Vary, parse_cache_control
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from hishel.beta import CompletePair, Request, Response
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Type variable restricted to ``State`` and its subclasses.
TState = TypeVar("TState", bound="State")

# Status codes treated as cacheable by default when a response carries no
# explicit freshness information. Grouped by status class for readability.
HEURISTICALLY_CACHEABLE_STATUS_CODES = (
    # 2xx
    200,
    203,
    204,
    # 3xx
    300,
    301,
    308,
    # 4xx
    404,
    405,
    410,
    414,
    # 5xx
    501,
)

# Module-level logger for the caching specification logic.
logger = logging.getLogger("hishel.core.spec")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _default_supported_methods() -> list[str]:
    """Return a new default method list so instances never share one object."""
    return ["GET", "HEAD"]


@dataclass
class CacheOptions:
    """
    User-tunable settings attached to every cache state.

    Attributes are plain dataclass fields; each instance receives its own
    ``supported_methods`` list.
    """

    # True when the cache behaves as a shared cache, False for a private one.
    # NOTE(review): presumably forwarded as ``is_cache_shared`` to the
    # freshness helpers - confirm against the state implementations.
    shared: bool = True

    # HTTP methods considered for caching; defaults to GET and HEAD.
    supported_methods: list[str] = field(default_factory=_default_supported_methods)

    # Whether stale responses may be served.
    # NOTE(review): presumably forwarded as ``allow_stale_option`` to
    # ``allowed_stale`` - confirm against the state implementations.
    allow_stale: bool = False
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class State(ABC):
    """
    Abstract base for every node of the caching state machine.

    A state carries the cache configuration and exposes a single
    transition method, ``next``, that concrete subclasses must provide.
    """

    # Cache configuration associated with this state.
    options: CacheOptions

    @abstractmethod
    def next(self, *args: Any, **kwargs: Any) -> Union["State", None]:
        """
        Advance the state machine.

        Returns:
        -------
        State | None
            The follow-up state, or None when the subclass has no further
            transition to offer.

        Raises:
        ------
        NotImplementedError
            Always, when the base implementation is invoked directly.
        """
        raise NotImplementedError("Subclasses must implement this method")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def vary_headers_match(
    original_request: Request,
    associated_pair: CompletePair,
) -> bool:
    """
    Check whether a stored pair may be selected for a request under Vary rules.

    RFC 9111 Section 4.1: Calculating Cache Keys with the Vary Header Field
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.1

    Parameters:
    ----------
    original_request : Request
        The incoming request we are trying to satisfy (despite the name,
        this is the *new* request, not the one that was stored).
    associated_pair : CompletePair
        A stored request/response pair that is a candidate for reuse.

    Returns:
    -------
    bool
        True when the stored response has no (or an empty) Vary header, or
        when every header nominated by Vary has the same value in both the
        incoming request and the stored request. False when any nominated
        header differs or when Vary contains the member "*".

    Notes:
    -----
    Values are compared exactly as returned by ``headers.get``; a header
    absent from both requests counts as a match. No whitespace or value
    normalization is applied here.
    """
    vary_value = associated_pair.response.headers.get("vary")

    # A response without Vary does not depend on any request header.
    if not vary_value:
        return True

    incoming_headers = original_request.headers
    stored_headers = associated_pair.request.headers

    for field_name in Vary.from_value(vary_value).values:
        # "*" never matches (RFC 9111 Section 4.1); otherwise the nominated
        # header must be identical in both requests.
        if field_name == "*" or incoming_headers.get(field_name) != stored_headers.get(field_name):
            return False

    return True
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def get_freshness_lifetime(response: Response, is_cache_shared: bool) -> Optional[int]:
    """
    Calculates the freshness lifetime of a cached response in seconds.

    RFC 9111 Section 4.2.1: Calculating Freshness Lifetime
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2.1

    The first applicable rule wins:

    1. ``s-maxage`` (only when ``is_cache_shared`` is True)
    2. ``max-age``
    3. ``Expires`` minus ``Date``
    4. Heuristic freshness derived from ``Last-Modified``

    Parameters:
    ----------
    response : Response
        The cached response to calculate freshness for
    is_cache_shared : bool
        True if this is a shared cache, False for a private cache

    Returns:
    -------
    Optional[int]
        Freshness lifetime in seconds, or None if no rule applies and no
        heuristic can be computed. An ``Expires`` header that cannot be
        parsed yields 0 (already expired).
    """
    cache_control = parse_cache_control(response.headers.get("Cache-Control"))

    # PRIORITY 1: s-maxage overrides max-age and Expires for shared caches
    # (RFC 9111 Section 5.2.2.10); private caches fall through.
    if is_cache_shared and cache_control.s_maxage is not None:
        return cache_control.s_maxage

    # PRIORITY 2: max-age applies to shared and private caches alike
    # (RFC 9111 Section 5.2.2.1).
    if cache_control.max_age is not None:
        return cache_control.max_age

    # PRIORITY 3: Expires - Date (RFC 9111 Section 5.3).
    if "expires" in response.headers:
        expires_timestamp = parse_date(response.headers["expires"])

        if expires_timestamp is None:
            # RFC 9111 Section 5.3: "A cache recipient MUST interpret invalid
            # date formats, especially the value "0", as representing a time
            # in the past (i.e., 'already expired')." A zero lifetime makes
            # the response stale immediately instead of raising.
            return 0

        date_timestamp = parse_date(response.headers["date"]) if "date" in response.headers else None

        if date_timestamp is None:
            # Missing or invalid Date: RFC 9110 Section 6.6.1 allows a
            # recipient to substitute the time the response was received;
            # the current time is used as that stand-in.
            date_timestamp = time.time()

        return int(expires_timestamp - date_timestamp)

    # PRIORITY 4: no explicit expiration, fall back to the heuristic
    # (RFC 9111 Section 4.2.2). This is None when no heuristic is available.
    return get_heuristic_freshness(response)
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def allowed_stale(response: Response, allow_stale_option: bool) -> bool:
    """
    Decide whether a stale response may be served without revalidation.

    RFC 9111 Section 4.2.4: Serving Stale Responses
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2.4

    Parameters:
    ----------
    response : Response
        The stale cached response being considered for use
    allow_stale_option : bool
        Configuration flag indicating if serving stale is allowed at all

    Returns:
    -------
    bool
        False when the configuration forbids stale responses, or when the
        response's Cache-Control carries ``no-cache`` or ``must-revalidate``;
        True otherwise.

    Notes:
    -----
    Only ``no-cache`` (RFC 9111 Section 5.2.2.4) and ``must-revalidate``
    (RFC 9111 Section 5.2.2.2) are inspected. ``proxy-revalidate`` and
    ``s-maxage`` are not checked by this function.
    """
    # Configuration gate: nothing else matters when stale serving is off.
    if not allow_stale_option:
        return False

    directives = parse_cache_control(response.headers.get("Cache-Control"))

    # Either directive forbids reuse of a stale response without validation.
    return not (directives.no_cache or directives.must_revalidate)
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def get_heuristic_freshness(response: Response) -> int | None:
    """
    Calculates a heuristic freshness lifetime when no explicit expiration is provided.

    RFC 9111 Section 4.2.2: Calculating Heuristic Freshness
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2.2

    Heuristic:
    ---------
    - Freshness = 10% of (now - Last-Modified)
    - Maximum: 1 week (604,800 seconds)
    - Minimum: 0 seconds (a Last-Modified in the future yields 0)

    Parameters:
    ----------
    response : Response
        The response to calculate heuristic freshness for

    Returns:
    -------
    int | None
        Heuristic freshness lifetime in seconds, or None when the response
        has no Last-Modified header or its value cannot be parsed.
    """
    last_modified = response.headers.get("last-modified")

    # Without Last-Modified there is nothing to base the heuristic on.
    if not last_modified:
        return None

    last_modified_timestamp = parse_date(last_modified)

    if last_modified_timestamp is None:  # pragma: nocover
        # Unparseable date, so the heuristic cannot be calculated.
        return None

    # RFC 9111 Section 4.2.2: "A typical setting of this fraction might be 10%."
    age_since_modification = time.time() - last_modified_timestamp
    heuristic_freshness = int(age_since_modification * 0.1)

    # Conservative upper bound for a lifetime the origin never stated.
    one_week = 604_800  # 7 days * 24 hours * 60 minutes * 60 seconds

    # Clamp to [0, one_week]: clock skew or a Last-Modified in the future
    # would otherwise produce a negative lifetime.
    return max(0, min(one_week, heuristic_freshness))
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
def get_age(response: Response) -> int:
    """
    Compute the current age of a cached response in whole seconds.

    RFC 9111 Section 4.2.3: Calculating Age
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2.3

    This is a simplified calculation: ``age = max(0, now - Date)``. The Age
    header, response delay and resident time from the full RFC formula are
    not taken into account.

    Parameters:
    ----------
    response : Response
        The cached response to calculate age for

    Returns:
    -------
    int
        Age in seconds, never negative. 0 is returned when the Date header
        is missing, cannot be parsed, or lies in the future.
    """
    headers = response.headers

    # No Date header: treat the response as brand new.
    if "date" not in headers:
        return 0

    date_timestamp = parse_date(headers["date"])

    if date_timestamp is None:  # pragma: nocover
        # Invalid Date header, treat as age 0
        return 0

    # Apparent age: time elapsed since the response was generated.
    elapsed = time.time() - date_timestamp

    # A Date in the future must not produce a negative age.
    return int(elapsed) if elapsed > 0 else 0
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
def make_conditional_request(request: Request, response: Response) -> Request:
    """
    Derive a validation (conditional) request from a request and a stored response.

    The validators carried by the stored response are copied into precondition
    header fields on a copy of ``request``. The server can then answer with
    304 Not Modified instead of resending the full representation.

    RFC 9111 Section 4.3.1: Sending a Validation Request
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3.1

    Parameters:
    ----------
    request : Request
        The request that should be turned into a conditional request
    response : Response
        The stored response whose ETag / Last-Modified values are used

    Returns:
    -------
    Request
        A copy of ``request`` whose headers are the original request headers
        plus the precondition headers described below

    Precondition Mapping:
    --------------------
    - ``etag`` present in the stored response
        -> ``If-None-Match`` is set to the stored ETag value
           (RFC 9110 Section 13.1.2)
    - ``last-modified`` present with a non-empty value
        -> ``If-Modified-Since`` is set to the stored Last-Modified value
           (RFC 9110 Section 13.1.3)

    Both preconditions are sent when both validators are available. If the
    request already carries a header under exactly the same key, the
    precondition value overrides it.
    """
    stored_headers = response.headers

    # Validators from the stored response; None means "field not present".
    # Last-Modified: RFC 9110 Section 8.8.2, ETag: RFC 9110 Section 8.8.3.
    last_modified = stored_headers["last-modified"] if "last-modified" in stored_headers else None
    etag = stored_headers["etag"] if "etag" in stored_headers else None

    preconditions: Dict[str, str] = {}

    # RFC 9111 Section 4.3.1: a cache MUST send the entity tag when the stored
    # response provided one. Presence alone decides, even for an empty value.
    if etag is not None:
        preconditions["If-None-Match"] = etag

    # RFC 9111 Section 4.3.1: a cache SHOULD send the Last-Modified value.
    # An empty Last-Modified value is treated like a missing one.
    if last_modified:
        preconditions["If-Modified-Since"] = last_modified

    # Preconditions are unpacked last so that they win over identical keys
    # already present in the request headers.
    merged_headers = {**request.headers, **preconditions}

    # dataclasses.replace() returns a new request; the argument is left untouched.
    return replace(request, headers=Headers(merged_headers))
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
def exclude_unstorable_headers(response: Response, is_cache_shared: bool) -> Response:
    """
    Return a copy of ``response`` without the header fields a cache must not store.

    RFC 9111 Section 3.1: Storing Header and Trailer Fields
    https://www.rfc-editor.org/rfc/rfc9111.html#section-3.1

    Parameters:
    ----------
    response : Response
        The response whose headers are filtered
    is_cache_shared : bool
        Whether the cache is shared; only then are the fields named by a
        qualified ``private`` directive dropped

    Returns:
    -------
    Response
        A copy of ``response`` that carries only the storable headers

    Always Removed:
    --------------
    Connection-specific fields (RFC 9110 Section 7.6.1) and proxy
    authentication fields:
    connection, keep-alive, te, transfer-encoding, upgrade, proxy-connection,
    proxy-authenticate, proxy-authentication-info, proxy-authorization

    Removed Depending on Cache-Control:
    ----------------------------------
    - Fields named by a qualified ``no-cache="..."`` directive
      (RFC 9111 Section 5.2.2.4), for every kind of cache
    - Fields named by a qualified ``private="..."`` directive
      (RFC 9111 Section 5.2.2.7), only when ``is_cache_shared`` is true

    Header names are compared case-insensitively (both sides are lowercased).
    """
    # Lowercase names of the fields that are never written to storage.
    blocked_names = {
        "connection",  # Connection management
        "keep-alive",  # Connection timeout and max requests
        "te",  # Transfer encoding accepted by client
        "transfer-encoding",  # How the body is encoded for transfer
        "upgrade",  # Protocol upgrade (e.g., WebSocket)
        "proxy-connection",  # Non-standard but widely used
        "proxy-authenticate",  # Proxy authentication challenge
        "proxy-authentication-info",  # Proxy auth additional info
        "proxy-authorization",  # Proxy auth credentials
    }

    directives = parse_cache_control(response.headers.get("cache-control"))

    # A list value means the directive was given in its qualified form,
    # e.g. Cache-Control: no-cache="Set-Cookie, Set-Cookie2".
    if isinstance(directives.no_cache, list):
        blocked_names.update(name.lower() for name in directives.no_cache)

    # Qualified private, e.g. Cache-Control: private="Authorization":
    # a shared cache may store the response, but not the listed fields.
    if is_cache_shared and isinstance(directives.private, list):
        blocked_names.update(name.lower() for name in directives.private)

    # Copy over every header whose lowercased name is not blocked.
    storable = {}
    for key, value in response.headers.items():
        if key.lower() in blocked_names:
            continue
        storable[key] = value

    return replace(response, headers=Headers(storable))
|
|
907
|
+
|
|
908
|
+
|
|
909
|
+
def refresh_response_headers(
    stored_response: Response,
    revalidation_response: Response,
    is_cache_shared: bool = True,
) -> Response:
    """
    Updates a stored response's headers with fresh metadata from a 304 response.

    When revalidation succeeds (304 Not Modified), the cached response is still
    valid but its metadata (Date, Cache-Control, etc.) should be updated with
    fresh values from the 304 response.

    RFC 9111 Section 3.2: Updating Stored Header Fields
    https://www.rfc-editor.org/rfc/rfc9111.html#section-3.2

    Parameters:
    ----------
    stored_response : Response
        The cached response that is being freshened
    revalidation_response : Response
        The 304 Not Modified response containing fresh metadata
    is_cache_shared : bool
        Forwarded to exclude_unstorable_headers() for the final filtering step.
        Defaults to True (the strictest filtering, and the behaviour of this
        function before the parameter existed). A private cache should pass
        False so that fields named by a qualified ``private="..."`` directive,
        which it is allowed to store, are not stripped on every refresh.

    Returns:
    -------
    Response
        A copy of the stored response with updated headers

    RFC 9111 Compliance:
    -------------------
    From RFC 9111 Section 3.2:
    "When doing so, the cache MUST add each header field in the provided response
    to the stored response, replacing field values that are already present,
    with the following exceptions:
    - Header fields that provide metadata about the message content and/or the
      selected representation (e.g., Content-Encoding, Content-Type, Content-Range)
      MUST NOT be updated unless the response is being stored as a result of
      successful validation."

    This implementation is stricter than the quoted text: the content metadata
    fields listed below are never taken from the revalidation response.

    Update Rules:
    ------------
    1. Merge headers from 304 response into stored response
    2. 304 headers override stored headers (newer metadata)
    3. EXCEPT: Content metadata headers are NOT updated
       - Content-Encoding
       - Content-Type
       - Content-Range
    4. Remove unstorable headers after merging

    Rationale for Exceptions:
    ------------------------
    Content-* headers describe the body of the response. A 304 response has
    no body, so its Content-* headers (if any) don't describe the cached body.
    We must preserve the original Content-* headers from the cached response.

    For example:
    - Cached response: Content-Type: application/json, body is JSON
    - 304 response: Content-Type: text/plain (this is wrong for the cached body!)
    - Result: Keep application/json from cached response

    Examples:
    --------
    >>> stored = Response(
    ...     status_code=200,
    ...     headers=Headers({
    ...         "date": "Mon, 01 Jan 2024 00:00:00 GMT",
    ...         "cache-control": "max-age=3600",
    ...         "content-type": "application/json"
    ...     })
    ... )
    >>> revalidation = Response(
    ...     status_code=304,
    ...     headers=Headers({
    ...         "date": "Mon, 01 Jan 2024 12:00:00 GMT",
    ...         "cache-control": "max-age=7200",
    ...         "content-type": "text/plain"  # Should be ignored
    ...     })
    ... )
    >>> refreshed = refresh_response_headers(stored, revalidation)
    >>> refreshed.headers["cache-control"]  # Updated
    'max-age=7200'
    >>> refreshed.headers["content-type"]  # Preserved from stored response
    'application/json'
    """
    # Lowercase names of the headers that must NOT be taken from the 304
    # response, because they describe the message body/representation.
    excluded_headers = {
        "content-encoding",  # How the body is encoded (gzip, br, etc.)
        "content-type",  # MIME type of the body
        "content-range",  # For partial content (206 responses)
    }

    # Everything from the revalidation response that is allowed to override
    # the stored metadata.
    fresh_metadata = {
        key: value for key, value in revalidation_response.headers.items() if key.lower() not in excluded_headers
    }

    # Stored headers form the base; fresh metadata is unpacked last so it wins.
    new_headers = {**stored_response.headers, **fresh_metadata}

    # Filter the merged result so that connection-specific headers that may
    # have arrived with the 304 response are not written back to storage.
    # The shared/private distinction is the caller's choice (default: shared).
    return exclude_unstorable_headers(
        replace(
            stored_response,
            headers=Headers(new_headers),
        ),
        is_cache_shared=is_cache_shared,
    )
|
|
1026
|
+
|
|
1027
|
+
|
|
1028
|
+
# Union of the state types of the cache state machine. The members are given
# as strings (forward references) because this alias is evaluated at import
# time, before classes such as IdleClient (defined further down) exist.
AnyState = Union[
    "CacheMiss", "StoreAndUse", "CouldNotBeStored", "FromCache",
    "NeedToBeUpdated", "NeedRevalidation", "IdleClient", "InvalidatePairs",
]  # fmt: skip

# Safe request methods, as defined in
# https://www.rfc-editor.org/rfc/rfc9110#name-safe-methods
SAFE_METHODS = frozenset(("GET", "HEAD", "OPTIONS", "TRACE"))
|
|
1041
|
+
|
|
1042
|
+
|
|
1043
|
+
def create_idle_state(role: Literal["client", "server"], options: Optional[CacheOptions] = None) -> IdleClient:
    """
    Create the idle (entry) state of the cache state machine.

    Parameters:
    ----------
    role : Literal["client", "server"]
        Which side the state machine acts for; every value other than
        "server" yields a client state
    options : Optional[CacheOptions]
        Cache configuration; a default ``CacheOptions()`` is used when this
        is None (or otherwise falsy)

    Returns:
    -------
    IdleClient
        A new idle client state holding the effective options

    Raises:
    ------
    NotImplementedError
        If ``role`` is "server"
    """
    if role != "server":
        effective_options = options or CacheOptions()
        return IdleClient(options=effective_options)

    raise NotImplementedError("Server role is not implemented yet.")
|
|
1047
|
+
|
|
1048
|
+
|
|
1049
|
+
@dataclass
|
|
1050
|
+
class IdleClient(State):
|
|
1051
|
+
"""
|
|
1052
|
+
Represents the idle state of a client initiating an HTTP request.
|
|
1053
|
+
|
|
1054
|
+
This is the entry point of the cache state machine. When a client wants to send
|
|
1055
|
+
a request, this state determines whether the request can be satisfied from cache,
|
|
1056
|
+
needs revalidation, or must be forwarded to the origin server (cache miss).
|
|
1057
|
+
|
|
1058
|
+
State Transitions:
|
|
1059
|
+
-----------------
|
|
1060
|
+
- CacheMiss: When no suitable cached response exists or the request cannot be cached
|
|
1061
|
+
- FromCache: When a fresh or stale-but-allowed cached response can be used
|
|
1062
|
+
- NeedRevalidation: When a stale cached response exists and must be validated
|
|
1063
|
+
|
|
1064
|
+
RFC 9111 References:
|
|
1065
|
+
-------------------
|
|
1066
|
+
- Section 4: Constructing Responses from Caches
|
|
1067
|
+
https://www.rfc-editor.org/rfc/rfc9111.html#section-4
|
|
1068
|
+
- Section 4.1: Calculating Cache Keys (Vary handling)
|
|
1069
|
+
https://www.rfc-editor.org/rfc/rfc9111.html#section-4.1
|
|
1070
|
+
- Section 4.2: Freshness
|
|
1071
|
+
https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2
|
|
1072
|
+
- Section 4.3: Validation
|
|
1073
|
+
https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3
|
|
1074
|
+
|
|
1075
|
+
Attributes:
|
|
1076
|
+
----------
|
|
1077
|
+
options : CacheOptions
|
|
1078
|
+
Configuration options for cache behavior (inherited from State)
|
|
1079
|
+
"""
|
|
1080
|
+
|
|
1081
|
+
def next(
|
|
1082
|
+
self, request: Request, associated_pairs: list[CompletePair]
|
|
1083
|
+
) -> Union["CacheMiss", "FromCache", "NeedRevalidation"]:
|
|
1084
|
+
"""
|
|
1085
|
+
Determines the next state transition based on the request and available cached responses.
|
|
1086
|
+
|
|
1087
|
+
This method implements the core cache lookup algorithm defined in RFC 9111 Section 4.
|
|
1088
|
+
It evaluates whether a cached response can be reused and transitions to the appropriate
|
|
1089
|
+
next state.
|
|
1090
|
+
|
|
1091
|
+
Parameters:
|
|
1092
|
+
----------
|
|
1093
|
+
request : Request
|
|
1094
|
+
The incoming HTTP request from the client
|
|
1095
|
+
associated_pairs : list[CompletePair]
|
|
1096
|
+
List of request-response pairs previously stored in the cache that may match
|
|
1097
|
+
this request. These pairs are pre-filtered by cache key (typically URI).
|
|
1098
|
+
|
|
1099
|
+
Returns:
|
|
1100
|
+
-------
|
|
1101
|
+
Union[CacheMiss, FromCache, NeedRevalidation]
|
|
1102
|
+
- CacheMiss: No suitable cached response; request must be forwarded to origin
|
|
1103
|
+
- FromCache: A suitable cached response can be returned immediately
|
|
1104
|
+
- NeedRevalidation: A cached response exists but requires validation before use
|
|
1105
|
+
|
|
1106
|
+
RFC 9111 Compliance:
|
|
1107
|
+
-------------------
|
|
1108
|
+
This method enforces the requirements from RFC 9111 Section 4, paragraph 1:
|
|
1109
|
+
"When presented with a request, a cache MUST NOT reuse a stored response unless:
|
|
1110
|
+
1. the presented target URI and that of the stored response match, and
|
|
1111
|
+
2. the request method associated with the stored response allows it to be used
|
|
1112
|
+
for the presented request, and
|
|
1113
|
+
3. request header fields nominated by the stored response (if any) match those
|
|
1114
|
+
presented (see Section 4.1), and
|
|
1115
|
+
4. the stored response does not contain the no-cache directive (Section 5.2.2.4),
|
|
1116
|
+
unless it is successfully validated (Section 4.3), and
|
|
1117
|
+
5. the stored response is one of the following:
|
|
1118
|
+
- fresh (see Section 4.2), or
|
|
1119
|
+
- allowed to be served stale (see Section 4.2.4), or
|
|
1120
|
+
- successfully validated (see Section 4.3)."
|
|
1121
|
+
|
|
1122
|
+
Implementation Notes:
|
|
1123
|
+
--------------------
|
|
1124
|
+
- Range requests always result in a cache miss (simplified behavior)
|
|
1125
|
+
- Unsafe methods (POST, PUT, DELETE, etc.) are written through to origin
|
|
1126
|
+
- Multiple matching responses are sorted by Date header (most recent first)
|
|
1127
|
+
- Age header is updated when serving from cache
|
|
1128
|
+
|
|
1129
|
+
Examples:
|
|
1130
|
+
--------
|
|
1131
|
+
>>> # Cache miss - no matching responses
|
|
1132
|
+
>>> idle = IdleClient(options=default_options)
|
|
1133
|
+
>>> next_state = idle.next(get_request, [])
|
|
1134
|
+
>>> isinstance(next_state, CacheMiss)
|
|
1135
|
+
True
|
|
1136
|
+
|
|
1137
|
+
>>> # From cache - fresh response available
|
|
1138
|
+
>>> idle = IdleClient(options=default_options)
|
|
1139
|
+
>>> cached_pair = CompletePair(get_request, fresh_response)
|
|
1140
|
+
>>> next_state = idle.next(get_request, [cached_pair])
|
|
1141
|
+
>>> isinstance(next_state, FromCache)
|
|
1142
|
+
True
|
|
1143
|
+
|
|
1144
|
+
>>> # Need revalidation - stale response that cannot be served stale
|
|
1145
|
+
>>> idle = IdleClient(options=default_options)
|
|
1146
|
+
>>> cached_pair = CompletePair(get_request, stale_response)
|
|
1147
|
+
>>> next_state = idle.next(get_request, [cached_pair])
|
|
1148
|
+
>>> isinstance(next_state, NeedRevalidation)
|
|
1149
|
+
True
|
|
1150
|
+
"""
|
|
1151
|
+
|
|
1152
|
+
# ============================================================================
|
|
1153
|
+
# STEP 1: Handle Range Requests
|
|
1154
|
+
# ============================================================================
|
|
1155
|
+
# RFC 9111 Section 3.3: Storing Incomplete Responses
|
|
1156
|
+
# https://www.rfc-editor.org/rfc/rfc9111.html#section-3.3
|
|
1157
|
+
#
|
|
1158
|
+
# Range requests are complex and require special handling. For simplicity,
|
|
1159
|
+
# this implementation treats all range requests as cache misses.
|
|
1160
|
+
# A full implementation could store and combine partial responses.
|
|
1161
|
+
request_range = Range.try_from_str(request.headers["range"]) if "range" in request.headers else None
|
|
1162
|
+
|
|
1163
|
+
if request_range is not None:
|
|
1164
|
+
# Simplified behavior: always forward range requests to origin
|
|
1165
|
+
return CacheMiss(options=self.options, request=request)
|
|
1166
|
+
|
|
1167
|
+
# ============================================================================
|
|
1168
|
+
# STEP 2: Handle Unsafe Methods (Write-Through)
|
|
1169
|
+
# ============================================================================
|
|
1170
|
+
# RFC 9111 Section 4, paragraph 5:
|
|
1171
|
+
# https://www.rfc-editor.org/rfc/rfc9111.html#section-4-5
|
|
1172
|
+
#
|
|
1173
|
+
# "A cache MUST write through requests with methods that are unsafe
|
|
1174
|
+
# (Section 9.2.1 of [HTTP]) to the origin server; i.e., a cache is not
|
|
1175
|
+
# allowed to generate a reply to such a request before having forwarded
|
|
1176
|
+
# the request and having received a corresponding response."
|
|
1177
|
+
#
|
|
1178
|
+
# Unsafe methods: POST, PUT, DELETE, PATCH, etc.
|
|
1179
|
+
# Safe methods: GET, HEAD, OPTIONS, TRACE
|
|
1180
|
+
if request.method.upper() not in SAFE_METHODS:
|
|
1181
|
+
return CacheMiss(request=request, options=self.options) # pragma: nocover
|
|
1182
|
+
|
|
1183
|
+
# ============================================================================
|
|
1184
|
+
# STEP 3: Define Cache Reuse Conditions (RFC 9111 Section 4)
|
|
1185
|
+
# ============================================================================
|
|
1186
|
+
# The following lambda functions implement the five conditions that must ALL
|
|
1187
|
+
# be satisfied for a cached response to be reusable.
|
|
1188
|
+
|
|
1189
|
+
# CONDITION 1: URI Matching
|
|
1190
|
+
# RFC 9111 Section 4, paragraph 2.1:
|
|
1191
|
+
# "the presented target URI (Section 7.1 of [HTTP]) and that of the stored
|
|
1192
|
+
# response match"
|
|
1193
|
+
#
|
|
1194
|
+
# The cache key primarily consists of the request URI. Only responses with
|
|
1195
|
+
# matching URIs can be considered for reuse.
|
|
1196
|
+
url_matches = lambda pair: pair.request.url == request.url # noqa: E731
|
|
1197
|
+
|
|
1198
|
+
# CONDITION 2: Method Matching
|
|
1199
|
+
# RFC 9111 Section 4, paragraph 2.2:
|
|
1200
|
+
# "the request method associated with the stored response allows it to be
|
|
1201
|
+
# used for the presented request"
|
|
1202
|
+
#
|
|
1203
|
+
# Generally, only GET responses can satisfy GET requests, HEAD responses
|
|
1204
|
+
# for HEAD requests, etc. Some methods (like HEAD) can sometimes be satisfied
|
|
1205
|
+
# by GET responses, but this implementation requires exact matches.
|
|
1206
|
+
method_matches = lambda pair: pair.request.method == request.method # noqa: E731
|
|
1207
|
+
|
|
1208
|
+
# CONDITION 3: Vary Header Matching
|
|
1209
|
+
# RFC 9111 Section 4.1: Calculating Cache Keys
|
|
1210
|
+
# https://www.rfc-editor.org/rfc/rfc9111.html#section-4.1
|
|
1211
|
+
#
|
|
1212
|
+
# "When a cache receives a request that can be satisfied by a stored response
|
|
1213
|
+
# and that stored response contains a Vary header field, the cache MUST NOT
|
|
1214
|
+
# use that stored response without revalidation unless all the presented
|
|
1215
|
+
# request header fields nominated by that Vary field value match those fields
|
|
1216
|
+
# in the original request."
|
|
1217
|
+
#
|
|
1218
|
+
# Example: If response has "Vary: Accept-Encoding", the cached response can
|
|
1219
|
+
# only be used if the new request has the same Accept-Encoding header value.
|
|
1220
|
+
vary_headers_same = lambda pair: vary_headers_match(request, pair) # noqa: E731
|
|
1221
|
+
|
|
1222
|
+
# CONDITION 4: No-Cache Directive Handling
|
|
1223
|
+
# RFC 9111 Section 5.2.2.4: no-cache Response Directive
|
|
1224
|
+
# https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.4
|
|
1225
|
+
#
|
|
1226
|
+
# "The no-cache response directive... indicates that the response MUST NOT be
|
|
1227
|
+
# used to satisfy any other request without forwarding it for validation and
|
|
1228
|
+
# receiving a successful response."
|
|
1229
|
+
#
|
|
1230
|
+
# If a cached response has Cache-Control: no-cache, it cannot be reused without
|
|
1231
|
+
# validation, regardless of its freshness.
|
|
1232
|
+
def no_cache_missing(pair: CompletePair) -> bool:
|
|
1233
|
+
"""Check if the cached response lacks the no-cache directive."""
|
|
1234
|
+
return parse_cache_control(pair.response.headers.get("cache-control")).no_cache is False
|
|
1235
|
+
|
|
1236
|
+
# CONDITION 5: Freshness or Allowed Stale
|
|
1237
|
+
# RFC 9111 Section 4.2: Freshness
|
|
1238
|
+
# https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2
|
|
1239
|
+
#
|
|
1240
|
+
# A response can be reused if it is either:
|
|
1241
|
+
# a) Fresh: age < freshness_lifetime
|
|
1242
|
+
# b) Allowed to be served stale: Section 4.2.4
|
|
1243
|
+
# https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2.4
|
|
1244
|
+
#
|
|
1245
|
+
# Note: Condition 5.3 (successfully validated) is handled in the
|
|
1246
|
+
# NeedRevalidation state, not here.
|
|
1247
|
+
def fresh_or_allowed_stale(pair: CompletePair) -> bool:
    """
    Determine if a cached response is fresh or allowed to be served stale.

    RFC 9111 Section 4.2:
    "A 'fresh' response is one whose age has not yet exceeded its freshness
    lifetime. Conversely, a 'stale' response is one where it has."

    RFC 9111 Section 4.2.4: Serving Stale Responses
    "A cache MUST NOT generate a stale response unless it is disconnected or
    doing so is explicitly permitted by the client or origin server."

    Parameters:
    ----------
    pair : CompletePair
        The stored request-response pair whose response is being evaluated

    Returns:
    -------
    bool
        True if the response's age is below its freshness lifetime, or if
        `allowed_stale` permits serving it stale; False otherwise.
    """
    # Use the configured cache type instead of always assuming a shared cache.
    # NOTE(review): presumably `is_cache_shared` decides whether shared-cache-only
    # directives (e.g. s-maxage) take part in the calculation; confirm against
    # `get_freshness_lifetime`.
    freshness_lifetime = get_freshness_lifetime(pair.response, is_cache_shared=self.options.shared)
    age = get_age(pair.response)

    # Check freshness: response_is_fresh = (freshness_lifetime > current_age).
    # Without a freshness lifetime the response can never be considered fresh.
    is_fresh = False if freshness_lifetime is None else age < freshness_lifetime

    # Fall back to the stale-serving policy only when the response is not fresh
    return is_fresh or allowed_stale(pair.response, allow_stale_option=self.options.allow_stale)
|
|
1267
|
+
|
|
1268
|
+
# ============================================================================
|
|
1269
|
+
# STEP 4: Filter Cached Responses by Conditions 1-4
|
|
1270
|
+
# ============================================================================
|
|
1271
|
+
# Apply the first four conditions to filter the list of associated pairs.
|
|
1272
|
+
# Condition 5 (freshness) is applied separately to partition responses into
|
|
1273
|
+
# "ready to use" and "needs revalidation" groups.
|
|
1274
|
+
filtered_pairs = [
|
|
1275
|
+
pair
|
|
1276
|
+
for pair in associated_pairs
|
|
1277
|
+
if url_matches(pair) and method_matches(pair) and vary_headers_same(pair) and no_cache_missing(pair) # type: ignore[no-untyped-call]
|
|
1278
|
+
]
|
|
1279
|
+
|
|
1280
|
+
# ============================================================================
|
|
1281
|
+
# STEP 5: Select Most Recent Response
|
|
1282
|
+
# ============================================================================
|
|
1283
|
+
# RFC 9111 Section 4, paragraph 8:
|
|
1284
|
+
# https://www.rfc-editor.org/rfc/rfc9111.html#section-4-8
|
|
1285
|
+
#
|
|
1286
|
+
# "When more than one suitable response is stored, a cache MUST use the most
|
|
1287
|
+
# recent one (as determined by the Date header field). It can also forward
|
|
1288
|
+
# the request with 'Cache-Control: max-age=0' or 'Cache-Control: no-cache'
|
|
1289
|
+
# to disambiguate which response to use."
|
|
1290
|
+
#
|
|
1291
|
+
# Sort by Date header in descending order (most recent first).
|
|
1292
|
+
filtered_pairs.sort(
|
|
1293
|
+
key=lambda pair: parse_date(
|
|
1294
|
+
pair.response.headers.get("date", str(int(time.time()))),
|
|
1295
|
+
)
|
|
1296
|
+
or int(time.time()),
|
|
1297
|
+
reverse=True,
|
|
1298
|
+
)
|
|
1299
|
+
|
|
1300
|
+
# ============================================================================
|
|
1301
|
+
# STEP 6: Partition by Freshness (Condition 5)
|
|
1302
|
+
# ============================================================================
|
|
1303
|
+
# Separate responses into two groups:
|
|
1304
|
+
# - ready_to_use: Fresh or allowed-stale responses that can be served immediately
|
|
1305
|
+
# - need_revalidation: Stale responses that require validation before serving
|
|
1306
|
+
ready_to_use, need_revalidation = partition(filtered_pairs, fresh_or_allowed_stale)
|
|
1307
|
+
|
|
1308
|
+
# ============================================================================
|
|
1309
|
+
# STEP 7: Determine Next State Based on Available Responses
|
|
1310
|
+
# ============================================================================
|
|
1311
|
+
|
|
1312
|
+
if ready_to_use:
|
|
1313
|
+
# --------------------------------------------------------------------
|
|
1314
|
+
# Transition to: FromCache
|
|
1315
|
+
# --------------------------------------------------------------------
|
|
1316
|
+
# We have a fresh (or allowed-stale) response that can be served.
|
|
1317
|
+
#
|
|
1318
|
+
# RFC 9111 Section 4, paragraph 4:
|
|
1319
|
+
# https://www.rfc-editor.org/rfc/rfc9111.html#section-4-4
|
|
1320
|
+
#
|
|
1321
|
+
# "When a stored response is used to satisfy a request without validation,
|
|
1322
|
+
# a cache MUST generate an Age header field (Section 5.1), replacing any
|
|
1323
|
+
# present in the response with a value equal to the stored response's
|
|
1324
|
+
# current_age; see Section 4.2.3."
|
|
1325
|
+
#
|
|
1326
|
+
# The Age header informs the client how old the cached response is.
|
|
1327
|
+
|
|
1328
|
+
# Mark all ready-to-use responses with metadata (for observability)
|
|
1329
|
+
for pair in ready_to_use:
|
|
1330
|
+
pair.response.metadata["hishel_from_cache"] = True # type: ignore
|
|
1331
|
+
|
|
1332
|
+
# Use the most recent response (first in sorted list)
|
|
1333
|
+
selected_pair = ready_to_use[0]
|
|
1334
|
+
|
|
1335
|
+
# Calculate current age and update the Age header
|
|
1336
|
+
current_age = get_age(selected_pair.response)
|
|
1337
|
+
|
|
1338
|
+
return FromCache(
|
|
1339
|
+
pair=replace(
|
|
1340
|
+
selected_pair,
|
|
1341
|
+
response=replace(
|
|
1342
|
+
selected_pair.response,
|
|
1343
|
+
headers=Headers(
|
|
1344
|
+
{
|
|
1345
|
+
**selected_pair.response.headers,
|
|
1346
|
+
"age": str(current_age),
|
|
1347
|
+
}
|
|
1348
|
+
),
|
|
1349
|
+
),
|
|
1350
|
+
),
|
|
1351
|
+
options=self.options,
|
|
1352
|
+
)
|
|
1353
|
+
|
|
1354
|
+
elif need_revalidation:
|
|
1355
|
+
# --------------------------------------------------------------------
|
|
1356
|
+
# Transition to: NeedRevalidation
|
|
1357
|
+
# --------------------------------------------------------------------
|
|
1358
|
+
# We have stale cached response(s) that cannot be served without
|
|
1359
|
+
# validation (e.g., they are not fresh and serving them stale is not permitted).
|
|
1360
|
+
#
|
|
1361
|
+
# RFC 9111 Section 4.3: Validation
|
|
1362
|
+
# https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3
|
|
1363
|
+
#
|
|
1364
|
+
# "When a cache has one or more stored responses for a requested URI,
|
|
1365
|
+
# but cannot serve any of them (e.g., because they are not fresh, or
|
|
1366
|
+
# one cannot be chosen), it can use the conditional request mechanism
|
|
1367
|
+
# in the forwarded request to give the next inbound server an opportunity
|
|
1368
|
+
# to choose a valid stored response to use, updating the stored metadata
|
|
1369
|
+
# in the process, or to replace the stored response(s) with a new response."
|
|
1370
|
+
#
|
|
1371
|
+
# Convert the request into a conditional request using validators
|
|
1372
|
+
# (ETag, Last-Modified) from the cached response.
|
|
1373
|
+
return NeedRevalidation(
|
|
1374
|
+
request=make_conditional_request(request, need_revalidation[-1].response),
|
|
1375
|
+
revalidating_pairs=need_revalidation,
|
|
1376
|
+
options=self.options,
|
|
1377
|
+
original_request=request,
|
|
1378
|
+
)
|
|
1379
|
+
else:
|
|
1380
|
+
# --------------------------------------------------------------------
|
|
1381
|
+
# Transition to: CacheMiss
|
|
1382
|
+
# --------------------------------------------------------------------
|
|
1383
|
+
# No suitable cached responses found. The request must be forwarded
|
|
1384
|
+
# to the origin server.
|
|
1385
|
+
#
|
|
1386
|
+
# This can happen when:
|
|
1387
|
+
# - No responses are cached for this URI
|
|
1388
|
+
# - Cached responses don't match the request (e.g., different Vary headers)
|
|
1389
|
+
# - Cached responses have no-cache directive
|
|
1390
|
+
# - Other conditions prevent cache reuse
|
|
1391
|
+
return CacheMiss(
|
|
1392
|
+
request=request,
|
|
1393
|
+
options=self.options,
|
|
1394
|
+
)
|
|
1395
|
+
|
|
1396
|
+
|
|
1397
|
+
@dataclass
class CacheMiss(State):
    """
    Represents a cache miss state where a response must be evaluated for storage.

    This state is reached when:
    1. No suitable cached response exists (from IdleClient)
    2. A request must be forwarded to the origin server
    3. The origin server's response must be evaluated for cacheability

    State Transitions:
    -----------------
    - StoreAndUse: Response meets all RFC 9111 storage requirements and will be cached
    - CouldNotBeStored: Response fails one or more storage requirements and cannot be cached

    RFC 9111 References:
    -------------------
    - Section 3: Storing Responses in Caches
      https://www.rfc-editor.org/rfc/rfc9111.html#section-3
    - Section 3.5: Authenticated Responses
      https://www.rfc-editor.org/rfc/rfc9111.html#section-3.5

    Attributes:
    ----------
    request : Request
        The request that caused the cache miss. Note this is a Request object,
        not an IncompletePair, because this state can be reached from NeedRevalidation
        where we don't have an incomplete pair.
    after_revalidation : bool
        Flag indicating if this cache miss occurred after a revalidation attempt.
        When True, the response will be marked with hishel_revalidated metadata.
    options : CacheOptions
        Configuration options for cache behavior (inherited from State)
    """

    request: Request
    """
    The request that missed the cache.

    Note that this has a type of Request and not IncompletePair because
    when moving to this state from `NeedRevalidation` we don't have incomplete pair
    """

    after_revalidation: bool = False
    """
    Indicates whether the cache miss occurred after a revalidation attempt.
    """

    def next(self, response: Response, pair_id: uuid.UUID) -> Union["StoreAndUse", "CouldNotBeStored"]:
        """
        Evaluates whether a response can be stored in the cache.

        This method implements the storage decision algorithm from RFC 9111 Section 3.
        A response can only be stored if ALL of the following conditions are met:

        1. Request method is understood by the cache
        2. Response status code is final (not 1xx informational)
        3. Cache understands how to handle the response status code
        4. No no-store directive is present
        5. Private directive allows storage (for shared caches)
        6. Authorization is properly handled (for shared caches): either the request
           carries no Authorization header, or the response explicitly allows
           shared caching (public, s-maxage or must-revalidate)
        7. Response contains explicit caching information or is heuristically cacheable

        Parameters:
        ----------
        response : Response
            The HTTP response received from the origin server
        pair_id : uuid.UUID
            Unique identifier for this request-response pair

        Returns:
        -------
        Union[StoreAndUse, CouldNotBeStored]
            - StoreAndUse: Response can and will be stored in cache
            - CouldNotBeStored: Response cannot be stored (fails one or more requirements)

        RFC 9111 Compliance:
        -------------------
        From RFC 9111 Section 3:
        "A cache MUST NOT store a response to a request unless:
        - the request method is understood by the cache;
        - the response status code is final;
        - if the response status code is 206 or 304, or the must-understand cache
          directive is present: the cache understands the response status code;
        - the no-store cache directive is not present in the response;
        - if the cache is shared: the private response directive is either not present
          or allows a shared cache to store a modified response;
        - if the cache is shared: the Authorization header field is not present in the
          request or a response directive is present that explicitly allows shared caching;
        - the response contains at least one of the following:
          * a public response directive
          * a private response directive (if cache is not shared)
          * an Expires header field
          * a max-age response directive
          * an s-maxage response directive (if cache is shared)
          * a status code that is defined as heuristically cacheable"

        Side Effects:
        ------------
        Sets metadata flags on the response object:
        - hishel_spec_ignored: False (caching spec is being followed)
        - hishel_from_cache: False (response is from origin, not cache)
        - hishel_revalidated: True (if after_revalidation is True)
        - hishel_stored: True/False (whether response was stored)

        Logging:
        -------
        When a response cannot be stored, a debug log is emitted (only if DEBUG is
        enabled) explaining the first RFC requirement that failed, with a direct
        link to the relevant RFC section.

        Examples:
        --------
        >>> # Cacheable response
        >>> cache_miss = CacheMiss(request=get_request, options=default_options)
        >>> response = Response(
        ...     status_code=200,
        ...     headers=Headers({"cache-control": "max-age=3600"})
        ... )
        >>> next_state = cache_miss.next(response, uuid.uuid4())
        >>> isinstance(next_state, StoreAndUse)
        True

        >>> # Non-cacheable response (no-store)
        >>> response = Response(
        ...     status_code=200,
        ...     headers=Headers({"cache-control": "no-store"})
        ... )
        >>> next_state = cache_miss.next(response, uuid.uuid4())
        >>> isinstance(next_state, CouldNotBeStored)
        True
        """

        # ============================================================================
        # STEP 1: Set Response Metadata
        # ============================================================================
        # Initialize metadata flags to track the response lifecycle

        response.metadata["hishel_spec_ignored"] = False  # type: ignore
        # We are following the caching specification

        response.metadata["hishel_from_cache"] = False  # type: ignore
        # This response came from origin server, not cache

        if self.after_revalidation:
            response.metadata["hishel_revalidated"] = True  # type: ignore
            # Mark that this response is the result of a revalidation

        # ============================================================================
        # STEP 2: Parse Cache-Control Directive
        # ============================================================================
        # Extract and parse the Cache-Control header to check caching directives

        request = self.request
        response_cache_control = parse_cache_control(response.headers.get("cache-control"))

        # ============================================================================
        # STEP 3: Evaluate Storage Requirements (7 Conditions)
        # ============================================================================
        # All conditions must be True for the response to be storable.
        # Each condition corresponds to a requirement from RFC 9111 Section 3.

        # CONDITION 1: Request Method Understanding
        # RFC 9111 Section 3, paragraph 2.1:
        # https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.1.1
        #
        # "the request method is understood by the cache"
        #
        # The cache must recognize and support caching for this HTTP method.
        # Typically, only safe methods (GET, HEAD) are cacheable.
        # This prevents caching of methods with side effects (POST, PUT, DELETE).
        method_understood_by_cache = request.method.upper() in self.options.supported_methods

        # CONDITION 2: Response Status Code is Final
        # RFC 9111 Section 3, paragraph 2.2:
        # https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.2.1
        #
        # "the response status code is final (see Section 15 of [HTTP])"
        #
        # 1xx status codes are informational and not final responses.
        # Only final responses (2xx, 3xx, 4xx, 5xx) can be cached.
        # Check: status_code // 100 != 1 means not in the 1xx range
        response_status_code_is_final = response.status_code // 100 != 1

        # CONDITION 3: Cache Understands Response Status Code
        # RFC 9111 Section 3, paragraph 2.3:
        # https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.3.1
        #
        # "if the response status code is 206 or 304, or the must-understand cache
        # directive (see Section 5.2.2.3) is present: the cache understands the
        # response status code"
        #
        # 206 Partial Content: Used for range requests, requires special handling
        # 304 Not Modified: Used for conditional requests, is not a complete response
        #
        # This implementation takes a conservative approach: if the status is 206 or 304,
        # we mark it as not understood, preventing storage. A full implementation would
        # handle these specially (304 updates existing cache, 206 stores partial content).
        understands_how_to_cache = response.status_code not in (206, 304)

        # CONDITION 4: No no-store Directive
        # RFC 9111 Section 5.2.2.5: no-store Response Directive
        # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.5
        #
        # "The no-store response directive indicates that a cache MUST NOT store
        # any part of either the immediate request or the response"
        #
        # no-store is the strongest cache prevention directive. When present,
        # nothing should be stored, regardless of other directives.
        no_store_is_not_present = not response_cache_control.no_store

        # CONDITION 5: Private Directive Allows Storing (Shared Cache Only)
        # RFC 9111 Section 5.2.2.7: private Response Directive
        # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.7
        #
        # "The unqualified private response directive indicates that a shared cache
        # MUST NOT store the response"
        #
        # For shared caches (proxies, CDNs):
        # - If private=True, the response is for a single user only
        # - Shared caches MUST NOT store private responses
        #
        # For private caches (browser caches):
        # - private directive is allowed and encouraged
        #
        # Logic: storing is forbidden only when the cache is shared AND the response
        # carries the unqualified private directive (private is True).
        # Therefore: allowed = NOT ((shared cache) AND (private is True))
        private_directive_allows_storing = not (self.options.shared and response_cache_control.private is True)

        # CONDITION 6: Authorization Header Handling (Shared Cache Only)
        # RFC 9111 Section 3.5: Caching Authenticated Responses
        # https://www.rfc-editor.org/rfc/rfc9111.html#section-3.5
        #
        # "A shared cache MUST NOT use a cached response to a request with an
        # Authorization header field unless... a response directive is present that
        # explicitly allows shared caching"
        #
        # Requests with Authorization headers often contain user-specific data.
        # Shared caches must be careful not to serve one user's data to another.
        #
        # RFC 9111 Section 3.5 names must-revalidate, public and s-maxage as the
        # response directives that explicitly allow shared caching.
        # NOTE(review): `must_revalidate` is read defensively with getattr because
        # the parsed cache-control type is defined outside this block; confirm the
        # attribute name against it.
        shared_caching_explicitly_allowed = (
            response_cache_control.public
            or response_cache_control.s_maxage is not None
            or getattr(response_cache_control, "must_revalidate", False)
        )

        # Logic: (not shared) OR (no auth header) OR (has explicit directive)
        authorization_allows_storing = (
            not self.options.shared
            or "authorization" not in request.headers
            or bool(shared_caching_explicitly_allowed)
        )

        # CONDITION 7: Response Contains Required Caching Information
        # RFC 9111 Section 3, paragraph 2.7:
        # https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.7.1
        #
        # "the response contains at least one of the following:..."
        #
        # A response must have explicit caching metadata OR be heuristically cacheable.
        # This ensures we only cache responses that the origin server intended to be cached.
        contains_required_component = (
            # OPTION A: public Directive
            # RFC 9111 Section 5.2.2.9:
            # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.9
            # "The public response directive indicates that a cache MAY store the response"
            # Explicitly marks response as cacheable by any cache
            response_cache_control.public
            # OPTION B: private Directive (Private Cache Only)
            # RFC 9111 Section 5.2.2.7:
            # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.7
            # "private... indicates that... a private cache MAY store the response"
            # For private caches only (not shared caches)
            or (not self.options.shared and response_cache_control.private)
            # OPTION C: Expires Header
            # RFC 9111 Section 5.3: Expires
            # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.3
            # "The Expires header field gives the date/time after which the response
            # is considered stale"
            # Explicit expiration time
            or ("expires" in response.headers)
            # OPTION D: max-age Directive
            # RFC 9111 Section 5.2.2.1: max-age Response Directive
            # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.1
            # "The max-age response directive indicates that the response is to be
            # considered stale after its age is greater than the specified number of seconds"
            # Most common caching directive
            or (response_cache_control.max_age is not None)
            # OPTION E: s-maxage Directive (Shared Cache Only)
            # RFC 9111 Section 5.2.2.10: s-maxage Response Directive
            # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.10
            # "The s-maxage response directive indicates that, for a shared cache,
            # the maximum age specified by this directive overrides the maximum age
            # specified by either the max-age directive or the Expires header field"
            # Specific to shared caches (proxies, CDNs)
            or (self.options.shared and response_cache_control.s_maxage is not None)
            # OPTION F: Heuristically Cacheable Status Code
            # RFC 9111 Section 4.2.2: Calculating Heuristic Freshness
            # https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2.2
            # "a cache MAY assign a heuristic expiration time when an explicit time
            # is not specified"
            #
            # Certain status codes are defined as "heuristically cacheable":
            # 200 OK, 203 Non-Authoritative, 204 No Content, 206 Partial Content,
            # 300 Multiple Choices, 301 Moved Permanently, 308 Permanent Redirect,
            # 404 Not Found, 405 Method Not Allowed, 410 Gone,
            # 414 URI Too Long, 501 Not Implemented
            or response.status_code in HEURISTICALLY_CACHEABLE_STATUS_CODES
        )

        # ============================================================================
        # STEP 4: Determine Storage Decision
        # ============================================================================
        # If ANY condition is False, the response cannot be stored

        if (
            not method_understood_by_cache
            or not response_status_code_is_final
            or not understands_how_to_cache
            or not no_store_is_not_present
            or not private_directive_allows_storing
            or not authorization_allows_storing
            or not contains_required_component
        ):
            # --------------------------------------------------------------------
            # Transition to: CouldNotBeStored
            # --------------------------------------------------------------------
            # One or more storage requirements failed. Log the specific reason
            # and return a CouldNotBeStored state.

            # Detailed logging for debugging (only when DEBUG level is enabled).
            # Only the first failing requirement, in condition order, is reported.
            if logger.isEnabledFor(logging.DEBUG):
                if not method_understood_by_cache:
                    logger.debug(
                        "Cannot store the response because the request method is not understood by the cache. "
                        "See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.1.1"
                    )
                elif not response_status_code_is_final:
                    logger.debug(
                        f"Cannot store the response because the response status code ({response.status_code}) "
                        "is not final. See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.2.1"
                    )
                elif not understands_how_to_cache:
                    logger.debug(
                        "Cannot store the response because the cache does not understand how to cache the response. "
                        "See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.3.2"
                    )
                elif not no_store_is_not_present:
                    logger.debug(
                        "Cannot store the response because the no-store cache directive is present in the response. "
                        "See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.4.1"
                    )
                elif not private_directive_allows_storing:
                    logger.debug(
                        "Cannot store the response because the `private` response directive does not "
                        "allow shared caches to store it. See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.5.1"
                    )
                elif not authorization_allows_storing:
                    logger.debug(
                        "Cannot store the response because the cache is shared and the request contains "
                        "an Authorization header field. See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.6.1"
                    )
                elif not contains_required_component:
                    logger.debug(
                        "Cannot store the response because it does not contain any of the required components. "
                        "See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.7.1"
                    )

            # Mark response as not stored
            response.metadata["hishel_stored"] = False  # type: ignore

            return CouldNotBeStored(response=response, pair_id=pair_id, options=self.options)

        # --------------------------------------------------------------------
        # Transition to: StoreAndUse
        # --------------------------------------------------------------------
        # All storage requirements are met. The response will be cached.

        logger.debug("Storing response in cache")

        # Mark response as stored
        response.metadata["hishel_stored"] = True  # type: ignore

        # Remove headers that should not be stored
        # RFC 9111 Section 3.1: Storing Header and Trailer Fields
        # https://www.rfc-editor.org/rfc/rfc9111.html#section-3.1
        # Certain headers (Connection, hop-by-hop headers, etc.) must be excluded
        cleaned_response = exclude_unstorable_headers(response, self.options.shared)

        return StoreAndUse(
            pair_id=pair_id,
            response=cleaned_response,
            options=self.options,
        )
|
|
1786
|
+
|
|
1787
|
+
|
|
1788
|
+
@dataclass
|
|
1789
|
+
class NeedRevalidation(State):
|
|
1790
|
+
"""
|
|
1791
|
+
Represents a state where cached responses require validation before use.
|
|
1792
|
+
|
|
1793
|
+
This state is reached when:
|
|
1794
|
+
1. A stale cached response exists (from IdleClient)
|
|
1795
|
+
2. The cached response cannot be served without validation
|
|
1796
|
+
3. A conditional request has been sent to the origin server
|
|
1797
|
+
|
|
1798
|
+
The validation mechanism uses HTTP conditional requests with validators
|
|
1799
|
+
(ETag, Last-Modified) to check if the cached response is still valid.
|
|
1800
|
+
|
|
1801
|
+
State Transitions:
|
|
1802
|
+
-----------------
|
|
1803
|
+
- NeedToBeUpdated: 304 response received, cached responses can be freshened
|
|
1804
|
+
- InvalidatePairs + CacheMiss: 2xx/5xx response received, new response must be cached
|
|
1805
|
+
- CacheMiss: No matching responses found during freshening
|
|
1806
|
+
|
|
1807
|
+
RFC 9111 References:
|
|
1808
|
+
-------------------
|
|
1809
|
+
- Section 4.3: Validation
|
|
1810
|
+
https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3
|
|
1811
|
+
- Section 4.3.3: Handling a Validation Response
|
|
1812
|
+
https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3.3
|
|
1813
|
+
- Section 4.3.4: Freshening Stored Responses
|
|
1814
|
+
https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3.4
|
|
1815
|
+
|
|
1816
|
+
Attributes:
|
|
1817
|
+
----------
|
|
1818
|
+
request : Request
|
|
1819
|
+
The conditional request that was sent to the server for revalidation.
|
|
1820
|
+
This request contains If-None-Match (from ETag) or If-Modified-Since
|
|
1821
|
+
(from Last-Modified) headers.
|
|
1822
|
+
original_request : Request
|
|
1823
|
+
The original client request (without conditional headers) that initiated
|
|
1824
|
+
this revalidation. This is used when creating new cache entries.
|
|
1825
|
+
revalidating_pairs : list[CompletePair]
|
|
1826
|
+
The cached request-response pairs that are being revalidated. These are
|
|
1827
|
+
stale responses that might still be usable if the server confirms they
|
|
1828
|
+
haven't changed (304 response).
|
|
1829
|
+
options : CacheOptions
|
|
1830
|
+
Configuration options for cache behavior (inherited from State)
|
|
1831
|
+
"""
|
|
1832
|
+
|
|
1833
|
+
request: Request
|
|
1834
|
+
"""
|
|
1835
|
+
The request that was sent to the server for revalidation.
|
|
1836
|
+
"""
|
|
1837
|
+
|
|
1838
|
+
original_request: Request
|
|
1839
|
+
|
|
1840
|
+
revalidating_pairs: list[CompletePair]
|
|
1841
|
+
"""
|
|
1842
|
+
The stored pairs that the request was sent for revalidation.
|
|
1843
|
+
"""
|
|
1844
|
+
|
|
1845
|
+
def next(self, revalidation_response: Response) -> Union["NeedToBeUpdated", "InvalidatePairs", "CacheMiss"]:
    """
    Handle the response to a conditional (revalidation) request and pick the next state.

    Implements RFC 9111 Section 4.3.3 ("Handling a Validation Response"):
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3.3

    Behaviour by status code:

    - 304 Not Modified: the stored responses may be freshened and reused;
      delegated to ``freshening_stored_responses``.
    - Any other final response (2xx, 3xx, 4xx, 5xx): treated as a "full
      response". RFC 9111 states that a full response "indicates that none of
      the stored responses nominated in the conditional request are suitable.
      Instead, the cache MUST use the full response to satisfy the request."
      This includes e.g. 404/410 when the resource was removed since it was
      cached. For 5xx the RFC also permits serving a stale response or
      retrying; this implementation forwards the error instead.
    - Anything else (1xx or a code outside 100-599): not a final response,
      raises ``RuntimeError``.

    Parameters:
    ----------
    revalidation_response : Response
        The response received from the origin server for the conditional request.

    Returns:
    -------
    Union[NeedToBeUpdated, InvalidatePairs, CacheMiss]
        - For 304: whatever ``freshening_stored_responses`` returns.
        - For a full response: ``InvalidatePairs`` whose ``next_state`` is the
          result of ``CacheMiss(...).next(revalidation_response, pair_id=...)``.

    Raises:
    ------
    RuntimeError
        If the status code is not a final response status (not in 200-599).

    Implementation Notes:
    --------------------
    - All revalidating pairs except the last are scheduled for invalidation.
    - The last pair's ID is passed to ``CacheMiss.next`` so it is reused for
      the new response.
    - ``self.revalidating_pairs`` is assumed to be non-empty; an empty list
      would raise ``IndexError`` on the ``[-1]`` lookup (unchanged from the
      original behaviour).
    """
    status_code = revalidation_response.status_code

    # 304: the server confirmed the stored representation is still current.
    if status_code == 304:
        return self.freshening_stored_responses(revalidation_response)

    # Any other final response is a "full response" in RFC 9111 terms and must
    # be used to satisfy the request. Previously only 2xx and 5xx were handled
    # (with duplicated code) and e.g. a 404 for a deleted resource raised
    # RuntimeError instead of being forwarded.
    if 200 <= status_code <= 599:
        return InvalidatePairs(
            options=self.options,
            # Drop every old pair except the last one ...
            pair_ids=[pair.id for pair in self.revalidating_pairs[:-1]],
            # ... whose ID is reused for the new response. CacheMiss.next is
            # defined elsewhere in this module; per the original comments it
            # attempts to cache the new response (presumably only if cacheable).
            next_state=CacheMiss(
                request=self.original_request,
                options=self.options,
                after_revalidation=True,  # Mark that this occurred during revalidation
            ).next(revalidation_response, pair_id=self.revalidating_pairs[-1].id),
        )

    # 1xx (interim) or out-of-range codes are not valid final responses.
    raise RuntimeError(
        f"Unexpected response status code during revalidation: {revalidation_response.status_code}"
    )  # pragma: nocover
|
|
2015
|
+
|
|
2016
|
+
def freshening_stored_responses(
    self, revalidation_response: Response
) -> Union["NeedToBeUpdated", "InvalidatePairs", "CacheMiss"]:
    """
    Freshen the stored responses that a 304 Not Modified response confirms.

    Implements RFC 9111 Section 4.3.4 ("Freshening Stored Responses"):
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3.4

    The pairs in ``self.revalidating_pairs`` are split into those the 304
    applies to and those it does not, using the first applicable rule:

    1. The 304 carries a strong ETag (present and not prefixed with ``W/``):
       pairs whose stored ``etag`` header equals it.
    2. The 304 carries a non-empty ``last-modified`` header: pairs whose
       stored ``last-modified`` header equals it.
    3. Exactly one pair is being revalidated: that pair.
    4. Otherwise nothing can be matched and every pair is invalidated.

    Parameters:
    ----------
    revalidation_response : Response
        The 304 Not Modified response from the server.

    Returns:
    -------
    Union[NeedToBeUpdated, InvalidatePairs, CacheMiss]
        - NeedToBeUpdated: at least one pair matched and none needs invalidation.
        - CacheMiss: no pair matched and none needs invalidation.
        - InvalidatePairs: some pairs did not match; wraps one of the two
          states above as ``next_state``.

    Implementation Notes:
    --------------------
    - Weak ETags (starting with ``W/``) are never used for matching.
    - Several pairs can be freshened at once if they share the validator.
    - The return annotation uses ``Union[...]`` because ``"A" | "B"`` on
      string literals fails when the annotation is resolved at runtime
      (e.g. by ``typing.get_type_hints``).
    """

    # ============================================================================
    # STEP 1: Identify Matching Responses Using Validators
    # ============================================================================
    # `partition` is defined elsewhere; from its use here it returns
    # (items satisfying the predicate, remaining items).

    identified_for_revalidation: list[CompletePair]

    # MATCHING STRATEGY 1: Strong ETag
    # "If the 304 response contains a strong entity tag: the stored responses
    #  with the same strong entity tag."
    if "etag" in revalidation_response.headers and (not revalidation_response.headers["etag"].startswith("W/")):
        identified_for_revalidation, need_to_be_invalidated = partition(
            self.revalidating_pairs,
            lambda pair: pair.response.headers.get("etag") == revalidation_response.headers.get("etag"),  # type: ignore[no-untyped-call]
        )

    # MATCHING STRATEGY 2: Last-Modified
    # "If the 304 response contains a Last-Modified value: the stored responses
    #  with the same Last-Modified value."
    elif revalidation_response.headers.get("last-modified"):
        identified_for_revalidation, need_to_be_invalidated = partition(
            self.revalidating_pairs,
            lambda pair: pair.response.headers.get("last-modified")
            == revalidation_response.headers.get("last-modified"),  # type: ignore[no-untyped-call]
        )

    # MATCHING STRATEGY 3: Single Response Assumption
    # "If there is only a single stored response: that response."
    elif len(self.revalidating_pairs) == 1:
        identified_for_revalidation, need_to_be_invalidated = [self.revalidating_pairs[0]], []

    # No validators and more than one (or zero) candidates: we cannot tell
    # which stored response the 304 refers to, so invalidate all of them.
    else:
        identified_for_revalidation, need_to_be_invalidated = [], self.revalidating_pairs

    # ============================================================================
    # STEP 2: Update Matching Responses or Create Cache Miss
    # ============================================================================

    next_state: Union["NeedToBeUpdated", "CacheMiss"]

    if identified_for_revalidation:
        # RFC 9111 Section 3.2: the stored header fields are updated from the
        # 304 response. The merge is delegated to refresh_response_headers
        # (defined elsewhere; per the original author's note it leaves
        # Content-Encoding, Content-Type and Content-Range untouched).
        # dataclasses.replace builds new pair objects rather than mutating
        # the ones in self.revalidating_pairs.
        next_state = NeedToBeUpdated(
            updating_pairs=[
                replace(
                    pair,
                    response=refresh_response_headers(pair.response, revalidation_response),
                )
                for pair in identified_for_revalidation
            ],
            original_request=self.original_request,
            options=self.options,
        )
    else:
        # The server said "not modified" but no stored response could be
        # associated with that answer; fall back to the cache-miss flow.
        next_state = CacheMiss(
            options=self.options,
            request=self.original_request,
            after_revalidation=True,
        )

    # ============================================================================
    # STEP 3: Invalidate Non-Matching Responses (if any)
    # ============================================================================
    # Example: two stored responses with different ETags and a 304 matching
    # only one of them -> update the matching one, invalidate the other.

    if need_to_be_invalidated:
        return InvalidatePairs(
            options=self.options,
            pair_ids=[pair.id for pair in need_to_be_invalidated],
            next_state=next_state,
        )

    # No invalidations needed, return the next state directly
    return next_state
|
|
2230
|
+
|
|
2231
|
+
|
|
2232
|
+
@dataclass
class StoreAndUse(State):
    """
    State signalling that the response may be written to the cache and then
    used to satisfy the request.

    ``next`` returns ``None``: no further transition is defined from here.
    """

    # Identifier of the pair associated with the response
    # (presumably the storage key the response is saved under - confirm with caller).
    pair_id: uuid.UUID

    # The response that is to be stored and used.
    response: Response

    def next(self) -> None:
        # Nothing follows this state.
        return None  # pragma: nocover
|
|
2244
|
+
|
|
2245
|
+
|
|
2246
|
+
@dataclass
class CouldNotBeStored(State):
    """
    State signalling that the response was not eligible to be written to the cache.

    ``next`` returns ``None``: no further transition is defined from here.
    """

    # The response that could not be stored.
    response: Response

    # Identifier of the pair associated with the response.
    pair_id: uuid.UUID

    def next(self) -> None:
        # Nothing follows this state.
        return None  # pragma: nocover
|
|
2258
|
+
|
|
2259
|
+
|
|
2260
|
+
@dataclass
class InvalidatePairs(State):
    """
    State describing the removal of cached pairs, carrying the state that
    should be entered afterwards.
    """

    # IDs of the cached pairs to delete.
    pair_ids: list[uuid.UUID]

    # The state to continue with; handed back unchanged by ``next``.
    next_state: AnyState

    def next(self) -> AnyState:
        """Return the wrapped follow-up state."""
        following_state = self.next_state
        return following_state
|
|
2272
|
+
|
|
2273
|
+
|
|
2274
|
+
@dataclass
class FromCache(State):
    """
    State indicating that the request is satisfied from the cache.

    ``next`` returns ``None``: no further transition is defined from here.
    """

    # The single cached request/response pair used to satisfy the request.
    # (The previous attribute docstring described this as a "list of pairs",
    # which did not match the declared type.)
    pair: CompletePair

    def next(self) -> None:
        # Nothing follows this state.
        return None  # pragma: nocover
|
|
2283
|
+
|
|
2284
|
+
|
|
2285
|
+
@dataclass
class NeedToBeUpdated(State):
    """
    State carrying cached pairs whose stored data has to be updated before
    one of them is served from the cache.
    """

    # The pairs (with their refreshed responses) that need to be written back.
    updating_pairs: list[CompletePair]

    # The client request that led to this update.
    original_request: Request

    def next(self) -> FromCache:
        """Move to ``FromCache`` using the last pair in ``updating_pairs``."""
        # Indexing with [-1] raises IndexError when updating_pairs is empty.
        selected_pair = self.updating_pairs[-1]
        return FromCache(pair=selected_pair, options=self.options)  # pragma: nocover
|