hishel 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2291 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import time
5
+ import uuid
6
+ from abc import ABC, abstractmethod
7
+ from dataclasses import dataclass, field, replace
8
+ from typing import (
9
+ TYPE_CHECKING,
10
+ Any,
11
+ Dict,
12
+ Literal,
13
+ Optional,
14
+ TypeVar,
15
+ Union,
16
+ )
17
+
18
+ from hishel._utils import parse_date, partition
19
+ from hishel.beta._core._headers import Headers, Range, Vary, parse_cache_control
20
+
21
+ if TYPE_CHECKING:
22
+ from hishel.beta import CompletePair, Request, Response
23
+
24
+
25
# Type variable bounded to State (a forward reference; State is defined below).
TState = TypeVar("TState", bound="State")

# Status codes this module treats as heuristically cacheable.
# NOTE(review): RFC 9110 Section 15.1 also lists 206 (Partial Content) as
# heuristically cacheable; it is absent here - presumably deliberate, confirm.
HEURISTICALLY_CACHEABLE_STATUS_CODES = (
    200,
    203,
    204,
    300,
    301,
    308,
    404,
    405,
    410,
    414,
    501,
)

# Module-level logger registered under the fixed name "hishel.core.spec".
logger = logging.getLogger("hishel.core.spec")
40
+
41
+
42
@dataclass
class CacheOptions:
    """
    Configuration flags attached to every State.

    The class is plain data: it declares three fields with defaults and
    defines no behaviour of its own.
    """

    # Whether the cache is shared; defaults to True.
    # NOTE(review): presumably forwarded as `is_cache_shared` to the freshness
    # helpers in this module - confirm against the callers.
    shared: bool = True
    # Supported HTTP methods; a fresh ["GET", "HEAD"] list is built per instance.
    supported_methods: list[str] = field(default_factory=lambda: ["GET", "HEAD"])
    # Whether serving stale responses is permitted; defaults to False.
    # NOTE(review): presumably forwarded as `allow_stale_option` to
    # allowed_stale - confirm against the callers.
    allow_stale: bool = False
47
+
48
+
49
@dataclass
class State(ABC):
    """
    Abstract base class for states; every state carries a CacheOptions.

    Subclasses must override ``next``; the base implementation only raises.
    """

    # Cache configuration attached to this state.
    options: CacheOptions

    @abstractmethod
    def next(self, *args: Any, **kwargs: Any) -> Union["State", None]:
        """
        Return the following state, or None.

        The signature is left open (``*args``/``**kwargs``), presumably so that
        each subclass can declare the arguments it needs.

        Raises:
        ------
        NotImplementedError
            Always, when this base implementation is invoked directly.
        """
        raise NotImplementedError("Subclasses must implement this method")
56
+
57
+
58
def vary_headers_match(
    original_request: Request,
    associated_pair: CompletePair,
) -> bool:
    """
    Decide whether a stored pair can be selected for a request under Vary.

    The stored response's Vary header names the request headers that were
    used to pick the representation. The stored pair is only selectable when
    each of those headers has the same value in the new request and in the
    request that was stored with the response.

    RFC 9111 Section 4.1: Calculating Cache Keys
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.1

    Parameters:
    ----------
    original_request : Request
        The incoming request that is being matched against the cache
    associated_pair : CompletePair
        The stored request/response pair under consideration

    Returns:
    -------
    bool
        True when the stored response has no (or an empty) Vary header, or
        when every nominated header compares equal between the two requests.
        False when any nominated header differs or when Vary contains "*".

    Notes:
    -----
    Header values are compared exactly as returned by ``headers.get``; a
    header missing from both requests compares equal (None == None). No
    whitespace or value normalization is performed in this function.

    Examples:
    --------
    >>> # No Vary header - always matches
    >>> request = Request(headers=Headers({"accept": "application/json"}))
    >>> pair = CompletePair(request=request, response=Response(headers=Headers({})))
    >>> vary_headers_match(request, pair)
    True

    >>> # Vary: Accept with a different Accept value
    >>> stored = Request(headers=Headers({"accept": "application/json"}))
    >>> pair = CompletePair(request=stored, response=Response(headers=Headers({"vary": "Accept"})))
    >>> vary_headers_match(Request(headers=Headers({"accept": "application/xml"})), pair)
    False

    >>> # Vary: * never matches
    >>> pair = CompletePair(request=stored, response=Response(headers=Headers({"vary": "*"})))
    >>> vary_headers_match(stored, pair)
    False
    """
    stored_vary = associated_pair.response.headers.get("vary")

    # Without a Vary header the response does not depend on request headers.
    if not stored_vary:
        return True

    nominated_names = Vary.from_value(stored_vary).values
    incoming_headers = original_request.headers
    stored_headers = associated_pair.request.headers

    # all() stops at the first failing name, exactly like an early return:
    #  - "*" can never be matched (RFC 9111 Section 4.1)
    #  - otherwise both requests must carry the same value (or both lack it)
    return all(
        name != "*" and incoming_headers.get(name) == stored_headers.get(name)
        for name in nominated_names
    )
162
+
163
+
164
def get_freshness_lifetime(response: Response, is_cache_shared: bool) -> Optional[int]:
    """
    Calculates the freshness lifetime of a cached response in seconds.

    The freshness lifetime is the period during which a stored response can
    be reused without validation. The first applicable rule below wins.

    RFC 9111 Section 4.2.1: Calculating Freshness Lifetime
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2.1

    Parameters:
    ----------
    response : Response
        The cached response to calculate freshness for
    is_cache_shared : bool
        True if this is a shared cache; only then is s-maxage honoured

    Returns:
    -------
    Optional[int]
        Freshness lifetime in seconds, or None if neither an explicit nor a
        heuristic lifetime can be determined

    Priority Order:
    --------------
    1. s-maxage (shared caches only)
    2. max-age
    3. Expires - Date (current time is used when Date is missing or invalid)
    4. Heuristic freshness, as computed by get_heuristic_freshness

    Invalid Expires:
    ---------------
    RFC 9111 Section 5.3: "A cache recipient MUST interpret invalid date
    formats, especially the value '0', as representing a time in the past
    (i.e., 'already expired')." An Expires value that cannot be parsed
    therefore yields a lifetime of 0 instead of raising.

    Examples:
    --------
    >>> # max-age directive
    >>> response = Response(headers=Headers({"cache-control": "max-age=3600"}))
    >>> get_freshness_lifetime(response, is_cache_shared=True)
    3600

    >>> # s-maxage overrides max-age for shared caches
    >>> response = Response(headers=Headers({
    ...     "cache-control": "max-age=3600, s-maxage=7200"
    ... }))
    >>> get_freshness_lifetime(response, is_cache_shared=True)
    7200
    >>> get_freshness_lifetime(response, is_cache_shared=False)
    3600

    >>> # Unparseable Expires is treated as already expired
    >>> response = Response(headers=Headers({"expires": "0"}))
    >>> get_freshness_lifetime(response, is_cache_shared=True)
    0
    """
    response_cache_control = parse_cache_control(response.headers.get("Cache-Control"))

    # PRIORITY 1: s-maxage (RFC 9111 Section 5.2.2.10) - shared caches only.
    # Private caches ignore it and fall through to max-age.
    if is_cache_shared and response_cache_control.s_maxage is not None:
        return response_cache_control.s_maxage

    # PRIORITY 2: max-age (RFC 9111 Section 5.2.2.1) - applies to all caches.
    if response_cache_control.max_age is not None:
        return response_cache_control.max_age

    # PRIORITY 3: Expires - Date (RFC 9111 Section 5.3).
    if "expires" in response.headers:
        expires_timestamp = parse_date(response.headers["expires"])

        if expires_timestamp is None:
            # Invalid Expires (e.g. "0") means "already expired"; a malformed
            # header sent by a server must not crash the cache.
            return 0

        date_timestamp = parse_date(response.headers["date"]) if "date" in response.headers else None

        if date_timestamp is None:
            # RFC 9110 Section 6.6.1: a missing or invalid Date may be replaced
            # with the time the response was received; the current time is used.
            date_timestamp = time.time()

        return int(expires_timestamp - date_timestamp)

    # PRIORITY 4: heuristic freshness (RFC 9111 Section 4.2.2).
    # Computed once: it depends on the current time, so a second call could
    # return a different value than the one just inspected. It is already
    # None when no heuristic can be derived.
    return get_heuristic_freshness(response)
298
+
299
+
300
def allowed_stale(response: Response, allow_stale_option: bool) -> bool:
    """
    Tell whether a stale stored response may be served without revalidation.

    RFC 9111 Section 4.2.4: Serving Stale Responses
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2.4

    Parameters:
    ----------
    response : Response
        The stale cached response being considered for reuse
    allow_stale_option : bool
        Configuration flag; when False, stale responses are never served

    Returns:
    -------
    bool
        True if serving the stale response is permitted, False otherwise

    Conditions that prohibit serving stale:
    --------------------------------------
    1. allow_stale_option is False (Cache-Control is not even parsed)
    2. The response carries a no-cache directive
       (RFC 9111 Section 5.2.2.4 - must be validated before every reuse)
    3. The response carries a must-revalidate directive
       (RFC 9111 Section 5.2.2.2 - must be validated once stale)

    Only these conditions are evaluated here; proxy-revalidate and s-maxage
    are not inspected by this function.

    Examples:
    --------
    >>> response = Response(headers=Headers({"cache-control": "max-age=3600"}))
    >>> allowed_stale(response, allow_stale_option=True)
    True
    >>> allowed_stale(response, allow_stale_option=False)
    False

    >>> response = Response(headers=Headers({
    ...     "cache-control": "max-age=3600, must-revalidate"
    ... }))
    >>> allowed_stale(response, allow_stale_option=True)
    False
    """
    # Configuration gate: nothing else matters when stale serving is disabled.
    if not allow_stale_option:
        return False

    directives = parse_cache_control(response.headers.get("Cache-Control"))

    # Either directive forbids reuse of a stale response. `or` short-circuits,
    # so must_revalidate is only consulted when no_cache is falsy.
    return not (directives.no_cache or directives.must_revalidate)
399
+
400
+
401
def get_heuristic_freshness(response: Response) -> int | None:
    """
    Calculates a heuristic freshness lifetime when no explicit expiration is provided.

    The estimate is derived from the Last-Modified header: a resource that has
    not changed for a long time is assumed to stay unchanged a while longer.

    RFC 9111 Section 4.2.2: Calculating Heuristic Freshness
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2.2

    Parameters:
    ----------
    response : Response
        The response to calculate heuristic freshness for

    Returns:
    -------
    int | None
        Heuristic freshness lifetime in seconds, or None when the response has
        no (or an empty) Last-Modified header or the header cannot be parsed

    Heuristic Calculation:
    ---------------------
    - Freshness = 10% of (now - Last-Modified), the fraction RFC 9111 calls
      "a typical setting"
    - Maximum: 1 week (604,800 seconds)
    - Minimum: 0 seconds (a Last-Modified in the future, e.g. because of clock
      skew, never produces a negative lifetime)

    Examples:
    --------
    >>> # Resource last modified 10 days ago -> 10% of 10 days = 1 day
    >>> last_modified = (datetime.now() - timedelta(days=10)).strftime(...)
    >>> response = Response(headers=Headers({"last-modified": last_modified}))
    >>> get_heuristic_freshness(response)
    86400  # 1 day in seconds

    >>> # Resource last modified 100 days ago -> capped at 1 week
    >>> last_modified = (datetime.now() - timedelta(days=100)).strftime(...)
    >>> response = Response(headers=Headers({"last-modified": last_modified}))
    >>> get_heuristic_freshness(response)
    604800  # 1 week (maximum)

    >>> # Last-Modified in the future -> clamped to 0
    >>> last_modified = (datetime.now() + timedelta(days=1)).strftime(...)
    >>> response = Response(headers=Headers({"last-modified": last_modified}))
    >>> get_heuristic_freshness(response)
    0

    >>> # No Last-Modified header
    >>> response = Response(headers=Headers({}))
    >>> get_heuristic_freshness(response)
    None
    """
    last_modified = response.headers.get("last-modified")

    if not last_modified:
        # No Last-Modified header, cannot calculate heuristic freshness
        return None

    last_modified_timestamp = parse_date(last_modified)

    if last_modified_timestamp is None:  # pragma: nocover
        # Cannot parse the date, cannot calculate heuristic freshness
        return None

    # Conservative upper bound: 7 days * 24 hours * 60 minutes * 60 seconds.
    # RFC 9111 Section 4.2.2 notes that extremely large lifetimes "have been
    # demonstrated to cause problems".
    max_heuristic_freshness = 604_800

    # 10% of the time elapsed since the resource was last modified.
    age_since_modification = time.time() - last_modified_timestamp
    heuristic_freshness = int(age_since_modification * 0.1)

    # Clamp to [0, one week]. The lower bound matters when Last-Modified lies
    # in the future, which would otherwise yield a negative lifetime.
    return max(0, min(max_heuristic_freshness, heuristic_freshness))
502
+
503
+
504
def get_age(response: Response) -> int:
    """
    Return the age of a stored response in whole seconds.

    RFC 9111 Section 4.2.3: Calculating Age
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2.3

    Parameters:
    ----------
    response : Response
        The cached response whose age is wanted

    Returns:
    -------
    int
        Seconds elapsed since the response's Date header (never negative)

    Calculation:
    -----------
    This is a simplified form of the RFC formula:

        age = max(0, now - Date)

    The Age header, response delay and resident time that the full RFC
    algorithm accounts for are not considered here.

    Fallbacks:
    ---------
    - Date header missing:  age = 0
    - Date header invalid:  age = 0
    - Date in the future:   age = 0

    Examples:
    --------
    >>> # Response dated one hour ago
    >>> date = (datetime.utcnow() - timedelta(hours=1)).strftime(...)
    >>> get_age(Response(headers=Headers({"date": date})))
    3600  # 1 hour in seconds

    >>> # No Date header
    >>> get_age(Response(headers=Headers({})))
    0
    """
    headers = response.headers

    # Without a Date header the response is treated as brand new.
    if "date" not in headers:
        return 0

    generated_at = parse_date(headers["date"])

    if generated_at is None:  # pragma: nocover
        # An unparseable Date is handled like a missing one.
        return 0

    # Apparent age; a Date that is not in the past yields 0 rather than a
    # negative number.
    elapsed = time.time() - generated_at
    return int(elapsed) if elapsed > 0 else 0
603
+
604
+
605
def make_conditional_request(request: Request, response: Response) -> Request:
    """
    Build a validation (conditional) request from a stored response's validators.

    The validators found on the stored response (ETag, Last-Modified) are
    turned into precondition headers, so the server can answer 304 Not
    Modified instead of resending an unchanged representation.

    RFC 9111 Section 4.3.1: Sending a Validation Request
    https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3.1

    Parameters:
    ----------
    request : Request
        The request to derive the conditional request from; it is not mutated
    response : Response
        The stored response that supplies the validators

    Returns:
    -------
    Request
        A copy of ``request`` (via dataclasses.replace) whose headers are the
        original headers plus any precondition headers

    Conditional Headers Added:
    -------------------------
    1. If-None-Match     - set to the stored ETag whenever an ETag header is
                           present (RFC 9110 Section 13.1.2)
    2. If-Modified-Since - set to the stored Last-Modified value when that
                           header is present and non-empty
                           (RFC 9110 Section 13.1.3)

    Both are sent when both validators are available. A precondition header
    overrides a same-named key already present in the request headers.

    Examples:
    --------
    >>> request = Request(method="GET", url="https://example.com/resource")
    >>> response = Response(headers=Headers({"etag": '"abc123"'}))
    >>> make_conditional_request(request, response).headers["if-none-match"]
    '"abc123"'

    >>> response = Response(headers=Headers({
    ...     "last-modified": "Mon, 01 Jan 2024 00:00:00 GMT"
    ... }))
    >>> make_conditional_request(request, response).headers["if-modified-since"]
    'Mon, 01 Jan 2024 00:00:00 GMT'
    """
    stored_headers = response.headers

    # Collect the validators carried by the stored response.
    modified_at = stored_headers["last-modified"] if "last-modified" in stored_headers else None
    entity_tag = stored_headers["etag"] if "etag" in stored_headers else None

    validators: Dict[str, str] = {}

    # ETag -> If-None-Match. Only presence is tested, so an empty ETag value
    # is still forwarded.
    if entity_tag is not None:
        validators["If-None-Match"] = entity_tag

    # Last-Modified -> If-Modified-Since. Truthiness is tested, so an empty
    # Last-Modified value is skipped.
    if modified_at:
        validators["If-Modified-Since"] = modified_at

    # Later keys win, so the validators take precedence over request headers.
    merged_headers = Headers({**request.headers, **validators})

    return replace(request, headers=merged_headers)
761
+
762
+
763
def exclude_unstorable_headers(response: Response, is_cache_shared: bool) -> Response:
    """
    Removes headers that must not be stored in the cache.

    Certain headers are connection-specific or contain sensitive information
    that should not be cached. This function filters them out before storage
    and returns a copy of the response; the response passed in is not modified.

    RFC 9111 Section 3.1: Storing Header and Trailer Fields
    https://www.rfc-editor.org/rfc/rfc9111.html#section-3.1

    Parameters:
    ----------
    response : Response
        The response to filter headers from
    is_cache_shared : bool
        True if this is a shared cache (affects private directive handling)

    Returns:
    -------
    Response
        A new response with unstorable headers removed

    RFC 9111 Compliance:
    -------------------
    RFC 9111 Section 3.1 requires a cache to store all received response header
    fields, with these exceptions (paraphrased):
    - The Connection header field and the fields whose names are listed in it
      have to be removed before forwarding (RFC 9110 Section 7.6.1); a cache
      may do that before storage
    - The no-cache and private directives can carry field-name arguments that
      prevent storage of those fields by all caches and by shared caches,
      respectively
    - Fields that are specific to the proxy used for forwarding must not be stored

    Headers Always Excluded:
    -----------------------
    Connection-specific headers (RFC 9110 Section 7.6.1):
    - Connection
    - Every field named inside the response's Connection header value
    - Keep-Alive
    - Proxy-Connection (non-standard but common)
    - Transfer-Encoding
    - Upgrade
    - TE

    Hop-by-hop authentication headers:
    - Proxy-Authenticate
    - Proxy-Authorization
    - Proxy-Authentication-Info

    Headers Conditionally Excluded:
    -------------------------------
    - Fields listed in no-cache directive (always excluded)
    - Fields listed in private directive (excluded for shared caches only)

    Examples:
    --------
    >>> # Remove connection-specific headers
    >>> response = Response(headers=Headers({
    ...     "cache-control": "max-age=3600",
    ...     "connection": "keep-alive, x-hop-token",
    ...     "keep-alive": "timeout=5",
    ...     "x-hop-token": "abc",
    ...     "content-type": "application/json"
    ... }))
    >>> filtered = exclude_unstorable_headers(response, is_cache_shared=True)
    >>> "connection" in filtered.headers
    False
    >>> "x-hop-token" in filtered.headers
    False
    >>> "content-type" in filtered.headers
    True

    >>> # Remove headers listed in no-cache
    >>> response = Response(headers=Headers({
    ...     "cache-control": 'no-cache="Set-Cookie"',
    ...     "set-cookie": "session=abc123"
    ... }))
    >>> filtered = exclude_unstorable_headers(response, is_cache_shared=True)
    >>> "set-cookie" in filtered.headers
    False
    """
    # Lower-cased names of the fields that are never stored.
    # RFC 9110 Section 7.6.1: Connection-Specific Header Fields
    # https://www.rfc-editor.org/rfc/rfc9110#section-7.6.1
    need_to_be_excluded = {
        "connection",  # Connection management
        "keep-alive",  # Connection timeout and max requests
        "te",  # Transfer encoding accepted by client
        "transfer-encoding",  # How the body is encoded for transfer
        "upgrade",  # Protocol upgrade (e.g., WebSocket)
        "proxy-connection",  # Non-standard but widely used
        "proxy-authenticate",  # Proxy authentication challenge
        "proxy-authentication-info",  # Proxy auth additional info
        "proxy-authorization",  # Proxy auth credentials
    }

    # EXCLUSION RULE 0: fields nominated by the Connection header
    # The Connection value is a comma-separated list of connection options;
    # each option that names a header field marks that field as hop-by-hop.
    # Options such as "close" that do not name a field are harmless here
    # because they simply never match a header name.
    # NOTE(review): assumes headers.get() returns the field value as a single
    # string (multiple values comma-joined) - confirm against Headers.
    connection_value = response.headers.get("connection")
    if connection_value:
        for connection_option in connection_value.split(","):
            normalized_option = connection_option.strip().lower()
            if normalized_option:
                need_to_be_excluded.add(normalized_option)

    # Parse Cache-Control to check for no-cache and private directives
    cache_control = parse_cache_control(response.headers.get("cache-control"))

    # EXCLUSION RULE 1: no-cache with field names
    # RFC 9111 Section 5.2.2.4: no-cache Response Directive
    # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.4
    #
    # The qualified form (a list of field names) allows the response to be
    # reused as long as the listed fields are left out of it.
    #
    # Example: Cache-Control: no-cache="Set-Cookie, Set-Cookie2"
    # Means: Cache the response but exclude Set-Cookie headers from the cache
    if isinstance(cache_control.no_cache, list):
        for field_name in cache_control.no_cache:
            # strip() guards against whitespace or quotes left around a name
            need_to_be_excluded.add(field_name.strip(' \t"').lower())

    # EXCLUSION RULE 2: private with field names (shared caches only)
    # RFC 9111 Section 5.2.2.7: private Response Directive
    # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.7
    #
    # With the qualified form only the listed fields are limited to a single
    # user: a shared cache must not store them but may store the rest.
    #
    # Example: Cache-Control: private="Authorization"
    # Means: Shared caches can cache the response but must exclude Authorization
    if is_cache_shared and isinstance(cache_control.private, list):
        for field_name in cache_control.private:
            need_to_be_excluded.add(field_name.strip(' \t"').lower())

    # Keep only the headers whose (case-insensitive) name is not excluded
    new_headers = Headers(
        {key: value for key, value in response.headers.items() if key.lower() not in need_to_be_excluded}
    )

    # Return a new response with filtered headers
    return replace(
        response,
        headers=new_headers,
    )
907
+
908
+
909
def refresh_response_headers(
    stored_response: Response,
    revalidation_response: Response,
) -> Response:
    """
    Updates a stored response's headers with fresh metadata from a 304 response.

    When revalidation succeeds (304 Not Modified), the cached response is still
    valid but its metadata (Date, Cache-Control, etc.) should be updated with
    fresh values from the 304 response.

    RFC 9111 Section 3.2: Updating Stored Header Fields
    https://www.rfc-editor.org/rfc/rfc9111.html#section-3.2

    Parameters:
    ----------
    stored_response : Response
        The cached response that is being freshened
    revalidation_response : Response
        The 304 Not Modified response containing fresh metadata

    Returns:
    -------
    Response
        A copy of the stored response carrying the merged headers

    RFC 9111 Compliance:
    -------------------
    RFC 9111 Section 3.2 requires the cache to add each header field of the
    provided response to the stored response, replacing values that are already
    present, except for (paraphrased):
    - header fields excepted from storage in Section 3.1
    - header fields the stored response depends upon (the RFC's examples are
      Content-Encoding and Content-Type)
    - the Content-Length header field

    Update Rules:
    ------------
    1. Merge headers from 304 response into stored response
    2. 304 headers override stored headers (newer metadata)
    3. EXCEPT: Content metadata headers are NOT updated
       - Content-Encoding
       - Content-Type
       - Content-Range
       - Content-Length
    4. Remove unstorable headers after merging

    Rationale for Exceptions:
    ------------------------
    Content-* headers describe the body of the response. A 304 response has
    no body, so its Content-* headers (if any) don't describe the cached body.
    We must preserve the original Content-* headers from the cached response.

    For example:
    - Cached response: Content-Type: application/json, body is JSON
    - 304 response: Content-Type: text/plain (this is wrong for the cached body!)
    - Result: Keep application/json from cached response

    The same applies to Content-Length: a 304 that carries "Content-Length: 0"
    must not overwrite the length of the stored body.

    Examples:
    --------
    >>> # Update Date and Cache-Control, preserve Content-Type and Content-Length
    >>> stored = Response(
    ...     status_code=200,
    ...     headers=Headers({
    ...         "date": "Mon, 01 Jan 2024 00:00:00 GMT",
    ...         "cache-control": "max-age=3600",
    ...         "content-type": "application/json",
    ...         "content-length": "1234"
    ...     })
    ... )
    >>> revalidation = Response(
    ...     status_code=304,
    ...     headers=Headers({
    ...         "date": "Mon, 01 Jan 2024 12:00:00 GMT",
    ...         "cache-control": "max-age=7200",
    ...         "content-type": "text/plain",  # Should be ignored
    ...         "content-length": "0"  # Should be ignored
    ...     })
    ... )
    >>> refreshed = refresh_response_headers(stored, revalidation)
    >>> refreshed.headers["cache-control"]
    'max-age=7200'  # Updated
    >>> refreshed.headers["content-type"]
    'application/json'  # Preserved from stored response
    >>> refreshed.headers["content-length"]
    '1234'  # Preserved from stored response
    """
    # Headers that must NOT be taken over from the 304 response because they
    # describe the stored body/representation rather than the 304 itself.
    excluded_headers = {
        "content-encoding",  # How the body is encoded (gzip, br, etc.)
        "content-type",  # MIME type of the body
        "content-range",  # For partial content (206 responses)
        "content-length",  # Size of the stored body; explicitly excepted by RFC 9111 Section 3.2
    }

    # Merge headers: Start with stored response, overlay revalidation response
    # Headers from revalidation_response override stored_response
    # EXCEPT for excluded headers (content metadata)
    new_headers = {
        **stored_response.headers,  # Base: original cached headers
        **{
            key: value
            for key, value in revalidation_response.headers.items()
            if key.lower() not in excluded_headers  # Skip content metadata
        },
    }

    # Remove unstorable headers from the final merged headers
    # This ensures we don't accidentally cache connection-specific headers
    # that might have been in the 304 response
    return exclude_unstorable_headers(
        replace(
            stored_response,
            headers=Headers(new_headers),
        ),
        is_cache_shared=True,  # Assume shared cache for maximum safety
    )
1026
+
1027
+
1028
# Union of the state classes of the cache state machine. The members are
# written as strings (forward references) so the alias can be declared before
# the classes themselves.
AnyState = Union[
    "CacheMiss", "StoreAndUse", "CouldNotBeStored", "FromCache",
    "NeedToBeUpdated", "NeedRevalidation", "IdleClient", "InvalidatePairs",
]

# HTTP methods classified as "safe" by RFC 9110:
# https://www.rfc-editor.org/rfc/rfc9110#name-safe-methods
SAFE_METHODS = frozenset(("GET", "HEAD", "OPTIONS", "TRACE"))
1041
+
1042
+
1043
def create_idle_state(role: Literal["client", "server"], options: Optional[CacheOptions] = None) -> IdleClient:
    """
    Builds the initial (idle) state of the cache state machine.

    Parameters:
    ----------
    role : Literal["client", "server"]
        Which side of the exchange the state machine models. Only "server" is
        rejected; every other value yields a client state.
    options : Optional[CacheOptions]
        Cache configuration; a default CacheOptions() is used when this is
        omitted (or otherwise falsy).

    Returns:
    -------
    IdleClient
        The entry state carrying the effective options

    Raises:
    ------
    NotImplementedError
        If role is "server"
    """
    if role != "server":
        effective_options = options if options else CacheOptions()
        return IdleClient(options=effective_options)

    raise NotImplementedError("Server role is not implemented yet.")
1047
+
1048
+
1049
@dataclass
class IdleClient(State):
    """
    Represents the idle state of a client initiating an HTTP request.

    This is the entry point of the cache state machine. When a client wants to send
    a request, this state determines whether the request can be satisfied from cache,
    needs revalidation, or must be forwarded to the origin server (cache miss).

    State Transitions:
    -----------------
    - CacheMiss: When no suitable cached response exists or the request cannot be cached
    - FromCache: When a fresh or stale-but-allowed cached response can be used
    - NeedRevalidation: When a stale cached response exists and must be validated

    RFC 9111 References:
    -------------------
    - Section 4: Constructing Responses from Caches
      https://www.rfc-editor.org/rfc/rfc9111.html#section-4
    - Section 4.1: Calculating Cache Keys (Vary handling)
      https://www.rfc-editor.org/rfc/rfc9111.html#section-4.1
    - Section 4.2: Freshness
      https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2
    - Section 4.3: Validation
      https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3

    Attributes:
    ----------
    options : CacheOptions
        Configuration options for cache behavior (inherited from State)
    """

    def next(
        self, request: Request, associated_pairs: list[CompletePair]
    ) -> Union["CacheMiss", "FromCache", "NeedRevalidation"]:
        """
        Determines the next state transition based on the request and available cached responses.

        This method implements the core cache lookup algorithm defined in RFC 9111 Section 4.
        It evaluates whether a cached response can be reused and transitions to the appropriate
        next state.

        Parameters:
        ----------
        request : Request
            The incoming HTTP request from the client
        associated_pairs : list[CompletePair]
            Request-response pairs previously stored in the cache that may match
            this request. They are filtered again here by URL, method, Vary and
            no-cache, so the caller does not have to pre-filter them exactly.

        Returns:
        -------
        Union[CacheMiss, FromCache, NeedRevalidation]
            - CacheMiss: No suitable cached response; request must be forwarded to origin
            - FromCache: A suitable cached response can be returned immediately
            - NeedRevalidation: A cached response exists but requires validation before use

        RFC 9111 Compliance:
        -------------------
        This method enforces the requirements from RFC 9111 Section 4, paragraph 1:
        "When presented with a request, a cache MUST NOT reuse a stored response unless:
        1. the presented target URI and that of the stored response match, and
        2. the request method associated with the stored response allows it to be used
           for the presented request, and
        3. request header fields nominated by the stored response (if any) match those
           presented (see Section 4.1), and
        4. the stored response does not contain the no-cache directive (Section 5.2.2.4),
           unless it is successfully validated (Section 4.3), and
        5. the stored response is one of the following:
           - fresh (see Section 4.2), or
           - allowed to be served stale (see Section 4.2.4), or
           - successfully validated (see Section 4.3)."

        Implementation Notes:
        --------------------
        - Range requests always result in a cache miss (simplified behavior)
        - Unsafe methods (POST, PUT, DELETE, etc.) are written through to origin
        - Multiple matching responses are sorted by Date header (most recent first)
        - Freshness is computed for the kind of cache configured in options.shared
        - Age header is updated when serving from cache

        Side Effects:
        ------------
        Every response that is ready to be served gets
        metadata["hishel_from_cache"] = True set on the passed-in object.

        Examples:
        --------
        >>> # Cache miss - no matching responses
        >>> idle = IdleClient(options=default_options)
        >>> next_state = idle.next(get_request, [])
        >>> isinstance(next_state, CacheMiss)
        True

        >>> # From cache - fresh response available
        >>> idle = IdleClient(options=default_options)
        >>> cached_pair = CompletePair(get_request, fresh_response)
        >>> next_state = idle.next(get_request, [cached_pair])
        >>> isinstance(next_state, FromCache)
        True

        >>> # Need revalidation - stale response that cannot be served stale
        >>> idle = IdleClient(options=default_options)
        >>> cached_pair = CompletePair(get_request, stale_response)
        >>> next_state = idle.next(get_request, [cached_pair])
        >>> isinstance(next_state, NeedRevalidation)
        True
        """

        # ============================================================================
        # STEP 1: Handle Range Requests
        # ============================================================================
        # RFC 9111 Section 3.3: Storing Incomplete Responses
        # https://www.rfc-editor.org/rfc/rfc9111.html#section-3.3
        #
        # Range requests are complex and require special handling. For simplicity,
        # this implementation treats all range requests as cache misses.
        # A Range header for which Range.try_from_str() yields None is ignored
        # and the request continues through the normal lookup.
        request_range = Range.try_from_str(request.headers["range"]) if "range" in request.headers else None

        if request_range is not None:
            # Simplified behavior: always forward range requests to origin
            return CacheMiss(options=self.options, request=request)

        # ============================================================================
        # STEP 2: Handle Unsafe Methods (Write-Through)
        # ============================================================================
        # RFC 9111 Section 4, paragraph 5:
        # https://www.rfc-editor.org/rfc/rfc9111.html#section-4-5
        #
        # "A cache MUST write through requests with methods that are unsafe
        # (Section 9.2.1 of [HTTP]) to the origin server; i.e., a cache is not
        # allowed to generate a reply to such a request before having forwarded
        # the request and having received a corresponding response."
        #
        # Unsafe methods: POST, PUT, DELETE, PATCH, etc.
        # Safe methods: GET, HEAD, OPTIONS, TRACE
        if request.method.upper() not in SAFE_METHODS:
            return CacheMiss(request=request, options=self.options)  # pragma: nocover

        # ============================================================================
        # STEP 3: Define Cache Reuse Conditions (RFC 9111 Section 4)
        # ============================================================================
        # The helpers below implement the five conditions that must ALL be
        # satisfied for a cached response to be reusable.

        def url_matches(pair: CompletePair) -> bool:
            """CONDITION 1: the stored request targets the same URL as this request."""
            return bool(pair.request.url == request.url)

        def method_matches(pair: CompletePair) -> bool:
            """
            CONDITION 2: the stored request used the same method.

            Some methods (like HEAD) can sometimes be satisfied by GET responses,
            but this implementation requires exact (case-sensitive) matches.
            """
            return bool(pair.request.method == request.method)

        def vary_headers_same(pair: CompletePair) -> bool:
            """
            CONDITION 3: request headers nominated by the stored Vary header match.

            RFC 9111 Section 4.1: Calculating Cache Keys
            https://www.rfc-editor.org/rfc/rfc9111.html#section-4.1

            Example: If response has "Vary: Accept-Encoding", the cached response can
            only be used if the new request has the same Accept-Encoding header value.
            """
            return vary_headers_match(request, pair)

        def no_cache_missing(pair: CompletePair) -> bool:
            """
            CONDITION 4: the cached response lacks the no-cache directive.

            RFC 9111 Section 5.2.2.4: no-cache Response Directive
            https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.4

            Only a no_cache value of exactly False passes, so both the bare and
            the qualified (field list) form exclude the response here.
            """
            return parse_cache_control(pair.response.headers.get("cache-control")).no_cache is False

        def fresh_or_allowed_stale(pair: CompletePair) -> bool:
            """
            CONDITION 5: the cached response is fresh or allowed to be served stale.

            RFC 9111 Section 4.2:
            "A 'fresh' response is one whose age has not yet exceeded its freshness
            lifetime. Conversely, a 'stale' response is one where it has."

            RFC 9111 Section 4.2.4: Serving Stale Responses
            "A cache MUST NOT generate a stale response unless it is disconnected or
            doing so is explicitly permitted by the client or origin server."

            The "successfully validated" alternative of condition 5 is not checked
            here; stale responses are handed over to NeedRevalidation instead.
            """
            # Honour the configured cache kind instead of always assuming a shared
            # cache, so a private cache gets the lifetime computed for a private cache.
            freshness_lifetime = get_freshness_lifetime(pair.response, is_cache_shared=self.options.shared)
            age = get_age(pair.response)

            # Check freshness: response_is_fresh = (freshness_lifetime > current_age)
            # A response without any freshness lifetime is never considered fresh.
            is_fresh = False if freshness_lifetime is None else age < freshness_lifetime

            # Check if stale responses are allowed (e.g., allow_stale option)
            return is_fresh or allowed_stale(pair.response, allow_stale_option=self.options.allow_stale)

        # ============================================================================
        # STEP 4: Filter Cached Responses by Conditions 1-4
        # ============================================================================
        # Condition 5 (freshness) is applied separately to partition responses into
        # "ready to use" and "needs revalidation" groups.
        filtered_pairs = [
            pair
            for pair in associated_pairs
            if url_matches(pair) and method_matches(pair) and vary_headers_same(pair) and no_cache_missing(pair)
        ]

        # ============================================================================
        # STEP 5: Select Most Recent Response
        # ============================================================================
        # RFC 9111 Section 4, paragraph 8:
        # https://www.rfc-editor.org/rfc/rfc9111.html#section-4-8
        #
        # "When more than one suitable response is stored, a cache MUST use the most
        # recent one (as determined by the Date header field)."
        #
        # Sort by Date header in descending order (most recent first). Responses
        # without a usable Date are treated as if they were received right now;
        # the fallback is computed once so all of them compare equal.
        now = int(time.time())

        def response_date(pair: CompletePair) -> Any:
            """Return the parsed Date of the stored response, or `now` if unavailable."""
            date_header = pair.response.headers.get("date")
            parsed_date = parse_date(date_header) if date_header is not None else None
            return parsed_date or now

        filtered_pairs.sort(key=response_date, reverse=True)

        # ============================================================================
        # STEP 6: Partition by Freshness (Condition 5)
        # ============================================================================
        # - ready_to_use: Fresh or allowed-stale responses that can be served immediately
        # - need_revalidation: Stale responses that require validation before serving
        ready_to_use, need_revalidation = partition(filtered_pairs, fresh_or_allowed_stale)

        # ============================================================================
        # STEP 7: Determine Next State Based on Available Responses
        # ============================================================================

        if ready_to_use:
            # --------------------------------------------------------------------
            # Transition to: FromCache
            # --------------------------------------------------------------------
            # RFC 9111 Section 4, paragraph 4:
            # https://www.rfc-editor.org/rfc/rfc9111.html#section-4-4
            #
            # "When a stored response is used to satisfy a request without validation,
            # a cache MUST generate an Age header field (Section 5.1), replacing any
            # present in the response with a value equal to the stored response's
            # current_age; see Section 4.2.3."

            # Mark all ready-to-use responses with metadata (for observability)
            for pair in ready_to_use:
                pair.response.metadata["hishel_from_cache"] = True  # type: ignore

            # Use the most recent response (first in sorted list)
            selected_pair = ready_to_use[0]

            # Calculate current age and update the Age header
            current_age = get_age(selected_pair.response)

            # Build copies so the stored pair keeps its original headers
            return FromCache(
                pair=replace(
                    selected_pair,
                    response=replace(
                        selected_pair.response,
                        headers=Headers(
                            {
                                **selected_pair.response.headers,
                                "age": str(current_age),
                            }
                        ),
                    ),
                ),
                options=self.options,
            )

        if need_revalidation:
            # --------------------------------------------------------------------
            # Transition to: NeedRevalidation
            # --------------------------------------------------------------------
            # We have cached response(s) that are neither fresh nor allowed to be
            # served stale, so they cannot be used without validation.
            #
            # RFC 9111 Section 4.3: Validation
            # https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3
            #
            # Convert the request into a conditional request using validators
            # (ETag, Last-Modified) from a cached response.
            #
            # NOTE(review): the list is sorted most recent first, so - assuming
            # partition() preserves order - [-1] is the OLDEST stale response.
            # Confirm this is intended rather than [0].
            return NeedRevalidation(
                request=make_conditional_request(request, need_revalidation[-1].response),
                revalidating_pairs=need_revalidation,
                options=self.options,
                original_request=request,
            )

        # --------------------------------------------------------------------
        # Transition to: CacheMiss
        # --------------------------------------------------------------------
        # No suitable cached responses found. The request must be forwarded
        # to the origin server.
        #
        # This can happen when:
        # - No responses are cached for this URI
        # - Cached responses don't match the request (e.g., different Vary headers)
        # - Cached responses have no-cache directive
        return CacheMiss(
            request=request,
            options=self.options,
        )
1395
+
1396
+
1397
+ @dataclass
1398
+ class CacheMiss(State):
1399
+ """
1400
+ Represents a cache miss state where a response must be evaluated for storage.
1401
+
1402
+ This state is reached when:
1403
+ 1. No suitable cached response exists (from IdleClient)
1404
+ 2. A request must be forwarded to the origin server
1405
+ 3. The origin server's response must be evaluated for cacheability
1406
+
1407
+ State Transitions:
1408
+ -----------------
1409
+ - StoreAndUse: Response meets all RFC 9111 storage requirements and will be cached
1410
+ - CouldNotBeStored: Response fails one or more storage requirements and cannot be cached
1411
+
1412
+ RFC 9111 References:
1413
+ -------------------
1414
+ - Section 3: Storing Responses in Caches
1415
+ https://www.rfc-editor.org/rfc/rfc9111.html#section-3
1416
+ - Section 3.5: Authenticated Responses
1417
+ https://www.rfc-editor.org/rfc/rfc9111.html#section-3.5
1418
+
1419
+ Attributes:
1420
+ ----------
1421
+ request : Request
1422
+ The request that caused the cache miss. Note this is a Request object,
1423
+ not an IncompletePair, because this state can be reached from NeedRevalidation
1424
+ where we don't have an incomplete pair.
1425
+ after_revalidation : bool
1426
+ Flag indicating if this cache miss occurred after a revalidation attempt.
1427
+ When True, the response will be marked with hishel_revalidated metadata.
1428
+ options : CacheOptions
1429
+ Configuration options for cache behavior (inherited from State)
1430
+ """
1431
+
1432
+ request: Request
1433
+ """
1434
+ The request that missed the cache.
1435
+
1436
+ Note that this has a type of Request and not IncompletePair because
1437
+ when moving to this state from `NeedRevalidation` we don't have incomplete pair
1438
+ """
1439
+
1440
+ after_revalidation: bool = False
1441
+ """
1442
+ Indicates whether the cache miss occurred after a revalidation attempt.
1443
+ """
1444
+
1445
+ def next(self, response: Response, pair_id: uuid.UUID) -> Union["StoreAndUse", "CouldNotBeStored"]:
1446
+ """
1447
+ Evaluates whether a response can be stored in the cache.
1448
+
1449
+ This method implements the storage decision algorithm from RFC 9111 Section 3.
1450
+ A response can only be stored if ALL of the following conditions are met:
1451
+
1452
+ 1. Request method is understood by the cache
1453
+ 2. Response status code is final (not 1xx informational)
1454
+ 3. Cache understands how to handle the response status code
1455
+ 4. No no-store directive is present
1456
+ 5. Private directive allows storage (for shared caches)
1457
+ 6. Authorization is properly handled (for shared caches)
1458
+ 7. Response contains explicit caching information or is heuristically cacheable
1459
+
1460
+ Parameters:
1461
+ ----------
1462
+ response : Response
1463
+ The HTTP response received from the origin server
1464
+ pair_id : uuid.UUID
1465
+ Unique identifier for this request-response pair
1466
+
1467
+ Returns:
1468
+ -------
1469
+ Union[StoreAndUse, CouldNotBeStored]
1470
+ - StoreAndUse: Response can and will be stored in cache
1471
+ - CouldNotBeStored: Response cannot be stored (fails one or more requirements)
1472
+
1473
+ RFC 9111 Compliance:
1474
+ -------------------
1475
+ From RFC 9111 Section 3:
1476
+ "A cache MUST NOT store a response to a request unless:
1477
+ - the request method is understood by the cache;
1478
+ - the response status code is final;
1479
+ - if the response status code is 206 or 304, or the must-understand cache
1480
+ directive is present: the cache understands the response status code;
1481
+ - the no-store cache directive is not present in the response;
1482
+ - if the cache is shared: the private response directive is either not present
1483
+ or allows a shared cache to store a modified response;
1484
+ - if the cache is shared: the Authorization header field is not present in the
1485
+ request or a response directive is present that explicitly allows shared caching;
1486
+ - the response contains at least one of the following:
1487
+ * a public response directive
1488
+ * a private response directive (if cache is not shared)
1489
+ * an Expires header field
1490
+ * a max-age response directive
1491
+ * an s-maxage response directive (if cache is shared)
1492
+ * a status code that is defined as heuristically cacheable"
1493
+
1494
+ Side Effects:
1495
+ ------------
1496
+ Sets metadata flags on the response object:
1497
+ - hishel_spec_ignored: False (caching spec is being followed)
1498
+ - hishel_from_cache: False (response is from origin, not cache)
1499
+ - hishel_revalidated: True (if after_revalidation is True)
1500
+ - hishel_stored: True/False (whether response was stored)
1501
+
1502
+ Logging:
1503
+ -------
1504
+ When a response cannot be stored, detailed debug logs are emitted explaining
1505
+ which specific RFC requirement failed, with direct links to the relevant
1506
+ RFC sections.
1507
+
1508
+ Examples:
1509
+ --------
1510
+ >>> # Cacheable response
1511
+ >>> cache_miss = CacheMiss(request=get_request, options=default_options)
1512
+ >>> response = Response(
1513
+ ... status_code=200,
1514
+ ... headers=Headers({"cache-control": "max-age=3600"})
1515
+ ... )
1516
+ >>> next_state = cache_miss.next(response, uuid.uuid4())
1517
+ >>> isinstance(next_state, StoreAndUse)
1518
+ True
1519
+
1520
+ >>> # Non-cacheable response (no-store)
1521
+ >>> response = Response(
1522
+ ... status_code=200,
1523
+ ... headers=Headers({"cache-control": "no-store"})
1524
+ ... )
1525
+ >>> next_state = cache_miss.next(response, uuid.uuid4())
1526
+ >>> isinstance(next_state, CouldNotBeStored)
1527
+ True
1528
+ """
1529
+
1530
+ # ============================================================================
1531
+ # STEP 1: Set Response Metadata
1532
+ # ============================================================================
1533
+ # Initialize metadata flags to track the response lifecycle
1534
+
1535
+ response.metadata["hishel_spec_ignored"] = False # type: ignore
1536
+ # We are following the caching specification
1537
+
1538
+ response.metadata["hishel_from_cache"] = False # type: ignore
1539
+ # This response came from origin server, not cache
1540
+
1541
+ if self.after_revalidation:
1542
+ response.metadata["hishel_revalidated"] = True # type: ignore
1543
+ # Mark that this response is the result of a revalidation
1544
+
1545
+ # ============================================================================
1546
+ # STEP 2: Parse Cache-Control Directive
1547
+ # ============================================================================
1548
+ # Extract and parse the Cache-Control header to check caching directives
1549
+
1550
+ request = self.request
1551
+ response_cache_control = parse_cache_control(response.headers.get("cache-control"))
1552
+
1553
+ # ============================================================================
1554
+ # STEP 3: Evaluate Storage Requirements (7 Conditions)
1555
+ # ============================================================================
1556
+ # All conditions must be True for the response to be storable.
1557
+ # Each condition corresponds to a requirement from RFC 9111 Section 3.
1558
+
1559
+ # CONDITION 1: Request Method Understanding
1560
+ # RFC 9111 Section 3, paragraph 2.1:
1561
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.1.1
1562
+ #
1563
+ # "the request method is understood by the cache"
1564
+ #
1565
+ # The cache must recognize and support caching for this HTTP method.
1566
+ # Typically, only safe methods (GET, HEAD) are cacheable.
1567
+ # This prevents caching of methods with side effects (POST, PUT, DELETE).
1568
+ method_understood_by_cache = request.method.upper() in self.options.supported_methods
1569
+
1570
+ # CONDITION 2: Response Status Code is Final
1571
+ # RFC 9111 Section 3, paragraph 2.2:
1572
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.2.1
1573
+ #
1574
+ # "the response status code is final (see Section 15 of [HTTP])"
1575
+ #
1576
+ # 1xx status codes are informational and not final responses.
1577
+ # Only final responses (2xx, 3xx, 4xx, 5xx) can be cached.
1578
+ # Check: status_code // 100 != 1 means not in the 1xx range
1579
+ response_status_code_is_final = response.status_code // 100 != 1
1580
+
1581
+ # CONDITION 3: Cache Understands Response Status Code
1582
+ # RFC 9111 Section 3, paragraph 2.3:
1583
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.3.1
1584
+ #
1585
+ # "if the response status code is 206 or 304, or the must-understand cache
1586
+ # directive (see Section 5.2.2.3) is present: the cache understands the
1587
+ # response status code"
1588
+ #
1589
+ # 206 Partial Content: Used for range requests, requires special handling
1590
+ # 304 Not Modified: Used for conditional requests, is not a complete response
1591
+ #
1592
+ # This implementation takes a conservative approach: if the status is 206 or 304,
1593
+ # we mark it as not understood, preventing storage. A full implementation would
1594
+ # handle these specially (304 updates existing cache, 206 stores partial content).
1595
+ if response.status_code in (206, 304):
1596
+ understands_how_to_cache = False
1597
+ else:
1598
+ understands_how_to_cache = True
1599
+
1600
+ # CONDITION 4: No no-store Directive
1601
+ # RFC 9111 Section 5.2.2.5: no-store Response Directive
1602
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.5
1603
+ #
1604
+ # "The no-store response directive indicates that a cache MUST NOT store
1605
+ # any part of either the immediate request or the response"
1606
+ #
1607
+ # no-store is the strongest cache prevention directive. When present,
1608
+ # nothing should be stored, regardless of other directives.
1609
+ no_store_is_not_present = not response_cache_control.no_store
1610
+
1611
+ # CONDITION 5: Private Directive Allows Storing (Shared Cache Only)
1612
+ # RFC 9111 Section 5.2.2.7: private Response Directive
1613
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.7
1614
+ #
1615
+ # "The unqualified private response directive indicates that a shared cache
1616
+ # MUST NOT store the response"
1617
+ #
1618
+ # For shared caches (proxies, CDNs):
1619
+ # - If private=True, the response is for a single user only
1620
+ # - Shared caches MUST NOT store private responses
1621
+ #
1622
+ # For private caches (browser caches):
1623
+ # - private directive is allowed and encouraged
1624
+ #
1625
+ # Logic: If cache is shared AND response is private, storing is NOT allowed
1626
+ # Therefore: storing is allowed unless (shared cache) AND (private is True)
1627
+ private_directive_allows_storing = not (self.options.shared and response_cache_control.private is True)
1628
+
1629
+ # CONDITION 6: Authorization Header Handling (Shared Cache Only)
1630
+ # RFC 9111 Section 3.5: Caching Authenticated Responses
1631
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-3.5
1632
+ #
1633
+ # "A shared cache MUST NOT use a cached response to a request with an
1634
+ # Authorization header field unless... a response directive is present that
1635
+ # explicitly allows shared caching"
1636
+ #
1637
+ # Requests with Authorization headers often contain user-specific data.
1638
+ # Shared caches must be careful not to serve one user's data to another.
1639
+ #
1640
+ # This check is incomplete in the current implementation and needs review:
1640
+ # TODO: Fix logic - should be: (not shared) OR (no auth header) OR (has explicit directive)
1641
+ # Current logic: (not shared) OR (no auth header); explicit directives are not considered yet
1643
+ is_shared_and_authorized = not (self.options.shared and "authorization" in request.headers)
1644
+
1645
+ # CONDITION 7: Response Contains Required Caching Information
1646
+ # RFC 9111 Section 3, paragraph 2.7:
1647
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.7.1
1648
+ #
1649
+ # "the response contains at least one of the following:..."
1650
+ #
1651
+ # A response must have explicit caching metadata OR be heuristically cacheable.
1652
+ # This ensures we only cache responses that the origin server intended to be cached.
1653
+ contains_required_component = (
1654
+ # OPTION A: public Directive
1655
+ # RFC 9111 Section 5.2.2.9:
1656
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.9
1657
+ # "The public response directive indicates that a cache MAY store the response"
1658
+ # Explicitly marks response as cacheable by any cache
1659
+ response_cache_control.public
1660
+ # OPTION B: private Directive (Private Cache Only)
1661
+ # RFC 9111 Section 5.2.2.7:
1662
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.7
1663
+ # "private... indicates that... a private cache MAY store the response"
1664
+ # For private caches only (not shared caches)
1665
+ or (not self.options.shared and response_cache_control.private)
1666
+ # OPTION C: Expires Header
1667
+ # RFC 9111 Section 5.3: Expires
1668
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.3
1669
+ # "The Expires header field gives the date/time after which the response
1670
+ # is considered stale"
1671
+ # Explicit expiration time
1672
+ or ("expires" in response.headers)
1673
+ # OPTION D: max-age Directive
1674
+ # RFC 9111 Section 5.2.2.1: max-age Response Directive
1675
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.1
1676
+ # "The max-age response directive indicates that the response is to be
1677
+ # considered stale after its age is greater than the specified number of seconds"
1678
+ # Most common caching directive
1679
+ or (response_cache_control.max_age is not None)
1680
+ # OPTION E: s-maxage Directive (Shared Cache Only)
1681
+ # RFC 9111 Section 5.2.2.10: s-maxage Response Directive
1682
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-5.2.2.10
1683
+ # "The s-maxage response directive indicates that, for a shared cache,
1684
+ # the maximum age specified by this directive overrides the maximum age
1685
+ # specified by either the max-age directive or the Expires header field"
1686
+ # Specific to shared caches (proxies, CDNs)
1687
+ or (self.options.shared and response_cache_control.s_maxage is not None)
1688
+ # OPTION F: Heuristically Cacheable Status Code
1689
+ # RFC 9111 Section 4.2.2: Calculating Heuristic Freshness
1690
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-4.2.2
1691
+ # "a cache MAY assign a heuristic expiration time when an explicit time
1692
+ # is not specified"
1693
+ #
1694
+ # Certain status codes are defined as "heuristically cacheable":
1695
+ # 200 OK, 203 Non-Authoritative, 204 No Content, 206 Partial Content,
1696
+ # 300 Multiple Choices, 301 Moved Permanently, 308 Permanent Redirect,
1697
+ # 404 Not Found, 405 Method Not Allowed, 410 Gone,
1698
+ # 414 URI Too Long, 501 Not Implemented
1699
+ or response.status_code in HEURISTICALLY_CACHEABLE_STATUS_CODES
1700
+ )
1701
+
1702
+ # ============================================================================
1703
+ # STEP 4: Determine Storage Decision
1704
+ # ============================================================================
1705
+ # If ANY condition is False, the response cannot be stored
1706
+
1707
+ if (
1708
+ not method_understood_by_cache
1709
+ or not response_status_code_is_final
1710
+ or not understands_how_to_cache
1711
+ or not no_store_is_not_present
1712
+ or not private_directive_allows_storing
1713
+ or not is_shared_and_authorized
1714
+ or not contains_required_component
1715
+ ):
1716
+ # --------------------------------------------------------------------
1717
+ # Transition to: CouldNotBeStored
1718
+ # --------------------------------------------------------------------
1719
+ # One or more storage requirements failed. Log the specific reason
1720
+ # and return a CouldNotBeStored state.
1721
+
1722
+ # Detailed logging for debugging (only when DEBUG level is enabled)
1723
+ if logger.isEnabledFor(logging.DEBUG):
1724
+ if not method_understood_by_cache:
1725
+ logger.debug(
1726
+ "Cannot store the response because the request method is not understood by the cache. "
1727
+ "See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.1.1"
1728
+ )
1729
+ elif not response_status_code_is_final:
1730
+ logger.debug(
1731
+ f"Cannot store the response because the response status code ({response.status_code}) "
1732
+ "is not final. See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.2.1"
1733
+ )
1734
+ elif not understands_how_to_cache:
1735
+ logger.debug(
1736
+ "Cannot store the response because the cache does not understand how to cache the response. "
1737
+ "See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.3.2"
1738
+ )
1739
+ elif not no_store_is_not_present:
1740
+ logger.debug(
1741
+ "Cannot store the response because the no-store cache directive is present in the response. "
1742
+ "See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.4.1"
1743
+ )
1744
+ elif not private_directive_allows_storing:
1745
+ logger.debug(
1746
+ "Cannot store the response because the `private` response directive does not "
1747
+ "allow shared caches to store it. See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.5.1"
1748
+ )
1749
+ elif not is_shared_and_authorized:
1750
+ logger.debug(
1751
+ "Cannot store the response because the cache is shared and the request contains "
1752
+ "an Authorization header field. See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.6.1"
1753
+ )
1754
+ elif not contains_required_component:
1755
+ logger.debug(
1756
+ "Cannot store the response because it does not contain any of the required components. "
1757
+ "See: https://www.rfc-editor.org/rfc/rfc9111.html#section-3-2.7.1"
1758
+ )
1759
+
1760
+ # Mark response as not stored
1761
+ response.metadata["hishel_stored"] = False # type: ignore
1762
+
1763
+ return CouldNotBeStored(response=response, pair_id=pair_id, options=self.options)
1764
+
1765
+ # --------------------------------------------------------------------
1766
+ # Transition to: StoreAndUse
1767
+ # --------------------------------------------------------------------
1768
+ # All storage requirements are met. The response will be cached.
1769
+
1770
+ logger.debug("Storing response in cache")
1771
+
1772
+ # Mark response as stored
1773
+ response.metadata["hishel_stored"] = True # type: ignore
1774
+
1775
+ # Remove headers that should not be stored
1776
+ # RFC 9111 Section 3.1: Storing Header and Trailer Fields
1777
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-3.1
1778
+ # Certain headers (Connection, hop-by-hop headers, etc.) must be excluded
1779
+ cleaned_response = exclude_unstorable_headers(response, self.options.shared)
1780
+
1781
+ return StoreAndUse(
1782
+ pair_id=pair_id,
1783
+ response=cleaned_response,
1784
+ options=self.options,
1785
+ )
1786
+
1787
+
1788
+ @dataclass
1789
+ class NeedRevalidation(State):
1790
+ """
1791
+ Represents a state where cached responses require validation before use.
1792
+
1793
+ This state is reached when:
1794
+ 1. A stale cached response exists (from IdleClient)
1795
+ 2. The cached response cannot be served without validation
1796
+ 3. A conditional request has been sent to the origin server
1797
+
1798
+ The validation mechanism uses HTTP conditional requests with validators
1799
+ (ETag, Last-Modified) to check if the cached response is still valid.
1800
+
1801
+ State Transitions:
1802
+ -----------------
1803
+ - NeedToBeUpdated: 304 response received, cached responses can be freshened
1804
+ - InvalidatePairs + CacheMiss: 2xx/5xx response received, new response must be cached
1805
+ - CacheMiss: No matching responses found during freshening
1806
+
1807
+ RFC 9111 References:
1808
+ -------------------
1809
+ - Section 4.3: Validation
1810
+ https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3
1811
+ - Section 4.3.3: Handling a Validation Response
1812
+ https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3.3
1813
+ - Section 4.3.4: Freshening Stored Responses
1814
+ https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3.4
1815
+
1816
+ Attributes:
1817
+ ----------
1818
+ request : Request
1819
+ The conditional request that was sent to the server for revalidation.
1820
+ This request contains If-None-Match (from ETag) or If-Modified-Since
1821
+ (from Last-Modified) headers.
1822
+ original_request : Request
1823
+ The original client request (without conditional headers) that initiated
1824
+ this revalidation. This is used when creating new cache entries.
1825
+ revalidating_pairs : list[CompletePair]
1826
+ The cached request-response pairs that are being revalidated. These are
1827
+ stale responses that might still be usable if the server confirms they
1828
+ haven't changed (304 response).
1829
+ options : CacheOptions
1830
+ Configuration options for cache behavior (inherited from State)
1831
+ """
1832
+
1833
+ request: Request
1834
+ """
1835
+ The request that was sent to the server for revalidation.
1836
+ """
1837
+
1838
+ original_request: Request
1839
+
1840
+ revalidating_pairs: list[CompletePair]
1841
+ """
1842
+ The stored pairs that the request was sent for revalidation.
1843
+ """
1844
+
1845
def next(self, revalidation_response: Response) -> Union["NeedToBeUpdated", "InvalidatePairs", "CacheMiss"]:
    """
    Handles the response to a conditional request and determines the next state.

    This method implements the validation response handling logic from RFC 9111
    Section 4.3.3. The behavior depends on the response status code:

    - 304 Not Modified: Cached responses are still valid, freshen and reuse them
    - Any other final response (2xx, other 3xx, 4xx, 5xx): this is a full
      response, so none of the stored responses are suitable; the new response
      is used and the old pairs are invalidated
    - Anything else (1xx or a code outside 100-599): unexpected, RuntimeError

    Parameters:
    ----------
    revalidation_response : Response
        The HTTP response received from the origin server in response to
        the conditional request

    Returns:
    -------
    Union[NeedToBeUpdated, InvalidatePairs, CacheMiss]
        - For 304: whatever `freshening_stored_responses` returns
        - For any other final response: InvalidatePairs wrapping the state
          produced by `CacheMiss.next` for the new response

    Raises:
    ------
    RuntimeError
        If the status code is not a final status code (not 2xx-5xx).
    IndexError
        If `revalidating_pairs` is empty when a full response is received
        (there is no pair ID to reuse for the new response).

    RFC 9111 Compliance:
    -------------------
    From RFC 9111 Section 4.3.3:
    "Cache handling of a response to a conditional request depends upon its
    status code:
    - A 304 (Not Modified) response status code indicates that the stored
      response can be updated and reused; see Section 4.3.4.
    - A full response (i.e., one containing content) indicates that none of
      the stored responses nominated in the conditional request are suitable.
      Instead, the cache MUST use the full response to satisfy the request.
      The cache MAY store such a full response, subject to its constraints
      (see Section 3).
    - However, if a cache receives a 5xx (Server Error) response while
      attempting to validate a response, it can either forward this response
      to the requesting client or act as if the server failed to respond.
      In the latter case, the cache can send a previously stored response,
      subject to its constraints on doing so (see Section 4.2.4), or retry
      the validation request."

    Implementation Notes:
    --------------------
    - All revalidating pairs except the last are invalidated on a full response
    - The last pair's ID is reused for the new response
    - 5xx responses are forwarded like any other full response; this method
      does not consult `options.allow_stale` to serve a stale response instead
    - 3xx (other than 304) and 4xx responses (for example 404 or 410 when the
      resource disappeared between requests) are also full responses and are
      handled the same way instead of raising

    Examples:
    --------
    >>> # 304 Not Modified - freshen cached response
    >>> need_revalidation = NeedRevalidation(
    ...     request=conditional_request,
    ...     original_request=original_request,
    ...     revalidating_pairs=[cached_pair],
    ...     options=default_options
    ... )
    >>> response_304 = Response(status_code=304, headers=Headers({"etag": '"abc123"'}))
    >>> next_state = need_revalidation.next(response_304)
    >>> isinstance(next_state, NeedToBeUpdated)
    True

    >>> # 200 OK - use new response
    >>> response_200 = Response(status_code=200, headers=Headers({"cache-control": "max-age=3600"}))
    >>> next_state = need_revalidation.next(response_200)
    >>> isinstance(next_state, InvalidatePairs)
    True
    """

    status_code = revalidation_response.status_code

    # ============================================================================
    # STEP 1: Handle 304 Not Modified Response
    # ============================================================================
    # RFC 9111 Section 4.3.3, paragraph 1:
    # https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3.3
    #
    # "A 304 (Not Modified) response status code indicates that the stored
    # response can be updated and reused; see Section 4.3.4."
    #
    # This must be checked before the generic 3xx handling below.
    if status_code == 304:
        return self.freshening_stored_responses(revalidation_response)

    # ============================================================================
    # STEP 2: Handle Full Responses (2xx, 3xx, 4xx, 5xx)
    # ============================================================================
    # RFC 9111 Section 4.3.3, paragraphs 2 and 3:
    # https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3.3
    #
    # "A full response (i.e., one containing content) indicates that none of
    # the stored responses nominated in the conditional request are suitable.
    # Instead, the cache MUST use the full response to satisfy the request.
    # The cache MAY store such a full response, subject to its constraints
    # (see Section 3)."
    #
    # The RFC does not restrict "full response" to 2xx: an origin may answer a
    # conditional request with 404/410 (resource removed), 401/403 (access
    # changed), a redirect, or a 5xx error. In every case the stored responses
    # were not confirmed, so we:
    #   1. Invalidate all old pairs except the last one
    #   2. Reuse the last pair's ID for the new response
    #   3. Let CacheMiss.next decide whether the new response may be stored
    #
    # For 5xx the RFC also allows serving a stale stored response or retrying;
    # this implementation forwards the error response instead.
    if status_code // 100 in (2, 3, 4, 5):
        return InvalidatePairs(
            options=self.options,
            pair_ids=[pair.id for pair in self.revalidating_pairs[:-1]],
            # After invalidation, attempt to cache the new response
            next_state=CacheMiss(
                request=self.original_request,
                options=self.options,
                after_revalidation=True,  # Mark that this occurred during revalidation
            ).next(revalidation_response, pair_id=self.revalidating_pairs[-1].id),
        )

    # ============================================================================
    # STEP 3: Handle Unexpected Status Codes
    # ============================================================================
    # Only non-final (1xx) or out-of-range status codes reach this point.
    # They are not valid final answers to a conditional request.
    raise RuntimeError(
        f"Unexpected response status code during revalidation: {revalidation_response.status_code}"
    )  # pragma: nocover
2015
+
2016
+ def freshening_stored_responses(
2017
+ self, revalidation_response: Response
2018
+ ) -> "NeedToBeUpdated" | "InvalidatePairs" | "CacheMiss":
2019
+ """
2020
+ Freshens cached responses after receiving a 304 Not Modified response.
2021
+
2022
+ When the server responds with 304, it means "the resource hasn't changed,
2023
+ but here's updated metadata." This method:
2024
+ 1. Identifies which cached responses match the 304 response
2025
+ 2. Updates their headers with fresh metadata from the 304
2026
+ 3. Invalidates any cached responses that don't match
2027
+
2028
+ Matching is done using validators in this priority order:
2029
+ 1. Strong ETag (if present and not weak)
2030
+ 2. Last-Modified (if present)
2031
+ 3. Single response assumption (if only one cached response exists)
2032
+
2033
+ Parameters:
2034
+ ----------
2035
+ revalidation_response : Response
2036
+ The 304 Not Modified response from the server, containing updated
2037
+ metadata (Date, Cache-Control, ETag, etc.)
2038
+
2039
+ Returns:
2040
+ -------
2041
+ Union[NeedToBeUpdated, InvalidatePairs, CacheMiss]
2042
+ - NeedToBeUpdated: When matching responses are found and updated
2043
+ - InvalidatePairs: Wraps NeedToBeUpdated if non-matching responses exist
2044
+ - CacheMiss: When no matching responses are found
2045
+
2046
+ RFC 9111 Compliance:
2047
+ -------------------
2048
+ From RFC 9111 Section 4.3.4:
2049
+ "When a cache receives a 304 (Not Modified) response, it needs to identify
2050
+ stored responses that are suitable for updating with the new information
2051
+ provided, and then do so.
2052
+
2053
+ The initial set of stored responses to update are those that could have
2054
+ been chosen for that request...
2055
+
2056
+ Then, that initial set of stored responses is further filtered by the
2057
+ first match of:
2058
+ - If the 304 response contains a strong entity tag: the stored responses
2059
+ with the same strong entity tag.
2060
+ - If the 304 response contains a Last-Modified value: the stored responses
2061
+ with the same Last-Modified value.
2062
+ - If there is only a single stored response: that response."
2063
+
2064
+ Implementation Notes:
2065
+ --------------------
2066
+ - Weak ETags (starting with "W/") are not used for matching
2067
+ - Only strong ETags provide reliable validation
2068
+ - If no validators match, all responses are invalidated
2069
+ - Multiple responses can be freshened if they share the same validator
2070
+
2071
+ Examples:
2072
+ --------
2073
+ >>> # Matching by strong ETag
2074
+ >>> cached_response = Response(headers=Headers({"etag": '"abc123"'}))
2075
+ >>> revalidation_response = Response(
2076
+ ... status_code=304,
2077
+ ... headers=Headers({"etag": '"abc123"', "cache-control": "max-age=3600"})
2078
+ ... )
2079
+ >>> # Cached response will be freshened with new Cache-Control
2080
+
2081
+ >>> # Non-matching ETag
2082
+ >>> cached_response = Response(headers=Headers({"etag": '"old123"'}))
2083
+ >>> revalidation_response = Response(
2084
+ ... status_code=304,
2085
+ ... headers=Headers({"etag": '"new456"'})
2086
+ ... )
2087
+ >>> # Cached response will be invalidated (doesn't match)
2088
+ """
2089
+
2090
+ # ============================================================================
2091
+ # STEP 1: Identify Matching Responses Using Validators
2092
+ # ============================================================================
2093
+ # RFC 9111 Section 4.3.4: Freshening Stored Responses
2094
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-4.3.4
2095
+ #
2096
+ # The 304 response tells us "the resource is unchanged", but we need to
2097
+ # figure out WHICH of our cached responses match this confirmation.
2098
+ #
2099
+ # We use validators in priority order:
2100
+ # Priority 1: Strong ETag (most reliable)
2101
+ # Priority 2: Last-Modified timestamp
2102
+ # Priority 3: Single response assumption
2103
+
2104
+ identified_for_revalidation: list[CompletePair]
2105
+
2106
+ # MATCHING STRATEGY 1: Strong ETag
2107
+ # RFC 9110 Section 8.8.3: ETag
2108
+ # https://www.rfc-editor.org/rfc/rfc9110#section-8.8.3
2109
+ #
2110
+ # "If the 304 response contains a strong entity tag: the stored responses
2111
+ # with the same strong entity tag."
2112
+ #
2113
+ # ETags come in two flavors:
2114
+ # - Strong: "abc123" (exact byte-for-byte match)
2115
+ # - Weak: W/"abc123" (semantically equivalent, but not byte-identical)
2116
+ #
2117
+ # Only strong ETags are reliable for caching decisions. Weak ETags
2118
+ # indicate semantic equivalence but the content might differ slightly
2119
+ # (e.g., gzip compression, whitespace changes).
2120
+ if "etag" in revalidation_response.headers and (not revalidation_response.headers["etag"].startswith("W/")):
2121
+ # Found a strong ETag in the 304 response
2122
+ # Partition cached responses: matching vs non-matching ETags
2123
+ identified_for_revalidation, need_to_be_invalidated = partition(
2124
+ self.revalidating_pairs,
2125
+ lambda pair: pair.response.headers.get("etag") == revalidation_response.headers.get("etag"), # type: ignore[no-untyped-call]
2126
+ )
2127
+
2128
+ # MATCHING STRATEGY 2: Last-Modified
2129
+ # RFC 9110 Section 8.8.2: Last-Modified
2130
+ # https://www.rfc-editor.org/rfc/rfc9110#section-8.8.2
2131
+ #
2132
+ # "If the 304 response contains a Last-Modified value: the stored responses
2133
+ # with the same Last-Modified value."
2134
+ #
2135
+ # Last-Modified is a timestamp indicating when the resource was last changed.
2136
+ # It's less precise than ETags (1-second granularity) but widely supported.
2137
+ # If the 304 has a Last-Modified, we can match it against cached responses.
2138
+ elif revalidation_response.headers.get("last-modified"):
2139
+ # Found Last-Modified in the 304 response
2140
+ # Partition cached responses: matching vs non-matching timestamps
2141
+ identified_for_revalidation, need_to_be_invalidated = partition(
2142
+ self.revalidating_pairs,
2143
+ lambda pair: pair.response.headers.get("last-modified")
2144
+ == revalidation_response.headers.get("last-modified"), # type: ignore[no-untyped-call]
2145
+ )
2146
+
2147
+ # MATCHING STRATEGY 3: Single Response Assumption
2148
+ # RFC 9111 Section 4.3.4:
2149
+ #
2150
+ # "If there is only a single stored response: that response."
2151
+ #
2152
+ # If we only have one cached response and the server says "not modified",
2153
+ # we can safely assume that single response is the one being confirmed.
2154
+ # This handles cases where the server doesn't return validators in the 304.
2155
+ else:
2156
+ if len(self.revalidating_pairs) == 1:
2157
+ # Only one cached response - it must be the matching one
2158
+ identified_for_revalidation, need_to_be_invalidated = [self.revalidating_pairs[0]], []
2159
+ else:
2160
+ # Multiple cached responses but no validators to match them
2161
+ # We cannot determine which (if any) are valid
2162
+ # Conservative approach: invalidate all of them
2163
+ identified_for_revalidation, need_to_be_invalidated = [], self.revalidating_pairs
2164
+
2165
+ # ============================================================================
2166
+ # STEP 2: Update Matching Responses or Create Cache Miss
2167
+ # ============================================================================
2168
+ # If we found matching responses, freshen them with new metadata.
2169
+ # If we found no matches, treat it as a cache miss.
2170
+
2171
+ next_state: "NeedToBeUpdated" | "CacheMiss"
2172
+
2173
+ if identified_for_revalidation:
2174
+ # We found responses that match the 304 confirmation
2175
+ # Update their headers with fresh metadata from the 304 response
2176
+ #
2177
+ # RFC 9111 Section 3.2: Updating Stored Header Fields
2178
+ # https://www.rfc-editor.org/rfc/rfc9111.html#section-3.2
2179
+ #
2180
+ # "When doing so, the cache MUST add each header field in the provided
2181
+ # response to the stored response, replacing field values that are
2182
+ # already present"
2183
+ #
2184
+ # The refresh_response_headers function handles this header merging
2185
+ # while excluding certain headers that shouldn't be updated
2186
+ # (Content-Encoding, Content-Type, Content-Range).
2187
+ next_state = NeedToBeUpdated(
2188
+ updating_pairs=[
2189
+ replace(
2190
+ pair,
2191
+ response=refresh_response_headers(pair.response, revalidation_response),
2192
+ )
2193
+ for pair in identified_for_revalidation
2194
+ ],
2195
+ original_request=self.original_request,
2196
+ options=self.options,
2197
+ )
2198
+ else:
2199
+ # No matching responses found
2200
+ # This is unusual - the server said "not modified" but we can't figure
2201
+ # out which cached response it's referring to.
2202
+ # Treat this as a cache miss and let the normal flow handle it.
2203
+ next_state = CacheMiss(
2204
+ options=self.options,
2205
+ request=self.original_request,
2206
+ after_revalidation=True,
2207
+ )
2208
+
2209
+ # ============================================================================
2210
+ # STEP 3: Invalidate Non-Matching Responses (if any)
2211
+ # ============================================================================
2212
+ # If we had multiple cached responses and only some matched, we need to
2213
+ # invalidate the non-matching ones. They're outdated or incorrect.
2214
+ #
2215
+ # For example:
2216
+ # - Cached: Two responses with different ETags
2217
+ # - 304 response: Matches only one ETag
2218
+ # - Action: Update the matching one, invalidate the other
2219
+
2220
+ if need_to_be_invalidated:
2221
+ # Wrap the next state in an invalidation operation
2222
+ return InvalidatePairs(
2223
+ options=self.options,
2224
+ pair_ids=[pair.id for pair in need_to_be_invalidated],
2225
+ next_state=next_state,
2226
+ )
2227
+
2228
+ # No invalidations needed, return the next state directly
2229
+ return next_state
2230
+
2231
+
2232
@dataclass
class StoreAndUse(State):
    """
    Terminal state signalling that the response may be written to the cache
    and then served.

    ``next`` returns ``None``: no further transition follows this state.
    """

    # UUID carried with this outcome; presumably the id of the request/response
    # pair the response belongs to -- confirm against the code building this state.
    pair_id: uuid.UUID

    # The response this state refers to.
    response: Response

    def next(self) -> None:
        # Nothing to transition to from here.
        return None  # pragma: nocover
2244
+
2245
+
2246
@dataclass
class CouldNotBeStored(State):
    """
    Terminal state signalling that the response was not eligible for storage
    in the cache.

    ``next`` returns ``None``: no further transition follows this state.
    """

    # NOTE: dataclass field order defines the positional order of the generated
    # ``__init__``; ``response`` intentionally stays ahead of ``pair_id``.

    # The response that could not be stored.
    response: Response

    # UUID carried with this outcome; presumably the id of the request/response
    # pair the response belongs to -- confirm against the code building this state.
    pair_id: uuid.UUID

    def next(self) -> None:
        # Nothing to transition to from here.
        return None  # pragma: nocover
2258
+
2259
+
2260
@dataclass
class InvalidatePairs(State):
    """
    Intermediate state that represents the deletion of cache pairs.

    It wraps another state: the pairs listed in ``pair_ids`` are the ones to be
    dropped, and ``next`` hands back the wrapped state unchanged. The deletion
    itself is not performed by this class; presumably the component driving the
    state machine carries it out -- confirm against the caller.
    """

    # Ids of the pairs to invalidate.
    pair_ids: list[uuid.UUID]

    # State to continue with after this one.
    next_state: AnyState

    def next(self) -> AnyState:
        # Pure pass-through to the wrapped state.
        return self.next_state
2272
+
2273
+
2274
@dataclass
class FromCache(State):
    """
    Terminal state indicating that the request is satisfied by a stored pair.

    ``next`` returns ``None``: no further transition follows this state.
    """

    pair: CompletePair
    """
    The single stored pair used to satisfy the request.
    """

    def next(self) -> None:
        # Nothing to transition to from here.
        return None  # pragma: nocover
2283
+
2284
+
2285
@dataclass
class NeedToBeUpdated(State):
    """
    State carrying pairs that have to be updated before the request is served
    from the cache.

    ``next`` transitions to ``FromCache`` using the last entry of
    ``updating_pairs`` and the same ``options``.
    """

    # Pairs to update; ``next`` indexes the last one, so the list is expected
    # to be non-empty.
    updating_pairs: list[CompletePair]

    # The request this state was created for.
    original_request: Request

    def next(self) -> FromCache:
        chosen_pair = self.updating_pairs[-1]  # pragma: nocover
        return FromCache(options=self.options, pair=chosen_pair)  # pragma: nocover