hishel 0.1.4__py3-none-any.whl → 1.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. hishel/__init__.py +55 -53
  2. hishel/{beta/_async_cache.py → _async_cache.py} +3 -3
  3. hishel/{beta → _core}/__init__.py +6 -6
  4. hishel/{beta/_core → _core}/_async/_storages/_sqlite.py +3 -3
  5. hishel/{beta/_core → _core}/_base/_storages/_base.py +13 -1
  6. hishel/{beta/_core → _core}/_base/_storages/_packing.py +5 -5
  7. hishel/_core/_headers.py +636 -0
  8. hishel/{beta/_core → _core}/_spec.py +89 -2
  9. hishel/{beta/_core → _core}/_sync/_storages/_sqlite.py +3 -3
  10. hishel/{beta/_core → _core}/models.py +1 -1
  11. hishel/{beta/_sync_cache.py → _sync_cache.py} +3 -3
  12. hishel/{beta/httpx.py → httpx.py} +18 -7
  13. hishel/{beta/requests.py → requests.py} +15 -10
  14. hishel-1.0.0.dev0.dist-info/METADATA +321 -0
  15. hishel-1.0.0.dev0.dist-info/RECORD +19 -0
  16. hishel/_async/__init__.py +0 -5
  17. hishel/_async/_client.py +0 -30
  18. hishel/_async/_mock.py +0 -43
  19. hishel/_async/_pool.py +0 -201
  20. hishel/_async/_storages.py +0 -768
  21. hishel/_async/_transports.py +0 -282
  22. hishel/_controller.py +0 -581
  23. hishel/_exceptions.py +0 -10
  24. hishel/_files.py +0 -54
  25. hishel/_headers.py +0 -215
  26. hishel/_lfu_cache.py +0 -71
  27. hishel/_lmdb_types_.pyi +0 -53
  28. hishel/_s3.py +0 -122
  29. hishel/_serializers.py +0 -329
  30. hishel/_sync/__init__.py +0 -5
  31. hishel/_sync/_client.py +0 -30
  32. hishel/_sync/_mock.py +0 -43
  33. hishel/_sync/_pool.py +0 -201
  34. hishel/_sync/_storages.py +0 -768
  35. hishel/_sync/_transports.py +0 -282
  36. hishel/_synchronization.py +0 -37
  37. hishel/beta/_core/__init__.py +0 -0
  38. hishel/beta/_core/_headers.py +0 -301
  39. hishel-0.1.4.dist-info/METADATA +0 -404
  40. hishel-0.1.4.dist-info/RECORD +0 -41
  41. {hishel-0.1.4.dist-info → hishel-1.0.0.dev0.dist-info}/WHEEL +0 -0
  42. {hishel-0.1.4.dist-info → hishel-1.0.0.dev0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,636 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Iterator, List, Literal, Mapping, MutableMapping, Optional, Union, cast
5
+
6
+ """
7
+ HTTP token and quoted-string parsing utilities.
8
+
9
+ These functions implement RFC 7230 parsing rules for HTTP/1.1 tokens
10
+ and quoted strings.
11
+ """
12
+
13
+
14
def is_char(c: str) -> bool:
    """
    Tell whether ``c`` is a US-ASCII character.

    Per RFC 7230: CHAR = any US-ASCII character (octets 0 - 127)

    Args:
        c: Single character string

    Returns:
        True when the code point of ``c`` is at most 127; False for an
        empty string or a code point above 127
    """
    # An empty string short-circuits before ord() is ever called.
    return bool(c) and ord(c) < 128
29
+
30
+
31
def is_ctl(c: str) -> bool:
    """
    Tell whether ``c`` is an ASCII control character.

    Per RFC 7230: CTL = control characters (0-31 and 127)

    Args:
        c: Single character string

    Returns:
        True when the code point of ``c`` is in 0-31 or equals 127 (DEL);
        False otherwise, including for an empty string
    """
    if c:
        code = ord(c)
        return code < 32 or code == 127
    return False
47
+
48
+
49
# The nineteen separator characters from RFC 2616 Section 2.2. A frozenset
# gives exact single-character membership; testing against a plain string
# would also accept multi-character substrings such as "()" or ",;".
_HTTP_SEPARATORS = frozenset('()<>@,;:\\"/[]?={} \t')


def is_separator(c: str) -> bool:
    r"""
    Check if character is an HTTP separator.

    Per RFC 2616 Section 2.2:
        separators = "(" | ")" | "<" | ">" | "@"
                   | "," | ";" | ":" | "\" | <">
                   | "/" | "[" | "]" | "?" | "="
                   | "{" | "}" | SP | HT

    Args:
        c: Single character string

    Returns:
        True if ``c`` is exactly one separator character, False otherwise
        (including for an empty or multi-character string)

    Examples:
        >>> is_separator(',')
        True
        >>> is_separator('a')
        False
        >>> is_separator('()')
        False
    """
    return c in _HTTP_SEPARATORS
68
+
69
+
70
def is_token(c: str) -> bool:
    """
    Tell whether ``c`` may appear inside an HTTP token.

    Per RFC 7230 Section 3.2.6:
        token = 1*tchar
        tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
              / "+" / "-" / "." / "0"-"9" / "A"-"Z"
              / "^" / "_" / "`" / "a"-"z" / "|" / "~"

    Implementation: a token character is a CHAR that is neither a CTL
    nor a separator.

    Args:
        c: Single character string

    Returns:
        True if character is valid in a token, False otherwise

    Examples:
        >>> is_token('a')
        True
        >>> is_token('Z')
        True
        >>> is_token('5')
        True
        >>> is_token('-')
        True
        >>> is_token('!')
        True
        >>> is_token(' ')
        False
        >>> is_token(',')
        False
        >>> is_token('=')
        False
    """
    # Each guard rules out one excluded class, in the same order as the
    # original chained expression.
    if not is_char(c):
        return False
    if is_ctl(c):
        return False
    return not is_separator(c)
107
+
108
+
109
def is_qd_text(c: str) -> bool:
    r"""
    Tell whether ``c`` may appear unescaped inside a quoted-string.

    Per RFC 7230 Section 3.2.6:
        quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
        qdtext        = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text
        obs-text      = %x80-FF

    Accepted code points:
        - 0x09 (HTAB), 0x20 (SP), 0x21 (!)
        - 0x23-0x5B (# to [), which leaves out the double quote 0x22
        - 0x5D-0x7E (] to ~), which leaves out the backslash 0x5C
        - 0x80 and above (treated as obs-text)

    Args:
        c: Single character string

    Returns:
        True if character is valid quoted-text, False otherwise
        (including for an empty string)
    """
    if not c:
        return False

    code = ord(c)
    if code in (0x09, 0x20, 0x21):  # HTAB, SP, "!"
        return True
    if 0x23 <= code <= 0x5B or 0x5D <= code <= 0x7E:
        return True
    # Everything else is accepted only when at or above 0x80 (obs-text).
    return code >= 0x80
144
+
145
+
146
def http_unquote_pair(c: str) -> str:
    r"""
    Unquote a single escaped character from a quoted-pair.

    Per RFC 7230 Section 3.2.6:
        quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
        VCHAR       = visible characters (0x21-0x7E)

    Valid escaped characters:
        - HTAB (0x09)
        - SP and VCHAR (0x20-0x7E)
        - obs-text (0x80 and above)

    Invalid characters are replaced with '?'

    Args:
        c: Single character string (the character after the backslash)

    Returns:
        The unquoted character, or '?' if it is empty or not allowed
        in a quoted-pair

    Examples:
        >>> http_unquote_pair('"')
        '"'
        >>> http_unquote_pair('n')
        'n'
        >>> http_unquote_pair('\\')
        '\\'
        >>> http_unquote_pair('\x00')
        '?'
    """
    if not c:
        return "?"

    code = ord(c)
    # SP (0x20) and VCHAR (0x21-0x7E) are contiguous, so one range covers both.
    if code == 0x09 or 0x20 <= code <= 0x7E or code >= 0x80:
        return c
    return "?"
184
+
185
+
186
def http_unquote(raw: str) -> tuple[int, str]:
    r"""
    Unquote an HTTP quoted-string.

    Per RFC 7230 Section 3.2.6:
        quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
        quoted-pair   = "\" ( HTAB / SP / VCHAR / obs-text )

    The raw string must begin with a double quote ("). Only the first
    quoted string is parsed; anything after its closing quote is ignored.
    Characters that are not valid qdtext, and escaped characters that are
    not valid in a quoted-pair, are replaced with '?' rather than causing
    a failure.

    Args:
        raw: String that must start with a double quote

    Returns:
        Tuple of (eaten, result) where:
        - eaten: number of characters consumed (including both quotes),
          or -1 on failure
        - result: the unquoted string, or empty string on failure

        Failure means: ``raw`` is empty, does not start with a double
        quote, ends with a dangling backslash, or has no closing quote.

    Examples:
        >>> http_unquote('"hello"')
        (7, 'hello')
        >>> http_unquote('"hello world"')
        (13, 'hello world')
        >>> http_unquote('"hello\\"world"')
        (14, 'hello"world')
        >>> http_unquote('"test')
        (-1, '')
        >>> http_unquote('not quoted')
        (-1, '')
    """
    if not raw or raw[0] != '"':
        return -1, ""

    buf: list[str] = []
    end = len(raw)
    i = 1  # Start after opening quote

    while i < end:
        ch = raw[i]

        if ch == '"':
            # Found closing quote - success
            return i + 1, "".join(buf)

        if ch == "\\":
            # Escaped character (quoted-pair)
            if i + 1 >= end:
                # Backslash at end of string - invalid
                return -1, ""

            buf.append(http_unquote_pair(raw[i + 1]))
            i += 2  # Skip both backslash and escaped char
            continue

        # Regular character; invalid qdtext is replaced, not rejected
        buf.append(ch if is_qd_text(ch) else "?")
        i += 1

    # Reached end without finding closing quote - invalid
    return -1, ""
252
+
253
+
254
class Headers(MutableMapping[str, str]):
    """
    Case-insensitive, multi-valued header mapping.

    Names are lower-cased on every access. Each name maps internally to a
    list of string values; reading a name through the mapping interface
    yields those values joined with ", ".
    """

    def __init__(self, headers: Mapping[str, Union[str, List[str]]]) -> None:
        # A bare string becomes a one-element list; a list is shallow-copied
        # so later appends do not touch the caller's list.
        normalized = {}
        for name, raw in headers.items():
            normalized[name.lower()] = [raw] if isinstance(raw, str) else raw[:]
        self._headers = normalized

    def get_list(self, key: str) -> Optional[List[str]]:
        """Return the stored list of values for ``key``, or None if absent."""
        return self._headers.get(key.lower())

    def __getitem__(self, key: str) -> str:
        stored = self._headers[key.lower()]
        return ", ".join(stored)

    def __setitem__(self, key: str, value: str) -> None:
        # Assignment appends to any existing values instead of replacing them.
        name = key.lower()
        if name not in self._headers:
            self._headers[name] = []
        self._headers[name].append(value)

    def __delitem__(self, key: str) -> None:
        del self._headers[key.lower()]

    def __iter__(self) -> Iterator[str]:
        return iter(self._headers)

    def __len__(self) -> int:
        return len(self._headers)

    def __repr__(self) -> str:
        return repr(self._headers)

    def __str__(self) -> str:
        return str(self._headers)

    def __eq__(self, other_headers: Any) -> bool:
        # Only another Headers instance can compare equal.
        if not isinstance(other_headers, Headers):
            return False
        return self._headers == other_headers._headers  # type: ignore
284
+
285
+
286
class Vary:
    """
    Parsed form of a Vary header value.

    Attributes are documented inline: ``values`` holds the listed field
    names in the order they appeared.
    """

    def __init__(self, values: List[str]) -> None:
        # Field names (or "*") as listed in the header value.
        self.values = values

    @classmethod
    def from_value(cls, vary_value: str) -> "Vary":
        """
        Build an instance from a raw comma-separated Vary header value.

        Surrounding whitespace is stripped from each element. Empty
        elements (as produced by ``"a,,b"``, a trailing comma, or an empty
        header value) are dropped, since an empty string is not a field
        name.

        Args:
            vary_value: The raw Vary header value

        Returns:
            An instance of ``cls`` (so subclasses get their own type)
            holding the non-empty field names
        """
        stripped = (part.strip() for part in vary_value.split(","))
        return cls([name for name in stripped if name])
298
+
299
+
300
@dataclass
class Range:
    """
    A single byte range taken from a Range header value.

    Fields are documented inline below.
    """

    # Range unit; only "bytes" is produced by try_from_str.
    unit: Literal["bytes"]
    # (start, end) as written in the header; None marks an omitted side,
    # e.g. "-500" gives (None, 500) and "100-" gives (100, None).
    range: tuple[Optional[int], Optional[int]]

    @classmethod
    def try_from_str(cls, range_header: str) -> Optional["Range"]:
        """
        Parse a Range header value such as ``"bytes=0-99"``.

        Args:
            range_header: The raw Range header value

        Returns:
            A ``Range`` when the header holds exactly one byte range.
            None when the unit is not "bytes" (compared case-insensitively)
            or when more than one range is listed, since neither is
            supported.

        Raises:
            ValueError: If the header does not contain exactly one "=",
                if a range part has no "-", or if a bound is not an integer.
        """
        # Example: "bytes=0-99,200-299,-500,100-"
        unit, sep, values = range_header.partition("=")
        if not sep or "=" in values:
            raise ValueError(f"Invalid range header: {range_header}")

        # The range-set syntax is unit-specific, so an unknown unit is
        # reported as unsupported before any attempt to parse its ranges.
        if unit.strip().lower() != "bytes":
            return None

        parsed: list[tuple[Optional[int], Optional[int]]] = []
        for part in values.split(","):
            part = part.strip()
            if "-" not in part:
                raise ValueError(f"Invalid range part: {part}")
            start_str, end_str = part.split("-", 1)
            start = int(start_str) if start_str else None
            end = int(end_str) if end_str else None
            parsed.append((start, end))

        if len(parsed) != 1:
            # we don't support multiple ranges
            return None

        return cls(unit="bytes", range=parsed[0])
329
+
330
+
331
class CacheControl:
    """
    Unified Cache-Control directives for both requests and responses.

    Supports all standard directives from RFC9111 and experimental directives.
    Integer-valued directives use None for unset values instead of -1;
    flag directives default to False.

    Supported Directives:
    - immutable                  [RFC8246]
    - max-age                    [RFC9111, Section 5.2.1.1, 5.2.2.1]
    - max-stale                  [RFC9111, Section 5.2.1.2]
    - min-fresh                  [RFC9111, Section 5.2.1.3]
    - must-revalidate            [RFC9111, Section 5.2.2.2]
    - must-understand            [RFC9111, Section 5.2.2.3]
    - no-cache                   [RFC9111, Section 5.2.1.4, 5.2.2.4]
    - no-store                   [RFC9111, Section 5.2.1.5, 5.2.2.5]
    - no-transform               [RFC9111, Section 5.2.1.6, 5.2.2.6]
    - only-if-cached             [RFC9111, Section 5.2.1.7]
    - private                    [RFC9111, Section 5.2.2.7]
    - proxy-revalidate           [RFC9111, Section 5.2.2.8]
    - public                     [RFC9111, Section 5.2.2.9]
    - s-maxage                   [RFC9111, Section 5.2.2.10]
    - stale-if-error             [RFC5861, Section 4]
    - stale-while-revalidate     [RFC5861, Section 3]

    no_cache and private can be:
    - False: directive not present
    - True: directive present without field names
    - List[str]: directive present with specific field names
    """

    def __init__(self) -> None:
        # Common directives
        self.max_age: Optional[int] = None
        self.no_store: bool = False
        self.no_transform: bool = False

        # Request-specific
        self.max_stale: Optional[int] = None
        self.min_fresh: Optional[int] = None
        self.only_if_cached: bool = False

        # Response-specific
        self.must_revalidate: bool = False
        self.must_understand: bool = False
        self.public: bool = False
        self.proxy_revalidate: bool = False
        self.s_maxage: Optional[int] = None
        self.immutable: bool = False

        # Can be boolean or contain field names
        self.no_cache: Union[bool, List[str]] = False
        self.private: Union[bool, List[str]] = False

        # Experimental
        self.stale_if_error: Optional[int] = None
        self.stale_while_revalidate: Optional[int] = None

        # Extensions (unrecognized directives); a fresh list per instance
        self.extensions: List[str] = []

    def __repr__(self) -> str:
        """Return a debugging representation listing every attribute."""
        fields = ", ".join(f"{name}={val!r}" for name, val in vars(self).items())
        return f"{type(self).__name__}({fields})"
391
+
392
+
393
def parse_int_value(value: str) -> Optional[int]:
    """
    Parse a delta-seconds value, return None if invalid.

    Only a non-empty run of ASCII digits is accepted. Forms that ``int()``
    would tolerate but the ``1*DIGIT`` grammar does not (a sign, underscores,
    surrounding whitespace, non-ASCII digits) are rejected.

    Args:
        value: The directive value text

    Returns:
        The parsed number capped at 2147483647 (max int32), or None when
        ``value`` is not a run of ASCII digits. Arbitrarily long digit
        strings are capped rather than rejected.
    """
    max_int32 = 2147483647

    if not (value.isascii() and value.isdigit()):
        return None

    significant = value.lstrip("0")
    # More than 10 significant digits always exceeds max int32. Deciding by
    # length avoids int()'s digit-count limit on very long strings.
    if len(significant) > 10:
        return max_int32

    return min(int(significant or "0"), max_int32)
401
+
402
+
403
def parse_field_names(value: str) -> List[str]:
    """
    Split a comma-separated field-name list and canonicalize each name.

    Each element is stripped of surrounding whitespace, empty elements are
    skipped, and every dash-separated word is passed through
    ``str.capitalize`` (so "set-cookie" becomes "Set-Cookie" and "ETag"
    becomes "Etag").

    Args:
        value: Comma-separated field names

    Returns:
        The canonicalized field names in their original order
    """
    stripped = (part.strip() for part in value.split(","))
    return ["-".join(map(str.capitalize, name.split("-"))) for name in stripped if name]
413
+
414
+
415
def has_field_names(token: str) -> bool:
    """Tell whether a directive may carry a comma-separated field-name list."""
    # Only these two directives take field names as their argument.
    return token == "no-cache" or token == "private"
418
+
419
+
420
def parse(value: str) -> CacheControl:
    """
    Parse a Cache-Control header value character by character.

    The value is scanned left to right as a sequence of directives, each
    either ``token`` or ``token=value``, where the value is a quoted-string
    or an unquoted run of characters. Token names are lower-cased before
    dispatch. Malformed input is skipped; no exception is raised by the
    scanning logic itself.

    Unquoted values of ``no-cache`` and ``private`` run up to the next
    whitespace, so commas inside them are kept as field-name separators.
    As a consequence, a following directive that is separated only by a
    comma (no whitespace) becomes part of that value.

    Args:
        value: The Cache-Control header value string

    Returns:
        CacheControl object with parsed directives; an untouched
        CacheControl when ``value`` is empty
    """
    cc = CacheControl()

    if not value:
        return cc

    # i: start of the directive currently being scanned.
    i = 0
    length = len(value)

    while i < length:
        # Skip leading whitespace and commas
        while i < length and (value[i] in (" ", "\t", ",")):
            i += 1

        if i >= length:
            break

        # Find end of token (j ends one past the last token character)
        j = i
        while j < length and is_token(value[j]):
            j += 1

        if j == i:
            # No valid token found, skip this character
            i += 1
            continue

        token = value[i:j].lower()
        token_has_fields = has_field_names(token)

        # Skip whitespace after token
        while j < length and value[j] in (" ", "\t"):
            j += 1

        # Check if token has a value (token=value)
        if j < length and value[j] == "=":
            # k: start of the value text
            k = j + 1

            # Skip whitespace after equals sign
            while k < length and value[k] in (" ", "\t"):
                k += 1

            if k >= length:
                # Directive ends with '=' but no value; it is dropped
                # without being recorded and scanning stops here.
                i = k
                continue

            # Check for quoted value
            if value[k] == '"':
                eaten, result = http_unquote(value[k:])
                if eaten == -1:
                    # Quote mismatch: the directive is dropped and scanning
                    # resumes right after the opening quote, so the rest of
                    # the text is tokenized again as ordinary directives.
                    i = k + 1
                    continue

                # eaten counts from the opening quote through the closing one
                i = k + eaten
                handle_directive_with_value(cc, token, result)
            else:
                # Unquoted value (z ends one past its last character)
                z = k
                while z < length:
                    if token_has_fields:
                        # For directives with field names, stop only at whitespace
                        if value[z] in (" ", "\t"):
                            break
                    else:
                        # For other directives, stop at whitespace or comma
                        if value[z] in (" ", "\t", ","):
                            break
                    z += 1

                result = value[k:z]

                # Remove trailing comma if present. Only field-name
                # directives can get here with one, since other values
                # already stop before a comma.
                if result and result[-1] == ",":
                    result = result[:-1]

                i = z
                handle_directive_with_value(cc, token, result)
        else:
            # Token without value
            handle_directive_without_value(cc, token)
            i = j

    return cc
517
+
518
+
519
def handle_directive_with_value(cc: CacheControl, token: str, value: str) -> None:
    """
    Record a ``token=value`` directive on ``cc``.

    Integer directives are stored through ``parse_int_value``; ``no-cache``
    and ``private`` are stored as field-name lists through
    ``parse_field_names``; anything else is appended to ``cc.extensions``
    as ``"token=value"``.

    Args:
        cc: The CacheControl object to update
        token: The directive name, as passed by the caller
        value: The directive value text
    """
    # Directive name -> attribute holding an integer value.
    int_attrs = {
        "max-age": "max_age",
        "s-maxage": "s_maxage",
        "max-stale": "max_stale",
        "min-fresh": "min_fresh",
        "stale-if-error": "stale_if_error",
        "stale-while-revalidate": "stale_while_revalidate",
    }
    # Directive name -> attribute holding a list of field names.
    field_attrs = {
        "no-cache": "no_cache",
        "private": "private",
    }

    if token in int_attrs:
        setattr(cc, int_attrs[token], parse_int_value(value))
    elif token in field_attrs:
        setattr(cc, field_attrs[token], parse_field_names(value))
    else:
        # Unrecognized directive with value
        cc.extensions.append(f"{token}={value}")
550
+
551
+
552
def handle_directive_without_value(cc: CacheControl, token: str) -> None:
    """
    Record a bare directive (one with no ``=value``) on ``cc``.

    A bare ``max-stale`` sets ``cc.max_stale`` to 2147483647. Known flag
    directives set their attribute to True. Anything else is appended to
    ``cc.extensions`` unchanged.

    Args:
        cc: The CacheControl object to update
        token: The directive name, as passed by the caller
    """
    # Directive name -> attribute that is switched on when it appears bare.
    flag_attrs = {
        "no-cache": "no_cache",
        "private": "private",
        "no-store": "no_store",
        "no-transform": "no_transform",
        "only-if-cached": "only_if_cached",
        "must-revalidate": "must_revalidate",
        "must-understand": "must_understand",
        "public": "public",
        "proxy-revalidate": "proxy_revalidate",
        "immutable": "immutable",
    }

    if token == "max-stale":
        # max-stale without value means accept any stale response
        cc.max_stale = 2147483647  # max int32
        return

    attr = flag_attrs.get(token)
    if attr is None:
        # Unrecognized directive without value
        cc.extensions.append(token)
    else:
        setattr(cc, attr, True)
591
+
592
+
593
def parse_cache_control(value: str | None) -> CacheControl:
    """
    Parse a Cache-Control header from either a request or response.

    This is the main entry point for parsing. A missing header (None)
    yields a CacheControl with every directive at its default.

    Args:
        value: The Cache-Control header value, or None when absent

    Returns:
        CacheControl object containing all parsed directives

    Examples:
        >>> # Response example
        >>> cc = parse_cache_control("public, max-age=3600, must-revalidate")
        >>> cc.public
        True
        >>> cc.max_age
        3600
        >>> cc.must_revalidate
        True

        >>> # Request example
        >>> cc = parse_cache_control("max-age=0, no-cache")
        >>> cc.max_age
        0
        >>> cc.no_cache
        True

        >>> # With field names
        >>> cc = parse_cache_control('no-cache="Set-Cookie, Authorization"')
        >>> cc.no_cache
        ['Set-Cookie', 'Authorization']

        >>> # Experimental directives
        >>> cc = parse_cache_control("immutable, stale-while-revalidate=86400")
        >>> cc.immutable
        True
        >>> cc.stale_while_revalidate
        86400
    """
    return CacheControl() if value is None else parse(value)