careful 0.2.1-py3-none-any.whl → 0.3.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
careful/httpx/__init__.py CHANGED
@@ -1,11 +1,20 @@
+import os
+from urllib.parse import urlparse
 from .retries import make_retry_client, retry_default_rule
 from .throttle import make_throttled_client
+from .robots import (
+    make_robots_txt_client,
+    RobotExclusionError,
+    RobotsRejectFunc,
+    raise_robots_txt,
+)
 from .dev_cache import (
     make_dev_caching_client,
     MemoryCache,
     FileCache,
     SqliteCache,
-    CacheStorageBase,
+    CacheStorage,
+    CacheResponse,
     _cache_200s,
     _default_keyfunc,
 )
@@ -20,11 +29,14 @@ def make_careful_client(
     retry_wait_seconds: float = 10,
     should_retry: ResponsePredicate = retry_default_rule,
     requests_per_minute: int = 0,
-    cache_storage: CacheStorageBase = None,
+    cache_storage: CacheStorage | None = None,
     cache_write_only: bool = False,
     should_cache: ResponsePredicate = _cache_200s,
     cache_keyfunc: CacheKeyfunc = _default_keyfunc,
-):
+    check_robots_txt: bool = False,
+    robots_txt_user_agent: str | None = None,
+    robots_txt_on_reject: RobotsRejectFunc = raise_robots_txt,
+) -> Client:
     """
     This function patches an `httpx.Client` so that all requests made with the client support
     [retries](#retries), [throttling](#throttling), and [development caching](#development-caching).
@@ -51,43 +63,6 @@ def make_careful_client(
 
     cache_keyfunc: Function that takes request details and returns a unique cache key.
 
-    ## Retries
-
-    If `retry_attempts` is set, responses will be passed to `should_retry`.
-    Responses that are rejected (return `True`) will be retried after a wait based on
-    `retry_wait_seconds`.
-    Each retry will wait twice as long as the one before.
-
-    ## Throttling
-
-    If `requests_per_minute` is set, standard (non-retry) requests will automatically
-    sleep for a short period to target the given rate.
-
-    For example, at 30rpm, the sleep time on a fast request will be close to 2 seconds.
-
-    ## Development Caching
-
-    Why **development caching?**
-
-    This feature is named as a reminder that **this is not true HTTP caching**, which
-    should take various headers into account. Look at libraries like [hishel](https://hishel.com) if that's what you are after.
-
-    The purpose of this feature is to allow you to cache all of your HTTP requests during development.
-    Often when writing a scraper or crawler, you wind up hitting the site you are working on more often than you'd like-- each time you iterate on your code you're likely making redundant requests to pages that haven't changed.
-
-    By caching all successful requests (configurable with the `should_cache` parameter),
-    you can easily re-run scrapers without making redundant HTTP requests.
-    This means faster development time & happier upstream servers.
-
-    To enable development caching, assign a [`MemoryCache`][careful.httpx.MemoryCache],
-    [`FileCache`][careful.httpx.FileCache], or [`SqliteCache`][careful.httpx.SqliteCache] to
-    the `cache_storage` property of a `scrapelib.Scraper`.
-
-    ---
-
-    When multiple features are applied, the order of wrapping ensures that:
-    - the cache is checked first, and bypasses throttling if hit
-    - retries use their own delays, but not throttled separately
     """
     if client is None:
         client = Client()
@@ -104,7 +79,7 @@ def make_careful_client(
         client = make_throttled_client(
             client=client, requests_per_minute=requests_per_minute
         )
-    # caching on top layer, so cache will be checked first
+    # caching on top layer, so cache will be checked before throttling/etc.
     if cache_storage:
         client = make_dev_caching_client(
            client=client,
@@ -113,15 +88,111 @@ def make_careful_client(
             should_cache=should_cache,
             write_only=cache_write_only,
         )
+    # robots.txt before cache
+    if check_robots_txt:
+        client = make_robots_txt_client(
+            client=client,
+            as_user_agent=robots_txt_user_agent,
+            on_rejection=robots_txt_on_reject,
+        )
 
     return client
 
 
+def _int_env(var_name: str, default: int) -> int:
+    return int(os.environ.get(var_name, default))
+
+
+def _float_env(var_name: str, default: float) -> float:
+    return float(os.environ.get(var_name, default))
+
+
+def _bool_env(var_name: str, default: bool) -> bool:
+    """helper function for bool env vars"""
+    return bool(os.environ.get(var_name, "T" if default else ""))
+
+
+def _cache_env(var_name: str, default: CacheStorage | None) -> CacheStorage | None:
+    """
+    helper function that reads cache as a protocol string
+    """
+    cache_str = os.environ.get(var_name)
+    if not cache_str:
+        return default
+    parsed = urlparse(cache_str)
+    if parsed.scheme == "memory":
+        return MemoryCache()
+    elif parsed.scheme == "file":
+        return FileCache(parsed.path)
+    elif parsed.scheme == "sqlite":
+        return SqliteCache(parsed.path)
+
+
+def make_careful_client_from_env(
+    *,
+    client: Client | None = None,
+    retry_attempts: int = 0,
+    retry_wait_seconds: float = 10,
+    should_retry: ResponsePredicate = retry_default_rule,
+    requests_per_minute: int = 0,
+    cache_storage: CacheStorage | None = None,
+    cache_write_only: bool = False,
+    should_cache: ResponsePredicate = _cache_200s,
+    cache_keyfunc: CacheKeyfunc = _default_keyfunc,
+    check_robots_txt: bool = False,
+    robots_txt_user_agent: str | None = None,
+    robots_txt_on_reject: RobotsRejectFunc = raise_robots_txt,
+) -> Client:
+    """
+    Make a careful client from environment variables.
+
+    Any set environment variable overrides the corresponding parameter.
+
+    Numeric:
+    - CAREFUL_RETRY_ATTEMPTS
+    - CAREFUL_RETRY_WAIT_SECONDS
+    - CAREFUL_REQUESTS_PER_MINUTE
+    Booleans (any non-empty value is true):
+    - CAREFUL_CACHE_WRITE_ONLY
+    - CAREFUL_CHECK_ROBOTS_TXT
+    String: CAREFUL_ROBOTS_TXT_USER_AGENT
+    Cache:
+    - CAREFUL_CACHE, which can be:
+        memory://
+        sqlite://path/to/db.sqlite3
+        file://path/to/directory
+
+    Function parameters do not have environment variables.
+    """
+    return make_careful_client(
+        client=client,
+        retry_attempts=_int_env("CAREFUL_RETRY_ATTEMPTS", retry_attempts),
+        retry_wait_seconds=_float_env("CAREFUL_RETRY_WAIT_SECONDS", retry_wait_seconds),
+        should_retry=should_retry,
+        requests_per_minute=_int_env(
+            "CAREFUL_REQUESTS_PER_MINUTE", requests_per_minute
+        ),
+        cache_storage=_cache_env("CAREFUL_CACHE", cache_storage),
+        cache_write_only=_bool_env("CAREFUL_CACHE_WRITE_ONLY", cache_write_only),
+        should_cache=should_cache,
+        cache_keyfunc=cache_keyfunc,
+        check_robots_txt=_bool_env("CAREFUL_CHECK_ROBOTS_TXT", check_robots_txt),
+        robots_txt_user_agent=os.environ.get(
+            "CAREFUL_ROBOTS_TXT_USER_AGENT", robots_txt_user_agent
+        ),
+        robots_txt_on_reject=robots_txt_on_reject,
+    )
+
+
 __all__ = [
+    "make_careful_client",
     "make_retry_client",
     "make_throttled_client",
     "make_dev_caching_client",
+    "make_robots_txt_client",
     "MemoryCache",
     "FileCache",
     "SqliteCache",
+    "CacheResponse",
+    "RobotExclusionError",
 ]
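
A minimal sketch of how the new environment-driven constructor might be used; the variable values are illustrative, not from the package:

```python
import os
from careful.httpx import make_careful_client_from_env

# illustrative values -- any CAREFUL_* variable that is set overrides
# the corresponding keyword argument, per the docstring above
os.environ["CAREFUL_RETRY_ATTEMPTS"] = "3"
os.environ["CAREFUL_REQUESTS_PER_MINUTE"] = "30"
os.environ["CAREFUL_CACHE"] = "memory://"  # file:// and sqlite:// forms also parse

client = make_careful_client_from_env()
client.get("https://example.com")
```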
careful/httpx/dev_cache.py CHANGED
@@ -1,4 +1,3 @@
-import abc
 import types
 import functools
 import logging
@@ -8,10 +7,27 @@ import glob
 import hashlib
 import sqlite3
 import json
+from typing import cast, Protocol, Callable
+from ._types import ResponsePredicate, CacheKeyfunc
 
 from httpx import Client, Response, Request
 
-log = logging.getLogger("httpx")
+log = logging.getLogger("careful")
+
+
+class CacheStorage(Protocol):
+    def get(self, key: str) -> None | Response: ...
+    def set(self, key: str, response: Response) -> None: ...
+
+
+class DevCacheClient(Protocol):
+    _retry_attempts: int
+    _cache_storage: CacheStorage
+    _write_only: bool
+    _should_cache: ResponsePredicate
+    _cache_keyfunc: CacheKeyfunc
+    _no_cache_request: Callable
+    request: Callable
 
 
 def _default_keyfunc(
@@ -28,7 +44,7 @@ def _default_keyfunc(
     if method.lower() != "get":
         return None
 
-    return Request(url=url, method=method, params=params).url
+    return str(Request(url=url, method=method, params=params).url)
 
 
 def _cache_200s(response: Response) -> bool:
@@ -40,7 +56,11 @@ def _cache_200s(response: Response) -> bool:
     return response.status_code == 200
 
 
-def _cached_request(client: Client, *args, **kwargs):
+class CacheResponse(Response):
+    fromcache: bool
+
+
+def _cached_request(client: DevCacheClient, *args, **kwargs) -> CacheResponse:
     method, url = args
     request_key = client._cache_keyfunc(method, url, kwargs["params"])
 
@@ -50,30 +70,28 @@ def _cached_request(client: Client, *args, **kwargs):
     cached_resp = client._cache_storage.get(request_key)
 
     if cached_resp:
-        # resp = cast(CacheResponse, resp_maybe)
         log.info("using cached response request_key=%s", request_key)
-        cached_resp.fromcache = True
-        resp = cached_resp
+        new_resp = cast(CacheResponse, cached_resp)
+        new_resp.fromcache = True
     else:
-        resp = client._no_cache_request(*args, **kwargs)
+        new_resp = cast(CacheResponse, client._no_cache_request(*args, **kwargs))
+        new_resp.fromcache = False
         # save to cache if request and response meet criteria
-        log.debug("XX %s %s", request_key, client._should_cache(resp))
-        if request_key and client._should_cache(resp):
-            client._cache_storage.set(request_key, resp)
+        if request_key and client._should_cache(new_resp):
+            client._cache_storage.set(request_key, new_resp)
             log.info("caching response request_key=%s", request_key)
-        resp.fromcache = False
 
-    return resp
+    return new_resp
 
 
 def make_dev_caching_client(
     *,
+    cache_storage: CacheStorage,
     client: Client | None = None,
-    cache_storage=None,
     cache_keyfunc=_default_keyfunc,
     should_cache=_cache_200s,
     write_only=False,
-):
+) -> Client:
     """
     Returns an enhanced `httpx.Client` where requests are saved to a
     specified cache.
@@ -98,29 +116,21 @@ def make_dev_caching_client(
     if client is None:
         client = Client()
 
-    client._cache_storage = cache_storage
-    client._cache_keyfunc = cache_keyfunc
-    client._should_cache = should_cache
-    client._write_only = write_only
+    tclient = cast(DevCacheClient, client)
 
-    client._no_cache_request = client.request
-    client.request = types.MethodType(
+    tclient._cache_storage = cache_storage
+    tclient._cache_keyfunc = cache_keyfunc
+    tclient._should_cache = should_cache
+    tclient._write_only = write_only
+
+    tclient._no_cache_request = client.request
+    tclient.request = types.MethodType(
         functools.wraps(client.request)(_cached_request), client
     )
     return client
 
 
-class CacheStorageBase(abc.ABC):
-    @abc.abstractmethod
-    def get(self, key: str) -> None | Response:
-        raise NotImplementedError()
-
-    @abc.abstractmethod
-    def set(self, key: str, response: Response) -> None:
-        raise NotImplementedError()
-
-
-class MemoryCache(CacheStorageBase):
+class MemoryCache(CacheStorage):
     """
     In memory cache for request responses.
 
@@ -144,7 +154,7 @@ class MemoryCache(CacheStorageBase):
         self.cache[key] = response
 
 
-class FileCache(CacheStorageBase):
+class FileCache(CacheStorage):
     """
     File-based cache for request responses.
 
@@ -260,7 +270,7 @@ class FileCache(CacheStorageBase):
             os.remove(fname)
 
 
-class SqliteCache(CacheStorageBase):
+class SqliteCache(CacheStorage):
     """
     sqlite cache for request responses.
 
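Since the `CacheStorageBase` ABC is replaced by a `CacheStorage` Protocol, a custom backend no longer needs to inherit from anything; any object with matching `get`/`set` methods satisfies the type structurally. A sketch of a hypothetical size-bounded backend (`BoundedCache` is illustrative, not part of the package):

```python
from collections import OrderedDict
from httpx import Response

class BoundedCache:
    """Hypothetical backend: satisfies CacheStorage structurally,
    evicting the oldest entry once maxsize is exceeded."""

    def __init__(self, maxsize: int = 256) -> None:
        self._store: OrderedDict[str, Response] = OrderedDict()
        self._maxsize = maxsize

    def get(self, key: str) -> None | Response:
        return self._store.get(key)

    def set(self, key: str, response: Response) -> None:
        self._store[key] = response
        if len(self._store) > self._maxsize:
            self._store.popitem(last=False)  # drop the oldest insertion
```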
careful/httpx/retries.py CHANGED
@@ -2,25 +2,37 @@ import time
 import types
 import functools
 import logging
+from typing import Protocol, Callable, cast
 from httpx import Client, Response, HTTPError
+from ._types import ResponsePredicate
 
-log = logging.getLogger("httpx")
+log = logging.getLogger("careful")
+
+
+class RetryClient(Protocol):
+    _retry_attempts: int
+    _retry_wait_seconds: float
+    _should_retry: ResponsePredicate
+    _no_retry_request: Callable
+    request: Callable
 
 
 def retry_default_rule(response: Response) -> bool:
-    # default behavior is to retry 400s and 500s but not 404s
+    """default behavior is to retry 400s and 500s but not 404s"""
     return response.status_code >= 400 and response.status_code != 404
 
 
 def retry_only_500s(response: Response) -> bool:
+    """retry all status codes that are 500 or above"""
     return response.status_code >= 500
 
 
 def retry_all_400s_500s(response: Response) -> bool:
+    """retry all status codes that are 400 or above"""
     return response.status_code >= 400
 
 
-def _retry_request(client: Client, *args, **kwargs):
+def _retry_request(client: RetryClient, *args, **kwargs) -> Response:
     # the retry loop
     tries = 0
     exception_raised = None
@@ -75,15 +87,17 @@ def make_retry_client(
     attempts: int = 1,
     wait_seconds: float = 10,
     should_retry=retry_default_rule,
-):
+) -> Client:
     if client is None:
         client = Client()
-    client._retry_attempts = max(0, attempts)
-    client._retry_wait_seconds = wait_seconds
-    client._should_retry = should_retry
 
-    client._no_retry_request = client.request
-    client.request = types.MethodType(
+    tclient = cast(RetryClient, client)
+    tclient._retry_attempts = max(0, attempts)
+    tclient._retry_wait_seconds = wait_seconds
+    tclient._should_retry = should_retry
+
+    tclient._no_retry_request = client.request
+    tclient.request = types.MethodType(
         functools.wraps(client.request)(_retry_request), client
     )
 
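The retry predicates are plain `Response -> bool` functions, so a caller can supply a custom rule alongside the built-ins. A sketch using the factory above (the `retry_429_and_5xx` rule is hypothetical, not part of the package):

```python
from httpx import Client, Response
from careful.httpx import make_retry_client

def retry_429_and_5xx(response: Response) -> bool:
    # hypothetical rule: retry rate-limit and server-error responses only
    return response.status_code == 429 or response.status_code >= 500

client = make_retry_client(
    client=Client(),
    attempts=3,
    wait_seconds=2,
    should_retry=retry_429_and_5xx,
)
```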
careful/httpx/robots.py ADDED
@@ -0,0 +1,72 @@
+import types
+import functools
+import warnings
+from urllib.robotparser import RobotFileParser
+from typing import Protocol, cast, Callable
+from httpx import Client, Response, URL
+
+
+class RobotExclusionError(Exception):
+    pass
+
+
+def raise_robots_txt(url, robots):
+    raise RobotExclusionError(f"{url} excluded by {robots.url}")
+
+
+def warn_robots_txt(url, robots):
+    warnings.warn(f"{url} excluded by {robots.url}")
+
+
+class RobotsClient(Protocol):
+    _robots_for_domain: dict[str, RobotFileParser]
+    _robots_ua: str
+    _rejected_action: Callable[[str, RobotFileParser], None]
+    _no_check_request: Callable
+    request: Callable
+    headers: dict
+
+
+def _robot_check_request(client: RobotsClient, *args, **kwargs) -> Response:
+    method, url = args
+    uurl = URL(url)
+    domain = uurl.host
+    if domain not in client._robots_for_domain:
+        robots_url = f"{uurl.scheme}://{domain}/robots.txt"
+        robots_resp = client._no_check_request("GET", robots_url)
+        # pass url for output, but parse the already-fetched body instead of calling read()
+        parser = RobotFileParser(robots_url)
+        parser.parse(robots_resp.text.splitlines())
+        client._robots_for_domain[domain] = parser
+    if not client._robots_for_domain[domain].can_fetch(client._robots_ua, url):
+        client._rejected_action(url, client._robots_for_domain[domain])
+    # if action doesn't raise an exception, the request goes through
+    return client._no_check_request(*args, **kwargs)
+
+
+RobotsRejectFunc = Callable[[str, RobotFileParser], None]
+
+
+def make_robots_txt_client(
+    *,
+    client: Client | None = None,
+    as_user_agent: str | None = None,
+    on_rejection: RobotsRejectFunc = raise_robots_txt,
+) -> Client:
+    if client is None:
+        client = Client()
+
+    tclient = cast(RobotsClient, client)
+
+    tclient._robots_for_domain = {}
+    if as_user_agent:
+        tclient._robots_ua = as_user_agent
+    else:
+        tclient._robots_ua = tclient.headers["user-agent"]
+    tclient._rejected_action = on_rejection
+
+    tclient._no_check_request = client.request
+    tclient.request = types.MethodType(
+        functools.wraps(client.request)(_robot_check_request), client
+    )
+    return client
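
The new module fetches and parses each domain's robots.txt once, then consults it before every request; `on_rejection` decides what happens when a URL is disallowed. A sketch using the non-raising `warn_robots_txt` handler (user-agent values are illustrative):

```python
from httpx import Client
from careful.httpx import make_robots_txt_client
from careful.httpx.robots import warn_robots_txt

client = make_robots_txt_client(
    client=Client(headers={"user-agent": "examplebot/1.0"}),
    as_user_agent="examplebot",    # identity checked against robots.txt rules
    on_rejection=warn_robots_txt,  # warn instead of raising RobotExclusionError
)
client.get("https://example.com/some/page")
```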
careful/httpx/throttle.py CHANGED
@@ -2,12 +2,21 @@ import time
 import types
 import functools
 import logging
-from httpx import Client
+from typing import Protocol, cast, Callable
+from httpx import Client, Response
 
-log = logging.getLogger("httpx")
+log = logging.getLogger("careful")
 
 
-def _throttle_request(client: Client, *args, **kwargs):
+class ThrottledClient(Protocol):
+    _last_request: float
+    _requests_per_minute: float
+    _request_frequency: float
+    _no_throttle_request: Callable
+    request: Callable
+
+
+def _throttle_request(client: ThrottledClient, *args, **kwargs) -> Response:
     now = time.time()
     diff = client._request_frequency - (now - client._last_request)
     if diff > 0:
@@ -23,19 +32,21 @@ def make_throttled_client(
     *,
     client: Client | None = None,
     requests_per_minute: float = 0,
-):
+) -> Client:
     if requests_per_minute <= 0:
         raise ValueError("requests per minute must be a positive number")
 
     if client is None:
         client = Client()
 
-    client._last_request = 0.0
-    client._requests_per_minute = requests_per_minute
-    client._request_frequency = 60.0 / requests_per_minute
+    tclient = cast(ThrottledClient, client)
+
+    tclient._last_request = 0.0
+    tclient._requests_per_minute = requests_per_minute
+    tclient._request_frequency = 60.0 / requests_per_minute
 
-    client._no_throttle_request = client.request
-    client.request = types.MethodType(
+    tclient._no_throttle_request = client.request
+    tclient.request = types.MethodType(
         functools.wraps(client.request)(_throttle_request), client
     )
     return client
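
The throttle computes a per-request window of `60.0 / requests_per_minute` seconds and sleeps off whatever remains of it before each request. A brief sketch (URLs illustrative):

```python
from httpx import Client
from careful.httpx import make_throttled_client

# 30 requests/minute -> one request every 60.0 / 30 = 2.0 seconds
client = make_throttled_client(client=Client(), requests_per_minute=30)

for url in ["https://example.com/a", "https://example.com/b"]:
    client.get(url)  # later calls sleep for the remainder of the 2s window
```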
careful-0.3.1.dist-info/METADATA CHANGED
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: careful
-Version: 0.2.1
-Summary: careful extensions to httpx: throttle, retry, cache
+Version: 0.3.1
+Summary: a small library for writing resilient, well-behaved HTTP code
 Project-URL: Repository, https://codeberg.org/jpt/careful
 Author-email: jpt <dev@jpt.sh>
 License: BSD-2-Clause
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.10
 Requires-Dist: httpx>=0.28.1
@@ -22,43 +23,50 @@ Description-Content-Type: text/markdown
 
 # careful
 
-<img src="https://careful.jpt.sh/carefully-3681327.svg" width=100 height=100 alt="logo of a warning sign">
+<img src="https://jpt.sh/projects/careful/carefully-3681327.svg" width=100 height=100 alt="logo of a warning sign">
+
+**careful** is a Python library for writing resilient, well-behaved HTTP clients.
 
-**careful** is a Python library for making requests to unreliable websites with `httpx`.
-
 **Code**: <https://codeberg.org/jpt/careful>
 
-**Docs**: <https://careful.jpt.sh>
+**Docs**: <https://jpt.sh/projects/careful/>
 
+![PyPI - Version](https://img.shields.io/pypi/v/careful)
 [![status-badge](https://ci.codeberg.org/api/badges/15185/status.svg)](https://ci.codeberg.org/repos/15185)
 
-It offers enhancements to
-[`httpx.Client`](https://www.python-httpx.org)
-useful for writing long-running scrapers & crawlers, particularly against sites that are slow or have intermittent errors.
+Call one function to enchant an
+**[httpx.Client](https://www.python-httpx.org)**, making your HTTP connections more resilient and better-mannered.
 
-- **configurable retry support.** retry on timeouts or other errors, with exponential back-off.
-- **simple request throttling.** set a maximum number of requests per minute.
-- **development cache.** configurable caching aimed at reducing redundant requests made while authoring/testing web scrapers.
+- Configure **throttling** to avoid an accidental denial of service or the risk of getting banned.
+- **Retries** help overcome intermittent failures on flaky sites or long crawls.
+- **Development caching** persists between runs, reducing redundant requests while you iterate on your crawlers & scrapers.
 
-### example
+### Example
 
 ```python
 from httpx import Client
 from careful.httpx import make_careful_client, MemoryCache
 
+# the only function you need to call is make_careful_client
+# this wraps your existing `httpx.Client` with your preferred
+# careful behaviors
+
 client = make_careful_client(
-    # can configure httpx.Client however you usually would
-    client=Client(headers={'user-agent': 'careful/1.0'}),
+    client=Client(headers={'user-agent': 'spiderman/1.0'}),
+
     # retries are configurable w/ exponential back off
     retry_attempts=2,
     retry_wait_seconds=5,
+
     # can cache to process memory, filesystem, or SQLite
     cache_storage=MemoryCache(),
-    # requests will automatically be throttled to aim at this rate
+
+    # easy-to-configure throttling
     requests_per_minute=60,
 )
 
-# all normal methods on httpx.Client make use of configured enhancements
+# methods on client are called as they always are
+# configured behaviors occur without further code changes
 client.get("https://example.com")
 ```
 
careful-0.3.1.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+careful/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+careful/httpx/__init__.py,sha256=sIKMgGf-Ea5VEsEFqxXFbAiaF1tYOYm9CIOvSCPooqw,6674
+careful/httpx/_types.py,sha256=jefYDxSbLRUatU8QKeyxStc9UC3AJwAba2SfhNkM0RY,151
+careful/httpx/dev_cache.py,sha256=sfHX2TkKZQRTrOXSGSMgIWE0HbX3Fvuz88o8_jK9P8g,11316
+careful/httpx/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+careful/httpx/retries.py,sha256=3kjuHKYnK1N4Rtum5gUyY_XO4o4cL4jc59d17Y6UwrI,2949
+careful/httpx/robots.py,sha256=jfqQdplTap_RCENu6MHEIabFVznFLruMvSIaG_u0v_8,2168
+careful/httpx/throttle.py,sha256=b1fbmUskcm343D1bbPbY-ITLdL1zVm1dXtjt9LT1bEA,1412
+careful-0.3.1.dist-info/METADATA,sha256=fkI2V9YX5JKhPTYZga5q_3cyfatB4B_PMdTci3sRMmc,2692
+careful-0.3.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+careful-0.3.1.dist-info/licenses/LICENSE,sha256=oHe4LmCuo6CZne42DRXfiR3uqqIfGsk4dAvDKucAi4M,1315
+careful-0.3.1.dist-info/RECORD,,
careful-0.3.1.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.27.0
+Generator: hatchling 1.28.0
 Root-Is-Purelib: true
 Tag: py3-none-any
careful-0.2.1.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
-careful/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-careful/httpx/__init__.py,sha256=u-n0uKIWAd3NXsZUd1UA4wzJJTEhRR74diHzDV2EpEU,4885
-careful/httpx/_types.py,sha256=jefYDxSbLRUatU8QKeyxStc9UC3AJwAba2SfhNkM0RY,151
-careful/httpx/dev_cache.py,sha256=HNtEXncPpqsjIEoz5UhRf4YO2iVwz5uowKc4_B74fZg,11024
-careful/httpx/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-careful/httpx/retries.py,sha256=Kszm0wDITyPZ3qx5TsDL__HjCYVJyAZ2WehrlpXV5Cc,2500
-careful/httpx/throttle.py,sha256=ZpuFABYHGQ4D0zks922SCXp7WZG_-Ysafz-Npa2QVwQ,1096
-careful-0.2.1.dist-info/METADATA,sha256=ZAKwiwqykmep0LiYCzFLWJfTgharbvhW3FCJ3p0b_-8,2498
-careful-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-careful-0.2.1.dist-info/licenses/LICENSE,sha256=oHe4LmCuo6CZne42DRXfiR3uqqIfGsk4dAvDKucAi4M,1315
-careful-0.2.1.dist-info/RECORD,,