careful-0.2.1.tar.gz → careful-0.3.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. careful-0.3.1/.spellignore +6 -0
  2. {careful-0.2.1 → careful-0.3.1}/.woodpecker.yml +4 -5
  3. {careful-0.2.1 → careful-0.3.1}/Justfile +2 -1
  4. {careful-0.2.1 → careful-0.3.1}/PKG-INFO +25 -17
  5. careful-0.3.1/README.md +53 -0
  6. {careful-0.2.1 → careful-0.3.1}/docs/changelog.md +10 -0
  7. careful-0.3.1/docs/design.md +224 -0
  8. careful-0.3.1/docs/index.md +56 -0
  9. careful-0.3.1/docs/reference.md +139 -0
  10. {careful-0.2.1 → careful-0.3.1}/mkdocs.yml +2 -1
  11. {careful-0.2.1 → careful-0.3.1}/pyproject.toml +3 -2
  12. careful-0.3.1/src/careful/httpx/__init__.py +198 -0
  13. {careful-0.2.1 → careful-0.3.1}/src/careful/httpx/dev_cache.py +44 -34
  14. {careful-0.2.1 → careful-0.3.1}/src/careful/httpx/retries.py +23 -9
  15. careful-0.3.1/src/careful/httpx/robots.py +72 -0
  16. {careful-0.2.1 → careful-0.3.1}/src/careful/httpx/throttle.py +20 -9
  17. {careful-0.2.1 → careful-0.3.1}/tests/test_cache.py +13 -6
  18. careful-0.3.1/tests/test_robots_txt.py +28 -0
  19. {careful-0.2.1 → careful-0.3.1}/tests/test_throttle.py +1 -1
  20. careful-0.3.1/trifold.toml +8 -0
  21. careful-0.2.1/README.md +0 -46
  22. careful-0.2.1/docs/index.md +0 -47
  23. careful-0.2.1/docs/reference.md +0 -32
  24. careful-0.2.1/src/careful/httpx/__init__.py +0 -127
  25. {careful-0.2.1 → careful-0.3.1}/.gitignore +0 -0
  26. {careful-0.2.1 → careful-0.3.1}/.pre-commit-config.yaml +0 -0
  27. {careful-0.2.1 → careful-0.3.1}/LICENSE +0 -0
  28. {careful-0.2.1 → careful-0.3.1}/docs/carefully-3681327.svg +0 -0
  29. {careful-0.2.1 → careful-0.3.1}/src/careful/__init__.py +0 -0
  30. {careful-0.2.1 → careful-0.3.1}/src/careful/httpx/_types.py +0 -0
  31. {careful-0.2.1 → careful-0.3.1}/src/careful/httpx/py.typed +0 -0
  32. {careful-0.2.1 → careful-0.3.1}/tests/fakeresponse.py +0 -0
  33. {careful-0.2.1 → careful-0.3.1}/tests/test_careful.py +0 -0
  34. {careful-0.2.1 → careful-0.3.1}/tests/test_retries.py +0 -0
@@ -0,0 +1,6 @@
+ Adrien
+ Coquet
+ MemoryCache
+ spiderman
+ src
+ img
@@ -11,15 +11,15 @@ matrix:

  steps:
    lint:
-     image: python:3.11
+     image: python:3.13
      commands:
        - curl -LsSf https://astral.sh/uv/install.sh | sh
        - "export PATH=/root/.local/bin:$PATH"
        - uv run ruff check .
        - uv run ruff format --check .
+       - uvx ty check .
      when:
-       - matrix:
-           PYTHON_VERSION: 3.13
+       - evaluate: 'PYTHON_VERSION == "3.13"'


    test:
@@ -27,6 +27,5 @@ steps:
      commands:
        - curl -LsSf https://astral.sh/uv/install.sh | sh
        - "export PATH=/root/.local/bin:$PATH"
+       - uv sync --group httpbin
        - uv run --python ${PYTHON_VERSION} pytest
-     depends_on:
-       - lint
@@ -8,9 +8,10 @@ preview:
      uv run mkdocs serve

  publish-pypi:
+     rm dist/*
      uv build
      uv publish

  publish-docs:
      uv run mkdocs build
-     netlify deploy --prod -s careful-docs -d site
+     uvx trifold publish
@@ -1,7 +1,7 @@
  Metadata-Version: 2.4
  Name: careful
- Version: 0.2.1
- Summary: careful extensions to httpx: throttle, retry, cache
+ Version: 0.3.1
+ Summary: a small library for writing resilient, well-behaved HTTP code
  Project-URL: Repository, https://codeberg.org/jpt/careful
  Author-email: jpt <dev@jpt.sh>
  License: BSD-2-Clause
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Requires-Python: >=3.10
  Requires-Dist: httpx>=0.28.1
@@ -22,43 +23,50 @@ Description-Content-Type: text/markdown

  # careful

- <img src="https://careful.jpt.sh/carefully-3681327.svg" width=100 height=100 alt="logo of a warning sign">
+ <img src="https://jpt.sh/projects/careful/carefully-3681327.svg" width=100 height=100 alt="logo of a warning sign">
+
+ **careful** is a Python library for writing resilient, well-behaved HTTP clients.

- **careful** is a Python library for making requests to unreliable websites with `httpx`.
-
  **Code**: <https://codeberg.org/jpt/careful>

- **Docs**: <https://careful.jpt.sh>
+ **Docs**: <https://jpt.sh/projects/careful/>

+ ![PyPI - Version](https://img.shields.io/pypi/v/careful)
  [![status-badge](https://ci.codeberg.org/api/badges/15185/status.svg)](https://ci.codeberg.org/repos/15185)

- It offers enhancements to
- [`httpx.Client`](https://www.python-httpx.org)
- useful for writing long-running scrapers & crawlers, particularly against sites that are slow or have intermittent errors.
+ Call one function to enchant an
+ **[httpx.Client](https://www.python-httpx.org)**, making your HTTP connections more resilient and better mannered.

- - **configurable retry support.** retry on timeouts or other errors, with exponential back-off.
- - **simple request throttling.** set a maximum number of requests per minute.
- - **development cache.** configurable caching aimed at reducing redundant requests made while authoring/testing web scrapers.
+ - Configure **throttling** to avoid accidental denial-of-service and the risk of getting banned.
+ - **Retries** help overcome intermittent failures on flaky sites or long crawls.
+ - **Development caching.** The cache persists between runs, reducing redundant requests while you iterate on your crawlers & scrapers.

- ### example
+ ### Example

  ```python
  from httpx import Client
  from careful.httpx import make_careful_client

+ # the only function you need to call is make_careful_client
+ # this wraps your existing `httpx.Client` with your preferred
+ # careful behaviors
+
  client = make_careful_client(
-     # can configure httpx.Client however you usually would
-     client=Client(headers={'user-agent': 'careful/1.0'}),
+     client=Client(headers={'user-agent': 'spiderman/1.0'}),
+
      # retries are configurable w/ exponential back off
      retry_attempts=2,
      retry_wait_seconds=5,
+
      # can cache to process memory, filesystem, or SQLite
      cache_storage=MemoryCache(),
-     # requests will automatically be throttled to aim at this rate
+
+     # easy-to-configure throttling
      requests_per_minute=60,
  )

- # all normal methods on httpx.Client make use of configured enhancements
+ # methods on client are called as they always are
+ # configured behaviors occur without further code changes
  client.get("https://example.com")
  ```

@@ -0,0 +1,53 @@
+ # careful
+
+ <img src="https://jpt.sh/projects/careful/carefully-3681327.svg" width=100 height=100 alt="logo of a warning sign">
+
+ **careful** is a Python library for writing resilient, well-behaved HTTP clients.
+
+ **Code**: <https://codeberg.org/jpt/careful>
+
+ **Docs**: <https://jpt.sh/projects/careful/>
+
+ ![PyPI - Version](https://img.shields.io/pypi/v/careful)
+ [![status-badge](https://ci.codeberg.org/api/badges/15185/status.svg)](https://ci.codeberg.org/repos/15185)
+
+ Call one function to enchant an
+ **[httpx.Client](https://www.python-httpx.org)**, making your HTTP connections more resilient and better mannered.
+
+ - Configure **throttling** to avoid accidental denial-of-service and the risk of getting banned.
+ - **Retries** help overcome intermittent failures on flaky sites or long crawls.
+ - **Development caching.** The cache persists between runs, reducing redundant requests while you iterate on your crawlers & scrapers.
+
+ ### Example
+
+ ```python
+ from httpx import Client
+ from careful.httpx import make_careful_client, MemoryCache
+
+ # the only function you need to call is make_careful_client
+ # this wraps your existing `httpx.Client` with your preferred
+ # careful behaviors
+
+ client = make_careful_client(
+     client=Client(headers={'user-agent': 'spiderman/1.0'}),
+
+     # retries are configurable w/ exponential back off
+     retry_attempts=2,
+     retry_wait_seconds=5,
+
+     # can cache to process memory, filesystem, or SQLite
+     cache_storage=MemoryCache(),
+
+     # easy-to-configure throttling
+     requests_per_minute=60,
+ )
+
+ # methods on client are called as they always are
+ # configured behaviors occur without further code changes
+ client.get("https://example.com")
+ ```
+
+
+ ---
+
+ Logo licensed from [Adrien Coquet via Noun Project](https://thenounproject.com/icon/carefully-3681327/)
@@ -1,5 +1,15 @@
  # Changelog

+ ## 0.3.1
+
+ - Add `make_careful_client_from_env`
+
+ ## 0.3.0 - 8 September 2025
+
+ - Increased consistency of parameter names and other details across interfaces.
+ - Added support for the robots.txt protocol.
+ - Documentation overhaul.
+
  ## 0.2.0 - 6 September 2025

  - Initial release, mostly a port of `scrapelib` functionality.
@@ -0,0 +1,224 @@
+ # Design Decisions
+
+ !!! note
+
+     This section is not necessary for users of the library to understand.
+
+     If you are looking to contribute, or are curious about why things work the way they do, read on.
+
+ ## Design Goals
+
+ In order of importance:
+
+ 1. Work as a drop-in replacement for `httpx.Client`, so existing code can gain the benefits of the library without being changed.
+ 2. Preserve the 100% typed interface of `httpx`.
+ 3. Make writing & testing new augmentations easier.
+ 4. Organize code in a way that'll make augmenting future libraries (or `httpx.AsyncClient`) easier. (Based on limitations in porting `scrapelib` that made a partial rewrite easier.)
+
+ ## How it works
+
+ Each "suite" of behavior (throttling, retries, caching) consists of a function that
+ [monkey patches](https://en.wikipedia.org/wiki/Monkey_patch) `httpx.Client` to add that behavior
+ to the all-important `request` method. (The other common methods like `get` and `post` call this method.)
+
+ If you'd like to follow along, `throttle.py` is the simplest of them, at around 50 lines long.
+
+ You'll see two functions (ignore the class for now):
+
+ - `_throttle_request` - this acts as a sort of decorator for `httpx.Client.request`
+ - `make_throttled_client` - a pseudo-constructor for our monkey patched client.
+
+ Each feature is implemented in a similar way.
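+
+ As a taste, here is a condensed sketch of the throttle logic (simplified; the real `throttle.py` also handles the typing details described below):
+
+ ```python
+ import time
+
+ def _throttle_request(client, *args, **kwargs):
+     # sleep just long enough to hold the configured request rate
+     wait = client._request_frequency - (time.time() - client._last_request)
+     if wait > 0:
+         time.sleep(wait)
+     # delegate to the original, pre-patch request method
+     response = client._no_throttle_request(*args, **kwargs)
+     client._last_request = time.time()
+     return response
+ ```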
+
+ The recommended [make_careful_client][careful.httpx.make_careful_client] entrypoint is
+ just a convenient combination of these `make_ZZZ_client` functions.
+
+ If you don't need to be convinced that monkey patching was a reasonable choice here, you can skip to [Our patch pattern](#our-patch-pattern).
+
+ ## Why not inheritance?
+
+ For 15 years `scrapelib` used a class hierarchy to a similar end; it'd certainly work here too.
+
+ The equivalent to a careful client in `scrapelib` is a `scrapelib.Scraper`.
+ It has a long inheritance hierarchy:
+
+ `scrapelib.Scraper` -> `CachingSession` -> `ThrottledSession` -> `RetrySession` -> `requests.Session`
+
+ This hierarchy means that there is no such thing as a `CachingSession` that doesn't use throttling,
+ and adding new behavior means considering exactly where it works best in the chain and then setting that in stone.
+
+ There's arguably no benefit derived from having things set up this way.
+ It is just too annoying to mix & match behaviors, or add new ones; a single class would have been easier to maintain over the years.
+
+ ## Why not mixins?
+
+ We don't want to just give up and go with a single monolithic class. (See design goals 3 and 4.)
+
+ It is worth revisiting why those classes weren't [mixins](https://en.wikipedia.org/wiki/Mixin).
+
+ It seems like we could have `ThrottledMixin`, `RetryMixin`, `DevCacheMixin`, etc.
+
+ Then to use retry & cache together someone would:
+
+ ```python
+ import httpx
+ from careful.hypothetical import ThrottledMixin, RetryMixin, DevCacheMixin
+
+ class CustomClient(RetryMixin, DevCacheMixin, httpx.Client):
+     pass
+
+ client = CustomClient()
+ ```
+
+ Honestly, not a great start: having to declare an empty class, and having to think carefully about method-resolution-order rules when ordering the mixins.
+
+ But there's a bigger problem lurking here... configuration.
+
+ Assume each mixin is configured through its constructor:
+
+ ```python
+
+ class RetryMixin:
+     def __init__(self, num_retries=2, retry_delay_seconds=10, **kwargs):
+         ...
+
+
+ class DevCacheMixin:
+     def __init__(self, cache_backend=..., should_cache=..., **kwargs):
+         ...
+ ```
+
+ To make this work properly, our custom class needs a constructor too.
+ It'd wind up looking like:
+
+ ```python
+ def __init__(self, num_retries, retry_delay_seconds, cache_backend, should_cache, ...):
+     # Initialize mixins explicitly
+     RetryMixin.__init__(self, num_retries=num_retries,
+                         retry_delay_seconds=retry_delay_seconds)
+
+     DevCacheMixin.__init__(self, cache_backend=cache_backend,
+                            should_cache=should_cache)
+
+     Client.__init__(self, **kwargs)
+ ```
+
+ This makes working with the mixins frustrating, since any new combination requires writing another repetitive constructor.
+
+ ## Preserving type signatures
+
+ One of the most annoying parts of maintaining `scrapelib` has been keeping its function signatures in sync with small `requests` changes.
+
+ Design goal #1 is that someone's existing usage of `httpx.Client` is unimpeded.
+ The most important method, and where we need to hook in our overrides, is `Client.request`.
+ The method takes a whopping 13 parameters, and of course `httpx` is also fully type-annotated.
+
+ To replace `request` we have three options:
+
+ 1. Give our new class a `request` method which takes `*args` and `**kwargs` and passes them up the chain.
+ 2. Give each class a `request` method that takes the exact same 13 parameters, and be careful to keep them in sync.
+ 3. Use `functools.wraps` to replace the function but leave existing annotations & docstrings in place.
+
+ \#1 reduces type safety and leads to a worse DX overall since language servers can no longer offer suggestions. **This won't work for us.**
+
+ \#2 is the approach that `scrapelib` took. It was annoying and conflicts with goals 3 and 4.
+
+ \#3 is the approach taken by `careful`, our actual monkey patch. Each `make_ZZZ_client` winds up with code resembling:
+
+ ```python
+ tclient._no_throttle_request = tclient.request
+ tclient.request = types.MethodType(
+     functools.wraps(client.request)(_throttle_request), client
+ )
+ ```
+
+ The first line tucks away the pre-patch request method for use within the decorated function. It uses a unique name since it'll be sharing a namespace with other patches.
+
+ The second line does two neat things:
+
+ - `_throttle_request` is given the signature of `client.request` via `functools.wraps`
+ - `types.MethodType` rebinds the member function (so `self` is correctly handled as the first parameter)
+
+ ## Our patch pattern
+
+ Augmenting `Client` is done in two steps:
+
+ - a request function that acts as a decorator for `Client.request`; this is where the actual logic for the augmentation lives
+ - a patch function that:
+     - writes any private state needed for the new behavior to a `Client` instance
+     - replaces `Client.request` with our patched request
+
+ All of the files in `careful.httpx` follow this structure.
+
+ ### Protocol-typing the internal interface
+
+ After working through the issues above, I assumed I'd need type-ignore statements everywhere to get the monkey patching past a type checker.
+
+ While that might have been an option (after all, the end-user experience is the priority), it'd be nice to keep the benefits of type checking for myself and other authors of extensions.
+
+ It turns out, as long as we consider the patches fully internal to the `Client`, there is a way to make this work.
+
+ The key challenge is that we add new attributes to the `Client` during augmentation:
+
+ ```python
+ # this is that internal state we store on a client
+ tclient._last_request = 0.0
+ tclient._requests_per_minute = requests_per_minute
+ tclient._request_frequency = 60.0 / requests_per_minute
+ ```
+
+ These set off type checker alarms, and if we simply ignore them, then when they are used in the request wrapper
+ they'll set off alarms again!
+
+ It'd be nice to at least have a consistency check between these, so the wrapped `request` doesn't accidentally use the wrong name; I typed `_requests_per_second` at least once while writing.
+
+ The answer here is a `Protocol` and a `cast`.
+
+ Each augmentation now comes with a `typing.Protocol`:
+
+ ```python
+ class Throttled(Protocol):
+     _last_request: float
+     _requests_per_minute: float
+     _request_frequency: float
+     _no_throttle_request: Callable
+     request: Callable
+ ```
+
+ This defines all of the hidden state for the augmentation, as well as a placeholder for our overridden `request`.
+
+ Then, our decorator looks like this:
+
+ ```python
+ def _throttle_request(client: Throttled, *args, **kwargs) -> Response:
+ ```
+
+ Which satisfies the type checker when it comes to internal use of those new attributes.
+
+ The final change comes in where we initialize the attributes in the wrapper functions:
+
+ ```python
+ # a cast is made to the new type, allowing assignment
+ tclient = cast(Throttled, client)
+
+ tclient._last_request = 0.0
+ tclient._requests_per_minute = requests_per_minute
+ tclient._request_frequency = 60.0 / requests_per_minute
+
+ tclient._no_throttle_request = client.request
+ tclient.request = types.MethodType(
+     functools.wraps(client.request)(_throttle_request), client
+ )
+ # the original client can be returned, of type `Client`
+ return client
+ ```
+
+
+ ### Closing thoughts
+
+ With this approach, users never know at any point that they have a `ThrottledClient` or a `CachedClient`, etc.
+ Not having the final type change is not ideal, but it's the compromise made for today.
+
+ It would be nice to be able to expose an extra method or two, but this approach leans on only having private attributes & therefore being able to safely treat an augmented client as a `Client`.
+
+ There's almost certainly room for improvement, but I'm fairly happy with the trade-offs for now.
@@ -0,0 +1,56 @@
+ # careful
+
+ <img src="carefully-3681327.svg" width=100 height=100 alt="logo of a warning sign">
+
+ **careful** is a Python library for writing resilient, well-behaved HTTP clients.
+
+ **Code**: <https://codeberg.org/jpt/careful>
+
+ **Docs**: <https://jpt.sh/projects/careful/>
+
+ ![PyPI - Version](https://img.shields.io/pypi/v/careful)
+ [![status-badge](https://ci.codeberg.org/api/badges/15185/status.svg)](https://ci.codeberg.org/repos/15185)
+
+ Call one function to enchant an
+ **[httpx.Client](https://www.python-httpx.org)**, making your HTTP connections more resilient and better mannered.
+
+ - Configure **throttling** to avoid accidental denial-of-service and the risk of getting banned.
+ - **Retries** help overcome intermittent failures on flaky sites or long crawls.
+ - **Development caching.** The cache persists between runs, reducing redundant requests while you iterate on your crawlers & scrapers.
+
+ Additionally, the library is fully typed, thoroughly tested, and [based on code that powered production web scrapers for 15+ years](changelog#scrapelib).
+
+ ### Example
+
+ ```python
+ from httpx import Client
+ from careful.httpx import make_careful_client, MemoryCache
+
+ # the only function you need to call is make_careful_client
+ # this wraps your existing `httpx.Client` with your preferred
+ # careful behaviors
+
+ client = make_careful_client(
+     client=Client(headers={'user-agent': 'spiderman/1.0'}),
+
+     # retries are configurable w/ exponential back off
+     retry_attempts=2,
+     retry_wait_seconds=5,
+
+     # can cache to process memory, filesystem, or SQLite
+     cache_storage=MemoryCache(),
+
+     # easy-to-configure throttling
+     requests_per_minute=60,
+ )
+
+ # methods on client are called as they always are
+ # configured behaviors occur without further code changes
+ client.get("https://example.com")
+ ```
+
+
+ ---
+
+ Logo licensed from [Adrien Coquet via Noun Project](https://thenounproject.com/icon/carefully-3681327/)
+
@@ -0,0 +1,139 @@
+ # Usage
+
+ Most users will only need to call `careful.httpx.make_careful_client`.
+
+ ::: careful.httpx.make_careful_client
+     options:
+         annotations_path: brief
+         show_signature: false
+         show_root_heading: true
+
+ ## Throttling
+
+ If `requests_per_minute` is set, standard (non-retry) requests will automatically
+ sleep for a short period to target the given rate.
+
+ For example, at 30rpm, the sleep time on a fast request will be close to 2 seconds.
+
+ ```python
+ client = make_careful_client(requests_per_minute=20)
+
+ for page in range(10):
+     # will sleep ~3 seconds each time
+     client.get(f"https://example.com?page={page}")
+ ```
+
+ ## Retries
+
+ If `retry_attempts` is set, responses will be passed to `should_retry`.
+ Responses that are rejected (return `True`) will be retried after a wait based on
+ `retry_wait_seconds`.
+ Each retry will wait twice as long as the one before.
+
+ ```python
+ client = make_careful_client(retry_attempts=2, retry_wait_seconds=30)
+
+ # will try, wait 30s, try again, wait 60s, try again, then give up & return the 500
+ client.get("https://httpbin.org/status/500")
+ ```
+
+ There are a few simple pre-written `should_retry` predicates you can use (see the sketch below):
+
+ ::: careful.httpx.retries.retry_default_rule
+     options:
+         heading_level: 3
+         show_root_heading: true
+ ::: careful.httpx.retries.retry_only_500s
+     options:
+         heading_level: 3
+         show_root_heading: true
+ ::: careful.httpx.retries.retry_all_400s_500s
+     options:
+         heading_level: 3
+         show_root_heading: true
+
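+ A minimal sketch of supplying one (this assumes, per the description above, that `should_retry` receives the response and returns `True` when a retry is warranted):
+
+ ```python
+ from careful.httpx import make_careful_client
+ from careful.httpx.retries import retry_only_500s
+
+ # use a pre-written predicate: retry only on 5xx responses
+ client = make_careful_client(retry_attempts=3, should_retry=retry_only_500s)
+
+ # or pass any callable of the same shape, e.g. to also retry rate limits
+ client = make_careful_client(
+     retry_attempts=3,
+     should_retry=lambda response: response.status_code >= 500
+     or response.status_code == 429,
+ )
+ ```
+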
+ ## Development Caching
+
+ !!! warning "Why _development_ caching?"
+
+     This feature is named as a reminder that **this is not true HTTP caching**, which
+     should take various headers into account. Look at libraries like [hishel](https://hishel.com) if that's what you are after.
+
+ The purpose of this feature is to allow you to cache all of your HTTP requests during development.
+ Often when writing a scraper or crawler, you wind up hitting the site you are working on more often than you'd like -- each time you iterate on your code, you're likely making redundant requests to pages that haven't changed.
+
+ By caching all successful requests (configurable with the `should_cache` parameter),
+ you can easily re-run scrapers without making redundant HTTP requests.
+ This means much faster development & happier upstream servers.
+
+ To enable development caching, pass a [`MemoryCache`][careful.httpx.MemoryCache],
+ [`FileCache`][careful.httpx.FileCache], or [`SqliteCache`][careful.httpx.SqliteCache] as
+ the `cache_storage` parameter of `make_careful_client`.
+
+ ```python
+ client = make_careful_client(
+     cache_storage=FileCache("_cache")
+ )
+
+ # only one HTTP request is made
+ client.get("https://example.com")
+ client.get("https://example.com")
+ client.get("https://example.com")
+ client.get("https://example.com")
+ # on subsequent runs, zero will be made until _cache is cleared
+ ```
+
+ ## robots.txt
+
+ If `check_robots_txt` is set to `True`, then each request will be checked
+ against that domain's `robots.txt`.
+ (Each domain's `robots.txt` will be fetched once and cached.)
+
+ Evaluating `robots.txt` requires a *user agent*. `careful` will use the user agent
+ set on the `client` at the time of creation by default, but sometimes it is preferable
+ to use a custom user agent for these checks. To do so, set the `robots_txt_user_agent` parameter.
+
+ The default behavior when checking is enabled is that any attempt to fetch a denied page will
+ raise a `RobotExclusionError`. This behavior can be overridden by passing a function to `robots_txt_on_reject` that takes two parameters: `(url: str, rfp: RobotFileParser)`.
+
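+ For example, a sketch of opting into a softer failure mode using these parameters (`warn_robots_txt` is documented below; the handler follows the `(url, rfp)` shape just described):
+
+ ```python
+ from careful.httpx import make_careful_client
+ from careful.httpx.robots import warn_robots_txt
+
+ client = make_careful_client(
+     check_robots_txt=True,
+     # evaluate robots.txt rules as this agent instead of the client's header
+     robots_txt_user_agent="spiderman/1.0",
+     # warn instead of raising RobotExclusionError on denied pages
+     robots_txt_on_reject=warn_robots_txt,
+ )
+ ```
+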
+ ::: careful.httpx.RobotExclusionError
+     options:
+         heading_level: 3
+         members: False
+         show_root_heading: true
+
+ ::: careful.httpx.robots.warn_robots_txt
+     options:
+         heading_level: 3
+         members: False
+         show_root_heading: true
+
+ ### Note about multiple enhancements
+
+ When multiple features are applied, the order of wrapping ensures that:
+
+ - the cache is checked first, and bypasses throttling if hit
+ - retries use their own delays, but are not throttled separately
+
+ ## Cache Storage Options
+
+ These options are available for `cache_storage`:
+
+ ::: careful.httpx.MemoryCache
+     options:
+         heading_level: 3
+         members: False
+         show_root_heading: true
+
+ ::: careful.httpx.FileCache
+     options:
+         heading_level: 3
+         members: False
+         show_root_heading: true
+
+ ::: careful.httpx.SqliteCache
+     options:
+         heading_level: 3
+         members: False
+         show_root_heading: true
+
@@ -1,5 +1,5 @@
  site_name: careful
- site_url: https://careful.jpt.sh/
+ site_url: https://jpt.sh/projects/careful/
  site_author: James Turk
  site_description: A library for making requests to unreliable sites with httpx.
  copyright: Copyright &copy; 2025 James Turk
@@ -49,4 +49,5 @@ watch:
  nav:
    - 'index.md'
    - 'reference.md'
+   - 'design.md'
    - 'changelog.md'
@@ -1,7 +1,7 @@
  [project]
  name = "careful"
- version = "0.2.1"
- description = "careful extensions to httpx: throttle, retry, cache"
+ version = "0.3.1"
+ description = "a small library for writing resilient, well-behaved HTTP code"
  readme = "README.md"
  authors = [
      { name = "jpt", email = "dev@jpt.sh" }
@@ -18,6 +18,7 @@ classifiers = [
      "Programming Language :: Python :: 3.11",
      "Programming Language :: Python :: 3.12",
      "Programming Language :: Python :: 3.13",
+     "Programming Language :: Python :: 3.14",
      "Topic :: Software Development :: Libraries :: Python Modules",
  ]
  dependencies = [