bounded_subprocess-2.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
+ """
+ Bounded subprocess execution with timeout and output limits.
+
+ This package provides convenient functions for running subprocesses with bounded
+ execution time and output size, with support for both synchronous and asynchronous
+ execution patterns.
+ """
+
+ __version__ = "1.0.0"
+
+
+ # Lazy imports for better startup performance
+ def __getattr__(name):
+     if name == "run":
+         from .bounded_subprocess import run
+
+         return run
+     elif name == "Result":
+         from .util import Result
+
+         return Result
+     elif name == "SLEEP_BETWEEN_READS":
+         from .util import SLEEP_BETWEEN_READS
+
+         return SLEEP_BETWEEN_READS
+     else:
+         raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
+
+
+ # Expose key classes and constants for convenience
+ __all__ = ["run", "Result", "SLEEP_BETWEEN_READS"]
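For reference, a minimal usage sketch of the lazy attribute access defined above (hedged: it assumes the package is installed; per PEP 562, `run`, `Result`, and `SLEEP_BETWEEN_READS` are resolved through the module-level `__getattr__` on first access):

```python
from bounded_subprocess import run, Result, SLEEP_BETWEEN_READS

# Each name is imported lazily the first time it is looked up on the package.
result = run(["echo", "hello"], timeout_seconds=5)
assert isinstance(result, Result)
print(result.exit_code, repr(result.stdout), SLEEP_BETWEEN_READS)
```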
@@ -0,0 +1,122 @@
+ """
+ Synchronous subprocess execution with bounds on runtime and output size.
+ """
+
+ import subprocess
+ import os
+ import signal
+ from typing import List, Optional
+ import time
+
+ from .util import (
+     Result,
+     set_nonblocking,
+     MAX_BYTES_PER_READ,
+     write_nonblocking_sync,
+     read_to_eof_sync,
+ )
+
+
+ def run(
+     args: List[str],
+     timeout_seconds: int = 15,
+     max_output_size: int = 2048,
+     env=None,
+     stdin_data: Optional[str] = None,
+     stdin_write_timeout: Optional[int] = None,
+ ) -> Result:
+     """
+     Run a subprocess with a timeout and bounded stdout/stderr capture.
+
+     This helper starts the child in a new session so timeout cleanup can kill
+     the entire process group. Stdout and stderr are read in nonblocking mode and
+     truncated to `max_output_size` bytes each. If the timeout elapses, the
+     returned `Result.timeout` is True and `Result.exit_code` is -1. If
+     `stdin_data` cannot be fully written before `stdin_write_timeout`,
+     `Result.exit_code` is set to -1 even if the process exits normally.
+
+     Example:
+
+     ```python
+     from bounded_subprocess import run
+
+     result = run(
+         ["bash", "-lc", "echo ok; echo err 1>&2"],
+         timeout_seconds=5,
+         max_output_size=1024,
+     )
+     print(result.exit_code)
+     print(result.stdout.strip())
+     print(result.stderr.strip())
+     ```
+     """
+     deadline = time.time() + timeout_seconds
+
+     p = subprocess.Popen(
+         args,
+         env=env,
+         stdin=subprocess.PIPE if stdin_data is not None else subprocess.DEVNULL,
+         stdout=subprocess.PIPE,
+         stderr=subprocess.PIPE,
+         start_new_session=True,
+         bufsize=MAX_BYTES_PER_READ,
+     )
+     process_group_id = os.getpgid(p.pid)
+
+     set_nonblocking(p.stdout)
+     set_nonblocking(p.stderr)
+
+     write_ok = True
+     if stdin_data is not None:
+         set_nonblocking(p.stdin)
+         write_ok = write_nonblocking_sync(
+             fd=p.stdin,
+             data=stdin_data.encode(),
+             timeout_seconds=stdin_write_timeout
+             if stdin_write_timeout is not None
+             else 15,
+         )
+         # Closing stdin is customary and signals EOF to the child.
+         try:
+             p.stdin.close()
+         except (BrokenPipeError, BlockingIOError):
+             pass
+
+     bufs = read_to_eof_sync(
+         [p.stdout, p.stderr],
+         timeout_seconds=timeout_seconds,
+         max_len=max_output_size,
+     )
+
+     # Without this, even the trivial test fails on Linux but not on macOS. It
+     # seems possible for (1) both stdout and stderr to close before (2) the
+     # child process exits, and we can observe the instant between (1) and (2).
+     # So we need p.wait here and not p.poll.
+     #
+     # Given the above, we should be able to write a test case that explicitly
+     # closes both stdout and stderr, then sleeps for an instant before
+     # terminating normally. That program should not time out.
+     try:
+         exit_code = p.wait(timeout=max(0, deadline - time.time()))
+         is_timeout = False
+     except subprocess.TimeoutExpired:
+         exit_code = None
+         is_timeout = True
+
+     try:
+         # Kills the process group. Without this line, test_fork_once fails.
+         os.killpg(process_group_id, signal.SIGKILL)
+     except ProcessLookupError:
+         pass
+
+     # Even if the process terminates normally, if we failed to write everything
+     # to stdin, we return -1 as the exit code.
+     exit_code = (
+         -1 if is_timeout or (stdin_data is not None and not write_ok) else exit_code
+     )
+
+     return Result(
+         timeout=is_timeout,
+         exit_code=exit_code,
+         stdout=bufs[0].decode(errors="ignore"),
+         stderr=bufs[1].decode(errors="ignore"),
+     )
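To make the bounds above concrete, here is a small illustrative sketch (assumptions: the package is installed and `sleep` and `python3` are on PATH; the commands are examples only, not part of the library):

```python
from bounded_subprocess import run

# A child that outlives the timeout is killed along with its process group;
# the result reports timeout=True and exit_code=-1.
slow = run(["sleep", "30"], timeout_seconds=2)
print(slow.timeout, slow.exit_code)  # True -1

# Output beyond max_output_size is silently discarded.
noisy = run(
    ["python3", "-c", "print('x' * 100000)"],
    timeout_seconds=5,
    max_output_size=1024,
)
print(len(noisy.stdout))  # at most 1024
```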
@@ -0,0 +1,297 @@
+ """
+ Asynchronous subprocess execution with bounds on runtime and output size.
+ """
+
+ import asyncio
+ import os
+ import signal
+ import time
+ import subprocess
+ import tempfile
+ from typing import List, Optional
+ import logging
+
+ from .util import (
+     Result,
+     set_nonblocking,
+     MAX_BYTES_PER_READ,
+     write_nonblocking_async,
+     read_to_eof_async,
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ async def run(
+     args: List[str],
+     timeout_seconds: int = 15,
+     max_output_size: int = 2048,
+     env=None,
+     stdin_data: Optional[str] = None,
+     stdin_write_timeout: Optional[int] = None,
+ ) -> Result:
+     """
+     Run a subprocess asynchronously with bounded stdout/stderr capture.
+
+     The child process is started in a new session and polled until it exits or
+     the timeout elapses. Stdout and stderr are read in nonblocking mode and
+     truncated to `max_output_size` bytes each. If the timeout elapses,
+     `Result.timeout` is True and `Result.exit_code` is -1. If `stdin_data`
+     cannot be fully written before `stdin_write_timeout`, `Result.exit_code`
+     is set to -1 even if the process exits normally.
+
+     Example:
+
+     ```python
+     import asyncio
+     from bounded_subprocess.bounded_subprocess_async import run
+
+     async def main():
+         result = await run(
+             ["bash", "-lc", "echo ok; echo err 1>&2"],
+             timeout_seconds=5,
+             max_output_size=1024,
+         )
+         print(result.exit_code)
+         print(result.stdout.strip())
+         print(result.stderr.strip())
+
+     asyncio.run(main())
+     ```
+     """
+
+     deadline = time.time() + timeout_seconds
+
+     p = subprocess.Popen(
+         args,
+         env=env,
+         stdin=subprocess.PIPE if stdin_data is not None else subprocess.DEVNULL,
+         stdout=subprocess.PIPE,
+         stderr=subprocess.PIPE,
+         start_new_session=True,
+         bufsize=MAX_BYTES_PER_READ,
+     )
+     process_group_id = os.getpgid(p.pid)
+
+     set_nonblocking(p.stdout)
+     set_nonblocking(p.stderr)
+
+     write_ok = True
+     if stdin_data is not None:
+         set_nonblocking(p.stdin)
+         write_ok = await write_nonblocking_async(
+             fd=p.stdin,
+             data=stdin_data.encode(),
+             timeout_seconds=stdin_write_timeout
+             if stdin_write_timeout is not None
+             else 15,
+         )
+         try:
+             p.stdin.close()
+         except (BrokenPipeError, BlockingIOError):
+             pass
+
+     bufs = await read_to_eof_async(
+         [p.stdout, p.stderr],
+         timeout_seconds=timeout_seconds,
+         max_len=max_output_size,
+     )
+
+     exit_code = None
+     is_timeout = False
+     while True:
+         rc = p.poll()
+         if rc is not None:
+             exit_code = rc
+             break
+         remaining = deadline - time.time()
+         if remaining <= 0:
+             is_timeout = True
+             break
+         await asyncio.sleep(min(0.05, remaining))
+
+     try:
+         os.killpg(process_group_id, signal.SIGKILL)
+     except ProcessLookupError:
+         pass
+
+     exit_code = (
+         -1 if is_timeout or (stdin_data is not None and not write_ok) else exit_code
+     )
+
+     return Result(
+         timeout=is_timeout,
+         exit_code=exit_code if exit_code is not None else -1,
+         stdout=bufs[0].decode(errors="ignore"),
+         stderr=bufs[1].decode(errors="ignore"),
+     )
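Because this `run` is a coroutine, several bounded subprocesses can share one event loop; a minimal sketch (assuming the package is installed and `echo`/`sleep` are available):

```python
import asyncio
from bounded_subprocess.bounded_subprocess_async import run

async def main():
    # Each child is bounded independently; the sleeping child times out at
    # two seconds without delaying the other.
    results = await asyncio.gather(
        run(["echo", "fast"], timeout_seconds=5),
        run(["sleep", "30"], timeout_seconds=2),
    )
    for r in results:
        print(r.timeout, r.exit_code, r.stdout.strip())

asyncio.run(main())
```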
+
+
+ # https://docs.podman.io/en/stable/markdown/podman-rm.1.html
+ async def _podman_rm(cidfile_path: str):
+     try:
+         proc = await asyncio.create_subprocess_exec(
+             "podman",
+             "rm",
+             "-f",
+             "--time",
+             "0",
+             "--cidfile",
+             cidfile_path,
+             "--ignore",
+             stdout=subprocess.DEVNULL,
+             stderr=subprocess.DEVNULL,
+         )
+         # podman rm can take time. I think this will eventually complete even
+         # if we time out below.
+         await asyncio.wait_for(proc.wait(), timeout=5.0)
+     except Exception as e:
+         logger.error(f"Error removing container: {e}")
+     finally:
+         try:
+             os.unlink(cidfile_path)
+         except OSError:
+             pass
+
+
+ async def podman_run(
+     args: List[str],
+     *,
+     image: str,
+     timeout_seconds: int,
+     max_output_size: int,
+     env=None,
+     stdin_data: Optional[str] = None,
+     stdin_write_timeout: Optional[int] = None,
+     volumes: List[str] = [],
+     cwd: Optional[str] = None,
+ ) -> Result:
+     """
+     Run a subprocess in a podman container asynchronously with bounded stdout/stderr capture.
+
+     This function mirrors `run`, but executes the command inside a podman
+     container. The container is automatically removed after execution. The
+     interface is otherwise the same as `run`, except for an additional `image`
+     parameter that specifies the container image to use.
+
+     Args:
+         args: Command arguments to run in the container.
+         image: Container image to use.
+         timeout_seconds: Maximum time to wait for the process to complete.
+         max_output_size: Maximum size in bytes for stdout/stderr capture.
+         env: Optional dictionary of environment variables.
+         stdin_data: Optional string data to write to stdin.
+         stdin_write_timeout: Optional timeout for writing stdin data.
+         volumes: Optional list of volume mount specifications (e.g., ["/host/path:/container/path"]).
+         cwd: Optional working directory path inside the container.
+
+     Example:
+
+     ```python
+     import asyncio
+     from bounded_subprocess.bounded_subprocess_async import podman_run
+
+     async def main():
+         result = await podman_run(
+             ["cat"],
+             image="alpine:latest",
+             timeout_seconds=5,
+             max_output_size=1024,
+             stdin_data="hello\n",
+             volumes=["/host/data:/container/data"],
+             cwd="/container/data",
+         )
+         print(result.exit_code)
+         print(result.stdout.strip())
+
+     asyncio.run(main())
+     ```
+     """
+     deadline = time.time() + timeout_seconds
+
+     # Use --cidfile to get the container ID
+     with tempfile.NamedTemporaryFile(
+         mode="w", delete=False, prefix="bounded_subprocess_cid_"
+     ) as cidfile:
+         cidfile_path = cidfile.name
+
+     # Build podman command
+     podman_args = ["podman", "run", "--rm", "-i", "--cidfile", cidfile_path]
+
+     # Handle environment variables
+     if env is not None:
+         # Convert env dict to -e flags for podman
+         for key, value in env.items():
+             podman_args.extend(["-e", f"{key}={value}"])
+
+     # Handle volume mounts
+     for volume in volumes:
+         podman_args.extend(["-v", volume])
+
+     # Handle working directory
+     if cwd is not None:
+         podman_args.extend(["-w", cwd])
+
+     podman_args.append(image)
+     podman_args.extend(args)
+
+     p = subprocess.Popen(
+         podman_args,
+         env=None,
+         stdin=subprocess.PIPE if stdin_data is not None else subprocess.DEVNULL,
+         stdout=subprocess.PIPE,
+         stderr=subprocess.PIPE,
+         bufsize=MAX_BYTES_PER_READ,
+     )
+
+     set_nonblocking(p.stdout)
+     set_nonblocking(p.stderr)
+
+     write_ok = True
+     if stdin_data is not None:
+         set_nonblocking(p.stdin)
+         write_ok = await write_nonblocking_async(
+             fd=p.stdin,
+             data=stdin_data.encode(),
+             timeout_seconds=stdin_write_timeout
+             if stdin_write_timeout is not None
+             else 15,
+         )
+         try:
+             p.stdin.close()
+         except (BrokenPipeError, BlockingIOError):
+             pass
+
+     bufs = await read_to_eof_async(
+         [p.stdout, p.stderr],
+         timeout_seconds=timeout_seconds,
+         max_len=max_output_size,
+     )
+
+     # Busy-wait for the process to exit or the deadline. Why do we need this
+     # when read_to_eof_async seems to do it already? read_to_eof_async returns
+     # when the process closes stdout and stderr, but the process can continue
+     # running even after that. So we really need to wait for an exit code.
+     exit_code = None
+     is_timeout = False
+     while True:
+         rc = p.poll()
+         if rc is not None:
+             exit_code = rc
+             break
+         remaining = deadline - time.time()
+         if remaining <= 0:
+             is_timeout = True
+             break
+         await asyncio.sleep(min(0.05, remaining))
+
+     await _podman_rm(cidfile_path)
+     exit_code = (
+         -1 if is_timeout or (stdin_data is not None and not write_ok) else exit_code
+     )
+     return Result(
+         timeout=is_timeout,
+         exit_code=exit_code if exit_code is not None else -1,
+         stdout=bufs[0].decode(errors="ignore"),
+         stderr=bufs[1].decode(errors="ignore"),
+     )
@@ -0,0 +1,170 @@
+ """
+ Interactive subprocess wrapper with nonblocking stdin/stdout.
+ """
+
+ from typeguard import typechecked
+ from typing import List, Optional
+ import time
+ import errno
+ import subprocess
+ from .util import set_nonblocking, MAX_BYTES_PER_READ, write_loop_sync
+
+ _SLEEP_AFTER_WOUND_BLOCK = 0.5
+
+
+ class _InteractiveState:
+     """Shared implementation for synchronous and asynchronous interaction."""
+
+     def __init__(self, args: List[str], read_buffer_size: int) -> None:
+         popen = subprocess.Popen(
+             args,
+             stdin=subprocess.PIPE,
+             stdout=subprocess.PIPE,
+             bufsize=MAX_BYTES_PER_READ,
+         )
+         set_nonblocking(popen.stdin)
+         set_nonblocking(popen.stdout)
+         self.popen = popen
+         self.read_buffer_size = read_buffer_size
+         self.stdout_saved_bytes = bytearray()
+
+     # --- low level helpers -------------------------------------------------
+     def poll(self) -> Optional[int]:
+         return self.popen.poll()
+
+     def close_pipes(self) -> None:
+         try:
+             self.popen.stdin.close()
+         except BlockingIOError:
+             pass
+         self.popen.stdout.close()
+
+     def kill(self) -> None:
+         self.popen.kill()
+
+     def return_code(self) -> int:
+         rc = self.popen.returncode
+         return rc if rc is not None else -9
+
+     def write_chunk(self, data: memoryview) -> tuple[int, bool]:
+         try:
+             written = self.popen.stdin.write(data)
+             self.popen.stdin.flush()
+             return written, True
+         except BlockingIOError as exn:
+             if exn.errno != errno.EAGAIN:
+                 return exn.characters_written, False
+             return exn.characters_written, True
+         except BrokenPipeError:
+             return 0, False
+
+     def read_chunk(self) -> Optional[bytes]:
+         return self.popen.stdout.read(MAX_BYTES_PER_READ)
+
+     def pop_line(self, start_idx: int) -> Optional[bytes]:
+         newline_index = self.stdout_saved_bytes.find(b"\n", start_idx)
+         if newline_index == -1:
+             return None
+         line = memoryview(self.stdout_saved_bytes)[:newline_index].tobytes()
+         del self.stdout_saved_bytes[: newline_index + 1]
+         return line
+
+     def append_stdout(self, data: bytes) -> None:
+         self.stdout_saved_bytes.extend(data)
+
+     def trim_stdout(self) -> None:
+         if len(self.stdout_saved_bytes) > self.read_buffer_size:
+             del self.stdout_saved_bytes[
+                 : len(self.stdout_saved_bytes) - self.read_buffer_size
+             ]
+
+
+ @typechecked
+ class Interactive:
+     """
+     Interact with a subprocess using nonblocking I/O.
+
+     The subprocess is started with pipes for stdin and stdout. Writes are
+     bounded by a timeout, and reads return complete lines (without the trailing
+     newline). The internal buffer is capped at `read_buffer_size`; older bytes
+     are dropped if output grows without line breaks.
+
+     Example:
+
+     ```python
+     from bounded_subprocess.interactive import Interactive
+
+     proc = Interactive(["python3", "-u", "-c", "print(input())"], read_buffer_size=4096)
+     ok = proc.write(b"hello\n", timeout_seconds=1)
+     line = proc.read_line(timeout_seconds=1)
+     rc = proc.close(nice_timeout_seconds=1)
+     ```
+     """
+
+     def __init__(self, args: List[str], read_buffer_size: int) -> None:
+         """
+         Start a subprocess with a bounded stdout buffer.
+
+         The child process is created with nonblocking stdin/stdout pipes. The
+         internal read buffer keeps at most `read_buffer_size` bytes of recent
+         output.
+         """
+         self._state = _InteractiveState(args, read_buffer_size)
+
+     def close(self, nice_timeout_seconds: int) -> int:
+         """
+         Close pipes, wait briefly, then kill the subprocess.
+
+         Returns the subprocess return code, or -9 if the process is still
+         running when it is killed.
+         """
+         self._state.close_pipes()
+         for _ in range(nice_timeout_seconds):
+             if self._state.poll() is not None:
+                 break
+             time.sleep(1)
+         self._state.kill()
+         return self._state.return_code()
+
+     def write(self, stdin_data: bytes, timeout_seconds: int) -> bool:
+         """
+         Write `stdin_data` to the subprocess within `timeout_seconds`.
+
+         Returns False if the subprocess has already exited or if writing fails.
+         """
+         if self._state.poll() is not None:
+             return False
+         return write_loop_sync(
+             self._state.write_chunk,
+             stdin_data,
+             timeout_seconds,
+             sleep_interval=_SLEEP_AFTER_WOUND_BLOCK,
+         )
+
+     def read_line(self, timeout_seconds: int) -> Optional[bytes]:
+         """
+         Read the next line from stdout, or return None on timeout/EOF.
+
+         The returned line does not include the trailing newline byte.
+         """
+         line = self._state.pop_line(0)
+         if line is not None:
+             return line
+         if self._state.poll() is not None:
+             return None
+         deadline = time.time() + timeout_seconds
+         while time.time() < deadline:
+             new_bytes = self._state.read_chunk()
+             if new_bytes is None:
+                 time.sleep(_SLEEP_AFTER_WOUND_BLOCK)
+                 continue
+             if len(new_bytes) == 0:
+                 return None
+             prev_len = len(self._state.stdout_saved_bytes)
+             self._state.append_stdout(new_bytes)
+             line = self._state.pop_line(prev_len)
+             if line is not None:
+                 return line
+             self._state.trim_stdout()
+             time.sleep(_SLEEP_AFTER_WOUND_BLOCK)
+         return None
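Building on the single-exchange example in the docstring above, here is a multi-turn sketch (hedged: it assumes `python3` is on PATH, and the echo-loop child program is illustrative, not part of the library):

```python
from bounded_subprocess.interactive import Interactive

# The child echoes each stdin line back; -u keeps its output unbuffered.
proc = Interactive(
    ["python3", "-u", "-c", "import sys\nfor line in sys.stdin: print(line.strip())"],
    read_buffer_size=4096,
)
for msg in [b"one\n", b"two\n", b"three\n"]:
    if not proc.write(msg, timeout_seconds=2):
        break
    print(proc.read_line(timeout_seconds=2))  # b'one', b'two', b'three'
print("exit status:", proc.close(nice_timeout_seconds=1))
```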
@@ -0,0 +1,91 @@
+ from typeguard import typechecked
+ from typing import List, Optional
+ import asyncio
+ import time
+ from .interactive import _InteractiveState
+ from .util import write_nonblocking_async, can_read, MAX_BYTES_PER_READ
+
+
+ @typechecked
+ class Interactive:
+     """Asynchronous interface for interacting with a subprocess."""
+
+     def __init__(self, args: List[str], read_buffer_size: int) -> None:
+         self._state = _InteractiveState(args, read_buffer_size)
+
+     async def close(self, nice_timeout_seconds: int) -> int:
+         self._state.close_pipes()
+         for _ in range(nice_timeout_seconds):
+             if self._state.poll() is not None:
+                 break
+             await asyncio.sleep(1)
+         self._state.kill()
+         return self._state.return_code()
+
+     async def write(self, stdin_data: bytes, timeout_seconds: int) -> bool:
+         if self._state.poll() is not None:
+             return False
+         return await write_nonblocking_async(
+             fd=self._state.popen.stdin,
+             data=stdin_data,
+             timeout_seconds=timeout_seconds,
+         )
+
+     # I think I have reinvented buffered line reading. I dimly recall studying
+     # this in excruciating detail in CS153. The difference here is that there
+     # is a bunch of extra work to avoid blocking, a timeout, and a limit on
+     # how long a received line can be.
+     async def read_line(self, timeout_seconds: int) -> Optional[bytes]:
+         # First, try to read a line from the internal buffer. The zero argument
+         # indicates where to *start looking for a newline*. The returned line
+         # always begins at the start of the buffer. This is an optimization
+         # for the loop below.
+         line = self._state.pop_line(0)
+         if line is not None:
+             return line
+
+         if self._state.poll() is not None:
+             return None
+
+         deadline = time.time() + timeout_seconds
+         while time.time() < deadline:
+             # Nonblocking wait until bytes are available, or we reach the
+             # deadline.
+             try:
+                 can_read_timeout = deadline - time.time()
+                 if can_read_timeout <= 0:
+                     return None
+                 await asyncio.wait_for(
+                     can_read(self._state.popen.stdout),
+                     timeout=can_read_timeout,
+                 )
+             except asyncio.TimeoutError:
+                 return None
+
+             new_bytes = self._state.popen.stdout.read(MAX_BYTES_PER_READ)
+
+             # Append the received bytes to the buffer and look for a newline.
+             # As an optimization, we only look for a newline in the received
+             # bytes.
+             prev_len = len(self._state.stdout_saved_bytes)
+             self._state.append_stdout(new_bytes)
+             line = self._state.pop_line(prev_len)
+             if line is not None:
+                 return line
+
+             if len(new_bytes) == 0:
+                 # The pipe closed before a newline arrived. We do *not* return
+                 # the final bit of text that we received, but we do clear our
+                 # internal buffer.
+
+                 # Alternative design: we could return the following
+                 # last_incomplete_line = memoryview(self._state.stdout_saved_bytes).tobytes()
+                 self._state.stdout_saved_bytes.clear()
+                 return None
+
+             # We cap the size of the buffered line. This will make things go
+             # wrong if we are getting structured output (e.g., JSON) from the
+             # subprocess, but it prevents the subprocess from exhausting our
+             # memory.
+             self._state.trim_stdout()
+         return None
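Since this module carries no inline example, here is a minimal sketch of the asynchronous interface above (hedged: `python3` on PATH, and the ticking child program is illustrative only):

```python
import asyncio
from bounded_subprocess.interactive_async import Interactive

async def main():
    # The child prints a few lines on its own; read_line yields each complete
    # line without the trailing newline, and None on timeout or EOF.
    proc = Interactive(
        ["python3", "-u", "-c",
         "import time\nfor i in range(3): print('line', i); time.sleep(0.2)"],
        read_buffer_size=4096,
    )
    while (line := await proc.read_line(timeout_seconds=2)) is not None:
        print(line)
    print("exit status:", await proc.close(nice_timeout_seconds=1))

asyncio.run(main())
```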
@@ -0,0 +1,296 @@
+ """
+ Utilities for bounded subprocess I/O and nonblocking pipe helpers.
+ """
+
+ import subprocess
+ import os
+ import fcntl
+ import signal
+ from typing import Callable, List, Optional
+ import errno
+ import time
+ import asyncio
+ import dataclasses
+ import select
+
+ MAX_BYTES_PER_READ = 1024
+ SLEEP_BETWEEN_READS = 0.1
+ SLEEP_BETWEEN_WRITES = 0.01
+
+
+ @dataclasses.dataclass
+ class Result:
+     """
+     Result of a bounded subprocess run.
+
+     The `stdout` and `stderr` fields contain at most the requested number of
+     bytes, decoded with errors ignored. `timeout` is True only when the overall
+     timeout elapses. When a timeout or stdin write failure occurs, `exit_code`
+     is -1.
+     """
+
+     timeout: bool
+     exit_code: int
+     stdout: str
+     stderr: str
+
+     def __init__(self, timeout, exit_code, stdout, stderr):
+         self.timeout = timeout
+         self.exit_code = exit_code
+         self.stdout = stdout
+         self.stderr = stderr
+
+
+ def set_nonblocking(reader):
+     """
+     Mark a file descriptor as nonblocking.
+
+     This is required before using the read/write helpers that rely on
+     nonblocking behavior.
+     """
+     fd = reader.fileno()
+     fl = fcntl.fcntl(fd, fcntl.F_GETFL)
+     fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
+
+
+ def write_loop_sync(
+     write_chunk: Callable[[memoryview], tuple[int, bool]],
+     data: bytes,
+     timeout_seconds: float,
+     *,
+     sleep_interval: float,
+ ) -> bool:
+     """
+     Repeatedly write data using `write_chunk` until complete or timeout.
+
+     The `write_chunk` callback returns `(bytes_written, keep_going)`. If
+     `keep_going` is False, this function returns False immediately.
+     """
+     mv = memoryview(data)
+     start = 0
+     start_time = time.time()
+     while start < len(mv):
+         written, keep_going = write_chunk(mv[start:])
+         start += written
+         if not keep_going:
+             return False
+         if start < len(mv):
+             if time.time() - start_time > timeout_seconds:
+                 return False
+             time.sleep(sleep_interval)
+     return True
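A brief sketch of the `(bytes_written, keep_going)` callback contract described above (hedged: the pipe, file object, and `write_chunk` helper are local to this example; only `write_loop_sync` and `set_nonblocking` come from the package):

```python
import errno
import os
from bounded_subprocess.util import set_nonblocking, write_loop_sync

r, w = os.pipe()
writer = os.fdopen(w, "wb", buffering=0)
set_nonblocking(writer)

def write_chunk(view: memoryview) -> tuple[int, bool]:
    try:
        # A raw nonblocking write returns the count written, or None if it
        # would block; report (0, True) so the loop sleeps and retries.
        return writer.write(view) or 0, True
    except BlockingIOError as exn:
        # Keep going only on EAGAIN.
        return exn.characters_written, exn.errno == errno.EAGAIN
    except BrokenPipeError:
        return 0, False

ok = write_loop_sync(write_chunk, b"hello", timeout_seconds=1.0, sleep_interval=0.01)
print(ok, os.read(r, 16))  # True b'hello'
```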
+
+
+ async def can_write(fd):
+     """
+     Wait until the file descriptor is writable.
+     """
+     future = asyncio.Future()
+     loop = asyncio.get_running_loop()
+     loop.add_writer(fd, future.set_result, None)
+     future.add_done_callback(lambda f: loop.remove_writer(fd))
+     await future
+
+
+ async def can_read(fd):
+     """
+     Wait until the file descriptor is readable.
+     """
+     future = asyncio.Future()
+     loop = asyncio.get_running_loop()
+     loop.add_reader(fd, future.set_result, None)
+     future.add_done_callback(lambda f: loop.remove_reader(fd))
+     await future
+
+
+ async def write_nonblocking_async(*, fd, data: bytes, timeout_seconds: int) -> bool:
+     """
+     Write to a nonblocking file descriptor, bounded by `timeout_seconds`.
+
+     Returns True if all the data was written. False indicates that there was
+     either a timeout or a broken pipe.
+
+     This function does not close the file descriptor.
+     """
+     start_time_seconds = time.time()
+
+     # A slice, data[..], would create a copy. A memoryview does not.
+     mv = memoryview(data)
+     start = 0
+     while start < len(mv):
+         try:
+             # Write as much as possible without blocking.
+             written = fd.write(mv[start:])
+             if written is None:
+                 written = 0
+             start = start + written
+         except BrokenPipeError:
+             return False
+         except BlockingIOError as exn:
+             if exn.errno != errno.EAGAIN:
+                 # NOTE(arjun): I am not certain why this would happen. However,
+                 # you are only supposed to retry on EAGAIN.
+                 return False
+             # Some, but not all, of the bytes were written.
+             start = start + exn.characters_written
+
+         # Compute how much more time we have left.
+         wait_timeout = timeout_seconds - (time.time() - start_time_seconds)
+         # We are already past the deadline, so abort.
+         if wait_timeout <= 0:
+             return False
+         try:
+             await asyncio.wait_for(can_write(fd), wait_timeout)
+         except asyncio.TimeoutError:
+             # Deadline elapsed, so abort.
+             return False
+
+     return True
+
+
+ def read_to_eof_sync(
+     files: list,
+     *,
+     timeout_seconds: int,
+     max_len: int,
+ ) -> List[bytes]:
+     """
+     Read from nonblocking file descriptors until EOF, with limits on how long
+     to wait and the maximum number of bytes to keep per file.
+
+     Returns the data read so far when EOF is reached or the timeout elapses.
+     """
+     bufs = {fd: bytearray() for fd in files}
+     avail = set(files)
+     end_at = time.time() + timeout_seconds
+
+     while avail and time.time() < end_at:
+         # Wait only as long as we still have time left.
+         remaining = max(0, end_at - time.time())
+         ready, _, _ = select.select(avail, [], [], remaining)
+         if not ready:
+             break
+         for fd in ready:
+             try:
+                 chunk = fd.read(MAX_BYTES_PER_READ)
+                 if not chunk:
+                     # Reached EOF, so we can stop reading from this file.
+                     avail.discard(fd)
+                     continue
+                 the_buf = bufs[fd]
+                 # Keep at most max_len bytes, silently dropping any extra bytes.
+                 if len(the_buf) < max_len:
+                     keep = max_len - len(the_buf)
+                     the_buf.extend(chunk[:keep])
+             except (BlockingIOError, InterruptedError):
+                 # Would-block, so we cannot read from this file right now.
+                 pass
+             except OSError:
+                 # Broken pipe, bad fd, etc.
+                 avail.discard(fd)
+
+     # Preserve the caller-supplied order.
+     return [bytes(bufs[fd]) for fd in files]
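The sketch below shows how `read_to_eof_sync` is meant to be combined with nonblocking pipes from a `subprocess.Popen`, mirroring its use in `bounded_subprocess.run` (hedged: assumes `bash` is available):

```python
import subprocess
from bounded_subprocess.util import MAX_BYTES_PER_READ, read_to_eof_sync, set_nonblocking

p = subprocess.Popen(
    ["bash", "-c", "echo out; echo err 1>&2"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    bufsize=MAX_BYTES_PER_READ,
)
set_nonblocking(p.stdout)
set_nonblocking(p.stderr)
# Keep at most 1024 bytes from each stream; the result order follows the argument list.
out, err = read_to_eof_sync([p.stdout, p.stderr], timeout_seconds=5, max_len=1024)
p.wait()
print(out, err)  # b'out\n' b'err\n'
```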
+
+
+ async def _wait_for_any_read(fds, timeout: float):
+     """Wait until any of the fds is readable or the timeout elapses."""
+     loop = asyncio.get_running_loop()
+     fut = loop.create_future()
+
+     def make_cb(fd):
+         return lambda: (not fut.done()) and fut.set_result(fd)
+
+     for fd in fds:
+         loop.add_reader(fd.fileno(), make_cb(fd))
+     try:
+         return await asyncio.wait_for(fut, timeout)
+     except asyncio.TimeoutError:
+         return None
+     finally:
+         for fd in fds:
+             loop.remove_reader(fd.fileno())
+
+
+ async def read_to_eof_async(
+     files: list,
+     *,
+     timeout_seconds: int,
+     max_len: int,
+ ) -> List[bytes]:
+     """
+     Asynchronously read from nonblocking FDs until EOF or timeout.
+
+     The returned list preserves the order of the `files` argument.
+     """
+     bufs = {fd: bytearray() for fd in files}
+     avail = list(files)
+     end_at = time.time() + timeout_seconds
+
+     while avail and time.time() < end_at:
+         remaining = max(0, end_at - time.time())
+         fd = await _wait_for_any_read(avail, remaining)
+         if fd is None:
+             break
+         try:
+             chunk = fd.read(MAX_BYTES_PER_READ)
+             if not chunk:
+                 avail.remove(fd)
+                 continue
+             buf = bufs[fd]
+             if len(buf) < max_len:
+                 keep = max_len - len(buf)
+                 buf.extend(chunk[:keep])
+         except (BlockingIOError, InterruptedError):
+             pass
+         except OSError:
+             avail.remove(fd)
+
+     return [bytes(bufs[fd]) for fd in files]
+
+
+ # This function is very similar to write_nonblocking_async. But, in my
+ # opinion, trying to build an abstraction that works for both sync and async
+ # code is painful and a deficiency of Python.
+ def write_nonblocking_sync(*, fd, data: bytes, timeout_seconds: int) -> bool:
+     """
+     Write to a nonblocking file descriptor, bounded by `timeout_seconds`.
+
+     Returns True if all the data was written. False indicates that there was
+     either a timeout or a broken pipe.
+
+     This function does not close the file descriptor.
+     """
+     start_time_seconds = time.time()
+
+     # A slice, data[..], would create a copy. A memoryview does not.
+     mv = memoryview(data)
+     start = 0
+     while start < len(mv):
+         try:
+             # Write as much as possible without blocking.
+             written = fd.write(mv[start:])
+             if written is None:
+                 written = 0
+             start = start + written
+         except BrokenPipeError:
+             return False
+         except BlockingIOError as exn:
+             if exn.errno != errno.EAGAIN:
+                 # NOTE(arjun): I am not certain why this would happen. However,
+                 # you are only supposed to retry on EAGAIN.
+                 return False
+             # Some, but not all, of the bytes were written.
+             start = start + exn.characters_written
+
+         # Compute how much more time we have left.
+         wait_timeout = timeout_seconds - (time.time() - start_time_seconds)
+         # We are already past the deadline, so abort.
+         if wait_timeout <= 0:
+             return False
+         select_result = select.select([], [fd], [], wait_timeout)
+         if len(select_result[1]) == 0:
+             # Deadline elapsed, so abort.
+             return False
+
+     return True
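To close the module, a small sketch of the asynchronous helpers (`write_nonblocking_async` and `read_to_eof_async`) wired to an `os.pipe`; the pipe and file objects here are illustrative, and the default selector event loop is assumed:

```python
import asyncio
import os
from bounded_subprocess.util import (
    read_to_eof_async,
    set_nonblocking,
    write_nonblocking_async,
)

async def main():
    r, w = os.pipe()
    reader = os.fdopen(r, "rb", buffering=0)
    writer = os.fdopen(w, "wb", buffering=0)
    set_nonblocking(reader)
    set_nonblocking(writer)
    ok = await write_nonblocking_async(fd=writer, data=b"hello\n", timeout_seconds=1)
    writer.close()  # deliver EOF so the reader stops
    bufs = await read_to_eof_async([reader], timeout_seconds=1, max_len=16)
    print(ok, bufs[0])  # True b'hello\n'

asyncio.run(main())
```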
@@ -0,0 +1,51 @@
+ Metadata-Version: 2.4
+ Name: bounded_subprocess
+ Version: 2.5.0
+ Summary: A library to facilitate running subprocesses that may misbehave.
+ Project-URL: Homepage, https://github.com/arjunguha/bounded_subprocess
+ Project-URL: Bug Tracker, https://github.com/arjunguha/bounded_subprocess
+ Author: Arjun Guha, Ming-Ho Yee, Francesca Lucchetti
+ License: MIT License
+
+ Copyright (c) 2022--2024 Northeastern University
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ License-File: LICENSE.txt
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: POSIX :: Linux
+ Classifier: Programming Language :: Python :: 3
+ Requires-Python: >=3.9
+ Requires-Dist: typeguard<5.0.0,>=4.4.2
+ Description-Content-Type: text/markdown
+
+ # bounded_subprocess
+
+ [![PyPI - Version](https://img.shields.io/pypi/v/bounded-subprocess.svg)](https://pypi.org/project/bounded-subprocess)
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/bounded-subprocess.svg)](https://pypi.org/project/bounded-subprocess)
+
+ The `bounded-subprocess` module runs a subprocess with several bounds:
+
+ 1. The subprocess runs in a new Linux session, so the process and all its
+    children can be killed;
+ 2. The subprocess runs with a given timeout; and
+ 3. The parent captures a bounded amount of output from the subprocess and
+    discards the rest.
+
+ Note that the subprocess is not isolated: it can use the network, the filesystem,
+ or create new sessions.
+
+ - Documentation: https://arjunguha.github.io/bounded_subprocess/
+
+ ## Installation
+
+ ```console
+ python3 -m pip install bounded-subprocess
+ ```
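A minimal quickstart, mirroring the examples in the package docstrings (hedged: run it in any environment where the package is installed):

```python
from bounded_subprocess import run

result = run(["echo", "hello"], timeout_seconds=5, max_output_size=1024)
print(result.exit_code, result.stdout.strip())  # 0 hello
```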
+
+ ## License
+
+ `bounded-subprocess` is distributed under the terms of the [MIT](https://spdx.org/licenses/MIT.html) license.
@@ -0,0 +1,10 @@
+ bounded_subprocess/__init__.py,sha256=7JId4_SDjbLML7vUWIFT9CRmCr7IRwxk6YbXaD2pgQk,832
+ bounded_subprocess/bounded_subprocess.py,sha256=z9bdQwdIP-0aNJwqyaY9Gq_ZKg9b7-Nd_97xGLZy9rQ,3691
+ bounded_subprocess/bounded_subprocess_async.py,sha256=IqQxp4skfVpX3FWfwRy84lWAzDEKzgw1zYcMyihYD8c,8603
+ bounded_subprocess/interactive.py,sha256=IK6G0SaIhsd2T7mmXc6ofF0Ehz0Xfnwl0e6fPjXqwyM,5662
+ bounded_subprocess/interactive_async.py,sha256=DZZnZBTJd7xWdhQmdMmWkPOoCYVdMpR-LMY9AzvMdW4,3675
+ bounded_subprocess/util.py,sha256=mQZ7mmjl5sTB3oSa8XetDErtfdRUNffaCIMuxMu4L90,9117
+ bounded_subprocess-2.5.0.dist-info/METADATA,sha256=IAXP0pdI8458SM17I687Tir_HtiNjdvX9Qsx3wdlI_A,2730
+ bounded_subprocess-2.5.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ bounded_subprocess-2.5.0.dist-info/licenses/LICENSE.txt,sha256=UVerBV0_1vMFt8QkaXuVnZVSlOiKDiBSieK5MNLy4Ls,1086
+ bounded_subprocess-2.5.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.28.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
@@ -0,0 +1,9 @@
+ MIT License
+
+ Copyright (c) 2022--2024 Northeastern University
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.