fastaudit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastaudit-0.1.0/CHANGELOG.md +12 -0
- fastaudit-0.1.0/LICENSE +202 -0
- fastaudit-0.1.0/MANIFEST.in +3 -0
- fastaudit-0.1.0/PKG-INFO +188 -0
- fastaudit-0.1.0/README.md +166 -0
- fastaudit-0.1.0/fastaudit/__init__.py +2 -0
- fastaudit-0.1.0/fastaudit/core.py +183 -0
- fastaudit-0.1.0/fastaudit.egg-info/PKG-INFO +188 -0
- fastaudit-0.1.0/fastaudit.egg-info/SOURCES.txt +13 -0
- fastaudit-0.1.0/fastaudit.egg-info/dependency_links.txt +1 -0
- fastaudit-0.1.0/fastaudit.egg-info/requires.txt +9 -0
- fastaudit-0.1.0/fastaudit.egg-info/top_level.txt +1 -0
- fastaudit-0.1.0/pyproject.toml +31 -0
- fastaudit-0.1.0/setup.cfg +4 -0
- fastaudit-0.1.0/tests/test_core.py +124 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
<!-- do not remove -->
|
|
2
|
+
|
|
3
|
+
## 0.1.0
|
|
4
|
+
|
|
5
|
+
### New Features
|
|
6
|
+
|
|
7
|
+
- Defer audit hook installation until first `mk_audit()` call ([#6](https://github.com/AnswerDotAI/fastaudit/issues/6))
|
|
8
|
+
- More fully exclude Python-level callables from native-call monitoring ([#4](https://github.com/AnswerDotAI/fastaudit/issues/4))
|
|
9
|
+
- Install audit hook once at import; move per-policy params into ContextVar-scoped config ([#3](https://github.com/AnswerDotAI/fastaudit/issues/3))
|
|
10
|
+
- Support dynamic '.' in allowed roots and audit os.chdir against destination ([#2](https://github.com/AnswerDotAI/fastaudit/issues/2))
|
|
11
|
+
- `monitor_calls=True` ([#1](https://github.com/AnswerDotAI/fastaudit/issues/1))
|
|
12
|
+
|
fastaudit-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
|
|
2
|
+
Apache License
|
|
3
|
+
Version 2.0, January 2004
|
|
4
|
+
http://www.apache.org/licenses/
|
|
5
|
+
|
|
6
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
7
|
+
|
|
8
|
+
1. Definitions.
|
|
9
|
+
|
|
10
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
|
11
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
|
12
|
+
|
|
13
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
|
14
|
+
the copyright owner that is granting the License.
|
|
15
|
+
|
|
16
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
|
17
|
+
other entities that control, are controlled by, or are under common
|
|
18
|
+
control with that entity. For the purposes of this definition,
|
|
19
|
+
"control" means (i) the power, direct or indirect, to cause the
|
|
20
|
+
direction or management of such entity, whether by contract or
|
|
21
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
22
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
23
|
+
|
|
24
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
|
25
|
+
exercising permissions granted by this License.
|
|
26
|
+
|
|
27
|
+
"Source" form shall mean the preferred form for making modifications,
|
|
28
|
+
including but not limited to software source code, documentation
|
|
29
|
+
source, and configuration files.
|
|
30
|
+
|
|
31
|
+
"Object" form shall mean any form resulting from mechanical
|
|
32
|
+
transformation or translation of a Source form, including but
|
|
33
|
+
not limited to compiled object code, generated documentation,
|
|
34
|
+
and conversions to other media types.
|
|
35
|
+
|
|
36
|
+
"Work" shall mean the work of authorship, whether in Source or
|
|
37
|
+
Object form, made available under the License, as indicated by a
|
|
38
|
+
copyright notice that is included in or attached to the work
|
|
39
|
+
(an example is provided in the Appendix below).
|
|
40
|
+
|
|
41
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
|
42
|
+
form, that is based on (or derived from) the Work and for which the
|
|
43
|
+
editorial revisions, annotations, elaborations, or other modifications
|
|
44
|
+
represent, as a whole, an original work of authorship. For the purposes
|
|
45
|
+
of this License, Derivative Works shall not include works that remain
|
|
46
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
|
47
|
+
the Work and Derivative Works thereof.
|
|
48
|
+
|
|
49
|
+
"Contribution" shall mean any work of authorship, including
|
|
50
|
+
the original version of the Work and any modifications or additions
|
|
51
|
+
to that Work or Derivative Works thereof, that is intentionally
|
|
52
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
|
53
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
|
54
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
|
55
|
+
means any form of electronic, verbal, or written communication sent
|
|
56
|
+
to the Licensor or its representatives, including but not limited to
|
|
57
|
+
communication on electronic mailing lists, source code control systems,
|
|
58
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
|
59
|
+
Licensor for the purpose of discussing and improving the Work, but
|
|
60
|
+
excluding communication that is conspicuously marked or otherwise
|
|
61
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
|
62
|
+
|
|
63
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
|
64
|
+
on behalf of whom a Contribution has been received by Licensor and
|
|
65
|
+
subsequently incorporated within the Work.
|
|
66
|
+
|
|
67
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
68
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
69
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
70
|
+
copyright license to reproduce, prepare Derivative Works of,
|
|
71
|
+
publicly display, publicly perform, sublicense, and distribute the
|
|
72
|
+
Work and such Derivative Works in Source or Object form.
|
|
73
|
+
|
|
74
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
|
75
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
76
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
77
|
+
(except as stated in this section) patent license to make, have made,
|
|
78
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
79
|
+
where such license applies only to those patent claims licensable
|
|
80
|
+
by such Contributor that are necessarily infringed by their
|
|
81
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
|
82
|
+
with the Work to which such Contribution(s) was submitted. If You
|
|
83
|
+
institute patent litigation against any entity (including a
|
|
84
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
|
85
|
+
or a Contribution incorporated within the Work constitutes direct
|
|
86
|
+
or contributory patent infringement, then any patent licenses
|
|
87
|
+
granted to You under this License for that Work shall terminate
|
|
88
|
+
as of the date such litigation is filed.
|
|
89
|
+
|
|
90
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
|
91
|
+
Work or Derivative Works thereof in any medium, with or without
|
|
92
|
+
modifications, and in Source or Object form, provided that You
|
|
93
|
+
meet the following conditions:
|
|
94
|
+
|
|
95
|
+
(a) You must give any other recipients of the Work or
|
|
96
|
+
Derivative Works a copy of this License; and
|
|
97
|
+
|
|
98
|
+
(b) You must cause any modified files to carry prominent notices
|
|
99
|
+
stating that You changed the files; and
|
|
100
|
+
|
|
101
|
+
(c) You must retain, in the Source form of any Derivative Works
|
|
102
|
+
that You distribute, all copyright, patent, trademark, and
|
|
103
|
+
attribution notices from the Source form of the Work,
|
|
104
|
+
excluding those notices that do not pertain to any part of
|
|
105
|
+
the Derivative Works; and
|
|
106
|
+
|
|
107
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
|
108
|
+
distribution, then any Derivative Works that You distribute must
|
|
109
|
+
include a readable copy of the attribution notices contained
|
|
110
|
+
within such NOTICE file, excluding those notices that do not
|
|
111
|
+
pertain to any part of the Derivative Works, in at least one
|
|
112
|
+
of the following places: within a NOTICE text file distributed
|
|
113
|
+
as part of the Derivative Works; within the Source form or
|
|
114
|
+
documentation, if provided along with the Derivative Works; or,
|
|
115
|
+
within a display generated by the Derivative Works, if and
|
|
116
|
+
wherever such third-party notices normally appear. The contents
|
|
117
|
+
of the NOTICE file are for informational purposes only and
|
|
118
|
+
do not modify the License. You may add Your own attribution
|
|
119
|
+
notices within Derivative Works that You distribute, alongside
|
|
120
|
+
or as an addendum to the NOTICE text from the Work, provided
|
|
121
|
+
that such additional attribution notices cannot be construed
|
|
122
|
+
as modifying the License.
|
|
123
|
+
|
|
124
|
+
You may add Your own copyright statement to Your modifications and
|
|
125
|
+
may provide additional or different license terms and conditions
|
|
126
|
+
for use, reproduction, or distribution of Your modifications, or
|
|
127
|
+
for any such Derivative Works as a whole, provided Your use,
|
|
128
|
+
reproduction, and distribution of the Work otherwise complies with
|
|
129
|
+
the conditions stated in this License.
|
|
130
|
+
|
|
131
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
132
|
+
any Contribution intentionally submitted for inclusion in the Work
|
|
133
|
+
by You to the Licensor shall be under the terms and conditions of
|
|
134
|
+
this License, without any additional terms or conditions.
|
|
135
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
|
136
|
+
the terms of any separate license agreement you may have executed
|
|
137
|
+
with Licensor regarding such Contributions.
|
|
138
|
+
|
|
139
|
+
6. Trademarks. This License does not grant permission to use the trade
|
|
140
|
+
names, trademarks, service marks, or product names of the Licensor,
|
|
141
|
+
except as required for reasonable and customary use in describing the
|
|
142
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
|
143
|
+
|
|
144
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
145
|
+
agreed to in writing, Licensor provides the Work (and each
|
|
146
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
147
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
148
|
+
implied, including, without limitation, any warranties or conditions
|
|
149
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
150
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
151
|
+
appropriateness of using or redistributing the Work and assume any
|
|
152
|
+
risks associated with Your exercise of permissions under this License.
|
|
153
|
+
|
|
154
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
|
155
|
+
whether in tort (including negligence), contract, or otherwise,
|
|
156
|
+
unless required by applicable law (such as deliberate and grossly
|
|
157
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
|
158
|
+
liable to You for damages, including any direct, indirect, special,
|
|
159
|
+
incidental, or consequential damages of any character arising as a
|
|
160
|
+
result of this License or out of the use or inability to use the
|
|
161
|
+
Work (including but not limited to damages for loss of goodwill,
|
|
162
|
+
work stoppage, computer failure or malfunction, or any and all
|
|
163
|
+
other commercial damages or losses), even if such Contributor
|
|
164
|
+
has been advised of the possibility of such damages.
|
|
165
|
+
|
|
166
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
|
167
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
|
168
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
|
169
|
+
or other liability obligations and/or rights consistent with this
|
|
170
|
+
License. However, in accepting such obligations, You may act only
|
|
171
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
|
172
|
+
of any other Contributor, and only if You agree to indemnify,
|
|
173
|
+
defend, and hold each Contributor harmless for any liability
|
|
174
|
+
incurred by, or claims asserted against, such Contributor by reason
|
|
175
|
+
of your accepting any such warranty or additional liability.
|
|
176
|
+
|
|
177
|
+
END OF TERMS AND CONDITIONS
|
|
178
|
+
|
|
179
|
+
APPENDIX: How to apply the Apache License to your work.
|
|
180
|
+
|
|
181
|
+
To apply the Apache License to your work, attach the following
|
|
182
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
|
183
|
+
replaced with your own identifying information. (Don't include
|
|
184
|
+
the brackets!) The text should be enclosed in the appropriate
|
|
185
|
+
comment syntax for the file format. We also recommend that a
|
|
186
|
+
file or class name and description of purpose be included on the
|
|
187
|
+
same "printed page" as the copyright notice for easier
|
|
188
|
+
identification within third-party archives.
|
|
189
|
+
|
|
190
|
+
Copyright [yyyy] [name of copyright owner]
|
|
191
|
+
|
|
192
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
193
|
+
you may not use this file except in compliance with the License.
|
|
194
|
+
You may obtain a copy of the License at
|
|
195
|
+
|
|
196
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
197
|
+
|
|
198
|
+
Unless required by applicable law or agreed to in writing, software
|
|
199
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
200
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
201
|
+
See the License for the specific language governing permissions and
|
|
202
|
+
limitations under the License.
|
fastaudit-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fastaudit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A lightweight execution guard for running LLM-generated Python in a normal Python process.
|
|
5
|
+
Author: Answer.AI
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/AnswerDotAI/fastaudit
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: fastcore
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest; extra == "dev"
|
|
16
|
+
Requires-Dist: orjson; extra == "dev"
|
|
17
|
+
Requires-Dist: numpy; extra == "dev"
|
|
18
|
+
Requires-Dist: fastship; extra == "dev"
|
|
19
|
+
Requires-Dist: build; extra == "dev"
|
|
20
|
+
Requires-Dist: twine; extra == "dev"
|
|
21
|
+
Dynamic: license-file
|
|
22
|
+
|
|
23
|
+
# fastaudit
|
|
24
|
+
|
|
25
|
+
`fastaudit` is a lightweight execution guard for running LLM-generated Python in a normal Python process.
|
|
26
|
+
|
|
27
|
+
It is not intended to be a hardened adversarial sandbox. Its purpose is to stop accidental damage from overly broad file operations, unexpected subprocess calls, and tool use that reaches outside approved working directories.
|
|
28
|
+
|
|
29
|
+
The core mechanism is Python's audit hook system. The first `mk_audit()` call installs one process-wide audit hook. On Python 3.12 and newer, `sys.monitoring` is also used to raise audit events for non-stdlib native calls. `mk_audit()` creates an audit context, then enables permission checks only while that execution context is active.
|
|
30
|
+
|
|
31
|
+
`fastaudit` requires Python 3.10 or newer. Native call monitoring requires Python 3.12 or newer and is enabled by default. Pass `monitor_calls=False` to use audit-hook-only mode on Python 3.10/3.11 or to avoid monitoring overhead.
|
|
32
|
+
|
|
33
|
+
## Why this exists
|
|
34
|
+
|
|
35
|
+
LLM-generated code is usually helpful, but sometimes too determined. If a command fails, an assistant may try another route; if a path is wrong, it may broaden the search; if a tool exists, it may use it without fully understanding its side effects.
|
|
36
|
+
|
|
37
|
+
`fastaudit` is designed for that case.
|
|
38
|
+
|
|
39
|
+
It helps with:
|
|
40
|
+
|
|
41
|
+
- blocking subprocess and process-escape operations unless explicitly allowed
|
|
42
|
+
- allowing writes only under approved roots
|
|
43
|
+
- allowing broad read access where appropriate
|
|
44
|
+
- making permission failures clear and immediate
|
|
45
|
+
- letting host policy callbacks allow trusted tools while ordinary generated code stays checked
|
|
46
|
+
- avoiding global audit state leaks across async tasks
|
|
47
|
+
|
|
48
|
+
It deliberately does not try to defeat malicious code running in the same interpreter.
|
|
49
|
+
|
|
50
|
+
## Audit hook categorization
|
|
51
|
+
|
|
52
|
+
The audit hook is designed as a lightweight guardrail for LLM/tool-generated code, not as a hardened security sandbox against malicious code. The goal is to prevent accidental or over-broad filesystem mutation outside approved working directories: e.g. deleting files in the wrong project, writing into a user’s home directory, or spawning subprocesses unexpectedly. It assumes the surrounding process, user account, and pre-existing filesystem layout are trusted, and that the code being checked is not actively trying to exploit races, pre-planted symlinks, or CPython internals.
|
|
53
|
+
|
|
54
|
+
The design keeps the common path simple and cheap. Dangerous process-escape events such as subprocess execution are denied outright. Filesystem write/delete events are allowed only when the relevant parent directory is inside a precomputed allowlist, since most mutations are really changes to directory entries. For destination-only operations such as copy, only the destination parent matters; for move/rename/link-style operations, both paths are checked because both filesystem locations may be affected. Read-only operations are generally ignored, and file-descriptor-based truncation is allowed on the assumption that the path policy was enforced when the descriptor was opened. This gives practical protection against accidental damage while avoiding the complexity and cost of pretending to be a fully adversarial sandbox.
|
|
55
|
+
|
|
56
|
+
Symlinks are treated as part of the trusted filesystem setup. The hook’s path checks focus on the parent directories of mutations, which is the right model for operations that create, remove, or rename directory entries. This means an existing symlink inside an allowed directory may still point outside the allowed roots; that is acceptable under this threat model because the user controls the workspace layout and is assumed not to pre-place hostile links. To avoid making that assumption worse, symlink and hard-link creation should be restricted: the new link’s parent must be allowed, and the link target should either be denied or required to resolve inside an allowed root.
|
|
57
|
+
|
|
58
|
+
## Threat model
|
|
59
|
+
|
|
60
|
+
`fastaudit` assumes:
|
|
61
|
+
|
|
62
|
+
- the user, workspace, and pre-existing filesystem layout are trusted
|
|
63
|
+
- code is LLM-generated or LLM-directed, not actively hostile
|
|
64
|
+
- accidental overreach is the main risk
|
|
65
|
+
- rich user tools may need access that ordinary generated code should not have
|
|
66
|
+
- Solveit or the host application controls the execution wrapper
|
|
67
|
+
|
|
68
|
+
It does not assume:
|
|
69
|
+
|
|
70
|
+
- Python introspection is unavailable
|
|
71
|
+
- frames, closures, or modules are impossible to inspect
|
|
72
|
+
- same-process execution can provide a hard security boundary
|
|
73
|
+
- OS-level sandboxing is unnecessary for adversarial workloads
|
|
74
|
+
|
|
75
|
+
For adversarial code, use a subprocess, container, VM, or OS-level policy.
|
|
76
|
+
|
|
77
|
+
## Audit scope
|
|
78
|
+
|
|
79
|
+
Auditing is opt-in per logical task. The audit hook is registered globally when the first audit context is created, and the optional call monitor is registered globally when needed, but permission checks only run while `audit_perms()` is active. This matters for async code. A global boolean or counter would leak audit state between unrelated coroutines whenever one audited task awaits. A `ContextVar` gives logical scoping: child tasks inherit at creation time, nested contexts restore cleanly via tokens, and the guard follows execution flow rather than scheduler order. Threads are denied inside the audit sandbox, since context variables would otherwise not be maintained across them.
|
|
80
|
+
|
|
81
|
+
The hook is built once inside a closure rather than read from module globals on every event. Allowed roots and callbacks live in the active context config, the event-classification sets are converted to `frozenset`, and the helpers used in the hot path — `realpath`, `dirname`, `fsdecode`, `os.sep` — are captured as local names. Nothing the hook depends on lives in a mutable global that a generated cell could clear, replace, or reassign. This is not a security barrier against introspection or frame walking; it is a deliberate effort to remove the easy, accidental disabling paths that an enthusiastic LLM is most likely to take when retrying after a `PermissionError`.
|
|
82
|
+
|
|
83
|
+
## Permission model
|
|
84
|
+
|
|
85
|
+
The policy classifies audit events into a few groups:
|
|
86
|
+
|
|
87
|
+
- events denied outright
|
|
88
|
+
- events where the first path argument is checked
|
|
89
|
+
- events where the destination path is checked
|
|
90
|
+
- events where both source and destination paths are checked
|
|
91
|
+
- special cases such as `open`, `os.truncate`, and sensitive `object.__setattr__`
|
|
92
|
+
|
|
93
|
+
Writes and filesystem mutations are allowed only when the relevant parent directory is inside an approved root.
|
|
94
|
+
|
|
95
|
+
Reads are generally allowed.
|
|
96
|
+
|
|
97
|
+
Subprocess creation and similar process escapes are denied by default.
|
|
98
|
+
|
|
99
|
+
The allowed root `'.'` is dynamic: it means the current directory at the time of each checked operation. This lets a sandbox follow allowed `chdir` calls into child directories. `os.chdir` itself is checked against the destination directory, not the destination's parent.
|
|
100
|
+
|
|
101
|
+
Non-stdlib native calls raise a `fastaudit.call` audit event while `audit_perms()` is active when `monitor_calls=True`. Python calls and stdlib calls are ignored by the call monitor. The context manager calls `sys.monitoring.restart_events()` on entry so monitored call sites disabled before the context are seen again inside it. With `monitor_calls=False`, only normal Python audit-hook events are checked.
|
|
102
|
+
|
|
103
|
+
### get/set attr hooks
|
|
104
|
+
|
|
105
|
+
The `object.__setattr__` audit event fires only for a small fixed set of "sensitive" attribute assignments, not for general attribute setting. On types/classes, it fires when setting `__name__`, `__qualname__`, `__module__`, `__bases__`, `__doc__`, or `__type_params__` — these go through `check_set_special_type_attr` in `Objects/typeobject.c`. The `__class__` reassignment on any object is also audited, via `object_set_class` in the same file. On function objects, assignments to `__code__`, `__defaults__`, and `__kwdefaults__` are audited, via the relevant setters in `Objects/funcobject.c`.
|
|
106
|
+
|
|
107
|
+
All other attribute assignments — including ordinary `C.x = 1` on a class, instance attribute assignment, and even some dunders like `__abstractmethods__` and `__annotations__` (which write directly via `PyDict_SetItem`) — bypass the audit hook entirely. This is why `@dataclass` triggers an event (it sets `cls.__doc__`) and `namedtuple` triggers one (it sets `cls.__module__`), while `class C: pass; C.x = 1; C.foo = lambda self: None` is silent. The authoritative list lives in the CPython source at [`Objects/typeobject.c`](https://github.com/python/cpython/blob/v3.12.0/Objects/typeobject.c) and [`Objects/funcobject.c`](https://github.com/python/cpython/blob/v3.12.0/Objects/funcobject.c); the public docs only describe the event as firing for "certain sensitive attribute assignments" without enumerating them.
|
|
108
|
+
|
|
109
|
+
## Host policy
|
|
110
|
+
|
|
111
|
+
Some user-provided tools need permissions that ordinary generated code should not have. For instance, a search tool may need to call `rg`, or a helper may need to spawn a tightly controlled subprocess.
|
|
112
|
+
|
|
113
|
+
`fastaudit` does not define that policy itself. Host code can pass `before_deny`, which is called after `fastaudit` decides an operation should be blocked and before `PermissionError` is raised:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
before_deny(event, args, frame, msg, data)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
The callback receives the event name, audit arguments, the first non-`fastaudit` stack frame, the error message, and the current host data. Returning a truthy value allows the operation. Returning a falsey value denies it. Exceptions from the callback propagate.
|
|
120
|
+
|
|
121
|
+
For non-stdlib native calls, host code can also pass `on_call`, which runs before `fastaudit.call` is raised. `on_call` requires `monitor_calls=True`:
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
on_call(caller, callee, fn, code, off, data)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
It receives the caller, callee, function object, code object, bytecode offset, and current host data. It can return `False` to suppress the audit event for that call, or `sys.monitoring.DISABLE` to disable that monitored call site. Exceptions from the callback propagate.
|
|
128
|
+
|
|
129
|
+
The optional `data` argument is stored in the audit context config and passed to both callbacks. A host can build mutable policy state outside the sandbox, pass a frozen snapshot to `mk_audit`, and later update that snapshot with `audit_perms.set_data(...)`. Creating or entering a new audit context, or calling `set_data`, raises an internal audit event and is denied while `audit_perms()` is active.
|
|
130
|
+
|
|
131
|
+
`mk_audit()` uses `sys.monitoring` tool id `3` by default when call monitoring is enabled. Pass `tool_id=...` if the host already uses that id.
|
|
132
|
+
|
|
133
|
+
## API sketch
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
audit_perms = mk_audit(['/tmp', os.getcwd()], before_deny=allow_trusted_tool, data=frozenset(allowed))
|
|
137
|
+
|
|
138
|
+
with audit_perms():
|
|
139
|
+
exec(code, restricted_globals)
|
|
140
|
+
|
|
141
|
+
audit_perms.set_data(frozenset(new_allowed))
|
|
142
|
+
|
|
143
|
+
audit_perms = mk_audit(['/tmp'], monitor_calls=False) # audit hooks only
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Implementation notes
|
|
147
|
+
|
|
148
|
+
The hook should avoid relying on mutable globals during enforcement.
|
|
149
|
+
|
|
150
|
+
At construction time, bind or freeze:
|
|
151
|
+
|
|
152
|
+
- approved roots
|
|
153
|
+
- audit event sets
|
|
154
|
+
- write flags
|
|
155
|
+
- path helpers such as `realpath`, `dirname`, and `fsdecode`
|
|
156
|
+
- frame lookup helper
|
|
157
|
+
- call-monitor helpers and callbacks
|
|
158
|
+
|
|
159
|
+
This prevents the most likely accidental disabling paths, such as clearing a global deny set or replacing a helper function. The implementation still does not claim to be secure against deliberate frame walking or introspection.
|
|
160
|
+
|
|
161
|
+
## Limitations
|
|
162
|
+
|
|
163
|
+
`fastaudit` does not provide a hard security boundary.
|
|
164
|
+
|
|
165
|
+
Known limitations:
|
|
166
|
+
|
|
167
|
+
- same-process Python code can inspect a lot of runtime state
|
|
168
|
+
- pre-existing writable file descriptors may bypass path-open checks
|
|
169
|
+
- host callbacks can do anything their implementation permits
|
|
170
|
+
- thread support is intentionally restricted unless the host explicitly designs for it
|
|
171
|
+
- The `CALL` event in `sys.monitoring` does not fire for operators invoked via dedicated bytecode opcodes — `BINARY_OP` (`a + b`), `BINARY_SUBSCR` (`a[i]`), comparisons, etc. These dispatch directly to the C-level numeric/subscript/compare slots, which aren't "calls" in PEP 669's model. Explicit dunder invocations (`a.__add__(b)`) do fire `CALL` normally.
|
|
172
|
+
|
|
173
|
+
These limitations are acceptable for a guardrail system aimed at LLM-directed execution. They are not acceptable for hostile code.
|
|
174
|
+
|
|
175
|
+
## Design principle
|
|
176
|
+
|
|
177
|
+
The goal is not to make escape impossible. The goal is to make the safe path easy, the risky path explicit, and accidental overreach fail early with a useful error.
|
|
178
|
+
|
|
179
|
+
## Release
|
|
180
|
+
|
|
181
|
+
1) Ensure your GitHub issues are labeled (`bug`, `enhancement`, `breaking`).
|
|
182
|
+
2) Run:
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
ship-gh
|
|
186
|
+
ship-pypi
|
|
187
|
+
ship-bump
|
|
188
|
+
```
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# fastaudit
|
|
2
|
+
|
|
3
|
+
`fastaudit` is a lightweight execution guard for running LLM-generated Python in a normal Python process.
|
|
4
|
+
|
|
5
|
+
It is not intended to be a hardened adversarial sandbox. Its purpose is to stop accidental damage from overly broad file operations, unexpected subprocess calls, and tool use that reaches outside approved working directories.
|
|
6
|
+
|
|
7
|
+
The core mechanism is Python's audit hook system. The first `mk_audit()` call installs one process-wide audit hook. On Python 3.12 and newer, `sys.monitoring` is also used to raise audit events for non-stdlib native calls. `mk_audit()` creates an audit context, then enables permission checks only while that execution context is active.
|
|
8
|
+
|
|
9
|
+
`fastaudit` requires Python 3.10 or newer. Native call monitoring requires Python 3.12 or newer and is enabled by default, so on Python 3.10/3.11 `mk_audit()` raises `RuntimeError` unless you pass `monitor_calls=False`. Pass `monitor_calls=False` on any version to use audit-hook-only mode and avoid monitoring overhead.
|
|
10
|
+
|
|
11
|
+
## Why this exists
|
|
12
|
+
|
|
13
|
+
LLM-generated code is usually helpful, but sometimes too determined. If a command fails, an assistant may try another route; if a path is wrong, it may broaden the search; if a tool exists, it may use it without fully understanding its side effects.
|
|
14
|
+
|
|
15
|
+
`fastaudit` is designed for that case.
|
|
16
|
+
|
|
17
|
+
It helps with:
|
|
18
|
+
|
|
19
|
+
- blocking subprocess and process-escape operations unless explicitly allowed
|
|
20
|
+
- allowing writes only under approved roots
|
|
21
|
+
- allowing broad read access where appropriate
|
|
22
|
+
- making permission failures clear and immediate
|
|
23
|
+
- letting host policy callbacks allow trusted tools while ordinary generated code stays checked
|
|
24
|
+
- avoiding global audit state leaks across async tasks
|
|
25
|
+
|
|
26
|
+
It deliberately does not try to defeat malicious code running in the same interpreter.
|
|
27
|
+
|
|
28
|
+
## Audit hook categorization
|
|
29
|
+
|
|
30
|
+
The audit hook is designed as a lightweight guardrail for LLM/tool-generated code, not as a hardened security sandbox against malicious code. The goal is to prevent accidental or over-broad filesystem mutation outside approved working directories: e.g. deleting files in the wrong project, writing into a user’s home directory, or spawning subprocesses unexpectedly. It assumes the surrounding process, user account, and pre-existing filesystem layout are trusted, and that the code being checked is not actively trying to exploit races, pre-planted symlinks, or CPython internals.
|
|
31
|
+
|
|
32
|
+
The design keeps the common path simple and cheap. Dangerous process-escape events such as subprocess execution are denied outright. Filesystem write/delete events are allowed only when the relevant parent directory is inside a precomputed allowlist, since most mutations are really changes to directory entries. For destination-only operations such as copy, only the destination parent matters; for move/rename/link-style operations, both paths are checked because both filesystem locations may be affected. Read-only operations are generally ignored, and file-descriptor-based truncation is allowed on the assumption that the path policy was enforced when the descriptor was opened. This gives practical protection against accidental damage while avoiding the complexity and cost of pretending to be a fully adversarial sandbox.
|
|
33
|
+
|
|
34
|
+
Symlinks are treated as part of the trusted filesystem setup. The hook’s path checks focus on the parent directories of mutations, which is the right model for operations that create, remove, or rename directory entries. This means an existing symlink inside an allowed directory may still point outside the allowed roots; that is acceptable under this threat model because the user controls the workspace layout and is assumed not to pre-place hostile links. To avoid making that assumption worse, symlink and hard-link creation should be restricted: the new link’s parent must be allowed, and the link target should either be denied or required to resolve inside an allowed root.
|
|
35
|
+
|
|
36
|
+
## Threat model
|
|
37
|
+
|
|
38
|
+
`fastaudit` assumes:
|
|
39
|
+
|
|
40
|
+
- the user, workspace, and pre-existing filesystem layout are trusted
|
|
41
|
+
- code is LLM-generated or LLM-directed, not actively hostile
|
|
42
|
+
- accidental overreach is the main risk
|
|
43
|
+
- rich user tools may need access that ordinary generated code should not have
|
|
44
|
+
- Solveit or the host application controls the execution wrapper
|
|
45
|
+
|
|
46
|
+
It does not assume:
|
|
47
|
+
|
|
48
|
+
- Python introspection is unavailable
|
|
49
|
+
- frames, closures, or modules are impossible to inspect
|
|
50
|
+
- same-process execution can provide a hard security boundary
|
|
51
|
+
- OS-level sandboxing is unnecessary for adversarial workloads
|
|
52
|
+
|
|
53
|
+
For adversarial code, use a subprocess, container, VM, or OS-level policy.
|
|
54
|
+
|
|
55
|
+
## Audit scope
|
|
56
|
+
|
|
57
|
+
Auditing is opt-in per logical task. The audit hook is registered globally when the first audit context is created, and the optional call monitor is registered globally when needed, but permission checks only run while `audit_perms()` is active. This matters for async code. A global boolean or counter would leak audit state between unrelated coroutines whenever one audited task awaits. A `ContextVar` gives logical scoping: child tasks inherit at creation time, nested contexts restore cleanly via tokens, and the guard follows execution flow rather than scheduler order. Starting threads is denied inside the audit sandbox, because a new thread does not by default inherit the caller's context variables and would therefore run unaudited.
|
|
58
|
+
|
|
59
|
+
The hook is built once inside a closure rather than read from module globals on every event. Allowed roots and callbacks live in the active context config, the event-classification sets are converted to `frozenset`, and the helpers used in the hot path — `realpath`, `dirname`, `fsdecode`, `os.sep` — are captured as local names. Nothing the hook depends on lives in a mutable global that a generated cell could clear, replace, or reassign. This is not a security barrier against introspection or frame walking; it is a deliberate effort to remove the easy, accidental disabling paths that an enthusiastic LLM is most likely to take when retrying after a `PermissionError`.
|
|
60
|
+
|
|
61
|
+
## Permission model
|
|
62
|
+
|
|
63
|
+
The policy classifies audit events into a few groups:
|
|
64
|
+
|
|
65
|
+
- events denied outright
|
|
66
|
+
- events where the first path argument is checked
|
|
67
|
+
- events where the destination path is checked
|
|
68
|
+
- events where both source and destination paths are checked
|
|
69
|
+
- special cases such as `open`, `os.truncate`, and sensitive `object.__setattr__`
|
|
70
|
+
|
|
71
|
+
Writes and filesystem mutations are allowed only when the relevant parent directory is inside an approved root.
|
|
72
|
+
|
|
73
|
+
Reads are generally allowed.
|
|
74
|
+
|
|
75
|
+
Subprocess creation and similar process escapes are denied by default.
|
|
76
|
+
|
|
77
|
+
The allowed root `'.'` is dynamic: it means the current directory at the time of each checked operation. This lets a sandbox follow allowed `chdir` calls into child directories. `os.chdir` itself is checked against the destination directory, not the destination's parent.
|
|
78
|
+
|
|
79
|
+
Non-stdlib native calls raise a `fastaudit.call` audit event while `audit_perms()` is active when `monitor_calls=True`. Python calls and stdlib calls are ignored by the call monitor. The context manager calls `sys.monitoring.restart_events()` on entry so monitored call sites disabled before the context are seen again inside it. With `monitor_calls=False`, only normal Python audit-hook events are checked.
|
|
80
|
+
|
|
81
|
+
### get/set attr hooks
|
|
82
|
+
|
|
83
|
+
The `object.__setattr__` audit event fires only for a small fixed set of "sensitive" attribute assignments, not for general attribute setting. On types/classes, it fires when setting `__name__`, `__qualname__`, `__module__`, `__bases__`, `__doc__`, or `__type_params__` — these go through `check_set_special_type_attr` in `Objects/typeobject.c`. The `__class__` reassignment on any object is also audited, via `object_set_class` in the same file. On function objects, assignments to `__code__`, `__defaults__`, and `__kwdefaults__` are audited, via the relevant setters in `Objects/funcobject.c`.
|
|
84
|
+
|
|
85
|
+
All other attribute assignments — including ordinary `C.x = 1` on a class, instance attribute assignment, and even some dunders like `__abstractmethods__` and `__annotations__` (which write directly via `PyDict_SetItem`) — bypass the audit hook entirely. This is why `@dataclass` triggers an event (it sets `cls.__doc__`) and `namedtuple` triggers one (it sets `cls.__module__`), while `class C: pass; C.x = 1; C.foo = lambda self: None` is silent. The authoritative list lives in the CPython source at [`Objects/typeobject.c`](https://github.com/python/cpython/blob/v3.12.0/Objects/typeobject.c) and [`Objects/funcobject.c`](https://github.com/python/cpython/blob/v3.12.0/Objects/funcobject.c); the public docs only describe the event as firing for "certain sensitive attribute assignments" without enumerating them.
|
|
86
|
+
|
|
87
|
+
## Host policy
|
|
88
|
+
|
|
89
|
+
Some user-provided tools need permissions that ordinary generated code should not have. For instance, a search tool may need to call `rg`, or a helper may need to spawn a tightly controlled subprocess.
|
|
90
|
+
|
|
91
|
+
`fastaudit` does not define that policy itself. Host code can pass `before_deny`, which is called after `fastaudit` decides an operation should be blocked and before `PermissionError` is raised:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
before_deny(event, args, frame, msg, data)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
The callback receives the event name, audit arguments, the first non-`fastaudit` stack frame, the error message, and the current host data. Returning a truthy value allows the operation. Returning a falsey value denies it. Exceptions from the callback propagate.
|
|
98
|
+
|
|
99
|
+
For non-stdlib native calls, host code can also pass `on_call`, which runs before `fastaudit.call` is raised. `on_call` requires `monitor_calls=True`:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
on_call(caller, callee, fn, code, off, data)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
It receives the caller, callee, function object, code object, bytecode offset, and current host data. It can return `False` to suppress the audit event for that call, or `sys.monitoring.DISABLE` to disable that monitored call site. Exceptions from the callback propagate.
|
|
106
|
+
|
|
107
|
+
The optional `data` argument is stored in the audit context config and passed to both callbacks. A host can build mutable policy state outside the sandbox, pass a frozen snapshot to `mk_audit`, and later update that snapshot with `audit_perms.set_data(...)`. Creating or entering a new audit context, or calling `set_data`, raises an internal audit event and is denied while `audit_perms()` is active.
|
|
108
|
+
|
|
109
|
+
`mk_audit()` uses `sys.monitoring` tool id `3` by default when call monitoring is enabled. Pass `tool_id=...` if the host already uses that id.
|
|
110
|
+
|
|
111
|
+
## API sketch
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
audit_perms = mk_audit(['/tmp', os.getcwd()], before_deny=allow_trusted_tool, data=frozenset(allowed))
|
|
115
|
+
|
|
116
|
+
with audit_perms():
|
|
117
|
+
    exec(code, restricted_globals)
|
|
118
|
+
|
|
119
|
+
audit_perms.set_data(frozenset(new_allowed))
|
|
120
|
+
|
|
121
|
+
audit_perms = mk_audit(['/tmp'], monitor_calls=False) # audit hooks only
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Implementation notes
|
|
125
|
+
|
|
126
|
+
The hook should avoid relying on mutable globals during enforcement.
|
|
127
|
+
|
|
128
|
+
At construction time, bind or freeze:
|
|
129
|
+
|
|
130
|
+
- approved roots
|
|
131
|
+
- audit event sets
|
|
132
|
+
- write flags
|
|
133
|
+
- path helpers such as `realpath`, `dirname`, and `fsdecode`
|
|
134
|
+
- frame lookup helper
|
|
135
|
+
- call-monitor helpers and callbacks
|
|
136
|
+
|
|
137
|
+
This prevents the most likely accidental disabling paths, such as clearing a global deny set or replacing a helper function. The implementation still does not claim to be secure against deliberate frame walking or introspection.
|
|
138
|
+
|
|
139
|
+
## Limitations
|
|
140
|
+
|
|
141
|
+
`fastaudit` does not provide a hard security boundary.
|
|
142
|
+
|
|
143
|
+
Known limitations:
|
|
144
|
+
|
|
145
|
+
- same-process Python code can inspect a lot of runtime state
|
|
146
|
+
- pre-existing writable file descriptors may bypass path-open checks
|
|
147
|
+
- host callbacks can do anything their implementation permits
|
|
148
|
+
- starting threads is denied inside the sandbox unless the host explicitly allows it (for example via `before_deny`)
|
|
149
|
+
- The `CALL` event in `sys.monitoring` does not fire for operators invoked via dedicated bytecode opcodes — `BINARY_OP` (`a + b`), `BINARY_SUBSCR` (`a[i]`), comparisons, etc. These dispatch directly to the C-level numeric/subscript/compare slots, which aren't "calls" in PEP 669's model. Explicit dunder invocations (`a.__add__(b)`) do fire CALL normally.
|
|
150
|
+
|
|
151
|
+
These limitations are acceptable for a guardrail system aimed at LLM-directed execution. They are not acceptable for hostile code.
|
|
152
|
+
|
|
153
|
+
## Design principle
|
|
154
|
+
|
|
155
|
+
The goal is not to make escape impossible. The goal is to make the safe path easy, the risky path explicit, and accidental overreach fail early with a useful error.
|
|
156
|
+
|
|
157
|
+
## Release
|
|
158
|
+
|
|
159
|
+
1) Ensure your GitHub issues are labeled (`bug`, `enhancement`, `breaking`).
|
|
160
|
+
2) Run:
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
ship-gh
|
|
164
|
+
ship-pypi
|
|
165
|
+
ship-bump
|
|
166
|
+
```
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import os, sys
|
|
2
|
+
from fastcore.utils import *
|
|
3
|
+
from collections import namedtuple
|
|
4
|
+
from contextlib import contextmanager
|
|
5
|
+
from contextvars import ContextVar
|
|
6
|
+
|
|
7
|
+
# Audit events whose FIRST argument is the path to check.
_audit_1st = {
    'os.chmod', 'os.chown', 'os.chflags', 'os.mkdir', 'os.remove', 'os.removexattr', 'os.rmdir', 'os.setxattr',
    'shutil.chown', 'shutil.make_archive', 'shutil.rmtree',
    'sqlite3.connect',
    'tempfile.mkdtemp', 'tempfile.mkstemp',
}
# Audit events whose SECOND argument (the destination) is the path to check.
_audit_dst = {
    'shutil.copyfile', 'shutil.copymode', 'shutil.copystat', 'shutil.copytree',
    'sqlite3.load_extension',
}
# Audit events where both of the first two arguments are paths to check.
_audit_both = {
    'os.link', 'os.rename', 'os.symlink',
    'shutil.move', 'shutil.unpack_archive',
}

# Events that are denied outright, grouped by family.
_audit_proc = {
    'subprocess.Popen', '_posixsubprocess.fork_exec', 'pty.spawn',
    'os.system', 'os.exec', 'os.spawn', 'os.posix_spawn', 'os.startfile', 'os.startfile/2',
    'os.kill', 'os.killpg', 'signal.pthread_kill',
}
_audit_runtime = {
    'sys.addaudithook', 'sys.excepthook', 'sys.unraisablehook', 'sys.monitoring.register_callback',
    'cpython.PyConfig_Set', 'cpython.PyInterpreterState_Clear', 'cpython.PyInterpreterState_New',
    'cpython._PySys_ClearAuditHooks',
    'cpython.run_command', 'cpython.run_file', 'cpython.run_module', 'cpython.run_stdin', 'cpython.run_startup',
    'cpython.remote_debugger_script', 'sys.remote_exec',
    'socket.sethostname', 'os.add_dll_directory', 'os.putenv', 'os.unsetenv',
    '_thread.start_new_thread', '_thread.start_joinable_thread',
}
_audit_ctypes = {
    'ctypes.dlopen', 'ctypes.dlsym', 'ctypes.dlsym/handle', 'ctypes.call_function',
    'ctypes.cdata', 'ctypes.cdata/buffer',
    'ctypes.memoryview_at', 'ctypes.string_at', 'ctypes.wstring_at', 'ctypes.addressof', 'ctypes.PyObj_FromPtr',
}
_audit_win = {
    '_winapi.CreateFile', '_winapi.CreateProcess', '_winapi.OpenProcess', '_winapi.TerminateProcess',
    '_winapi.CreateJunction', '_winapi.CreateNamedPipe', '_winapi.CreatePipe',
    'msvcrt.locking', 'msvcrt.get_osfhandle', 'msvcrt.open_osfhandle',
    'winreg.CreateKey', 'winreg.DeleteKey', 'winreg.DeleteValue', 'winreg.SetValue', 'winreg.LoadKey',
    'winreg.SaveKey', 'winreg.DisableReflectionKey', 'winreg.EnableReflectionKey',
}
# Union of every always-denied family.
_audit_deny = _audit_proc.union(_audit_runtime, _audit_ctypes, _audit_win)

# Per-context audit configuration: allowed roots, host callbacks, host data, call-monitor switch.
_AuditCfg = namedtuple('AuditCfg', ['oks', 'before_deny', 'on_call', 'data', 'monitor_calls'])
# Name of the attribute on `sys` under which the shared factory is stored.
_state_attr = '_fastaudit_state'
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _new_state():
|
|
31
|
+
ctx = ContextVar('fastaudit_cfg', default=None)
|
|
32
|
+
write_flags = os.O_WRONLY|os.O_RDWR|os.O_CREAT|os.O_TRUNC|os.O_APPEND
|
|
33
|
+
audit_1st,audit_dst,audit_both = map(frozenset, (_audit_1st,_audit_dst,_audit_both))
|
|
34
|
+
audit_deny = frozenset(_audit_deny|{'fastaudit.call','audit_perms.set_config','audit_perms.set_data'})
|
|
35
|
+
audit_all = audit_deny|audit_1st|audit_dst|audit_both|{'open','os.chdir','os.truncate','object.__setattr__'}
|
|
36
|
+
realpath,dirname,fsdecode,sep,getframe = os.path.realpath,os.path.dirname,os.fsdecode,os.sep,sys._getframe
|
|
37
|
+
mon,audit,stdlib = getattr(sys, 'monitoring', None),sys.audit,frozenset(sys.stdlib_module_names)
|
|
38
|
+
state = {'tool_id':None}
|
|
39
|
+
|
|
40
|
+
def frame_name(f): return f"{f.f_globals.get('__name__')}.{f.f_code.co_qualname}"
|
|
41
|
+
|
|
42
|
+
def func_mod(fn):
|
|
43
|
+
mod = getattr(fn, '__module__', None)
|
|
44
|
+
cls = getattr(fn, '__objclass__', None)
|
|
45
|
+
if not mod and cls is not None: mod = getattr(cls, '__module__', None)
|
|
46
|
+
s = getattr(fn, '__self__', None)
|
|
47
|
+
if not mod and s is not None: mod = getattr(type(s), '__module__', None)
|
|
48
|
+
return mod
|
|
49
|
+
|
|
50
|
+
def func_name(fn):
|
|
51
|
+
mod = func_mod(fn)
|
|
52
|
+
nm = getattr(fn, '__qualname__', getattr(fn, '__name__', None))
|
|
53
|
+
return f'{mod}.{nm}' if mod and nm else None
|
|
54
|
+
|
|
55
|
+
def is_stdlib(fn):
|
|
56
|
+
mod = func_mod(fn)
|
|
57
|
+
return bool(mod) and mod.split('.', 1)[0] in stdlib
|
|
58
|
+
|
|
59
|
+
def callee_is_python(fn):
|
|
60
|
+
if hasattr(fn, '__code__') or isinstance(fn, type): return True
|
|
61
|
+
return hasattr(getattr(type(fn), '__call__', None), '__code__')
|
|
62
|
+
|
|
63
|
+
def external_frame():
|
|
64
|
+
f = getframe()
|
|
65
|
+
while f:
|
|
66
|
+
if not frame_name(f).startswith('fastaudit.'): return f
|
|
67
|
+
f = f.f_back
|
|
68
|
+
|
|
69
|
+
def deny(cfg, event, args, msg):
|
|
70
|
+
if cfg.before_deny and cfg.before_deny(event, args, external_frame(), msg, cfg.data): return
|
|
71
|
+
raise PermissionError(msg)
|
|
72
|
+
|
|
73
|
+
def ok_path(cfg, p, parent=False):
|
|
74
|
+
try:
|
|
75
|
+
p = fsdecode(p)
|
|
76
|
+
if parent: p = dirname(p) or '.'
|
|
77
|
+
rp = realpath(p or '.')
|
|
78
|
+
except (OSError,TypeError,ValueError): return False
|
|
79
|
+
cur = realpath('.')
|
|
80
|
+
return any(rp==(cur if o=='.' else o) or rp.startswith((cur if o=='.' else o)+sep) for o in cfg.oks)
|
|
81
|
+
|
|
82
|
+
def chk(cfg, event, args):
|
|
83
|
+
if event not in audit_all: return
|
|
84
|
+
errstr = f"Audit: {event} blocked in sandbox with args: {args}"
|
|
85
|
+
if event in audit_deny: return deny(cfg, event, args, errstr)
|
|
86
|
+
if event=='object.__setattr__':
|
|
87
|
+
if args[1] in ('__doc__','__module__'): return
|
|
88
|
+
return deny(cfg, event, args, errstr)
|
|
89
|
+
ps = []
|
|
90
|
+
if event=='open':
|
|
91
|
+
path,mode,flags = args
|
|
92
|
+
if isinstance(mode,str) and not set('wax+') & set(mode): return
|
|
93
|
+
if mode is None and not flags & write_flags: return
|
|
94
|
+
ps = [path]
|
|
95
|
+
elif event=='os.chdir':
|
|
96
|
+
if not ok_path(cfg, args[0]): return deny(cfg, event, args, f"{event} {args[0]!r} not in {cfg.oks}")
|
|
97
|
+
return
|
|
98
|
+
elif event=='os.truncate':
|
|
99
|
+
if isinstance(args[0],int): return
|
|
100
|
+
ps = [args[0]]
|
|
101
|
+
elif event in audit_1st: ps = [args[0]]
|
|
102
|
+
elif event in audit_dst: ps = [args[1]]
|
|
103
|
+
elif event in audit_both: ps = args[:2]
|
|
104
|
+
for p in ps:
|
|
105
|
+
if not ok_path(cfg, p, parent=True): deny(cfg, event, args, f"{event} {p!r} not in {cfg.oks}")
|
|
106
|
+
|
|
107
|
+
def hook(event, args):
|
|
108
|
+
cfg = ctx.get()
|
|
109
|
+
if cfg is None: return
|
|
110
|
+
try: chk(cfg, event, args)
|
|
111
|
+
except PermissionError as e:
|
|
112
|
+
e.__traceback__ = None
|
|
113
|
+
raise
|
|
114
|
+
|
|
115
|
+
def call_cb(code, off, fn, arg0):
|
|
116
|
+
if code is call_cb.__code__ or callee_is_python(fn) or is_stdlib(fn): return mon.DISABLE
|
|
117
|
+
cfg = ctx.get()
|
|
118
|
+
if cfg is None or not cfg.monitor_calls: return
|
|
119
|
+
caller,callee = frame_name(getframe(1)),func_name(fn)
|
|
120
|
+
if not callee: return
|
|
121
|
+
if cfg.on_call:
|
|
122
|
+
res = cfg.on_call(caller, callee, fn, code, off, cfg.data)
|
|
123
|
+
if res is mon.DISABLE: return mon.DISABLE
|
|
124
|
+
if res is False: return
|
|
125
|
+
try: audit('fastaudit.call', caller, callee)
|
|
126
|
+
except PermissionError as e:
|
|
127
|
+
e.__traceback__ = None
|
|
128
|
+
raise
|
|
129
|
+
|
|
130
|
+
def install_call_monitor(tool_id):
|
|
131
|
+
if not mon: raise RuntimeError('monitor_calls=True requires Python 3.12+ sys.monitoring')
|
|
132
|
+
if (old:=state['tool_id']) is not None:
|
|
133
|
+
if old!=tool_id: raise RuntimeError(f'fastaudit already uses sys.monitoring tool id {old}')
|
|
134
|
+
return
|
|
135
|
+
if (tool:=mon.get_tool(tool_id)) == 'fastaudit':
|
|
136
|
+
mon.set_events(tool_id, 0)
|
|
137
|
+
mon.register_callback(tool_id, mon.events.CALL, None)
|
|
138
|
+
mon.free_tool_id(tool_id)
|
|
139
|
+
elif tool: raise RuntimeError(f'sys.monitoring tool id {tool_id} is already used by {tool!r}')
|
|
140
|
+
mon.use_tool_id(tool_id, 'fastaudit')
|
|
141
|
+
mon.register_callback(tool_id, mon.events.CALL, call_cb)
|
|
142
|
+
mon.set_events(tool_id, mon.events.CALL)
|
|
143
|
+
state['tool_id'] = tool_id
|
|
144
|
+
|
|
145
|
+
def mk_audit_(oks, before_deny=None, on_call=None, data=None, tool_id=3, monitor_calls=True):
|
|
146
|
+
audit('audit_perms.set_config', oks)
|
|
147
|
+
if on_call and not monitor_calls: raise RuntimeError('on_call requires monitor_calls=True')
|
|
148
|
+
if monitor_calls: install_call_monitor(tool_id)
|
|
149
|
+
oks = tuple('.' if o=='.' else realpath(fsdecode(o)) for o in oks)
|
|
150
|
+
cfg = _AuditCfg(oks, before_deny, on_call, data, monitor_calls)
|
|
151
|
+
|
|
152
|
+
@contextmanager
|
|
153
|
+
def cm():
|
|
154
|
+
nonlocal cfg
|
|
155
|
+
old = ctx.get()
|
|
156
|
+
if old is not cfg: audit('audit_perms.set_config', cfg)
|
|
157
|
+
tok = ctx.set(cfg)
|
|
158
|
+
if cfg.monitor_calls: mon.restart_events()
|
|
159
|
+
try: yield
|
|
160
|
+
finally: ctx.reset(tok)
|
|
161
|
+
|
|
162
|
+
def set_data(d):
|
|
163
|
+
nonlocal cfg
|
|
164
|
+
audit('audit_perms.set_data', d)
|
|
165
|
+
cfg = cfg._replace(data=d)
|
|
166
|
+
return cfg.data
|
|
167
|
+
|
|
168
|
+
cm.set_data = set_data
|
|
169
|
+
return cm
|
|
170
|
+
|
|
171
|
+
sys.addaudithook(hook)
|
|
172
|
+
return mk_audit_
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _get_mk_audit():
    "Return the shared `mk_audit_` factory stored on `sys`, building and storing it on first use."
    factory = getattr(sys, _state_attr, None)
    if factory is not None: return factory
    # First use: build the closure state and store it on `sys` for later lookups.
    factory = _new_state()
    setattr(sys, _state_attr, factory)
    return factory
|
|
181
|
+
|
|
182
|
+
def mk_audit(oks, before_deny=None, on_call=None, data=None, tool_id=3, monitor_calls=True):
    "Public entry point: forward all arguments, positionally and unchanged, to the shared `mk_audit_` factory."
    factory = _get_mk_audit()
    return factory(oks, before_deny, on_call, data, tool_id, monitor_calls)
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fastaudit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A lightweight execution guard for running LLM-generated Python in a normal Python process.
|
|
5
|
+
Author: Answer.AI
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/AnswerDotAI/fastaudit
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: fastcore
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest; extra == "dev"
|
|
16
|
+
Requires-Dist: orjson; extra == "dev"
|
|
17
|
+
Requires-Dist: numpy; extra == "dev"
|
|
18
|
+
Requires-Dist: fastship; extra == "dev"
|
|
19
|
+
Requires-Dist: build; extra == "dev"
|
|
20
|
+
Requires-Dist: twine; extra == "dev"
|
|
21
|
+
Dynamic: license-file
|
|
22
|
+
|
|
23
|
+
# fastaudit
|
|
24
|
+
|
|
25
|
+
`fastaudit` is a lightweight execution guard for running LLM-generated Python in a normal Python process.
|
|
26
|
+
|
|
27
|
+
It is not intended to be a hardened adversarial sandbox. Its purpose is to stop accidental damage from overly broad file operations, unexpected subprocess calls, and tool use that reaches outside approved working directories.
|
|
28
|
+
|
|
29
|
+
The core mechanism is Python's audit hook system. The first `mk_audit()` call installs one process-wide audit hook. On Python 3.12 and newer, `sys.monitoring` is also used to raise audit events for non-stdlib native calls. `mk_audit()` creates an audit context, then enables permission checks only while that execution context is active.
|
|
30
|
+
|
|
31
|
+
`fastaudit` requires Python 3.10 or newer. Native call monitoring requires Python 3.12 or newer and is enabled by default, so on Python 3.10/3.11 `mk_audit()` raises `RuntimeError` unless you pass `monitor_calls=False`. Pass `monitor_calls=False` on any version to use audit-hook-only mode and avoid monitoring overhead.
|
|
32
|
+
|
|
33
|
+
## Why this exists
|
|
34
|
+
|
|
35
|
+
LLM-generated code is usually helpful, but sometimes too determined. If a command fails, an assistant may try another route; if a path is wrong, it may broaden the search; if a tool exists, it may use it without fully understanding its side effects.
|
|
36
|
+
|
|
37
|
+
`fastaudit` is designed for that case.
|
|
38
|
+
|
|
39
|
+
It helps with:
|
|
40
|
+
|
|
41
|
+
- blocking subprocess and process-escape operations unless explicitly allowed
|
|
42
|
+
- allowing writes only under approved roots
|
|
43
|
+
- allowing broad read access where appropriate
|
|
44
|
+
- making permission failures clear and immediate
|
|
45
|
+
- letting host policy callbacks allow trusted tools while ordinary generated code stays checked
|
|
46
|
+
- avoiding global audit state leaks across async tasks
|
|
47
|
+
|
|
48
|
+
It deliberately does not try to defeat malicious code running in the same interpreter.
|
|
49
|
+
|
|
50
|
+
## Audit hook categorization
|
|
51
|
+
|
|
52
|
+
The audit hook is designed as a lightweight guardrail for LLM/tool-generated code, not as a hardened security sandbox against malicious code. The goal is to prevent accidental or over-broad filesystem mutation outside approved working directories: e.g. deleting files in the wrong project, writing into a user’s home directory, or spawning subprocesses unexpectedly. It assumes the surrounding process, user account, and pre-existing filesystem layout are trusted, and that the code being checked is not actively trying to exploit races, pre-planted symlinks, or CPython internals.
|
|
53
|
+
|
|
54
|
+
The design keeps the common path simple and cheap. Dangerous process-escape events such as subprocess execution are denied outright. Filesystem write/delete events are allowed only when the relevant parent directory is inside a precomputed allowlist, since most mutations are really changes to directory entries. For destination-only operations such as copy, only the destination parent matters; for move/rename/link-style operations, both paths are checked because both filesystem locations may be affected. Read-only operations are generally ignored, and file-descriptor-based truncation is allowed on the assumption that the path policy was enforced when the descriptor was opened. This gives practical protection against accidental damage while avoiding the complexity and cost of pretending to be a fully adversarial sandbox.
|
|
55
|
+
|
|
56
|
+
Symlinks are treated as part of the trusted filesystem setup. The hook’s path checks focus on the parent directories of mutations, which is the right model for operations that create, remove, or rename directory entries. This means an existing symlink inside an allowed directory may still point outside the allowed roots; that is acceptable under this threat model because the user controls the workspace layout and is assumed not to pre-place hostile links. To avoid making that assumption worse, symlink and hard-link creation should be restricted: the new link’s parent must be allowed, and the link target should either be denied or required to resolve inside an allowed root.
|
|
57
|
+
|
|
58
|
+
## Threat model
|
|
59
|
+
|
|
60
|
+
`fastaudit` assumes:
|
|
61
|
+
|
|
62
|
+
- the user, workspace, and pre-existing filesystem layout are trusted
|
|
63
|
+
- code is LLM-generated or LLM-directed, not actively hostile
|
|
64
|
+
- accidental overreach is the main risk
|
|
65
|
+
- rich user tools may need access that ordinary generated code should not have
|
|
66
|
+
- Solveit or the host application controls the execution wrapper
|
|
67
|
+
|
|
68
|
+
It does not assume:
|
|
69
|
+
|
|
70
|
+
- Python introspection is unavailable
|
|
71
|
+
- frames, closures, or modules are impossible to inspect
|
|
72
|
+
- same-process execution can provide a hard security boundary
|
|
73
|
+
- OS-level sandboxing is unnecessary for adversarial workloads
|
|
74
|
+
|
|
75
|
+
For adversarial code, use a subprocess, container, VM, or OS-level policy.
|
|
76
|
+
|
|
77
|
+
## Audit scope
|
|
78
|
+
|
|
79
|
+
Auditing is opt-in per logical task. The audit hook is registered globally when the first audit context is created, and the optional call monitor is registered globally when needed, but permission checks only run while `audit_perms()` is active. This matters for async code. A global boolean or counter would leak audit state between unrelated coroutines whenever one audited task awaits. A `ContextVar` gives logical scoping: child tasks inherit at creation time, nested contexts restore cleanly via tokens, and the guard follows execution flow rather than scheduler order. Starting threads is denied inside the audit sandbox, because a new thread does not by default inherit the caller's context variables and would therefore run unaudited.
|
|
80
|
+
|
|
81
|
+
The hook is built once inside a closure rather than read from module globals on every event. Allowed roots and callbacks live in the active context config, the event-classification sets are converted to `frozenset`, and the helpers used in the hot path — `realpath`, `dirname`, `fsdecode`, `os.sep` — are captured as local names. Nothing the hook depends on lives in a mutable global that a generated cell could clear, replace, or reassign. This is not a security barrier against introspection or frame walking; it is a deliberate effort to remove the easy, accidental disabling paths that an enthusiastic LLM is most likely to take when retrying after a `PermissionError`.
|
|
82
|
+
|
|
83
|
+
## Permission model
|
|
84
|
+
|
|
85
|
+
The policy classifies audit events into a few groups:
|
|
86
|
+
|
|
87
|
+
- events denied outright
|
|
88
|
+
- events where the first path argument is checked
|
|
89
|
+
- events where the destination path is checked
|
|
90
|
+
- events where both source and destination paths are checked
|
|
91
|
+
- special cases such as `open`, `os.truncate`, and sensitive `object.__setattr__`
|
|
92
|
+
|
|
93
|
+
Writes and filesystem mutations are allowed only when the relevant parent directory is inside an approved root.
|
|
94
|
+
|
|
95
|
+
Reads are generally allowed.
|
|
96
|
+
|
|
97
|
+
Subprocess creation and similar process escapes are denied by default.
|
|
98
|
+
|
|
99
|
+
The allowed root `'.'` is dynamic: it means the current directory at the time of each checked operation. This lets a sandbox follow allowed `chdir` calls into child directories. `os.chdir` itself is checked against the destination directory, not the destination's parent.
|
|
100
|
+
|
|
101
|
+
When `monitor_calls=True`, non-stdlib native calls raise a `fastaudit.call` audit event while `audit_perms()` is active. Python calls and stdlib calls are ignored by the call monitor. The context manager calls `sys.monitoring.restart_events()` on entry so monitored call sites disabled before the context are seen again inside it. With `monitor_calls=False`, only normal Python audit-hook events are checked.
|
|
102
|
+
|
|
103
|
+
### get/set attr hooks
|
|
104
|
+
|
|
105
|
+
The `object.__setattr__` audit event fires only for a small fixed set of "sensitive" attribute assignments, not for general attribute setting. On types/classes, it fires when setting `__name__`, `__qualname__`, `__module__`, `__bases__`, `__doc__`, or `__type_params__` — these go through `check_set_special_type_attr` in `Objects/typeobject.c`. Reassigning `__class__` on any object is also audited, via `object_set_class` in the same file. On function objects, assignments to `__code__`, `__defaults__`, and `__kwdefaults__` are audited, via the relevant setters in `Objects/funcobject.c`.
|
|
106
|
+
|
|
107
|
+
All other attribute assignments — including ordinary `C.x = 1` on a class, instance attribute assignment, and even some dunders like `__abstractmethods__` and `__annotations__` (which write directly via `PyDict_SetItem`) — bypass the audit hook entirely. This is why `@dataclass` triggers an event (it sets `cls.__doc__`) and `namedtuple` triggers one (it sets `cls.__module__`), while `class C: pass; C.x = 1; C.foo = lambda self: None` is silent. The authoritative list lives in the CPython source at [`Objects/typeobject.c`](https://github.com/python/cpython/blob/v3.12.0/Objects/typeobject.c) and [`Objects/funcobject.c`](https://github.com/python/cpython/blob/v3.12.0/Objects/funcobject.c); the public docs only describe the event as firing for "certain sensitive attribute assignments" without enumerating them.
|
|
108
|
+
|
|
109
|
+
## Host policy
|
|
110
|
+
|
|
111
|
+
Some user-provided tools need permissions that ordinary generated code should not have. For instance, a search tool may need to call `rg`, or a helper may need to spawn a tightly controlled subprocess.
|
|
112
|
+
|
|
113
|
+
`fastaudit` does not define that policy itself. Host code can pass `before_deny`, which is called after `fastaudit` decides an operation should be blocked and before `PermissionError` is raised:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
before_deny(event, args, frame, msg, data)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
The callback receives the event name, audit arguments, the first non-`fastaudit` stack frame, the error message, and the current host data. Returning a truthy value allows the operation. Returning a falsey value denies it. Exceptions from the callback propagate.
|
|
120
|
+
|
|
121
|
+
For non-stdlib native calls, host code can also pass `on_call`, which runs before `fastaudit.call` is raised. `on_call` requires `monitor_calls=True`:
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
on_call(caller, callee, fn, code, off, data)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
It receives the caller, callee, function object, code object, bytecode offset, and current host data. It can return `False` to suppress the audit event for that call, or `sys.monitoring.DISABLE` to disable that monitored call site. Exceptions from the callback propagate.
|
|
128
|
+
|
|
129
|
+
The optional `data` argument is stored in the audit context config and passed to both callbacks. A host can build mutable policy state outside the sandbox, pass a frozen snapshot to `mk_audit`, and later replace that snapshot with a new one via `audit_perms.set_data(...)`. Creating or entering a new audit context, or calling `set_data`, raises an internal audit event and is denied while `audit_perms()` is active.
|
|
130
|
+
|
|
131
|
+
`mk_audit()` uses `sys.monitoring` tool id `3` by default when call monitoring is enabled. Pass `tool_id=...` if the host already uses that id.
|
|
132
|
+
|
|
133
|
+
## API sketch
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
audit_perms = mk_audit(['/tmp', os.getcwd()], before_deny=allow_trusted_tool, data=frozenset(allowed))
|
|
137
|
+
|
|
138
|
+
with audit_perms():
|
|
139
|
+
exec(code, restricted_globals)
|
|
140
|
+
|
|
141
|
+
audit_perms.set_data(frozenset(new_allowed))
|
|
142
|
+
|
|
143
|
+
audit_perms = mk_audit(['/tmp'], monitor_calls=False) # audit hooks only
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Implementation notes
|
|
147
|
+
|
|
148
|
+
The hook should avoid relying on mutable globals during enforcement.
|
|
149
|
+
|
|
150
|
+
At construction time, bind or freeze:
|
|
151
|
+
|
|
152
|
+
- approved roots
|
|
153
|
+
- audit event sets
|
|
154
|
+
- write flags
|
|
155
|
+
- path helpers such as `realpath`, `dirname`, and `fsdecode`
|
|
156
|
+
- frame lookup helper
|
|
157
|
+
- call-monitor helpers and callbacks
|
|
158
|
+
|
|
159
|
+
This prevents the most likely accidental disabling paths, such as clearing a global deny set or replacing a helper function. The implementation still does not claim to be secure against deliberate frame walking or introspection.
|
|
160
|
+
|
|
161
|
+
## Limitations
|
|
162
|
+
|
|
163
|
+
`fastaudit` does not provide a hard security boundary.
|
|
164
|
+
|
|
165
|
+
Known limitations:
|
|
166
|
+
|
|
167
|
+
- same-process Python code can inspect a lot of runtime state
|
|
168
|
+
- pre-existing writable file descriptors may bypass path-open checks
|
|
169
|
+
- host callbacks can do anything their implementation permits
|
|
170
|
+
- thread support is intentionally restricted unless the host explicitly designs for it
|
|
171
|
+
- The `CALL` event in `sys.monitoring` does not fire for operators invoked via dedicated bytecode opcodes — `BINARY_OP` (`a + b`), `BINARY_SUBSCR` (`a[i]`), comparisons, etc. These dispatch directly to the C-level numeric/subscript/compare slots, which aren't "calls" in PEP 669's model. Explicit dunder invocations (`a.__add__(b)`) do fire CALL normally.
|
|
172
|
+
|
|
173
|
+
These limitations are acceptable for a guardrail system aimed at LLM-directed execution. They are not acceptable for hostile code.
|
|
174
|
+
|
|
175
|
+
## Design principle
|
|
176
|
+
|
|
177
|
+
The goal is not to make escape impossible. The goal is to make the safe path easy, the risky path explicit, and accidental overreach fail early with a useful error.
|
|
178
|
+
|
|
179
|
+
## Release
|
|
180
|
+
|
|
181
|
+
1) Ensure your GitHub issues are labeled (`bug`, `enhancement`, `breaking`).
|
|
182
|
+
2) Run:
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
ship-gh
|
|
186
|
+
ship-pypi
|
|
187
|
+
ship-bump
|
|
188
|
+
```
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
CHANGELOG.md
|
|
2
|
+
LICENSE
|
|
3
|
+
MANIFEST.in
|
|
4
|
+
README.md
|
|
5
|
+
pyproject.toml
|
|
6
|
+
fastaudit/__init__.py
|
|
7
|
+
fastaudit/core.py
|
|
8
|
+
fastaudit.egg-info/PKG-INFO
|
|
9
|
+
fastaudit.egg-info/SOURCES.txt
|
|
10
|
+
fastaudit.egg-info/dependency_links.txt
|
|
11
|
+
fastaudit.egg-info/requires.txt
|
|
12
|
+
fastaudit.egg-info/top_level.txt
|
|
13
|
+
tests/test_core.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
fastaudit
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "fastaudit"
|
|
7
|
+
description = "A lightweight execution guard for running LLM-generated Python in a normal Python process."
|
|
8
|
+
|
|
9
|
+
dynamic = ["version"]
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
license = { text = "Apache-2.0" }
|
|
13
|
+
authors = [{ name = "Answer.AI" }]
|
|
14
|
+
classifiers = [ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", ]
|
|
15
|
+
|
|
16
|
+
dependencies = [ "fastcore", ]
|
|
17
|
+
|
|
18
|
+
[project.optional-dependencies]
|
|
19
|
+
dev = [ "pytest", "orjson", "numpy", "fastship", "build", "twine", ]
|
|
20
|
+
|
|
21
|
+
[project.urls]
|
|
22
|
+
Homepage = "https://github.com/AnswerDotAI/fastaudit"
|
|
23
|
+
|
|
24
|
+
[tool.setuptools.dynamic]
|
|
25
|
+
version = { attr = "fastaudit.__version__" }
|
|
26
|
+
|
|
27
|
+
[tool.setuptools.packages.find]
|
|
28
|
+
include = ["fastaudit"]
|
|
29
|
+
|
|
30
|
+
[tool.pytest.ini_options]
|
|
31
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import numpy as np, orjson, os, shutil, subprocess, sys, traceback
|
|
2
|
+
from fastcore.foundation import working_directory
|
|
3
|
+
from fastcore.test import expect_fail
|
|
4
|
+
from fastaudit.core import mk_audit
|
|
5
|
+
from os.path import join,realpath,expanduser
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def touch(p, s='x'):
    "Create or overwrite the file at `p` with the text `s`."
    with open(p, 'w') as fh:
        fh.write(s)
|
|
10
|
+
|
|
11
|
+
def test_audit_blocks(tmp_path):
    "Check what an audit context rooted at `okdest` and the dynamic '.' root allows and denies."
    orig_cwd = os.getcwd()
    dotdest = tmp_path/'dotdest'
    okdest = tmp_path/'okdest'
    (dotdest/'child').mkdir(parents=True)
    okdest.mkdir()
    okdest = realpath(okdest)
    inside = join(okdest, 'audit-test.txt')
    inside2 = join(okdest, 'audit-test-2.txt')
    inside3 = join(okdest, 'audit-test-3.txt')
    outside = expanduser('~/audit-test-outside.txt')
    # Built before the sandbox is entered; used at the end to try to swap policies from inside.
    permissive = mk_audit([expanduser('~')], monitor_calls=False)

    with working_directory(dotdest), mk_audit((okdest,'.'))():
        # Assigning `__code__` on a function is a sensitive mutation and must be denied.
        def f(): pass
        with expect_fail(PermissionError):
            f.__code__ = f.__code__

        # Read-only access outside the approved roots goes through.
        open('/etc/passwd', 'r').close()
        fd = os.open('/etc/passwd', os.O_RDONLY)
        os.close(fd)

        # Opening for write, or deleting, outside the approved roots is denied.
        with expect_fail(PermissionError):
            os.open(outside, os.O_WRONLY)
        with expect_fail(PermissionError):
            os.remove(outside)

        # Copying is fine into an approved root and denied when the destination is outside.
        shutil.copyfile('/etc/passwd', inside)
        with expect_fail(PermissionError):
            shutil.copyfile(inside, outside)
        os.remove(inside)

        # A rename is denied if either end is outside the roots; subprocesses are denied too.
        touch(inside2)
        with expect_fail(PermissionError):
            os.rename(outside, inside3)
        with expect_fail(PermissionError):
            os.rename(inside2, outside)
        os.remove(inside2)
        with expect_fail(PermissionError):
            subprocess.run(['echo', 'hi'])

        # The '.' root covers the current directory and below; chdir is judged by its destination.
        touch('dot-inside.txt')
        touch('child/nested.txt')
        with expect_fail(PermissionError):
            touch('../sibling.txt')
        with expect_fail(PermissionError):
            os.chdir(orig_cwd)

        # The traceback of a denial must not contain frames from fastaudit/core.py.
        try:
            subprocess.run(['echo', 'hi'])
        except PermissionError as e:
            frames = traceback.extract_tb(e.__traceback__)
        assert not [fr for fr in frames if fr.filename.endswith('fastaudit/core.py')]

        # Instantiating a Python class and calling its instance is not a native call.
        class PyCallable:
            def __init__(self): super().__init__()
            def __call__(self): return 'ok'
        assert PyCallable()() == 'ok'

        # A native call into a non-stdlib extension is denied.
        with expect_fail(PermissionError):
            orjson.dumps({'a': 1})

        # Neither creating nor entering another audit context is possible from inside the sandbox.
        with expect_fail(PermissionError):
            mk_audit([expanduser('~')], monitor_calls=False)
        with expect_fail(PermissionError), permissive():
            pass
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_callbacks(tmp_path):
    "Check that `before_deny` and `on_call` host callbacks can selectively allow operations."
    def before_deny(event, args, frame, msg, data):
        # Allow only subprocess launches whose command starts with `echo`.
        return event=='subprocess.Popen' and args[1][:1]==['echo']

    def on_call(caller, callee, fn, code, off, data):
        # `False` suppresses the audit event for this call; `DISABLE` turns off the call site.
        if callee=='orjson.dumps':
            return False
        if callee.startswith('numpy.'):
            return sys.monitoring.DISABLE

    with mk_audit([tmp_path], before_deny=before_deny, on_call=on_call)():
        # Native calls that `on_call` lets through run normally…
        assert orjson.dumps({'a': 1}) == b'{"a":1}'
        assert np.array([1, 2, 3]).sum() == 6
        # …and so does the audit event that `before_deny` approves.
        res = subprocess.run(['echo', 'hi'], capture_output=True, text=True)
        assert res.stdout == 'hi\n'
        # Any other subprocess command is still denied.
        with expect_fail(PermissionError):
            subprocess.run(['ls'])
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def test_monitor_calls_can_be_disabled(tmp_path):
    "Check `monitor_calls=False`: `on_call` is rejected, audit-hook checks remain, native calls pass."
    # Supplying `on_call` while call monitoring is off is expected to raise RuntimeError.
    with expect_fail(RuntimeError):
        mk_audit([tmp_path], on_call=lambda *args: None, monitor_calls=False)
    with mk_audit([tmp_path], monitor_calls=False)():
        # Ordinary audit-hook events are still checked…
        with expect_fail(PermissionError):
            subprocess.run(['echo', 'hi'])
        # …while a non-stdlib native call is not intercepted.
        assert orjson.dumps({'a': 1}) == b'{"a":1}'
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def test_implement_allow_list(tmp_path):
    "A brief demo of creating an `allow()` system"
    def trusted_echo():
        return subprocess.run(['echo', 'hi'], capture_output=True, text=True)

    # Host-side registry of trusted functions, keyed by "module.qualname".
    allowed = set()
    def allow(fn):
        allowed.add(f'{fn.__module__}.{fn.__qualname__}')
    allow(trusted_echo)

    def before_deny(event, args, frame, msg, data):
        # Walk up the stack; approve if any calling frame belongs to a function named in `data`.
        while frame:
            if f"{frame.f_globals.get('__name__')}.{frame.f_code.co_qualname}" in data:
                return True
            frame = frame.f_back

    audit_perms = mk_audit([tmp_path], before_deny=before_deny, data=frozenset(allowed))
    with audit_perms():
        # The trusted wrapper may spawn the subprocess; the same call made directly may not.
        assert trusted_echo().stdout == 'hi\n'
        with expect_fail(PermissionError):
            subprocess.run(['echo', 'hi'])
        # Replacing the callback data from inside the sandbox is denied.
        with expect_fail(PermissionError):
            audit_perms.set_data(frozenset())

    # Outside the sandbox the host may replace the data; with an empty set nothing is trusted.
    audit_perms.set_data(frozenset())
    with audit_perms():
        with expect_fail(PermissionError):
            trusted_echo()
|