riskkernel 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- riskkernel-0.3.0/.gitignore +38 -0
- riskkernel-0.3.0/PKG-INFO +121 -0
- riskkernel-0.3.0/README.md +100 -0
- riskkernel-0.3.0/pyproject.toml +39 -0
- riskkernel-0.3.0/riskkernel/__init__.py +59 -0
- riskkernel-0.3.0/riskkernel/adapters/__init__.py +8 -0
- riskkernel-0.3.0/riskkernel/adapters/claude_agent.py +75 -0
- riskkernel-0.3.0/riskkernel/adapters/langchain.py +82 -0
- riskkernel-0.3.0/riskkernel/adapters/openai_agents.py +49 -0
- riskkernel-0.3.0/riskkernel/approval.py +86 -0
- riskkernel-0.3.0/riskkernel/client.py +153 -0
- riskkernel-0.3.0/riskkernel/errors.py +40 -0
- riskkernel-0.3.0/riskkernel/runtime.py +226 -0
- riskkernel-0.3.0/tests/__init__.py +0 -0
- riskkernel-0.3.0/tests/test_sdk.py +320 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Binaries
|
|
2
|
+
/riskkernel
|
|
3
|
+
/rk
|
|
4
|
+
/dist/
|
|
5
|
+
*.exe
|
|
6
|
+
*.test
|
|
7
|
+
*.out
|
|
8
|
+
|
|
9
|
+
# Local state — RiskKernel's SQLite store is the file the user owns; never commit it
|
|
10
|
+
*.db
|
|
11
|
+
*.db-wal
|
|
12
|
+
*.db-shm
|
|
13
|
+
*.sqlite
|
|
14
|
+
*.sqlite3
|
|
15
|
+
/data/
|
|
16
|
+
|
|
17
|
+
# Secrets — keys come from env/.env/OS-keyring, never the repo
|
|
18
|
+
.env
|
|
19
|
+
.env.*
|
|
20
|
+
!.env.example
|
|
21
|
+
|
|
22
|
+
# Go
|
|
23
|
+
/vendor/
|
|
24
|
+
coverage.txt
|
|
25
|
+
coverage.html
|
|
26
|
+
|
|
27
|
+
# Editor / OS
|
|
28
|
+
.DS_Store
|
|
29
|
+
.idea/
|
|
30
|
+
.vscode/
|
|
31
|
+
*.swp
|
|
32
|
+
|
|
33
|
+
# Python SDK build artifacts
|
|
34
|
+
sdks/python/build/
|
|
35
|
+
sdks/python/dist/
|
|
36
|
+
sdks/python/*.egg-info/
|
|
37
|
+
__pycache__/
|
|
38
|
+
.venv/
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: riskkernel
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Thin Python client for the RiskKernel reliability runtime (Surface 2).
|
|
5
|
+
Project-URL: Homepage, https://github.com/prashar32/riskkernel
|
|
6
|
+
Project-URL: Source, https://github.com/prashar32/riskkernel
|
|
7
|
+
Author: Adarsh Prashar
|
|
8
|
+
License-Expression: Apache-2.0
|
|
9
|
+
Keywords: agents,budget,governance,guardrails,llm,reliability
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
15
|
+
Requires-Python: >=3.9
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
18
|
+
Provides-Extra: langchain
|
|
19
|
+
Requires-Dist: langchain-core; extra == 'langchain'
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# riskkernel (Python SDK)
|
|
23
|
+
|
|
24
|
+
The Python SDK for [RiskKernel](https://github.com/prashar32/riskkernel) — **Surface 2**, deep control over a governed agent run.
|
|
25
|
+
|
|
26
|
+
It is a **thin client** over the self-hosted RiskKernel daemon. Every deterministic
|
|
27
|
+
decision — budgets, loop/time halts, approval policy — happens in the Go core. The
|
|
28
|
+
SDK just makes governed runs ergonomic from Python. **Core install is stdlib-only**
|
|
29
|
+
(no third-party dependencies).
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install riskkernel
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Quickstart
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
import riskkernel as rk
|
|
39
|
+
|
|
40
|
+
rt = rk.Runtime(base_url="http://localhost:7070") # your daemon
|
|
41
|
+
|
|
42
|
+
with rt.governed_run(name="research",
|
|
43
|
+
budget=rt.budget(dollars=1.00, loops=20, seconds=300)) as run:
|
|
44
|
+
# Route your LLM client through the governing proxy so every model call is
|
|
45
|
+
# metered, priced, and budget-enforced under this run:
|
|
46
|
+
cfg = run.proxy_config()
|
|
47
|
+
# cfg["base_url"] -> http://localhost:7070/v1
|
|
48
|
+
# cfg["headers"] -> {"X-RiskKernel-Run-Id": "<run id>"}
|
|
49
|
+
|
|
50
|
+
for _ in range(100):
|
|
51
|
+
run.step() # raises rk.BudgetExceeded when loops/time run out
|
|
52
|
+
# ... your agent reasoning + tool calls ...
|
|
53
|
+
run.checkpoint("after-step", {"messages": messages})
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
When the governor halts the run (token / dollar / loop / time budget), the next
|
|
57
|
+
`run.step()` — or a proxied model call — raises `rk.BudgetExceeded`.
|
|
58
|
+
|
|
59
|
+
## Resume after a crash
|
|
60
|
+
|
|
61
|
+
The daemon reloads non-terminal runs on restart with the budget and usage they had
|
|
62
|
+
already spent, so a `SIGKILL`'d run keeps enforcing without re-spending. Reattach to
|
|
63
|
+
it by id with `resume_run` and pick your work back up from the last checkpoint:
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
with rt.resume_run(run_id) as run: # attaches; never creates or cancels
|
|
67
|
+
cp = run.latest_checkpoint() # the state you saved before the crash
|
|
68
|
+
start = cp["payload"]["cursor"] if cp else 0
|
|
69
|
+
for i in range(start, total): # skip the steps you already paid for
|
|
70
|
+
run.step() # counts against the SAME budget
|
|
71
|
+
# ... your work ...
|
|
72
|
+
run.checkpoint("step", {"cursor": i + 1})
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
The run resumes against whatever budget it had left, so it can't overspend by
|
|
76
|
+
restarting — `run.step()` still raises `rk.BudgetExceeded` at the original ceiling.
|
|
77
|
+
|
|
78
|
+
## Human-in-the-loop tools
|
|
79
|
+
|
|
80
|
+
Gate side-effecting tools on human approval (the daemon's policy decides what needs
|
|
81
|
+
it; the call blocks until a human resolves it via CLI / web / webhook):
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from riskkernel import governed_tool, ApprovalGate
|
|
85
|
+
|
|
86
|
+
@governed_tool(side_effect="write")
|
|
87
|
+
def write_file(path, content):
|
|
88
|
+
... # only runs if approved; else rk.ApprovalDenied
|
|
89
|
+
|
|
90
|
+
# or explicitly:
|
|
91
|
+
gate = ApprovalGate(run)
|
|
92
|
+
if gate.allow("mcp://shell", side_effect="exec", arguments={"cmd": cmd}):
|
|
93
|
+
run_shell(cmd)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Framework adapters
|
|
97
|
+
|
|
98
|
+
Lazy-imported, so you only pay for what you use:
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
# LangChain / LangGraph — enforces loop/time budgets per LLM call
|
|
102
|
+
from riskkernel.adapters.langchain import RiskKernelCallbackHandler
|
|
103
|
+
llm.invoke(prompt, config={"callbacks": [RiskKernelCallbackHandler(run)]})
|
|
104
|
+
|
|
105
|
+
# Claude Agent SDK — PreToolUse approval hook
|
|
106
|
+
from riskkernel.adapters.claude_agent import make_pre_tool_use_hook
|
|
107
|
+
hook = make_pre_tool_use_hook(run, side_effect_for={"Bash": "exec", "Write": "write"})
|
|
108
|
+
|
|
109
|
+
# OpenAI Agents SDK — RunHooks (steps + tool approval)
|
|
110
|
+
from riskkernel.adapters.openai_agents import RiskKernelRunHooks
|
|
111
|
+
hooks = RiskKernelRunHooks(run, gate_tools=True)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Configuration
|
|
115
|
+
|
|
116
|
+
Configure with `Runtime(base_url=..., token=...)`, or set the env vars `RISKKERNEL_BASE_URL` and
|
|
117
|
+
`RISKKERNEL_API_TOKEN` (used by the decorator/convenience API and `default_runtime()`).
|
|
118
|
+
|
|
119
|
+
## License
|
|
120
|
+
|
|
121
|
+
Apache-2.0.
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# riskkernel (Python SDK)
|
|
2
|
+
|
|
3
|
+
The Python SDK for [RiskKernel](https://github.com/prashar32/riskkernel) — **Surface 2**, deep control over a governed agent run.
|
|
4
|
+
|
|
5
|
+
It is a **thin client** over the self-hosted RiskKernel daemon. Every deterministic
|
|
6
|
+
decision — budgets, loop/time halts, approval policy — happens in the Go core. The
|
|
7
|
+
SDK just makes governed runs ergonomic from Python. **Core install is stdlib-only**
|
|
8
|
+
(no third-party dependencies).
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
pip install riskkernel
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Quickstart
|
|
15
|
+
|
|
16
|
+
```python
|
|
17
|
+
import riskkernel as rk
|
|
18
|
+
|
|
19
|
+
rt = rk.Runtime(base_url="http://localhost:7070") # your daemon
|
|
20
|
+
|
|
21
|
+
with rt.governed_run(name="research",
|
|
22
|
+
budget=rt.budget(dollars=1.00, loops=20, seconds=300)) as run:
|
|
23
|
+
# Route your LLM client through the governing proxy so every model call is
|
|
24
|
+
# metered, priced, and budget-enforced under this run:
|
|
25
|
+
cfg = run.proxy_config()
|
|
26
|
+
# cfg["base_url"] -> http://localhost:7070/v1
|
|
27
|
+
# cfg["headers"] -> {"X-RiskKernel-Run-Id": "<run id>"}
|
|
28
|
+
|
|
29
|
+
for _ in range(100):
|
|
30
|
+
run.step() # raises rk.BudgetExceeded when loops/time run out
|
|
31
|
+
# ... your agent reasoning + tool calls ...
|
|
32
|
+
run.checkpoint("after-step", {"messages": messages})
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
When the governor halts the run (token / dollar / loop / time budget), the next
|
|
36
|
+
`run.step()` — or a proxied model call — raises `rk.BudgetExceeded`.
|
|
37
|
+
|
|
38
|
+
## Resume after a crash
|
|
39
|
+
|
|
40
|
+
The daemon reloads non-terminal runs on restart with the budget and usage they had
|
|
41
|
+
already spent, so a `SIGKILL`'d run keeps enforcing without re-spending. Reattach to
|
|
42
|
+
it by id with `resume_run` and pick your work back up from the last checkpoint:
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
with rt.resume_run(run_id) as run: # attaches; never creates or cancels
|
|
46
|
+
cp = run.latest_checkpoint() # the state you saved before the crash
|
|
47
|
+
start = cp["payload"]["cursor"] if cp else 0
|
|
48
|
+
for i in range(start, total): # skip the steps you already paid for
|
|
49
|
+
run.step() # counts against the SAME budget
|
|
50
|
+
# ... your work ...
|
|
51
|
+
run.checkpoint("step", {"cursor": i + 1})
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The run resumes against whatever budget it had left, so it can't overspend by
|
|
55
|
+
restarting — `run.step()` still raises `rk.BudgetExceeded` at the original ceiling.
|
|
56
|
+
|
|
57
|
+
## Human-in-the-loop tools
|
|
58
|
+
|
|
59
|
+
Gate side-effecting tools on human approval (the daemon's policy decides what needs
|
|
60
|
+
it; the call blocks until a human resolves it via CLI / web / webhook):
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from riskkernel import governed_tool, ApprovalGate
|
|
64
|
+
|
|
65
|
+
@governed_tool(side_effect="write")
|
|
66
|
+
def write_file(path, content):
|
|
67
|
+
... # only runs if approved; else rk.ApprovalDenied
|
|
68
|
+
|
|
69
|
+
# or explicitly:
|
|
70
|
+
gate = ApprovalGate(run)
|
|
71
|
+
if gate.allow("mcp://shell", side_effect="exec", arguments={"cmd": cmd}):
|
|
72
|
+
run_shell(cmd)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Framework adapters
|
|
76
|
+
|
|
77
|
+
Lazy-imported, so you only pay for what you use:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
# LangChain / LangGraph — enforces loop/time budgets per LLM call
|
|
81
|
+
from riskkernel.adapters.langchain import RiskKernelCallbackHandler
|
|
82
|
+
llm.invoke(prompt, config={"callbacks": [RiskKernelCallbackHandler(run)]})
|
|
83
|
+
|
|
84
|
+
# Claude Agent SDK — PreToolUse approval hook
|
|
85
|
+
from riskkernel.adapters.claude_agent import make_pre_tool_use_hook
|
|
86
|
+
hook = make_pre_tool_use_hook(run, side_effect_for={"Bash": "exec", "Write": "write"})
|
|
87
|
+
|
|
88
|
+
# OpenAI Agents SDK — RunHooks (steps + tool approval)
|
|
89
|
+
from riskkernel.adapters.openai_agents import RiskKernelRunHooks
|
|
90
|
+
hooks = RiskKernelRunHooks(run, gate_tools=True)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Configuration
|
|
94
|
+
|
|
95
|
+
Configure with `Runtime(base_url=..., token=...)`, or set the env vars `RISKKERNEL_BASE_URL` and
|
|
96
|
+
`RISKKERNEL_API_TOKEN` (used by the decorator/convenience API and `default_runtime()`).
|
|
97
|
+
|
|
98
|
+
## License
|
|
99
|
+
|
|
100
|
+
Apache-2.0.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "riskkernel"
|
|
7
|
+
version = "0.3.0"
|
|
8
|
+
description = "Thin Python client for the RiskKernel reliability runtime (Surface 2)."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = "Apache-2.0"
|
|
12
|
+
authors = [{ name = "Adarsh Prashar" }]
|
|
13
|
+
keywords = ["llm", "agents", "governance", "reliability", "budget", "guardrails"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: Apache Software License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Software Development :: Libraries",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [] # core is stdlib-only
|
|
22
|
+
|
|
23
|
+
[project.urls]
|
|
24
|
+
Homepage = "https://github.com/prashar32/riskkernel"
|
|
25
|
+
Source = "https://github.com/prashar32/riskkernel"
|
|
26
|
+
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
langchain = ["langchain-core"]
|
|
29
|
+
dev = ["pytest"]
|
|
30
|
+
|
|
31
|
+
# This project lives in a subdirectory (sdks/python) of the repo. Hatchling's
|
|
32
|
+
# VCS-aware file selector lists tracked files relative to the repo root, which
|
|
33
|
+
# don't match the project-root-relative `packages` include — so the wheel comes
|
|
34
|
+
# out empty. Selecting files from the filesystem instead of git fixes it. (#32)
|
|
35
|
+
[tool.hatch.build]
|
|
36
|
+
ignore-vcs = true
|
|
37
|
+
|
|
38
|
+
[tool.hatch.build.targets.wheel]
|
|
39
|
+
packages = ["riskkernel"]
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""RiskKernel Python SDK — Surface 2 (deep control).
|
|
2
|
+
|
|
3
|
+
A thin client over the self-hosted RiskKernel daemon. The Go core makes every
|
|
4
|
+
deterministic decision (budgets, halts, approval policy); this package just makes
|
|
5
|
+
governed runs ergonomic from Python.
|
|
6
|
+
|
|
7
|
+
Quickstart::
|
|
8
|
+
|
|
9
|
+
import riskkernel as rk
|
|
10
|
+
|
|
11
|
+
rt = rk.Runtime(base_url="http://localhost:7070")
|
|
12
|
+
with rt.governed_run(name="research", budget=rt.budget(dollars=1.00, loops=20)) as run:
|
|
13
|
+
cfg = run.proxy_config() # route your LLM client through the governing proxy
|
|
14
|
+
for _ in range(100):
|
|
15
|
+
run.step() # raises BudgetExceeded when loops/time run out
|
|
16
|
+
... # your agent reasoning + tool calls
|
|
17
|
+
run.checkpoint("after-step", {"messages": messages})
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from .client import RiskKernel
|
|
21
|
+
from .errors import (
|
|
22
|
+
APIError,
|
|
23
|
+
ApprovalDenied,
|
|
24
|
+
ApprovalTimeout,
|
|
25
|
+
BudgetExceeded,
|
|
26
|
+
RiskKernelError,
|
|
27
|
+
)
|
|
28
|
+
from .approval import ApprovalGate, governed_tool
|
|
29
|
+
from .runtime import (
|
|
30
|
+
Budget,
|
|
31
|
+
Decision,
|
|
32
|
+
Run,
|
|
33
|
+
Runtime,
|
|
34
|
+
configure,
|
|
35
|
+
current_run,
|
|
36
|
+
default_runtime,
|
|
37
|
+
governed_run,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
__version__ = "0.3.0"
|
|
41
|
+
|
|
42
|
+
__all__ = [
|
|
43
|
+
"RiskKernel",
|
|
44
|
+
"Runtime",
|
|
45
|
+
"Run",
|
|
46
|
+
"Budget",
|
|
47
|
+
"Decision",
|
|
48
|
+
"ApprovalGate",
|
|
49
|
+
"governed_run",
|
|
50
|
+
"governed_tool",
|
|
51
|
+
"current_run",
|
|
52
|
+
"configure",
|
|
53
|
+
"default_runtime",
|
|
54
|
+
"RiskKernelError",
|
|
55
|
+
"APIError",
|
|
56
|
+
"BudgetExceeded",
|
|
57
|
+
"ApprovalDenied",
|
|
58
|
+
"ApprovalTimeout",
|
|
59
|
+
]
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Framework adapters that bind a RiskKernel governed run to popular agent
|
|
2
|
+
frameworks. Each adapter lazily imports its framework, so the core SDK has no
|
|
3
|
+
third-party dependencies and you only pay for what you use.
|
|
4
|
+
|
|
5
|
+
- ``langchain`` — a CallbackHandler (loop/time enforcement per LLM call).
|
|
6
|
+
- ``claude_agent`` — a PreToolUse hook for the Claude Agent SDK (approval gate).
|
|
7
|
+
- ``openai_agents`` — RunHooks for the OpenAI Agents SDK (steps + approval gate).
|
|
8
|
+
"""
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Claude Agent SDK adapter: a PreToolUse hook that routes side-effecting tool
|
|
2
|
+
calls through the RiskKernel approval gate. Maps cleanly to the Claude Agent SDK's
|
|
3
|
+
permission model (``permissionDecision: "deny"`` blocks a tool).
|
|
4
|
+
|
|
5
|
+
from riskkernel.adapters.claude_agent import make_pre_tool_use_hook
|
|
6
|
+
hook = make_pre_tool_use_hook(run, side_effect_for={"Bash": "exec", "Write": "write"})
|
|
7
|
+
# register `hook` as your PreToolUse hook in the Claude Agent SDK options.
|
|
8
|
+
|
|
9
|
+
The hook signature follows the Claude Agent SDK: it receives the hook input
|
|
10
|
+
(containing the tool name and input) and returns a decision dict. Because SDK
|
|
11
|
+
versions differ slightly, the returned shape is the documented
|
|
12
|
+
``hookSpecificOutput`` / ``permissionDecision`` form; adjust if your version
|
|
13
|
+
differs.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from typing import Any, Callable, Dict, Optional
|
|
19
|
+
|
|
20
|
+
from ..runtime import Run
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def make_pre_tool_use_hook(
    run: Run,
    side_effect_for: Optional[Dict[str, str]] = None,
    default_side_effect: str = "write",
    timeout: Optional[float] = None,
) -> Callable[..., dict]:
    """Create a PreToolUse hook that asks ``run`` to approve each tool call.

    Args:
        run: the governed Run; its ``approve`` method is called for every
            tool invocation the hook sees.
        side_effect_for: map of tool name -> side-effect label. Tools that
            are not listed use ``default_side_effect``. The label is passed
            to ``run.approve`` unchanged, including an empty string.
            NOTE(review): an empty label is presumably what the daemon's
            policy treats as read-only / ungated -- this function itself
            does not skip the approval call; confirm against the daemon.
        default_side_effect: side-effect label for unlisted tools.
        timeout: forwarded to ``run.approve`` (max seconds to await a human
            decision).

    Returns:
        The hook callable. It returns ``{}`` when the call is approved and a
        ``hookSpecificOutput`` dict with ``permissionDecision: "deny"``
        otherwise.
    """
    labels = side_effect_for if side_effect_for else {}

    def hook(input_data: Any = None, *args: Any, **kwargs: Any) -> dict:
        name, payload = _extract(input_data, kwargs)
        verdict = run.approve(
            name or "tool",
            side_effect=labels.get(name, default_side_effect),
            arguments={"input": _stringify(payload)},
            timeout=timeout,
        )
        if not verdict.approved:
            denial = {
                "hookEventName": "PreToolUse",
                "permissionDecision": "deny",
                "permissionDecisionReason": (
                    verdict.reason or "denied via RiskKernel approval gate"
                ),
            }
            return {"hookSpecificOutput": denial}
        # An empty dict carries no decision, which means "proceed".
        return {}

    return hook
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _extract(input_data: Any, kwargs: dict):
|
|
62
|
+
"""Pull tool name + input out of the hook payload across SDK shapes."""
|
|
63
|
+
data = input_data if isinstance(input_data, dict) else kwargs
|
|
64
|
+
name = data.get("tool_name") or data.get("toolName") or data.get("name") or ""
|
|
65
|
+
tinput = data.get("tool_input") or data.get("toolInput") or data.get("input")
|
|
66
|
+
return name, tinput
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _stringify(v: Any) -> Any:
|
|
70
|
+
try:
|
|
71
|
+
import json
|
|
72
|
+
json.dumps(v)
|
|
73
|
+
return v
|
|
74
|
+
except Exception:
|
|
75
|
+
return repr(v)
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""LangChain / LangGraph adapter: a callback handler that enforces a governed
|
|
2
|
+
run's loop and time budgets, ticking one step per LLM call. Point your LangChain
|
|
3
|
+
LLM at the governing proxy (``run.proxy_config()``) for token/cost/budget metering;
|
|
4
|
+
this handler adds the outer-loop enforcement the proxy can't see.
|
|
5
|
+
|
|
6
|
+
from riskkernel.adapters.langchain import RiskKernelCallbackHandler
|
|
7
|
+
handler = RiskKernelCallbackHandler(run)
|
|
8
|
+
llm.invoke(prompt, config={"callbacks": [handler]})
|
|
9
|
+
|
|
10
|
+
A BudgetExceeded raised here propagates out of the LangChain call, halting the
|
|
11
|
+
chain — exactly when the run is out of budget.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from typing import Any, Optional
|
|
17
|
+
|
|
18
|
+
from ..approval import ApprovalGate
|
|
19
|
+
from ..runtime import Run
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _base_handler():
|
|
23
|
+
# Inherit the real base class when LangChain is installed (best integration);
|
|
24
|
+
# otherwise fall back to object so the module still imports.
|
|
25
|
+
try:
|
|
26
|
+
from langchain_core.callbacks import BaseCallbackHandler # type: ignore
|
|
27
|
+
return BaseCallbackHandler
|
|
28
|
+
except Exception:
|
|
29
|
+
try:
|
|
30
|
+
from langchain.callbacks.base import BaseCallbackHandler # type: ignore
|
|
31
|
+
return BaseCallbackHandler
|
|
32
|
+
except Exception:
|
|
33
|
+
return object
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class RiskKernelCallbackHandler(_base_handler()):  # type: ignore[misc]
    """Enforces loop/time budgets and (optionally) gates tools on approval.

    One governed step is ticked per LLM / chat-model start; when
    ``gate_tools`` is set, each tool start must pass the approval gate.

    Args:
        run: the governed Run.
        gate_tools: if True, every tool call must pass the approval gate.
        tool_side_effect: side-effect label reported for gated tools.
        timeout: max seconds to await a human decision for a gated tool,
            forwarded to ``ApprovalGate.require``. ``None`` (the default)
            forwards no limit, which is the behaviour this handler had
            before the parameter existed.

    Raises:
        Whatever ``run.step()`` raises from ``on_llm_start`` /
        ``on_chat_model_start``, and ``ApprovalDenied`` from ``on_tool_start``
        when a gated tool is not approved.
    """

    # LangChain swallows exceptions raised inside a callback — it logs them and
    # keeps running — UNLESS the handler sets raise_error=True. Without this, a
    # BudgetExceeded (or ApprovalDenied) raised in a hook below would be silently
    # dropped and the chain would keep spending past its budget. This single flag
    # is what makes the deterministic halt actually stop the LangChain run.
    raise_error = True

    def __init__(self, run: Run, gate_tools: bool = False,
                 tool_side_effect: str = "tool",
                 timeout: Optional[float] = None):
        self.run = run
        self.gate_tools = gate_tools
        self.tool_side_effect = tool_side_effect
        self.timeout = timeout
        self._gate = ApprovalGate(run)

    # One LLM call == one governed step. Raises BudgetExceeded when spent.
    def on_llm_start(self, serialized: Any, prompts: Any, **kwargs: Any) -> None:
        self.run.step()

    def on_chat_model_start(self, serialized: Any, messages: Any, **kwargs: Any) -> None:
        self.run.step()

    def on_tool_start(self, serialized: Any, input_str: Any, **kwargs: Any) -> None:
        """Gate the tool through the approval gate when ``gate_tools`` is set."""
        if not self.gate_tools:
            return
        name = ""
        if isinstance(serialized, dict):
            name = serialized.get("name", "")
        # Fall back to a generic label when the payload carries no usable name.
        self._gate.require(name or "tool", side_effect=self.tool_side_effect,
                           arguments={"input": _stringify(input_str)},
                           timeout=self.timeout)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _stringify(v: Any) -> Any:
|
|
77
|
+
try:
|
|
78
|
+
import json
|
|
79
|
+
json.dumps(v)
|
|
80
|
+
return v
|
|
81
|
+
except Exception:
|
|
82
|
+
return repr(v)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""OpenAI Agents SDK adapter: lifecycle hooks that tick a governed step per agent
|
|
2
|
+
turn and gate tools through the approval gate.
|
|
3
|
+
|
|
4
|
+
from riskkernel.adapters.openai_agents import RiskKernelRunHooks
|
|
5
|
+
hooks = RiskKernelRunHooks(run, gate_tools=True)
|
|
6
|
+
await Runner.run(agent, input, hooks=hooks)
|
|
7
|
+
|
|
8
|
+
The OpenAI Agents SDK calls ``on_agent_start``/``on_tool_start`` (async). We tick a
|
|
9
|
+
step on each agent start (loop/time enforcement) and, when ``gate_tools`` is set,
|
|
10
|
+
await approval before a tool runs — raising ApprovalDenied to block it.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import Any, Optional
|
|
16
|
+
|
|
17
|
+
from ..approval import ApprovalGate
|
|
18
|
+
from ..runtime import Run
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _base_hooks():
|
|
22
|
+
try:
|
|
23
|
+
from agents import RunHooks # type: ignore (openai-agents)
|
|
24
|
+
return RunHooks
|
|
25
|
+
except Exception:
|
|
26
|
+
return object
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class RiskKernelRunHooks(_base_hooks()):  # type: ignore[misc]
    """Lifecycle hooks tying a governed run to an OpenAI Agents run.

    Args:
        run: the governed Run.
        gate_tools: when True, each tool start must pass the approval gate.
        tool_side_effect: side-effect label reported for gated tools.
        timeout: forwarded to ``ApprovalGate.require`` for gated tools.
    """

    def __init__(self, run: Run, gate_tools: bool = False,
                 tool_side_effect: str = "tool", timeout: Optional[float] = None):
        self._gate = ApprovalGate(run)
        self.timeout = timeout
        self.tool_side_effect = tool_side_effect
        self.gate_tools = gate_tools
        self.run = run

    async def on_agent_start(self, context: Any = None, agent: Any = None, **kwargs: Any) -> None:
        """Tick one governed step per agent turn (loop/time enforcement)."""
        # NOTE(review): ``run.step()`` is called synchronously inside this
        # coroutine; if it does blocking I/O the event loop waits on it.
        self.run.step()

    async def on_tool_start(self, context: Any = None, agent: Any = None,
                            tool: Any = None, **kwargs: Any) -> None:
        """Require approval before a tool runs when ``gate_tools`` is set.

        Raises ``ApprovalDenied`` (via the gate) when the call is not approved.
        """
        if self.gate_tools:
            label = getattr(tool, "name", None)
            if not label:
                # No usable ``name`` attribute: fall back to the tool's str().
                label = str(tool)
            self._gate.require(label, side_effect=self.tool_side_effect,
                               timeout=self.timeout)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Human-in-the-loop approval helpers for the SDK."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import functools
|
|
6
|
+
from typing import Any, Callable, Optional
|
|
7
|
+
|
|
8
|
+
from .errors import ApprovalDenied, RiskKernelError
|
|
9
|
+
from .runtime import Decision, Run, current_run
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ApprovalGate:
    """Guards side-effecting actions behind an approval decision.

    The gate delegates to a Run's ``approve`` method; when no Run was given
    at construction it uses ``current_run()`` at call time.

        gate = ApprovalGate(run)
        if gate.allow("mcp://shell", side_effect="exec", arguments={"cmd": cmd}):
            run_shell(cmd)
    """

    def __init__(self, run: Optional[Run] = None):
        self._run = run

    def _resolve_run(self) -> Run:
        """Return the bound run, else the current one; raise if there is none."""
        bound = self._run
        active = bound if bound else current_run()
        if active is None:
            raise RiskKernelError("ApprovalGate used outside a governed run")
        return active

    def decide(self, tool: str, side_effect: str = "", arguments: Optional[dict] = None,
               step_index: int = 0, timeout: Optional[float] = None) -> Decision:
        """Ask the run for a Decision and return it; denial does not raise here."""
        target = self._resolve_run()
        return target.approve(
            tool,
            side_effect=side_effect,
            arguments=arguments,
            step_index=step_index,
            timeout=timeout,
        )

    def allow(self, tool: str, side_effect: str = "", arguments: Optional[dict] = None,
              step_index: int = 0, timeout: Optional[float] = None) -> bool:
        """Boolean shortcut: the ``approved`` flag of the Decision."""
        outcome = self.decide(tool, side_effect, arguments, step_index, timeout)
        return outcome.approved

    def require(self, tool: str, side_effect: str = "", arguments: Optional[dict] = None,
                step_index: int = 0, timeout: Optional[float] = None) -> None:
        """Raise ApprovalDenied unless approved (call before a side effect)."""
        outcome = self.decide(tool, side_effect, arguments, step_index, timeout)
        if outcome.approved:
            return
        raise ApprovalDenied(tool, outcome.reason)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def governed_tool(_fn: Optional[Callable] = None, *, tool: Optional[str] = None,
                  side_effect: str = "write", timeout: Optional[float] = None):
    """Decorate a side-effecting tool so each call first passes the approval gate.

    Usable bare (``@governed_tool``) or with options
    (``@governed_tool(side_effect="write")``). On every call the wrapper
    builds an ``ApprovalGate()`` with no explicit run, so the gate resolves
    the current governed run, and calls ``require`` before invoking the
    wrapped function. ``require`` raises ApprovalDenied when not approved.

        @governed_tool(side_effect="write")
        def write_file(path, content): ...

    Args:
        tool: name reported to the gate; defaults to the function's ``__name__``.
        side_effect: side-effect label reported to the gate.
        timeout: forwarded to ``ApprovalGate.require``.
    """

    def decorate(fn: Callable) -> Callable:
        label = tool or fn.__name__

        @functools.wraps(fn)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            gate = ApprovalGate()
            # Render the call arguments in a JSON-able form for the approver.
            rendered = {"args": _safe(args), "kwargs": _safe(kwargs)}
            gate.require(label, side_effect=side_effect,
                         arguments=rendered, timeout=timeout)
            return fn(*args, **kwargs)

        return wrapper

    if _fn is None:
        return decorate
    return decorate(_fn)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _safe(obj: Any) -> Any:
|
|
80
|
+
"""Best-effort JSON-able rendering of call arguments for the approver to read."""
|
|
81
|
+
try:
|
|
82
|
+
import json
|
|
83
|
+
json.dumps(obj)
|
|
84
|
+
return obj
|
|
85
|
+
except Exception:
|
|
86
|
+
return repr(obj)
|