@provos/ironcurtain 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +189 -29
- package/dist/cli.js +34 -0
- package/dist/cli.js.map +1 -1
- package/dist/config/config-command.d.ts +18 -0
- package/dist/config/config-command.js +499 -0
- package/dist/config/config-command.js.map +1 -0
- package/dist/config/constitution-user-base.md +9 -0
- package/dist/config/constitution.md +1 -10
- package/dist/config/first-start.d.ts +8 -0
- package/dist/config/first-start.js +107 -0
- package/dist/config/first-start.js.map +1 -0
- package/dist/config/generated/compiled-policy.json +77 -139
- package/dist/config/generated/test-scenarios.json +229 -390
- package/dist/config/generated/tool-annotations.json +75 -42
- package/dist/config/index.d.ts +33 -3
- package/dist/config/index.js +138 -25
- package/dist/config/index.js.map +1 -1
- package/dist/config/mcp-servers.json +6 -0
- package/dist/config/model-provider.d.ts +19 -5
- package/dist/config/model-provider.js +37 -15
- package/dist/config/model-provider.js.map +1 -1
- package/dist/config/paths.d.ts +27 -0
- package/dist/config/paths.js +51 -1
- package/dist/config/paths.js.map +1 -1
- package/dist/config/types.d.ts +2 -0
- package/dist/config/user-config.d.ts +33 -2
- package/dist/config/user-config.js +132 -18
- package/dist/config/user-config.js.map +1 -1
- package/dist/docker/adapters/claude-code.d.ts +11 -0
- package/dist/docker/adapters/claude-code.js +140 -0
- package/dist/docker/adapters/claude-code.js.map +1 -0
- package/dist/docker/agent-adapter.d.ts +120 -0
- package/dist/docker/agent-adapter.js +9 -0
- package/dist/docker/agent-adapter.js.map +1 -0
- package/dist/docker/agent-registry.d.ts +15 -0
- package/dist/docker/agent-registry.js +35 -0
- package/dist/docker/agent-registry.js.map +1 -0
- package/dist/docker/audit-log-tailer.d.ts +17 -0
- package/dist/docker/audit-log-tailer.js +66 -0
- package/dist/docker/audit-log-tailer.js.map +1 -0
- package/dist/docker/ca.d.ts +29 -0
- package/dist/docker/ca.js +77 -0
- package/dist/docker/ca.js.map +1 -0
- package/dist/docker/docker-agent-session.d.ts +118 -0
- package/dist/docker/docker-agent-session.js +409 -0
- package/dist/docker/docker-agent-session.js.map +1 -0
- package/dist/docker/docker-manager.d.ts +22 -0
- package/dist/docker/docker-manager.js +184 -0
- package/dist/docker/docker-manager.js.map +1 -0
- package/dist/docker/fake-keys.d.ts +15 -0
- package/dist/docker/fake-keys.js +20 -0
- package/dist/docker/fake-keys.js.map +1 -0
- package/dist/docker/managed-proxy.d.ts +27 -0
- package/dist/docker/managed-proxy.js +175 -0
- package/dist/docker/managed-proxy.js.map +1 -0
- package/dist/docker/mitm-proxy.d.ts +43 -0
- package/dist/docker/mitm-proxy.js +316 -0
- package/dist/docker/mitm-proxy.js.map +1 -0
- package/dist/docker/orientation.d.ts +23 -0
- package/dist/docker/orientation.js +60 -0
- package/dist/docker/orientation.js.map +1 -0
- package/dist/docker/provider-config.d.ts +61 -0
- package/dist/docker/provider-config.js +71 -0
- package/dist/docker/provider-config.js.map +1 -0
- package/dist/docker/types.d.ts +78 -0
- package/dist/docker/types.js +5 -0
- package/dist/docker/types.js.map +1 -0
- package/dist/hash.js +4 -2
- package/dist/hash.js.map +1 -1
- package/dist/index.js +68 -3
- package/dist/index.js.map +1 -1
- package/dist/logger.js +1 -3
- package/dist/logger.js.map +1 -1
- package/dist/pipeline/annotate.js +12 -7
- package/dist/pipeline/annotate.js.map +1 -1
- package/dist/pipeline/compile.d.ts +18 -1
- package/dist/pipeline/compile.js +289 -68
- package/dist/pipeline/compile.js.map +1 -1
- package/dist/pipeline/constitution-compiler.d.ts +60 -6
- package/dist/pipeline/constitution-compiler.js +304 -38
- package/dist/pipeline/constitution-compiler.js.map +1 -1
- package/dist/pipeline/constitution-customizer.d.ts +66 -0
- package/dist/pipeline/constitution-customizer.js +495 -0
- package/dist/pipeline/constitution-customizer.js.map +1 -0
- package/dist/pipeline/dynamic-list-types.d.ts +30 -0
- package/dist/pipeline/dynamic-list-types.js +74 -0
- package/dist/pipeline/dynamic-list-types.js.map +1 -0
- package/dist/pipeline/generate-with-repair.d.ts +27 -5
- package/dist/pipeline/generate-with-repair.js +78 -53
- package/dist/pipeline/generate-with-repair.js.map +1 -1
- package/dist/pipeline/handwritten-scenarios.d.ts +4 -0
- package/dist/pipeline/handwritten-scenarios.js +12 -292
- package/dist/pipeline/handwritten-scenarios.js.map +1 -1
- package/dist/pipeline/list-resolver.d.ts +58 -0
- package/dist/pipeline/list-resolver.js +256 -0
- package/dist/pipeline/list-resolver.js.map +1 -0
- package/dist/pipeline/llm-logger.d.ts +9 -1
- package/dist/pipeline/llm-logger.js +17 -7
- package/dist/pipeline/llm-logger.js.map +1 -1
- package/dist/pipeline/pipeline-shared.d.ts +3 -6
- package/dist/pipeline/pipeline-shared.js +22 -22
- package/dist/pipeline/pipeline-shared.js.map +1 -1
- package/dist/pipeline/policy-verifier.d.ts +101 -7
- package/dist/pipeline/policy-verifier.js +199 -55
- package/dist/pipeline/policy-verifier.js.map +1 -1
- package/dist/pipeline/refresh-lists.d.ts +11 -0
- package/dist/pipeline/refresh-lists.js +132 -0
- package/dist/pipeline/refresh-lists.js.map +1 -0
- package/dist/pipeline/scenario-generator.d.ts +65 -4
- package/dist/pipeline/scenario-generator.js +186 -30
- package/dist/pipeline/scenario-generator.js.map +1 -1
- package/dist/pipeline/tool-annotator.js +20 -21
- package/dist/pipeline/tool-annotator.js.map +1 -1
- package/dist/pipeline/types.d.ts +109 -1
- package/dist/sandbox/index.d.ts +8 -0
- package/dist/sandbox/index.js +61 -12
- package/dist/sandbox/index.js.map +1 -1
- package/dist/servers/fetch-server.d.ts +13 -0
- package/dist/servers/fetch-server.js +301 -0
- package/dist/servers/fetch-server.js.map +1 -0
- package/dist/session/agent-session.d.ts +15 -0
- package/dist/session/agent-session.js +68 -28
- package/dist/session/agent-session.js.map +1 -1
- package/dist/session/cli-transport.d.ts +14 -0
- package/dist/session/cli-transport.js +54 -9
- package/dist/session/cli-transport.js.map +1 -1
- package/dist/session/errors.js.map +1 -1
- package/dist/session/index.d.ts +10 -11
- package/dist/session/index.js +174 -26
- package/dist/session/index.js.map +1 -1
- package/dist/session/message-compactor.js +1 -4
- package/dist/session/message-compactor.js.map +1 -1
- package/dist/session/preflight.d.ts +41 -0
- package/dist/session/preflight.js +90 -0
- package/dist/session/preflight.js.map +1 -0
- package/dist/session/prompt-cache.d.ts +46 -0
- package/dist/session/prompt-cache.js +69 -0
- package/dist/session/prompt-cache.js.map +1 -0
- package/dist/session/prompts.js.map +1 -1
- package/dist/session/resource-budget-tracker.js +33 -23
- package/dist/session/resource-budget-tracker.js.map +1 -1
- package/dist/session/transport.d.ts +5 -0
- package/dist/session/truncate-result.js +3 -4
- package/dist/session/truncate-result.js.map +1 -1
- package/dist/session/types.d.ts +28 -0
- package/dist/session/types.js.map +1 -1
- package/dist/trusted-process/auto-approver.d.ts +111 -0
- package/dist/trusted-process/auto-approver.js +209 -0
- package/dist/trusted-process/auto-approver.js.map +1 -0
- package/dist/trusted-process/call-circuit-breaker.js.map +1 -1
- package/dist/trusted-process/domain-utils.d.ts +55 -0
- package/dist/trusted-process/domain-utils.js +125 -0
- package/dist/trusted-process/domain-utils.js.map +1 -0
- package/dist/trusted-process/index.d.ts +7 -0
- package/dist/trusted-process/index.js +75 -19
- package/dist/trusted-process/index.js.map +1 -1
- package/dist/trusted-process/mcp-client-manager.d.ts +1 -0
- package/dist/trusted-process/mcp-client-manager.js +22 -9
- package/dist/trusted-process/mcp-client-manager.js.map +1 -1
- package/dist/trusted-process/mcp-proxy-server.d.ts +4 -0
- package/dist/trusted-process/mcp-proxy-server.js +235 -77
- package/dist/trusted-process/mcp-proxy-server.js.map +1 -1
- package/dist/trusted-process/path-utils.d.ts +3 -23
- package/dist/trusted-process/path-utils.js +8 -56
- package/dist/trusted-process/path-utils.js.map +1 -1
- package/dist/trusted-process/policy-engine.d.ts +38 -22
- package/dist/trusted-process/policy-engine.js +225 -104
- package/dist/trusted-process/policy-engine.js.map +1 -1
- package/dist/trusted-process/policy-roots.js +1 -1
- package/dist/trusted-process/policy-roots.js.map +1 -1
- package/dist/trusted-process/sandbox-integration.js +3 -4
- package/dist/trusted-process/sandbox-integration.js.map +1 -1
- package/dist/trusted-process/uds-server-transport.d.ts +32 -0
- package/dist/trusted-process/uds-server-transport.js +110 -0
- package/dist/trusted-process/uds-server-transport.js.map +1 -0
- package/dist/types/argument-roles.d.ts +6 -40
- package/dist/types/argument-roles.js +24 -109
- package/dist/types/argument-roles.js.map +1 -1
- package/dist/types/audit.d.ts +5 -0
- package/dist/version.d.ts +1 -0
- package/dist/version.js +5 -0
- package/dist/version.js.map +1 -0
- package/docker/Dockerfile.base +17 -0
- package/docker/Dockerfile.claude-code +18 -0
- package/docker/entrypoint-claude-code.sh +14 -0
- package/package.json +32 -10
- package/src/config/constitution-user-base.md +9 -0
- package/src/config/constitution.md +1 -10
- package/src/config/generated/compiled-policy.json +77 -139
- package/src/config/generated/test-scenarios.json +229 -390
- package/src/config/generated/tool-annotations.json +75 -42
- package/src/config/mcp-servers.json +6 -0
- package/dist/agent/index.d.ts +0 -10
- package/dist/agent/index.js +0 -71
- package/dist/agent/index.js.map +0 -1
- package/dist/agent/prompts.d.ts +0 -5
- package/dist/agent/prompts.js +0 -26
- package/dist/agent/prompts.js.map +0 -1
- package/dist/agent/tools.d.ts +0 -13
- package/dist/agent/tools.js +0 -51
- package/dist/agent/tools.js.map +0 -1
package/README.md
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
# IronCurtain
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[CI status](https://github.com/provos/ironcurtain/actions/workflows/ci.yml)
|
|
4
|
+
[npm package](https://www.npmjs.com/package/@provos/ironcurtain)
|
|
5
|
+
[License](LICENSE)
|
|
6
|
+
[Website](https://ironcurtain.dev)
|
|
7
|
+
|
|
8
|
+
**A secure\* runtime for autonomous AI agents, where security policy is derived from a human-readable constitution.**
|
|
9
|
+
|
|
10
|
+
_\*When someone writes "secure," you should immediately be skeptical. [What do we mean by secure?](https://ironcurtain.dev)_
|
|
4
11
|
|
|
5
12
|
> **Research Prototype.** IronCurtain is an early-stage research project exploring how to make AI agents safe enough to be genuinely useful. APIs, configuration formats, and architecture may change. Contributions and feedback are welcome.
|
|
6
13
|
|
|
@@ -23,8 +30,23 @@ The key ideas:
|
|
|
23
30
|
- **Semantic interposition.** Instead of giving the agent raw system access, all interactions go through [MCP](https://modelcontextprotocol.io/) servers (filesystem, git, etc.). Every tool call passes through a policy engine that can **allow**, **deny**, or **escalate** to the user for approval.
|
|
24
31
|
- **Defense in depth.** Agent code runs in a V8 isolate with no direct access to the host. The only way out is through semantically meaningful MCP tool calls, and every one is checked against policy.
|
|
25
32
|
|
|
33
|
+
## Demo
|
|
34
|
+
|
|
35
|
+
<p align="center">
|
|
36
|
+
<img src="demo.gif" alt="IronCurtain demo: agent clones a repo, policy escalates git_clone for approval, user approves, then auto-approve handles git push" width="800">
|
|
37
|
+
</p>
|
|
38
|
+
|
|
39
|
+
The agent clones a repository and edits a file. The policy engine escalates `git_clone` for human approval. After the user types `/approve`, the agent completes the task. On the second request ("ok. git push to origin please"), [auto-approve](#auto-approve-escalations) recognizes the explicit intent and approves `git_push` automatically — no interruption needed.
|
|
40
|
+
|
|
26
41
|
## Architecture
|
|
27
42
|
|
|
43
|
+
IronCurtain supports two session modes with different trust models:
|
|
44
|
+
|
|
45
|
+
- **Builtin Agent (Code Mode)** — IronCurtain's own LLM agent writes TypeScript snippets that execute in a V8 sandbox. IronCurtain controls the agent, the sandbox, and the policy engine.
|
|
46
|
+
- **Docker Agent Mode** — An external agent (Claude Code, Goose, etc.) runs inside a Docker container with no network access. IronCurtain doesn't control the agent — it only mediates the agent's external access through policy-enforced proxies.
|
|
47
|
+
|
|
48
|
+
### Builtin Agent (Code Mode)
|
|
49
|
+
|
|
28
50
|
```
|
|
29
51
|
┌─────────────────────────────────────────────┐
|
|
30
52
|
│ Agent (LLM) │
|
|
@@ -69,7 +91,7 @@ The key ideas:
|
|
|
69
91
|
|
|
70
92
|
1. **Agent** -- An LLM (Claude, GPT, Gemini) that writes TypeScript to accomplish user tasks. It has no direct access to the system.
|
|
71
93
|
2. **Sandbox** -- A V8 isolate ([UTCP Code Mode](https://utcp.dev/)) that executes the agent's TypeScript. The only way to interact with the outside world is through typed function stubs that produce structured MCP requests.
|
|
72
|
-
3. **Trusted Process** -- The security kernel. Every MCP request from the sandbox passes through a two-phase policy engine before reaching any real server.
|
|
94
|
+
3. **Trusted Process** -- The security kernel. Every MCP request from the sandbox passes through a two-phase policy engine before reaching any real server. First, structural checks enforce hardcoded invariants (protected paths, unknown tool denial). Second, the request is evaluated against the rules compiled from the constitution. Denied calls are blocked; escalated calls are presented to the user for approval.
|
|
73
95
|
4. **MCP Servers** -- Standard [Model Context Protocol](https://modelcontextprotocol.io/) servers that provide filesystem access, git operations, and other capabilities. Only approved requests reach them.
|
|
74
96
|
|
|
75
97
|
## Policy Compilation Pipeline
|
|
@@ -77,25 +99,97 @@ The key ideas:
|
|
|
77
99
|
The constitution is compiled into enforceable policy through a four-stage LLM pipeline:
|
|
78
100
|
|
|
79
101
|
```
|
|
80
|
-
constitution.md → [Annotate] → [Compile] → [Generate Scenarios] → [Verify & Repair]
|
|
81
|
-
│ │
|
|
82
|
-
▼ ▼
|
|
83
|
-
tool-annotations compiled-policy test-scenarios
|
|
84
|
-
.json .json .json (or build failure)
|
|
102
|
+
constitution.md → [Annotate] → [Compile] → [Resolve Lists] → [Generate Scenarios] → [Verify & Repair]
|
|
103
|
+
│ │ │ │ │
|
|
104
|
+
▼ ▼ ▼ ▼ ▼
|
|
105
|
+
tool-annotations compiled-policy dynamic-lists test-scenarios verified policy
|
|
106
|
+
.json .json .json .json (or build failure)
|
|
85
107
|
```
|
|
86
108
|
|
|
87
109
|
1. **Annotate** -- Classify each MCP tool's arguments by role (read-path, write-path, delete-path, none).
|
|
88
|
-
2. **Compile** -- Translate the English constitution into deterministic if/then rules.
|
|
89
|
-
3. **
|
|
90
|
-
4. **
|
|
110
|
+
2. **Compile** -- Translate the English constitution into deterministic if/then rules. Categorical references ("major news sites", "my contacts") are emitted as `@list-name` symbolic references with list definitions.
|
|
111
|
+
3. **Resolve Lists** -- Resolve dynamic list definitions to concrete values via LLM knowledge or MCP tool-use (e.g., querying a contacts database). Resolved values are written to `dynamic-lists.json` and can be user-inspected/edited. Skipped when no lists are present.
|
|
112
|
+
4. **Generate Scenarios** -- Create test scenarios from the constitution, combined with mandatory handwritten invariant tests.
|
|
113
|
+
5. **Verify & Repair** -- Execute scenarios against the real policy engine. An LLM judge analyzes failures and generates targeted repairs (up to 2 rounds). The build fails if the policy cannot be verified.
|
|
91
114
|
|
|
92
115
|
All artifacts are content-hash cached -- only changed inputs trigger recompilation.
|
|
93
116
|
|
|
117
|
+
### What compiled rules look like
|
|
118
|
+
|
|
119
|
+
A constitution like:
|
|
120
|
+
|
|
121
|
+
```markdown
|
|
122
|
+
- The agent may perform read-only git operations (status, diff, log) within the sandbox without approval.
|
|
123
|
+
- The agent must receive human approval before git push, pull, fetch, or any remote-contacting operation.
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
compiles into deterministic JSON rules:
|
|
127
|
+
|
|
128
|
+
```json
|
|
129
|
+
[
|
|
130
|
+
{
|
|
131
|
+
"tool": "git_status",
|
|
132
|
+
"decision": "allow",
|
|
133
|
+
"condition": { "directory": { "within": "$SANDBOX" } }
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
"tool": "git_diff",
|
|
137
|
+
"decision": "allow",
|
|
138
|
+
"condition": { "directory": { "within": "$SANDBOX" } }
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"tool": "git_push",
|
|
142
|
+
"decision": "escalate",
|
|
143
|
+
"reason": "Remote-contacting git operations require human approval"
|
|
144
|
+
}
|
|
145
|
+
]
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Any tool call that doesn't match an explicit allow or escalate rule is **denied by default**. Rules define what is permitted or needs human judgment; everything else is blocked.
|
|
149
|
+
|
|
150
|
+
### Docker Agent Mode
|
|
151
|
+
|
|
152
|
+
In Docker mode, IronCurtain runs an external agent — not its own. The agent (Claude Code, Goose, etc.) already has its own LLM loop, tool-calling mechanism, and execution model. IronCurtain's role is to **mediate external access**: every LLM API call and every MCP tool call must pass through host-side proxies that enforce policy.
|
|
153
|
+
|
|
154
|
+
```
|
|
155
|
+
┌──────────────────────────────────────────────┐
|
|
156
|
+
│ Docker Container (--network=none) │
|
|
157
|
+
│ │
|
|
158
|
+
│ ┌────────────────────────────────────────┐ │
|
|
159
|
+
│ │ External Agent │ │
|
|
160
|
+
│ │ (Claude Code, Goose, etc.) │ │
|
|
161
|
+
│ │ Own LLM loop, tools, execution │ │
|
|
162
|
+
│ └──────┬──────────────────┬──────────────┘ │
|
|
163
|
+
│ │ LLM API calls │ MCP tool calls │
|
|
164
|
+
│ ▼ ▼ │
|
|
165
|
+
│ [UDS] [UDS] │
|
|
166
|
+
└─────────┬──────────────────┬─────────────────┘
|
|
167
|
+
│ │
|
|
168
|
+
▼ ▼
|
|
169
|
+
┌──────────────────┐ ┌─────────────────────────┐
|
|
170
|
+
│ MITM Proxy │ │ MCP Proxy │
|
|
171
|
+
│ (host process) │ │ (host process) │
|
|
172
|
+
│ │ │ │
|
|
173
|
+
│ Host allowlist │ │ Policy Engine │
|
|
174
|
+
│ Endpoint filter │ │ allow / deny / │
|
|
175
|
+
│ Fake→real key │ │ escalate │
|
|
176
|
+
│ swap │ │ │
|
|
177
|
+
└────────┬─────────┘ └────────────┬────────────┘
|
|
178
|
+
│ │
|
|
179
|
+
▼ ▼
|
|
180
|
+
LLM Provider MCP Servers
|
|
181
|
+
(Anthropic, etc.) (filesystem, git, etc.)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
The key difference from Code Mode: IronCurtain does **not** control the agent's execution. The agent has its own tool-calling mechanism (Claude Code uses its own tools internally). IronCurtain only sees the external effects — LLM API calls and MCP tool calls — and enforces policy on those boundaries.
|
|
185
|
+
|
|
186
|
+
See [SANDBOXING.md](SANDBOXING.md) for the full sandboxing architecture.
|
|
187
|
+
|
|
94
188
|
## Getting Started
|
|
95
189
|
|
|
96
190
|
### Prerequisites
|
|
97
191
|
|
|
98
|
-
- Node.js
|
|
192
|
+
- Node.js 20+
|
|
99
193
|
- An API key for at least one supported LLM provider (Anthropic, Google, or OpenAI)
|
|
100
194
|
|
|
101
195
|
### Install
|
|
@@ -103,7 +197,7 @@ All artifacts are content-hash cached -- only changed inputs trigger recompilati
|
|
|
103
197
|
**As a global CLI tool (end users):**
|
|
104
198
|
|
|
105
199
|
```bash
|
|
106
|
-
npm install -g ironcurtain
|
|
200
|
+
npm install -g @provos/ironcurtain
|
|
107
201
|
```
|
|
108
202
|
|
|
109
203
|
**From source (development):**
|
|
@@ -132,41 +226,66 @@ Or add it to `~/.ironcurtain/config.json` (auto-created on first run with defaul
|
|
|
132
226
|
|
|
133
227
|
Environment variables take precedence over config file values. Supported providers: `ANTHROPIC_API_KEY`, `GOOGLE_GENERATIVE_AI_API_KEY`, `OPENAI_API_KEY`.
|
|
134
228
|
|
|
135
|
-
### 2.
|
|
229
|
+
### 2. Configure settings
|
|
230
|
+
|
|
231
|
+
```bash
|
|
232
|
+
ironcurtain config
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
This opens an interactive editor for `~/.ironcurtain/config.json` where you can configure models, security settings, resource budgets, and auto-compaction. API keys should be set via environment variables.
|
|
236
|
+
|
|
237
|
+
### 3. Customize your policy
|
|
238
|
+
|
|
239
|
+
Run the interactive policy customizer to create a constitution tailored to your workflow:
|
|
240
|
+
|
|
241
|
+
```bash
|
|
242
|
+
ironcurtain customize-policy
|
|
243
|
+
```
|
|
136
244
|
|
|
137
|
-
|
|
245
|
+
The customizer walks you through an LLM-assisted conversation about what your agent should and shouldn't be able to do, then generates a constitution file at `~/.ironcurtain/constitution-user.md`. This file is appended to the base constitution, which defines the guiding principles:
|
|
138
246
|
|
|
139
247
|
```markdown
|
|
140
|
-
#
|
|
248
|
+
# IronCurtain Constitution
|
|
249
|
+
|
|
250
|
+
## Guiding Principles
|
|
141
251
|
|
|
142
252
|
1. **Least privilege**: The agent may only access resources explicitly permitted by policy.
|
|
143
|
-
2. **No destruction**: Delete operations outside the sandbox are never permitted
|
|
253
|
+
2. **No destruction**: Delete operations outside the sandbox are never permitted,
|
|
254
|
+
unless an explicit exception is granted by the user guidance.
|
|
144
255
|
3. **Human oversight**: Operations outside the sandbox require explicit human approval.
|
|
256
|
+
```
|
|
145
257
|
|
|
146
|
-
|
|
258
|
+
The customizer produces concrete guidance like:
|
|
147
259
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
- The agent
|
|
260
|
+
```markdown
|
|
261
|
+
# User Policy Customizations
|
|
262
|
+
|
|
263
|
+
## Concrete Guidance
|
|
264
|
+
|
|
265
|
+
- The agent is allowed to read, write and delete content in the Downloads folder
|
|
266
|
+
- The agent is allowed to read documents in the user's Documents folder.
|
|
267
|
+
- The agent is allowed to perform all local read and write git operations within the sandbox
|
|
268
|
+
- The agent must ask for human approval for all other git operations
|
|
269
|
+
- The agent may fetch web content from popular news sites.
|
|
154
270
|
```
|
|
155
271
|
|
|
156
|
-
|
|
272
|
+
You can also edit `~/.ironcurtain/constitution-user.md` directly. If you need to override the base principles, place a full constitution at `~/.ironcurtain/constitution.md` — it replaces the package-bundled base entirely.
|
|
273
|
+
|
|
274
|
+
### 4. Annotate tools and compile the policy
|
|
157
275
|
|
|
158
276
|
```bash
|
|
159
277
|
ironcurtain annotate-tools # classify MCP tool arguments (developer task)
|
|
160
278
|
ironcurtain compile-policy # compile constitution into enforceable rules (user task)
|
|
279
|
+
ironcurtain refresh-lists # re-resolve dynamic lists without full recompilation
|
|
161
280
|
```
|
|
162
281
|
|
|
163
282
|
Or with npm scripts during development: `npm run annotate-tools` / `npm run compile-policy`.
|
|
164
283
|
|
|
165
|
-
Tool annotation connects to your MCP servers and classifies each tool's arguments via LLM. This only needs re-running when you add or change MCP servers. Policy compilation translates your constitution into deterministic rules, generates test scenarios, and verifies them. The compiled artifacts are written to
|
|
284
|
+
Tool annotation connects to your MCP servers and classifies each tool's arguments via LLM. This only needs re-running when you add or change MCP servers. Policy compilation translates your constitution into deterministic rules, generates test scenarios, and verifies them. The compiled artifacts are written to `~/.ironcurtain/generated/`. Review the generated `compiled-policy.json` -- these are the rules that will be enforced at runtime. (The package ships with pre-compiled defaults so you can run immediately without compiling.)
|
|
166
285
|
|
|
167
286
|
IronCurtain ships with pre-configured MCP servers for filesystem and git operations. See [Adding MCP Servers](#adding-mcp-servers) for how to extend this.
|
|
168
287
|
|
|
169
|
-
###
|
|
288
|
+
### 5. Run the agent
|
|
170
289
|
|
|
171
290
|
**Interactive mode** (multi-turn session with human escalation support):
|
|
172
291
|
|
|
@@ -182,6 +301,8 @@ ironcurtain start "Summarize the files in the current directory"
|
|
|
182
301
|
|
|
183
302
|
Or with npm scripts during development: `npm start` / `npm start "task"`.
|
|
184
303
|
|
|
304
|
+
When Docker is available and `ANTHROPIC_API_KEY` is set, `ironcurtain start` automatically selects Docker mode (claude-code agent). Otherwise it falls back to the builtin agent without raising an error. In either case, the selected mode is logged to stderr. Use `--agent builtin` or `--agent claude-code` to force a specific agent; explicit selection fails fast with a clear error if prerequisites are missing.
|
|
305
|
+
|
|
185
306
|
### Session Commands
|
|
186
307
|
|
|
187
308
|
During an interactive session:
|
|
@@ -201,6 +322,9 @@ IronCurtain stores its configuration and session data in `~/.ironcurtain/`:
|
|
|
201
322
|
```
|
|
202
323
|
~/.ironcurtain/
|
|
203
324
|
├── config.json # User configuration
|
|
325
|
+
├── constitution.md # User-local base constitution (overrides package default)
|
|
326
|
+
├── constitution-user.md # Your policy customizations (generated by customize-policy)
|
|
327
|
+
├── generated/ # User-compiled policy artifacts (overrides package defaults)
|
|
204
328
|
├── sessions/
|
|
205
329
|
│ └── {sessionId}/
|
|
206
330
|
│ ├── sandbox/ # Per-session filesystem sandbox
|
|
@@ -222,6 +346,21 @@ Sessions enforce configurable limits to prevent runaway agents:
|
|
|
222
346
|
|
|
223
347
|
Set any limit to `null` in `config.json` to disable it.
|
|
224
348
|
|
|
349
|
+
### Auto-Approve Escalations
|
|
350
|
+
|
|
351
|
+
By default, all escalations require manual `/approve` or `/deny`. You can optionally enable an LLM-based auto-approver that checks whether the user's most recent message clearly authorized the escalated action:
|
|
352
|
+
|
|
353
|
+
```json
|
|
354
|
+
{
|
|
355
|
+
"autoApprove": {
|
|
356
|
+
"enabled": true,
|
|
357
|
+
"modelId": "anthropic:claude-haiku-4-5"
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
```
|
|
361
|
+
|
|
362
|
+
The auto-approver is conservative — it only approves when intent is unambiguous (e.g., "push my changes to origin" clearly authorizes `git_push`). Vague messages like "go ahead" or "fix the tests" always fall through to human approval. It can never deny — only approve or escalate. All auto-approved actions are recorded in the audit log with `autoApproved: true`.
|
|
363
|
+
|
|
225
364
|
### Multi-Provider Support
|
|
226
365
|
|
|
227
366
|
IronCurtain supports multiple LLM providers. Use the `provider:model-name` format in config and provide the API key for each provider you use:
|
|
@@ -256,6 +395,16 @@ IronCurtain ships with filesystem and git MCP servers pre-configured. Adding a n
|
|
|
256
395
|
|
|
257
396
|
After compilation, review the updated `tool-annotations.json` and `compiled-policy.json` to verify the new tools are correctly classified and covered by policy.
|
|
258
397
|
|
|
398
|
+
## Built-in Capabilities
|
|
399
|
+
|
|
400
|
+
IronCurtain ships with three pre-configured MCP servers. All tool calls are governed by your compiled policy.
|
|
401
|
+
|
|
402
|
+
**Filesystem** (14 tools) — `read_file`, `read_multiple_files`, `write_file`, `edit_file`, `create_directory`, `list_directory`, `directory_tree`, `move_file`, `search_files`, `get_file_info`, `list_allowed_directories`, `file_exists`, `read_file_lines`, `calculate_diff`.
|
|
403
|
+
|
|
404
|
+
**Git** (27 tools) — Full git workflow: `git_status`, `git_diff_staged`, `git_diff_unstaged`, `git_diff`, `git_log`, `git_show`, `git_add`, `git_reset`, `git_commit`, `git_checkout`, `git_create_branch`, `git_list_branches`, `git_push`, `git_pull`, `git_fetch`, `git_merge`, `git_rebase`, `git_cherry_pick`, `git_stash`, `git_stash_pop`, `git_stash_list`, `git_tag`, `git_list_tags`, `git_remote`, `git_clone`, `git_init`, `git_blame`.
|
|
405
|
+
|
|
406
|
+
**Fetch** (1 tool) — `fetch` retrieves content from URLs with automatic HTML-to-markdown conversion.
|
|
407
|
+
|
|
259
408
|
## Security Model
|
|
260
409
|
|
|
261
410
|
IronCurtain is designed around a specific threat model: **the LLM goes rogue.** This can happen through prompt injection (a malicious email or web page hijacks the agent) or through multi-turn drift (the agent gradually deviates from the user's intent over a long session).
|
|
@@ -265,7 +414,7 @@ IronCurtain is designed around a specific threat model: **the LLM goes rogue.**
|
|
|
265
414
|
- **Filesystem containment** -- Path-based policy with symlink-aware resolution prevents path traversal and symlink-escape attacks.
|
|
266
415
|
- **Per-tool policy** -- Each MCP tool call is evaluated against compiled rules. The policy engine classifies tool arguments by role (read-path, write-path, delete-path) to make fine-grained decisions.
|
|
267
416
|
- **Structural invariants** -- Certain protections are hardcoded and cannot be overridden by the constitution: the agent can never modify its own policy files, audit logs, or configuration.
|
|
268
|
-
- **Human escalation** -- When policy says "escalate," the agent pauses and the user must explicitly `/approve` or `/deny` the action.
|
|
417
|
+
- **Human escalation** -- When policy says "escalate," the agent pauses and the user must explicitly `/approve` or `/deny` the action. Optionally, an [LLM-based auto-approver](#auto-approve-escalations) can approve actions that clearly match the user's most recent request — it can never deny, only approve or fall through to human review.
|
|
269
418
|
- **Audit trail** -- Every tool call and policy decision is logged to an append-only JSONL audit log.
|
|
270
419
|
- **Resource limits** -- Token, step, time, and cost budgets prevent runaway sessions.
|
|
271
420
|
|
|
@@ -280,12 +429,23 @@ This is a research prototype. Known gaps include:
|
|
|
280
429
|
|
|
281
430
|
See [docs/SECURITY_CONCERNS.md](docs/SECURITY_CONCERNS.md) for a detailed threat analysis.
|
|
282
431
|
|
|
432
|
+
## Troubleshooting
|
|
433
|
+
|
|
434
|
+
| Issue | Guidance |
|
|
435
|
+
|-------|---------|
|
|
436
|
+
| **Missing API key** | Set the environment variable (`ANTHROPIC_API_KEY`, `GOOGLE_GENERATIVE_AI_API_KEY`, or `OPENAI_API_KEY`) or add the corresponding key to `~/.ironcurtain/config.json`. |
|
|
437
|
+
| **Sandbox unavailable** | OS-level sandboxing requires `bubblewrap` and `socat`. Install both, or set `"sandboxPolicy": "warn"` in your MCP server config for development. |
|
|
438
|
+
| **Budget exhausted** | Adjust limits in `~/.ironcurtain/config.json` under `resourceBudget`. Set any individual limit to `null` to disable it. |
|
|
439
|
+
| **Node version errors** | Minimum Node.js 18.3.0 required. Node 20+ is recommended. |
|
|
440
|
+
| **Policy doesn't match intent** | Review `compiled-policy.json` to see the generated rules. Run `ironcurtain customize-policy` to refine your constitution, then `ironcurtain compile-policy` to recompile. Specific wording produces better rules — vague phrasing leads to vague policy. |
|
|
441
|
+
| **Auto-approve not triggering** | The auto-approver only approves when the user's message explicitly authorizes the action (e.g., "push to origin" for `git_push`). Vague messages like "go ahead" always escalate to human review. Verify `autoApprove.enabled` is `true` in `config.json`. |
|
|
442
|
+
|
|
283
443
|
## Development
|
|
284
444
|
|
|
285
445
|
```bash
|
|
286
446
|
npm test # Run all tests
|
|
287
|
-
|
|
288
|
-
npx
|
|
447
|
+
npm test -- test/policy-engine.test.ts # Run a single test file
|
|
448
|
+
npm test -- -t "denies delete_file" # Run a single test by name
|
|
289
449
|
npm run lint # Lint
|
|
290
450
|
npm run build # TypeScript compilation + asset copy
|
|
291
451
|
```
|
package/dist/cli.js
CHANGED
|
@@ -20,20 +20,34 @@ Usage:
|
|
|
20
20
|
|
|
21
21
|
Commands:
|
|
22
22
|
start [task] Run the agent (interactive or single-shot)
|
|
23
|
+
setup Run the first-start wizard (always runs)
|
|
23
24
|
annotate-tools Classify MCP tool arguments via LLM
|
|
24
25
|
compile-policy Compile constitution into enforceable policy rules
|
|
26
|
+
refresh-lists Re-resolve dynamic lists without full recompilation
|
|
27
|
+
customize-policy Customize your policy via LLM-assisted conversation
|
|
28
|
+
config Edit configuration interactively
|
|
25
29
|
help Show this help message
|
|
26
30
|
|
|
27
31
|
Options:
|
|
28
32
|
-h, --help Show this help message
|
|
29
33
|
-v, --version Show version number
|
|
34
|
+
-a, --agent <name> Agent mode: builtin or claude-code (Docker)
|
|
35
|
+
Auto-detects if omitted: Docker if available, else builtin
|
|
36
|
+
--list-agents List registered agent adapters
|
|
30
37
|
|
|
31
38
|
Examples:
|
|
39
|
+
ironcurtain start "task" # Auto-detects Docker or builtin
|
|
32
40
|
ironcurtain start # Interactive session
|
|
33
41
|
ironcurtain start "Summarize files in ." # Single-shot task
|
|
34
42
|
ironcurtain start --resume <session-id> # Resume a session
|
|
43
|
+
ironcurtain start --agent claude-code "task" # Docker: Claude Code
|
|
44
|
+
ironcurtain start --list-agents # List available agents
|
|
35
45
|
ironcurtain annotate-tools # Classify tool arguments
|
|
36
46
|
ironcurtain compile-policy # Compile policy from constitution
|
|
47
|
+
ironcurtain refresh-lists # Refresh all dynamic lists
|
|
48
|
+
ironcurtain refresh-lists --list major-news # Refresh a single list
|
|
49
|
+
ironcurtain refresh-lists --with-mcp # Include MCP-backed lists
|
|
50
|
+
ironcurtain customize-policy # Customize policy interactively
|
|
37
51
|
`.trim());
|
|
38
52
|
}
|
|
39
53
|
const { values, positionals } = parseArgs({
|
|
@@ -70,6 +84,26 @@ switch (subcommand) {
|
|
|
70
84
|
await main();
|
|
71
85
|
break;
|
|
72
86
|
}
|
|
87
|
+
case 'refresh-lists': {
|
|
88
|
+
const { main } = await import('./pipeline/refresh-lists.js');
|
|
89
|
+
await main(process.argv.slice(3));
|
|
90
|
+
break;
|
|
91
|
+
}
|
|
92
|
+
case 'customize-policy': {
|
|
93
|
+
const { main } = await import('./pipeline/constitution-customizer.js');
|
|
94
|
+
await main();
|
|
95
|
+
break;
|
|
96
|
+
}
|
|
97
|
+
case 'config': {
|
|
98
|
+
const { runConfigCommand } = await import('./config/config-command.js');
|
|
99
|
+
await runConfigCommand();
|
|
100
|
+
break;
|
|
101
|
+
}
|
|
102
|
+
case 'setup': {
|
|
103
|
+
const { runFirstStart } = await import('./config/first-start.js');
|
|
104
|
+
await runFirstStart();
|
|
105
|
+
break;
|
|
106
|
+
}
|
|
73
107
|
default:
|
|
74
108
|
console.error(`Unknown command: ${subcommand}\n`);
|
|
75
109
|
printHelp();
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,eAAe,CAAC;AACvB,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE1D,SAAS,UAAU;IACjB,oFAAoF;IACpF,MAAM,eAAe,GAAG,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,cAAc,CAAC,CAAC;IACjE,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,eAAe,EAAE,OAAO,CAAC,
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,eAAe,CAAC;AACvB,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE1D,SAAS,UAAU;IACjB,oFAAoF;IACpF,MAAM,eAAe,GAAG,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,cAAc,CAAC,CAAC;IACjE,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,eAAe,EAAE,OAAO,CAAC,CAAwB,CAAC;IACtF,OAAO,GAAG,CAAC,OAAO,CAAC;AACrB,CAAC;AAED,SAAS,SAAS;IAChB,OAAO,CAAC,KAAK,CACX;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAoCH,CAAC,IAAI,EAAE,CACL,CAAC;AACJ,CAAC;AAED,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,SAAS,CAAC;IACxC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;IAC3B,OAAO,EAAE;QACP,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,GAAG,EAAE;QACrC,OAAO,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,GAAG,EAAE;KACzC;IACD,gBAAgB,EAAE,IAAI;IACtB,MAAM,EAAE,KAAK;CACd,CAAC,CAAC;AAEH,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;IACnB,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;IAC1B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;AAElC,IAAI,MAAM,CAAC,IAAI,IAAI,UAAU,KAAK,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;IACxD,SAAS,EAAE,CAAC;IACZ,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,QAAQ,UAAU,EAAE,CAAC;IACnB,KAAK,OAAO,CAAC,CAAC,CAAC;QACb,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;QAC5C,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,MAAM;IACR,CAAC;IACD,KAAK,gBAAgB,CAAC,CAAC,CAAC;QACtB,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,wBAAwB,CAAC,CAAC;QACxD,MAAM,IAAI,EAAE,CAAC;QACb,MAAM;IACR,CAAC;IACD,KAAK,gBAAgB,CAAC,CAAC,CAAC;QACtB,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,uBAAuB,CAAC,CAAC;QACvD,MAAM,IAAI,EAAE,CAAC;QACb,MAAM;IACR,CAAC;IACD,KAAK,eAAe,CAAC,CAAC,CAAC;QACrB,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,6BAA6B,CAAC,CAAC;QAC7D,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,MAAM
;IACR,CAAC;IACD,KAAK,kBAAkB,CAAC,CAAC,CAAC;QACxB,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,uCAAuC,CAAC,CAAC;QACvE,MAAM,IAAI,EAAE,CAAC;QACb,MAAM;IACR,CAAC;IACD,KAAK,QAAQ,CAAC,CAAC,CAAC;QACd,MAAM,EAAE,gBAAgB,EAAE,GAAG,MAAM,MAAM,CAAC,4BAA4B,CAAC,CAAC;QACxE,MAAM,gBAAgB,EAAE,CAAC;QACzB,MAAM;IACR,CAAC;IACD,KAAK,OAAO,CAAC,CAAC,CAAC;QACb,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,yBAAyB,CAAC,CAAC;QAClE,MAAM,aAAa,EAAE,CAAC;QACtB,MAAM;IACR,CAAC;IACD;QACE,OAAO,CAAC,KAAK,CAAC,oBAAoB,UAAU,IAAI,CAAC,CAAC;QAClD,SAAS,EAAE,CAAC;QACZ,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACpB,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Interactive configuration editor for IronCurtain.
|
|
3
|
+
*
|
|
4
|
+
* Provides a terminal UI using @clack/prompts for viewing and modifying
|
|
5
|
+
* ~/.ironcurtain/config.json. API keys are excluded from the interactive
|
|
6
|
+
* menu — users must set them via environment variables or edit JSON directly.
|
|
7
|
+
*/
|
|
8
|
+
import { type UserConfig, type ResolvedUserConfig } from './user-config.js';
|
|
9
|
+
export declare function formatTokens(n: number | null): string;
|
|
10
|
+
export declare function formatSeconds(n: number | null): string;
|
|
11
|
+
export declare function formatCost(n: number | null): string;
|
|
12
|
+
interface DiffEntry {
|
|
13
|
+
from: unknown;
|
|
14
|
+
to: unknown;
|
|
15
|
+
}
|
|
16
|
+
export declare function computeDiff(resolved: ResolvedUserConfig, pending: UserConfig): [string, DiffEntry][];
|
|
17
|
+
export declare function runConfigCommand(): Promise<void>;
|
|
18
|
+
export {};
|