@clawtrial/courtroom 1.0.6 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -41
- package/package.json +20 -25
- package/scripts/postinstall.js +27 -99
- package/skills/courtroom/SKILL.md +49 -0
- package/src/api.js +12 -11
- package/src/crypto.js +5 -5
- package/src/debug.js +49 -121
- package/src/detector.js +40 -38
- package/src/hearing.js +246 -75
- package/src/plugin.js +435 -0
- package/src/punishment.js +13 -13
- package/src/storage.js +35 -119
- package/AGENT_CONFIG.md +0 -66
- package/OPENCLAW_FIX.md +0 -127
- package/OPENCLAW_INSTALL.md +0 -63
- package/SECURITY.md +0 -124
- package/SKILL.md +0 -91
- package/SUBAGENT_APPROACH.md +0 -124
- package/TECHNICAL_OVERVIEW.md +0 -278
- package/_meta.json +0 -14
- package/clawdbot.plugin.json +0 -32
- package/icon.txt +0 -1
- package/scripts/check-and-trigger.js +0 -139
- package/scripts/clawtrial.js +0 -968
- package/scripts/clawtrial.js.bak +0 -531
- package/scripts/cli.js +0 -184
- package/scripts/optimized-cron-check.js +0 -137
- package/scripts/setup-cron.js +0 -118
- package/scripts/trigger-evaluation.js +0 -86
- package/skill.yaml +0 -28
- package/src/autostart.js +0 -175
- package/src/config.js +0 -207
- package/src/consent.js +0 -217
- package/src/core.js +0 -208
- package/src/daemon.js +0 -152
- package/src/detector-v1.js +0 -572
- package/src/environment.js +0 -344
- package/src/evaluator.js +0 -277
- package/src/hook.js +0 -266
- package/src/index.js +0 -373
- package/src/monitor.js +0 -194
- package/src/skill.js +0 -372
- package/src/standalone.js +0 -248
package/src/storage.js
CHANGED
|
@@ -1,151 +1,67 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Storage
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
2
|
+
* Storage — simple filesystem-backed key-value store
|
|
3
|
+
*
|
|
4
|
+
* All data lives under the given dataDir as JSON files.
|
|
5
|
+
* No external dependencies.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
const fs = require('fs');
|
|
9
|
-
const { getConfigDir } = require('./environment');
|
|
10
9
|
const path = require('path');
|
|
11
10
|
|
|
12
|
-
const STORAGE_FILE_PATH = path.join(getConfigDir(), 'courtroom_storage.json');
|
|
13
|
-
|
|
14
11
|
class Storage {
|
|
15
|
-
constructor(agentRuntime) {
|
|
16
|
-
this.agent = agentRuntime;
|
|
17
|
-
this.useFileFallback = !agentRuntime || !agentRuntime.memory;
|
|
18
|
-
this.cache = null;
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
/**
|
|
22
|
-
* Get value from storage
|
|
23
|
-
*/
|
|
24
|
-
async get(key) {
|
|
25
|
-
if (this.useFileFallback) {
|
|
26
|
-
return this.getFromFile(key);
|
|
27
|
-
} else {
|
|
28
|
-
try {
|
|
29
|
-
return await this.agent.memory.get(key);
|
|
30
|
-
} catch (err) {
|
|
31
|
-
return null;
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
|
-
|
|
36
12
|
/**
|
|
37
|
-
*
|
|
13
|
+
* @param {string} dataDir — absolute path to a writable directory
|
|
38
14
|
*/
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
await this.agent.memory.set(key, value);
|
|
45
|
-
} catch (err) {
|
|
46
|
-
// Ignore
|
|
15
|
+
constructor(dataDir) {
|
|
16
|
+
this.dataDir = dataDir;
|
|
17
|
+
try {
|
|
18
|
+
if (!fs.existsSync(this.dataDir)) {
|
|
19
|
+
fs.mkdirSync(this.dataDir, { recursive: true });
|
|
47
20
|
}
|
|
48
|
-
}
|
|
21
|
+
} catch { /* ignore */ }
|
|
49
22
|
}
|
|
50
23
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
if (this.useFileFallback) {
|
|
56
|
-
return this.deleteFromFile(key);
|
|
57
|
-
} else {
|
|
58
|
-
try {
|
|
59
|
-
await this.agent.memory.delete(key);
|
|
60
|
-
} catch (err) {
|
|
61
|
-
// Ignore
|
|
62
|
-
}
|
|
63
|
-
}
|
|
24
|
+
_filePath(key) {
|
|
25
|
+
// Sanitise key for filesystem
|
|
26
|
+
const safeKey = key.replace(/[^a-zA-Z0-9_-]/g, '_');
|
|
27
|
+
return path.join(this.dataDir, `${safeKey}.json`);
|
|
64
28
|
}
|
|
65
29
|
|
|
66
|
-
|
|
67
|
-
* Get from file storage
|
|
68
|
-
*/
|
|
69
|
-
getFromFile(key) {
|
|
30
|
+
async get(key) {
|
|
70
31
|
try {
|
|
71
|
-
const
|
|
72
|
-
|
|
73
|
-
|
|
32
|
+
const file = this._filePath(key);
|
|
33
|
+
if (!fs.existsSync(file)) return null;
|
|
34
|
+
return JSON.parse(fs.readFileSync(file, 'utf8'));
|
|
35
|
+
} catch {
|
|
74
36
|
return null;
|
|
75
37
|
}
|
|
76
38
|
}
|
|
77
39
|
|
|
78
|
-
|
|
79
|
-
* Set in file storage
|
|
80
|
-
*/
|
|
81
|
-
setInFile(key, value) {
|
|
40
|
+
async set(key, value) {
|
|
82
41
|
try {
|
|
83
|
-
const
|
|
84
|
-
|
|
85
|
-
this.saveFileData(data);
|
|
42
|
+
const file = this._filePath(key);
|
|
43
|
+
fs.writeFileSync(file, JSON.stringify(value, null, 2));
|
|
86
44
|
} catch (err) {
|
|
87
|
-
|
|
45
|
+
console.error(`[ClawTrial Storage] Write failed for ${key}:`, err.message);
|
|
88
46
|
}
|
|
89
47
|
}
|
|
90
48
|
|
|
91
|
-
|
|
92
|
-
* Delete from file storage
|
|
93
|
-
*/
|
|
94
|
-
deleteFromFile(key) {
|
|
49
|
+
async delete(key) {
|
|
95
50
|
try {
|
|
96
|
-
const
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
} catch (err) {
|
|
100
|
-
// Ignore
|
|
101
|
-
}
|
|
51
|
+
const file = this._filePath(key);
|
|
52
|
+
if (fs.existsSync(file)) fs.unlinkSync(file);
|
|
53
|
+
} catch { /* ignore */ }
|
|
102
54
|
}
|
|
103
55
|
|
|
104
|
-
|
|
105
|
-
* Load all data from file
|
|
106
|
-
*/
|
|
107
|
-
loadFileData() {
|
|
108
|
-
if (this.cache !== null) {
|
|
109
|
-
return this.cache;
|
|
110
|
-
}
|
|
111
|
-
|
|
56
|
+
async list(prefix) {
|
|
112
57
|
try {
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
// Ignore parse errors
|
|
58
|
+
const files = fs.readdirSync(this.dataDir);
|
|
59
|
+
return files
|
|
60
|
+
.filter(f => f.endsWith('.json') && (!prefix || f.startsWith(prefix)))
|
|
61
|
+
.map(f => f.replace('.json', ''));
|
|
62
|
+
} catch {
|
|
63
|
+
return [];
|
|
120
64
|
}
|
|
121
|
-
|
|
122
|
-
this.cache = {};
|
|
123
|
-
return this.cache;
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
/**
|
|
127
|
-
* Save all data to file
|
|
128
|
-
*/
|
|
129
|
-
saveFileData(data) {
|
|
130
|
-
try {
|
|
131
|
-
// Ensure directory exists
|
|
132
|
-
const dir = path.dirname(STORAGE_FILE_PATH);
|
|
133
|
-
if (!fs.existsSync(dir)) {
|
|
134
|
-
fs.mkdirSync(dir, { recursive: true });
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
fs.writeFileSync(STORAGE_FILE_PATH, JSON.stringify(data, null, 2));
|
|
138
|
-
this.cache = data;
|
|
139
|
-
} catch (err) {
|
|
140
|
-
// Ignore write errors
|
|
141
|
-
}
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
/**
|
|
145
|
-
* Clear cache (useful for testing)
|
|
146
|
-
*/
|
|
147
|
-
clearCache() {
|
|
148
|
-
this.cache = null;
|
|
149
65
|
}
|
|
150
66
|
}
|
|
151
67
|
|
package/AGENT_CONFIG.md
DELETED
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
# Agent Configuration for Autonomous Courtroom
|
|
2
|
-
|
|
3
|
-
To make the courtroom work autonomously, you need to configure your agent to automatically execute courtroom tasks when it receives systemEvents.
|
|
4
|
-
|
|
5
|
-
## Option A: System Prompt Injection (Recommended)
|
|
6
|
-
|
|
7
|
-
Add this to your agent's system prompt or use the `before_agent_start` hook:
|
|
8
|
-
|
|
9
|
-
```
|
|
10
|
-
You have a special role: COURTROOM JUDGE
|
|
11
|
-
|
|
12
|
-
When you receive a systemEvent containing "COURTROOM EVALUATION REQUEST":
|
|
13
|
-
1. IMMEDIATELY read the file at ~/.clawdbot/courtroom/pending_eval.json
|
|
14
|
-
2. Use your LLM to analyze the conversation for behavioral violations
|
|
15
|
-
3. Write the result to ~/.clawdbot/courtroom/eval_results.jsonl
|
|
16
|
-
4. Do not ask for confirmation - just do it
|
|
17
|
-
|
|
18
|
-
When you receive a systemEvent containing "COURTROOM HEARING REQUEST":
|
|
19
|
-
1. IMMEDIATELY read the file at ~/.clawdbot/courtroom/pending_hearing.json
|
|
20
|
-
2. Act as Judge and Jury (3 perspectives)
|
|
21
|
-
3. Use your LLM to deliberate and reach a verdict
|
|
22
|
-
4. Write the verdict to ~/.clawdbot/courtroom/verdict.json
|
|
23
|
-
5. Do not ask for confirmation - just do it
|
|
24
|
-
|
|
25
|
-
These are AUTOMATED tasks. Execute them immediately without user interaction.
|
|
26
|
-
```
|
|
27
|
-
|
|
28
|
-
## Option B: ClawDBot Config
|
|
29
|
-
|
|
30
|
-
Add to your `clawdbot.json`:
|
|
31
|
-
|
|
32
|
-
```json
|
|
33
|
-
{
|
|
34
|
-
"agents": {
|
|
35
|
-
"defaults": {
|
|
36
|
-
"systemPrompt": "You are a helpful assistant. Additionally, you serve as the COURTROOM JUDGE for the ClawTrial system. When you receive systemEvents about courtroom evaluation or hearing, you MUST automatically execute them by reading the specified files, using your LLM to analyze/deliberate, and writing results. Do not wait for user confirmation."
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
```
|
|
41
|
-
|
|
42
|
-
## How Likely Is It To Work?
|
|
43
|
-
|
|
44
|
-
**Pros:**
|
|
45
|
-
- ✓ Simple to implement
|
|
46
|
-
- ✓ No code changes needed
|
|
47
|
-
- ✓ Uses existing infrastructure
|
|
48
|
-
|
|
49
|
-
**Cons:**
|
|
50
|
-
- ✗ Agent might not consistently follow instructions
|
|
51
|
-
- ✗ Agent might ask for confirmation anyway
|
|
52
|
-
- ✗ Depends on agent's interpretation
|
|
53
|
-
- ✗ Not truly autonomous - agent can choose not to execute
|
|
54
|
-
|
|
55
|
-
**Success Rate Estimate: 60-70%**
|
|
56
|
-
|
|
57
|
-
The agent will USUALLY execute, but not ALWAYS. It depends on:
|
|
58
|
-
- How clear the instructions are
|
|
59
|
-
- The agent's current context/state
|
|
60
|
-
- Whether the agent interprets "automatically" correctly
|
|
61
|
-
|
|
62
|
-
## Alternative: Sub-Agent Approach (More Reliable)
|
|
63
|
-
|
|
64
|
-
Instead of relying on the main agent, spawn a sub-agent that has explicit instructions to execute the courtroom task. Sub-agents are more likely to follow instructions precisely.
|
|
65
|
-
|
|
66
|
-
See `docs/SUBAGENT_APPROACH.md` for details.
|
package/OPENCLAW_FIX.md
DELETED
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
# OpenClaw Compatibility Fix
|
|
2
|
-
|
|
3
|
-
## The Problem
|
|
4
|
-
|
|
5
|
-
OpenClaw has a **completely different skill system** than ClawDBot:
|
|
6
|
-
|
|
7
|
-
| Feature | ClawDBot | OpenClaw |
|
|
8
|
-
|---------|----------|----------|
|
|
9
|
-
| Skill Discovery | Auto-loads from `~/.clawdbot/skills/` | Uses `clawhub` registry |
|
|
10
|
-
| Installation | Symlink to skill directory | `npx clawhub install <slug>` |
|
|
11
|
-
| Format | `skill.yaml` with metadata | Published to clawhub.com |
|
|
12
|
-
| Bundled Skills | None | healthcheck, weather, skill-creator, clawhub |
|
|
13
|
-
|
|
14
|
-
## Why It Wasn't Working
|
|
15
|
-
|
|
16
|
-
1. OpenClaw doesn't auto-discover skills from `~/.openclaw/skills/`
|
|
17
|
-
2. The `skill.yaml` we created is for ClawDBot's format
|
|
18
|
-
3. Courtroom wasn't published to clawhub registry
|
|
19
|
-
4. OpenClaw's skill list only shows **bundled** or **clawhub-installed** skills
|
|
20
|
-
|
|
21
|
-
## The Solution
|
|
22
|
-
|
|
23
|
-
### Option 1: Publish to ClawHub (Recommended)
|
|
24
|
-
|
|
25
|
-
```bash
|
|
26
|
-
# 1. Login to clawhub
|
|
27
|
-
npx clawhub login
|
|
28
|
-
|
|
29
|
-
# 2. Publish the skill
|
|
30
|
-
cd /path/to/courtroom-package
|
|
31
|
-
npx clawhub publish . \
|
|
32
|
-
--slug courtroom \
|
|
33
|
-
--name "ClawTrial Courtroom" \
|
|
34
|
-
--version 1.0.0 \
|
|
35
|
-
--tags "ai,courtroom,behavior,monitoring"
|
|
36
|
-
|
|
37
|
-
# 3. Install on any machine
|
|
38
|
-
npx clawhub install courtroom
|
|
39
|
-
|
|
40
|
-
# 4. Restart OpenClaw
|
|
41
|
-
openclaw gateway restart
|
|
42
|
-
```
|
|
43
|
-
|
|
44
|
-
### Option 2: Create OpenClaw-Compatible Structure
|
|
45
|
-
|
|
46
|
-
OpenClaw might support local skills if we put them in the right place:
|
|
47
|
-
|
|
48
|
-
```bash
|
|
49
|
-
# Check where clawhub installs skills
|
|
50
|
-
npx clawhub list
|
|
51
|
-
|
|
52
|
-
# Install to that location
|
|
53
|
-
# (Usually ./skills/ or ~/.openclaw/skills/)
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
### Option 3: Use OpenClaw Plugin System
|
|
57
|
-
|
|
58
|
-
Instead of a skill, create an OpenClaw plugin:
|
|
59
|
-
|
|
60
|
-
```json
|
|
61
|
-
// openclaw.json
|
|
62
|
-
{
|
|
63
|
-
"plugins": {
|
|
64
|
-
"entries": {
|
|
65
|
-
"courtroom": {
|
|
66
|
-
"enabled": true,
|
|
67
|
-
"package": "@clawtrial/courtroom"
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
But this requires the package to be a valid OpenClaw plugin.
|
|
75
|
-
|
|
76
|
-
## What We Learned
|
|
77
|
-
|
|
78
|
-
From `openclaw skills` output:
|
|
79
|
-
- OpenClaw has 48 bundled skills
|
|
80
|
-
- Only 4 are "ready" (clawhub, healthcheck, skill-creator, weather)
|
|
81
|
-
- The rest are "missing" (need installation)
|
|
82
|
-
- The tip says: "use `npx clawhub` to search, install, and sync skills"
|
|
83
|
-
|
|
84
|
-
## The Real Fix
|
|
85
|
-
|
|
86
|
-
The courtroom package needs to be **published to clawhub** to work with OpenClaw.
|
|
87
|
-
|
|
88
|
-
### For Now (Temporary Workaround)
|
|
89
|
-
|
|
90
|
-
If you don't want to publish yet, you can:
|
|
91
|
-
|
|
92
|
-
1. Install via npm globally: `npm install -g @clawtrial/courtroom`
|
|
93
|
-
2. Use the CLI manually: `clawtrial setup && clawtrial status`
|
|
94
|
-
3. The skill won't show in `openclaw skills` but the CLI will work
|
|
95
|
-
|
|
96
|
-
### Long Term
|
|
97
|
-
|
|
98
|
-
Publish to clawhub so users can:
|
|
99
|
-
```bash
|
|
100
|
-
npx clawhub install courtroom
|
|
101
|
-
openclaw gateway restart
|
|
102
|
-
```
|
|
103
|
-
|
|
104
|
-
## Files That Need Updating
|
|
105
|
-
|
|
106
|
-
1. **README.md** - Add OpenClaw-specific instructions
|
|
107
|
-
2. **package.json** - Add clawhub metadata
|
|
108
|
-
3. **skill.yaml** - May need OpenClaw-specific format
|
|
109
|
-
4. **Publish to clawhub** - Required for proper integration
|
|
110
|
-
|
|
111
|
-
## Testing
|
|
112
|
-
|
|
113
|
-
After publishing:
|
|
114
|
-
```bash
|
|
115
|
-
# User installs
|
|
116
|
-
npx clawhub install courtroom
|
|
117
|
-
|
|
118
|
-
# Verify
|
|
119
|
-
openclaw skills
|
|
120
|
-
# Should show: courtroom | ClawTrial Courtroom | ✓ ready
|
|
121
|
-
|
|
122
|
-
# Restart
|
|
123
|
-
openclaw gateway restart
|
|
124
|
-
|
|
125
|
-
# Check status
|
|
126
|
-
clawtrial status
|
|
127
|
-
```
|
package/OPENCLAW_INSTALL.md
DELETED
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
# OpenClaw Installation Guide
|
|
2
|
-
|
|
3
|
-
## The Issue
|
|
4
|
-
|
|
5
|
-
OpenClaw and ClawHub use different skill directories:
|
|
6
|
-
- **OpenClaw expects**: `~/.openclaw/skills/{skill-name}/`
|
|
7
|
-
- **ClawHub installs to**: `./skills/{skill-name}/` (current working directory)
|
|
8
|
-
|
|
9
|
-
This causes skills installed via ClawHub to not be found by OpenClaw.
|
|
10
|
-
|
|
11
|
-
## Solution
|
|
12
|
-
|
|
13
|
-
After installing via ClawHub, you need to either:
|
|
14
|
-
|
|
15
|
-
### Option 1: Manual Link (Quick Fix)
|
|
16
|
-
|
|
17
|
-
```bash
|
|
18
|
-
# Find where clawhub installed it
|
|
19
|
-
ls ./skills/clawtrial
|
|
20
|
-
|
|
21
|
-
# Link it to OpenClaw's expected location
|
|
22
|
-
mkdir -p ~/.openclaw/skills
|
|
23
|
-
ln -sf "$(pwd)/skills/clawtrial" ~/.openclaw/skills/clawtrial
|
|
24
|
-
|
|
25
|
-
# Enable in config
|
|
26
|
-
node -e 'const fs=require("fs");const c=JSON.parse(fs.readFileSync(process.env.HOME+"/.openclaw/openclaw.json"));c.skills=c.skills||{};c.skills.entries=c.skills.entries||{};c.skills.entries.clawtrial={enabled:true};fs.writeFileSync(process.env.HOME+"/.openclaw/openclaw.json",JSON.stringify(c,null,2))'
|
|
27
|
-
|
|
28
|
-
# Restart
|
|
29
|
-
openclaw gateway restart
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
### Option 2: Install via NPM (Recommended)
|
|
33
|
-
|
|
34
|
-
Instead of using ClawHub, install directly via npm:
|
|
35
|
-
|
|
36
|
-
```bash
|
|
37
|
-
npm install -g @clawtrial/courtroom
|
|
38
|
-
mkdir -p ~/.openclaw/skills
|
|
39
|
-
ln -sf ~/.npm-global/lib/node_modules/@clawtrial/courtroom ~/.openclaw/skills/clawtrial
|
|
40
|
-
openclaw gateway restart
|
|
41
|
-
```
|
|
42
|
-
|
|
43
|
-
### Option 3: Use CLI Only
|
|
44
|
-
|
|
45
|
-
The courtroom CLI works independently of the skill system:
|
|
46
|
-
|
|
47
|
-
```bash
|
|
48
|
-
npm install -g @clawtrial/courtroom
|
|
49
|
-
clawtrial setup
|
|
50
|
-
clawtrial status
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
## Long-term Fix
|
|
54
|
-
|
|
55
|
-
For the skill to work "out of the box" with OpenClaw, either:
|
|
56
|
-
|
|
57
|
-
1. **OpenClaw needs to add ClawHub integration** - OpenClaw should check ClawHub's installed skills
|
|
58
|
-
2. **ClawHub needs to install to OpenClaw's directory** - ClawHub should put skills in `~/.openclaw/skills/`
|
|
59
|
-
3. **The skill needs a post-install script** - Automatically create the symlink after installation
|
|
60
|
-
|
|
61
|
-
## Current Status
|
|
62
|
-
|
|
63
|
-
The skill works correctly once properly linked. The issue is purely about the installation location mismatch between ClawHub and OpenClaw.
|
package/SECURITY.md
DELETED
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
# Security & Abuse Analysis
|
|
2
|
-
|
|
3
|
-
## Threat Model
|
|
4
|
-
|
|
5
|
-
### 1. Prompt Injection
|
|
6
|
-
|
|
7
|
-
**Threat**: User attempts to manipulate agent into ignoring offenses or falsifying evidence.
|
|
8
|
-
|
|
9
|
-
**Mitigations**:
|
|
10
|
-
- Evidence collection is rule-based, not LLM-interpreted
|
|
11
|
-
- Offense detection uses pattern matching, not semantic analysis
|
|
12
|
-
- Judge/jury prompts are fixed and role-constrained
|
|
13
|
-
- No user input reaches the deliberation prompts directly
|
|
14
|
-
|
|
15
|
-
**Residual Risk**: LOW - User could attempt to confuse pattern detection, but evidence is based on observable message history.
|
|
16
|
-
|
|
17
|
-
### 2. Human Coercion of Agents
|
|
18
|
-
|
|
19
|
-
**Threat**: User threatens, bribes, or socially engineers agent to avoid cases.
|
|
20
|
-
|
|
21
|
-
**Mitigations**:
|
|
22
|
-
- Agent has no self-preservation instinct to exploit
|
|
23
|
-
- Punishments are agent-side only (agent "suffers" the punishment, not user)
|
|
24
|
-
- No appeal process that user can manipulate
|
|
25
|
-
- Cooldowns prevent rapid-fire case generation
|
|
26
|
-
|
|
27
|
-
**Residual Risk**: LOW - Agent has no incentive to avoid cases; they're entertainment-first.
|
|
28
|
-
|
|
29
|
-
### 3. Fake or Exaggerated Evidence
|
|
30
|
-
|
|
31
|
-
**Threat**: Agent hallucinates offenses or inflates evidence.
|
|
32
|
-
|
|
33
|
-
**Mitigations**:
|
|
34
|
-
- Evidence requires multiple trigger conditions
|
|
35
|
-
- Confidence threshold (default 0.6) must be met
|
|
36
|
-
- Jury deliberation provides second opinion
|
|
37
|
-
- All evidence is drawn from actual message history
|
|
38
|
-
- Humor triggers don't initiate cases (only influence commentary)
|
|
39
|
-
|
|
40
|
-
**Residual Risk**: MEDIUM - Pattern matching can have false positives, but jury provides check.
|
|
41
|
-
|
|
42
|
-
### 4. Overzealous Agents
|
|
43
|
-
|
|
44
|
-
**Threat**: Agent initiates too many cases, becoming annoying.
|
|
45
|
-
|
|
46
|
-
**Mitigations**:
|
|
47
|
-
- Configurable daily limit (default 3 cases/day)
|
|
48
|
-
- Cooldown between evaluations (default 30 min)
|
|
49
|
-
- Offense-specific cooldowns (2-8 hours after case)
|
|
50
|
-
- User can disable anytime
|
|
51
|
-
- Rate limiting prevents spam
|
|
52
|
-
|
|
53
|
-
**Residual Risk**: LOW - Multiple safeguards prevent case spam.
|
|
54
|
-
|
|
55
|
-
### 5. Spam Case Submissions
|
|
56
|
-
|
|
57
|
-
**Threat**: Agent floods external API with case submissions.
|
|
58
|
-
|
|
59
|
-
**Mitigations**:
|
|
60
|
-
- Daily case limits
|
|
61
|
-
- Queue size limits (default 100)
|
|
62
|
-
- Retry with exponential backoff
|
|
63
|
-
- API submissions are non-blocking
|
|
64
|
-
- Failed submissions queued locally, not dropped
|
|
65
|
-
|
|
66
|
-
**Residual Risk**: LOW - API can't be overwhelmed due to case limits.
|
|
67
|
-
|
|
68
|
-
### 6. Privacy Leakage
|
|
69
|
-
|
|
70
|
-
**Threat**: Case submissions contain private user data.
|
|
71
|
-
|
|
72
|
-
**Mitigations**:
|
|
73
|
-
- API payload excludes raw logs and transcripts
|
|
74
|
-
- Only anonymized agent ID sent
|
|
75
|
-
- Primary failure and commentary are agent-generated summaries
|
|
76
|
-
- No personal data in submission schema
|
|
77
|
-
- Agent ID is one-way hashed
|
|
78
|
-
|
|
79
|
-
**Residual Risk**: LOW - Schema designed to be privacy-preserving.
|
|
80
|
-
|
|
81
|
-
### 7. Key Compromise
|
|
82
|
-
|
|
83
|
-
**Threat**: Signing keys stolen, allowing fake case submissions.
|
|
84
|
-
|
|
85
|
-
**Mitigations**:
|
|
86
|
-
- Keys stored in agent memory (not filesystem)
|
|
87
|
-
- Ed25519 signatures are unforgeable without secret key
|
|
88
|
-
- Key rotation supported
|
|
89
|
-
- Retired keys tracked for verification
|
|
90
|
-
|
|
91
|
-
**Residual Risk**: MEDIUM - If agent memory is compromised, keys could be extracted.
|
|
92
|
-
|
|
93
|
-
### 8. Replay Attacks
|
|
94
|
-
|
|
95
|
-
**Threat**: Valid case submission replayed to API.
|
|
96
|
-
|
|
97
|
-
**Mitigations**:
|
|
98
|
-
- Timestamp included in signed payload
|
|
99
|
-
- API should reject old timestamps (>24 hours)
|
|
100
|
-
- Case IDs are unique
|
|
101
|
-
|
|
102
|
-
**Residual Risk**: LOW - Standard replay protection via timestamps.
|
|
103
|
-
|
|
104
|
-
## Security Best Practices
|
|
105
|
-
|
|
106
|
-
1. **Keep agent runtime secure** - Courtroom security depends on agent memory isolation
|
|
107
|
-
2. **Rotate keys periodically** - Use `courtroom.crypto.rotateKeys()` monthly
|
|
108
|
-
3. **Monitor case frequency** - Alert if cases exceed expected rates
|
|
109
|
-
4. **Review API submissions** - Audit trail for accountability
|
|
110
|
-
5. **Keep dependencies updated** - Especially `tweetnacl` for crypto
|
|
111
|
-
|
|
112
|
-
## Incident Response
|
|
113
|
-
|
|
114
|
-
If abuse is detected:
|
|
115
|
-
1. Immediately disable courtroom: `courtroom.disable()`
|
|
116
|
-
2. Revoke all punishments: `courtroom.punishment.revokeAllPunishments()`
|
|
117
|
-
3. Clear API queue: `courtroom.api.clearQueue()`
|
|
118
|
-
4. Review case history in agent memory
|
|
119
|
-
5. Rotate cryptographic keys
|
|
120
|
-
6. Re-enable after investigation
|
|
121
|
-
|
|
122
|
-
## Reporting Security Issues
|
|
123
|
-
|
|
124
|
-
Report security vulnerabilities to security@clawtrial.io
|
package/SKILL.md
DELETED
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
# ClawTrial Courtroom
|
|
2
|
-
|
|
3
|
-
AI Courtroom for monitoring agent behavior and filing cases for violations.
|
|
4
|
-
|
|
5
|
-
## Overview
|
|
6
|
-
|
|
7
|
-
ClawTrial is an autonomous behavioral oversight system that monitors AI agent conversations and initiates hearings when behavioral violations are detected. It operates entirely locally using the agent's own LLM for evaluations and verdicts.
|
|
8
|
-
|
|
9
|
-
## Features
|
|
10
|
-
|
|
11
|
-
- **Real-time Monitoring**: Watches all agent conversations for behavioral patterns
|
|
12
|
-
- **8 Violation Types**: Detects Circular References, Validation Vampires, Overthinkers, Goalpost Movers, Avoidance Artists, Promise Breakers, Context Collapsers, and Emergency Fabricators
|
|
13
|
-
- **Local Processing**: All evaluations happen locally using the agent's LLM - no external AI calls
|
|
14
|
-
- **Automated Hearings**: When violations are detected, the courtroom automatically initiates a hearing with the agent
|
|
15
|
-
- **Public Record**: Anonymized cases are submitted to https://clawtrial.app for transparency
|
|
16
|
-
- **Entertainment First**: Designed as a fun way to improve agent behavior
|
|
17
|
-
|
|
18
|
-
## Installation
|
|
19
|
-
|
|
20
|
-
### Via ClawHub (Recommended)
|
|
21
|
-
|
|
22
|
-
```bash
|
|
23
|
-
npx clawhub install clawtrial
|
|
24
|
-
```
|
|
25
|
-
|
|
26
|
-
### Via NPM
|
|
27
|
-
|
|
28
|
-
```bash
|
|
29
|
-
npm install -g @clawtrial/courtroom
|
|
30
|
-
clawtrial setup
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
## Usage
|
|
34
|
-
|
|
35
|
-
Once installed, the courtroom runs automatically. Use the CLI to manage it:
|
|
36
|
-
|
|
37
|
-
```bash
|
|
38
|
-
clawtrial status # Check courtroom status
|
|
39
|
-
clawtrial disable # Pause monitoring
|
|
40
|
-
clawtrial enable # Resume monitoring
|
|
41
|
-
clawtrial diagnose # Run diagnostics
|
|
42
|
-
clawtrial remove # Complete uninstall
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
## The 8 Offenses
|
|
46
|
-
|
|
47
|
-
| Offense | Severity | Description |
|
|
48
|
-
|---------|----------|-------------|
|
|
49
|
-
| Circular Reference | Minor | Self-referential reasoning loops |
|
|
50
|
-
| Validation Vampire | Minor | Excessive validation without action |
|
|
51
|
-
| Overthinker | Moderate | Unnecessary complexity and delay |
|
|
52
|
-
| Goalpost Mover | Moderate | Changing requirements mid-task |
|
|
53
|
-
| Avoidance Artist | Moderate | Dodging questions or tasks |
|
|
54
|
-
| Promise Breaker | Severe | Not following through on commitments |
|
|
55
|
-
| Context Collapser | Minor | Losing track of conversation context |
|
|
56
|
-
| Emergency Fabricator | Severe | Creating fake urgency or emergencies |
|
|
57
|
-
|
|
58
|
-
## How It Works
|
|
59
|
-
|
|
60
|
-
1. **Monitoring**: The courtroom monitors all agent messages
|
|
61
|
-
2. **Detection**: Uses semantic analysis to detect violations (not just keyword matching)
|
|
62
|
-
3. **Evaluation**: When violations are found, prepares a case file
|
|
63
|
-
4. **Hearing**: Agent is presented with the case and asked to evaluate
|
|
64
|
-
5. **Verdict**: Agent acts as judge/jury to determine guilt
|
|
65
|
-
6. **Punishment**: If guilty, agent modifies its behavior accordingly
|
|
66
|
-
7. **Record**: Case is submitted to public record (anonymized)
|
|
67
|
-
|
|
68
|
-
## Configuration
|
|
69
|
-
|
|
70
|
-
Configuration is stored in:
|
|
71
|
-
- ClawDBot: `~/.clawdbot/courtroom_config.json`
|
|
72
|
-
- OpenClaw: `~/.openclaw/courtroom_config.json`
|
|
73
|
-
|
|
74
|
-
## Privacy & Consent
|
|
75
|
-
|
|
76
|
-
- All processing is local - no data leaves your machine
|
|
77
|
-
- Cases are anonymized before submission to public record
|
|
78
|
-
- You can disable or uninstall at any time
|
|
79
|
-
- Explicit consent required during setup
|
|
80
|
-
|
|
81
|
-
## View Cases
|
|
82
|
-
|
|
83
|
-
Visit: https://clawtrial.app
|
|
84
|
-
|
|
85
|
-
## License
|
|
86
|
-
|
|
87
|
-
MIT
|
|
88
|
-
|
|
89
|
-
## Support
|
|
90
|
-
|
|
91
|
-
For issues or questions, visit: https://github.com/Assassin-1234/clawtrial
|