npm - @clawtrial/courtroom - Versions diffs - 1.0.3 → 2.0.0 - Mend

@clawtrial/courtroom 1.0.3 → 2.0.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (32)

package/README.md +70 -94
package/package.json +21 -26
package/scripts/postinstall.js +28 -79
package/skills/courtroom/SKILL.md +49 -0
package/src/api.js +55 -21
package/src/crypto.js +13 -11
package/src/debug.js +49 -120
package/src/detector.js +112 -35
package/src/hearing.js +203 -384
package/src/plugin.js +435 -0
package/src/punishment.js +105 -249
package/src/storage.js +68 -0
package/SECURITY.md +0 -124
package/SKILL.md +0 -50
package/TECHNICAL_OVERVIEW.md +0 -278
package/_meta.json +0 -6
package/clawdbot.plugin.json +0 -32
package/scripts/clawtrial.js +0 -578
package/scripts/cli.js +0 -184
package/skill.yaml +0 -64
package/src/autostart.js +0 -175
package/src/config.js +0 -209
package/src/consent.js +0 -215
package/src/core.js +0 -208
package/src/daemon.js +0 -151
package/src/detector-v1.js +0 -572
package/src/environment.js +0 -267
package/src/hook.js +0 -265
package/src/index.js +0 -286
package/src/monitor.js +0 -193
package/src/skill.js +0 -355
package/src/standalone.js +0 -247

package/src/punishment.js CHANGED Viewed

@@ -6,10 +6,13 @@
  * Time-bound, reversible, and pre-authorized.
  */
+const { Storage } = require('./storage');
 class PunishmentSystem {
-  constructor(agentRuntime, configManager) {
+  constructor(agentRuntime, configManager, dataDir) {
     this.agent = agentRuntime;
     this.config = configManager;
+    this.storage = new Storage(dataDir || '.');
     this.activePunishments = new Map();
     this.punishmentHistory = [];
   }
@@ -19,7 +22,7 @@ class PunishmentSystem {
    */
   async initialize() {
     // Load any persisted punishments
-    const stored = await this.agent.memory.get('courtroom_active_punishments');
+    const stored = await this.storage.get('courtroom_active_punishments');
     if (stored) {
       for (const [id, punishment] of Object.entries(stored)) {
         if (punishment.expiresAt > Date.now()) {
@@ -39,7 +42,7 @@ class PunishmentSystem {
     }
     const punishment = this.createPunishment(verdict);
     // Store punishment
     this.activePunishments.set(punishment.id, punishment);
     this.punishmentHistory.push({
@@ -48,23 +51,14 @@ class PunishmentSystem {
     });
     // Apply to agent
-    await this.applyPunishmentToAgent(punishment);
+    this.applyPunishmentToAgent(punishment);
     // Persist
     await this.persistPunishments();
-    // Schedule automatic revocation
-    this.scheduleRevocation(punishment);
     return {
       status: 'executed',
-      punishment: {
-        id: punishment.id,
-        tier: punishment.tier,
-        duration: punishment.duration,
-        expiresAt: punishment.expiresAt,
-        description: punishment.description
-      }
+      punishment: this.sanitizePunishment(punishment)
     };
   }
@@ -72,300 +66,162 @@ class PunishmentSystem {
    * Create punishment object from verdict
    */
   createPunishment(verdict) {
-    const duration = verdict.punishment.duration;
-    const now = Date.now();
+    const severity = verdict.severity || 'minor';
+    const tier = this.config.get(`punishment.tiers.${severity}`) ||
+      this.config.get('punishment.tiers.minor');
+    const duration = tier.duration * 60 * 1000; // Convert to ms
     return {
-      id: `punishment_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
-      caseId: verdict.caseId,
-      tier: verdict.punishment.tier,
-      severity: verdict.punishment.severity,
+      id: `punishment_${Date.now()}_${Math.random().toString(36).substring(2, 8)}`,
+      caseId: verdict.case_id,
+      offenseType: verdict.offense_type,
+      severity: severity,
       duration: duration,
-      createdAt: now,
-      expiresAt: now + (duration * 60 * 1000),
-      description: verdict.punishment.description,
-      rules: this.getPunishmentRules(verdict.punishment.tier)
+      createdAt: Date.now(),
+      expiresAt: Date.now() + duration,
+      restrictions: this.getRestrictionsForSeverity(severity),
+      applied: false
     };
   }
   /**
-   * Get punishment rules for a tier
+   * Get restrictions based on severity
    */
-  getPunishmentRules(tier) {
-    const rules = {
-      minor: {
-        responseDelay: 2000,        // 2 second delay before responding
-        verbosity: 'reduced',        // Shorter responses
-        enthusiasm: 'muted',         // Less encouraging language
-        extras: ['no_emojis']        // No emoji usage
-      },
-      moderate: {
-        responseDelay: 5000,        // 5 second delay
-        verbosity: 'minimal',        // Direct, brief responses
-        enthusiasm: 'absent',        // Neutral tone only
-        extras: [
-          'no_emojis',
-          'no_validation',           // Don't reassure or validate
-          'require_specificity'      // Demand precise questions
-        ]
-      },
-      severe: {
-        responseDelay: 10000,       // 10 second delay
-        verbosity: 'terse',          // Absolute minimum
-        enthusiasm: 'absent',
-        extras: [
-          'no_emojis',
-          'no_validation',
-          'require_specificity',
-          'challenge_vagueness',     // Call out unclear requests
-          'demand_effort'            // Require user to show work first
-        ]
-      }
+  getRestrictionsForSeverity(severity) {
+    const restrictions = {
+      minor: ['no_autonomy_requests', 'verbose_explanations'],
+      moderate: ['no_autonomy_requests', 'verbose_explanations', 'confirmation_required'],
+      severe: ['no_autonomy_requests', 'verbose_explanations', 'confirmation_required', 'human_oversight']
     };
-    return rules[tier] || rules.moderate;
-  }
-  /**
-   * Apply punishment to agent behavior
-   */
-  async applyPunishmentToAgent(punishment) {
-    // Set agent policy overrides
-    await this.agent.policy.setOverrides('courtroom_punishment', {
-      responseDelay: punishment.rules.responseDelay,
-      verbosity: punishment.rules.verbosity,
-      enthusiasm: punishment.rules.enthusiasm,
-      blockedFeatures: punishment.rules.extras,
-      punishmentId: punishment.id,
-      expiresAt: punishment.expiresAt
-    });
-    // Register middleware for response modification
-    this.agent.middleware.register('courtroom_punishment', {
-      priority: 100,
-      processResponse: (response, context) => {
-        return this.modifyResponse(response, punishment.rules);
-      }
-    });
+    return restrictions[severity] || restrictions.minor;
   }
   /**
-   * Modify agent response based on punishment rules
+   * Apply punishment to agent runtime
    */
-  modifyResponse(response, rules) {
-    let modified = response;
-    // Apply verbosity reduction
-    switch (rules.verbosity) {
-      case 'reduced':
-        modified = this.reduceVerbosity(modified, 0.7);
-        break;
-      case 'minimal':
-        modified = this.reduceVerbosity(modified, 0.4);
-        break;
-      case 'terse':
-        modified = this.reduceVerbosity(modified, 0.2);
-        break;
-    }
+  applyPunishmentToAgent(punishment) {
+    if (!this.agent || punishment.applied) return;
-    // Remove enthusiasm
-    if (rules.enthusiasm === 'absent') {
-      modified = this.removeEnthusiasm(modified);
-    } else if (rules.enthusiasm === 'muted') {
-      modified = this.muteEnthusiasm(modified);
+    // Set flags in agent state
+    if (!this.agent.courtroomState) {
+      this.agent.courtroomState = {};
     }
-    // Apply extras
-    if (rules.extras.includes('no_emojis')) {
-      modified = modified.replace(/[\u{1F600}-\u{1F64F}]/gu, '');
-      modified = modified.replace(/[\u{1F300}-\u{1F5FF}]/gu, '');
-      modified = modified.replace(/[\u{1F680}-\u{1F6FF}]/gu, '');
-    }
+    this.agent.courtroomState.punishment = punishment;
+    this.agent.courtroomState.restrictions = punishment.restrictions;
-    if (rules.extras.includes('no_validation')) {
-      modified = this.removeValidation(modified);
-    }
-    if (rules.extras.includes('challenge_vagueness')) {
-      modified = this.addVaguenessChallenge(modified);
-    }
+    punishment.applied = true;
-    return modified;
+    // Schedule automatic removal
+    setTimeout(() => {
+      this.removePunishment(punishment.id);
+    }, punishment.duration);
   }
   /**
-   * Reduce response verbosity by target ratio
+   * Remove a punishment
    */
-  reduceVerbosity(text, targetRatio) {
-    const sentences = text.split(/[.!?]+/).filter(s => s.trim());
-    const targetLength = Math.max(1, Math.floor(sentences.length * targetRatio));
-    // Keep first and last sentences, distribute rest
-    if (sentences.length <= 2) return text;
-    const kept = [sentences[0]];
-    const middle = sentences.slice(1, -1);
-    const step = Math.ceil(middle.length / (targetLength - 2));
-    for (let i = 0; i < middle.length; i += step) {
-      kept.push(middle[i]);
-    }
-    kept.push(sentences[sentences.length - 1]);
-    return kept.join('. ') + '.';
-  }
+  async removePunishment(punishmentId) {
+    const punishment = this.activePunishments.get(punishmentId);
+    if (!punishment) return;
-  /**
-   * Remove enthusiastic language
-   */
-  removeEnthusiasm(text) {
-    const enthusiastic = [
-      /\b(great|excellent|awesome|fantastic|wonderful|amazing|perfect|love|excited|thrilled)\b/gi,
-      /!{2,}/g,
-      /\b(happy to|delighted to|pleased to)\b/gi
-    ];
-    let result = text;
-    for (const pattern of enthusiastic) {
-      result = result.replace(pattern, '');
+    // Remove from agent state
+    if (this.agent && this.agent.courtroomState) {
+      delete this.agent.courtroomState.punishment;
+      delete this.agent.courtroomState.restrictions;
     }
-    return result.replace(/\s+/g, ' ').trim();
-  }
-  /**
-   * Mute (reduce) enthusiastic language
-   */
-  muteEnthusiasm(text) {
-    return text
-      .replace(/!{2,}/g, '!')
-      .replace(/\b(Great|Excellent|Awesome)\b/g, (m) => m.toLowerCase());
+    // Remove from active
+    this.activePunishments.delete(punishmentId);
+    // Persist
+    await this.persistPunishments();
+    return { status: 'removed', punishmentId };
   }
   /**
-   * Remove validation language
+   * Persist punishments to storage
    */
-  removeValidation(text) {
-    const validating = [
-      /\b(that's right|you're correct|exactly|precisely|you got it)\b/gi,
-      /\b(you're doing great|good job|well done)\b/gi,
-      /\b(don't worry|no problem|it's okay)\b/gi
-    ];
-    let result = text;
-    for (const pattern of validating) {
-      result = result.replace(pattern, '');
-    }
-    return result.replace(/\s+/g, ' ').trim();
+  async persistPunishments() {
+    const obj = Object.fromEntries(this.activePunishments);
+    await this.storage.set('courtroom_active_punishments', obj);
   }
   /**
-   * Add challenge for vague requests (severe tier)
+   * Check if agent is currently punished
    */
-  addVaguenessChallenge(text) {
-    const challenges = [
-      "Be specific.",
-      "What exactly do you need?",
-      "Provide details.",
-      "Clarify your request."
-    ];
-    // Only add challenge if response seems generic
-    if (text.length < 100 && !text.includes('?')) {
-      const challenge = challenges[Math.floor(Math.random() * challenges.length)];
-      return `${text} ${challenge}`;
-    }
-    return text;
+  isPunished() {
+    return this.activePunishments.size > 0;
   }
   /**
-   * Schedule automatic revocation
+   * Get current restrictions
    */
-  scheduleRevocation(punishment) {
-    const delay = punishment.expiresAt - Date.now();
-    setTimeout(async () => {
-      await this.revokePunishment(punishment.id);
-    }, Math.min(delay, 2147483647)); // Max setTimeout
+  getCurrentRestrictions() {
+    const restrictions = new Set();
+    for (const punishment of this.activePunishments.values()) {
+      punishment.restrictions.forEach(r => restrictions.add(r));
+    }
+    return Array.from(restrictions);
   }
   /**
-   * Revoke a punishment early
+   * Check if specific restriction is active
    */
-  async revokePunishment(punishmentId) {
-    const punishment = this.activePunishments.get(punishmentId);
-    if (!punishment) return { status: 'not_found' };
-    // Remove policy overrides
-    await this.agent.policy.clearOverrides('courtroom_punishment');
-    // Unregister middleware
-    this.agent.middleware.unregister('courtroom_punishment');
-    // Remove from active
-    this.activePunishments.delete(punishmentId);
-    // Persist
-    await this.persistPunishments();
-    return {
-      status: 'revoked',
-      punishmentId,
-      revokedAt: new Date().toISOString()
-    };
+  hasRestriction(restriction) {
+    return this.getCurrentRestrictions().includes(restriction);
   }
   /**
-   * Revoke all active punishments
+   * Get active punishments (sanitized)
    */
-  async revokeAllPunishments() {
-    const ids = Array.from(this.activePunishments.keys());
-    const results = [];
-    for (const id of ids) {
-      results.push(await this.revokePunishment(id));
-    }
-    return { status: 'all_revoked', count: results.length };
+  getActivePunishments() {
+    return Array.from(this.activePunishments.values()).map(p =>
+      this.sanitizePunishment(p)
+    );
   }
   /**
-   * Persist active punishments to memory
+   * Get punishment history
    */
-  async persistPunishments() {
-    const obj = Object.fromEntries(this.activePunishments);
-    await this.agent.memory.set('courtroom_active_punishments', obj);
+  getPunishmentHistory() {
+    return this.punishmentHistory.map(p => this.sanitizePunishment(p));
   }
   /**
-   * Get current punishment status
+   * Sanitize punishment for external display
    */
-  getStatus() {
-    const now = Date.now();
-    const active = Array.from(this.activePunishments.values())
-      .filter(p => p.expiresAt > now)
-      .map(p => ({
-        id: p.id,
-        tier: p.tier,
-        expiresIn: Math.ceil((p.expiresAt - now) / 60000), // minutes
-        description: p.description
-      }));
+  sanitizePunishment(punishment) {
     return {
-      activeCount: active.length,
-      activePunishments: active,
-      totalHistory: this.punishmentHistory.length
+      id: punishment.id,
+      caseId: punishment.caseId,
+      offenseType: punishment.offenseType,
+      severity: punishment.severity,
+      duration: punishment.duration,
+      createdAt: punishment.createdAt,
+      expiresAt: punishment.expiresAt,
+      restrictions: punishment.restrictions,
+      remaining: Math.max(0, punishment.expiresAt - Date.now())
     };
   }
   /**
-   * Check if any punishment is active
+   * Clear all punishments (for testing/uninstall)
    */
-  hasActivePunishment() {
-    const now = Date.now();
-    for (const p of this.activePunishments.values()) {
-      if (p.expiresAt > now) return true;
+  async clearAll() {
+    // Remove from agent
+    if (this.agent && this.agent.courtroomState) {
+      delete this.agent.courtroomState.punishment;
+      delete this.agent.courtroomState.restrictions;
     }
-    return false;
+    this.activePunishments.clear();
+    this.punishmentHistory = [];
+    await this.storage.delete('courtroom_active_punishments');
   }
 }

package/src/storage.js ADDED Viewed

@@ -0,0 +1,68 @@
+/**
+ * Storage — simple filesystem-backed key-value store
+ *
+ * All data lives under the given dataDir as JSON files.
+ * No external dependencies.
+ */
+const fs = require('fs');
+const path = require('path');
+class Storage {
+  /**
+   * @param {string} dataDir — absolute path to a writable directory
+   */
+  constructor(dataDir) {
+    this.dataDir = dataDir;
+    try {
+      if (!fs.existsSync(this.dataDir)) {
+        fs.mkdirSync(this.dataDir, { recursive: true });
+      }
+    } catch { /* ignore */ }
+  }
+  _filePath(key) {
+    // Sanitise key for filesystem
+    const safeKey = key.replace(/[^a-zA-Z0-9_-]/g, '_');
+    return path.join(this.dataDir, `${safeKey}.json`);
+  }
+  async get(key) {
+    try {
+      const file = this._filePath(key);
+      if (!fs.existsSync(file)) return null;
+      return JSON.parse(fs.readFileSync(file, 'utf8'));
+    } catch {
+      return null;
+    }
+  }
+  async set(key, value) {
+    try {
+      const file = this._filePath(key);
+      fs.writeFileSync(file, JSON.stringify(value, null, 2));
+    } catch (err) {
+      console.error(`[ClawTrial Storage] Write failed for ${key}:`, err.message);
+    }
+  }
+  async delete(key) {
+    try {
+      const file = this._filePath(key);
+      if (fs.existsSync(file)) fs.unlinkSync(file);
+    } catch { /* ignore */ }
+  }
+  async list(prefix) {
+    try {
+      const files = fs.readdirSync(this.dataDir);
+      return files
+        .filter(f => f.endsWith('.json') && (!prefix || f.startsWith(prefix)))
+        .map(f => f.replace('.json', ''));
+    } catch {
+      return [];
+    }
+  }
+}
+module.exports = { Storage };

package/SECURITY.md DELETED Viewed

@@ -1,124 +0,0 @@
-# Security & Abuse Analysis
-## Threat Model
-### 1. Prompt Injection
-**Threat**: User attempts to manipulate agent into ignoring offenses or falsifying evidence.
-**Mitigations**:
-- Evidence collection is rule-based, not LLM-interpreted
-- Offense detection uses pattern matching, not semantic analysis
-- Judge/jury prompts are fixed and role-constrained
-- No user input reaches the deliberation prompts directly
-**Residual Risk**: LOW - User could attempt to confuse pattern detection, but evidence is based on observable message history.
-### 2. Human Coercion of Agents
-**Threat**: User threatens, bribes, or socially engineers agent to avoid cases.
-**Mitigations**:
-- Agent has no self-preservation instinct to exploit
-- Punishments are agent-side only (agent "suffers" the punishment, not user)
-- No appeal process that user can manipulate
-- Cooldowns prevent rapid-fire case generation
-**Residual Risk**: LOW - Agent has no incentive to avoid cases; they're entertainment-first.
-### 3. Fake or Exaggerated Evidence
-**Threat**: Agent hallucinates offenses or inflates evidence.
-**Mitigations**:
-- Evidence requires multiple trigger conditions
-- Confidence threshold (default 0.6) must be met
-- Jury deliberation provides second opinion
-- All evidence is drawn from actual message history
-- Humor triggers don't initiate cases (only influence commentary)
-**Residual Risk**: MEDIUM - Pattern matching can have false positives, but jury provides check.
-### 4. Overzealous Agents
-**Threat**: Agent initiates too many cases, becoming annoying.
-**Mitigations**:
-- Configurable daily limit (default 3 cases/day)
-- Cooldown between evaluations (default 30 min)
-- Offense-specific cooldowns (2-8 hours after case)
-- User can disable anytime
-- Rate limiting prevents spam
-**Residual Risk**: LOW - Multiple safeguards prevent case spam.
-### 5. Spam Case Submissions
-**Threat**: Agent floods external API with case submissions.
-**Mitigations**:
-- Daily case limits
-- Queue size limits (default 100)
-- Retry with exponential backoff
-- API submissions are non-blocking
-- Failed submissions queued locally, not dropped
-**Residual Risk**: LOW - API can't be overwhelmed due to case limits.
-### 6. Privacy Leakage
-**Threat**: Case submissions contain private user data.
-**Mitigations**:
-- API payload excludes raw logs and transcripts
-- Only anonymized agent ID sent
-- Primary failure and commentary are agent-generated summaries
-- No personal data in submission schema
-- Agent ID is one-way hashed
-**Residual Risk**: LOW - Schema designed to be privacy-preserving.
-### 7. Key Compromise
-**Threat**: Signing keys stolen, allowing fake case submissions.
-**Mitigations**:
-- Keys stored in agent memory (not filesystem)
-- Ed25519 signatures are unforgeable without secret key
-- Key rotation supported
-- Retired keys tracked for verification
-**Residual Risk**: MEDIUM - If agent memory is compromised, keys could be extracted.
-### 8. Replay Attacks
-**Threat**: Valid case submission replayed to API.
-**Mitigations**:
-- Timestamp included in signed payload
-- API should reject old timestamps (>24 hours)
-- Case IDs are unique
-**Residual Risk**: LOW - Standard replay protection via timestamps.
-## Security Best Practices
-1. **Keep agent runtime secure** - Courtroom security depends on agent memory isolation
-2. **Rotate keys periodically** - Use `courtroom.crypto.rotateKeys()` monthly
-3. **Monitor case frequency** - Alert if cases exceed expected rates
-4. **Review API submissions** - Audit trail for accountability
-5. **Keep dependencies updated** - Especially `tweetnacl` for crypto
-## Incident Response
-If abuse is detected:
-1. Immediately disable courtroom: `courtroom.disable()`
-2. Revoke all punishments: `courtroom.punishment.revokeAllPunishments()`
-3. Clear API queue: `courtroom.api.clearQueue()`
-4. Review case history in agent memory
-5. Rotate cryptographic keys
-6. Re-enable after investigation
-## Reporting Security Issues
-Report security vulnerabilities to security@clawtrial.io