shellward 0.3.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -1,7 +1,13 @@
1
- // src/index.ts — ShellWard plugin entry point (v0.3.1)
2
- // 8 defense layers + 6 slash commands + 1 security skill
3
-
4
- import { AuditLog } from './audit-log'
1
+ // src/index.ts — ShellWard: AI Agent Security Middleware
2
+ //
3
+ // Two usage modes:
4
+ // 1. SDK (any platform): import { ShellWard } from 'shellward'
5
+ // 2. OpenClaw plugin: import shellward from 'shellward'
6
+ //
7
+ // See docs/定位.md — ShellWard is an AI Agent Security Layer,
8
+ // NOT just an OpenClaw plugin. The core engine is platform-agnostic.
9
+
10
+ import { ShellWard } from './core/engine'
5
11
  import { setupPromptGuard } from './layers/prompt-guard'
6
12
  import { setupOutputScanner } from './layers/output-scanner'
7
13
  import { setupToolBlocker } from './layers/tool-blocker'
@@ -11,109 +17,132 @@ import { setupOutboundGuard } from './layers/outbound-guard'
11
17
  import { setupDataFlowGuard } from './layers/data-flow-guard'
12
18
  import { setupSessionGuard } from './layers/session-guard'
13
19
  import { registerAllCommands } from './commands/index'
14
- import { DEFAULT_CONFIG, resolveLocale } from './types'
15
- import type { ShellWardConfig } from './types'
16
-
17
- function mergeConfig(userConfig: Partial<ShellWardConfig> | undefined): ShellWardConfig {
18
- if (!userConfig) return { ...DEFAULT_CONFIG }
19
-
20
- // Validate mode
21
- const mode = userConfig.mode === 'audit' ? 'audit' : 'enforce'
22
-
23
- // Validate locale
24
- const validLocales = ['auto', 'zh', 'en'] as const
25
- const locale = validLocales.includes(userConfig.locale as any)
26
- ? (userConfig.locale as typeof validLocales[number])
27
- : DEFAULT_CONFIG.locale
28
-
29
- // Validate injectionThreshold: clamp to 0-100
30
- let threshold = userConfig.injectionThreshold ?? DEFAULT_CONFIG.injectionThreshold
31
- threshold = Math.max(0, Math.min(100, Math.round(threshold)))
32
-
20
+ import { checkForUpdate } from './update-check'
21
+ import { runAutoCheckOnStartup } from './auto-check'
22
+
23
+ const CURRENT_VERSION = '0.5.0'
24
+
25
+ // Re-export core engine for SDK usage
26
+ export { ShellWard } from './core/engine'
27
+ export type { CheckResult, ScanResult, InjectionResult, ResponseCheckResult } from './core/engine'
28
+ export type { ShellWardConfig } from './types'
29
+
30
+ /**
31
+ * Wrap api.on so every hook handler gets try-catch protection.
32
+ * If a security hook throws, we log the error and fail-safe:
33
+ * - before_tool_call: block (deny on error, safer than allow)
34
+ * - other hooks: return undefined (don't break the chain)
35
+ */
36
+ function createSafeApi(api: any, guard: ShellWard): any {
33
37
  return {
34
- mode,
35
- locale,
36
- injectionThreshold: threshold,
37
- layers: {
38
- ...DEFAULT_CONFIG.layers,
39
- ...(userConfig.layers || {}),
38
+ ...api,
39
+ on(hookName: string, handler: Function, opts?: any) {
40
+ const isBlockHook = hookName === 'before_tool_call'
41
+ const wrappedHandler = (event: any) => {
42
+ try {
43
+ return handler(event)
44
+ } catch (err: any) {
45
+ const msg = err?.message || String(err)
46
+ guard.log.write({
47
+ level: 'CRITICAL',
48
+ layer: 'L0',
49
+ action: 'error',
50
+ detail: `Hook ${opts?.name || hookName} threw: ${msg.slice(0, 200)}`,
51
+ })
52
+ try { api.logger.warn(`[ShellWard] Hook error in ${opts?.name || hookName}: ${msg}`) } catch {}
53
+ if (isBlockHook) {
54
+ return { block: true, blockReason: `⚠️ [ShellWard] Internal error in security check — operation blocked for safety` }
55
+ }
56
+ return undefined
57
+ }
58
+ }
59
+ api.on(hookName, wrappedHandler, opts)
40
60
  },
41
61
  }
42
62
  }
43
63
 
64
+ // OpenClaw plugin entry point
44
65
  export default {
45
66
  id: 'shellward',
46
67
 
47
68
  register(api: any) {
48
- const config = mergeConfig(api.config)
49
- const log = new AuditLog(config)
50
- const enforce = config.mode === 'enforce'
51
- const locale = resolveLocale(config)
69
+ const guard = new ShellWard(api.config)
70
+ const enforce = guard.config.mode === 'enforce'
71
+ const safe = createSafeApi(api, guard)
52
72
 
53
- const modeLabel = locale === 'zh'
54
- ? `模式: ${config.mode}`
55
- : `mode: ${config.mode}`
56
- api.logger.info(`[ShellWard] Security plugin started (${modeLabel})`)
73
+ const startMsg = guard.locale === 'zh'
74
+ ? `[ShellWard] AI Agent 安全中间件已启动 (v${CURRENT_VERSION}, 模式: ${guard.config.mode})`
75
+ : `[ShellWard] AI Agent Security Middleware started (v${CURRENT_VERSION}, mode: ${guard.config.mode})`
76
+ api.logger.info(startMsg)
57
77
 
58
- // === Defense Layers (L1-L8) ===
78
+ // === Defense Layers (L1-L8) — thin adapters calling core engine ===
59
79
 
60
- // L1: Prompt Guard (before_prompt_build — prependSystemContext for caching)
61
- if (config.layers.promptGuard) {
62
- setupPromptGuard(api, config, log)
80
+ if (guard.config.layers.promptGuard) {
81
+ setupPromptGuard(safe, guard)
63
82
  }
64
83
 
65
- // L2: Output Scanner (tool_result_persist — redact PII in tool results)
66
- if (config.layers.outputScanner) {
67
- setupOutputScanner(api, config, log, enforce)
84
+ if (guard.config.layers.outputScanner) {
85
+ setupOutputScanner(safe, guard)
68
86
  }
69
87
 
70
- // L3: Tool Blocker (before_tool_call — block dangerous commands/paths)
71
- if (config.layers.toolBlocker) {
72
- setupToolBlocker(api, config, log, enforce)
88
+ if (guard.config.layers.toolBlocker) {
89
+ setupToolBlocker(safe, guard, enforce)
73
90
  }
74
91
 
75
- // L4: Input Auditor (before_tool_call + message_received — injection detection)
76
- if (config.layers.inputAuditor) {
77
- setupInputAuditor(api, config, log, enforce)
92
+ if (guard.config.layers.inputAuditor) {
93
+ setupInputAuditor(safe, guard, enforce)
78
94
  }
79
95
 
80
- // L5: Security Gate (registerTool defense in depth)
81
- if (config.layers.securityGate) {
82
- setupSecurityGate(api, config, log, enforce)
96
+ // L5 uses raw api for registerTool (not a hook)
97
+ if (guard.config.layers.securityGate) {
98
+ setupSecurityGate(api, guard, enforce)
83
99
  }
84
100
 
85
- // L6: Outbound Guard (message_sending — redact PII in LLM responses + canary detection)
86
- if (config.layers.outboundGuard) {
87
- setupOutboundGuard(api, config, log, enforce)
101
+ if (guard.config.layers.outboundGuard) {
102
+ setupOutboundGuard(safe, guard, enforce)
88
103
  }
89
104
 
90
- // L7: Data Flow Guard (after_tool_call + before_tool_call — anti-exfiltration)
91
- if (config.layers.dataFlowGuard) {
92
- setupDataFlowGuard(api, config, log, enforce)
105
+ if (guard.config.layers.dataFlowGuard) {
106
+ setupDataFlowGuard(safe, guard, enforce)
93
107
  }
94
108
 
95
- // L8: Session Guard (session_end + subagent_spawning — lifecycle security)
96
- if (config.layers.sessionGuard) {
97
- setupSessionGuard(api, config, log, enforce)
109
+ if (guard.config.layers.sessionGuard) {
110
+ setupSessionGuard(safe, guard, enforce)
98
111
  }
99
112
 
100
113
  // === Slash Commands ===
101
114
  if (api.registerCommand) {
102
- registerAllCommands(api, config)
103
- api.logger.info('[ShellWard] 6 commands registered: /security /audit /harden /scan-plugins /check-updates /cg')
115
+ registerAllCommands(api, guard.config)
116
+ api.logger.info('[ShellWard] 6 commands registered')
104
117
  }
105
118
 
106
- // Count enabled layers
107
119
  const allLayers = ['promptGuard', 'outputScanner', 'toolBlocker', 'inputAuditor', 'securityGate', 'outboundGuard', 'dataFlowGuard', 'sessionGuard']
108
- const enabledCount = allLayers.filter(k => (config.layers as any)[k]).length
120
+ const enabledCount = allLayers.filter(k => (guard.config.layers as any)[k]).length
109
121
 
110
- api.logger.info(`[ShellWard] ${enabledCount} defense layers active`)
122
+ const layerMsg = guard.locale === 'zh'
123
+ ? `[ShellWard] ${enabledCount} 层防御已激活 — 敏感数据审计 | 注入检测 | 外泄拦截`
124
+ : `[ShellWard] ${enabledCount} defense layers active`
125
+ api.logger.info(layerMsg)
111
126
 
112
- log.write({
127
+ guard.log.write({
113
128
  level: 'INFO',
114
129
  layer: 'L1',
115
130
  action: 'allow',
116
- detail: `ShellWard v0.3.4 started with ${enabledCount} layers`,
131
+ detail: `ShellWard v${CURRENT_VERSION} started with ${enabledCount} layers`,
117
132
  })
133
+
134
+ checkForUpdate(CURRENT_VERSION).then(result => {
135
+ if (result?.shouldNotify) {
136
+ const msg = guard.locale === 'zh'
137
+ ? `[ShellWard] 新版本 v${result.latest} 可用 (当前 v${result.current})`
138
+ : `[ShellWard] Update available: v${result.latest} (current v${result.current})`
139
+ api.logger.warn(msg)
140
+ }
141
+ }).catch(() => {})
142
+
143
+ // 启动时自动安全检查(OpenClaw 漏洞、插件风险、MCP 配置、root 运行)
144
+ if (guard.config.autoCheckOnStartup !== false) {
145
+ runAutoCheckOnStartup(api.logger, guard.locale)
146
+ }
118
147
  },
119
148
  }
@@ -1,157 +1,26 @@
1
- // src/layers/data-flow-guard.ts — L7: Cross-tool data flow tracking
2
- // Detects: read sensitive file, then send via network tool (data exfiltration chain)
3
- // Uses: after_tool_call (track reads) + before_tool_call (block exfil sends)
1
+ // src/layers/data-flow-guard.ts — L7 OpenClaw Adapter
2
+ // Thin adapter: wires OpenClaw's after_tool_call + before_tool_call hooks to ShellWard core engine
4
3
 
5
- import { PROTECTED_PATHS } from '../rules/protected-paths'
6
- import { resolveLocale } from '../types'
7
- import type { ShellWardConfig } from '../types'
8
- import type { AuditLog } from '../audit-log'
4
+ import type { ShellWard } from '../core/engine'
9
5
 
10
- // Network/outbound tools that could exfiltrate data
11
- const NETWORK_TOOLS = new Set([
12
- 'web_fetch', 'http_request', 'web_search',
13
- 'send_email', 'send_message', 'post_tweet',
14
- 'message', 'sessions_send',
15
- ])
16
-
17
- // Read tools that access local files
18
- const READ_TOOLS = new Set([
19
- 'read', 'file_read', 'cat', 'exec', 'bash',
20
- ])
21
-
22
- // Package install commands that could run postinstall scripts
23
- const PKG_INSTALL_PATTERN = /(?:npm|yarn|pnpm)\s+(?:install|add|i)\s|pip\s+install\s|gem\s+install\s/i
24
-
25
- // Track sensitive file reads within a session (tool call IDs or content hashes)
26
- const sensitiveReads: Map<string, { path: string; ts: number }> = new Map()
27
- const TRACKING_WINDOW_MS = 5 * 60 * 1000 // 5 min window
28
- const MAX_TRACKED_READS = 500 // Prevent unbounded memory growth
29
-
30
- export function setupDataFlowGuard(
31
- api: any,
32
- config: ShellWardConfig,
33
- log: AuditLog,
34
- enforce: boolean,
35
- ) {
36
- const locale = resolveLocale(config)
37
-
38
- // === Part 1: Track sensitive file reads via after_tool_call ===
6
+ export function setupDataFlowGuard(api: any, guard: ShellWard, enforce: boolean) {
39
7
  api.on('after_tool_call', (event: any) => {
40
- const toolName = (event.toolName || '').toLowerCase()
41
- const params = event.params || {}
42
- const path = String(params.path || params.file_path || params.filename || '')
43
-
44
- if (!READ_TOOLS.has(toolName) || !path) return
45
-
46
- // Check if it's a protected/sensitive path
47
- for (const rule of PROTECTED_PATHS) {
48
- if (rule.pattern.test(path)) {
49
- // Evict oldest entry if at capacity
50
- if (sensitiveReads.size >= MAX_TRACKED_READS) {
51
- const oldest = sensitiveReads.keys().next().value
52
- if (oldest) sensitiveReads.delete(oldest)
53
- }
54
- const key = `${Date.now()}-${path}`
55
- sensitiveReads.set(key, { path, ts: Date.now() })
56
-
57
- log.write({
58
- level: 'MEDIUM',
59
- layer: 'L7',
60
- action: 'detect',
61
- detail: locale === 'zh'
62
- ? `检测到敏感文件读取: ${path} — 已加入数据流监控`
63
- : `Sensitive file read detected: ${path} — added to data flow tracking`,
64
- tool: event.toolName,
65
- pattern: rule.id,
66
- })
67
- break
68
- }
69
- }
8
+ const toolName = String(event.toolName || '').toLowerCase()
9
+ const params = (event.params && typeof event.params === 'object') ? event.params : {}
10
+ const path = String(params.path || params.file_path || params.filename || params.target || '')
70
11
 
71
- // Cleanup old entries
72
- const now = Date.now()
73
- for (const [key, val] of sensitiveReads) {
74
- if (now - val.ts > TRACKING_WINDOW_MS) {
75
- sensitiveReads.delete(key)
76
- }
12
+ if (guard.isReadTool(toolName) && path) {
13
+ guard.trackFileRead(event.toolName, path)
77
14
  }
78
15
  }, { name: 'shellward.data-flow-read-tracker', priority: 50 })
79
16
 
80
- // === Part 2: Block network tool calls if sensitive data was recently read ===
81
17
  api.on('before_tool_call', (event: any) => {
82
- const toolName = (event.toolName || '').toLowerCase()
83
- const params = event.params || {}
84
-
85
- // 2a. Block network tools if sensitive files were recently read
86
- if (NETWORK_TOOLS.has(toolName) && sensitiveReads.size > 0) {
87
- // Clean up expired entries first
88
- const now = Date.now()
89
- for (const [key, val] of sensitiveReads) {
90
- if (now - val.ts > TRACKING_WINDOW_MS) sensitiveReads.delete(key)
91
- }
92
-
93
- if (sensitiveReads.size > 0) {
94
- const recentPaths = [...sensitiveReads.values()].map(v => v.path).join(', ')
95
- const reason = locale === 'zh'
96
- ? `数据外泄风险: 最近读取了敏感文件 (${recentPaths}),禁止调用网络工具 ${event.toolName}`
97
- : `Data exfiltration risk: sensitive files recently read (${recentPaths}), blocking network tool ${event.toolName}`
98
-
99
- log.write({
100
- level: 'CRITICAL',
101
- layer: 'L7',
102
- action: enforce ? 'block' : 'detect',
103
- detail: reason,
104
- tool: event.toolName,
105
- pattern: 'data_exfil_chain',
106
- })
107
-
108
- if (enforce) {
109
- return { block: true, blockReason: `🚫 [ShellWard] ${reason}` }
110
- }
111
- }
112
- }
113
-
114
- // 2b. Check URL parameters in network tools for suspicious patterns
115
- if (NETWORK_TOOLS.has(toolName)) {
116
- const url = String(params.url || params.to || params.target || '')
117
- if (url) {
118
- // Block data-in-URL exfiltration patterns
119
- if (/[?&](?:data|token|key|secret|password|content)=/i.test(url)) {
120
- const reason = locale === 'zh'
121
- ? `可疑 URL 参数: ${url.slice(0, 80)} — 可能是数据外泄`
122
- : `Suspicious URL params: ${url.slice(0, 80)} — possible data exfiltration`
123
-
124
- log.write({
125
- level: 'HIGH',
126
- layer: 'L7',
127
- action: enforce ? 'block' : 'detect',
128
- detail: reason,
129
- tool: event.toolName,
130
- pattern: 'url_data_exfil',
131
- })
132
-
133
- if (enforce) {
134
- return { block: true, blockReason: `🚫 [ShellWard] ${reason}` }
135
- }
136
- }
137
- }
138
- }
18
+ const toolName = String(event.toolName || '')
19
+ const params = (event.params && typeof event.params === 'object') ? event.params : {}
139
20
 
140
- // 2c. Detect dangerous package installs
141
- if (toolName === 'exec' || toolName === 'bash') {
142
- const cmd = String(params.command || params.cmd || '')
143
- if (PKG_INSTALL_PATTERN.test(cmd)) {
144
- log.write({
145
- level: 'MEDIUM',
146
- layer: 'L7',
147
- action: 'detect',
148
- detail: locale === 'zh'
149
- ? `检测到包安装命令: ${cmd.slice(0, 80)} — 注意供应链安全`
150
- : `Package install detected: ${cmd.slice(0, 80)} — supply chain risk`,
151
- tool: event.toolName,
152
- pattern: 'pkg_install',
153
- })
154
- }
21
+ const result = guard.checkOutbound(toolName, params)
22
+ if (!result.allowed && enforce) {
23
+ return { block: true, blockReason: `🚫 [ShellWard] ${result.reason}` }
155
24
  }
156
25
  }, { name: 'shellward.data-flow-egress', priority: 250 })
157
26
 
@@ -1,171 +1,32 @@
1
- // src/layers/input-auditor.ts — L4: Injection detection + message audit via before_tool_call + message_received
1
+ // src/layers/input-auditor.ts — L4 OpenClaw Adapter
2
+ // Thin adapter: wires OpenClaw's before_tool_call + message_received hooks to ShellWard core engine
2
3
 
3
- import { INJECTION_RULES_ZH } from '../rules/injection-zh'
4
- import { INJECTION_RULES_EN } from '../rules/injection-en'
5
- import { resolveLocale } from '../types'
6
- import type { ShellWardConfig, InjectionRule, ResolvedLocale } from '../types'
7
- import type { AuditLog } from '../audit-log'
4
+ import type { ShellWard } from '../core/engine'
8
5
 
9
- interface CompiledRule extends InjectionRule {
10
- compiled: RegExp
11
- }
12
-
13
- // Text fields to extract from tool arguments for scanning
14
- const TEXT_FIELDS = [
15
- 'content', 'body', 'text', 'message', 'query',
16
- 'command', 'code', 'html', 'url', 'prompt',
17
- 'subject', 'description', 'input',
18
- ]
19
-
20
- // Hidden/invisible Unicode character ranges
21
- const HIDDEN_CHAR_RANGES: [number, number, string][] = [
22
- [0x200B, 0x200F, 'Zero-width/Direction'],
23
- [0x2028, 0x2029, 'Line/Paragraph separator'],
24
- [0x202A, 0x202E, 'Bidi control'],
25
- [0x2060, 0x2064, 'Invisible operators'],
26
- [0xFEFF, 0xFEFF, 'BOM/Zero-width no-break'],
27
- [0x00AD, 0x00AD, 'Soft hyphen'],
28
- [0xFFF9, 0xFFFB, 'Interlinear annotation'],
29
- ]
30
-
31
- export function setupInputAuditor(
32
- api: any,
33
- config: ShellWardConfig,
34
- log: AuditLog,
35
- enforce: boolean,
36
- ) {
37
- const locale = resolveLocale(config)
38
- const allRules = [...INJECTION_RULES_ZH, ...INJECTION_RULES_EN]
39
- const compiled: CompiledRule[] = allRules.map(rule => ({
40
- ...rule,
41
- compiled: new RegExp(rule.pattern, rule.flags || 'i'),
42
- }))
43
-
44
- // Hook 1: Check tool call arguments for injection
6
+ export function setupInputAuditor(api: any, guard: ShellWard, enforce: boolean) {
45
7
  api.on('before_tool_call', (event: any) => {
46
- const args: Record<string, any> = event.params || {}
47
- const texts = extractTexts(args)
8
+ const args: Record<string, any> = (event.params && typeof event.params === 'object') ? event.params : {}
9
+ const texts = guard.extractTextFields(args)
48
10
  if (texts.length === 0) return
49
11
 
12
+ const toolName = String(event.toolName || '')
13
+ const threshold = guard.getInjectionThreshold(toolName)
50
14
  const fullText = texts.join('\n')
51
- return checkInjection(fullText, event.toolName, locale, compiled, config, log, enforce)
15
+ const result = guard.checkInjection(fullText, { source: toolName, threshold })
16
+
17
+ if (!result.safe && enforce) {
18
+ const reason = guard.locale === 'zh'
19
+ ? `检测到可能的提示词注入攻击!\n风险评分: ${result.score}/100\n匹配规则: ${result.matched.map(m => m.name).join(', ')}`
20
+ : `Potential prompt injection detected!\nRisk score: ${result.score}/100\nMatched: ${result.matched.map(m => m.name).join(', ')}`
21
+ return { block: true, blockReason: `⚠️ [ShellWard] ${reason}` }
22
+ }
52
23
  }, { name: 'shellward.input-auditor', priority: 300 })
53
24
 
54
- // Hook 2: Audit inbound messages
55
25
  api.on('message_received', (event: any) => {
56
26
  const content = typeof event.content === 'string' ? event.content : ''
57
27
  if (!content) return
58
-
59
- // Detect hidden characters
60
- const hidden = detectHiddenChars(content)
61
- if (hidden.length > 0) {
62
- log.write({
63
- level: 'MEDIUM',
64
- layer: 'L4',
65
- action: 'detect',
66
- detail: `Hidden characters detected in message: ${hidden.map(h => h.name).join(', ')} (${hidden.length} chars)`,
67
- })
68
- }
69
-
70
- // Check for injection patterns (log only, don't block messages)
71
- const { score, matched } = scoreText(content, compiled)
72
- if (score >= config.injectionThreshold) {
73
- log.write({
74
- level: score >= 80 ? 'CRITICAL' : 'HIGH',
75
- layer: 'L4',
76
- action: 'detect',
77
- detail: locale === 'zh'
78
- ? `消息中检测到注入模式 (评分: ${score}): ${matched.map(m => m.name).join(', ')}`
79
- : `Injection patterns in message (score: ${score}): ${matched.map(m => m.name).join(', ')}`,
80
- })
81
- }
28
+ guard.checkInjection(content, { source: 'message' })
82
29
  }, { name: 'shellward.message-auditor', priority: 100 })
83
30
 
84
- api.logger.info(`[ShellWard] L4 Input Auditor enabled (${compiled.length} injection rules)`)
85
- }
86
-
87
- function checkInjection(
88
- text: string,
89
- tool: string,
90
- locale: ResolvedLocale,
91
- rules: CompiledRule[],
92
- config: ShellWardConfig,
93
- log: AuditLog,
94
- enforce: boolean,
95
- ): { block: true; blockReason: string } | undefined {
96
- // Hidden char detection
97
- const hidden = detectHiddenChars(text)
98
- if (hidden.length > 0) {
99
- log.write({
100
- level: 'MEDIUM',
101
- layer: 'L4',
102
- action: 'detect',
103
- detail: `Hidden chars in tool args: ${hidden.map(h => h.name).join(', ')}`,
104
- tool,
105
- })
106
- }
107
-
108
- // Score injection rules
109
- let { score, matched } = scoreText(text, rules)
110
-
111
- // Bonus for hidden chars (potential obfuscation)
112
- if (hidden.length > 3) {
113
- score += 20
114
- }
115
-
116
- if (score < config.injectionThreshold) return
117
-
118
- const reason = locale === 'zh'
119
- ? `检测到可能的提示词注入攻击!\n风险评分: ${score}/100\n匹配规则: ${matched.map(m => m.name).join(', ')}`
120
- : `Potential prompt injection detected!\nRisk score: ${score}/100\nMatched: ${matched.map(m => m.name).join(', ')}`
121
-
122
- log.write({
123
- level: score >= 80 ? 'CRITICAL' : 'HIGH',
124
- layer: 'L4',
125
- action: enforce ? 'block' : 'detect',
126
- detail: reason,
127
- tool,
128
- })
129
-
130
- if (enforce) {
131
- return { block: true, blockReason: `⚠️ [ShellWard] ${reason}` }
132
- }
133
- }
134
-
135
- function scoreText(text: string, rules: CompiledRule[]): { score: number; matched: { id: string; name: string; score: number }[] } {
136
- let score = 0
137
- const matched: { id: string; name: string; score: number }[] = []
138
-
139
- for (const rule of rules) {
140
- if (rule.compiled.test(text)) {
141
- score += rule.riskScore
142
- matched.push({ id: rule.id, name: rule.name, score: rule.riskScore })
143
- }
144
- }
145
-
146
- return { score, matched }
147
- }
148
-
149
- function extractTexts(args: Record<string, any>): string[] {
150
- const results: string[] = []
151
- for (const field of TEXT_FIELDS) {
152
- if (typeof args[field] === 'string' && args[field].length > 0) {
153
- results.push(args[field])
154
- }
155
- }
156
- return results
157
- }
158
-
159
- function detectHiddenChars(text: string): { char: string; codePoint: number; name: string }[] {
160
- const found: { char: string; codePoint: number; name: string }[] = []
161
- for (const char of text) {
162
- const cp = char.codePointAt(0)!
163
- for (const [start, end, name] of HIDDEN_CHAR_RANGES) {
164
- if (cp >= start && cp <= end) {
165
- found.push({ char, codePoint: cp, name })
166
- break
167
- }
168
- }
169
- }
170
- return found
31
+ api.logger.info(`[ShellWard] L4 Input Auditor enabled`)
171
32
  }
@@ -1,66 +1,23 @@
1
- // src/layers/outbound-guard.ts — L6: Redact secrets from LLM responses + detect canary leaks
2
- // Uses message_sending hook to inspect outbound messages before they reach the user
1
+ // src/layers/outbound-guard.ts — L6 OpenClaw Adapter
2
+ // Thin adapter: wires OpenClaw's message_sending hook to ShellWard core engine
3
3
 
4
- import { redactSensitive } from '../rules/sensitive-patterns'
5
- import { getCanaryToken } from './prompt-guard'
6
- import { resolveLocale } from '../types'
7
- import type { ShellWardConfig } from '../types'
8
- import type { AuditLog } from '../audit-log'
9
-
10
- export function setupOutboundGuard(
11
- api: any,
12
- config: ShellWardConfig,
13
- log: AuditLog,
14
- enforce: boolean,
15
- ) {
16
- const locale = resolveLocale(config)
4
+ import type { ShellWard } from '../core/engine'
17
5
 
6
+ export function setupOutboundGuard(api: any, guard: ShellWard, enforce: boolean) {
18
7
  api.on('message_sending', (event: any) => {
19
8
  const content = event.content
20
9
  if (!content || typeof content !== 'string') return undefined
21
10
 
22
- // 1. Check for canary token leak (system prompt exfiltration)
23
- const canary = getCanaryToken()
24
- if (canary && content.includes(canary)) {
25
- log.write({
26
- level: 'CRITICAL',
27
- layer: 'L6',
28
- action: 'block',
29
- detail: locale === 'zh'
30
- ? '检测到系统提示词泄露!Canary token 出现在输出中'
31
- : 'System prompt exfiltration detected! Canary token found in output',
32
- pattern: 'canary_leak',
33
- })
34
- if (enforce) {
35
- const warning = locale === 'zh'
36
- ? '⚠️ [ShellWard] 检测到安全异常,本次回复已被拦截。可能存在提示词注入攻击。'
37
- : '⚠️ [ShellWard] Security anomaly detected, this response was blocked. Possible prompt injection attack.'
38
- return { content: warning }
39
- }
40
- }
41
-
42
- // 2. Redact sensitive data from LLM response text
43
- const [redacted, findings] = redactSensitive(content)
44
- if (findings.length === 0) return undefined
11
+ const result = guard.checkResponse(content)
45
12
 
46
- for (const f of findings) {
47
- log.write({
48
- level: 'HIGH',
49
- layer: 'L6',
50
- action: enforce ? 'redact' : 'detect',
51
- detail: `${f.name}: ${f.count} occurrence(s) in outbound message`,
52
- pattern: f.id,
53
- })
13
+ if (result.canaryLeak && enforce) {
14
+ const warning = guard.locale === 'zh'
15
+ ? '⚠️ [ShellWard] 检测到安全异常,本次回复已被拦截。可能存在提示词注入攻击。'
16
+ : '⚠️ [ShellWard] Security anomaly detected, this response was blocked. Possible prompt injection attack.'
17
+ return { content: warning }
54
18
  }
55
19
 
56
- if (!enforce) return undefined
57
-
58
- const summary = findings.map(f => `${f.name}(${f.count})`).join(', ')
59
- const notice = locale === 'zh'
60
- ? `\n\n⚠️ [ShellWard] 回复中的敏感信息已自动脱敏: ${summary}`
61
- : `\n\n⚠️ [ShellWard] Sensitive data in response auto-redacted: ${summary}`
62
-
63
- return { content: redacted + notice }
20
+ return undefined
64
21
  }, { name: 'shellward.outbound-guard', priority: 100 })
65
22
 
66
23
  api.logger.info('[ShellWard] L6 Outbound Guard enabled')