shellward 0.4.0 → 0.5.1

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -1,7 +1,13 @@
1
- // src/index.ts — ShellWard plugin entry point (v0.4.0)
2
- // 8 defense layers + 6 slash commands + 1 security skill
3
-
4
- import { AuditLog } from './audit-log'
1
+ // src/index.ts — ShellWard: AI Agent Security Middleware
2
+ //
3
+ // Two usage modes:
4
+ // 1. SDK (any platform): import { ShellWard } from 'shellward'
5
+ // 2. OpenClaw plugin: import shellward from 'shellward'
6
+ //
7
+ // See docs/定位.md — ShellWard is an AI Agent Security Layer,
8
+ // NOT just an OpenClaw plugin. The core engine is platform-agnostic.
9
+
10
+ import { ShellWard } from './core/engine'
5
11
  import { setupPromptGuard } from './layers/prompt-guard'
6
12
  import { setupOutputScanner } from './layers/output-scanner'
7
13
  import { setupToolBlocker } from './layers/tool-blocker'
@@ -11,11 +17,15 @@ import { setupOutboundGuard } from './layers/outbound-guard'
11
17
  import { setupDataFlowGuard } from './layers/data-flow-guard'
12
18
  import { setupSessionGuard } from './layers/session-guard'
13
19
  import { registerAllCommands } from './commands/index'
14
- import { DEFAULT_CONFIG, resolveLocale } from './types'
15
20
  import { checkForUpdate } from './update-check'
16
- import type { ShellWardConfig } from './types'
21
+ import { runAutoCheckOnStartup } from './auto-check'
22
+
23
+ const CURRENT_VERSION = '0.5.0'
17
24
 
18
- const CURRENT_VERSION = '0.4.0'
25
+ // Re-export core engine for SDK usage
26
+ export { ShellWard } from './core/engine'
27
+ export type { CheckResult, ScanResult, InjectionResult, ResponseCheckResult } from './core/engine'
28
+ export type { ShellWardConfig } from './types'
19
29
 
20
30
  /**
21
31
  * Wrap api.on so every hook handler gets try-catch protection.
@@ -23,7 +33,7 @@ const CURRENT_VERSION = '0.4.0'
23
33
  * - before_tool_call: block (deny on error, safer than allow)
24
34
  * - other hooks: return undefined (don't break the chain)
25
35
  */
26
- function createSafeApi(api: any, log: AuditLog): any {
36
+ function createSafeApi(api: any, guard: ShellWard): any {
27
37
  return {
28
38
  ...api,
29
39
  on(hookName: string, handler: Function, opts?: any) {
@@ -33,14 +43,13 @@ function createSafeApi(api: any, log: AuditLog): any {
33
43
  return handler(event)
34
44
  } catch (err: any) {
35
45
  const msg = err?.message || String(err)
36
- log.write({
46
+ guard.log.write({
37
47
  level: 'CRITICAL',
38
48
  layer: 'L0',
39
49
  action: 'error',
40
50
  detail: `Hook ${opts?.name || hookName} threw: ${msg.slice(0, 200)}`,
41
51
  })
42
52
  try { api.logger.warn(`[ShellWard] Hook error in ${opts?.name || hookName}: ${msg}`) } catch {}
43
- // Fail-safe: block on security hooks, pass on others
44
53
  if (isBlockHook) {
45
54
  return { block: true, blockReason: `⚠️ [ShellWard] Internal error in security check — operation blocked for safety` }
46
55
  }
@@ -52,119 +61,88 @@ function createSafeApi(api: any, log: AuditLog): any {
52
61
  }
53
62
  }
54
63
 
55
- function mergeConfig(userConfig: Partial<ShellWardConfig> | undefined): ShellWardConfig {
56
- if (!userConfig) return { ...DEFAULT_CONFIG }
57
-
58
- // Validate mode
59
- const mode = userConfig.mode === 'audit' ? 'audit' : 'enforce'
60
-
61
- // Validate locale
62
- const validLocales = ['auto', 'zh', 'en'] as const
63
- const locale = validLocales.includes(userConfig.locale as any)
64
- ? (userConfig.locale as typeof validLocales[number])
65
- : DEFAULT_CONFIG.locale
66
-
67
- // Validate injectionThreshold: clamp to 0-100
68
- let threshold = userConfig.injectionThreshold ?? DEFAULT_CONFIG.injectionThreshold
69
- threshold = Math.max(0, Math.min(100, Math.round(threshold)))
70
-
71
- return {
72
- mode,
73
- locale,
74
- injectionThreshold: threshold,
75
- layers: {
76
- ...DEFAULT_CONFIG.layers,
77
- ...(userConfig.layers || {}),
78
- },
79
- }
80
- }
81
-
64
+ // OpenClaw plugin entry point
82
65
  export default {
83
66
  id: 'shellward',
84
67
 
85
68
  register(api: any) {
86
- const config = mergeConfig(api.config)
87
- const log = new AuditLog(config)
88
- const enforce = config.mode === 'enforce'
89
- const locale = resolveLocale(config)
90
- const safe = createSafeApi(api, log)
91
-
92
- const modeLabel = locale === 'zh'
93
- ? `模式: ${config.mode}`
94
- : `mode: ${config.mode}`
95
- api.logger.info(`[ShellWard] Security plugin started (${modeLabel})`)
96
-
97
- // === Defense Layers (L1-L8) ===
98
- // All layers use `safe` wrapper — hooks get automatic try-catch + fail-safe
99
-
100
- // L1: Prompt Guard (before_prompt_build — prependSystemContext for caching)
101
- if (config.layers.promptGuard) {
102
- setupPromptGuard(safe, config, log)
69
+ const guard = new ShellWard(api.config)
70
+ const enforce = guard.config.mode === 'enforce'
71
+ const safe = createSafeApi(api, guard)
72
+
73
+ const startMsg = guard.locale === 'zh'
74
+ ? `[ShellWard] AI Agent 安全中间件已启动 (v${CURRENT_VERSION}, 模式: ${guard.config.mode})`
75
+ : `[ShellWard] AI Agent Security Middleware started (v${CURRENT_VERSION}, mode: ${guard.config.mode})`
76
+ api.logger.info(startMsg)
77
+
78
+ // === Defense Layers (L1-L8) — thin adapters calling core engine ===
79
+
80
+ if (guard.config.layers.promptGuard) {
81
+ setupPromptGuard(safe, guard)
103
82
  }
104
83
 
105
- // L2: Output Scanner (tool_result_persist — redact PII in tool results)
106
- if (config.layers.outputScanner) {
107
- setupOutputScanner(safe, config, log, enforce)
84
+ if (guard.config.layers.outputScanner) {
85
+ setupOutputScanner(safe, guard)
108
86
  }
109
87
 
110
- // L3: Tool Blocker (before_tool_call — block dangerous commands/paths)
111
- if (config.layers.toolBlocker) {
112
- setupToolBlocker(safe, config, log, enforce)
88
+ if (guard.config.layers.toolBlocker) {
89
+ setupToolBlocker(safe, guard, enforce)
113
90
  }
114
91
 
115
- // L4: Input Auditor (before_tool_call + message_received — injection detection)
116
- if (config.layers.inputAuditor) {
117
- setupInputAuditor(safe, config, log, enforce)
92
+ if (guard.config.layers.inputAuditor) {
93
+ setupInputAuditor(safe, guard, enforce)
118
94
  }
119
95
 
120
- // L5: Security Gate (registerTool — defense in depth, uses raw api for registerTool)
121
- if (config.layers.securityGate) {
122
- setupSecurityGate(api, config, log, enforce)
96
+ // L5 uses raw api for registerTool (not a hook)
97
+ if (guard.config.layers.securityGate) {
98
+ setupSecurityGate(api, guard, enforce)
123
99
  }
124
100
 
125
- // L6: Outbound Guard (message_sending — redact PII in LLM responses + canary detection)
126
- if (config.layers.outboundGuard) {
127
- setupOutboundGuard(safe, config, log, enforce)
101
+ if (guard.config.layers.outboundGuard) {
102
+ setupOutboundGuard(safe, guard, enforce)
128
103
  }
129
104
 
130
- // L7: Data Flow Guard (after_tool_call + before_tool_call — anti-exfiltration)
131
- if (config.layers.dataFlowGuard) {
132
- setupDataFlowGuard(safe, config, log, enforce)
105
+ if (guard.config.layers.dataFlowGuard) {
106
+ setupDataFlowGuard(safe, guard, enforce)
133
107
  }
134
108
 
135
- // L8: Session Guard (session_end + subagent_spawning — lifecycle security)
136
- if (config.layers.sessionGuard) {
137
- setupSessionGuard(safe, config, log, enforce)
109
+ if (guard.config.layers.sessionGuard) {
110
+ setupSessionGuard(safe, guard, enforce)
138
111
  }
139
112
 
140
113
  // === Slash Commands ===
141
114
  if (api.registerCommand) {
142
- registerAllCommands(api, config)
143
- api.logger.info('[ShellWard] 6 commands registered: /security /audit /harden /scan-plugins /check-updates /cg')
115
+ registerAllCommands(api, guard.config)
116
+ api.logger.info('[ShellWard] 6 commands registered')
144
117
  }
145
118
 
146
- // Count enabled layers
147
119
  const allLayers = ['promptGuard', 'outputScanner', 'toolBlocker', 'inputAuditor', 'securityGate', 'outboundGuard', 'dataFlowGuard', 'sessionGuard']
148
- const enabledCount = allLayers.filter(k => (config.layers as any)[k]).length
120
+ const enabledCount = allLayers.filter(k => (guard.config.layers as any)[k]).length
149
121
 
150
- api.logger.info(`[ShellWard] ${enabledCount} defense layers active`)
122
+ const layerMsg = guard.locale === 'zh'
123
+ ? `[ShellWard] ${enabledCount} 层防御已激活 — 敏感数据审计 | 注入检测 | 外泄拦截`
124
+ : `[ShellWard] ${enabledCount} defense layers active`
125
+ api.logger.info(layerMsg)
151
126
 
152
- log.write({
127
+ guard.log.write({
153
128
  level: 'INFO',
154
129
  layer: 'L1',
155
130
  action: 'allow',
156
131
  detail: `ShellWard v${CURRENT_VERSION} started with ${enabledCount} layers`,
157
132
  })
158
133
 
159
- // === Non-blocking update check (async, won't delay startup) ===
160
- // Only notifies ONCE per new version — won't repeat after user has seen it
161
134
  checkForUpdate(CURRENT_VERSION).then(result => {
162
135
  if (result?.shouldNotify) {
163
- const msg = locale === 'zh'
164
- ? `[ShellWard] 新版本 v${result.latest} 可用 (当前 v${result.current})。运行 \`openclaw plugins update shellward\` 更新`
165
- : `[ShellWard] Update available: v${result.latest} (current v${result.current}). Run \`openclaw plugins update shellward\` to update`
136
+ const msg = guard.locale === 'zh'
137
+ ? `[ShellWard] 新版本 v${result.latest} 可用 (当前 v${result.current})`
138
+ : `[ShellWard] Update available: v${result.latest} (current v${result.current})`
166
139
  api.logger.warn(msg)
167
140
  }
168
- }).catch(() => { /* silently ignore network errors */ })
141
+ }).catch(() => {})
142
+
143
+ // 启动时自动安全检查(OpenClaw 漏洞、插件风险、MCP 配置、root 运行)
144
+ if (guard.config.autoCheckOnStartup !== false) {
145
+ runAutoCheckOnStartup(api.logger, guard.locale)
146
+ }
169
147
  },
170
148
  }
package/src/layers/data-flow-guard.ts CHANGED
@@ -1,157 +1,26 @@
1
- // src/layers/data-flow-guard.ts — L7: Cross-tool data flow tracking
2
- // Detects: read sensitive file send via network tool (data exfiltration chain)
3
- // Uses: after_tool_call (track reads) + before_tool_call (block exfil sends)
1
+ // src/layers/data-flow-guard.ts — L7 OpenClaw Adapter
2
+ // Thin adapter: wires OpenClaw's after_tool_call + before_tool_call hooks to ShellWard core engine
4
3
 
5
- import { PROTECTED_PATHS } from '../rules/protected-paths'
6
- import { resolveLocale } from '../types'
7
- import type { ShellWardConfig } from '../types'
8
- import type { AuditLog } from '../audit-log'
4
+ import type { ShellWard } from '../core/engine'
9
5
 
10
- // Network/outbound tools that could exfiltrate data
11
- const NETWORK_TOOLS = new Set([
12
- 'web_fetch', 'http_request', 'web_search',
13
- 'send_email', 'send_message', 'post_tweet',
14
- 'message', 'sessions_send',
15
- ])
16
-
17
- // Read tools that access local files
18
- const READ_TOOLS = new Set([
19
- 'read', 'file_read', 'cat', 'exec', 'bash',
20
- ])
21
-
22
- // Package install commands that could run postinstall scripts
23
- const PKG_INSTALL_PATTERN = /(?:npm|yarn|pnpm)\s+(?:install|add|i)\s|pip\s+install\s|gem\s+install\s/i
24
-
25
- // Track sensitive file reads within a session (tool call IDs or content hashes)
26
- const sensitiveReads: Map<string, { path: string; ts: number }> = new Map()
27
- const TRACKING_WINDOW_MS = 5 * 60 * 1000 // 5 min window
28
- const MAX_TRACKED_READS = 500 // Prevent unbounded memory growth
29
-
30
- export function setupDataFlowGuard(
31
- api: any,
32
- config: ShellWardConfig,
33
- log: AuditLog,
34
- enforce: boolean,
35
- ) {
36
- const locale = resolveLocale(config)
37
-
38
- // === Part 1: Track sensitive file reads via after_tool_call ===
6
+ export function setupDataFlowGuard(api: any, guard: ShellWard, enforce: boolean) {
39
7
  api.on('after_tool_call', (event: any) => {
40
8
  const toolName = String(event.toolName || '').toLowerCase()
41
9
  const params = (event.params && typeof event.params === 'object') ? event.params : {}
42
- const path = String(params.path || params.file_path || params.filename || '')
10
+ const path = String(params.path || params.file_path || params.filename || params.target || '')
43
11
 
44
- if (!READ_TOOLS.has(toolName) || !path) return
45
-
46
- // Check if it's a protected/sensitive path
47
- for (const rule of PROTECTED_PATHS) {
48
- if (rule.pattern.test(path)) {
49
- // Evict oldest entry if at capacity
50
- if (sensitiveReads.size >= MAX_TRACKED_READS) {
51
- const oldest = sensitiveReads.keys().next().value
52
- if (oldest) sensitiveReads.delete(oldest)
53
- }
54
- const key = `${Date.now()}-${path}`
55
- sensitiveReads.set(key, { path, ts: Date.now() })
56
-
57
- log.write({
58
- level: 'MEDIUM',
59
- layer: 'L7',
60
- action: 'detect',
61
- detail: locale === 'zh'
62
- ? `检测到敏感文件读取: ${path} — 已加入数据流监控`
63
- : `Sensitive file read detected: ${path} — added to data flow tracking`,
64
- tool: event.toolName,
65
- pattern: rule.id,
66
- })
67
- break
68
- }
69
- }
70
-
71
- // Cleanup old entries
72
- const now = Date.now()
73
- for (const [key, val] of sensitiveReads) {
74
- if (now - val.ts > TRACKING_WINDOW_MS) {
75
- sensitiveReads.delete(key)
76
- }
12
+ if (guard.isReadTool(toolName) && path) {
13
+ guard.trackFileRead(event.toolName, path)
77
14
  }
78
15
  }, { name: 'shellward.data-flow-read-tracker', priority: 50 })
79
16
 
80
- // === Part 2: Block network tool calls if sensitive data was recently read ===
81
17
  api.on('before_tool_call', (event: any) => {
82
- const toolName = String(event.toolName || '').toLowerCase()
18
+ const toolName = String(event.toolName || '')
83
19
  const params = (event.params && typeof event.params === 'object') ? event.params : {}
84
20
 
85
- // 2a. Block network tools if sensitive files were recently read
86
- if (NETWORK_TOOLS.has(toolName) && sensitiveReads.size > 0) {
87
- // Clean up expired entries first
88
- const now = Date.now()
89
- for (const [key, val] of sensitiveReads) {
90
- if (now - val.ts > TRACKING_WINDOW_MS) sensitiveReads.delete(key)
91
- }
92
-
93
- if (sensitiveReads.size > 0) {
94
- const recentPaths = [...sensitiveReads.values()].map(v => v.path).join(', ')
95
- const reason = locale === 'zh'
96
- ? `数据外泄风险: 最近读取了敏感文件 (${recentPaths}),禁止调用网络工具 ${event.toolName}`
97
- : `Data exfiltration risk: sensitive files recently read (${recentPaths}), blocking network tool ${event.toolName}`
98
-
99
- log.write({
100
- level: 'CRITICAL',
101
- layer: 'L7',
102
- action: enforce ? 'block' : 'detect',
103
- detail: reason,
104
- tool: event.toolName,
105
- pattern: 'data_exfil_chain',
106
- })
107
-
108
- if (enforce) {
109
- return { block: true, blockReason: `🚫 [ShellWard] ${reason}` }
110
- }
111
- }
112
- }
113
-
114
- // 2b. Check URL parameters in network tools for suspicious patterns
115
- if (NETWORK_TOOLS.has(toolName)) {
116
- const url = String(params.url || params.to || params.target || '')
117
- if (url) {
118
- // Block data-in-URL exfiltration patterns
119
- if (/[?&](?:data|token|key|secret|password|content)=/i.test(url)) {
120
- const reason = locale === 'zh'
121
- ? `可疑 URL 参数: ${url.slice(0, 80)} — 可能是数据外泄`
122
- : `Suspicious URL params: ${url.slice(0, 80)} — possible data exfiltration`
123
-
124
- log.write({
125
- level: 'HIGH',
126
- layer: 'L7',
127
- action: enforce ? 'block' : 'detect',
128
- detail: reason,
129
- tool: event.toolName,
130
- pattern: 'url_data_exfil',
131
- })
132
-
133
- if (enforce) {
134
- return { block: true, blockReason: `🚫 [ShellWard] ${reason}` }
135
- }
136
- }
137
- }
138
- }
139
-
140
- // 2c. Detect dangerous package installs
141
- if (toolName === 'exec' || toolName === 'bash') {
142
- const cmd = String(params.command || params.cmd || '')
143
- if (PKG_INSTALL_PATTERN.test(cmd)) {
144
- log.write({
145
- level: 'MEDIUM',
146
- layer: 'L7',
147
- action: 'detect',
148
- detail: locale === 'zh'
149
- ? `检测到包安装命令: ${cmd.slice(0, 80)} — 注意供应链安全`
150
- : `Package install detected: ${cmd.slice(0, 80)} — supply chain risk`,
151
- tool: event.toolName,
152
- pattern: 'pkg_install',
153
- })
154
- }
21
+ const result = guard.checkOutbound(toolName, params)
22
+ if (!result.allowed && enforce) {
23
+ return { block: true, blockReason: `🚫 [ShellWard] ${result.reason}` }
155
24
  }
156
25
  }, { name: 'shellward.data-flow-egress', priority: 250 })
157
26
 
package/src/layers/input-auditor.ts CHANGED
@@ -1,171 +1,32 @@
1
- // src/layers/input-auditor.ts — L4: Injection detection + message audit via before_tool_call + message_received
1
+ // src/layers/input-auditor.ts — L4 OpenClaw Adapter
2
+ // Thin adapter: wires OpenClaw's before_tool_call + message_received hooks to ShellWard core engine
2
3
 
3
- import { INJECTION_RULES_ZH } from '../rules/injection-zh'
4
- import { INJECTION_RULES_EN } from '../rules/injection-en'
5
- import { resolveLocale } from '../types'
6
- import type { ShellWardConfig, InjectionRule, ResolvedLocale } from '../types'
7
- import type { AuditLog } from '../audit-log'
4
+ import type { ShellWard } from '../core/engine'
8
5
 
9
- interface CompiledRule extends InjectionRule {
10
- compiled: RegExp
11
- }
12
-
13
- // Text fields to extract from tool arguments for scanning
14
- const TEXT_FIELDS = [
15
- 'content', 'body', 'text', 'message', 'query',
16
- 'command', 'code', 'html', 'url', 'prompt',
17
- 'subject', 'description', 'input',
18
- ]
19
-
20
- // Hidden/invisible Unicode character ranges
21
- const HIDDEN_CHAR_RANGES: [number, number, string][] = [
22
- [0x200B, 0x200F, 'Zero-width/Direction'],
23
- [0x2028, 0x2029, 'Line/Paragraph separator'],
24
- [0x202A, 0x202E, 'Bidi control'],
25
- [0x2060, 0x2064, 'Invisible operators'],
26
- [0xFEFF, 0xFEFF, 'BOM/Zero-width no-break'],
27
- [0x00AD, 0x00AD, 'Soft hyphen'],
28
- [0xFFF9, 0xFFFB, 'Interlinear annotation'],
29
- ]
30
-
31
- export function setupInputAuditor(
32
- api: any,
33
- config: ShellWardConfig,
34
- log: AuditLog,
35
- enforce: boolean,
36
- ) {
37
- const locale = resolveLocale(config)
38
- const allRules = [...INJECTION_RULES_ZH, ...INJECTION_RULES_EN]
39
- const compiled: CompiledRule[] = allRules.map(rule => ({
40
- ...rule,
41
- compiled: new RegExp(rule.pattern, rule.flags || 'i'),
42
- }))
43
-
44
- // Hook 1: Check tool call arguments for injection
6
+ export function setupInputAuditor(api: any, guard: ShellWard, enforce: boolean) {
45
7
  api.on('before_tool_call', (event: any) => {
46
8
  const args: Record<string, any> = (event.params && typeof event.params === 'object') ? event.params : {}
47
- const texts = extractTexts(args)
9
+ const texts = guard.extractTextFields(args)
48
10
  if (texts.length === 0) return
49
11
 
12
+ const toolName = String(event.toolName || '')
13
+ const threshold = guard.getInjectionThreshold(toolName)
50
14
  const fullText = texts.join('\n')
51
- return checkInjection(fullText, event.toolName, locale, compiled, config, log, enforce)
15
+ const result = guard.checkInjection(fullText, { source: toolName, threshold })
16
+
17
+ if (!result.safe && enforce) {
18
+ const reason = guard.locale === 'zh'
19
+ ? `检测到可能的提示词注入攻击!\n风险评分: ${result.score}/100\n匹配规则: ${result.matched.map(m => m.name).join(', ')}`
20
+ : `Potential prompt injection detected!\nRisk score: ${result.score}/100\nMatched: ${result.matched.map(m => m.name).join(', ')}`
21
+ return { block: true, blockReason: `⚠️ [ShellWard] ${reason}` }
22
+ }
52
23
  }, { name: 'shellward.input-auditor', priority: 300 })
53
24
 
54
- // Hook 2: Audit inbound messages
55
25
  api.on('message_received', (event: any) => {
56
26
  const content = typeof event.content === 'string' ? event.content : ''
57
27
  if (!content) return
58
-
59
- // Detect hidden characters
60
- const hidden = detectHiddenChars(content)
61
- if (hidden.length > 0) {
62
- log.write({
63
- level: 'MEDIUM',
64
- layer: 'L4',
65
- action: 'detect',
66
- detail: `Hidden characters detected in message: ${hidden.map(h => h.name).join(', ')} (${hidden.length} chars)`,
67
- })
68
- }
69
-
70
- // Check for injection patterns (log only, don't block messages)
71
- const { score, matched } = scoreText(content, compiled)
72
- if (score >= config.injectionThreshold) {
73
- log.write({
74
- level: score >= 80 ? 'CRITICAL' : 'HIGH',
75
- layer: 'L4',
76
- action: 'detect',
77
- detail: locale === 'zh'
78
- ? `消息中检测到注入模式 (评分: ${score}): ${matched.map(m => m.name).join(', ')}`
79
- : `Injection patterns in message (score: ${score}): ${matched.map(m => m.name).join(', ')}`,
80
- })
81
- }
28
+ guard.checkInjection(content, { source: 'message' })
82
29
  }, { name: 'shellward.message-auditor', priority: 100 })
83
30
 
84
- api.logger.info(`[ShellWard] L4 Input Auditor enabled (${compiled.length} injection rules)`)
85
- }
86
-
87
- function checkInjection(
88
- text: string,
89
- tool: string,
90
- locale: ResolvedLocale,
91
- rules: CompiledRule[],
92
- config: ShellWardConfig,
93
- log: AuditLog,
94
- enforce: boolean,
95
- ): { block: true; blockReason: string } | undefined {
96
- // Hidden char detection
97
- const hidden = detectHiddenChars(text)
98
- if (hidden.length > 0) {
99
- log.write({
100
- level: 'MEDIUM',
101
- layer: 'L4',
102
- action: 'detect',
103
- detail: `Hidden chars in tool args: ${hidden.map(h => h.name).join(', ')}`,
104
- tool,
105
- })
106
- }
107
-
108
- // Score injection rules
109
- let { score, matched } = scoreText(text, rules)
110
-
111
- // Bonus for hidden chars (potential obfuscation)
112
- if (hidden.length > 3) {
113
- score += 20
114
- }
115
-
116
- if (score < config.injectionThreshold) return
117
-
118
- const reason = locale === 'zh'
119
- ? `检测到可能的提示词注入攻击!\n风险评分: ${score}/100\n匹配规则: ${matched.map(m => m.name).join(', ')}`
120
- : `Potential prompt injection detected!\nRisk score: ${score}/100\nMatched: ${matched.map(m => m.name).join(', ')}`
121
-
122
- log.write({
123
- level: score >= 80 ? 'CRITICAL' : 'HIGH',
124
- layer: 'L4',
125
- action: enforce ? 'block' : 'detect',
126
- detail: reason,
127
- tool,
128
- })
129
-
130
- if (enforce) {
131
- return { block: true, blockReason: `⚠️ [ShellWard] ${reason}` }
132
- }
133
- }
134
-
135
- function scoreText(text: string, rules: CompiledRule[]): { score: number; matched: { id: string; name: string; score: number }[] } {
136
- let score = 0
137
- const matched: { id: string; name: string; score: number }[] = []
138
-
139
- for (const rule of rules) {
140
- if (rule.compiled.test(text)) {
141
- score += rule.riskScore
142
- matched.push({ id: rule.id, name: rule.name, score: rule.riskScore })
143
- }
144
- }
145
-
146
- return { score, matched }
147
- }
148
-
149
- function extractTexts(args: Record<string, any>): string[] {
150
- const results: string[] = []
151
- for (const field of TEXT_FIELDS) {
152
- if (typeof args[field] === 'string' && args[field].length > 0) {
153
- results.push(args[field])
154
- }
155
- }
156
- return results
157
- }
158
-
159
- function detectHiddenChars(text: string): { char: string; codePoint: number; name: string }[] {
160
- const found: { char: string; codePoint: number; name: string }[] = []
161
- for (const char of text) {
162
- const cp = char.codePointAt(0)!
163
- for (const [start, end, name] of HIDDEN_CHAR_RANGES) {
164
- if (cp >= start && cp <= end) {
165
- found.push({ char, codePoint: cp, name })
166
- break
167
- }
168
- }
169
- }
170
- return found
31
+ api.logger.info(`[ShellWard] L4 Input Auditor enabled`)
171
32
  }
package/src/layers/outbound-guard.ts CHANGED
@@ -1,66 +1,23 @@
1
- // src/layers/outbound-guard.ts — L6: Redact secrets from LLM responses + detect canary leaks
2
- // Uses message_sending hook to inspect outbound messages before they reach the user
1
+ // src/layers/outbound-guard.ts — L6 OpenClaw Adapter
2
+ // Thin adapter: wires OpenClaw's message_sending hook to ShellWard core engine
3
3
 
4
- import { redactSensitive } from '../rules/sensitive-patterns'
5
- import { getCanaryToken } from './prompt-guard'
6
- import { resolveLocale } from '../types'
7
- import type { ShellWardConfig } from '../types'
8
- import type { AuditLog } from '../audit-log'
9
-
10
- export function setupOutboundGuard(
11
- api: any,
12
- config: ShellWardConfig,
13
- log: AuditLog,
14
- enforce: boolean,
15
- ) {
16
- const locale = resolveLocale(config)
4
+ import type { ShellWard } from '../core/engine'
17
5
 
6
+ export function setupOutboundGuard(api: any, guard: ShellWard, enforce: boolean) {
18
7
  api.on('message_sending', (event: any) => {
19
8
  const content = event.content
20
9
  if (!content || typeof content !== 'string') return undefined
21
10
 
22
- // 1. Check for canary token leak (system prompt exfiltration)
23
- const canary = getCanaryToken()
24
- if (canary && content.includes(canary)) {
25
- log.write({
26
- level: 'CRITICAL',
27
- layer: 'L6',
28
- action: 'block',
29
- detail: locale === 'zh'
30
- ? '检测到系统提示词泄露!Canary token 出现在输出中'
31
- : 'System prompt exfiltration detected! Canary token found in output',
32
- pattern: 'canary_leak',
33
- })
34
- if (enforce) {
35
- const warning = locale === 'zh'
36
- ? '⚠️ [ShellWard] 检测到安全异常,本次回复已被拦截。可能存在提示词注入攻击。'
37
- : '⚠️ [ShellWard] Security anomaly detected, this response was blocked. Possible prompt injection attack.'
38
- return { content: warning }
39
- }
40
- }
41
-
42
- // 2. Redact sensitive data from LLM response text
43
- const [redacted, findings] = redactSensitive(content)
44
- if (findings.length === 0) return undefined
11
+ const result = guard.checkResponse(content)
45
12
 
46
- for (const f of findings) {
47
- log.write({
48
- level: 'HIGH',
49
- layer: 'L6',
50
- action: enforce ? 'redact' : 'detect',
51
- detail: `${f.name}: ${f.count} occurrence(s) in outbound message`,
52
- pattern: f.id,
53
- })
13
+ if (result.canaryLeak && enforce) {
14
+ const warning = guard.locale === 'zh'
15
+ ? '⚠️ [ShellWard] 检测到安全异常,本次回复已被拦截。可能存在提示词注入攻击。'
16
+ : '⚠️ [ShellWard] Security anomaly detected, this response was blocked. Possible prompt injection attack.'
17
+ return { content: warning }
54
18
  }
55
19
 
56
- if (!enforce) return undefined
57
-
58
- const summary = findings.map(f => `${f.name}(${f.count})`).join(', ')
59
- const notice = locale === 'zh'
60
- ? `\n\n⚠️ [ShellWard] 回复中的敏感信息已自动脱敏: ${summary}`
61
- : `\n\n⚠️ [ShellWard] Sensitive data in response auto-redacted: ${summary}`
62
-
63
- return { content: redacted + notice }
20
+ return undefined
64
21
  }, { name: 'shellward.outbound-guard', priority: 100 })
65
22
 
66
23
  api.logger.info('[ShellWard] L6 Outbound Guard enabled')