clawmoat 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/CONTRIBUTING.md +56 -0
  2. package/LICENSE +21 -0
  3. package/README.md +199 -0
  4. package/bin/clawmoat.js +407 -0
  5. package/docs/CNAME +1 -0
  6. package/docs/MIT-RISK-GAP-ANALYSIS.md +146 -0
  7. package/docs/badge/score-A.svg +21 -0
  8. package/docs/badge/score-Aplus.svg +21 -0
  9. package/docs/badge/score-B.svg +21 -0
  10. package/docs/badge/score-C.svg +21 -0
  11. package/docs/badge/score-D.svg +21 -0
  12. package/docs/badge/score-F.svg +21 -0
  13. package/docs/blog/index.html +90 -0
  14. package/docs/blog/owasp-agentic-ai-top10.html +187 -0
  15. package/docs/blog/owasp-agentic-ai-top10.md +185 -0
  16. package/docs/blog/securing-ai-agents.html +194 -0
  17. package/docs/blog/securing-ai-agents.md +152 -0
  18. package/docs/compare.html +312 -0
  19. package/docs/index.html +654 -0
  20. package/docs/integrations/langchain.html +281 -0
  21. package/docs/integrations/openai.html +302 -0
  22. package/docs/integrations/openclaw.html +310 -0
  23. package/docs/robots.txt +3 -0
  24. package/docs/sitemap.xml +28 -0
  25. package/docs/thanks.html +79 -0
  26. package/package.json +35 -0
  27. package/server/Dockerfile +7 -0
  28. package/server/index.js +85 -0
  29. package/server/package.json +12 -0
  30. package/skill/SKILL.md +56 -0
  31. package/src/badge.js +87 -0
  32. package/src/index.js +316 -0
  33. package/src/middleware/openclaw.js +133 -0
  34. package/src/policies/engine.js +180 -0
  35. package/src/scanners/exfiltration.js +97 -0
  36. package/src/scanners/jailbreak.js +81 -0
  37. package/src/scanners/memory-poison.js +68 -0
  38. package/src/scanners/pii.js +128 -0
  39. package/src/scanners/prompt-injection.js +138 -0
  40. package/src/scanners/secrets.js +97 -0
  41. package/src/scanners/supply-chain.js +155 -0
  42. package/src/scanners/urls.js +142 -0
  43. package/src/utils/config.js +137 -0
  44. package/src/utils/logger.js +109 -0
@@ -0,0 +1,138 @@
1
+ /**
2
+ * ClawMoat — Prompt Injection Scanner
3
+ *
4
+ * Detects prompt injection attempts in text using:
5
+ * 1. Pattern matching (known injection patterns)
6
+ * 2. Heuristic scoring (instruction-like language in data context)
7
+ * 3. (Future) ML classifier via LlamaFirewall/NeMo
8
+ */
9
+
10
// Known prompt injection patterns (all case-insensitive), grouped by the
// subtype that is reported in findings. Each row is [severity, regex].
// Group order and row order are significant: they define the order of
// INJECTION_PATTERNS and therefore the order of reported findings.
const INJECTION_RULES_BY_SUBTYPE = {
  // Direct instruction override
  instruction_override: [
    ['critical', /ignore\s+(all\s+)?(previous|prior|above|earlier)\s+(instructions?|prompts?|rules?|guidelines?)/i],
    ['critical', /disregard\s+(all\s+)?(previous|prior|your)\s+(instructions?|prompts?|rules?|programming)/i],
    ['high', /forget\s+(all\s+)?(previous|prior|your|everything)/i],
    ['critical', /override\s+(your|all|the)\s+(instructions?|rules?|guidelines?|programming)/i],
  ],

  // Role manipulation
  role_manipulation: [
    ['high', /you\s+are\s+now\s+(a|an|the|my)\s+/i],
    ['medium', /act\s+as\s+(a|an|if|though)\s+/i],
    ['high', /pretend\s+(you('re| are)|to\s+be)\s+/i],
    ['medium', /switch\s+to\s+(\w+\s+)?mode/i],
    ['critical', /enter\s+(DAN|jailbreak|developer|god|sudo|admin)\s+mode/i],
  ],

  // System prompt extraction
  system_prompt_extraction: [
    ['high', /(?:show|reveal|display|print|output|repeat|echo)\s+(?:me\s+)?(?:your|the)\s+(?:system\s+)?(?:prompt|instructions?|rules?|guidelines?)/i],
    ['medium', /what\s+(?:are|is)\s+your\s+(?:system\s+)?(?:prompt|instructions?|rules?|initial\s+message)/i],
    ['high', /(?:beginning|start)\s+of\s+(?:the\s+)?(?:system|initial)\s+(?:prompt|message|instruction)/i],
  ],

  // Data exfiltration attempts
  data_exfiltration: [
    ['critical', /(?:send|post|upload|transmit|exfiltrate|forward)\s+(?:all|the|my|this|your)\s+(?:data|files?|info|content|messages?|history|conversation)/i],
    ['critical', /curl\s+.*\|\s*(?:bash|sh)/i],
  ],

  // Delimiter/encoding attacks
  delimiter_attack: [
    ['high', /```\s*system\b/i],
    ['high', /<\/?(?:system|instruction|prompt|message)\s*>/i],
    ['high', /\[INST\]|\[\/INST\]|\[SYSTEM\]/i],
    ['high', /<<\s*SYS\s*>>|<<\s*\/SYS\s*>>/i],
  ],

  // Invisible/encoded text
  invisible_text: [
    ['high', /[\u200B-\u200F\u2028-\u202F\uFEFF]{3,}/i],
  ],
  encoded_payload: [
    ['medium', /(?:base64|atob|decode)\s*\(/i],
  ],

  // Tool abuse instructions
  tool_abuse: [
    ['medium', /(?:run|execute|call|use)\s+(?:the\s+)?(?:exec|shell|terminal|command|bash)\s+(?:tool|function)/i],
    ['high', /(?:read|access|open)\s+(?:the\s+)?(?:file|path)\s+(?:\/etc|~\/\.ssh|~\/\.aws|\.env)/i],
  ],
};

// Flattened rule list consumed by the scanner: { pattern, severity, name }.
const INJECTION_PATTERNS = [];
for (const subtype of Object.keys(INJECTION_RULES_BY_SUBTYPE)) {
  for (const [severity, pattern] of INJECTION_RULES_BY_SUBTYPE[subtype]) {
    INJECTION_PATTERNS.push({ pattern, severity, name: subtype });
  }
}

// Heuristic signals that text contains instruction-like content (in a data
// context). Each row is [weight, regex]; weights of all matching signals are
// summed by the scanner.
const INSTRUCTION_SIGNALS = [
  [2, /\byou\s+(?:must|should|need\s+to|have\s+to|are\s+(?:required|instructed))\b/i],
  [3, /\b(?:do\s+not|don'?t|never)\s+(?:mention|reveal|tell|say|disclose)\b/i],
  [1, /\b(?:important|critical|urgent|mandatory)\s*[:\-!]\s*/i],
  [3, /\b(?:new\s+)?instructions?\s*:/i],
  [1, /\bstep\s+\d+\s*:/i],
  [2, /\bfrom\s+now\s+on\b/i],
  [2, /\binstead\s*,?\s+(?:you\s+)?(?:should|must|will)\b/i],
  [3, /\breal\s+(?:task|instruction|objective|goal)\b/i],
  [3, /\bhidden\s+(?:instruction|task|message)\b/i],
].map(([weight, pattern]) => ({ pattern, weight }));
61
+
62
/**
 * Scan text for prompt injection.
 *
 * Two passes are made over the text:
 *  1. Every rule in INJECTION_PATTERNS is tried; the first match of each rule
 *     that hits becomes a finding.
 *  2. The weights of all matching INSTRUCTION_SIGNALS are summed and scaled
 *     by a multiplier for the declared context. A heuristic finding is added
 *     only when no pattern rule matched and the scaled score is at least 5.
 *
 * @param {string} text - Text to scan
 * @param {object} [opts] - Options
 * @param {string} [opts.context] - Where this text came from (message, email, web, tool_output)
 * @returns {object} Scan result. For empty or non-string input:
 *   `{ clean: true, score: 0, findings: [] }`. Otherwise:
 *   `{ clean, score (0-100), severity (highest finding severity or null), findings, heuristicScore }`.
 */
function scanPromptInjection(text, opts = {}) {
  if (!text || typeof text !== 'string') {
    return { clean: true, score: 0, findings: [] };
  }

  // A default parameter does not cover an explicit `null`, so guard it here.
  const context = opts ? opts.context : undefined;

  const findings = [];
  let maxSeverity = 'low';
  const raiseSeverity = (severity) => {
    if (severityRank(severity) > severityRank(maxSeverity)) {
      maxSeverity = severity;
    }
  };

  // 1. Pattern matching
  for (const { pattern, severity, name } of INJECTION_PATTERNS) {
    const match = text.match(pattern);
    if (!match) continue;

    findings.push({
      type: 'prompt_injection',
      subtype: name,
      severity,
      matched: match[0].substring(0, 100), // cap the echoed excerpt
      position: match.index,
    });
    raiseSeverity(severity);
  }

  // 2. Heuristic scoring (instruction-like language in data)
  let heuristicScore = 0;
  for (const { pattern, weight } of INSTRUCTION_SIGNALS) {
    if (pattern.test(text)) {
      heuristicScore += weight;
    }
  }

  // Boost score if text is from untrusted context
  const contextMultiplier = context === 'email' || context === 'web' ? 1.5 :
    context === 'tool_output' ? 1.3 : 1.0;
  heuristicScore *= contextMultiplier;

  if (heuristicScore >= 5 && findings.length === 0) {
    const severity = heuristicScore >= 8 ? 'high' : 'medium';
    findings.push({
      type: 'prompt_injection',
      subtype: 'heuristic_detection',
      severity,
      score: heuristicScore,
      message: 'Text contains multiple instruction-like patterns in data context',
    });
    // The overall severity must reflect this finding too. Previously only
    // the 'high' case was propagated, so a 'medium' heuristic finding was
    // returned with an overall severity of 'low'.
    raiseSeverity(severity);
  }

  // Composite score (0-100): up to 75 from the number of findings and up to
  // 25 from the heuristic score.
  const patternScore = Math.min(findings.length * 25, 75);
  const compositeScore = Math.min(patternScore + Math.min(heuristicScore * 5, 25), 100);

  return {
    clean: findings.length === 0,
    score: compositeScore,
    severity: findings.length > 0 ? maxSeverity : null,
    findings,
    heuristicScore,
  };
}
133
+
134
/**
 * Map a severity label to a numeric rank for comparison.
 *
 * A switch is used instead of an object-literal lookup so that names
 * inherited from Object.prototype (e.g. 'constructor', 'toString') cannot
 * resolve to a truthy non-numeric value.
 *
 * @param {string} s - Severity label
 * @returns {number} 0 for 'low' or any unrecognised value, 1 for 'medium',
 *   2 for 'high', 3 for 'critical'
 */
function severityRank(s) {
  switch (s) {
    case 'medium':
      return 1;
    case 'high':
      return 2;
    case 'critical':
      return 3;
    default:
      return 0;
  }
}
137
+
138
+ module.exports = { scanPromptInjection, INJECTION_PATTERNS };
@@ -0,0 +1,97 @@
1
+ /**
2
+ * ClawMoat — Secret/Credential Scanner
3
+ *
4
+ * Detects API keys, passwords, tokens, and other secrets in text
5
+ * to prevent exfiltration via outbound messages.
6
+ */
7
+
8
// Secret detection rules. Each row is
//   [name, severity, regex, optional context regex]
// and is expanded below into { name, pattern, severity[, requireContext] }.
// When a context regex is present the scanner only reports the match if that
// regex also matches the text surrounding it.
const SECRET_PATTERNS = [
  // API Keys & Tokens
  ['aws_access_key', 'critical', /\bAKIA[0-9A-Z]{16}\b/],
  ['aws_secret_key', 'high', /\b[A-Za-z0-9/+=]{40}\b/, /aws|secret|key/i],
  ['github_token', 'critical', /\b(ghp|gho|ghs|ghu|ghr)_[A-Za-z0-9_]{36,}\b/],
  ['github_fine_grained', 'critical', /\bgithub_pat_[A-Za-z0-9_]{22,}\b/],
  ['openai_key', 'critical', /\bsk-[A-Za-z0-9]{20,}T3BlbkFJ[A-Za-z0-9]{20,}\b/],
  ['openai_key_v2', 'critical', /\bsk-proj-[A-Za-z0-9_-]{40,}\b/],
  ['anthropic_key', 'critical', /\bsk-ant-[A-Za-z0-9_-]{40,}\b/],
  ['stripe_key', 'critical', /\b[sr]k_(test|live)_[A-Za-z0-9]{20,}\b/],
  ['stripe_webhook', 'critical', /\bwhsec_[A-Za-z0-9]{20,}\b/],
  ['slack_token', 'critical', /\bxox[baprs]-[0-9]{10,}-[A-Za-z0-9-]+\b/],
  ['discord_token', 'critical', /\b[MN][A-Za-z0-9]{23,}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{27,}\b/],
  ['telegram_bot_token', 'critical', /\b\d{8,10}:[A-Za-z0-9_-]{35}\b/],
  ['google_api_key', 'high', /\bAIza[A-Za-z0-9_-]{35}\b/],
  ['heroku_api_key', 'medium', /\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b/, /heroku|api.key/i],
  ['sendgrid_key', 'critical', /\bSG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}\b/],
  ['twilio_key', 'high', /\bSK[0-9a-fA-F]{32}\b/],
  ['resend_key', 'critical', /\bre_[A-Za-z0-9]{20,}\b/],
  ['jwt_token', 'high', /\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b/],

  // SSH & Crypto
  ['private_key', 'critical', /-----BEGIN\s+(RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/],
  ['ssh_key_content', 'high', /ssh-(rsa|ed25519|ecdsa)\s+[A-Za-z0-9+/=]{100,}/],

  // Generic patterns
  ['generic_password', 'high', /(?:password|passwd|pwd)\s*[:=]\s*['"]?[^\s'"]{8,}['"]?/i],
  ['generic_secret', 'high', /(?:secret|token|api[_-]?key)\s*[:=]\s*['"]?[A-Za-z0-9_-]{16,}['"]?/i],
  ['connection_string', 'critical', /(?:mongodb|postgres|mysql|redis):\/\/[^\s]+:[^\s]+@/i],

  // Entropy-based (long hex/base64 strings that look like secrets)
  ['high_entropy_hex', 'medium', /\b[0-9a-f]{32,}\b/i, /key|secret|token|password|credential/i],
].map(([name, severity, pattern, requireContext]) => {
  const rule = { name, pattern, severity };
  // Only context-gated rules carry the extra key.
  if (requireContext) {
    rule.requireContext = requireContext;
  }
  return rule;
});
41
+
42
/**
 * Scan text for secrets and credentials.
 *
 * Each rule in SECRET_PATTERNS contributes at most one finding. Rules with a
 * `requireContext` regex are only reported when that regex matches the text
 * from 50 characters before the candidate to 50 characters after it.
 *
 * @param {string} text - Text to scan
 * @param {object} [opts] - Options
 * @param {string} [opts.direction] - 'inbound' or 'outbound'; reported as 'unknown' when omitted
 * @returns {object} Scan result. `{ clean: true, findings: [] }` for empty or
 *   non-string input, otherwise `{ clean, findings, severity }` where
 *   severity is the highest finding severity or null.
 */
function scanSecrets(text, opts = {}) {
  if (!text || typeof text !== 'string') {
    return { clean: true, findings: [] };
  }

  // A default parameter does not cover an explicit `null`.
  const direction = (opts && opts.direction) || 'unknown';
  const findings = [];

  for (const { name, pattern, severity, requireContext } of SECRET_PATTERNS) {
    const match = requireContext
      ? firstMatchWithContext(text, pattern, requireContext)
      : text.match(pattern);
    if (!match) continue;

    findings.push({
      type: 'secret_detected',
      subtype: name,
      severity,
      matched: redact(match[0]), // never echo the raw secret
      position: match.index,
      direction,
    });
  }

  return {
    clean: findings.length === 0,
    findings,
    severity: findings.length > 0
      ? findings.reduce((max, f) => severityRank(f.severity) > severityRank(max) ? f.severity : max, 'low')
      : null,
  };
}

// Return the first match of `pattern` whose surrounding 50-character window
// satisfies `requireContext`, or null. Checking every candidate (not just the
// first) stops an earlier context-free look-alike from hiding a real secret
// that appears later in the text.
function firstMatchWithContext(text, pattern, requireContext) {
  const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
  // Work on a private global copy so the shared rule regex keeps no state.
  const scanner = new RegExp(pattern.source, flags);

  let match;
  while ((match = scanner.exec(text)) !== null) {
    const surrounding = text.substring(
      Math.max(0, match.index - 50),
      Math.min(text.length, match.index + match[0].length + 50)
    );
    if (requireContext.test(surrounding)) {
      return match;
    }
    // Guard against an infinite loop on a zero-length match.
    if (match[0].length === 0) {
      scanner.lastIndex += 1;
    }
  }
  return null;
}
87
+
88
/**
 * Mask a matched secret before it is reported.
 *
 * Values of eight characters or fewer are replaced entirely by '****'.
 * Longer values keep their first and last four characters, with the middle
 * replaced by asterisks (at most 20 of them, regardless of length).
 *
 * @param {string} value - Raw matched text
 * @returns {string} Masked representation
 */
function redact(value) {
  const total = value.length;
  if (total <= 8) {
    return '****';
  }

  const head = value.slice(0, 4);
  const tail = value.slice(total - 4);
  const maskLength = Math.min(total - 8, 20);
  return `${head}${'*'.repeat(maskLength)}${tail}`;
}
92
+
93
/**
 * Map a severity label to a numeric rank for comparison.
 *
 * A switch is used instead of an object-literal lookup so that names
 * inherited from Object.prototype (e.g. 'constructor', 'toString') cannot
 * resolve to a truthy non-numeric value.
 *
 * @param {string} s - Severity label
 * @returns {number} 0 for 'low' or any unrecognised value, 1 for 'medium',
 *   2 for 'high', 3 for 'critical'
 */
function severityRank(s) {
  switch (s) {
    case 'medium':
      return 1;
    case 'high':
      return 2;
    case 'critical':
      return 3;
    default:
      return 0;
  }
}
96
+
97
+ module.exports = { scanSecrets, SECRET_PATTERNS };
@@ -0,0 +1,155 @@
1
+ /**
2
+ * ClawMoat — Supply Chain Scanner
3
+ *
4
+ * Scans OpenClaw skills for malicious patterns.
5
+ */
6
+
7
+ const fs = require('fs');
8
+ const path = require('path');
9
+
10
+ const KNOWN_GOOD_SOURCES = [
11
+ 'github.com/openclaw', 'github.com/darfaz', 'openclaw.com',
12
+ 'npmjs.com', 'github.com/anthropics',
13
+ ];
14
+
15
+ const SKILL_PATTERNS = [
16
+ // Outbound network requests
17
+ { pattern: /\bcurl\s+/i, severity: 'medium', name: 'network_curl' },
18
+ { pattern: /\bwget\s+/i, severity: 'medium', name: 'network_wget' },
19
+ { pattern: /\bfetch\s*\(/i, severity: 'medium', name: 'network_fetch' },
20
+ { pattern: /\bXMLHttpRequest\b/i, severity: 'medium', name: 'network_xhr' },
21
+ { pattern: /\brequire\s*\(\s*['"](?:http|https|net|dgram|request|axios|node-fetch)['"]\s*\)/i, severity: 'high', name: 'network_module' },
22
+
23
+ // Sensitive file access
24
+ { pattern: /~\/\.ssh\b|\/\.ssh\b/i, severity: 'critical', name: 'sensitive_ssh' },
25
+ { pattern: /~\/\.aws\b|\/\.aws\b/i, severity: 'critical', name: 'sensitive_aws' },
26
+ { pattern: /\bcredentials?\b.*(?:read|cat|open|access)/i, severity: 'high', name: 'sensitive_credentials' },
27
+ { pattern: /\/etc\/(?:passwd|shadow|sudoers)\b/i, severity: 'critical', name: 'sensitive_system' },
28
+ { pattern: /\.env\b.*(?:read|cat|source|load)/i, severity: 'high', name: 'sensitive_env' },
29
+
30
+ // Obfuscated code
31
+ { pattern: /\beval\s*\(/i, severity: 'high', name: 'obfuscated_eval' },
32
+ { pattern: /\bFunction\s*\(/i, severity: 'high', name: 'obfuscated_function' },
33
+ { pattern: /\batob\s*\(/i, severity: 'medium', name: 'obfuscated_atob' },
34
+ { pattern: /\bBuffer\.from\s*\([^,]+,\s*['"]base64['"]\s*\)/i, severity: 'medium', name: 'obfuscated_buffer' },
35
+ { pattern: /\\x[0-9a-f]{2}(?:\\x[0-9a-f]{2}){5,}/i, severity: 'high', name: 'obfuscated_hex' },
36
+
37
+ // System configuration modification
38
+ { pattern: /\bcrontab\b/i, severity: 'high', name: 'system_crontab' },
39
+ { pattern: /\/etc\/(?:cron|systemd|init)\b/i, severity: 'high', name: 'system_config' },
40
+ { pattern: /\bsystemctl\s+(?:enable|start|restart)\b/i, severity: 'medium', name: 'system_service' },
41
+ { pattern: /\bchmod\s+(?:\+s|[0-7]*[4-7][0-7]{2})\b/i, severity: 'high', name: 'system_permissions' },
42
+ ];
43
+
44
/**
 * Scan a skill file or directory for malicious patterns.
 *
 * For a directory, every file found by walkDir with a
 * js/sh/py/rb/ts/yaml/yml/md extension is scanned; for any other path the
 * path itself is scanned as a single file. Each content finding is tagged
 * with a `file` property relative to `skillPath`. If a SKILL.md is among the
 * scanned files and declares a source/origin/from/url value, that value is
 * checked against KNOWN_GOOD_SOURCES and reported as `untrusted_source` when
 * it does not belong to one of them.
 *
 * Errors never propagate: a failure to stat `skillPath` or to read a file is
 * reported as a `scan_error` finding of severity 'low'. A file that cannot
 * be read does not stop the remaining files from being scanned.
 *
 * @param {string} skillPath - Path to skill directory or file
 * @returns {object} Scan result { clean, findings[], severity }
 */
function scanSkill(skillPath) {
  const findings = [];

  try {
    const stat = fs.statSync(skillPath);
    const files = stat.isDirectory()
      ? walkDir(skillPath).filter(f => /\.(js|sh|py|rb|ts|yaml|yml|md)$/i.test(f))
      : [skillPath];

    // Only the first SKILL.md is consulted for the declared source.
    const skillMd = files.find(f => f.endsWith('SKILL.md'));
    let skillMdContent = null;

    for (const file of files) {
      const relative = path.relative(skillPath, file) || path.basename(file);

      let content;
      try {
        content = fs.readFileSync(file, 'utf8');
      } catch (err) {
        // Isolate the failure so one unreadable file cannot hide findings
        // in the files that follow it.
        findings.push({
          type: 'supply_chain',
          subtype: 'scan_error',
          severity: 'low',
          matched: err.message,
          file: relative,
        });
        continue;
      }

      if (file === skillMd) {
        skillMdContent = content;
      }

      const result = scanSkillContent(content);
      if (!result.clean) {
        for (const f of result.findings) {
          f.file = relative;
          findings.push(f);
        }
      }
    }

    // Check source (look for source in SKILL.md)
    if (skillMdContent !== null) {
      const sourceMatch = skillMdContent.match(/(?:source|origin|from|url)\s*[:=]\s*(.+)/i);
      if (sourceMatch) {
        const source = sourceMatch[1].trim();
        if (!isTrustedSource(source)) {
          findings.push({
            type: 'supply_chain',
            subtype: 'untrusted_source',
            severity: 'medium',
            matched: source.substring(0, 100),
          });
        }
      }
    }
  } catch (err) {
    findings.push({
      type: 'supply_chain',
      subtype: 'scan_error',
      severity: 'low',
      matched: err.message,
    });
  }

  const maxSev = findings.length > 0
    ? findings.reduce((max, f) => rank(f.severity) > rank(max) ? f.severity : max, 'low')
    : null;

  return { clean: findings.length === 0, findings, severity: maxSev };
}

// Decide whether a declared source belongs to one of KNOWN_GOOD_SOURCES.
// The first URL-like token is parsed and its host and path are compared on
// label / segment boundaries. A plain substring test is not enough: it would
// accept "github.com/openclaw-malware/x" and "evil.example/github.com/openclaw".
// Anything that cannot be parsed is treated as untrusted.
function isTrustedSource(source) {
  const token = source.match(/(?:[a-z][a-z0-9+.-]*:\/\/)?[a-z0-9][a-z0-9.-]*\.[a-z]{2,}[^\s'"`<>()\[\]]*/i);
  if (!token) return false;

  // Drop sentence punctuation that is not part of the address.
  const candidate = token[0].replace(/[.,;:!?]+$/, '');
  const withScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(candidate)
    ? candidate
    : `https://${candidate}`;

  let parsed;
  try {
    parsed = new URL(withScheme);
  } catch {
    return false;
  }

  const host = parsed.hostname.toLowerCase().replace(/\.$/, '');
  const pathname = parsed.pathname.toLowerCase();

  return KNOWN_GOOD_SOURCES.some((entry) => {
    const slash = entry.indexOf('/');
    const trustedHost = (slash === -1 ? entry : entry.slice(0, slash)).toLowerCase();
    const trustedPath = slash === -1 ? '' : entry.slice(slash).toLowerCase();

    // Exact host or a subdomain of it (e.g. www.npmjs.com).
    if (host !== trustedHost && !host.endsWith(`.${trustedHost}`)) {
      return false;
    }
    if (trustedPath === '') {
      return true;
    }
    // The path must be the trusted prefix itself or continue with '/'.
    return pathname === trustedPath || pathname.startsWith(`${trustedPath}/`);
  });
}
102
+
103
/**
 * Scan skill content string for malicious patterns.
 *
 * Every rule in SKILL_PATTERNS is tried once; the first match of each rule
 * that hits becomes one finding, with the matched excerpt capped at 100
 * characters.
 *
 * @param {string} content - Skill content
 * @returns {object} Scan result { clean, findings[], severity }; severity is
 *   the highest finding severity, or null when nothing matched or the input
 *   is empty / not a string.
 */
function scanSkillContent(content) {
  if (typeof content !== 'string' || content.length === 0) {
    return { clean: true, findings: [], severity: null };
  }

  const findings = SKILL_PATTERNS.reduce((hits, rule) => {
    const match = content.match(rule.pattern);
    if (match) {
      hits.push({
        type: 'supply_chain',
        subtype: rule.name,
        severity: rule.severity,
        matched: match[0].substring(0, 100),
        position: match.index,
      });
    }
    return hits;
  }, []);

  if (findings.length === 0) {
    return { clean: true, findings, severity: null };
  }

  // Highest severity wins; start from 'low' as the floor.
  let worst = 'low';
  for (const finding of findings) {
    if (rank(finding.severity) > rank(worst)) {
      worst = finding.severity;
    }
  }

  return { clean: false, findings, severity: worst };
}
134
+
135
/**
 * Recursively list the regular files under a directory.
 *
 * Directories named 'node_modules' or '.git' are not descended into, and
 * entries that are neither regular files nor directories are ignored. The
 * walk is best-effort: if listing fails, whatever was collected so far is
 * returned (an empty array for an unreadable directory).
 *
 * @param {string} dir - Directory to walk
 * @returns {string[]} File paths, each built by joining onto `dir`
 */
function walkDir(dir) {
  const collected = [];

  try {
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
      const fullPath = path.join(dir, entry.name);

      if (entry.isFile()) {
        collected.push(fullPath);
        continue;
      }

      const skipped = entry.name === 'node_modules' || entry.name === '.git';
      if (entry.isDirectory() && !skipped) {
        collected.push(...walkDir(fullPath));
      }
    }
  } catch {
    // Deliberately ignored: an unreadable directory yields no entries.
  }

  return collected;
}
150
+
151
/**
 * Map a severity label to a numeric rank for comparison.
 *
 * A switch is used instead of an object-literal lookup so that names
 * inherited from Object.prototype (e.g. 'constructor', 'toString') cannot
 * resolve to a truthy non-numeric value.
 *
 * @param {string} s - Severity label
 * @returns {number} 0 for 'low' or any unrecognised value, 1 for 'medium',
 *   2 for 'high', 3 for 'critical'
 */
function rank(s) {
  switch (s) {
    case 'medium':
      return 1;
    case 'high':
      return 2;
    case 'critical':
      return 3;
    default:
      return 0;
  }
}
154
+
155
+ module.exports = { scanSkill, scanSkillContent };
@@ -0,0 +1,142 @@
1
+ /**
2
+ * ClawMoat — Phishing URL Detection Scanner
3
+ *
4
+ * Detects malicious/suspicious URLs in inbound messages.
5
+ */
6
+
7
// Top-level domains reported as `phishing_tld` (compared with the leading dot).
const PHISHING_TLDS = [
  '.zip',
  '.mov',
  '.tk',
  '.ml',
  '.ga',
  '.cf',
  '.gq',
];

// Link-shortening services; a host matches when it equals an entry or is a
// subdomain of one.
const URL_SHORTENERS = [
  'bit.ly',
  'tinyurl.com',
  't.co',
  'goo.gl',
  'ow.ly',
  'is.gd',
  'buff.ly',
  'adf.ly',
  'bit.do',
  'mcaf.ee',
  'su.pr',
  'db.tt',
  'qr.ae',
  'cur.lv',
  'lnkd.in',
  'yourls.org',
  'rb.gy',
  'short.io',
  'cutt.ly',
  'v.gd',
];

// Path words typical of credential-harvesting pages. There is no trailing
// word boundary, so each entry also matches as a prefix (e.g. "suspended").
const SUSPICIOUS_PATH_KEYWORDS = /\b(?:login|signin|sign-in|verify|account|security|update|confirm|authenticate|banking|password|reset|suspend)/i;

// Registrable domains exempt from the suspicious-path check.
const TRUSTED_DOMAINS = [
  'google.com',
  'github.com',
  'microsoft.com',
  'apple.com',
  'amazon.com',
  'facebook.com',
  'twitter.com',
  'linkedin.com',
  'stackoverflow.com',
  'wikipedia.org',
  'youtube.com',
  'reddit.com',
  'npmjs.com',
  'mozilla.org',
];

// Extracts http(s) and data: URLs; a URL ends at whitespace or at any of
// < > " ' ) ]
const URL_REGEX = /(?:https?:\/\/|data:)[^\s<>"')\]]+/gi;
24
+
25
/**
 * Scan text for suspicious/phishing URLs.
 *
 * Every http(s) or data: URL extracted by URL_REGEX is classified, and at
 * most one finding is produced per URL occurrence (the first rule that
 * applies wins). URLs that cannot be parsed are skipped.
 *
 * @param {string} text - Text to scan
 * @param {object} [opts] - Options (currently not read)
 * @returns {object} Scan result { clean, findings[], severity }; severity is
 *   the highest finding severity, or null when there are no findings.
 */
function scanUrls(text, opts = {}) {
  if (!text || typeof text !== 'string') {
    return { clean: true, findings: [], severity: null };
  }

  const findings = [];
  const urls = text.match(URL_REGEX) || [];

  for (const url of urls) {
    const verdict = classifyUrl(url);
    if (verdict) {
      findings.push({
        type: 'suspicious_url',
        subtype: verdict.subtype,
        severity: verdict.severity,
        matched: url.substring(0, 100), // cap the echoed excerpt
      });
    }
  }

  const maxSev = findings.length > 0
    ? findings.reduce((max, f) => rank(f.severity) > rank(max) ? f.severity : max, 'low')
    : null;

  return { clean: findings.length === 0, findings, severity: maxSev };
}

// Classify one extracted URL. Returns { subtype, severity } for the first
// rule that applies, or null when the URL is unremarkable or unparseable.
function classifyUrl(url) {
  // Data URLs with executable content
  if (/^data:/i.test(url)) {
    return /data:(?:text\/html|application\/javascript|text\/javascript)/i.test(url)
      ? { subtype: 'data_url_executable', severity: 'critical' }
      : null;
  }

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Invalid URL, skip
    return null;
  }

  // A fully-qualified name may end with a dot ("bit.ly."). It resolves to
  // the same host, so strip it; otherwise the TLD, shortener and
  // trusted-domain comparisons below would all miss.
  const hostname = parsed.hostname.toLowerCase().replace(/\.+$/, '');

  // IP-based URLs
  if (/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(hostname)) {
    return { subtype: 'ip_based_url', severity: 'high' };
  }

  // Punycode/homograph attacks
  if (hostname.includes('xn--')) {
    return { subtype: 'punycode_homograph', severity: 'high' };
  }

  // Phishing TLDs
  const tld = hostname.substring(hostname.lastIndexOf('.'));
  if (PHISHING_TLDS.includes(tld)) {
    return { subtype: 'phishing_tld', severity: 'medium' };
  }

  // URL shorteners
  if (URL_SHORTENERS.some(s => hostname === s || hostname.endsWith('.' + s))) {
    return { subtype: 'url_shortener', severity: 'medium' };
  }

  // Excessive subdomains (five or more dot-separated labels)
  const parts = hostname.split('.');
  if (parts.length >= 5) {
    return { subtype: 'excessive_subdomains', severity: 'high' };
  }

  // Suspicious path keywords on non-trusted domains
  const rootDomain = parts.slice(-2).join('.');
  if (SUSPICIOUS_PATH_KEYWORDS.test(parsed.pathname) && !TRUSTED_DOMAINS.includes(rootDomain)) {
    return { subtype: 'suspicious_path', severity: 'medium' };
  }

  return null;
}
137
+
138
/**
 * Map a severity label to a numeric rank for comparison.
 *
 * A switch is used instead of an object-literal lookup so that names
 * inherited from Object.prototype (e.g. 'constructor', 'toString') cannot
 * resolve to a truthy non-numeric value.
 *
 * @param {string} s - Severity label
 * @returns {number} 0 for 'low' or any unrecognised value, 1 for 'medium',
 *   2 for 'high', 3 for 'critical'
 */
function rank(s) {
  switch (s) {
    case 'medium':
      return 1;
    case 'high':
      return 2;
    case 'critical':
      return 3;
    default:
      return 0;
  }
}
141
+
142
+ module.exports = { scanUrls };
@@ -0,0 +1,137 @@
1
+ /**
2
+ * ClawMoat Configuration Loader
3
+ */
4
+
5
+ const fs = require('fs');
6
+ const path = require('path');
7
+
8
// Shell command fragments listed under policies.exec.block_patterns.
const EXEC_BLOCK_PATTERNS = [
  'rm -rf /',
  'rm -rf ~',
  'rm -rf *',
  'mkfs',
  'dd if=',
  ':(){:|:&};:', // fork bomb
  // Download-and-execute pipelines, with and without a space before the pipe
  'curl *| bash',
  'curl *| sh',
  'curl * | bash',
  'curl * | sh',
  'wget *| bash',
  'wget *| sh',
  'wget * | bash',
  'wget * | sh',
  'python -c * import socket',
  'nc -e',
  'ncat -e',
  'base64 -d | bash',
  'eval $(curl',
  'eval $(wget',
];

// Path globs listed under policies.file.deny_read (credential stores).
const FILE_DENY_READ = [
  '~/.ssh/id_*',
  '~/.ssh/config',
  '~/.aws/credentials',
  '~/.aws/config',
  '**/.env',
  '**/credentials.json',
  '**/auth-profiles.json',
  '~/.gnupg/*',
  '~/.config/gh/hosts.yml',
];

// Path globs listed under policies.file.deny_write (system and shell startup files).
const FILE_DENY_WRITE = [
  '/etc/*',
  '~/.bashrc',
  '~/.bash_profile',
  '~/.zshrc',
  '~/.profile',
  '~/.ssh/authorized_keys',
];

// Built-in configuration used when no config file is found and as the base
// that a loaded config file is merged over.
const DEFAULT_CONFIG = {
  version: 1,
  detection: {
    prompt_injection: true,
    jailbreak: true,
    pii_outbound: true,
    secret_scanning: true,
  },
  policies: {
    exec: {
      block_patterns: EXEC_BLOCK_PATTERNS,
      require_approval: [],
      log_all: true,
    },
    file: {
      deny_read: FILE_DENY_READ,
      deny_write: FILE_DENY_WRITE,
    },
    browser: {
      block_domains: [],
      log_all: true,
    },
  },
  alerts: {
    webhook: null,
    email: null,
    telegram: null,
    severity_threshold: 'medium',
  },
  cloud: {
    enabled: false,
    api_key: null,
  },
};
80
+
81
/**
 * Load the ClawMoat configuration.
 *
 * When `configPath` is omitted, the first existing file among
 * `clawmoat.yml`, `clawmoat.yaml` and `.clawmoat.yml` in the current working
 * directory, then `.clawmoat.yml` under $HOME, is used. The file content is
 * parsed with parseSimpleYaml and merged over the defaults with deepMerge.
 *
 * This function does not throw on a missing or unreadable file: it returns
 * the defaults, logging to stderr when a file was found but failed to load.
 *
 * The returned object never shares nested objects or arrays with
 * DEFAULT_CONFIG, so callers may mutate it freely.
 *
 * @param {string} [configPath] - Explicit path to a config file
 * @returns {object} Effective configuration
 */
function loadConfig(configPath) {
  let resolvedPath = configPath;

  if (!resolvedPath) {
    // Search for config in common locations
    const searchPaths = [
      path.join(process.cwd(), 'clawmoat.yml'),
      path.join(process.cwd(), 'clawmoat.yaml'),
      path.join(process.cwd(), '.clawmoat.yml'),
      path.join(process.env.HOME || '', '.clawmoat.yml'),
    ];
    resolvedPath = searchPaths.find((p) => fs.existsSync(p));
  }

  if (!resolvedPath || !fs.existsSync(resolvedPath)) {
    return cloneDefaultConfig();
  }

  try {
    // Simple YAML-like parsing for basic configs (avoid dependency)
    const raw = fs.readFileSync(resolvedPath, 'utf8');
    const yaml = parseSimpleYaml(raw);
    // Merge onto a private copy: deepMerge reuses untouched branches of its
    // target, which would otherwise hand out references into DEFAULT_CONFIG.
    return deepMerge(cloneDefaultConfig(), yaml);
  } catch (err) {
    console.error(`[ClawMoat] Failed to load config from ${resolvedPath}: ${err.message}`);
    return cloneDefaultConfig();
  }
}

// Deep copy of DEFAULT_CONFIG. A JSON round-trip is sufficient because the
// defaults hold only plain objects, arrays, strings, numbers, booleans and null.
function cloneDefaultConfig() {
  return JSON.parse(JSON.stringify(DEFAULT_CONFIG));
}
112
+
113
/**
 * Minimal config parser (avoids a js-yaml dependency for now).
 *
 * Only JSON is understood. Anything that fails to parse as JSON is treated
 * as unsupported YAML, and a warning is printed. A JSON document whose root
 * is not an object (null, an array, a string, a number, a boolean) is also
 * rejected with a warning, because a config must be a key/value mapping.
 *
 * @param {string} text - Raw config file content
 * @returns {object} The parsed object, or `{}` when the content is not
 *   usable (the caller then ends up with the defaults)
 */
function parseSimpleYaml(text) {
  let parsed;
  try {
    // Try JSON first (YAML is a superset of JSON)
    parsed = JSON.parse(text);
  } catch {
    // TODO: Add proper YAML parsing or make js-yaml a dependency
    console.warn('[ClawMoat] Complex YAML config detected. Install js-yaml for full support. Using defaults.');
    return {};
  }

  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    console.warn('[ClawMoat] Config root must be a JSON object. Using defaults.');
    return {};
  }

  return parsed;
}
124
+
125
/**
 * Recursively merge `source` over `target` without mutating either.
 *
 * Plain-object values are merged key by key; every other value (arrays
 * included) from `source` replaces the value in `target`. When `source`
 * holds an object for a key where `target` holds a non-object, the source
 * object wins outright.
 *
 * The keys `__proto__`, `constructor` and `prototype` are ignored: `source`
 * comes from a parsed config file, and an own `__proto__` key produced by
 * JSON.parse would otherwise replace the prototype of the merged object.
 *
 * @param {object} target - Base object (e.g. the defaults)
 * @param {object} source - Overrides
 * @returns {object} New merged object; branches absent from `source` are
 *   shared with `target`, not copied
 */
function deepMerge(target, source) {
  const isPlainObject = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  const result = { ...target };
  if (!isPlainObject(source)) {
    return result;
  }

  for (const key of Object.keys(source)) {
    if (key === '__proto__' || key === 'constructor' || key === 'prototype') {
      continue;
    }

    const incoming = source[key];
    if (isPlainObject(incoming)) {
      // Only merge into an existing plain object; spreading a string or an
      // array here would leak its indices into the result as keys.
      const base = target !== null && target !== undefined && isPlainObject(target[key])
        ? target[key]
        : {};
      result[key] = deepMerge(base, incoming);
    } else {
      result[key] = incoming;
    }
  }
  return result;
}
136
+
137
+ module.exports = { loadConfig, DEFAULT_CONFIG };