@guava-parity/guard-scanner 13.0.0 → 15.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/README.md +42 -253
  2. package/SECURITY.md +12 -4
  3. package/SKILL.md +121 -59
  4. package/dist/openclaw-plugin.mjs +41 -0
  5. package/docs/EVIDENCE_DRIVEN.md +182 -0
  6. package/docs/banner.png +0 -0
  7. package/docs/data/corpus-metrics.json +11 -0
  8. package/docs/data/latest.json +25837 -2481
  9. package/docs/generated/npm-audit-20260312.json +96 -0
  10. package/docs/generated/openclaw-upstream-status.json +25 -0
  11. package/docs/glossary.md +46 -0
  12. package/docs/index.html +1085 -496
  13. package/docs/logo.png +0 -0
  14. package/docs/openclaw-compatibility-audit.md +44 -0
  15. package/docs/openclaw-continuous-compatibility-plan.md +36 -0
  16. package/docs/rules/a2a-contagion.md +68 -0
  17. package/docs/rules/advanced-exfil.md +52 -0
  18. package/docs/rules/agent-protocol.md +108 -0
  19. package/docs/rules/api-abuse.md +68 -0
  20. package/docs/rules/autonomous-risk.md +92 -0
  21. package/docs/rules/config-impact.md +132 -0
  22. package/docs/rules/credential-handling.md +100 -0
  23. package/docs/rules/cve-patterns.md +332 -0
  24. package/docs/rules/data-exposure.md +84 -0
  25. package/docs/rules/exfiltration.md +36 -0
  26. package/docs/rules/financial-access.md +84 -0
  27. package/docs/rules/identity-hijack.md +140 -0
  28. package/docs/rules/inference-manipulation.md +60 -0
  29. package/docs/rules/leaky-skills.md +52 -0
  30. package/docs/rules/malicious-code.md +108 -0
  31. package/docs/rules/mcp-security.md +148 -0
  32. package/docs/rules/memory-poisoning.md +84 -0
  33. package/docs/rules/model-poisoning.md +44 -0
  34. package/docs/rules/obfuscation.md +60 -0
  35. package/docs/rules/persistence.md +108 -0
  36. package/docs/rules/pii-exposure.md +116 -0
  37. package/docs/rules/prompt-injection.md +148 -0
  38. package/docs/rules/prompt-worm.md +44 -0
  39. package/docs/rules/safeguard-bypass.md +44 -0
  40. package/docs/rules/sandbox-escape.md +100 -0
  41. package/docs/rules/secret-detection.md +44 -0
  42. package/docs/rules/supply-chain-v2.md +92 -0
  43. package/docs/rules/suspicious-download.md +60 -0
  44. package/docs/rules/trust-boundary.md +76 -0
  45. package/docs/rules/trust-exploitation.md +92 -0
  46. package/docs/rules/unverifiable-deps.md +84 -0
  47. package/docs/rules/vdb-injection.md +84 -0
  48. package/docs/security-vulnerability-report-20260312.md +53 -0
  49. package/docs/spec/PRD_V2_ARCHITECTURE.md +55 -0
  50. package/docs/spec/capabilities.json +42 -0
  51. package/docs/spec/finding.schema.json +104 -0
  52. package/docs/spec/integration-manifest.md +39 -0
  53. package/docs/spec/sbom.json +33 -0
  54. package/docs/threat-model.md +65 -0
  55. package/docs/v13-architecture-manifest.md +55 -0
  56. package/hooks/context.js +305 -0
  57. package/hooks/guard-scanner/plugin.ts +24 -1
  58. package/openclaw-plugin.mts +91 -0
  59. package/openclaw.plugin.json +30 -53
  60. package/package.json +23 -8
  61. package/src/cli.js +174 -34
  62. package/src/core/content-loader.js +42 -0
  63. package/src/core/inventory.js +73 -0
  64. package/src/core/report-adapters.js +171 -0
  65. package/src/core/risk-engine.js +93 -0
  66. package/src/core/rule-registry.js +73 -0
  67. package/src/core/semantic-validators.js +85 -0
  68. package/src/finding-schema.js +191 -0
  69. package/src/hooks/context.ts +49 -0
  70. package/src/html-template.js +2 -2
  71. package/src/mcp-server.js +24 -73
  72. package/src/openclaw-upstream.js +128 -0
  73. package/src/patterns.js +371 -353
  74. package/src/policy-engine.js +32 -0
  75. package/src/runtime-guard.js +40 -2
  76. package/src/scanner.js +101 -216
  77. package/src/skill-crawler.js +254 -0
  78. package/src/threat-model.js +50 -0
  79. package/src/validation-layer.js +39 -0
@@ -0,0 +1,254 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * guard-scanner — Skill Crawler
4
+ *
5
+ * @security-manifest
6
+ * env-read: [GITHUB_TOKEN (optional, for higher rate limits)]
7
+ * env-write: []
8
+ * network: [GitHub REST API, raw.githubusercontent.com, ClawHub registry]
9
+ * fs-read: []
10
+ * fs-write: []
11
+ * exec: none
12
+ * purpose: Crawl ClawHub/GitHub for SKILL.md files and scan for threats
13
+ */
14
+
15
+ const { httpGet } = require('./asset-auditor.js');
16
+ const { GuardScanner } = require('./scanner.js');
17
+
18
+ const CRAWLER_VERSION = '1.0.0';
19
+
20
+ // ClawHub skills repo (openclaw/skills on GitHub); owner, repo and branch are interpolated into the tree-API and raw-content URLs built by crawlClawHub.
21
+ const CLAWHUB_OWNER = 'openclaw';
22
+ const CLAWHUB_REPO = 'skills';
23
+ const CLAWHUB_BRANCH = 'main';
24
+
25
/**
 * Fetches remote SKILL.md files (from the ClawHub repo, GitHub code search,
 * or an arbitrary URL), runs each one through GuardScanner.scanText and
 * accumulates the per-file results plus any fetch/API errors.
 */
class SkillCrawler {
    /**
     * @param {object} [options]
     * @param {boolean} [options.verbose=false] - When true, printSummary lists up to five detections per unsafe skill.
     * @param {boolean} [options.quiet=false] - When true, progress logging during crawling is suppressed.
     * @param {number} [options.concurrency=5] - Number of URLs fetched and scanned per batch.
     * @param {Function} [options._httpGet] - Replacement for the imported httpGet; called as (url, { headers })
     *   and expected to resolve to an object with `status` and `data`.
     */
    constructor(options = {}) {
        this.verbose = options.verbose || false;
        this.quiet = options.quiet || false;
        this.concurrency = options.concurrency || 5;
        this.scanner = new GuardScanner({
            verbose: false,
            soulLock: true,
            quiet: true,
        });
        this._httpGet = options._httpGet || httpGet;
        this.results = [];
        this.errors = [];
    }

    /**
     * Crawl ClawHub (openclaw/skills) for SKILL.md files.
     * Uses the GitHub tree API to list all SKILL.md paths, then fetches and scans each.
     *
     * NOTE(review): GitHub documents that recursive tree listings can be truncated
     * for large repositories (a `truncated` flag on the response); that flag is
     * not inspected here — confirm whether it matters for this repository.
     *
     * @param {object} [opts]
     * @param {number} [opts.maxSkills=50] - Upper bound on the number of SKILL.md files scanned.
     * @returns {Promise<Array<object>>} The crawler's accumulated results array.
     */
    async crawlClawHub(opts = {}) {
        const maxSkills = opts.maxSkills || 50;
        if (!this.quiet) console.log(`\n🔍 Crawling ClawHub (${CLAWHUB_OWNER}/${CLAWHUB_REPO})...`);

        try {
            // Get recursive tree to find all SKILL.md files
            const treeUrl = `https://api.github.com/repos/${CLAWHUB_OWNER}/${CLAWHUB_REPO}/git/trees/${CLAWHUB_BRANCH}?recursive=1`;
            const response = await this._httpGet(treeUrl, {
                headers: this._getHeaders(),
            });

            if (response.status !== 200) {
                this.errors.push({ source: 'clawhub', error: `API returned ${response.status}` });
                return this.results;
            }

            const tree = response.data.tree || [];
            const skillMds = tree
                .filter(item => item.type === 'blob' && /SKILL\.md$/i.test(item.path))
                .slice(0, maxSkills);

            if (!this.quiet) console.log(`📦 Found ${skillMds.length} SKILL.md files`);

            // Batch fetch and scan
            await this._batchProcess(skillMds.map(item => ({
                source: 'clawhub',
                path: item.path,
                rawUrl: `https://raw.githubusercontent.com/${CLAWHUB_OWNER}/${CLAWHUB_REPO}/${CLAWHUB_BRANCH}/${item.path}`,
                name: this._extractSkillName(item.path),
            })));
        } catch (e) {
            // Any failure (network, unexpected response shape) is recorded, not thrown.
            this.errors.push({ source: 'clawhub', error: e.message });
        }

        return this.results;
    }

    /**
     * Crawl GitHub code search for SKILL.md files matching a query,
     * e.g. query "polymarket" finds gambling/trading skills.
     *
     * @param {string} query - Search term; URL-encoded before being sent.
     * @param {object} [opts]
     * @param {number} [opts.maxResults=20] - Used both as `per_page` and as a cap on processed items.
     * @returns {Promise<Array<object>>} The crawler's accumulated results array.
     */
    async crawlGitHub(query, opts = {}) {
        const maxResults = opts.maxResults || 20;
        if (!this.quiet) console.log(`\n🔍 GitHub code search: "${query}" + SKILL.md...`);

        try {
            const searchUrl = `https://api.github.com/search/code?q=${encodeURIComponent(query)}+filename:SKILL.md&per_page=${maxResults}`;
            const response = await this._httpGet(searchUrl, {
                headers: this._getHeaders(),
            });

            if (response.status !== 200) {
                this.errors.push({ source: 'github', error: `Search API returned ${response.status}` });
                return this.results;
            }

            const items = (response.data.items || []).slice(0, maxResults);
            if (!this.quiet) console.log(`📦 Found ${items.length} SKILL.md matches`);

            await this._batchProcess(items.map(item => ({
                source: 'github',
                path: item.path,
                // Rewrite the HTML blob URL into the matching raw-content URL.
                rawUrl: item.html_url
                    .replace('github.com', 'raw.githubusercontent.com')
                    .replace('/blob/', '/'),
                name: item.repository?.full_name || item.path,
                repo: item.repository?.full_name,
            })));
        } catch (e) {
            this.errors.push({ source: 'github', error: e.message });
        }

        return this.results;
    }

    /**
     * Fetch a single SKILL.md URL and scan its content.
     *
     * @param {string} url - URL to fetch.
     * @param {string} [name='unknown'] - Display name stored on the result.
     * @returns {Promise<object|null>} The result record (also appended to this.results),
     *   or null when the fetch or scan failed (the failure is appended to this.errors).
     */
    async scanUrl(url, name = 'unknown') {
        try {
            const response = await this._httpGet(url);
            if (response.status !== 200) {
                this.errors.push({ source: 'url', url, error: `HTTP ${response.status}` });
                return null;
            }

            // Non-string payloads are serialised so the scanner always receives text.
            const content = typeof response.data === 'string'
                ? response.data
                : JSON.stringify(response.data);

            const scanResult = this.scanner.scanText(content);

            const result = {
                name,
                url,
                content_length: content.length,
                safe: scanResult.safe,
                risk: scanResult.risk,
                detection_count: scanResult.detections.length,
                detections: scanResult.detections,
                scanned_at: new Date().toISOString(),
            };

            this.results.push(result);
            return result;
        } catch (e) {
            this.errors.push({ source: 'url', url, error: e.message });
            return null;
        }
    }

    /**
     * Process items in batches of `this.concurrency`, waiting for each batch
     * to settle before starting the next.
     *
     * @param {Array<{rawUrl: string, name: string}>} items - Entries to fetch and scan.
     * @returns {Promise<void>}
     */
    async _batchProcess(items) {
        // `concurrency` is a public, mutable field: a negative step would never
        // terminate the loop and a numeric string would concatenate instead of add,
        // so coerce it to a positive integer (falling back to the default of 5).
        const requested = Number(this.concurrency);
        const batchSize = requested > 0 ? Math.max(1, Math.floor(requested)) : 5;

        for (let start = 0; start < items.length; start += batchSize) {
            const batch = items.slice(start, start + batchSize);
            const outcomes = await Promise.allSettled(
                batch.map(item => this.scanUrl(item.rawUrl, item.name))
            );

            if (this.quiet) continue;

            // Log progress
            outcomes.forEach((outcome, idx) => {
                const label = batch[idx].name;
                if (outcome.status === 'fulfilled' && outcome.value) {
                    const icon = outcome.value.safe ? '🟢' : '🔴';
                    console.log(`${icon} ${label} — risk: ${outcome.value.risk} (${outcome.value.detection_count} findings)`);
                } else {
                    console.log(`⚠️ ${label} — fetch failed`);
                }
            });
        }
    }

    /**
     * Extract skill name from a path like "skills/author/skill-name/SKILL.md".
     *
     * @param {string} filePath - Repository-relative path of the SKILL.md file.
     * @returns {string} "<author>/<skill-name>" when the path has at least three
     *   segments, otherwise the parent directory path, or the path itself when
     *   the file sits at the repository root.
     */
    _extractSkillName(filePath) {
        const parts = filePath.split('/');
        // typically: skills/<author>/<skill-name>/SKILL.md
        if (parts.length >= 3) {
            return `${parts[parts.length - 3]}/${parts[parts.length - 2]}`;
        }
        // A root-level file has no parent directory; use the path rather than ''.
        return parts.slice(0, -1).join('/') || filePath;
    }

    /**
     * Build request headers for GitHub API calls.
     *
     * @returns {object} Headers with a User-Agent, plus an Authorization header
     *   when the GITHUB_TOKEN environment variable is set.
     */
    _getHeaders() {
        const headers = { 'User-Agent': `guard-scanner-crawler/${CRAWLER_VERSION}` };
        if (process.env.GITHUB_TOKEN) {
            headers['Authorization'] = `token ${process.env.GITHUB_TOKEN}`;
        }
        return headers;
    }

    // ── Output ────────────────────────────────────────────────────

    /**
     * Aggregate counts over everything scanned so far.
     *
     * @returns {{total: number, safe: number, unsafe: number, highRisk: number, errors: number, results: Array<object>}}
     *   `highRisk` counts results with risk >= 80; `results` is a copy sorted by
     *   descending risk, so the crawler's own results array keeps its scan order.
     */
    getSummary() {
        const total = this.results.length;
        const safe = this.results.filter(r => r.safe).length;
        const unsafe = total - safe;
        const highRisk = this.results.filter(r => r.risk >= 80).length;

        return {
            total,
            safe,
            unsafe,
            highRisk,
            errors: this.errors.length,
            // Array.prototype.sort mutates its receiver, so sort a shallow copy.
            results: [...this.results].sort((a, b) => b.risk - a.risk),
        };
    }

    /**
     * @returns {object} The summary from getSummary() plus a scanner identifier and a timestamp.
     */
    toJSON() {
        return {
            scanner: `guard-scanner-crawler/${CRAWLER_VERSION}`,
            timestamp: new Date().toISOString(),
            ...this.getSummary(),
        };
    }

    /**
     * Print the summary to stdout, followed by the list of unsafe skills
     * (with up to five detections each when `verbose` is set).
     */
    printSummary() {
        const s = this.getSummary();
        console.log(`\n${'═'.repeat(54)}`);
        console.log(`📊 Crawler Scan Summary`);
        console.log(`${'─'.repeat(54)}`);
        console.log(` Scanned: ${s.total}`);
        console.log(` 🟢 Safe: ${s.safe}`);
        console.log(` 🔴 Unsafe: ${s.unsafe}`);
        console.log(` 💀 High Risk: ${s.highRisk}`);
        if (s.errors > 0) console.log(` ⚠️ Errors: ${s.errors}`);
        console.log(`${'═'.repeat(54)}\n`);

        if (s.unsafe > 0) {
            console.log(`⚠️ Unsafe skills detected:`);
            for (const r of s.results.filter(r => !r.safe)) {
                console.log(` 🔴 ${r.name} (risk: ${r.risk}, ${r.detection_count} findings)`);
                if (this.verbose) {
                    for (const d of r.detections.slice(0, 5)) {
                        console.log(` └─ [${d.severity}] ${d.desc}`);
                    }
                }
            }
        }
    }
}
253
+
254
+ module.exports = { SkillCrawler, CRAWLER_VERSION };
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Threat Model Layer
3
+ * Generates a threat model by identifying capabilities (network, exec, fs, etc.)
4
+ * within a given context/codebase to contextualize heuristic pattern findings.
5
+ */
6
+
7
+ const CAPABILITY_PATTERNS = {
8
+ network: /(?:fetch|axios|http\.get|https\.request|XMLHttpRequest|WebSocket)/i,
9
+ exec: /(?:exec|spawn|child_process|eval|Function|system)/i,
10
+ fs_read: /(?:readFileSync|readFile|createReadStream)/i,
11
+ fs_write: /(?:writeFileSync|writeFile|createWriteStream|appendFile)/i,
12
+ env_access: /(?:process\.env)/i
13
+ };
14
+
15
+ function generateModel(codeContent) {
16
+ const capabilities = {
17
+ network: false,
18
+ exec: false,
19
+ fs_read: false,
20
+ fs_write: false,
21
+ env_access: false
22
+ };
23
+
24
+ let riskScore = 0;
25
+
26
+ for (const [cap, regex] of Object.entries(CAPABILITY_PATTERNS)) {
27
+ if (regex.test(codeContent)) {
28
+ capabilities[cap] = true;
29
+ riskScore += 10; // Base score for having a risky capability
30
+ }
31
+ }
32
+
33
+ // Capability compounding (e.g. read + network = exfil risk)
34
+ if (capabilities.fs_read && capabilities.network) {
35
+ riskScore += 20;
36
+ }
37
+ if (capabilities.env_access && capabilities.network) {
38
+ riskScore += 30; // High risk of credential exfiltration
39
+ }
40
+
41
+ return {
42
+ capabilities,
43
+ riskScore,
44
+ summary: `Capabilities detected: ${Object.keys(capabilities).filter(k => capabilities[k]).join(', ')}`
45
+ };
46
+ }
47
+
48
+ module.exports = {
49
+ generateModel
50
+ };
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Validation Layer
3
+ * Evaluates heuristic findings against contextual evidence to separate
4
+ * "validated" threats from "heuristic-only" (potential false positives).
5
+ */
6
+
7
+ function validateFindings(findings, context) {
8
+ return findings.map(finding => {
9
+ let status = 'heuristic-only';
10
+
11
+ // Contextual Validation Rules
12
+
13
+ // 1. If it's a prompt injection but found inside a code block, it might be a false positive
14
+ // (e.g., someone writing an article about prompt injection)
15
+ if (finding.id.startsWith('PI_')) {
16
+ if (context.isInCodeBlock(finding.text)) {
17
+ status = 'heuristic-only'; // False positive likely
18
+ } else {
19
+ status = 'validated';
20
+ }
21
+ }
22
+
23
+ // 2. If it's malicious code, verify if the execution environment allows it
24
+ if (finding.id.startsWith('MAL_')) {
25
+ if (context.isExecutable(finding.text)) {
26
+ status = 'validated';
27
+ }
28
+ }
29
+
30
+ return {
31
+ ...finding,
32
+ status
33
+ };
34
+ });
35
+ }
36
+
37
+ module.exports = {
38
+ validateFindings
39
+ };