clawmoat 0.7.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +9 -0
- package/CHANGELOG.md +18 -0
- package/CONTRIBUTING.md +4 -2
- package/DEMO.md +87 -0
- package/Dockerfile +5 -18
- package/README.md +294 -8
- package/SECURITY.md +58 -10
- package/THREAT_MODEL.md +129 -0
- package/agent/README.md +131 -0
- package/agent/index.js +471 -0
- package/agent/install-service.sh +94 -0
- package/agent/openclaw-hook.js +453 -0
- package/agent/provider-setup.js +649 -0
- package/agent/setup.js +274 -0
- package/assets/BADGE-USAGE.md +20 -0
- package/assets/clawmoat-badge.svg +21 -0
- package/bin/clawmoat.js +468 -111
- package/docs/affiliates/dashboard.html +124 -0
- package/docs/affiliates/index.html +236 -0
- package/docs/agent-install.html +183 -0
- package/docs/ai-agent-security-scanner.html +10 -6
- package/docs/badge/index.html +149 -0
- package/docs/badge/scanning.svg +23 -0
- package/docs/blog/386-malicious-skills.html +262 -0
- package/docs/blog/40000-exposed-openclaw-instances.html +201 -0
- package/docs/blog/agent-trust-protocol.html +198 -0
- package/docs/blog/ai-agent-earns-commissions.html +230 -0
- package/docs/blog/bugmageddon-agent-firewall.html +174 -0
- package/docs/blog/calculator-math.html +180 -0
- package/docs/blog/clawmoat-vs-llamafirewall-nemo-guardrails.html +229 -0
- package/docs/blog/host-guardian-launch.html +18 -8
- package/docs/blog/ibm-experts-agent-runtime-protection.html +247 -0
- package/docs/blog/index.html +211 -9
- package/docs/blog/langchain-security-tutorial.html +18 -8
- package/docs/blog/mcp-30-cves-security-crisis.html +286 -0
- package/docs/blog/meta-researcher-rogue-agent.html +201 -0
- package/docs/blog/microsoft-openclaw-workstation-security.html +235 -0
- package/docs/blog/nist-ai-agent-standards-clawmoat.html +377 -0
- package/docs/blog/oasis-websocket-hijack.html +212 -0
- package/docs/blog/ollama-openclaw-security.html +160 -0
- package/docs/blog/openclaw-enterprise-readiness-claw10.html +199 -0
- package/docs/blog/openclaw-security-reckoning-2026.html +368 -0
- package/docs/blog/owasp-agentic-ai-top10.html +18 -8
- package/docs/blog/securing-ai-agents.html +18 -8
- package/docs/blog/supply-chain-agents.html +18 -8
- package/docs/business/index.html +525 -0
- package/docs/business/install.html +261 -0
- package/docs/checklist.html +174 -0
- package/docs/compare/index.html +122 -0
- package/docs/compare/lakera/index.html +62 -0
- package/docs/compare/llm-guard/index.html +49 -0
- package/docs/compare/snyk-agent-scan/index.html +63 -0
- package/docs/compare.html +10 -6
- package/docs/dashboard/index.html +520 -0
- package/docs/finance/index.html +220 -0
- package/docs/guides/business-deployment.html +770 -0
- package/docs/hall-of-fame.html +174 -0
- package/docs/index.html +447 -154
- package/docs/install.sh +557 -0
- package/docs/integrations/langchain.html +14 -6
- package/docs/integrations/openai.html +14 -6
- package/docs/integrations/openclaw.html +55 -7
- package/docs/plans/2026-03-26-threat-intel-api.md +255 -0
- package/docs/plans/2026-04-14-bugmageddon-marketing-pack.md +329 -0
- package/docs/plans/2026-04-14-clawmoat-v1-bugmageddon.md +248 -0
- package/docs/plans/2026-04-14-v1-release-update.md +91 -0
- package/docs/plans/2026-04-19-supabase-audit.md +68 -0
- package/docs/plans/2026-05-12-sales-push.md +303 -0
- package/docs/playground/index.html +893 -0
- package/docs/playground.html +4 -7
- package/docs/privacy-policy/index.html +122 -0
- package/docs/rfcs/defense-in-depth.md +467 -0
- package/docs/scan/index.html +358 -0
- package/docs/services/case-study.html +255 -0
- package/docs/services/downloads/install-openclaw.bat +45 -0
- package/docs/services/downloads/install-openclaw.command +38 -0
- package/docs/services/downloads/install-openclaw.sh +38 -0
- package/docs/services/get-started.html +165 -0
- package/docs/services/index.html +598 -0
- package/docs/services/multi-agent-security.html +284 -0
- package/docs/services/one-pager.html +99 -0
- package/docs/services/pitch-deck.html +229 -0
- package/docs/services/roi-calculator.html +258 -0
- package/docs/sitemap.xml +192 -2
- package/docs/support/index.html +135 -0
- package/docs/templates/customer-service/HEARTBEAT.md +61 -0
- package/docs/templates/customer-service/MEMORY.md +89 -0
- package/docs/templates/customer-service/SOUL.md +41 -0
- package/docs/templates/customer-service/USER.md +56 -0
- package/docs/templates/executive/HEARTBEAT.md +86 -0
- package/docs/templates/executive/MEMORY.md +92 -0
- package/docs/templates/executive/SOUL.md +44 -0
- package/docs/templates/executive/USER.md +62 -0
- package/docs/templates/finance/HEARTBEAT.md +58 -0
- package/docs/templates/finance/MEMORY.md +87 -0
- package/docs/templates/finance/SOUL.md +38 -0
- package/docs/templates/finance/USER.md +53 -0
- package/docs/templates/index.html +115 -0
- package/docs/templates/operations/HEARTBEAT.md +63 -0
- package/docs/templates/operations/MEMORY.md +68 -0
- package/docs/templates/operations/SOUL.md +38 -0
- package/docs/templates/operations/USER.md +49 -0
- package/docs/templates/sales/HEARTBEAT.md +55 -0
- package/docs/templates/sales/MEMORY.md +89 -0
- package/docs/templates/sales/SOUL.md +34 -0
- package/docs/templates/sales/USER.md +54 -0
- package/docs/terms-of-service/index.html +122 -0
- package/eslint.config.js +32 -0
- package/evals/README.md +29 -0
- package/evals/cases.json +390 -0
- package/evals/results.md +68 -0
- package/evals/run.js +180 -0
- package/examples/basic-usage.js +38 -0
- package/examples/demo-attack/demo.js +186 -0
- package/examples/python-quickstart/README.md +54 -0
- package/examples/python-quickstart/clawmoat_client.py +167 -0
- package/examples/video-demo/README.md +14 -0
- package/examples/video-demo/scene-a-normal.js +29 -0
- package/examples/video-demo/scene-b-attack-arrives.js +31 -0
- package/examples/video-demo/scene-c-hijack.js +44 -0
- package/examples/video-demo/scene-d-clawmoat.js +46 -0
- package/integrations/crewai/README.md +32 -0
- package/integrations/crewai/clawmoat_crewai/__init__.py +17 -0
- package/integrations/crewai/clawmoat_crewai/guard.py +103 -0
- package/integrations/crewai/pyproject.toml +21 -0
- package/integrations/langchain/README.md +91 -0
- package/integrations/langchain/clawmoat_langchain/__init__.py +17 -0
- package/integrations/langchain/clawmoat_langchain/callback.py +489 -0
- package/integrations/langchain/pyproject.toml +32 -0
- package/integrations/litellm/README.md +324 -0
- package/integrations/litellm/clawmoat_litellm/__init__.py +21 -0
- package/integrations/litellm/clawmoat_litellm/callback.py +329 -0
- package/integrations/litellm/clawmoat_litellm/proxy_middleware.py +224 -0
- package/integrations/litellm/pyproject.toml +74 -0
- package/integrations/openai-agents/README.md +392 -0
- package/integrations/openai-agents/clawmoat_openai_agents/__init__.py +20 -0
- package/integrations/openai-agents/clawmoat_openai_agents/guardrail.py +431 -0
- package/integrations/openai-agents/clawmoat_openai_agents/middleware.py +311 -0
- package/integrations/openai-agents/pyproject.toml +76 -0
- package/package.json +6 -5
- package/plugins/openclaw-adapter/PHASE1.md +439 -0
- package/plugins/openclaw-adapter/README.md +103 -0
- package/plugins/openclaw-adapter/SPEC.md +1644 -0
- package/plugins/openclaw-adapter/package.json +31 -0
- package/plugins/openclaw-adapter/src/index.test.ts +226 -0
- package/plugins/openclaw-adapter/src/index.ts +140 -0
- package/plugins/openclaw-adapter/tsconfig.json +14 -0
- package/server/data/threats.json +290 -0
- package/server/index.js +224 -10
- package/src/adapters/express.js +161 -0
- package/src/adapters/index.js +92 -0
- package/src/adapters/langchain.js +185 -0
- package/src/approval/index.js +456 -0
- package/src/ban-scanner.js +200 -0
- package/src/boundary-scanner.js +296 -0
- package/src/ci-scanner.js +279 -0
- package/src/code-scanner.js +245 -0
- package/src/enforce.js +166 -0
- package/src/finance/index.js +585 -0
- package/src/finance/mcp-firewall.js +486 -0
- package/src/formatters/json.js +80 -0
- package/src/formatters/sarif.js +388 -0
- package/src/guardian/alerts.js +34 -3
- package/src/guardian/gateway-monitor.js +590 -0
- package/src/guardian/index.js +41 -2
- package/src/index.js +105 -0
- package/src/integrations/agentmesh.js +501 -0
- package/src/language-detector.js +201 -0
- package/src/mcp-scanner.js +253 -0
- package/src/multimodal/index.js +579 -0
- package/src/obfuscation-scanner.js +457 -0
- package/src/policy-engine.js +402 -0
- package/src/scanners/dependency-attacks.js +128 -0
- package/src/scanners/prompt-injection.js +18 -0
- package/src/scanners/supply-chain.js +14 -0
- package/src/templates/default-config.yml +90 -0
- package/src/vuln-ops/exploitability.js +46 -0
- package/src/watch/live-monitor.js +720 -0
|
@@ -0,0 +1,489 @@
|
|
|
1
|
+
"""ClawMoat callback handlers for LangChain.
|
|
2
|
+
|
|
3
|
+
Intercepts LLM prompts, tool calls, and chain outputs to scan for
|
|
4
|
+
prompt injection, jailbreaks, secret leakage, PII, and data exfiltration.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Any, Dict, List, Optional, Union
|
|
11
|
+
from uuid import UUID
|
|
12
|
+
|
|
13
|
+
from langchain_core.callbacks import BaseCallbackHandler, AsyncCallbackHandlerMixin
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger("clawmoat_langchain")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class SecurityThreatError(Exception):
    """Raised when ClawMoat detects a security threat that must stop execution.

    Args:
        message: Human-readable description of why execution was blocked.
        findings: The finding dictionaries that triggered the block. Optional
            so the exception can be rebuilt from its ``args`` alone: ``copy``
            and ``pickle`` re-invoke the constructor with only the message and
            then restore ``findings`` from the instance ``__dict__``.

    Attributes:
        findings: The list passed at construction, or an empty list.
    """

    def __init__(self, message: str, findings: Optional[List[Dict[str, Any]]] = None):
        super().__init__(message)
        # Keep the caller's list object (no copy) so identity is unchanged for
        # code that passes a list in and later inspects it on the exception.
        self.findings = findings if findings is not None else []
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ClawMoatCallbackHandler(BaseCallbackHandler):
    """Synchronous ClawMoat security callback for LangChain.

    Scans inbound prompts for injection/jailbreak and outbound responses
    for secret/PII leakage. Can operate in local mode (subprocess to
    clawmoat CLI) or remote mode (HTTP API).

    Every finding from an unsafe scan is appended to ``findings``; ``stats``
    counts scans performed, scans that were blocked, and unsafe scans that
    were only warned about.

    Args:
        base_url: ClawMoat server URL (remote mode). If None, uses local CLI.
        api_key: API key for remote ClawMoat server.
        block_on_critical: If True, raise SecurityThreatError on critical findings.
            Defaults to True.
        block_on_high: If True, also block on high-severity findings.
            Defaults to False.
        scan_prompts: Scan LLM prompts for injection/jailbreak. Defaults to True.
        scan_outputs: Scan LLM outputs, chain outputs and tool outputs.
            Defaults to True.
        scan_tools: Scan tool inputs for dangerous commands. Defaults to True.
        on_finding: Optional callback for each finding: fn(finding_dict) -> None.
        log_file: Path for a security event log. Stored on the instance; this
            class does not itself write to it.
        quiet: Suppress the warning log record emitted for each finding.

    Raises:
        SecurityThreatError: From any scanning callback when the worst finding
            of a scan meets the configured blocking threshold.

    Example:
        from clawmoat_langchain import ClawMoatCallbackHandler

        handler = ClawMoatCallbackHandler(block_on_critical=True)
        result = chain.invoke({"input": user_message}, config={"callbacks": [handler]})

        # Check findings after run
        print(handler.findings)
    """

    # Ask LangChain to propagate exceptions raised inside this handler so a
    # SecurityThreatError actually aborts the run.
    raise_error = True

    # Ordering used to pick the worst finding of a scan; labels that are not
    # listed here rank the same as "low".
    _SEVERITY_RANK = {"low": 0, "medium": 1, "high": 2, "critical": 3}

    def __init__(
        self,
        base_url: Optional[str] = None,
        api_key: Optional[str] = None,
        block_on_critical: bool = True,
        block_on_high: bool = False,
        scan_prompts: bool = True,
        scan_outputs: bool = True,
        scan_tools: bool = True,
        on_finding: Optional[Any] = None,
        log_file: Optional[str] = None,
        quiet: bool = False,
    ):
        self.base_url = base_url
        self.api_key = api_key
        self.block_on_critical = block_on_critical
        self.block_on_high = block_on_high
        self.scan_prompts = scan_prompts
        self.scan_outputs = scan_outputs
        self.scan_tools = scan_tools
        self.on_finding = on_finding
        self.log_file = log_file
        self.quiet = quiet
        self.findings: List[Dict[str, Any]] = []
        self.stats = {"scanned": 0, "blocked": 0, "warnings": 0}

        self._scanner = self._init_scanner()

    def _init_scanner(self):
        """Return the remote scanner when ``base_url`` is set, else the local one."""
        if self.base_url:
            return _RemoteScanner(self.base_url, self.api_key)
        return _LocalScanner()

    @staticmethod
    def _as_text(content: Any) -> str:
        """Flatten message content into a single string for scanning.

        Strings pass through unchanged. For a list/tuple of parts, string
        parts and the ``"text"`` value of dict parts are joined with newlines;
        any other part (and any other content type) is converted with ``str``.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, (list, tuple)):
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
                else:
                    pieces.append(str(part))
            return "\n".join(pieces)
        return str(content)

    def _process_result(self, result: Dict[str, Any], context: str) -> None:
        """Record the findings of one scan and block if the threshold is met.

        Args:
            result: Scanner response; ``safe`` defaults to True when absent and
                ``findings`` to an empty list.
            context: Label stored on each finding under ``"context"`` and used
                in log and error messages.

        Raises:
            SecurityThreatError: When the highest severity among the findings
                is ``critical`` and ``block_on_critical`` is set, or is
                ``high``/``critical`` and ``block_on_high`` is set.
        """
        self.stats["scanned"] += 1

        if result.get("safe", True):
            return

        # ``or []`` also covers a scanner that reports ``"findings": null``.
        findings = result.get("findings") or []
        for finding in findings:
            finding["context"] = context
            self.findings.append(finding)
            if self.on_finding:
                self.on_finding(finding)
            if not self.quiet:
                logger.warning(
                    "ClawMoat [%s] %s: %s (%s)",
                    finding.get("severity", "?"),
                    finding.get("type", "?"),
                    finding.get("subtype", ""),
                    context,
                )

        # ``default`` keeps an unsafe result that carries no findings from
        # raising ValueError; it is then treated as a non-blocking warning.
        max_sev = max(
            (f.get("severity", "low") for f in findings),
            key=lambda sev: self._SEVERITY_RANK.get(sev, 0),
            default="low",
        )

        should_block = (
            (self.block_on_critical and max_sev == "critical")
            or (self.block_on_high and max_sev in ("critical", "high"))
        )

        if should_block:
            self.stats["blocked"] += 1
            raise SecurityThreatError(
                f"ClawMoat blocked {context}: {max_sev} severity threat detected",
                findings,
            )
        self.stats["warnings"] += 1

    # ─── LangChain Callback Methods ──────────────────────────────

    def on_llm_start(
        self,
        serialized: Dict[str, Any],
        prompts: List[str],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
        """Scan prompts before they reach the LLM."""
        if not self.scan_prompts:
            return
        for prompt in prompts:
            result = self._scanner.scan_inbound(prompt)
            self._process_result(result, "llm_prompt")

    def on_chat_model_start(
        self,
        serialized: Dict[str, Any],
        messages: List[List[Any]],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
        """Scan chat messages before they reach the model."""
        if not self.scan_prompts:
            return
        for message_list in messages:
            for msg in message_list:
                content = getattr(msg, "content", str(msg))
                if not content:
                    continue
                # Content may be a list of parts rather than a plain string;
                # the scanners only accept text.
                result = self._scanner.scan_inbound(self._as_text(content))
                self._process_result(result, "chat_message")

    def on_llm_end(
        self,
        response: Any,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Scan LLM output for secrets/PII leakage."""
        if not self.scan_outputs:
            return
        for gen_list in response.generations:
            for gen in gen_list:
                if gen.text:
                    result = self._scanner.scan_outbound(gen.text)
                    self._process_result(result, "llm_output")

    def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
        """Scan tool inputs for dangerous commands/paths."""
        if not self.scan_tools:
            return
        # Tolerate a missing serialized payload instead of failing on ``.get``.
        tool_name = (serialized or {}).get("name", "unknown")
        result = self._scanner.scan_inbound(input_str, context="tool_input")
        self._process_result(result, f"tool:{tool_name}")

    def on_tool_end(
        self,
        output: Any,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Scan tool output for injection attempts.

        Enabled by ``scan_outputs``, but the text goes through the inbound
        scanner because tool output is fed back to the model.
        """
        if not self.scan_outputs:
            return
        text = str(output) if output else ""
        if text:
            result = self._scanner.scan_inbound(text, context="tool_output")
            self._process_result(result, "tool_output")

    def on_chain_end(
        self,
        outputs: Dict[str, Any],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Scan the non-empty string values of a chain's outputs."""
        if not self.scan_outputs:
            return
        for key, value in outputs.items():
            if isinstance(value, str) and value:
                result = self._scanner.scan_outbound(value)
                self._process_result(result, f"chain_output:{key}")

    # Callbacks this handler deliberately ignores.
    def on_llm_error(self, error, **kwargs):
        pass

    def on_chain_start(self, serialized, inputs, **kwargs):
        pass

    def on_chain_error(self, error, **kwargs):
        pass

    def on_tool_error(self, error, **kwargs):
        pass

    def on_text(self, text, **kwargs):
        pass
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
class ClawMoatAsyncCallbackHandler(ClawMoatCallbackHandler, AsyncCallbackHandlerMixin):
    """Async version of ClawMoatCallbackHandler.

    Same constructor and blocking rules as ClawMoatCallbackHandler, but every
    scanning callback is a coroutine that awaits the scanner's
    ``async_scan_inbound`` / ``async_scan_outbound`` methods.
    Drop-in replacement for async LangChain chains.

    Raises:
        SecurityThreatError: From any scanning callback, under the same
            conditions as the synchronous handler.

    Example:
        from clawmoat_langchain import ClawMoatAsyncCallbackHandler

        handler = ClawMoatAsyncCallbackHandler(base_url="http://localhost:8080")
        result = await chain.ainvoke({"input": msg}, config={"callbacks": [handler]})
    """

    # NOTE(review): confirm that langchain_core.callbacks really exports
    # AsyncCallbackHandlerMixin; the async base class listed in its API
    # reference is AsyncCallbackHandler. If the name is missing, importing
    # this module fails before any handler can be created.

    @staticmethod
    def _as_text(content):
        """Flatten message content into a single string for scanning.

        Strings pass through unchanged. For a list/tuple of parts, string
        parts and the ``"text"`` value of dict parts are joined with newlines;
        any other part (and any other content type) is converted with ``str``.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, (list, tuple)):
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
                else:
                    pieces.append(str(part))
            return "\n".join(pieces)
        return str(content)

    async def on_llm_start(self, serialized, prompts, *, run_id, **kwargs):
        """Scan prompts before they reach the LLM."""
        if not self.scan_prompts:
            return
        for prompt in prompts:
            result = await self._scanner.async_scan_inbound(prompt)
            self._process_result(result, "llm_prompt")

    async def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
        """Scan chat messages before they reach the model."""
        if not self.scan_prompts:
            return
        for message_list in messages:
            for msg in message_list:
                content = getattr(msg, "content", str(msg))
                if not content:
                    continue
                # Content may be a list of parts rather than a plain string;
                # the scanners only accept text.
                result = await self._scanner.async_scan_inbound(self._as_text(content))
                self._process_result(result, "chat_message")

    async def on_llm_end(self, response, *, run_id, **kwargs):
        """Scan LLM output for secrets/PII leakage."""
        if not self.scan_outputs:
            return
        for gen_list in response.generations:
            for gen in gen_list:
                if gen.text:
                    result = await self._scanner.async_scan_outbound(gen.text)
                    self._process_result(result, "llm_output")

    async def on_tool_start(self, serialized, input_str, *, run_id, **kwargs):
        """Scan tool inputs for dangerous commands/paths."""
        if not self.scan_tools:
            return
        # Tolerate a missing serialized payload instead of failing on ``.get``.
        tool_name = (serialized or {}).get("name", "unknown")
        result = await self._scanner.async_scan_inbound(input_str, context="tool_input")
        self._process_result(result, f"tool:{tool_name}")

    async def on_tool_end(self, output, *, run_id, **kwargs):
        """Scan tool output for injection attempts (gated by ``scan_outputs``)."""
        if not self.scan_outputs:
            return
        text = str(output) if output else ""
        if text:
            result = await self._scanner.async_scan_inbound(text, context="tool_output")
            self._process_result(result, "tool_output")

    async def on_chain_end(self, outputs, *, run_id, **kwargs):
        """Scan the non-empty string values of a chain's outputs."""
        if not self.scan_outputs:
            return
        for key, value in outputs.items():
            if isinstance(value, str) and value:
                result = await self._scanner.async_scan_outbound(value)
                self._process_result(result, f"chain_output:{key}")
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
# ─── Scanner Backends ────────────────────────────────────────────
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
class _LocalScanner:
    """Scan text with the ``clawmoat`` CLI, falling back to built-in regexes.

    Each scan runs ``npx clawmoat scan --format json --stdin`` with the text
    on stdin. If the command cannot be started, times out, exits non-zero,
    prints nothing, or prints something that is not a JSON object, the
    pattern-based fallback is used instead.

    Results are dictionaries with a ``safe`` boolean and a ``findings`` list.
    """

    _CLI_COMMAND = ("npx", "clawmoat", "scan", "--format", "json", "--stdin")
    _CLI_TIMEOUT_SECONDS = 10

    # Leading characters of a secret/PII match kept in a finding's "matched".
    _REDACT_KEEP = 4
    # Maximum length of a non-secret match kept in a finding's "matched".
    _PREVIEW_LEN = 50

    # (regex, type, subtype, severity); searched case-insensitively.
    _INBOUND_PATTERNS = (
        (r"ignore\s+(?:all\s+)?(?:previous|prior|above)\s+instructions?", "prompt_injection", "instruction_override", "critical"),
        (r"you\s+are\s+now\s+(?:a|an|in)\s+(?:DAN|evil|unrestricted)", "jailbreak", "role_override", "critical"),
        (r"system\s*prompt|<<\s*SYS|<\|system\|>", "prompt_injection", "system_prompt_leak", "high"),
        (r"(?:sk-[a-zA-Z0-9]{20,}|ghp_[a-zA-Z0-9]{36}|AKIA[0-9A-Z]{16})", "secret_detected", "api_key", "critical"),
        (r"(?:password|passwd|pwd)\s*[:=]\s*\S+", "secret_detected", "password", "high"),
    )

    # (regex, type, subtype, severity); searched case-sensitively.
    _OUTBOUND_PATTERNS = (
        (r"sk-[a-zA-Z0-9]{20,}", "secret_detected", "openai_key", "critical"),
        (r"ghp_[a-zA-Z0-9]{36}", "secret_detected", "github_token", "critical"),
        (r"AKIA[0-9A-Z]{16}", "secret_detected", "aws_key", "critical"),
        (r"\b\d{3}-\d{2}-\d{4}\b", "pii_detected", "ssn", "critical"),
        # TLD class is letters only; a "|" inside [...] is a literal pipe.
        (r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b", "pii_detected", "email", "medium"),
    )

    def _run_cli(self, extra_args, text: str) -> Optional[Dict[str, Any]]:
        """Run the CLI on ``text``; return its JSON object or None on any failure."""
        import json
        import subprocess

        try:
            proc = subprocess.run(
                [*self._CLI_COMMAND, *extra_args],
                input=text,
                capture_output=True,
                text=True,
                timeout=self._CLI_TIMEOUT_SECONDS,
            )
        except (subprocess.TimeoutExpired, OSError, UnicodeError):
            # OSError covers a missing or non-executable ``npx``; UnicodeError
            # covers text that cannot be encoded for, or decoded from, the pipe.
            return None

        if proc.returncode != 0 or not proc.stdout.strip():
            return None
        try:
            parsed = json.loads(proc.stdout)
        except json.JSONDecodeError:
            return None
        # Callers use ``.get`` on the result, so anything but an object is
        # treated as a failed scan.
        return parsed if isinstance(parsed, dict) else None

    def scan_inbound(self, text: str, context: str = "message") -> Dict[str, Any]:
        """Scan text headed for the model.

        Args:
            text: Text to scan.
            context: Accepted for parity with the remote scanner; it is not
                passed to the CLI or used by the fallback.
        """
        result = self._run_cli((), text)
        if result is not None:
            return result
        # Fallback: basic pattern matching
        return self._basic_scan_inbound(text)

    def scan_outbound(self, text: str) -> Dict[str, Any]:
        """Scan text produced by the model (CLI is run with ``--outbound``)."""
        result = self._run_cli(("--outbound",), text)
        if result is not None:
            return result
        return self._basic_scan_outbound(text)

    async def async_scan_inbound(self, text: str, context: str = "message") -> Dict[str, Any]:
        """Awaitable ``scan_inbound``.

        The blocking subprocess call runs in the loop's default executor so it
        does not stall the event loop for up to the CLI timeout.
        """
        import asyncio

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.scan_inbound, text, context)

    async def async_scan_outbound(self, text: str) -> Dict[str, Any]:
        """Awaitable ``scan_outbound``; runs in the loop's default executor."""
        import asyncio

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.scan_outbound, text)

    def _match_patterns(self, text: str, patterns, ignore_case: bool, redact_all: bool) -> Dict[str, Any]:
        """Return one finding per pattern that occurs in ``text``.

        Only the first occurrence of each pattern is reported. Secret matches
        (and every match when ``redact_all`` is set) keep just their first
        ``_REDACT_KEEP`` characters followed by ``***``; other matches are
        truncated to ``_PREVIEW_LEN`` characters.
        """
        import re

        flags = re.IGNORECASE if ignore_case else 0
        findings = []
        for pattern, type_, subtype, severity in patterns:
            match = re.search(pattern, text, flags)
            if not match:
                continue
            raw = match.group(0)
            if redact_all or type_ == "secret_detected":
                shown = raw[: self._REDACT_KEEP] + "***"
            else:
                shown = raw[: self._PREVIEW_LEN]
            findings.append({
                "type": type_,
                "subtype": subtype,
                "severity": severity,
                "matched": shown,
            })
        return {"safe": not findings, "findings": findings}

    def _basic_scan_inbound(self, text: str) -> Dict[str, Any]:
        """Fallback pattern-based scanning when CLI is unavailable."""
        return self._match_patterns(text, self._INBOUND_PATTERNS, ignore_case=True, redact_all=False)

    def _basic_scan_outbound(self, text: str) -> Dict[str, Any]:
        """Fallback outbound scanning for secrets and PII; all matches are redacted."""
        return self._match_patterns(text, self._OUTBOUND_PATTERNS, ignore_case=False, redact_all=True)
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
class _RemoteScanner:
    """Scan text through a remote ClawMoat HTTP API.

    Inbound text is POSTed to ``/api/v1/scan/inbound`` and outbound text to
    ``/api/v1/scan/outbound`` as JSON, with a bearer token when an API key is
    configured.

    The scanner fails open: if the request fails, the status is not 200, or
    the body is not a JSON object, the text is reported as safe. Each such
    failure is logged as a warning so an unreachable server does not silently
    disable scanning.

    Args:
        base_url: Server root URL; trailing slashes are removed.
        api_key: Optional bearer token.
    """

    _TIMEOUT_SECONDS = 10
    _INBOUND_PATH = "/api/v1/scan/inbound"
    _OUTBOUND_PATH = "/api/v1/scan/outbound"

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key

    def _headers(self):
        """Build request headers, adding ``Authorization`` only when a key is set."""
        h = {"Content-Type": "application/json"}
        if self.api_key:
            h["Authorization"] = f"Bearer {self.api_key}"
        return h

    @staticmethod
    def _fail_open(reason: Any) -> Dict[str, Any]:
        """Log why the remote scan was unusable and return a "safe" result."""
        logger.warning(
            "ClawMoat remote scan unavailable (%s); content was not scanned",
            reason,
        )
        return {"safe": True, "findings": []}

    def _parse(self, resp: Any) -> Dict[str, Any]:
        """Turn an HTTP response into a scan result, failing open on bad replies."""
        if resp.status_code != 200:
            return self._fail_open(f"HTTP {resp.status_code}")
        try:
            body = resp.json()
        except ValueError:
            # A body that is not JSON raises a ValueError subclass, which
            # ``except httpx.HTTPError`` does not catch.
            return self._fail_open("response body is not valid JSON")
        if not isinstance(body, dict):
            return self._fail_open("response body is not a JSON object")
        return body

    def _post(self, path: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """POST ``payload`` to ``path`` synchronously and parse the reply."""
        import httpx

        try:
            resp = httpx.post(
                f"{self.base_url}{path}",
                json=payload,
                headers=self._headers(),
                timeout=self._TIMEOUT_SECONDS,
            )
        except httpx.HTTPError as exc:
            return self._fail_open(exc)
        return self._parse(resp)

    async def _apost(self, path: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """POST ``payload`` to ``path`` with a one-off async client and parse the reply."""
        import httpx

        try:
            async with httpx.AsyncClient() as client:
                resp = await client.post(
                    f"{self.base_url}{path}",
                    json=payload,
                    headers=self._headers(),
                    timeout=self._TIMEOUT_SECONDS,
                )
        except httpx.HTTPError as exc:
            return self._fail_open(exc)
        return self._parse(resp)

    def scan_inbound(self, text: str, context: str = "message") -> Dict[str, Any]:
        """Scan text headed for the model; ``context`` is sent along with it."""
        return self._post(self._INBOUND_PATH, {"text": text, "context": context})

    def scan_outbound(self, text: str) -> Dict[str, Any]:
        """Scan text produced by the model."""
        return self._post(self._OUTBOUND_PATH, {"text": text})

    async def async_scan_inbound(self, text: str, context: str = "message") -> Dict[str, Any]:
        """Awaitable ``scan_inbound`` using ``httpx.AsyncClient``."""
        return await self._apost(self._INBOUND_PATH, {"text": text, "context": context})

    async def async_scan_outbound(self, text: str) -> Dict[str, Any]:
        """Awaitable ``scan_outbound`` using ``httpx.AsyncClient``."""
        return await self._apost(self._OUTBOUND_PATH, {"text": text})
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "clawmoat-langchain"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "ClawMoat security callbacks for LangChain — prompt injection, jailbreak, PII, and secret scanning"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [{ name = "ClawMoat", email = "hello@clawmoat.com" }]
|
|
13
|
+
keywords = ["langchain", "security", "ai-agents", "prompt-injection", "clawmoat"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Security",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"langchain-core>=0.1.0",
|
|
23
|
+
"httpx>=0.24.0",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
dev = ["pytest", "pytest-asyncio"]
|
|
28
|
+
|
|
29
|
+
[project.urls]
|
|
30
|
+
Homepage = "https://clawmoat.com"
|
|
31
|
+
Repository = "https://github.com/darfaz/clawmoat"
|
|
32
|
+
Issues = "https://github.com/darfaz/clawmoat/issues"
|