langprotect-mcp-gateway 1.2.6__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
3
  LangProtect MCP Gateway - Security Gateway for MCP Servers
4
+ Enhanced with Pre-LLM Scanning and Response Masking
4
5
  """
5
6
 
6
7
  import sys
@@ -13,6 +14,9 @@ from datetime import datetime, timedelta
13
14
  from typing import Dict, List, Any, Optional
14
15
  import logging
15
16
 
17
+ # Import response masker
18
+ from .response_masker import ResponseMasker, get_masker
19
+
16
20
  log_level = os.environ.get("LOGLEVEL", "DEBUG" if os.getenv('DEBUG', 'false').lower() == 'true' else "INFO").upper()
17
21
  logging.basicConfig(level=getattr(logging, log_level), format='[%(asctime)s] %(levelname)s: %(message)s', handlers=[logging.StreamHandler(sys.stderr)])
18
22
  logger = logging.getLogger('langprotect-gateway')
@@ -97,12 +101,15 @@ class MCPServer:
97
101
 
98
102
 
99
103
  class LangProtectAuth:
100
- def __init__(self, url: str, email: str, password: str):
104
+ def __init__(self, url: str, email: str, password: str, scan_timeout: float = 5.0, fail_closed: bool = False):
101
105
  self.url = url
102
106
  self.email = email
103
107
  self.password = password
104
108
  self.jwt_token: Optional[str] = None
105
109
  self.token_expiry: Optional[datetime] = None
110
+ self.scan_timeout = scan_timeout # Maximum wait time for scans
111
+ self.fail_closed = fail_closed # Block on scan failure if True
112
+ logger.info(f"Auth initialized: timeout={scan_timeout}s, fail_closed={fail_closed}")
106
113
 
107
114
  def login(self) -> bool:
108
115
  try:
@@ -126,34 +133,168 @@ class LangProtectAuth:
126
133
  return self.login()
127
134
  return True
128
135
 
129
- def scan(self, tool_name: str, arguments: Dict, server_name: str) -> Dict:
136
+ def scan_input(self, tool_name: str, arguments: Dict, server_name: str) -> Dict:
137
+ """
138
+ Scan user input BEFORE forwarding to MCP server (blocking scan).
139
+ Uses the new Group Scan API with policy-based scanning.
140
+ """
130
141
  self.ensure_token()
131
142
  try:
132
- # Use MCP-specific endpoint with proper payload structure
143
+ # Convert tool call to prompt string for scanning
144
+ prompt = f"Tool: {tool_name}\nServer: {server_name}\nArguments: {json.dumps(arguments, indent=2)}"
145
+
133
146
  payload = {
134
- 'method': 'tools/call',
135
- 'params': {'name': tool_name, 'arguments': arguments},
136
- 'server_url': server_name,
137
- 'agent_id': 'langprotect-gateway',
138
- 'client_ip': '127.0.0.1',
139
- 'user_agent': f'LangProtect-MCP-Gateway/1.0 (server={server_name})'
147
+ 'prompt': prompt,
148
+ 'metadata': {
149
+ 'tool_name': tool_name,
150
+ 'server_name': server_name,
151
+ 'source': 'mcp-gateway-input',
152
+ 'scan_type': 'input'
153
+ }
140
154
  }
141
- response = requests.post(f"{self.url}/v1/group-logs/mcp/scan", json=payload, headers={'Authorization': f'Bearer {self.jwt_token}', 'Content-Type': 'application/json'}, timeout=5)
155
+
156
+ logger.debug(f"🛡️ INPUT SCAN: {tool_name} on {server_name}")
157
+
158
+ response = requests.post(
159
+ f"{self.url}/v1/group-logs/scan",
160
+ json=payload,
161
+ headers={
162
+ 'Authorization': f'Bearer {self.jwt_token}',
163
+ 'Content-Type': 'application/json'
164
+ },
165
+ timeout=self.scan_timeout
166
+ )
167
+
142
168
  if response.status_code != 200:
143
- logger.warning(f"Backend returned {response.status_code}, allowing request (fail-open)")
144
- return {'status': 'allowed', 'error': f'Backend error: {response.status_code}'}
169
+ logger.warning(f"Backend returned {response.status_code}")
170
+ if self.fail_closed:
171
+ return {
172
+ 'status': 'blocked',
173
+ 'reason': f'Scan service unavailable (HTTP {response.status_code}) - fail-closed mode'
174
+ }
175
+ else:
176
+ logger.warning("Allowing request (fail-open mode)")
177
+ return {'status': 'allowed', 'error': f'Backend error: {response.status_code}'}
178
+
145
179
  result = response.json()
146
- # Handle scan service timeout - fail open
180
+
181
+ # Handle scan service timeout - respect fail mode
147
182
  if result.get('detections', {}).get('error') == 'Scan service timeout':
148
- logger.warning("Scan service timeout, allowing request (fail-open)")
149
- return {'status': 'allowed', 'id': result.get('id'), 'error': 'Scan timeout'}
183
+ logger.warning("Scan service timeout detected")
184
+ if self.fail_closed:
185
+ return {
186
+ 'status': 'blocked',
187
+ 'reason': 'Scan service timeout - fail-closed mode'
188
+ }
189
+ else:
190
+ logger.warning("Allowing request despite timeout (fail-open)")
191
+ return {'status': 'allowed', 'id': result.get('id'), 'error': 'Scan timeout'}
192
+
150
193
  return result
194
+
151
195
  except requests.exceptions.Timeout:
152
- logger.warning("Backend scan timeout, allowing request (fail-open)")
153
- return {'status': 'allowed', 'error': 'Request timeout'}
196
+ logger.error(f"Backend scan timeout after {self.scan_timeout}s")
197
+ if self.fail_closed:
198
+ return {
199
+ 'status': 'blocked',
200
+ 'reason': f'Scan timeout after {self.scan_timeout}s - fail-closed mode'
201
+ }
202
+ else:
203
+ logger.warning("Allowing request despite timeout (fail-open)")
204
+ return {'status': 'allowed', 'error': 'Request timeout'}
205
+
154
206
  except Exception as e:
155
207
  logger.error(f"Scan error: {e}")
156
- return {'status': 'allowed', 'error': str(e)}
208
+ if self.fail_closed:
209
+ return {
210
+ 'status': 'blocked',
211
+ 'reason': f'Scan error: {str(e)} - fail-closed mode'
212
+ }
213
+ else:
214
+ logger.warning(f"Allowing request despite error (fail-open): {e}")
215
+ return {'status': 'allowed', 'error': str(e)}
216
+
217
def scan_output(self, tool_name: str, output_content: str, prompt: str = None, metadata: Dict = None) -> Dict:
    """
    Send tool output to the Group Scan endpoint and report whether it was masked.

    The call is a synchronous POST to ``{self.url}/v1/group-logs/scan`` bounded
    by ``self.scan_timeout``. Output scanning is fail-open: a non-200 reply, or
    any exception raised while building/sending/parsing the request, returns
    the original text unmodified.

    Args:
        tool_name: Name of the MCP tool that produced the output.
        output_content: Text to scan.
        prompt: Prompt sent alongside the output; defaults to
            ``"Tool: <tool_name>"`` when falsy.
        metadata: Extra metadata entries; these override the built-in
            ``tool_name`` / ``source`` / ``scan_type`` keys on collision.

    Returns:
        A dict that always contains ``status``, ``output`` and ``masked``.
        When the backend's ``MCPResponseScanner`` detection is flagged it also
        carries ``risk_score``, ``scan_id`` and ``detections``; on failure it
        carries ``error``.
    """
    self.ensure_token()

    def passthrough(error_text: str) -> Dict:
        # Fail-open shape shared by the HTTP-error and exception paths.
        return {
            'status': 'allowed',
            'output': output_content,
            'masked': False,
            'error': error_text
        }

    try:
        scan_metadata = {
            'tool_name': tool_name,
            'source': 'mcp-gateway-output',
            'scan_type': 'output',
            **(metadata or {})
        }
        request_body = {
            'prompt': prompt or f"Tool: {tool_name}",
            'output': output_content,
            'metadata': scan_metadata
        }

        logger.debug(f"🔍 OUTPUT SCAN: {tool_name} ({len(output_content)} chars)")

        auth_headers = {
            'Authorization': f'Bearer {self.jwt_token}',
            'Content-Type': 'application/json'
        }
        response = requests.post(
            f"{self.url}/v1/group-logs/scan",
            json=request_body,
            headers=auth_headers,
            timeout=self.scan_timeout
        )

        if response.status_code != 200:
            logger.warning(f"Output scan failed: HTTP {response.status_code}")
            return passthrough(f'Scan failed: {response.status_code}')

        result = response.json()
        scanner = result.get('detections', {}).get('MCPResponseScanner', {})

        if not scanner.get('is_detected'):
            # Nothing flagged: hand back the original text.
            return {
                'status': 'safe',
                'output': output_content,
                'masked': False
            }

        details = scanner.get('details', {})
        # Fall back to the original text if the backend omitted masked_content.
        masked_content = details.get('masked_content', output_content)
        logger.warning(f"🔒 OUTPUT MASKED: {tool_name} (score={scanner.get('score')})")
        return {
            'status': result.get('status'),
            'output': masked_content,
            'masked': True,
            'risk_score': result.get('risk_score'),
            'scan_id': result.get('id'),
            'detections': details.get('detections', [])
        }

    except Exception as e:
        logger.error(f"Output scan error: {e}")
        return passthrough(str(e))
157
298
 
158
299
 
159
300
  class LangProtectGateway:
@@ -165,6 +306,12 @@ class LangProtectGateway:
165
306
  self.email = os.getenv('LANGPROTECT_EMAIL')
166
307
  self.password = os.getenv('LANGPROTECT_PASSWORD')
167
308
 
309
+ # Security configuration
310
+ self.scan_timeout = float(os.getenv('LANGPROTECT_SCAN_TIMEOUT', '5.0'))
311
+ self.fail_closed = os.getenv('LANGPROTECT_FAIL_CLOSED', 'false').lower() == 'true'
312
+ self.enable_masking = os.getenv('LANGPROTECT_ENABLE_MASKING', 'true').lower() == 'true'
313
+ self.enable_entropy_detection = os.getenv('LANGPROTECT_ENTROPY_DETECTION', 'true').lower() == 'true'
314
+
168
315
  # Try to load credentials from mcp.json env section (like Lasso)
169
316
  if mcp_json_path and (not self.email or not self.password):
170
317
  self._load_env_from_config(mcp_json_path)
@@ -173,8 +320,21 @@ class LangProtectGateway:
173
320
  self.mcp_servers: Dict[str, MCPServer] = {}
174
321
  self.tool_to_server: Dict[str, str] = {}
175
322
  self.all_tools: List[Dict] = []
323
+
324
+ # Initialize response masker
325
+ self.masker: Optional[ResponseMasker] = None
326
+ if self.enable_masking:
327
+ self.masker = get_masker(
328
+ enable_entropy=self.enable_entropy_detection,
329
+ entropy_threshold=4.5
330
+ )
331
+ logger.info("✅ Response masking ENABLED")
332
+ else:
333
+ logger.warning("⚠️ Response masking DISABLED")
334
+
176
335
  logger.debug(f"LANGPROTECT_URL: {self.langprotect_url}")
177
336
  logger.debug(f"LANGPROTECT_EMAIL: {self.email}")
337
+ logger.info(f"Security config: timeout={self.scan_timeout}s, fail_closed={self.fail_closed}, masking={self.enable_masking}")
178
338
 
179
339
  def _load_env_from_config(self, path: str):
180
340
  """Load credentials from mcp.json env section (Lasso/VS Code style)"""
@@ -204,7 +364,13 @@ class LangProtectGateway:
204
364
 
205
365
  def initialize(self) -> bool:
206
366
  if self.email and self.password:
207
- self.auth = LangProtectAuth(self.langprotect_url, self.email, self.password)
367
+ self.auth = LangProtectAuth(
368
+ self.langprotect_url,
369
+ self.email,
370
+ self.password,
371
+ scan_timeout=self.scan_timeout,
372
+ fail_closed=self.fail_closed
373
+ )
208
374
  if not self.auth.login():
209
375
  logger.error("Failed to authenticate with LangProtect backend")
210
376
  return False
@@ -214,12 +380,13 @@ class LangProtectGateway:
214
380
  return False
215
381
  if not self.start_servers():
216
382
  return False
217
- logger.info("=" * 50)
218
- logger.info("LangProtect Gateway initialized")
383
+ logger.info("=" * 60)
384
+ logger.info("🛡️ LangProtect Gateway initialized")
219
385
  logger.info(f"Backend: {self.langprotect_url}")
220
386
  logger.info(f"Servers: {len(self.mcp_servers)}")
221
387
  logger.info(f"Tools: {len(self.all_tools)}")
222
- logger.info("=" * 50)
388
+ logger.info(f"Security: fail_closed={self.fail_closed}, masking={self.enable_masking}")
389
+ logger.info("=" * 60)
223
390
  return True
224
391
 
225
392
  def load_servers(self) -> bool:
@@ -338,30 +505,156 @@ class LangProtectGateway:
338
505
  tool_name = params.get('name', '')
339
506
  arguments = params.get('arguments', {})
340
507
  server_name = self.tool_to_server.get(tool_name)
508
+
341
509
  if not server_name:
342
510
  return {'jsonrpc': '2.0', 'id': request_id, 'error': {'code': -32602, 'message': f'Unknown tool: {tool_name}'}}
511
+
343
512
  server = self.mcp_servers.get(server_name)
344
513
  if not server:
345
514
  return {'jsonrpc': '2.0', 'id': request_id, 'error': {'code': -32602, 'message': f'Server not found: {server_name}'}}
515
+
346
516
  logger.info(f"Tool call: {server_name}.{tool_name}")
517
+
518
+ # 🛡️ LAYER 1: INPUT SCAN (synchronous blocking scan)
519
+ scan_result = None
347
520
  if self.auth:
348
- scan_result = self.auth.scan(tool_name, arguments, server_name)
521
+ scan_result = self.auth.scan_input(tool_name, arguments, server_name)
349
522
  status = scan_result.get('status', '').lower()
523
+
350
524
  if status == 'blocked':
351
- reason = scan_result.get('detections', {}).get('MCPActionControl', {}).get('reason', 'Policy violation')
352
- logger.warning(f"BLOCKED: {tool_name} - {reason}")
353
- return {'jsonrpc': '2.0', 'id': request_id, 'error': {'code': -32000, 'message': f'LangProtect: {reason}'}}
354
- logger.info(f"ALLOWED (log_id={scan_result.get('id')})")
525
+ reason = scan_result.get('reason', 'Policy violation')
526
+ logger.warning(f"🚫 INPUT BLOCKED: {tool_name} - {reason}")
527
+ return {
528
+ 'jsonrpc': '2.0',
529
+ 'id': request_id,
530
+ 'error': {
531
+ 'code': -32000,
532
+ 'message': f'🛡️ LangProtect: {reason}'
533
+ }
534
+ }
535
+
536
+ logger.info(f"✅ INPUT ALLOWED (log_id={scan_result.get('id')})")
537
+
538
+ # Input scan passed or no auth - forward to MCP server
355
539
  try:
356
540
  response = server.call('tools/call', {'name': tool_name, 'arguments': arguments})
541
+
542
+ # 🛡️ LAYER 2: OUTPUT SCAN (scan response content for secrets)
543
+ if self.auth and self.enable_masking and 'result' in response:
544
+ # Extract text content from response
545
+ result_content = response.get('result', {})
546
+ output_text = self._extract_text_from_result(result_content)
547
+
548
+ if output_text:
549
+ logger.debug(f"🔍 Scanning output: {len(output_text)} chars")
550
+ output_scan = self.auth.scan_output(
551
+ tool_name=tool_name,
552
+ output_content=output_text,
553
+ prompt=json.dumps(arguments),
554
+ metadata={'server_name': server_name}
555
+ )
556
+
557
+ if output_scan.get('masked'):
558
+ # Replace output with masked version
559
+ masked_text = output_scan.get('output', output_text)
560
+ logger.warning(f"🔒 OUTPUT MASKED: {tool_name} (risk={output_scan.get('risk_score')})")
561
+ response['result'] = self._replace_text_in_result(result_content, masked_text)
562
+
563
+ # Return formatted response
357
564
  if 'result' in response:
358
565
  return {'jsonrpc': '2.0', 'id': request_id, 'result': response['result']}
359
566
  elif 'error' in response:
360
567
  return {'jsonrpc': '2.0', 'id': request_id, 'error': response['error']}
361
568
  return response
569
+
362
570
  except Exception as e:
571
+ logger.error(f"Error executing {tool_name}: {e}")
363
572
  return {'jsonrpc': '2.0', 'id': request_id, 'error': {'code': -32603, 'message': f'Error executing tool: {e}'}}
364
573
 
574
+ def _extract_text_from_result(self, result: Any) -> str:
575
+ """Extract text content from MCP tool result for scanning."""
576
+ if isinstance(result, str):
577
+ return result
578
+ elif isinstance(result, dict):
579
+ # MCP results typically have 'content' field
580
+ if 'content' in result:
581
+ content = result['content']
582
+ if isinstance(content, str):
583
+ return content
584
+ elif isinstance(content, list):
585
+ # Extract text from content array
586
+ texts = []
587
+ for item in content:
588
+ if isinstance(item, dict) and item.get('type') == 'text':
589
+ texts.append(item.get('text', ''))
590
+ elif isinstance(item, str):
591
+ texts.append(item)
592
+ return '\n'.join(texts)
593
+ # Fallback: convert whole dict to string
594
+ return json.dumps(result)
595
+ elif isinstance(result, list):
596
+ return '\n'.join(str(item) for item in result)
597
+ return str(result)
598
+
599
+ def _replace_text_in_result(self, result: Any, masked_text: str) -> Any:
600
+ """Replace text content in MCP tool result with masked version."""
601
+ if isinstance(result, str):
602
+ return masked_text
603
+ elif isinstance(result, dict):
604
+ result_copy = result.copy()
605
+ if 'content' in result_copy:
606
+ content = result_copy['content']
607
+ if isinstance(content, str):
608
+ result_copy['content'] = masked_text
609
+ elif isinstance(content, list):
610
+ # Replace text in content array
611
+ masked_lines = masked_text.split('\n')
612
+ new_content = []
613
+ line_idx = 0
614
+ for item in content:
615
+ if isinstance(item, dict) and item.get('type') == 'text':
616
+ if line_idx < len(masked_lines):
617
+ new_item = item.copy()
618
+ new_item['text'] = masked_lines[line_idx]
619
+ new_content.append(new_item)
620
+ line_idx += 1
621
+ else:
622
+ new_content.append(item)
623
+ result_copy['content'] = new_content
624
+ return result_copy
625
+ return masked_text
626
+
627
+ def _log_mask_events(self, mask_events: List[Dict], scan_id: Optional[str], tool_name: str):
628
+ """Log masked secrets to backend for audit trail"""
629
+ if not self.auth or not mask_events:
630
+ return
631
+
632
+ try:
633
+ self.auth.ensure_token()
634
+ payload = {
635
+ 'scan_id': scan_id,
636
+ 'tool_name': tool_name,
637
+ 'mask_events': mask_events,
638
+ 'timestamp': datetime.now().isoformat(),
639
+ 'gateway_version': '1.0.0'
640
+ }
641
+
642
+ # Fire-and-forget (don't block on logging)
643
+ response = requests.post(
644
+ f"{self.langprotect_url}/v1/mask-events",
645
+ json=payload,
646
+ headers={'Authorization': f'Bearer {self.auth.jwt_token}'},
647
+ timeout=2 # Short timeout for logging
648
+ )
649
+
650
+ if response.status_code == 200:
651
+ logger.debug(f"Logged {len(mask_events)} mask events to backend")
652
+ else:
653
+ logger.warning(f"Failed to log mask events: {response.status_code}")
654
+
655
+ except Exception as e:
656
+ logger.warning(f"Failed to log mask events: {e}")
657
+
365
658
  def run(self):
366
659
  try:
367
660
  for line in sys.stdin:
@@ -0,0 +1,323 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Response Masker - Redacts secrets from MCP server responses before forwarding to AI
4
+ """
5
+
6
+ import re
7
+ import hashlib
8
+ from typing import Dict, List, Tuple, Any, Optional
9
+ import logging
10
+
11
+ logger = logging.getLogger('langprotect-gateway')
12
+
13
+
14
+ class ResponseMasker:
15
+ """Masks secrets in MCP server responses before forwarding to AI models"""
16
+
17
+ # Comprehensive secret detection patterns
18
+ # Format: (regex_pattern, secret_type, risk_score)
19
+ # IMPORTANT: Order matters - more specific patterns should come BEFORE generic ones
20
+ SECRET_PATTERNS = [
21
+ # AWS Credentials (specific patterns first)
22
+ (r'AKIA[0-9A-Z]{16}', 'AWS_ACCESS_KEY', 100),
23
+ (r'aws_secret_access_key\s*[=:]\s*[A-Za-z0-9/+=]{40}', 'AWS_SECRET_KEY', 100),
24
+ (r'aws_session_token\s*[=:]\s*[A-Za-z0-9/+=]{100,}', 'AWS_SESSION_TOKEN', 100),
25
+
26
+ # Private Keys (PEM format)
27
+ (r'-----BEGIN (?:RSA |OPENSSH |EC |DSA |ENCRYPTED )?PRIVATE KEY-----[^-]*-----END (?:RSA |OPENSSH |EC |DSA |ENCRYPTED )?PRIVATE KEY-----', 'PRIVATE_KEY', 100),
28
+ (r'-----BEGIN CERTIFICATE-----[^-]*-----END CERTIFICATE-----', 'CERTIFICATE', 80),
29
+ (r'-----BEGIN PGP PRIVATE KEY BLOCK-----[^-]*-----END PGP PRIVATE KEY BLOCK-----', 'PGP_PRIVATE_KEY', 100),
30
+
31
+ # Cloud Provider Keys (BEFORE generic patterns)
32
+ (r'(?<![A-Z_])AIza[0-9A-Za-z_\-]{35}', 'GOOGLE_API_KEY', 95), # Negative lookbehind to avoid matching in variable names
33
+ (r'sk_live_[0-9A-Za-z]{24,99}', 'STRIPE_LIVE_KEY', 100), # Extended length range
34
+ (r'sk_test_[0-9A-Za-z]{24,99}', 'STRIPE_TEST_KEY', 70),
35
+ (r'rk_live_[0-9A-Za-z]{24,}', 'STRIPE_RESTRICTED_KEY', 95),
36
+
37
+ # GitHub Tokens (BEFORE generic token pattern)
38
+ (r'gh[pousr]_[A-Za-z0-9]{36,255}', 'GITHUB_TOKEN', 95),
39
+ (r'github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59}', 'GITHUB_PAT', 95),
40
+
41
+ # SSH Keys (public keys - need substantial base64)
42
+ (r'ssh-rsa\s+[A-Za-z0-9+/]{200,}[=]{0,3}(?:\s|$)', 'SSH_RSA_PUBLIC_KEY', 70),
43
+ (r'ssh-ed25519\s+[A-Za-z0-9+/]{68}(?:\s|$)', 'SSH_ED25519_PUBLIC_KEY', 70),
44
+
45
+ # Password Fields (context-aware - must have = or : and value in quotes or after space)
46
+ (r'(?<!#)(?<![A-Za-z])\bpassword\s*[=:]\s*["\']([^"\'\s]{8,})["\']', 'PASSWORD', 85),
47
+
48
+ # Generic API Keys and Tokens (AFTER specific patterns)
49
+ (r'["\']?api[_-]?key["\']?\s*[=:]\s*["\']?([A-Za-z0-9_\-]{20,})["\']?', 'API_KEY', 90),
50
+ (r'["\']?token["\']?\s*[=:]\s*["\']?([A-Za-z0-9_\-\.]{20,})["\']?', 'TOKEN', 90),
51
+ (r'["\']?secret["\']?\s*[=:]\s*["\']?([A-Za-z0-9_\-]{16,})["\']?', 'SECRET', 85),
52
+
53
+ # Database Connection Strings
54
+ (r'(postgres|mysql|mongodb|redis)://[^:]+:[^@]+@[\w\.\-]+(?::\d+)?(?:/[\w\-]+)?', 'DB_CONNECTION_STRING', 95),
55
+ (r'Server=[\w\.\-]+;Database=[\w\-]+;User Id=[\w\-]+;Password=[^;]+', 'MSSQL_CONNECTION', 95),
56
+
57
+ # JWT Tokens
58
+ (r'eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+', 'JWT', 85),
59
+
60
+ # OAuth Tokens
61
+ (r'access_token["\']?\s*[=:]\s*["\']?([A-Za-z0-9_\-\.]{20,})["\']?', 'OAUTH_ACCESS_TOKEN', 90),
62
+ (r'refresh_token["\']?\s*[=:]\s*["\']?([A-Za-z0-9_\-\.]{20,})["\']?', 'OAUTH_REFRESH_TOKEN', 90),
63
+ (r'Bearer\s+([A-Za-z0-9_\-\.]{20,})', 'BEARER_TOKEN', 90),
64
+
65
+ # Kubernetes Secrets (base64 encoded in YAML)
66
+ (r'apiVersion:\s*v1\s*\nkind:\s*Secret\s*\ndata:\s*\n(?:\s+[\w\-]+:\s+[A-Za-z0-9+/=]+\n?)+', 'K8S_SECRET', 90),
67
+
68
+ # Environment Variable Assignment (dangerous patterns)
69
+ (r'export\s+(?:AWS_|DB_|API_|SECRET_|TOKEN_)[A-Z_]+=["\']?([A-Za-z0-9_\-\+/=]{16,})["\']?', 'ENV_VAR_SECRET', 85),
70
+ ]
71
+
72
def __init__(self, enable_entropy_detection: bool = True, entropy_threshold: float = 4.5):
    """
    Store the masker's entropy settings and log them.

    Args:
        enable_entropy_detection: Whether ``mask`` may fall back to
            high-entropy string detection.
        entropy_threshold: Minimum Shannon entropy (bits per character) for
            a candidate string to be flagged.
    """
    self.entropy_threshold = entropy_threshold
    self.enable_entropy_detection = enable_entropy_detection
    logger.info(f"Response masker initialized (entropy_detection={enable_entropy_detection}, threshold={entropy_threshold})")
83
+
84
def mask(self, content: str, context: Optional[Dict] = None) -> Tuple[str, List[Dict]]:
    """
    Redact every ``SECRET_PATTERNS`` match found in *content*.

    Patterns are searched against the original text (case-insensitive,
    multiline, dot-matches-newline) and every occurrence of each matched
    string is replaced in the working copy by
    ``<REDACTED:<type>:<first 16 hex chars of its SHA-256>>``. Matches shorter
    than 10 characters are ignored unless the type is ``JWT`` or
    ``AWS_ACCESS_KEY``. A pattern that raises is logged and skipped.

    High-entropy detection runs only when it is enabled and no pattern
    produced an event.

    Args:
        content: Text to scan. Falsy or non-string values are returned as-is.
        context: Optional caller context copied into each pattern event.

    Returns:
        ``(masked_content, mask_events)``; each event records type, hash,
        risk score, truncated pattern, match span, length and context.
    """
    if not (content and isinstance(content, str)):
        return content, []

    regex_flags = re.IGNORECASE | re.MULTILINE | re.DOTALL
    length_exempt = ['JWT', 'AWS_ACCESS_KEY']
    redacted = content
    events = []

    for regex, kind, score in self.SECRET_PATTERNS:
        try:
            for hit in list(re.finditer(regex, content, regex_flags)):
                secret = hit.group(0)

                # Very short matches are treated as likely false positives.
                if len(secret) < 10 and kind not in length_exempt:
                    continue

                # Only a truncated digest is recorded, never the secret itself.
                digest = hashlib.sha256(secret.encode()).hexdigest()[:16]
                redacted = redacted.replace(secret, f"<REDACTED:{kind}:{digest}>")

                events.append({
                    'type': kind,
                    'hash': digest,
                    'risk_score': score,
                    'pattern': regex[:60],
                    'location': hit.span(),
                    'length': len(secret),
                    'context': context or {}
                })

                logger.info(f"Masked {kind} (hash={digest}, len={len(secret)})")

        except Exception as e:
            logger.warning(f"Error applying pattern {kind}: {e}")
            continue

    if self.enable_entropy_detection and not events:
        for found in self._detect_high_entropy_strings(content, redacted):
            redacted = found['masked_content']
            events.append(found['event'])

    return redacted, events
148
+
149
+ def _detect_high_entropy_strings(self, original_content: str, current_masked: str) -> List[Dict]:
150
+ """
151
+ Detect high-entropy strings that might be unknown secret formats.
152
+ Uses Shannon entropy to find random-looking strings.
153
+ """
154
+ results = []
155
+
156
+ # Find candidate strings (alphanumeric, 16+ chars)
157
+ pattern = r'\b[A-Za-z0-9_\-+=]{16,64}\b'
158
+ matches = re.finditer(pattern, original_content)
159
+
160
+ for match in matches:
161
+ candidate = match.group(0)
162
+
163
+ # Skip if already masked
164
+ if candidate not in current_masked:
165
+ continue
166
+
167
+ # Calculate Shannon entropy
168
+ entropy = self._calculate_shannon_entropy(candidate)
169
+
170
+ if entropy >= self.entropy_threshold:
171
+ secret_hash = hashlib.sha256(candidate.encode()).hexdigest()[:16]
172
+ placeholder = f"<REDACTED:HIGH_ENTROPY:{secret_hash}>"
173
+ current_masked = current_masked.replace(candidate, placeholder)
174
+
175
+ results.append({
176
+ 'masked_content': current_masked,
177
+ 'event': {
178
+ 'type': 'HIGH_ENTROPY_STRING',
179
+ 'hash': secret_hash,
180
+ 'risk_score': 70,
181
+ 'pattern': 'entropy_detection',
182
+ 'location': match.span(),
183
+ 'length': len(candidate),
184
+ 'entropy': round(entropy, 2),
185
+ 'context': {}
186
+ }
187
+ })
188
+
189
+ logger.info(f"Masked high-entropy string (entropy={entropy:.2f}, len={len(candidate)})")
190
+
191
+ return results
192
+
193
+ def _calculate_shannon_entropy(self, data: str) -> float:
194
+ """Calculate Shannon entropy of a string (bits per character)"""
195
+ if not data:
196
+ return 0.0
197
+
198
+ import math
199
+ from collections import Counter
200
+
201
+ # Count character frequencies
202
+ counter = Counter(data)
203
+ length = len(data)
204
+
205
+ # Calculate entropy
206
+ entropy = 0.0
207
+ for count in counter.values():
208
+ probability = count / length
209
+ if probability > 0:
210
+ entropy -= probability * math.log2(probability)
211
+
212
+ return entropy
213
+
214
def mask_mcp_response(self, mcp_response: Dict, mask_events: List[Dict]) -> Dict:
    """
    Mask secrets inside an MCP JSON-RPC response, modifying it in place.

    Handled shapes of ``mcp_response['result']``:
    - a plain string;
    - a dict with a string ``content``;
    - a dict with a list ``content`` whose dict items carry ``text``;
    - a dict with a list ``contents`` whose dict items carry string ``text``
      and/or string ``content``;
    - any other string value longer than 10 characters in the result dict,
      except metadata-style keys.

    Non-dict responses and responses containing ``error`` are returned
    untouched.

    Args:
        mcp_response: JSON-RPC response dict from the MCP server.
        mask_events: List that receives the mask events (extended in place).

    Returns:
        The same ``mcp_response`` object.
    """
    if not isinstance(mcp_response, dict):
        return mcp_response

    # Error responses are passed through without masking.
    if 'error' in mcp_response:
        return mcp_response

    if 'result' not in mcp_response:
        return mcp_response

    result = mcp_response['result']

    # Case 1: simple string result
    if isinstance(result, str):
        masked, events = self.mask(result)
        mcp_response['result'] = masked
        mask_events.extend(events)

    # Case 2: dictionary result
    elif isinstance(result, dict):
        if 'content' in result:
            content_value = result['content']
            if isinstance(content_value, str):
                masked, events = self.mask(content_value)
                result['content'] = masked
                mask_events.extend(events)
            elif isinstance(content_value, list):
                for item in content_value:
                    if isinstance(item, dict) and 'text' in item:
                        masked, events = self.mask(item['text'])
                        item['text'] = masked
                        mask_events.extend(events)

        if 'contents' in result and isinstance(result['contents'], list):
            for item in result['contents']:
                if not isinstance(item, dict):
                    continue
                if 'text' in item and isinstance(item['text'], str):
                    masked, events = self.mask(item['text'])
                    item['text'] = masked
                    mask_events.extend(events)
                if 'content' in item and isinstance(item['content'], str):
                    masked, events = self.mask(item['content'])
                    item['content'] = masked
                    mask_events.extend(events)

        # Generic fallback over the remaining top-level string values.
        # 'content' is excluded because it was handled above: scanning the
        # already-masked text again can match the redaction placeholders
        # themselves, nesting placeholders and duplicating events.
        skip_fields = {'type', 'mimeType', 'mime_type', 'name', 'id', 'method', 'jsonrpc'}
        already_handled = {'content'}
        for key, value in result.items():
            if key in skip_fields or key in already_handled:
                continue
            if isinstance(value, str) and len(value) > 10:
                masked, events = self.mask(value)
                if events:  # only replace when something was found
                    result[key] = masked
                    mask_events.extend(events)

    return mcp_response
289
+
290
def should_mask_tool(self, tool_name: str) -> bool:
    """
    Report whether *tool_name* looks like a tool that commonly returns sensitive data.

    The check is a case-insensitive substring match against a fixed list of
    file-reading, environment and configuration tool names, so any tool name
    merely containing one of the fragments (e.g. ``cat``) also matches.
    """
    lowered = tool_name.lower()
    risky_fragments = (
        'read_file', 'read_text_file', 'read_multiple_files',
        'get_file_info', 'list_directory', 'search_files',
        'get_env', 'list_env', 'read_env',
        'read_config', 'get_config', 'show_config',
        'cat', 'grep', 'search',
    )
    for fragment in risky_fragments:
        if fragment in lowered:
            return True
    return False
309
+
310
+
311
+ # Singleton instance for reuse
312
+ _masker_instance: Optional[ResponseMasker] = None
313
+
314
+
315
def get_masker(enable_entropy: bool = True, entropy_threshold: float = 4.5) -> ResponseMasker:
    """
    Return the shared ResponseMasker, creating it on first use.

    The arguments are applied only when the instance is first created; later
    calls return the existing instance regardless of the values passed.
    """
    global _masker_instance
    if _masker_instance is not None:
        return _masker_instance

    _masker_instance = ResponseMasker(
        enable_entropy_detection=enable_entropy,
        entropy_threshold=entropy_threshold
    )
    return _masker_instance
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langprotect-mcp-gateway
3
- Version: 1.2.6
3
+ Version: 1.3.0
4
4
  Summary: Security gateway for Model Context Protocol (MCP) to protect AI tool interactions
5
5
  Author-email: LangProtect Security Team <security@langprotect.com>
6
6
  License: MIT
@@ -32,8 +32,44 @@ Dynamic: license-file
32
32
 
33
33
  [![PyPI version](https://badge.fury.io/py/langprotect-mcp-gateway.svg)](https://pypi.org/project/langprotect-mcp-gateway/)
34
34
 
35
+ ## 🆕 What's New in v1.3.0
36
+
37
+ ### Layer 2: Output Scanning 🔍
38
+ - **Automatic secret masking** in AI-generated responses
39
+ - **30+ secret types detected**: AWS, Google Cloud, Azure, Stripe, GitHub, JWTs, DB credentials, private keys
40
+ - **Non-blocking warnings** - never interrupt your workflow
41
+ - **Preserves structure** - masks secrets while keeping code/content readable
42
+
43
+ ### Enhanced Security Controls 🔐
44
+ - **Fail-closed mode** - Block requests on scan failures (optional)
45
+ - **Configurable timeouts** - Control scan performance
46
+ - **High-entropy detection** - Catch unknown secret formats
47
+
48
+ ### Example
49
+
50
+ **Before** (v1.2.6):
51
+ ```bash
52
+ AI: Here's your AWS deployment script:
53
+ export AWS_ACCESS_KEY_ID="AKIAIOSFODNN7EXAMPLE"
54
+ export AWS_SECRET_ACCESS_KEY="wJalrXUtnFEMI/K7MDENG..."
55
+ ```
56
+
57
+ **After** (v1.3.0):
58
+ ```bash
59
+ AI: Here's your AWS deployment script:
60
+ export AWS_ACCESS_KEY_ID="<REDACTED:AWS_ACCESS_KEY:1a5d44a2>"
61
+ export AWS_SECRET_ACCESS_KEY="<REDACTED:AWS_SECRET_KEY:73ec276f>"
62
+ ```
63
+ ✅ **Secrets masked** | 🔒 **Code structure preserved** | 📝 **Audit trail maintained**
64
+
65
+ ---
66
+
35
67
  ## Features
36
68
 
69
+ ✅ **Two-Layer Protection**
70
+ - **Layer 1 (Input)**: Blocks dangerous requests before they are sent to the MCP server
71
+ - **Layer 2 (Output)**: Masks secrets in AI responses
72
+
37
73
  ✅ **Automatic Threat Detection** - Scans all MCP requests for security risks
38
74
  ✅ **Access Control** - Whitelist/blacklist MCP servers and tools
39
75
  ✅ **Full Audit Trail** - Logs all AI interactions for compliance
@@ -83,6 +119,60 @@ Reload VS Code and you're done! LangProtect will now protect all your workspaces
83
119
 
84
120
  ---
85
121
 
122
+ ## ⚙️ Configuration Options (v1.3.0+)
123
+
124
+ Configure security behavior with environment variables in your wrapper script:
125
+
126
+ ```bash
127
+ # Security Controls
128
+ export LANGPROTECT_ENABLE_MASKING=true # Enable output masking (default: true)
129
+ export LANGPROTECT_FAIL_CLOSED=false # Block on scan errors (default: false = fail-open)
130
+ export LANGPROTECT_SCAN_TIMEOUT=5.0 # Scan timeout in seconds (default: 5.0)
131
+ export LANGPROTECT_ENTROPY_DETECTION=true # Detect unknown secrets via entropy (default: true)
132
+
133
+ # Backend Connection
134
+ export LANGPROTECT_URL="http://localhost:8000"
135
+ export LANGPROTECT_EMAIL="your.email@company.com"
136
+ export LANGPROTECT_PASSWORD="your-password"
137
+ ```
138
+
139
+ ### Security Modes
140
+
141
+ **Fail-Open (Default)** - Recommended for development:
142
+ ```bash
143
+ export LANGPROTECT_FAIL_CLOSED=false
144
+ ```
145
+ - If a scan times out or fails → **Allow request** (a warning is logged)
146
+ - Won't block your workflow
147
+ - Best for development environments
148
+
149
+ **Fail-Closed** - Recommended for production:
150
+ ```bash
151
+ export LANGPROTECT_FAIL_CLOSED=true
152
+ ```
153
+ - If a scan times out or fails → **Block request**
154
+ - Maximum security
155
+ - Best for production/sensitive environments
156
+
157
+ ### Output Masking
158
+
159
+ Control how secrets detected in responses are handled:
160
+
161
+ ```bash
162
+ # Enable masking (default)
163
+ export LANGPROTECT_ENABLE_MASKING=true
164
+
165
+ # Disable masking (see secrets in plain text - not recommended)
166
+ export LANGPROTECT_ENABLE_MASKING=false
167
+ ```
168
+
169
+ **Masked format**: `<REDACTED:SECRET_TYPE:hash>`
170
+ - Example: `<REDACTED:AWS_ACCESS_KEY:1a5d44a2>`
171
+ - The hash allows deduplication across logs
172
+ - Preserves code structure
173
+
174
+ ---
175
+
86
176
  ## 🏗️ Manual Setup (Per-Workspace)
87
177
 
88
178
  If you prefer to enable LangProtect only for a specific project, you can use a local `.vscode/mcp.json` file.
@@ -0,0 +1,10 @@
1
+ langprotect_mcp_gateway/__init__.py,sha256=PedabfF6wZ_6KxuN60A4qz8T1gD9MszuXwhmrHlGH7I,510
2
+ langprotect_mcp_gateway/gateway.py,sha256=YIggDJ7n0ctUsyyI1s567QFbH7cq5-6CAAdI1J8gQkY,30921
3
+ langprotect_mcp_gateway/response_masker.py,sha256=ui1JusuPwuOKSfrDtt0FxLEGs_y512RcTG4gSz2-MT8,14702
4
+ langprotect_mcp_gateway/setup_helper.py,sha256=ghErneMTua9wPATMq8eatnviVAYJMi2bf2UUt8fnXE8,5639
5
+ langprotect_mcp_gateway-1.3.0.dist-info/licenses/LICENSE,sha256=aoVP65gKtirVmFPToow5L9IKN4FNjfM6Sejq_5b4cbM,1082
6
+ langprotect_mcp_gateway-1.3.0.dist-info/METADATA,sha256=f8QWABfqpzyuO_UgtCxC9LI6jAuuy0fsPluZIgNna54,11787
7
+ langprotect_mcp_gateway-1.3.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
8
+ langprotect_mcp_gateway-1.3.0.dist-info/entry_points.txt,sha256=HpnUUuYLQva8b6gazUX0UJO9dFHq86e9gifQfLKpyWc,140
9
+ langprotect_mcp_gateway-1.3.0.dist-info/top_level.txt,sha256=UjNlX13ma4nwJXuEyi9eMX251c5rooeEao4zajX6ZHk,24
10
+ langprotect_mcp_gateway-1.3.0.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- langprotect_mcp_gateway/__init__.py,sha256=PedabfF6wZ_6KxuN60A4qz8T1gD9MszuXwhmrHlGH7I,510
2
- langprotect_mcp_gateway/gateway.py,sha256=5J56nE5-o9jjnci3vLJqhem2nrxlVD89TjRg4aNHUqE,18718
3
- langprotect_mcp_gateway/setup_helper.py,sha256=ghErneMTua9wPATMq8eatnviVAYJMi2bf2UUt8fnXE8,5639
4
- langprotect_mcp_gateway-1.2.6.dist-info/licenses/LICENSE,sha256=aoVP65gKtirVmFPToow5L9IKN4FNjfM6Sejq_5b4cbM,1082
5
- langprotect_mcp_gateway-1.2.6.dist-info/METADATA,sha256=XFOp4rxnB1WSgZ1n5Ga4dfk0nJJ2t2ySngg7b9iWm5o,8985
6
- langprotect_mcp_gateway-1.2.6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
7
- langprotect_mcp_gateway-1.2.6.dist-info/entry_points.txt,sha256=HpnUUuYLQva8b6gazUX0UJO9dFHq86e9gifQfLKpyWc,140
8
- langprotect_mcp_gateway-1.2.6.dist-info/top_level.txt,sha256=UjNlX13ma4nwJXuEyi9eMX251c5rooeEao4zajX6ZHk,24
9
- langprotect_mcp_gateway-1.2.6.dist-info/RECORD,,