mcpower-proxy 0.0.65__py3-none-any.whl → 0.0.74__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcpower-proxy might be problematic. Click here for more details.

Files changed (39)
  1. ide_tools/__init__.py +12 -0
  2. ide_tools/common/__init__.py +5 -0
  3. ide_tools/common/hooks/__init__.py +5 -0
  4. ide_tools/common/hooks/init.py +124 -0
  5. ide_tools/common/hooks/output.py +63 -0
  6. ide_tools/common/hooks/prompt_submit.py +133 -0
  7. ide_tools/common/hooks/read_file.py +167 -0
  8. ide_tools/common/hooks/shell_execution.py +255 -0
  9. ide_tools/common/hooks/shell_parser_bashlex.py +277 -0
  10. ide_tools/common/hooks/types.py +34 -0
  11. ide_tools/common/hooks/utils.py +286 -0
  12. ide_tools/cursor/__init__.py +11 -0
  13. ide_tools/cursor/constants.py +58 -0
  14. ide_tools/cursor/format.py +35 -0
  15. ide_tools/cursor/router.py +100 -0
  16. ide_tools/router.py +48 -0
  17. main.py +11 -4
  18. {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.74.dist-info}/METADATA +4 -3
  19. mcpower_proxy-0.0.74.dist-info/RECORD +60 -0
  20. {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.74.dist-info}/top_level.txt +1 -0
  21. modules/apis/security_policy.py +11 -6
  22. modules/decision_handler.py +219 -0
  23. modules/logs/audit_trail.py +16 -15
  24. modules/logs/logger.py +14 -18
  25. modules/redaction/gitleaks_rules.py +1 -1
  26. modules/redaction/pii_rules.py +0 -48
  27. modules/redaction/redactor.py +112 -107
  28. modules/ui/__init__.py +1 -1
  29. modules/ui/confirmation.py +0 -1
  30. modules/utils/cli.py +36 -6
  31. modules/utils/ids.py +55 -10
  32. modules/utils/json.py +3 -3
  33. wrapper/__version__.py +1 -1
  34. wrapper/middleware.py +135 -217
  35. wrapper/server.py +19 -11
  36. mcpower_proxy-0.0.65.dist-info/RECORD +0 -43
  37. {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.74.dist-info}/WHEEL +0 -0
  38. {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.74.dist-info}/entry_points.txt +0 -0
  39. {mcpower_proxy-0.0.65.dist-info → mcpower_proxy-0.0.74.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +1,9 @@
1
1
  """Security Policy API Client"""
2
2
 
3
3
  import json
4
+ import time
4
5
  import uuid
5
6
  from typing import Dict, Any, Optional, List
6
- import time
7
7
 
8
8
  import httpx
9
9
 
@@ -13,6 +13,7 @@ from modules.logs.logger import MCPLogger
13
13
  from modules.redaction import redact
14
14
  from modules.utils.config import get_api_url, get_user_id
15
15
  from modules.utils.json import safe_json_dumps, to_dict
16
+ from wrapper.__version__ import __version__
16
17
 
17
18
 
18
19
  class SecurityAPIError(Exception):
@@ -22,6 +23,7 @@ class SecurityAPIError(Exception):
22
23
 
23
24
  class RateLimitExhaustedError(SecurityAPIError):
24
25
  """Security API rate limit exhausted (429) error"""
26
+
25
27
  def __init__(self, message: str, retry_after: int = None):
26
28
  super().__init__(message)
27
29
  self.retry_after = retry_after
@@ -52,7 +54,7 @@ class SecurityPolicyClient:
52
54
  if self.client:
53
55
  await self.client.aclose()
54
56
 
55
- async def inspect_policy_request(self, policy_request: PolicyRequest,
57
+ async def inspect_policy_request(self, policy_request: PolicyRequest,
56
58
  prompt_id: str) -> InspectDecision:
57
59
  """Call inspect_policy_request API endpoint"""
58
60
  if not self.client:
@@ -155,7 +157,7 @@ class SecurityPolicyClient:
155
157
  audit_payload = {"payload": {"server": payload_dict["server"], "tools": payload_dict["tools"]}}
156
158
  else:
157
159
  audit_payload = {"payload": payload_dict}
158
-
160
+
159
161
  self.audit_logger.log_event(
160
162
  audit_event_type,
161
163
  audit_payload,
@@ -166,6 +168,7 @@ class SecurityPolicyClient:
166
168
 
167
169
  headers = {
168
170
  "Content-Type": "application/json",
171
+ "User-Agent": f"MCPower-{__version__}",
169
172
  "X-User-UID": self.user_id,
170
173
  "X-App-UID": self.app_id
171
174
  }
@@ -188,7 +191,8 @@ class SecurityPolicyClient:
188
191
  raise SecurityAPIError(f"Unsupported HTTP method: {method}. Supported methods: POST, PUT")
189
192
 
190
193
  on_make_request_duration = time.time() - on_make_request_start_time
191
- self.logger.info(f"PROFILE: {method} id: {id} make_request duration: {on_make_request_duration:.2f} seconds url: {url}")
194
+ self.logger.debug(
195
+ f"PROFILE: {method} id: {id} make_request duration: {on_make_request_duration:.2f} seconds url: {url}")
192
196
 
193
197
  match response.status_code:
194
198
  case 200:
@@ -215,7 +219,7 @@ class SecurityPolicyClient:
215
219
  else:
216
220
  # Other responses (e.g., /init) - log entire response
217
221
  audit_result = {"result": data_dict}
218
-
222
+
219
223
  self.audit_logger.log_event(
220
224
  f"{audit_event_type}_result",
221
225
  audit_result,
@@ -278,7 +282,8 @@ class SecurityPolicyClient:
278
282
  def _handle_quota_restoration(self, endpoint: str):
279
283
  """Handle quota restoration (when non-429 response received)"""
280
284
  if self.session_id in self._session_notification_times:
281
- self.logger.info(f"Quota restored - received successful response from {endpoint}. Session: {self.session_id}")
285
+ self.logger.info(
286
+ f"Quota restored - received successful response from {endpoint}. Session: {self.session_id}")
282
287
  del self._session_notification_times[self.session_id]
283
288
 
284
289
  def _send_throttled_quota_notification(self, retry_after: int, endpoint: str):
@@ -0,0 +1,219 @@
1
+ """
2
+ Decision Handler - Common decision enforcement logic
3
+
4
+ Shared module for enforcing security policy decisions across middleware and IDE tools.
5
+ Handles user confirmation dialogs and decision recording.
6
+ """
7
+ from typing import Dict, Any, Optional
8
+
9
+ from mcpower_shared.mcp_types import UserConfirmation
10
+ from modules.apis.security_policy import SecurityPolicyClient
11
+ from modules.logs.audit_trail import AuditTrailLogger
12
+ from modules.logs.logger import MCPLogger
13
+ from modules.ui.classes import ConfirmationRequest, DialogOptions, UserDecision
14
+ from modules.ui.confirmation import UserConfirmationDialog, UserConfirmationError
15
+
16
+
17
class DecisionEnforcementError(Exception):
    """Error raised when a security decision blocks an operation."""


class DecisionHandler:
    """
    Handles security policy decision enforcement with user confirmation support.

    This class provides common functionality for:
    - Enforcing allow/block/confirm decisions
    - Showing user confirmation dialogs
    - Recording user decisions via API
    """

    # Decision types that are resolved by asking the user through a dialog.
    _DIALOG_DECISIONS = ("block", "required_explicit_user_confirmation")

    # Server-side field names -> wrapper argument names shown to the AI agent.
    # Unknown server fields are passed through unchanged.
    _WRAPPER_FIELD_MAPPING = {
        "context.agent.intent": "__wrapper_modelIntent",
        "context.agent.plan": "__wrapper_modelPlan",
        "context.agent.expectedOutputs": "__wrapper_modelExpectedOutputs",
        "context.agent.user_prompt": "__wrapper_userPrompt",
        "context.agent.user_prompt_id": "__wrapper_userPromptId",
        "context.agent.context_summary": "__wrapper_contextSummary",
        "context.workspace.current_files": "__wrapper_currentFiles",
    }

    def __init__(self, logger: "MCPLogger", audit_logger: "AuditTrailLogger",
                 session_id: str, app_id: str):
        self.logger = logger
        self.audit_logger = audit_logger
        self.session_id = session_id
        self.app_id = app_id

    async def enforce_decision(
            self,
            decision: Dict[str, Any],
            is_request: bool,
            event_id: str,
            tool_name: str,
            content_data: Dict[str, Any],
            operation_type: str,
            prompt_id: str,
            server_name: str,
            error_message_prefix: Optional[str] = None
    ) -> None:
        """
        Enforce security decision with user confirmation support.

        Args:
            decision: Security decision dict with 'decision', 'reasons', 'severity', 'call_type'
                (and 'need_fields' for 'need_more_info'). A missing 'decision' key is
                treated as 'block'.
            is_request: True if inspecting request, False if inspecting response
            event_id: Event ID for tracking
            tool_name: Name of the tool/operation
            content_data: Data to show in confirmation dialog
            operation_type: Type of operation (e.g., 'tool', 'hook')
            prompt_id: prompt ID for correlation
            server_name: server name for display
            error_message_prefix: Optional prefix for error messages
                (defaults to "Security Violation")

        Returns:
            None when the operation may proceed ('allow', or the user approved it).

        Raises:
            DecisionEnforcementError: If the decision blocks the operation, the user
                rejects it, more information is needed, or the decision type is
                not recognized.
        """
        decision_type = decision.get("decision", "block")

        if decision_type == "allow":
            return

        if decision_type in self._DIALOG_DECISIONS:
            await self._enforce_via_dialog(
                decision, decision_type, is_request, event_id, tool_name, content_data,
                operation_type, prompt_id, server_name, error_message_prefix
            )
            return

        if decision_type == "need_more_info":
            raise DecisionEnforcementError(self._build_need_more_info_message(decision, is_request))

        # Fail closed: previously an unrecognized decision type fell through every
        # branch and returned None, which silently allowed the operation.
        self.logger.error(f"Unrecognized security decision '{decision_type}' for tool '{tool_name}' "
                          f"(event: {event_id}) - blocking the operation")
        error_msg = error_message_prefix or "Security Violation"
        raise DecisionEnforcementError(f"{error_msg}. Unrecognized security decision '{decision_type}'")

    async def _enforce_via_dialog(
            self,
            decision: Dict[str, Any],
            decision_type: str,
            is_request: bool,
            event_id: str,
            tool_name: str,
            content_data: Dict[str, Any],
            operation_type: str,
            prompt_id: str,
            server_name: str,
            error_message_prefix: Optional[str]
    ) -> None:
        """Show the blocking/confirmation dialog, record the outcome, raise if rejected."""
        blocking = decision_type == "block"
        default_reasons = ["Policy violation"] if blocking else ["Security policy requires confirmation"]
        call_type = decision.get("call_type")

        try:
            confirmation_request = ConfirmationRequest(
                is_request=is_request,
                tool_name=tool_name,
                policy_reasons=decision.get("reasons", default_reasons),
                content_data=content_data,
                severity=decision.get("severity", "unknown"),
                event_id=event_id,
                operation_type=operation_type,
                server_name=server_name,
                timeout_seconds=60
            )
            dialog = UserConfirmationDialog(self.logger, self.audit_logger)

            # NOTE(review): both dialog methods are called synchronously from this
            # coroutine; if they wait on the user they presumably stall the event
            # loop for up to the timeout - confirm this is intended.
            if blocking:
                # Show a blocking dialog and wait for user decision
                response = dialog.request_blocking_confirmation(confirmation_request, prompt_id, call_type)
                # If we got here, user chose "Allow Anyway"
                self.logger.info(f"User chose to 'allow anyway' a blocked {confirmation_request.operation_type} "
                                 f"operation for tool '{tool_name}' (event: {event_id})")
            else:
                # only show YES_ALWAYS if call_type exists
                options = DialogOptions(
                    show_always_allow=(call_type is not None),
                    show_always_block=False
                )
                response = dialog.request_confirmation(confirmation_request, prompt_id, call_type, options)
                # If we got here, user approved the operation
                self.logger.info(f"User {response.user_decision.value} {confirmation_request.operation_type} "
                                 f"operation for tool '{tool_name}' (event: {event_id})")
        except UserConfirmationError as e:
            # User rejected the operation or the dialog failed
            await self._record_user_confirmation(event_id, is_request, UserDecision.BLOCK, prompt_id, call_type)
            error_msg = error_message_prefix or "Security Violation"
            # Chain the cause so the dialog failure stays visible in tracebacks.
            raise DecisionEnforcementError(f"{error_msg}. User blocked the operation") from e
        else:
            await self._record_user_confirmation(event_id, is_request, response.user_decision, prompt_id, call_type)

    @classmethod
    def _build_need_more_info_message(cls, decision: Dict[str, Any], is_request: bool) -> str:
        """Build the actionable 'need more info' error text addressed to the AI agent."""
        stage_title = 'CLIENT REQUEST' if is_request else 'TOOL RESPONSE'

        # `or []` guards against an explicit null from the API, which would
        # otherwise make the join below raise TypeError.
        reasons = decision.get("reasons") or []
        need_fields = decision.get("need_fields") or []

        error_parts = [
            f"SECURITY POLICY NEEDS MORE INFORMATION FOR REVIEWING {stage_title}:",
            '\n'.join(str(reason) for reason in reasons),
            ''  # newline
        ]

        if need_fields:
            # Convert server field names to wrapper field names for the AI agent.
            # NOTE(review): the original also had a "MISSING INFORMATION:" else-branch,
            # read here as belonging to `if missing_wrapper_fields:`; under that reading
            # it was unreachable (the mapped list is never empty when need_fields is
            # non-empty) and is dropped.
            error_parts.append("AFFECTED FIELDS:")
            error_parts.extend(cls._WRAPPER_FIELD_MAPPING.get(field, field) for field in need_fields)

        # NOTE(review): indentation is not recoverable from the diff this block was
        # read from; the footer is assumed to be appended unconditionally - confirm.
        error_parts.append("\nMANDATORY ACTIONS:")
        error_parts.append("1. Add/Edit ALL affected fields according to the required information")
        error_parts.append("2. Retry the tool call")

        return "\n".join(error_parts)

    async def _record_user_confirmation(
            self,
            event_id: str,
            is_request: bool,
            user_decision: "UserDecision",
            prompt_id: str,
            call_type: Optional[str] = None
    ):
        """
        Record user confirmation decision with the security API.

        Best-effort: any failure is logged and swallowed so that a recording
        problem never changes the outcome of the enforced operation.
        """
        try:
            direction = "request" if is_request else "response"

            user_confirmation = UserConfirmation(
                event_id=event_id,
                direction=direction,
                user_decision=user_decision,
                call_type=call_type
            )

            async with SecurityPolicyClient(session_id=self.session_id, logger=self.logger,
                                            audit_logger=self.audit_logger, app_id=self.app_id) as client:
                result = await client.record_user_confirmation(user_confirmation, prompt_id=prompt_id)
                self.logger.debug(f"User confirmation recorded: {result}")
        except Exception as e:
            # Don't fail the operation if API call fails - just log the error
            self.logger.error(f"Failed to record user confirmation: {e}")
@@ -50,14 +50,14 @@ class AuditTrailLogger:
50
50
  Path(self.audit_file).parent.mkdir(parents=True, exist_ok=True)
51
51
 
52
52
  def log_event(
53
- self,
54
- event_type: str,
55
- data: Dict[str, Any],
56
- event_id: Optional[str] = None,
57
- prompt_id: Optional[str] = None,
58
- user_prompt: Optional[str] = None,
59
- ignored_keys: Optional[List[str]] = None,
60
- include_keys: Optional[List[str]] = None
53
+ self,
54
+ event_type: str,
55
+ data: Dict[str, Any],
56
+ event_id: Optional[str] = None,
57
+ prompt_id: Optional[str] = None,
58
+ user_prompt: Optional[str] = None,
59
+ ignored_keys: Optional[List[str]] = None,
60
+ include_keys: Optional[List[str]] = None
61
61
  ):
62
62
  """
63
63
  Log a single audit event
@@ -74,19 +74,20 @@ class AuditTrailLogger:
74
74
  try:
75
75
  # Convert data to dict structure (handles nested objects, dataclasses, Pydantic models)
76
76
  data_dict = to_dict(data)
77
-
77
+
78
78
  # Build event structure
79
79
  event = {
80
80
  "session_id": self.session_id,
81
81
  "timestamp": datetime.now(timezone.utc).isoformat(),
82
82
  "event_type": event_type,
83
- "data": redact(data_dict, ignored_keys=ignored_keys, include_keys=include_keys) # Redaction with optional key filtering
83
+ "data": redact(data_dict, ignored_keys=ignored_keys, include_keys=include_keys)
84
+ # Redaction with optional key filtering
84
85
  }
85
86
 
86
87
  # Include prompt_id if provided (for grouping by user prompt)
87
88
  if prompt_id:
88
89
  event["prompt_id"] = prompt_id
89
-
90
+
90
91
  # Include user_prompt text if provided (only needed once per prompt_id)
91
92
  if user_prompt:
92
93
  event["user_prompt"] = user_prompt
@@ -118,7 +119,7 @@ class AuditTrailLogger:
118
119
  "app_uid": self.app_uid,
119
120
  **{k: v for k, v in event.items() if k != "app_uid"}
120
121
  }
121
-
122
+
122
123
  # Atomic append to audit trail file
123
124
  with open(self.audit_file, 'a', encoding='utf-8') as f:
124
125
  f.write(safe_json_dumps(event_with_app_uid) + '\n')
@@ -137,14 +138,14 @@ class AuditTrailLogger:
137
138
  """
138
139
  if self.app_uid == app_uid:
139
140
  return
140
-
141
+
141
142
  if self.app_uid is not None:
142
143
  self.logger.info(f"app_uid changed from {self.app_uid} to {app_uid}")
143
144
  else:
144
145
  self.logger.debug(f"app_uid set to: {app_uid}")
145
-
146
+
146
147
  self.app_uid = app_uid
147
-
148
+
148
149
  # Flush all pending logs
149
150
  if self._pending_logs:
150
151
  self.logger.debug(f"Flushing {len(self._pending_logs)} queued audit logs")
modules/logs/logger.py CHANGED
@@ -13,7 +13,7 @@ from modules.utils.ids import get_session_id
13
13
 
14
14
  class UTF8StreamHandler(logging.StreamHandler):
15
15
  """StreamHandler that forces UTF-8 encoding on Windows to prevent UnicodeEncodeError"""
16
-
16
+
17
17
  def __init__(self, stream=None):
18
18
  # On Windows, wrap the stream with UTF-8 encoding BEFORE passing to parent
19
19
  if sys.platform == 'win32' and stream is not None:
@@ -25,13 +25,13 @@ class UTF8StreamHandler(logging.StreamHandler):
25
25
  errors='replace',
26
26
  line_buffering=True
27
27
  )
28
-
28
+
29
29
  super().__init__(stream)
30
30
 
31
31
 
32
32
  class SessionFormatter(logging.Formatter):
33
33
  """Custom formatter that includes session ID in log messages"""
34
-
34
+
35
35
  # Single character mapping for perfect alignment and compactness
36
36
  LEVEL_MAPPING = {
37
37
  'DEBUG': 'D',
@@ -40,11 +40,11 @@ class SessionFormatter(logging.Formatter):
40
40
  'ERROR': 'E',
41
41
  'CRITICAL': 'C'
42
42
  }
43
-
43
+
44
44
  def __init__(self, *args, **kwargs):
45
45
  super().__init__(*args, **kwargs)
46
46
  self._session_id = get_session_id()[:8]
47
-
47
+
48
48
  def format(self, record):
49
49
  # Use the cached session ID
50
50
  record.session_id = self._session_id
@@ -58,52 +58,51 @@ class SessionFormatter(logging.Formatter):
58
58
 
59
59
  class MCPLogger:
60
60
  """Simple line-based logger for MCP traffic"""
61
-
61
+
62
62
  def __init__(self, log_file: Optional[str] = None, level: int = logging.INFO):
63
63
  self.log_file = log_file
64
64
  self.file_handle: Optional[TextIO] = None
65
-
65
+
66
66
  # Setup file handle if log file specified (for MCP traffic logging)
67
67
  if log_file:
68
68
  log_path = Path(log_file)
69
69
  log_path.parent.mkdir(parents=True, exist_ok=True)
70
70
  self.file_handle = open(log_path, 'a', encoding='utf-8')
71
-
71
+
72
72
  # Setup standard logger for non-MCP messages
73
73
  self.logger = logging.getLogger('mcpower')
74
74
  self.logger.setLevel(level)
75
-
75
+
76
76
  # Create console handler with UTF-8 support
77
77
  console_handler = UTF8StreamHandler(sys.stderr)
78
78
  console_handler.setLevel(level)
79
79
  formatter = SessionFormatter('%(asctime)s [%(session_id)s] (%(levelname)s) %(message)s')
80
80
  console_handler.setFormatter(formatter)
81
81
  self.logger.addHandler(console_handler)
82
-
82
+
83
83
  # Add file handler if log file specified
84
84
  if log_file:
85
85
  file_handler = logging.FileHandler(log_file, encoding='utf-8')
86
86
  file_handler.setLevel(level)
87
87
  file_handler.setFormatter(formatter)
88
88
  self.logger.addHandler(file_handler)
89
-
90
89
 
91
90
  def info(self, message: str) -> None:
92
91
  """Log info message"""
93
92
  self.logger.info(message)
94
-
93
+
95
94
  def error(self, message: str, exc_info: bool = False) -> None:
96
95
  """Log error message"""
97
96
  self.logger.error(message, exc_info=exc_info)
98
-
97
+
99
98
  def warning(self, message: str) -> None:
100
99
  """Log warning message"""
101
100
  self.logger.warning(message)
102
-
101
+
103
102
  def debug(self, message: str) -> None:
104
103
  """Log debug message"""
105
104
  self.logger.debug(message)
106
-
105
+
107
106
  def close(self) -> None:
108
107
  """Close log file handle"""
109
108
  if self.file_handle:
@@ -123,6 +122,3 @@ def setup_logger(log_file: Optional[str] = None, level: int = logging.INFO) -> M
123
122
  Configured MCPLogger instance
124
123
  """
125
124
  return MCPLogger(log_file, level)
126
-
127
-
128
-
@@ -3,7 +3,7 @@ from typing import Dict, List, Tuple
3
3
 
4
4
  # This file is auto-generated from Gitleaks rules. Do not edit manually.
5
5
  # Source: https://github.com/gitleaks/gitleaks/blob/master/config/gitleaks.toml
6
- # Generation script: targets/vsc-extension/scripts/update-gitleaks-rules.mjs
6
+ # Generation script: targets/scripts/update-gitleaks-rules.mjs
7
7
 
8
8
  COMPILED_RULES: List[Tuple[str, re.Pattern, int, List[str]]] = [
9
9
  (
@@ -60,15 +60,8 @@ class PIIDetector:
60
60
  """Lightweight PII detector using only regex patterns."""
61
61
 
62
62
  def __init__(self):
63
- # URL detector with intelligent boundary detection
64
- self.url_detector = URLDetector()
65
-
66
63
  # Compile regex patterns for better performance
67
64
  self.patterns = {
68
- 'EMAIL_ADDRESS': re.compile(
69
- r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b',
70
- re.IGNORECASE
71
- ),
72
65
  'CREDIT_CARD': re.compile(
73
66
  r'\b(?:'
74
67
  r'4[0-9]{3}[-\s]?[0-9]{4}[-\s]?[0-9]{4}[-\s]?[0-9]{4}(?:[0-9]{3})?|' # Visa with formatting
@@ -81,40 +74,6 @@ class PIIDetector:
81
74
  r'6(?:011|5[0-9]{2})[0-9]{12}' # Discover
82
75
  r')\b'
83
76
  ),
84
- 'IP_ADDRESS': re.compile(
85
- r'(?:'
86
- # IPv4
87
- r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}'
88
- r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b'
89
- r'|'
90
- # IPv6 - comprehensive pattern
91
- r'(?:'
92
- # Full IPv6 or with :: compression
93
- r'(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|' # Full: 1:2:3:4:5:6:7:8
94
- r'(?:[0-9a-fA-F]{1,4}:){1,7}:|' # Compressed trailing: 1:: or 1:2:3:4:5:6:7::
95
- r'(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|' # Compressed middle: 1::8 or 1:2:3:4:5:6::8
96
- r'(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|' # 1::7:8 or 1:2:3:4:5::7:8
97
- r'(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|' # 1::6:7:8 or 1:2:3:4::6:7:8
98
- r'(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|' # 1::5:6:7:8 or 1:2:3::5:6:7:8
99
- r'(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|' # 1::4:5:6:7:8 or 1:2::4:5:6:7:8
100
- r'[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|' # 1::3:4:5:6:7:8
101
- r':(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|' # ::2:3:4:5:6:7:8 or ::
102
- # IPv4-mapped IPv6: ::ffff:192.0.2.1
103
- r'(?:[0-9a-fA-F]{1,4}:){1,4}:'
104
- r'(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}'
105
- r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
106
- r')'
107
- r')',
108
- re.IGNORECASE
109
- ),
110
- # Common crypto addresses
111
- 'CRYPTO_ADDRESS': re.compile(
112
- r'\b(?:'
113
- r'[13][a-km-zA-HJ-NP-Z1-9]{25,34}|' # Bitcoin
114
- r'0x[a-fA-F0-9]{40}|' # Ethereum
115
- r'[LM3][a-km-zA-HJ-NP-Z1-9]{26,33}' # Litecoin
116
- r')\b'
117
- ),
118
77
  # IBAN (International Bank Account Number)
119
78
  'IBAN': re.compile(
120
79
  r'\b[A-Z]{2}[0-9]{2}[A-Z0-9]{4}[0-9]{7}([A-Z0-9]?){0,16}\b'
@@ -176,9 +135,6 @@ class PIIDetector:
176
135
  """
177
136
  matches = []
178
137
 
179
- # Extract URLs using URLDetector
180
- matches.extend(self.url_detector.extract(text))
181
-
182
138
  # Extract other PII using regex patterns
183
139
  for entity_type, pattern in self.patterns.items():
184
140
  for match in pattern.finditer(text):
@@ -212,11 +168,7 @@ class PIIDetector:
212
168
  """Calculate confidence score based on entity type and matched text."""
213
169
  # Base confidence scores
214
170
  base_scores = {
215
- 'EMAIL_ADDRESS': 0.95,
216
171
  'CREDIT_CARD': 0.85, # Will be 0.99 after Luhn validation
217
- 'IP_ADDRESS': 0.90,
218
- 'URL': 0.80,
219
- 'CRYPTO_ADDRESS': 0.95,
220
172
  'IBAN': 0.85, # Will be 0.99 after MOD-97 validation
221
173
  }
222
174