mcp-code-indexer 4.0.1__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. mcp_code_indexer/__init__.py +7 -5
  2. mcp_code_indexer/ask_handler.py +2 -2
  3. mcp_code_indexer/claude_api_handler.py +10 -5
  4. mcp_code_indexer/cleanup_manager.py +20 -12
  5. mcp_code_indexer/commands/makelocal.py +85 -63
  6. mcp_code_indexer/data/stop_words_english.txt +1 -1
  7. mcp_code_indexer/database/connection_health.py +29 -20
  8. mcp_code_indexer/database/database.py +44 -31
  9. mcp_code_indexer/database/database_factory.py +19 -20
  10. mcp_code_indexer/database/exceptions.py +10 -10
  11. mcp_code_indexer/database/models.py +126 -1
  12. mcp_code_indexer/database/path_resolver.py +22 -21
  13. mcp_code_indexer/database/retry_executor.py +37 -19
  14. mcp_code_indexer/deepask_handler.py +3 -3
  15. mcp_code_indexer/error_handler.py +46 -20
  16. mcp_code_indexer/file_scanner.py +15 -12
  17. mcp_code_indexer/git_hook_handler.py +71 -76
  18. mcp_code_indexer/logging_config.py +13 -5
  19. mcp_code_indexer/main.py +85 -22
  20. mcp_code_indexer/middleware/__init__.py +1 -1
  21. mcp_code_indexer/middleware/auth.py +47 -43
  22. mcp_code_indexer/middleware/error_middleware.py +15 -15
  23. mcp_code_indexer/middleware/logging.py +44 -42
  24. mcp_code_indexer/middleware/security.py +84 -76
  25. mcp_code_indexer/migrations/002_performance_indexes.sql +1 -1
  26. mcp_code_indexer/migrations/004_remove_branch_dependency.sql +14 -14
  27. mcp_code_indexer/migrations/006_vector_mode.sql +189 -0
  28. mcp_code_indexer/query_preprocessor.py +2 -2
  29. mcp_code_indexer/server/mcp_server.py +158 -94
  30. mcp_code_indexer/transport/__init__.py +1 -1
  31. mcp_code_indexer/transport/base.py +19 -17
  32. mcp_code_indexer/transport/http_transport.py +89 -76
  33. mcp_code_indexer/transport/stdio_transport.py +12 -8
  34. mcp_code_indexer/vector_mode/__init__.py +36 -0
  35. mcp_code_indexer/vector_mode/chunking/__init__.py +19 -0
  36. mcp_code_indexer/vector_mode/chunking/ast_chunker.py +403 -0
  37. mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +500 -0
  38. mcp_code_indexer/vector_mode/chunking/language_handlers.py +478 -0
  39. mcp_code_indexer/vector_mode/config.py +155 -0
  40. mcp_code_indexer/vector_mode/daemon.py +335 -0
  41. mcp_code_indexer/vector_mode/monitoring/__init__.py +19 -0
  42. mcp_code_indexer/vector_mode/monitoring/change_detector.py +312 -0
  43. mcp_code_indexer/vector_mode/monitoring/file_watcher.py +445 -0
  44. mcp_code_indexer/vector_mode/monitoring/merkle_tree.py +418 -0
  45. mcp_code_indexer/vector_mode/providers/__init__.py +72 -0
  46. mcp_code_indexer/vector_mode/providers/base_provider.py +230 -0
  47. mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +338 -0
  48. mcp_code_indexer/vector_mode/providers/voyage_client.py +212 -0
  49. mcp_code_indexer/vector_mode/security/__init__.py +11 -0
  50. mcp_code_indexer/vector_mode/security/patterns.py +297 -0
  51. mcp_code_indexer/vector_mode/security/redactor.py +368 -0
  52. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/METADATA +82 -24
  53. mcp_code_indexer-4.1.0.dist-info/RECORD +66 -0
  54. mcp_code_indexer-4.0.1.dist-info/RECORD +0 -47
  55. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/LICENSE +0 -0
  56. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/WHEEL +0 -0
  57. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,212 @@
1
+ """
2
+ Voyage AI client for embedding generation.
3
+
4
+ Provides integration with Voyage AI's embedding API for generating
5
+ high-quality code embeddings using the voyage-code-2 model.
6
+ """
7
+
8
+ import logging
9
+ from typing import List, Dict, Any, Optional, Union
10
+ import tiktoken
11
+
12
+ from .base_provider import BaseProvider, ProviderError
13
+ from ..config import VectorConfig
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
class VoyageClient(BaseProvider):
    """Client for Voyage AI embedding generation.

    Wraps the Voyage AI ``/embeddings`` endpoint. Texts are batched so each
    request stays under the API's context limit; token counts are only
    approximated client-side because Voyage uses a proprietary tokenizer
    (the API performs the authoritative tokenization and truncation).
    """

    def __init__(
        self,
        api_key: str,
        model: str = "voyage-code-2",
        base_url: str = "https://api.voyageai.com/v1",
        **kwargs
    ):
        """Initialize the client.

        Args:
            api_key: Voyage AI API key.
            model: Embedding model to use (defaults to the code model).
            base_url: Voyage AI API base URL.
            **kwargs: Passed through to BaseProvider (e.g. timeout, max_retries).
        """
        super().__init__(api_key, base_url, **kwargs)
        self.model = model
        # Cached after first successful lookup in get_embedding_dimension().
        self._embedding_dimension: Optional[int] = None

        # Note: Voyage AI uses proprietary tokenizer, not tiktoken
        # We'll use approximate counting and let the API handle truncation
        self.tokenizer = None
        logger.info("Using approximate token counting - Voyage AI handles tokenization internally")

    async def health_check(self) -> bool:
        """Check if Voyage AI service is healthy.

        Issues a minimal embedding request; any failure is logged and
        reported as unhealthy rather than raised to the caller.
        """
        try:
            # Make a small test request
            await self.generate_embeddings(["test"], input_type="query")
            return True
        except Exception as e:
            logger.warning(f"Voyage AI health check failed: {e}")
            return False

    def _count_tokens(self, text: str) -> int:
        """Approximate token count - Voyage AI handles exact tokenization.

        Rough heuristic of ~4 characters per token, used only for batching
        estimates; exact counts come from the API.
        """
        return len(text) // 4

    def _batch_texts_by_tokens(
        self,
        texts: List[str],
        max_tokens_per_batch: int = 120000  # Leave buffer under 128k limit
    ) -> List[List[str]]:
        """Split texts into batches whose estimated token totals stay under the limit.

        A single text that alone exceeds the limit is pre-truncated by
        characters; the Voyage API applies exact truncation server-side.
        """
        batches: List[List[str]] = []
        current_batch: List[str] = []
        current_tokens = 0

        for text in texts:
            text_tokens = self._count_tokens(text)

            # If single text exceeds limit, truncate it (let Voyage API handle exact truncation)
            if text_tokens > max_tokens_per_batch:
                # Rough character-based truncation - Voyage API will handle exact tokenization
                target_chars = (max_tokens_per_batch - 100) * 4  # Conservative estimate
                text = text[:target_chars]
                text_tokens = self._count_tokens(text)

                logger.warning(f"Pre-truncated text to ~{text_tokens} tokens (Voyage API will handle exact tokenization)")

            # Check if adding this text would exceed the batch limit
            if current_tokens + text_tokens > max_tokens_per_batch and current_batch:
                batches.append(current_batch)
                current_batch = [text]
                current_tokens = text_tokens
            else:
                current_batch.append(text)
                current_tokens += text_tokens

        if current_batch:
            batches.append(current_batch)

        return batches

    async def generate_embeddings(
        self,
        texts: List[str],
        input_type: str = "document",
        truncation: bool = True,
        **kwargs
    ) -> List[List[float]]:
        """
        Generate embeddings for a list of texts.

        Args:
            texts: List of texts to embed
            input_type: Type of input ("document" or "query")
            truncation: Whether to enable truncation
            **kwargs: Additional arguments

        Returns:
            List of embedding vectors, one per input text, in input order.

        Raises:
            ProviderError: If a request fails or the response is malformed.
        """
        if not texts:
            return []

        logger.info(f"Generating embeddings for {len(texts)} texts using {self.model}")

        # Batch texts to stay under token limits
        batches = self._batch_texts_by_tokens(texts)
        all_embeddings: List[List[float]] = []

        for i, batch in enumerate(batches):
            logger.debug(f"Processing batch {i+1}/{len(batches)} with {len(batch)} texts")

            request_data = {
                "input": batch,
                "model": self.model,
                "input_type": input_type,
                "truncation": truncation,
            }

            try:
                response = await self._make_request(
                    method="POST",
                    endpoint="/embeddings",
                    data=request_data,
                )

                # Extract embeddings from response
                if "data" not in response:
                    raise ProviderError("Invalid response format from Voyage AI")

                batch_embeddings = [item["embedding"] for item in response["data"]]
                all_embeddings.extend(batch_embeddings)

                # Log usage information if available
                if "usage" in response:
                    usage = response["usage"]
                    logger.debug(
                        f"Batch {i+1} usage: {usage.get('total_tokens', 0)} tokens"
                    )

            except Exception as e:
                logger.error(f"Failed to generate embeddings for batch {i+1}: {e}")
                # Chain the original exception so the root cause and its
                # traceback are preserved for callers/logging (was lost before).
                raise ProviderError(f"Embedding generation failed: {e}") from e

        logger.info(f"Successfully generated {len(all_embeddings)} embeddings")
        return all_embeddings

    async def get_embedding_dimension(self) -> int:
        """Get the dimension of embeddings produced by this model.

        Probes the API with a test embedding on first call and caches the
        result; falls back to a table of known model dimensions if the
        probe fails.
        """
        if self._embedding_dimension is not None:
            return self._embedding_dimension

        # Generate a test embedding to determine dimension
        try:
            test_embeddings = await self.generate_embeddings(["test"], input_type="query")
            if test_embeddings:
                self._embedding_dimension = len(test_embeddings[0])
                logger.info(f"Detected embedding dimension: {self._embedding_dimension}")
                return self._embedding_dimension
        except Exception as e:
            logger.warning(f"Could not determine embedding dimension: {e}")

        # Default dimensions for known Voyage models (as of 2024)
        # Note: These may change - verify with Voyage AI documentation
        model_dimensions = {
            "voyage-code-2": 1536,  # Code-optimized model
            "voyage-2": 1024,  # General purpose
            "voyage-large-2": 1536,  # Large general purpose
            "voyage-3": 1024,  # Newer general purpose (if available)
        }

        self._embedding_dimension = model_dimensions.get(self.model, 1536)
        logger.info(f"Using default dimension for {self.model}: {self._embedding_dimension}")
        return self._embedding_dimension

    async def generate_query_embedding(self, query: str) -> List[float]:
        """Generate a single embedding for a search query.

        Returns an empty list if the API yields no embeddings.
        """
        embeddings = await self.generate_embeddings([query], input_type="query")
        return embeddings[0] if embeddings else []

    async def estimate_cost(self, texts: List[str]) -> Dict[str, Any]:
        """Estimate the cost of embedding generation.

        Uses the approximate client-side token count; pricing is hard-coded
        for voyage-code-2 and may drift from current Voyage AI pricing.
        """
        total_tokens = sum(self._count_tokens(text) for text in texts)

        # Voyage AI pricing (approximate, may change)
        cost_per_1k_tokens = 0.00013  # voyage-code-2 pricing
        estimated_cost = (total_tokens / 1000) * cost_per_1k_tokens

        return {
            "total_tokens": total_tokens,
            "total_texts": len(texts),
            "estimated_cost_usd": round(estimated_cost, 6),
            "model": self.model,
        }
201
+
202
def create_voyage_client(config: VectorConfig) -> VoyageClient:
    """Build a VoyageClient from the vector-mode configuration.

    Raises:
        ValueError: If no Voyage API key is configured.
    """
    key = config.voyage_api_key
    if not key:
        raise ValueError("VOYAGE_API_KEY is required for embedding generation")

    return VoyageClient(
        api_key=key,
        model=config.embedding_model,
        timeout=30.0,
        max_retries=3,
    )
@@ -0,0 +1,11 @@
1
+ """
2
+ Security module for vector mode.
3
+
4
+ Provides secret redaction capabilities to prevent sensitive information
5
+ from being sent to external APIs for embedding generation.
6
+ """
7
+
8
+ from .redactor import SecretRedactor, RedactionResult
9
+ from .patterns import SecurityPatterns
10
+
11
+ __all__ = ["SecretRedactor", "RedactionResult", "SecurityPatterns"]
@@ -0,0 +1,297 @@
1
+ """
2
+ Security patterns for detecting secrets in code.
3
+
4
+ Comprehensive collection of regex patterns to identify API keys, tokens,
5
+ passwords, connection strings, and other sensitive information.
6
+ """
7
+
8
+ import re
9
+ from typing import List, Dict, Pattern, NamedTuple
10
+ from dataclasses import dataclass
11
+
12
class PatternMatch(NamedTuple):
    """Represents a detected secret pattern match."""

    # Name of the SecurityPattern that produced this hit.
    pattern_name: str
    # Category of the pattern (e.g. "api_key", "token").
    pattern_type: str
    # Character offsets of the match within the scanned text.
    start_pos: int
    end_pos: int
    # The exact substring that matched.
    matched_text: str
    # Confidence score inherited from the matching pattern.
    confidence: float
20
+
21
@dataclass
class SecurityPattern:
    """Represents a security pattern with metadata."""

    # Unique identifier, e.g. "github_token".
    name: str
    # Compiled regex used to scan text.
    pattern: Pattern[str]
    # Category, e.g. "api_key" or "connection_string".
    pattern_type: str
    # Human-readable description of what the pattern detects.
    description: str
    # How likely a raw match is to be a real secret (0.0-1.0).
    confidence: float = 1.0
    # When True, matches only count if nearby text looks secret-related.
    context_required: bool = False
30
+
31
class SecurityPatterns:
    """Collection of security patterns for secret detection.

    Builds a fixed catalog of regex-based detectors (API keys, tokens,
    connection strings, private keys, env-var credentials, and low-confidence
    generic encodings) and exposes query/scan helpers over it.
    """

    def __init__(self):
        # Catalog is immutable in practice; built once per instance.
        self.patterns = self._build_patterns()

    def _build_patterns(self) -> List[SecurityPattern]:
        """Assemble the complete, ordered pattern catalog."""
        ic = re.IGNORECASE
        pem = re.MULTILINE | re.DOTALL  # PEM blocks span multiple lines

        # API keys and tokens (positional args: name, pattern, type,
        # description, confidence, context_required).
        api_keys = [
            SecurityPattern(
                "aws_access_key",
                re.compile(r'AKIA[0-9A-Z]{16}', ic),
                "api_key",
                "AWS Access Key ID",
                0.95,
            ),
            SecurityPattern(
                "aws_secret_key",
                re.compile(r'[A-Za-z0-9/+=]{40}', ic),
                "api_key",
                "AWS Secret Access Key",
                0.7,
                True,  # very broad charset; needs context to avoid noise
            ),
            SecurityPattern(
                "github_token",
                re.compile(r'gh[pousr]_[A-Za-z0-9_]{36,}', ic),
                "api_key",
                "GitHub Token",
                0.95,
            ),
            SecurityPattern(
                "google_api_key",
                re.compile(r'AIza[0-9A-Za-z\-_]{35}', ic),
                "api_key",
                "Google API Key",
                0.95,
            ),
            SecurityPattern(
                "slack_token",
                re.compile(r'xox[baprs]-([0-9a-zA-Z]{10,48})', ic),
                "api_key",
                "Slack Token",
                0.95,
            ),
            SecurityPattern(
                "stripe_key",
                re.compile(r'[rs]k_(test|live)_[0-9a-zA-Z]{24}', ic),
                "api_key",
                "Stripe API Key",
                0.95,
            ),
            SecurityPattern(
                "openai_api_key",
                re.compile(r'sk-[a-zA-Z0-9]{48}', ic),
                "api_key",
                "OpenAI API Key",
                0.95,
            ),
            SecurityPattern(
                "anthropic_api_key",
                re.compile(r'sk-ant-api03-[a-zA-Z0-9\-_]{95}', ic),
                "api_key",
                "Anthropic API Key",
                0.95,
            ),
            SecurityPattern(
                "voyage_api_key",
                re.compile(r'pa-[a-zA-Z0-9]{32}', ic),
                "api_key",
                "Voyage AI API Key",
                0.95,
            ),
        ]

        # JWT tokens (three dot-separated base64url segments).
        jwt = SecurityPattern(
            "jwt_token",
            re.compile(r'eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*', ic),
            "token",
            "JWT Token",
            0.9,
        )

        # Database connection strings (credentials often embedded in the URL).
        conn_strings = [
            SecurityPattern(
                "postgres_url",
                re.compile(r'postgres(?:ql)?://[^\s]+', ic),
                "connection_string",
                "PostgreSQL Connection String",
                0.85,
            ),
            SecurityPattern(
                "mysql_url",
                re.compile(r'mysql://[^\s]+', ic),
                "connection_string",
                "MySQL Connection String",
                0.85,
            ),
            SecurityPattern(
                "mongodb_url",
                re.compile(r'mongodb(?:\+srv)?://[^\s]+', ic),
                "connection_string",
                "MongoDB Connection String",
                0.85,
            ),
            SecurityPattern(
                "redis_url",
                re.compile(r'redis://[^\s]+', ic),
                "connection_string",
                "Redis Connection String",
                0.85,
            ),
        ]

        # PEM-encoded private keys — unambiguous, so full confidence.
        private_keys = [
            SecurityPattern(
                "rsa_private_key",
                re.compile(r'-----BEGIN RSA PRIVATE KEY-----[^-]+-----END RSA PRIVATE KEY-----', pem),
                "private_key",
                "RSA Private Key",
                1.0,
            ),
            SecurityPattern(
                "ssh_private_key",
                re.compile(r'-----BEGIN OPENSSH PRIVATE KEY-----[^-]+-----END OPENSSH PRIVATE KEY-----', pem),
                "private_key",
                "SSH Private Key",
                1.0,
            ),
            SecurityPattern(
                "ec_private_key",
                re.compile(r'-----BEGIN EC PRIVATE KEY-----[^-]+-----END EC PRIVATE KEY-----', pem),
                "private_key",
                "EC Private Key",
                1.0,
            ),
        ]

        # Environment-variable style assignments (key = value).
        env_vars = [
            SecurityPattern(
                "env_password",
                re.compile(r'(?i)(password|passwd|pwd)\s*[=:]\s*["\']?[^\s"\']+["\']?', ic),
                "password",
                "Environment Variable Password",
                0.7,
                True,
            ),
            SecurityPattern(
                "env_secret",
                re.compile(r'(?i)(secret|token|key)\s*[=:]\s*["\']?[^\s"\']+["\']?', ic),
                "secret",
                "Environment Variable Secret",
                0.6,
                True,
            ),
        ]

        # Generic encodings — low confidence, only meaningful with context.
        generic = [
            SecurityPattern(
                "base64_encoded",
                re.compile(r'[A-Za-z0-9+/]{32,}={0,2}', ic),
                "encoded_data",
                "Base64 Encoded Data",
                0.3,
                True,
            ),
            SecurityPattern(
                "hex_encoded",
                re.compile(r'[a-fA-F0-9]{32,}', ic),
                "encoded_data",
                "Hex Encoded Data",
                0.3,
                True,
            ),
            SecurityPattern(
                "uuid",
                re.compile(r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}', ic),
                "identifier",
                "UUID",
                0.2,
                True,
            ),
        ]

        # URLs with inline user:password credentials.
        cred_url = SecurityPattern(
            "url_with_credentials",
            re.compile(r'https?://[^:/\s]+:[^@/\s]+@[^\s]+', ic),
            "credential_url",
            "URL with embedded credentials",
            0.9,
        )

        # Order matters only for tie-breaking in find_matches (stable sort).
        return api_keys + [jwt] + conn_strings + private_keys + env_vars + generic + [cred_url]

    def get_patterns_by_type(self, pattern_type: str) -> List[SecurityPattern]:
        """Return every pattern whose type equals *pattern_type*."""
        return [spec for spec in self.patterns if spec.pattern_type == pattern_type]

    def get_high_confidence_patterns(self, min_confidence: float = 0.8) -> List[SecurityPattern]:
        """Return patterns whose confidence meets *min_confidence*."""
        return [spec for spec in self.patterns if spec.confidence >= min_confidence]

    def get_context_sensitive_patterns(self) -> List[SecurityPattern]:
        """Return patterns that only fire when surrounding context looks suspicious."""
        return [spec for spec in self.patterns if spec.context_required]

    def find_matches(self, text: str, min_confidence: float = 0.5) -> List[PatternMatch]:
        """Find all pattern matches in *text* at or above the confidence threshold.

        Context-sensitive patterns are filtered through a keyword check on
        the surrounding text. Results are sorted by start position.
        """
        found: List[PatternMatch] = []

        eligible = (spec for spec in self.patterns if spec.confidence >= min_confidence)
        for spec in eligible:
            for hit in spec.pattern.finditer(text):
                start, end = hit.start(), hit.end()
                # Low-precision patterns need corroborating nearby keywords.
                if spec.context_required and not self._has_suspicious_context(text, start, end):
                    continue
                found.append(
                    PatternMatch(
                        pattern_name=spec.name,
                        pattern_type=spec.pattern_type,
                        start_pos=start,
                        end_pos=end,
                        matched_text=hit.group(),
                        confidence=spec.confidence,
                    )
                )

        # Stable position ordering for deterministic output.
        found.sort(key=lambda m: m.start_pos)
        return found

    def _has_suspicious_context(self, text: str, start: int, end: int, context_size: int = 50) -> bool:
        """Return True if text near [start, end) contains secret-related keywords."""
        lo = max(0, start - context_size)
        hi = min(len(text), end + context_size)
        window = text[lo:hi].lower()

        # Keywords that suggest secret/credential usage.
        keywords = (
            'password', 'passwd', 'pwd', 'secret', 'token', 'key', 'api',
            'auth', 'credential', 'login', 'access', 'private', 'confidential',
            'env', 'config', 'setting', 'var', 'export', 'process.env',
        )
        return any(word in window for word in keywords)

    def get_pattern_summary(self) -> Dict[str, int]:
        """Return a count of catalog patterns per pattern_type."""
        counts: Dict[str, int] = {}
        for spec in self.patterns:
            counts[spec.pattern_type] = counts.get(spec.pattern_type, 0) + 1
        return counts