@voodocs/cli 0.1.2 → 0.2.0

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
@@ -0,0 +1,379 @@
+ """
+ Invariant Checker
+
+ Validates that code respects documented invariants.
+ """
+
+ import re
+ import ast
+ from pathlib import Path
+ from typing import List, Dict, Optional, Tuple
+ from dataclasses import dataclass
+ from enum import Enum
+
+
+ class ViolationSeverity(Enum):
+     """Severity levels for invariant violations."""
+     ERROR = "error"
+     WARNING = "warning"
+     INFO = "info"
+
+
+ @dataclass
+ class Violation:
+     """A potential invariant violation."""
+     invariant: str
+     file_path: str
+     line_number: int
+     line_content: str
+     severity: ViolationSeverity
+     explanation: str
+
+
+ @dataclass
+ class CheckResult:
+     """Result of checking an invariant."""
+     invariant: str
+     passed: bool
+     violations: List[Violation]
+     checked_files: int
+
+
+ class InvariantChecker:
+     """Checks code against documented invariants."""
+
+     # Common invariant patterns and their detection strategies
+     PATTERNS = {
+         'password': {
+             'keywords': ['password', 'passwd', 'pwd'],
+             'safe_patterns': [
+                 r'hash\(',
+                 r'bcrypt\.',
+                 r'hashlib\.',
+                 r'crypto\.',
+                 r'pbkdf2',
+                 r'scrypt',
+                 r'argon2'
+             ],
+             'unsafe_patterns': [
+                 r'password\s*=\s*["\']',
+                 r'password\s*:\s*["\']',
+                 r'\.password\s*=',
+             ],
+             'message': 'Password should be hashed before storage'
+         },
+         'api_key': {
+             'keywords': ['api key', 'api_key', 'apikey', 'secret', 'token'],
+             'unsafe_patterns': [
+                 r'log\(',
+                 r'print\(',
+                 r'console\.log\(',
+                 r'logger\.',
+                 r'logging\.',
+                 r'echo\s+',
+                 r'puts\s+'
+             ],
+             'message': 'API keys/secrets should not be logged'
+         },
+         'sql': {
+             'keywords': ['sql', 'database', 'query', 'queries'],
+             'unsafe_patterns': [
+                 r'execute\([f"\'].*\{',
+                 r'execute\(.*\+',
+                 r'query\([f"\'].*\{',
+                 r'query\(.*\+',
+                 r'SELECT.*\{',
+                 r'INSERT.*\{',
+                 r'UPDATE.*\{',
+                 r'DELETE.*\{',
+             ],
+             'safe_patterns': [
+                 r'execute\(.*,\s*\(',
+                 r'execute\(.*,\s*\[',
+                 r'execute\(.*,\s*\{',
+                 r'\?',
+                 r'%s',
+                 r'\$\d+',
+             ],
+             'message': 'SQL queries should use parameterized statements'
+         },
+         'validation': {
+             'keywords': ['input', 'user input', 'validate', 'validation'],
+             'safe_patterns': [
+                 r'validate\(',
+                 r'sanitize\(',
+                 r'clean\(',
+                 r'isinstance\(',
+                 r'type\(',
+                 r'assert\s+',
+                 r'if\s+.*isinstance',
+                 r'if\s+.*type\(',
+             ],
+             'message': 'User input should be validated'
+         },
+         'null_check': {
+             'keywords': ['null', 'none', 'nil', 'undefined'],
+             'safe_patterns': [
+                 r'if\s+.*is\s+None',
+                 r'if\s+.*==\s*None',
+                 r'if\s+.*is\s+not\s+None',
+                 r'if\s+.*!=\s*None',
+                 r'if\s+.*===\s*null',
+                 r'if\s+.*!==\s*null',
+                 r'if\s+.*\?\.',
+                 r'Optional\[',
+             ],
+             'message': 'Null/None values should be checked'
+         },
+         'error_handling': {
+             'keywords': ['error', 'exception', 'failure'],
+             'safe_patterns': [
+                 r'try:',
+                 r'except\s+',
+                 r'catch\s*\(',
+                 r'\.catch\(',
+                 r'Result\[',
+                 r'Option\[',
+                 r'Either\[',
+             ],
+             'message': 'Errors should be handled properly'
+         }
+     }
+
+     def __init__(self):
+         self.results: List[CheckResult] = []
+
+     def check_invariants(
+         self,
+         invariants: List[str],
+         source_dir: Path,
+         module_filter: Optional[str] = None
+     ) -> List[CheckResult]:
+         """
+         Check all invariants against the codebase.
+
+         Args:
+             invariants: List of invariant strings to check
+             source_dir: Directory to scan for code
+             module_filter: Optional module name filter
+
+         Returns:
+             List of CheckResult objects
+         """
+         self.results = []
+
+         for invariant in invariants:
+             result = self.check_invariant(invariant, source_dir, module_filter)
+             self.results.append(result)
+
+         return self.results
+
+     def check_invariant(
+         self,
+         invariant: str,
+         source_dir: Path,
+         module_filter: Optional[str] = None
+     ) -> CheckResult:
+         """
+         Check a single invariant against the codebase.
+
+         Args:
+             invariant: Invariant string to check
+             source_dir: Directory to scan for code
+             module_filter: Optional module name filter
+
+         Returns:
+             CheckResult object
+         """
+         # Detect invariant pattern
+         pattern_type = self._detect_pattern_type(invariant)
+
+         if not pattern_type:
+             # Generic check - just search for violations
+             return self._generic_check(invariant, source_dir, module_filter)
+
+         # Pattern-specific check
+         return self._pattern_check(invariant, pattern_type, source_dir, module_filter)
+
+     def _detect_pattern_type(self, invariant: str) -> Optional[str]:
+         """Detect which pattern type an invariant matches."""
+         invariant_lower = invariant.lower()
+
+         for pattern_type, pattern_info in self.PATTERNS.items():
+             keywords = pattern_info['keywords']
+             if any(keyword in invariant_lower for keyword in keywords):
+                 return pattern_type
+
+         return None
+
+     def _pattern_check(
+         self,
+         invariant: str,
+         pattern_type: str,
+         source_dir: Path,
+         module_filter: Optional[str]
+     ) -> CheckResult:
+         """Check invariant using pattern-specific logic."""
+         pattern_info = self.PATTERNS[pattern_type]
+         violations = []
+         checked_files = 0
+
+         # Get all source files
+         files = self._get_source_files(source_dir, module_filter)
+
+         for file_path in files:
+             checked_files += 1
+             file_violations = self._check_file(
+                 file_path,
+                 invariant,
+                 pattern_info
+             )
+             violations.extend(file_violations)
+
+         return CheckResult(
+             invariant=invariant,
+             passed=len(violations) == 0,
+             violations=violations,
+             checked_files=checked_files
+         )
+
+     def _generic_check(
+         self,
+         invariant: str,
+         source_dir: Path,
+         module_filter: Optional[str]
+     ) -> CheckResult:
+         """Generic check for invariants without specific patterns."""
+         # For now, just return passed (no violations)
+         # Future: Could use LLM to analyze code
+         return CheckResult(
+             invariant=invariant,
+             passed=True,
+             violations=[],
+             checked_files=0
+         )
+
+     def _check_file(
+         self,
+         file_path: Path,
+         invariant: str,
+         pattern_info: Dict
+     ) -> List[Violation]:
+         """Check a single file for violations."""
+         violations = []
+
+         try:
+             with open(file_path, 'r', encoding='utf-8') as f:
+                 lines = f.readlines()
+
+             for line_num, line in enumerate(lines, start=1):
+                 violation = self._check_line(
+                     line,
+                     line_num,
+                     file_path,
+                     invariant,
+                     pattern_info
+                 )
+                 if violation:
+                     violations.append(violation)
+
+         except Exception:
+             # Skip files that can't be read
+             pass
+
+         return violations
+
+     def _check_line(
+         self,
+         line: str,
+         line_num: int,
+         file_path: Path,
+         invariant: str,
+         pattern_info: Dict
+     ) -> Optional[Violation]:
+         """Check a single line for violations."""
+         # Skip comments
+         if line.strip().startswith('#') or line.strip().startswith('//'):
+             return None
+
+         # Check for unsafe patterns
+         unsafe_patterns = pattern_info.get('unsafe_patterns', [])
+         for pattern in unsafe_patterns:
+             if re.search(pattern, line, re.IGNORECASE):
+                 # Check if safe pattern is also present
+                 safe_patterns = pattern_info.get('safe_patterns', [])
+                 has_safe_pattern = any(
+                     re.search(safe_pattern, line, re.IGNORECASE)
+                     for safe_pattern in safe_patterns
+                 )
+
+                 if not has_safe_pattern:
+                     return Violation(
+                         invariant=invariant,
+                         file_path=str(file_path),
+                         line_number=line_num,
+                         line_content=line.strip(),
+                         severity=ViolationSeverity.WARNING,
+                         explanation=pattern_info['message']
+                     )
+
+         # Check for missing safe patterns (if keyword present)
+         keywords = pattern_info.get('keywords', [])
+         has_keyword = any(keyword in line.lower() for keyword in keywords)
+
+         if has_keyword and 'safe_patterns' in pattern_info:
+             safe_patterns = pattern_info['safe_patterns']
+             has_safe_pattern = any(
+                 re.search(safe_pattern, line, re.IGNORECASE)
+                 for safe_pattern in safe_patterns
+             )
+
+             # If keyword present but no safe pattern, might be a violation
+             # But only if it's an assignment or usage context
+             if not has_safe_pattern and ('=' in line or '(' in line):
+                 return Violation(
+                     invariant=invariant,
+                     file_path=str(file_path),
+                     line_number=line_num,
+                     line_content=line.strip(),
+                     severity=ViolationSeverity.INFO,
+                     explanation=pattern_info['message']
+                 )
+
+         return None
+
+     def _get_source_files(
+         self,
+         source_dir: Path,
+         module_filter: Optional[str]
+     ) -> List[Path]:
+         """Get all source files to check."""
+         extensions = {'.py', '.ts', '.tsx', '.js', '.jsx', '.java', '.cpp',
+                       '.cc', '.cxx', '.h', '.hpp', '.cs', '.go', '.rs'}
+
+         skip_dirs = {'node_modules', '.git', '__pycache__', 'venv', '.venv',
+                      'dist', 'build', 'target', '.next', '.nuxt'}
+
+         files = []
+
+         for path in source_dir.rglob('*'):
+             # Skip directories
+             if path.is_dir():
+                 continue
+
+             # Skip if in excluded directory
+             if any(skip_dir in path.parts for skip_dir in skip_dirs):
+                 continue
+
+             # Check extension
+             if path.suffix.lower() not in extensions:
+                 continue
+
+             # Apply module filter if specified
+             if module_filter:
+                 if module_filter not in str(path):
+                     continue
+
+             files.append(path)
+
+         return files
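
For orientation, a minimal usage sketch of the new checker follows. It is not part of the published package contents shown above; the invariant strings and the direct use of the class (rather than the CLI) are assumptions for illustration, and the module's import path is not shown in the diff.

# Hypothetical usage sketch -- not part of the @voodocs/cli package contents.
from pathlib import Path

# from <module added in the diff above> import InvariantChecker  # import path not shown in the diff

checker = InvariantChecker()
results = checker.check_invariants(
    invariants=[
        "Passwords must be hashed before storage",        # matched by the 'password' pattern
        "SQL queries must use parameterized statements",  # matched by the 'sql' pattern
    ],
    source_dir=Path("src"),
)

for result in results:
    status = "PASS" if result.passed else "FAIL"
    print(f"[{status}] {result.invariant} ({result.checked_files} files checked)")
    for v in result.violations:
        print(f"  {v.severity.value}: {v.file_path}:{v.line_number} - {v.explanation}")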