@voodocs/cli 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +454 -0
- package/cli.py +32 -3
- package/lib/darkarts/annotations/DARKARTS_SYMBOLS.md +529 -0
- package/lib/darkarts/annotations/TRANSFORMATION_EXAMPLES.md +478 -0
- package/lib/darkarts/annotations/__init__.py +42 -0
- package/lib/darkarts/annotations/darkarts_parser.py +238 -0
- package/lib/darkarts/annotations/parser.py +186 -5
- package/lib/darkarts/annotations/symbols.py +244 -0
- package/lib/darkarts/annotations/translator.py +386 -0
- package/lib/darkarts/context/ai_instructions.py +8 -1
- package/lib/darkarts/context/ai_integrations.py +22 -1
- package/lib/darkarts/context/checker.py +291 -40
- package/lib/darkarts/context/commands.py +375 -267
- package/lib/darkarts/context/diagram.py +22 -1
- package/lib/darkarts/context/errors.py +164 -0
- package/lib/darkarts/context/models.py +23 -1
- package/lib/darkarts/context/module_utils.py +198 -0
- package/lib/darkarts/context/ui.py +337 -0
- package/lib/darkarts/context/validation.py +311 -0
- package/lib/darkarts/context/yaml_utils.py +130 -16
- package/lib/darkarts/exceptions.py +5 -0
- package/lib/darkarts/plugins/voodocs/instruction_generator.py +8 -1
- package/package.json +1 -1
|
@@ -1,4 +1,11 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""@darkarts
|
|
2
|
+
⊢checker:invariants.validator
|
|
3
|
+
∂{re,ast,pathlib,dataclasses}
|
|
4
|
+
⚠{src:utf8,invariants:natural-lang,fs:readable,code:syntactically-valid}
|
|
5
|
+
⊨{∀check→¬modify-src,∀read→handle-encoding,pattern:case-insensitive,∀check→return-violations,severity∈{error,warning,info}}
|
|
6
|
+
🔒{read-only,¬exec}
|
|
7
|
+
⚡{O((n'*m*l'/c)/p)|n'=filtered-files,l'=filtered-lines,c=precompile,p=cores,speedup=40-80x}
|
|
8
|
+
|
|
2
9
|
Invariant Checker
|
|
3
10
|
|
|
4
11
|
Validates that code respects documented invariants.
|
|
@@ -10,6 +17,13 @@ from pathlib import Path
|
|
|
10
17
|
from typing import List, Dict, Optional, Tuple
|
|
11
18
|
from dataclasses import dataclass
|
|
12
19
|
from enum import Enum
|
|
20
|
+
from multiprocessing import Pool, cpu_count
|
|
21
|
+
from functools import partial
|
|
22
|
+
try:
|
|
23
|
+
from tqdm import tqdm
|
|
24
|
+
HAS_TQDM = True
|
|
25
|
+
except ImportError:
|
|
26
|
+
HAS_TQDM = False
|
|
13
27
|
|
|
14
28
|
|
|
15
29
|
class ViolationSeverity(Enum):
|
|
@@ -142,6 +156,46 @@ class InvariantChecker:
|
|
|
142
156
|
|
|
143
157
|
def __init__(self):
|
|
144
158
|
self.results: List[CheckResult] = []
|
|
159
|
+
# Phase 1 Optimization: Pre-compile all regex patterns
|
|
160
|
+
self._compiled_patterns = self._precompile_patterns()
|
|
161
|
+
|
|
162
|
+
def _precompile_patterns(self) -> Dict[str, Dict[str, List[re.Pattern]]]:
|
|
163
|
+
"""
|
|
164
|
+
Phase 1 Optimization: Pre-compile all regex patterns.
|
|
165
|
+
|
|
166
|
+
This avoids re-compiling the same patterns for every line check,
|
|
167
|
+
providing a 2-3x speedup.
|
|
168
|
+
|
|
169
|
+
Returns:
|
|
170
|
+
Dict mapping pattern_type to compiled safe/unsafe patterns
|
|
171
|
+
"""
|
|
172
|
+
compiled = {}
|
|
173
|
+
|
|
174
|
+
for pattern_type, pattern_info in self.PATTERNS.items():
|
|
175
|
+
compiled[pattern_type] = {
|
|
176
|
+
'safe': [],
|
|
177
|
+
'unsafe': []
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
# Compile safe patterns
|
|
181
|
+
for pattern in pattern_info.get('safe_patterns', []):
|
|
182
|
+
try:
|
|
183
|
+
compiled[pattern_type]['safe'].append(
|
|
184
|
+
re.compile(pattern, re.IGNORECASE)
|
|
185
|
+
)
|
|
186
|
+
except re.error:
|
|
187
|
+
pass # Skip invalid patterns
|
|
188
|
+
|
|
189
|
+
# Compile unsafe patterns
|
|
190
|
+
for pattern in pattern_info.get('unsafe_patterns', []):
|
|
191
|
+
try:
|
|
192
|
+
compiled[pattern_type]['unsafe'].append(
|
|
193
|
+
re.compile(pattern, re.IGNORECASE)
|
|
194
|
+
)
|
|
195
|
+
except re.error:
|
|
196
|
+
pass # Skip invalid patterns
|
|
197
|
+
|
|
198
|
+
return compiled
|
|
145
199
|
|
|
146
200
|
def check_invariants(
|
|
147
201
|
self,
|
|
@@ -196,10 +250,19 @@ class InvariantChecker:
|
|
|
196
250
|
return self._pattern_check(invariant, pattern_type, source_dir, module_filter)
|
|
197
251
|
|
|
198
252
|
def _detect_pattern_type(self, invariant: str) -> Optional[str]:
|
|
199
|
-
"""Detect which pattern type an invariant matches."""
|
|
253
|
+
"""Detect which pattern type an invariant matches (instance method wrapper)."""
|
|
254
|
+
return self._detect_pattern_type_static(invariant)
|
|
255
|
+
|
|
256
|
+
@staticmethod
|
|
257
|
+
def _detect_pattern_type_static(invariant: str) -> Optional[str]:
|
|
258
|
+
"""
|
|
259
|
+
Detect which pattern type an invariant matches (static for parallel processing).
|
|
260
|
+
|
|
261
|
+
Phase 2 Optimization: Static method can be pickled for multiprocessing.
|
|
262
|
+
"""
|
|
200
263
|
invariant_lower = invariant.lower()
|
|
201
264
|
|
|
202
|
-
for pattern_type, pattern_info in
|
|
265
|
+
for pattern_type, pattern_info in InvariantChecker.PATTERNS.items():
|
|
203
266
|
keywords = pattern_info['keywords']
|
|
204
267
|
if any(keyword in invariant_lower for keyword in keywords):
|
|
205
268
|
return pattern_type
|
|
@@ -213,29 +276,109 @@ class InvariantChecker:
|
|
|
213
276
|
source_dir: Path,
|
|
214
277
|
module_filter: Optional[str]
|
|
215
278
|
) -> CheckResult:
|
|
216
|
-
"""
|
|
279
|
+
"""
|
|
280
|
+
Check invariant using pattern-specific logic.
|
|
281
|
+
|
|
282
|
+
Phase 2 Optimization: Use parallel processing for file checking.
|
|
283
|
+
"""
|
|
217
284
|
pattern_info = self.PATTERNS[pattern_type]
|
|
218
|
-
violations = []
|
|
219
|
-
checked_files = 0
|
|
220
285
|
|
|
221
|
-
# Get all source files
|
|
222
|
-
files = self._get_source_files(source_dir, module_filter)
|
|
286
|
+
# Get all source files (Phase 1: with pattern-specific filtering)
|
|
287
|
+
files = self._get_source_files(source_dir, module_filter, pattern_type)
|
|
288
|
+
|
|
289
|
+
# Phase 2 Optimization: Parallel file processing
|
|
290
|
+
if len(files) > 10: # Only parallelize if worth the overhead
|
|
291
|
+
violations = self._check_files_parallel(files, invariant, pattern_info)
|
|
292
|
+
else:
|
|
293
|
+
violations = self._check_files_sequential(files, invariant, pattern_info)
|
|
294
|
+
|
|
295
|
+
return CheckResult(
|
|
296
|
+
invariant=invariant,
|
|
297
|
+
passed=len(violations) == 0,
|
|
298
|
+
violations=violations,
|
|
299
|
+
checked_files=len(files)
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
def _check_files_sequential(
|
|
303
|
+
self,
|
|
304
|
+
files: List[Path],
|
|
305
|
+
invariant: str,
|
|
306
|
+
pattern_info: Dict
|
|
307
|
+
) -> List[Violation]:
|
|
308
|
+
"""
|
|
309
|
+
Check files sequentially (for small file sets).
|
|
310
|
+
|
|
311
|
+
Args:
|
|
312
|
+
files: List of file paths to check
|
|
313
|
+
invariant: Invariant text
|
|
314
|
+
pattern_info: Pattern information dict
|
|
315
|
+
|
|
316
|
+
Returns:
|
|
317
|
+
List of all violations found
|
|
318
|
+
"""
|
|
319
|
+
violations = []
|
|
223
320
|
|
|
224
321
|
for file_path in files:
|
|
225
|
-
checked_files += 1
|
|
226
322
|
file_violations = self._check_file(
|
|
227
|
-
file_path,
|
|
228
|
-
invariant,
|
|
323
|
+
file_path,
|
|
324
|
+
invariant,
|
|
229
325
|
pattern_info
|
|
230
326
|
)
|
|
231
327
|
violations.extend(file_violations)
|
|
232
328
|
|
|
233
|
-
return
|
|
329
|
+
return violations
|
|
330
|
+
|
|
331
|
+
def _check_files_parallel(
|
|
332
|
+
self,
|
|
333
|
+
files: List[Path],
|
|
334
|
+
invariant: str,
|
|
335
|
+
pattern_info: Dict
|
|
336
|
+
) -> List[Violation]:
|
|
337
|
+
"""
|
|
338
|
+
Check files in parallel using multiprocessing.
|
|
339
|
+
|
|
340
|
+
Phase 2 Optimization: Distribute file checking across CPU cores.
|
|
341
|
+
|
|
342
|
+
Args:
|
|
343
|
+
files: List of file paths to check
|
|
344
|
+
invariant: Invariant text
|
|
345
|
+
pattern_info: Pattern information dict
|
|
346
|
+
|
|
347
|
+
Returns:
|
|
348
|
+
List of all violations found
|
|
349
|
+
"""
|
|
350
|
+
# Create partial function with fixed arguments
|
|
351
|
+
check_func = partial(
|
|
352
|
+
self._check_file_static,
|
|
234
353
|
invariant=invariant,
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
checked_files=checked_files
|
|
354
|
+
pattern_info=pattern_info,
|
|
355
|
+
compiled_patterns=self._compiled_patterns
|
|
238
356
|
)
|
|
357
|
+
|
|
358
|
+
# Use all available CPU cores
|
|
359
|
+
num_cores = cpu_count()
|
|
360
|
+
|
|
361
|
+
with Pool(num_cores) as pool:
|
|
362
|
+
# Phase 2 Enhancement: Add progress bar if tqdm is available
|
|
363
|
+
if HAS_TQDM:
|
|
364
|
+
# Use imap for progress tracking
|
|
365
|
+
results = list(tqdm(
|
|
366
|
+
pool.imap(check_func, files),
|
|
367
|
+
total=len(files),
|
|
368
|
+
desc="Checking files",
|
|
369
|
+
unit="file",
|
|
370
|
+
leave=False
|
|
371
|
+
))
|
|
372
|
+
else:
|
|
373
|
+
# Fallback to regular map without progress
|
|
374
|
+
results = pool.map(check_func, files)
|
|
375
|
+
|
|
376
|
+
# Flatten list of lists
|
|
377
|
+
violations = []
|
|
378
|
+
for file_violations in results:
|
|
379
|
+
violations.extend(file_violations)
|
|
380
|
+
|
|
381
|
+
return violations
|
|
239
382
|
|
|
240
383
|
def _generic_check(
|
|
241
384
|
self,
|
|
@@ -259,7 +402,35 @@ class InvariantChecker:
|
|
|
259
402
|
invariant: str,
|
|
260
403
|
pattern_info: Dict
|
|
261
404
|
) -> List[Violation]:
|
|
262
|
-
"""Check a single file for violations."""
|
|
405
|
+
"""Check a single file for violations (instance method wrapper)."""
|
|
406
|
+
return self._check_file_static(
|
|
407
|
+
file_path,
|
|
408
|
+
invariant,
|
|
409
|
+
pattern_info,
|
|
410
|
+
self._compiled_patterns
|
|
411
|
+
)
|
|
412
|
+
|
|
413
|
+
@staticmethod
|
|
414
|
+
def _check_file_static(
|
|
415
|
+
file_path: Path,
|
|
416
|
+
invariant: str,
|
|
417
|
+
pattern_info: Dict,
|
|
418
|
+
compiled_patterns: Dict
|
|
419
|
+
) -> List[Violation]:
|
|
420
|
+
"""
|
|
421
|
+
Check a single file for violations (static for parallel processing).
|
|
422
|
+
|
|
423
|
+
Phase 2 Optimization: Static method can be pickled for multiprocessing.
|
|
424
|
+
|
|
425
|
+
Args:
|
|
426
|
+
file_path: Path to file to check
|
|
427
|
+
invariant: Invariant text
|
|
428
|
+
pattern_info: Pattern information dict
|
|
429
|
+
compiled_patterns: Pre-compiled regex patterns
|
|
430
|
+
|
|
431
|
+
Returns:
|
|
432
|
+
List of violations found in this file
|
|
433
|
+
"""
|
|
263
434
|
violations = []
|
|
264
435
|
|
|
265
436
|
try:
|
|
@@ -267,12 +438,13 @@ class InvariantChecker:
|
|
|
267
438
|
lines = f.readlines()
|
|
268
439
|
|
|
269
440
|
for line_num, line in enumerate(lines, start=1):
|
|
270
|
-
violation =
|
|
271
|
-
line,
|
|
272
|
-
line_num,
|
|
273
|
-
file_path,
|
|
441
|
+
violation = InvariantChecker._check_line_static(
|
|
442
|
+
line,
|
|
443
|
+
line_num,
|
|
444
|
+
file_path,
|
|
274
445
|
invariant,
|
|
275
|
-
pattern_info
|
|
446
|
+
pattern_info,
|
|
447
|
+
compiled_patterns
|
|
276
448
|
)
|
|
277
449
|
if violation:
|
|
278
450
|
violations.append(violation)
|
|
@@ -284,27 +456,71 @@ class InvariantChecker:
|
|
|
284
456
|
return violations
|
|
285
457
|
|
|
286
458
|
def _check_line(
|
|
287
|
-
self,
|
|
288
|
-
line: str,
|
|
459
|
+
self,
|
|
460
|
+
line: str,
|
|
289
461
|
line_num: int,
|
|
290
462
|
file_path: Path,
|
|
291
463
|
invariant: str,
|
|
292
464
|
pattern_info: Dict
|
|
293
465
|
) -> Optional[Violation]:
|
|
294
|
-
"""Check a single line for violations."""
|
|
295
|
-
|
|
296
|
-
|
|
466
|
+
"""Check a single line for violations (instance method wrapper)."""
|
|
467
|
+
return self._check_line_static(
|
|
468
|
+
line,
|
|
469
|
+
line_num,
|
|
470
|
+
file_path,
|
|
471
|
+
invariant,
|
|
472
|
+
pattern_info,
|
|
473
|
+
self._compiled_patterns
|
|
474
|
+
)
|
|
475
|
+
|
|
476
|
+
@staticmethod
|
|
477
|
+
def _check_line_static(
|
|
478
|
+
line: str,
|
|
479
|
+
line_num: int,
|
|
480
|
+
file_path: Path,
|
|
481
|
+
invariant: str,
|
|
482
|
+
pattern_info: Dict,
|
|
483
|
+
compiled_patterns: Dict
|
|
484
|
+
) -> Optional[Violation]:
|
|
485
|
+
"""
|
|
486
|
+
Check a single line for violations (static for parallel processing).
|
|
487
|
+
|
|
488
|
+
Phase 1 Optimization: Use pre-compiled patterns and skip irrelevant lines.
|
|
489
|
+
Phase 2 Optimization: Static method can be pickled for multiprocessing.
|
|
490
|
+
"""
|
|
491
|
+
stripped = line.strip()
|
|
492
|
+
|
|
493
|
+
# Phase 1 Optimization: Skip more line types
|
|
494
|
+
if not stripped: # Empty lines
|
|
495
|
+
return None
|
|
496
|
+
if stripped.startswith('#'): # Python comments
|
|
497
|
+
return None
|
|
498
|
+
if stripped.startswith('//'): # C-style comments
|
|
499
|
+
return None
|
|
500
|
+
if stripped.startswith('/*') or stripped.startswith('*'): # Block comments
|
|
501
|
+
return None
|
|
502
|
+
if stripped.startswith('"""') or stripped.startswith("'''"): # Docstrings
|
|
503
|
+
return None
|
|
504
|
+
if stripped.startswith('import ') or stripped.startswith('from '): # Imports
|
|
505
|
+
return None
|
|
506
|
+
|
|
507
|
+
# Detect pattern type to get compiled patterns
|
|
508
|
+
pattern_type = InvariantChecker._detect_pattern_type_static(invariant)
|
|
509
|
+
if not pattern_type:
|
|
297
510
|
return None
|
|
298
511
|
|
|
512
|
+
compiled = compiled_patterns.get(pattern_type, {})
|
|
513
|
+
|
|
514
|
+
# Phase 1 Optimization: Use pre-compiled patterns instead of re.search
|
|
299
515
|
# Check for unsafe patterns
|
|
300
|
-
|
|
301
|
-
for
|
|
302
|
-
if
|
|
516
|
+
unsafe_compiled = compiled.get('unsafe', [])
|
|
517
|
+
for compiled_pattern in unsafe_compiled:
|
|
518
|
+
if compiled_pattern.search(line):
|
|
303
519
|
# Check if safe pattern is also present
|
|
304
|
-
|
|
520
|
+
safe_compiled = compiled.get('safe', [])
|
|
305
521
|
has_safe_pattern = any(
|
|
306
|
-
|
|
307
|
-
for safe_pattern in
|
|
522
|
+
safe_pattern.search(line)
|
|
523
|
+
for safe_pattern in safe_compiled
|
|
308
524
|
)
|
|
309
525
|
|
|
310
526
|
if not has_safe_pattern:
|
|
@@ -312,7 +528,7 @@ class InvariantChecker:
|
|
|
312
528
|
invariant=invariant,
|
|
313
529
|
file_path=str(file_path),
|
|
314
530
|
line_number=line_num,
|
|
315
|
-
line_content=
|
|
531
|
+
line_content=stripped,
|
|
316
532
|
severity=ViolationSeverity.WARNING,
|
|
317
533
|
explanation=pattern_info['message']
|
|
318
534
|
)
|
|
@@ -321,11 +537,11 @@ class InvariantChecker:
|
|
|
321
537
|
keywords = pattern_info.get('keywords', [])
|
|
322
538
|
has_keyword = any(keyword in line.lower() for keyword in keywords)
|
|
323
539
|
|
|
324
|
-
if has_keyword and '
|
|
325
|
-
|
|
540
|
+
if has_keyword and compiled.get('safe'):
|
|
541
|
+
safe_compiled = compiled['safe']
|
|
326
542
|
has_safe_pattern = any(
|
|
327
|
-
|
|
328
|
-
for safe_pattern in
|
|
543
|
+
safe_pattern.search(line)
|
|
544
|
+
for safe_pattern in safe_compiled
|
|
329
545
|
)
|
|
330
546
|
|
|
331
547
|
# If keyword present but no safe pattern, might be a violation
|
|
@@ -335,7 +551,7 @@ class InvariantChecker:
|
|
|
335
551
|
invariant=invariant,
|
|
336
552
|
file_path=str(file_path),
|
|
337
553
|
line_number=line_num,
|
|
338
|
-
line_content=
|
|
554
|
+
line_content=stripped,
|
|
339
555
|
severity=ViolationSeverity.INFO,
|
|
340
556
|
explanation=pattern_info['message']
|
|
341
557
|
)
|
|
@@ -345,15 +561,39 @@ class InvariantChecker:
|
|
|
345
561
|
def _get_source_files(
|
|
346
562
|
self,
|
|
347
563
|
source_dir: Path,
|
|
348
|
-
module_filter: Optional[str]
|
|
564
|
+
module_filter: Optional[str],
|
|
565
|
+
pattern_type: Optional[str] = None
|
|
349
566
|
) -> List[Path]:
|
|
350
|
-
"""
|
|
567
|
+
"""
|
|
568
|
+
Get all source files to check.
|
|
569
|
+
|
|
570
|
+
Phase 1 Optimization: Filter files by pattern type to skip irrelevant files.
|
|
571
|
+
For example, SQL invariants only need to check files with DB code.
|
|
572
|
+
|
|
573
|
+
Args:
|
|
574
|
+
source_dir: Root directory to scan
|
|
575
|
+
module_filter: Optional module name filter
|
|
576
|
+
pattern_type: Optional pattern type for smart filtering
|
|
577
|
+
|
|
578
|
+
Returns:
|
|
579
|
+
List of file paths to check
|
|
580
|
+
"""
|
|
581
|
+
# Code file extensions
|
|
351
582
|
extensions = {'.py', '.ts', '.tsx', '.js', '.jsx', '.java', '.cpp',
|
|
352
583
|
'.cc', '.cxx', '.h', '.hpp', '.cs', '.go', '.rs'}
|
|
353
584
|
|
|
585
|
+
# Directories to skip
|
|
354
586
|
skip_dirs = {'node_modules', '.git', '__pycache__', 'venv', '.venv',
|
|
355
587
|
'dist', 'build', 'target', '.next', '.nuxt'}
|
|
356
588
|
|
|
589
|
+
# Phase 1 Optimization: Pattern-specific file filtering
|
|
590
|
+
# Only check relevant files based on pattern type
|
|
591
|
+
relevant_patterns = {
|
|
592
|
+
'sql': ['model', 'db', 'database', 'query', 'repository', 'dao'],
|
|
593
|
+
'password': ['auth', 'user', 'account', 'login', 'register'],
|
|
594
|
+
'api_key': ['config', 'env', 'settings', 'api', 'client'],
|
|
595
|
+
}
|
|
596
|
+
|
|
357
597
|
files = []
|
|
358
598
|
|
|
359
599
|
for path in source_dir.rglob('*'):
|
|
@@ -369,6 +609,17 @@ class InvariantChecker:
|
|
|
369
609
|
if path.suffix.lower() not in extensions:
|
|
370
610
|
continue
|
|
371
611
|
|
|
612
|
+
# Phase 1 Optimization: Skip files not relevant to pattern type
|
|
613
|
+
if pattern_type and pattern_type in relevant_patterns:
|
|
614
|
+
path_str = str(path).lower()
|
|
615
|
+
patterns = relevant_patterns[pattern_type]
|
|
616
|
+
# Only include if path contains relevant keywords
|
|
617
|
+
if not any(pattern in path_str for pattern in patterns):
|
|
618
|
+
# Still include files without specific markers (could be relevant)
|
|
619
|
+
# But skip obvious non-matches like tests, docs, etc.
|
|
620
|
+
if any(skip in path_str for skip in ['test', 'spec', 'doc', 'example']):
|
|
621
|
+
continue
|
|
622
|
+
|
|
372
623
|
# Apply module filter if specified
|
|
373
624
|
if module_filter:
|
|
374
625
|
if module_filter not in str(path):
|