empathy-framework 4.6.2-py3-none-any.whl → 4.6.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.3.dist-info}/METADATA +1 -1
  2. {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.3.dist-info}/RECORD +53 -20
  3. {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.3.dist-info}/WHEEL +1 -1
  4. empathy_os/__init__.py +1 -1
  5. empathy_os/cli.py +361 -32
  6. empathy_os/config/xml_config.py +8 -3
  7. empathy_os/core.py +37 -4
  8. empathy_os/leverage_points.py +2 -1
  9. empathy_os/memory/short_term.py +45 -1
  10. empathy_os/meta_workflows/agent_creator 2.py +254 -0
  11. empathy_os/meta_workflows/builtin_templates 2.py +567 -0
  12. empathy_os/meta_workflows/cli_meta_workflows 2.py +1551 -0
  13. empathy_os/meta_workflows/form_engine 2.py +304 -0
  14. empathy_os/meta_workflows/intent_detector 2.py +298 -0
  15. empathy_os/meta_workflows/pattern_learner 2.py +754 -0
  16. empathy_os/meta_workflows/session_context 2.py +398 -0
  17. empathy_os/meta_workflows/template_registry 2.py +229 -0
  18. empathy_os/meta_workflows/workflow 2.py +980 -0
  19. empathy_os/models/token_estimator.py +16 -9
  20. empathy_os/models/validation.py +7 -1
  21. empathy_os/orchestration/pattern_learner 2.py +699 -0
  22. empathy_os/orchestration/real_tools 2.py +938 -0
  23. empathy_os/orchestration/real_tools.py +4 -2
  24. empathy_os/socratic/__init__ 2.py +273 -0
  25. empathy_os/socratic/ab_testing 2.py +969 -0
  26. empathy_os/socratic/blueprint 2.py +532 -0
  27. empathy_os/socratic/cli 2.py +689 -0
  28. empathy_os/socratic/collaboration 2.py +1112 -0
  29. empathy_os/socratic/domain_templates 2.py +916 -0
  30. empathy_os/socratic/embeddings 2.py +734 -0
  31. empathy_os/socratic/engine 2.py +729 -0
  32. empathy_os/socratic/explainer 2.py +663 -0
  33. empathy_os/socratic/feedback 2.py +767 -0
  34. empathy_os/socratic/forms 2.py +624 -0
  35. empathy_os/socratic/generator 2.py +716 -0
  36. empathy_os/socratic/llm_analyzer 2.py +635 -0
  37. empathy_os/socratic/mcp_server 2.py +751 -0
  38. empathy_os/socratic/session 2.py +306 -0
  39. empathy_os/socratic/storage 2.py +635 -0
  40. empathy_os/socratic/storage.py +2 -1
  41. empathy_os/socratic/success 2.py +719 -0
  42. empathy_os/socratic/visual_editor 2.py +812 -0
  43. empathy_os/socratic/web_ui 2.py +925 -0
  44. empathy_os/tier_recommender.py +5 -2
  45. empathy_os/workflow_commands.py +11 -6
  46. empathy_os/workflows/base.py +1 -1
  47. empathy_os/workflows/batch_processing 2.py +310 -0
  48. empathy_os/workflows/release_prep_crew 2.py +968 -0
  49. empathy_os/workflows/test_coverage_boost_crew 2.py +848 -0
  50. empathy_os/workflows/test_maintenance.py +3 -2
  51. {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.3.dist-info}/entry_points.txt +0 -0
  52. {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.3.dist-info}/licenses/LICENSE +0 -0
  53. {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.3.dist-info}/top_level.txt +0 -0
empathy_os/orchestration/real_tools 2.py
@@ -0,0 +1,938 @@
+ """Real tool implementations for meta-orchestration agents.
+
+ This module provides actual tool integrations for agents to interact with
+ real systems instead of returning mock data.
+
+ Security:
+ - All file operations validated with _validate_file_path()
+ - Subprocess calls sanitized
+ - Output size limited to prevent memory issues
+ """
+
+ import json
+ import logging
+ import subprocess
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any
+
+ logger = logging.getLogger(__name__)
+
+
+ def _validate_file_path(path: str) -> Path:
+     """Validate file path to prevent path traversal (simplified version).
+
+     Args:
+         path: File path to validate
+
+     Returns:
+         Validated Path object
+
+     Raises:
+         ValueError: If path is invalid
+     """
+     if not path or not isinstance(path, str):
+         raise ValueError("path must be a non-empty string")
+
+     if "\x00" in path:
+         raise ValueError("path contains null bytes")
+
+     try:
+         resolved = Path(path).resolve()
+     except (OSError, RuntimeError) as e:
+         raise ValueError(f"Invalid path: {e}") from e
+
+     # Block system directories
+     dangerous_paths = ["/etc", "/sys", "/proc", "/dev"]
+     for dangerous in dangerous_paths:
+         if str(resolved).startswith(dangerous):
+             raise ValueError(f"Cannot write to system directory: {dangerous}")
+
+     return resolved
+
+
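A minimal sketch of how the validator behaves, assuming the classes below are imported from the sibling module empathy_os.orchestration.real_tools (the duplicated "real_tools 2.py" filename is not an importable module name; these call sites are hypothetical):

    from empathy_os.orchestration.real_tools import _validate_file_path

    path = _validate_file_path("reports/out.json")  # returns a resolved, absolute Path
    _validate_file_path("/etc/passwd")   # raises ValueError: system directory blocked
    _validate_file_path("bad\x00name")   # raises ValueError: path contains null bytes
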
+ @dataclass
+ class CoverageReport:
+     """Coverage analysis report from pytest-cov."""
+
+     total_coverage: float
+     files_analyzed: int
+     uncovered_files: list[dict[str, Any]]
+     missing_lines: dict[str, list[int]]
+
+
+ class RealCoverageAnalyzer:
+     """Runs real pytest coverage analysis."""
+
+     def __init__(self, project_root: str = "."):
+         """Initialize coverage analyzer.
+
+         Args:
+             project_root: Project root directory
+         """
+         self.project_root = Path(project_root).resolve()
+
+     def analyze(self, use_existing: bool = True) -> CoverageReport:
+         """Run coverage analysis on all project packages.
+
+         Analyzes coverage for: empathy_os, empathy_llm_toolkit,
+         empathy_software_plugin, empathy_healthcare_plugin
+
+         Args:
+             use_existing: Use existing coverage.json if available (default: True)
+
+         Returns:
+             CoverageReport with results
+
+         Raises:
+             RuntimeError: If coverage analysis fails
+         """
+         logger.info("Running coverage analysis on all packages")
+
+         coverage_file = self.project_root / "coverage.json"
+
+         # Check if we can use existing coverage data
+         if use_existing and coverage_file.exists():
+             import time
+
+             file_age = time.time() - coverage_file.stat().st_mtime
+             # Use existing file if less than 1 hour old
+             if file_age < 3600:
+                 logger.info(
+                     f"Using existing coverage data (age: {file_age/60:.1f} minutes)"
+                 )
+             else:
+                 logger.info("Existing coverage data is stale, regenerating")
+                 use_existing = False
+
+         if not use_existing or not coverage_file.exists():
+             try:
+                 # Run pytest with coverage on test suite
+                 logger.info("Running test suite to generate coverage (may take 2-5 minutes)")
+
+                 # Use actual package names (match pyproject.toml configuration)
+                 cov_packages = [
+                     "empathy_os",
+                     "empathy_llm_toolkit",
+                     "empathy_software_plugin",
+                     "empathy_healthcare_plugin",
+                 ]
+
+                 cmd = [
+                     "pytest",
+                     "tests/",  # Run all tests to measure coverage
+                     "--cov-report=json",
+                     "--cov-report=term-missing",
+                     "-q",
+                     "--tb=no",
+                     "--maxfail=50",  # Continue despite failures
+                 ]
+
+                 # Add --cov for each package
+                 for pkg in cov_packages:
+                     cmd.append(f"--cov={pkg}")
+
+                 _result = subprocess.run(  # Result not needed, only coverage.json
+                     cmd,
+                     cwd=self.project_root,
+                     capture_output=True,
+                     text=True,
+                     timeout=600,  # Increased to 10 minutes
+                 )
+
+             except subprocess.TimeoutExpired:
+                 logger.warning("Coverage generation timed out, checking for partial results")
+                 # Fall through to use whatever coverage.json exists
+
+         # Read coverage.json
+         if not coverage_file.exists():
+             raise RuntimeError(
+                 "Coverage report not found. Run 'pytest --cov=<package> --cov-report=json' first."
+             )
+
+         try:
+             with coverage_file.open() as f:
+                 coverage_data = json.load(f)
+
+             # Parse results
+             total_coverage = coverage_data["totals"]["percent_covered"]
+             files = coverage_data.get("files", {})
+
+             # Identify low coverage files
+             uncovered_files = []
+             missing_lines = {}
+
+             for filepath, file_data in files.items():
+                 file_coverage = file_data["summary"]["percent_covered"]
+                 if file_coverage < 80:  # Below target
+                     uncovered_files.append(
+                         {
+                             "path": filepath,
+                             "coverage": file_coverage,
+                             "missing_lines": file_data["missing_lines"],
+                         }
+                     )
+                     missing_lines[filepath] = file_data["missing_lines"]
+
+             logger.info(
+                 f"Coverage analysis complete: {total_coverage:.1f}% "
+                 f"({len(uncovered_files)} files below 80%)"
+             )
+
+             return CoverageReport(
+                 total_coverage=total_coverage,
+                 files_analyzed=len(files),
+                 uncovered_files=uncovered_files,
+                 missing_lines=missing_lines,
+             )
+
+         except Exception as e:
+             logger.error(f"Coverage analysis failed: {e}")
+             raise RuntimeError(f"Coverage analysis failed: {e}") from e
+
+
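A short usage sketch for the analyzer, under the same import assumption as above; the printed fields come straight from CoverageReport:

    from empathy_os.orchestration.real_tools import RealCoverageAnalyzer

    analyzer = RealCoverageAnalyzer(project_root=".")
    report = analyzer.analyze(use_existing=True)  # reuses coverage.json if under an hour old
    print(f"total: {report.total_coverage:.1f}% across {report.files_analyzed} files")
    for entry in report.uncovered_files[:5]:
        print(entry["path"], f"{entry['coverage']:.1f}%")
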
+ class RealTestGenerator:
+     """Generates actual test code using LLM."""
+
+     def __init__(
+         self,
+         project_root: str = ".",
+         output_dir: str = "tests/generated",
+         api_key: str | None = None,
+         use_llm: bool = True,
+     ):
+         """Initialize test generator.
+
+         Args:
+             project_root: Project root directory
+             output_dir: Directory for generated tests (relative to project_root)
+             api_key: Anthropic API key (or uses env var)
+             use_llm: Whether to use LLM for intelligent test generation
+         """
+         self.project_root = Path(project_root).resolve()
+         self.output_dir = self.project_root / output_dir
+         self.output_dir.mkdir(parents=True, exist_ok=True)
+         self.api_key = api_key
+         self.use_llm = use_llm
+
+         # Initialize LLM client if needed
+         self._llm = None
+         if use_llm:
+             self._initialize_llm()
+
+     def _initialize_llm(self):
+         """Initialize Anthropic LLM client."""
+         try:
+             import os
+
+             from anthropic import Anthropic
+
+             # Try to load .env file
+             try:
+                 from dotenv import load_dotenv
+                 load_dotenv()
+             except ImportError:
+                 pass  # python-dotenv not required
+
+             api_key = self.api_key or os.environ.get("ANTHROPIC_API_KEY")
+             if not api_key:
+                 logger.warning(
+                     "No Anthropic API key found. Set ANTHROPIC_API_KEY environment variable "
+                     "or create .env file with ANTHROPIC_API_KEY=your_key_here. "
+                     "Falling back to basic templates."
+                 )
+                 self.use_llm = False
+                 return
+
+             self._llm = Anthropic(api_key=api_key)
+             logger.info("✓ LLM client initialized successfully with Claude")
+
+         except ImportError as e:
+             logger.warning(f"Required package not installed: {e}. Falling back to templates")
+             self.use_llm = False
+         except Exception as e:
+             logger.warning(f"Failed to initialize LLM: {e}. Falling back to templates")
+             self.use_llm = False
+
+     def generate_tests_for_file(
+         self, source_file: str, missing_lines: list[int]
+     ) -> Path:
+         """Generate tests for uncovered code in a file.
+
+         Args:
+             source_file: Path to source file
+             missing_lines: Line numbers without coverage
+
+         Returns:
+             Path to generated test file
+
+         Raises:
+             RuntimeError: If test generation fails
+         """
+         logger.info(f"Generating tests for {source_file} (lines: {missing_lines[:5]}...)")
+
+         # Read source file
+         source_path = Path(source_file)
+         if not source_path.exists():
+             source_path = self.project_root / source_file
+
+         # Resolve to absolute path for relative_to() to work correctly
+         source_path = source_path.resolve()
+
+         try:
+             source_code = source_path.read_text()
+         except Exception as e:
+             raise RuntimeError(f"Cannot read source file: {e}") from e
+
+         # Create unique test name from full path to avoid collisions
+         # Example: src/empathy_os/telemetry/cli.py → test_src_empathy_os_telemetry_cli_generated.py
+         relative_path = str(source_path.relative_to(self.project_root))
+         test_name = f"test_{relative_path.replace('/', '_').replace('.py', '')}_generated.py"
+         test_path = self.output_dir / test_name
+
+         # Generate tests using LLM or template
+         if self.use_llm and self._llm:
+             test_code = self._generate_llm_tests(source_file, source_code, missing_lines)
+         else:
+             test_code = self._generate_basic_test_template(
+                 source_file, source_code, missing_lines
+             )
+
+         # Write test file
+         validated_path = _validate_file_path(str(test_path))
+         validated_path.write_text(test_code)
+
+         logger.info(f"Generated test file: {test_path}")
+         return test_path
+
+     def _generate_llm_tests(
+         self, source_file: str, source_code: str, missing_lines: list[int]
+     ) -> str:
+         """Generate tests using LLM (Claude).
+
+         Args:
+             source_file: Source file path
+             source_code: Source file content
+             missing_lines: Uncovered line numbers
+
+         Returns:
+             Generated test code
+
+         Raises:
+             RuntimeError: If LLM generation fails
+         """
+         logger.info(f"Using LLM to generate intelligent tests for {source_file}")
+
+         # Extract API signatures using AST
+         api_docs = self._extract_api_docs(source_code)
+
+         # Extract module path
+         module_path = source_file.replace("/", ".").replace(".py", "")
+
+         # Create prompt for Claude with full context
+         prompt = f"""Generate comprehensive pytest tests for the following Python code.
+
+ **Source File:** `{source_file}`
+ **Module Path:** `{module_path}`
+ **Uncovered Lines:** {missing_lines[:20]}
+
+ {api_docs}
+
+ **Full Source Code:**
+ ```python
+ {source_code}
+ ```
+
+ **CRITICAL Requirements - API Accuracy:**
+ 1. **READ THE SOURCE CODE CAREFULLY** - Extract exact API signatures from:
+    - Dataclass definitions (@dataclass) - use EXACT parameter names
+    - Function signatures - match parameter names and types
+    - Class __init__ methods - use correct constructor arguments
+
+ 2. **DO NOT GUESS** parameter names - if you see:
+    ```python
+    @dataclass
+    class Foo:
+        bar: str  # Parameter name is 'bar', NOT 'bar_name'
+    ```
+    Then use: `Foo(bar="value")` NOT `Foo(bar_name="value")`
+
+ 3. **Computed Properties** - Do NOT pass @property values to constructors:
+    - If source has `@property def total(self): return self.a + self.b`
+    - Then DO NOT use `Foo(total=10)` - it's computed from `a` and `b`
+
+ **Test Requirements:**
+ 1. Write complete, runnable pytest tests
+ 2. Focus on covering uncovered lines: {missing_lines[:10]}
+ 3. Include:
+    - Test class with descriptive name
+    - Test methods for key functions/classes
+    - Proper imports from the actual module path
+    - Mock external dependencies (database, API calls, etc.)
+    - Edge cases (empty inputs, None, zero, negative numbers)
+    - Error handling tests (invalid input, exceptions)
+ 4. Follow pytest best practices
+ 5. Use clear, descriptive test method names
+ 6. Add docstrings explaining what each test validates
+
+ **Output Format:**
+ Return ONLY the Python test code, starting with imports. No markdown, no explanations.
+ """
+
+         try:
+             # Try Sonnet models only (Capable tier) - do NOT downgrade
+             models_to_try = [
+                 "claude-sonnet-4-5-20250929",  # Sonnet 4.5 (September 2025 - latest)
+                 "claude-3-5-sonnet-20241022",  # 3.5 Sonnet Oct 2024
+                 "claude-3-5-sonnet-20240620",  # 3.5 Sonnet Jun 2024
+             ]
+
+             response = None
+             last_error = None
+
+             for model_name in models_to_try:
+                 try:
+                     response = self._llm.messages.create(
+                         model=model_name,
+                         max_tokens=12000,  # Increased to prevent truncation on large files
+                         temperature=0.3,  # Lower temperature for consistent code
+                         messages=[{"role": "user", "content": prompt}],
+                     )
+                     logger.info(f"✓ Using Sonnet model: {model_name}")
+                     break
+                 except Exception as e:
+                     last_error = e
+                     logger.debug(f"Model {model_name} not available: {e}")
+                     continue
+
+             if response is None:
+                 error_msg = f"All Sonnet models unavailable. Last error: {last_error}"
+                 logger.error(error_msg)
+                 raise RuntimeError(error_msg)
+
+             test_code = response.content[0].text
+
+             # Clean up markdown if present
+             if "```python" in test_code:
+                 test_code = test_code.split("```python")[1].split("```")[0].strip()
+             elif "```" in test_code:
+                 test_code = test_code.split("```")[1].split("```")[0].strip()
+
+             logger.info(f"✓ LLM generated {len(test_code)} chars of test code")
+             return test_code
+
+         except Exception as e:
+             logger.error(f"LLM test generation failed: {e}, falling back to template")
+             return self._generate_basic_test_template(
+                 source_file, source_code, missing_lines
+             )
+
+     def _extract_api_docs(self, source_code: str) -> str:
+         """Extract API signatures from source code using AST.
+
+         Args:
+             source_code: Python source code
+
+         Returns:
+             Formatted API documentation for LLM prompt
+         """
+         try:
+             import sys
+             from pathlib import Path
+
+             # Add scripts to path
+             scripts_dir = Path(__file__).parent.parent.parent.parent / "scripts"
+             if str(scripts_dir) not in sys.path:
+                 sys.path.insert(0, str(scripts_dir))
+
+             from ast_api_extractor import extract_api_signatures, format_api_docs
+
+             classes, functions = extract_api_signatures(source_code)
+             return format_api_docs(classes, functions)
+         except Exception as e:
+             logger.warning(f"AST extraction failed: {e}, proceeding without API docs")
+             return "# API extraction failed - use source code carefully"
+
+     def _generate_basic_test_template(
+         self, source_file: str, source_code: str, missing_lines: list[int]
+     ) -> str:
+         """Generate basic test template.
+
+         Args:
+             source_file: Source file path
+             source_code: Source file content
+             missing_lines: Uncovered line numbers
+
+         Returns:
+             Test code as string
+         """
+         # Extract module name
+         module_path = source_file.replace("/", ".").replace(".py", "")
+
+         template = f'''"""Auto-generated tests for {source_file}.
+
+ Coverage gaps on lines: {missing_lines[:10]}
+ """
+
+ import pytest
+
+
+ class TestGeneratedCoverage:
+     """Tests to improve coverage for {source_file}."""
+
+     def test_module_imports(self):
+         """Test that module can be imported."""
+         try:
+             import {module_path}
+             assert True
+         except ImportError as e:
+             pytest.fail(f"Module import failed: {{e}}")
+
+     def test_placeholder_for_lines_{missing_lines[0] if missing_lines else 0}(self):
+         """Placeholder test for uncovered code.
+
+         TODO: Implement actual test logic for lines {missing_lines[:5]}
+         """
+         # This is a placeholder - connect to LLM for real test generation
+         assert True, "Placeholder test - needs implementation"
+ '''
+         return template
+
+
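A hedged sketch of driving the generator in template mode (no API key required); the target file and line numbers are illustrative only:

    from empathy_os.orchestration.real_tools import RealTestGenerator

    generator = RealTestGenerator(project_root=".", use_llm=False)  # template fallback
    test_path = generator.generate_tests_for_file(
        "src/empathy_os/core.py",    # hypothetical target file
        missing_lines=[42, 57, 90],  # hypothetical uncovered lines
    )
    print("wrote", test_path)
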
+ class RealTestValidator:
+     """Validates generated tests by running them."""
+
+     def __init__(self, project_root: str = "."):
+         """Initialize test validator.
+
+         Args:
+             project_root: Project root directory
+         """
+         self.project_root = Path(project_root).resolve()
+
+     def validate_tests(self, test_files: list[Path]) -> dict[str, Any]:
+         """Run tests and measure coverage improvement.
+
+         Args:
+             test_files: List of test file paths
+
+         Returns:
+             Validation results dict
+
+         Raises:
+             RuntimeError: If validation fails
+         """
+         logger.info(f"Validating {len(test_files)} generated test files")
+
+         try:
+             # Run tests
+             test_paths = [str(t) for t in test_files]
+             cmd = ["pytest"] + test_paths + ["-v", "--tb=short"]
+
+             result = subprocess.run(
+                 cmd,
+                 cwd=self.project_root,
+                 capture_output=True,
+                 text=True,
+                 timeout=300,
+             )
+
+             tests_passed = result.returncode == 0
+             output_lines = result.stdout.split("\n")
+
+             # Count passed/failed
+             passed = sum(1 for line in output_lines if " PASSED" in line)
+             failed = sum(1 for line in output_lines if " FAILED" in line)
+
+             logger.info(
+                 f"Validation complete: {passed} passed, {failed} failed, "
+                 f"tests_passed={tests_passed}"
+             )
+
+             return {
+                 "all_passed": tests_passed,
+                 "passed_count": passed,
+                 "failed_count": failed,
+                 "output": result.stdout[:1000],  # Limit output
+             }
+
+         except subprocess.TimeoutExpired:
+             raise RuntimeError("Test validation timed out after 5 minutes")
+         except Exception as e:
+             logger.error(f"Test validation failed: {e}")
+             raise RuntimeError(f"Test validation failed: {e}") from e
+
+
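Taken together, analyzer, generator, and validator compose into a generate-then-validate loop; a sketch under the same import assumption:

    from empathy_os.orchestration.real_tools import (
        RealCoverageAnalyzer,
        RealTestGenerator,
        RealTestValidator,
    )

    report = RealCoverageAnalyzer().analyze()
    generator = RealTestGenerator(use_llm=False)
    generated = [
        generator.generate_tests_for_file(f["path"], f["missing_lines"])
        for f in report.uncovered_files[:3]
    ]
    results = RealTestValidator().validate_tests(generated)
    print(results["passed_count"], "passed;", results["failed_count"], "failed")
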
+ @dataclass
+ class SecurityReport:
+     """Security audit report from bandit."""
+
+     total_issues: int
+     critical_count: int
+     high_count: int
+     medium_count: int
+     low_count: int
+     issues_by_file: dict[str, list[dict[str, Any]]]
+     passed: bool
+
+
+ class RealSecurityAuditor:
+     """Runs real security audit using bandit."""
+
+     def __init__(self, project_root: str = "."):
+         """Initialize security auditor.
+
+         Args:
+             project_root: Project root directory
+         """
+         self.project_root = Path(project_root).resolve()
+
+     def audit(self, target_path: str = "src") -> SecurityReport:
+         """Run security audit on codebase.
+
+         Args:
+             target_path: Path to audit (default: src)
+
+         Returns:
+             SecurityReport with vulnerability findings
+
+         Raises:
+             RuntimeError: If security audit fails
+         """
+         logger.info(f"Running security audit on {target_path}")
+
+         try:
+             # Run bandit with JSON output
+             cmd = [
+                 "bandit",
+                 "-r",
+                 target_path,
+                 "-f",
+                 "json",
+                 "-q",  # Quiet mode - suppress progress bar and log messages
+                 "-ll",  # Only report medium and above
+             ]
+
+             result = subprocess.run(
+                 cmd,
+                 cwd=self.project_root,
+                 capture_output=True,
+                 text=True,
+                 timeout=300,
+             )
+
+             # Parse JSON output
+             try:
+                 bandit_data = json.loads(result.stdout)
+             except json.JSONDecodeError as e:
+                 # Bandit might not be installed or JSON output malformed
+                 logger.warning(f"Bandit not available or returned invalid JSON: {e}")
+                 logger.debug(f"Bandit stdout: {result.stdout[:500]}")
+                 logger.debug(f"Bandit stderr: {result.stderr[:500]}")
+                 return SecurityReport(
+                     total_issues=0,
+                     critical_count=0,
+                     high_count=0,
+                     medium_count=0,
+                     low_count=0,
+                     issues_by_file={},
+                     passed=True,
+                 )
+
+             # Count issues by severity
+             results = bandit_data.get("results", [])
+             critical_count = sum(1 for r in results if r.get("issue_severity") == "CRITICAL")
+             high_count = sum(1 for r in results if r.get("issue_severity") == "HIGH")
+             medium_count = sum(1 for r in results if r.get("issue_severity") == "MEDIUM")
+             low_count = sum(1 for r in results if r.get("issue_severity") == "LOW")
+
+             # Group by file
+             issues_by_file = {}
+             for issue in results:
+                 filepath = issue.get("filename", "unknown")
+                 if filepath not in issues_by_file:
+                     issues_by_file[filepath] = []
+                 issues_by_file[filepath].append(
+                     {
+                         "line": issue.get("line_number"),
+                         "severity": issue.get("issue_severity"),
+                         "confidence": issue.get("issue_confidence"),
+                         "message": issue.get("issue_text"),
+                         "test_id": issue.get("test_id"),
+                     }
+                 )
+
+             total_issues = len(results)
+             passed = critical_count == 0 and high_count == 0
+
+             logger.info(
+                 f"Security audit complete: {total_issues} issues "
+                 f"(critical={critical_count}, high={high_count}, medium={medium_count})"
+             )
+
+             return SecurityReport(
+                 total_issues=total_issues,
+                 critical_count=critical_count,
+                 high_count=high_count,
+                 medium_count=medium_count,
+                 low_count=low_count,
+                 issues_by_file=issues_by_file,
+                 passed=passed,
+             )
+
+         except subprocess.TimeoutExpired:
+             raise RuntimeError("Security audit timed out after 5 minutes")
+         except Exception as e:
+             logger.error(f"Security audit failed: {e}")
+             raise RuntimeError(f"Security audit failed: {e}") from e
+
+
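Usage sketch, same import assumption. Note that stock bandit reports only LOW, MEDIUM, and HIGH severities, so critical_count will normally stay 0 and passed effectively gates on high_count:

    from empathy_os.orchestration.real_tools import RealSecurityAuditor

    sec = RealSecurityAuditor().audit("src")
    if not sec.passed:
        for path, issues in sec.issues_by_file.items():
            for issue in issues:
                print(f"{path}:{issue['line']} [{issue['severity']}] {issue['message']}")
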
+ @dataclass
+ class QualityReport:
+     """Code quality report from ruff and mypy."""
+
+     quality_score: float  # 0-10
+     ruff_issues: int
+     mypy_issues: int
+     total_files: int
+     issues_by_category: dict[str, int]
+     passed: bool
+
+
+ class RealCodeQualityAnalyzer:
+     """Runs real code quality analysis using ruff and mypy."""
+
+     def __init__(self, project_root: str = "."):
+         """Initialize code quality analyzer.
+
+         Args:
+             project_root: Project root directory
+         """
+         self.project_root = Path(project_root).resolve()
+
+     def analyze(self, target_path: str = "src") -> QualityReport:
+         """Run code quality analysis.
+
+         Args:
+             target_path: Path to analyze (default: src)
+
+         Returns:
+             QualityReport with quality metrics
+
+         Raises:
+             RuntimeError: If quality analysis fails
+         """
+         logger.info(f"Running code quality analysis on {target_path}")
+
+         try:
+             # Run ruff for linting
+             ruff_issues = self._run_ruff(target_path)
+
+             # Run mypy for type checking (optional - may not be installed)
+             mypy_issues = self._run_mypy(target_path)
+
+             # Count files
+             target = self.project_root / target_path
+             py_files = list(target.rglob("*.py")) if target.is_dir() else [target]
+             total_files = len(py_files)
+
+             # Calculate quality score (0-10 scale)
+             # Start with 10, deduct points for issues
+             quality_score = 10.0
+             quality_score -= min(ruff_issues * 0.01, 3.0)  # Max -3 points for ruff
+             quality_score -= min(mypy_issues * 0.02, 2.0)  # Max -2 points for mypy
+             quality_score = max(0.0, quality_score)  # Floor at 0
+
+             # Passed if score >= 7.0
+             passed = quality_score >= 7.0
+
+             logger.info(
+                 f"Quality analysis complete: score={quality_score:.1f}/10 "
+                 f"(ruff={ruff_issues}, mypy={mypy_issues})"
+             )
+
+             return QualityReport(
+                 quality_score=quality_score,
+                 ruff_issues=ruff_issues,
+                 mypy_issues=mypy_issues,
+                 total_files=total_files,
+                 issues_by_category={"ruff": ruff_issues, "mypy": mypy_issues},
+                 passed=passed,
+             )
+
+         except Exception as e:
+             logger.error(f"Quality analysis failed: {e}")
+             raise RuntimeError(f"Quality analysis failed: {e}") from e
+
+     def _run_ruff(self, target_path: str) -> int:
+         """Run ruff linter and count issues."""
+         try:
+             cmd = ["ruff", "check", target_path, "--output-format=json"]
+
+             result = subprocess.run(
+                 cmd,
+                 cwd=self.project_root,
+                 capture_output=True,
+                 text=True,
+                 timeout=120,
+             )
+
+             # Parse JSON output
+             try:
+                 ruff_data = json.loads(result.stdout) if result.stdout else []
+                 return len(ruff_data)
+             except json.JSONDecodeError:
+                 logger.warning("Ruff returned invalid JSON")
+                 return 0
+
+         except FileNotFoundError:
+             logger.warning("Ruff not installed, skipping")
+             return 0
+         except Exception as e:
+             logger.warning(f"Ruff check failed: {e}")
+             return 0
+
+     def _run_mypy(self, target_path: str) -> int:
+         """Run mypy type checker and count issues."""
+         try:
+             cmd = ["mypy", target_path, "--no-error-summary"]
+
+             result = subprocess.run(
+                 cmd,
+                 cwd=self.project_root,
+                 capture_output=True,
+                 text=True,
+                 timeout=120,
+             )
+
+             # Count error lines
+             error_count = sum(
+                 1 for line in result.stdout.split("\n") if ": error:" in line
+             )
+             return error_count
+
+         except FileNotFoundError:
+             logger.warning("Mypy not installed, skipping")
+             return 0
+         except Exception as e:
+             logger.warning(f"Mypy check failed: {e}")
+             return 0
+
+
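To make the scoring concrete: 120 ruff findings and 40 mypy errors deduct min(120 * 0.01, 3.0) = 1.2 and min(40 * 0.02, 2.0) = 0.8 points, leaving quality_score = 10.0 - 1.2 - 0.8 = 8.0, which passes the >= 7.0 gate. A usage sketch, same import assumption:

    from empathy_os.orchestration.real_tools import RealCodeQualityAnalyzer

    quality = RealCodeQualityAnalyzer().analyze("src")
    print(f"score={quality.quality_score:.1f}/10 passed={quality.passed}")
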
+ @dataclass
+ class DocumentationReport:
+     """Documentation completeness report."""
+
+     completeness_percentage: float
+     total_functions: int
+     documented_functions: int
+     total_classes: int
+     documented_classes: int
+     missing_docstrings: list[str]
+     passed: bool
+
+
+ class RealDocumentationAnalyzer:
+     """Analyzes documentation completeness by scanning docstrings."""
+
+     def __init__(self, project_root: str = "."):
+         """Initialize documentation analyzer.
+
+         Args:
+             project_root: Project root directory
+         """
+         self.project_root = Path(project_root).resolve()
+
+     def analyze(self, target_path: str = "src") -> DocumentationReport:
+         """Analyze documentation completeness.
+
+         Args:
+             target_path: Path to analyze (default: src)
+
+         Returns:
+             DocumentationReport with completeness metrics
+
+         Raises:
+             RuntimeError: If analysis fails
+         """
+         logger.info(f"Analyzing documentation completeness in {target_path}")
+
+         import ast
+
+         target = self.project_root / target_path
+         py_files = list(target.rglob("*.py")) if target.is_dir() else [target]
+
+         total_functions = 0
+         documented_functions = 0
+         total_classes = 0
+         documented_classes = 0
+         missing_docstrings = []
+
+         for py_file in py_files:
+             if py_file.name.startswith("__") and py_file.name.endswith("__.py"):
+                 continue  # Skip __init__.py, __main__.py
+
+             try:
+                 tree = ast.parse(py_file.read_text())
+
+                 for node in ast.walk(tree):
+                     if isinstance(node, ast.FunctionDef):
+                         if not node.name.startswith("_"):  # Public functions
+                             total_functions += 1
+                             if ast.get_docstring(node):
+                                 documented_functions += 1
+                             else:
+                                 missing_docstrings.append(
+                                     f"{py_file.relative_to(self.project_root)}:{node.lineno} - function {node.name}"
+                                 )
+
+                     elif isinstance(node, ast.ClassDef):
+                         if not node.name.startswith("_"):  # Public classes
+                             total_classes += 1
+                             if ast.get_docstring(node):
+                                 documented_classes += 1
+                             else:
+                                 missing_docstrings.append(
+                                     f"{py_file.relative_to(self.project_root)}:{node.lineno} - class {node.name}"
+                                 )
+
+             except Exception as e:
+                 logger.warning(f"Failed to parse {py_file}: {e}")
+                 continue
+
+         # Calculate completeness
+         total_items = total_functions + total_classes
+         documented_items = documented_functions + documented_classes
+
+         if total_items > 0:
+             completeness_percentage = (documented_items / total_items) * 100
+         else:
+             completeness_percentage = 100.0  # No public APIs, consider complete
+
+         passed = completeness_percentage >= 80.0
+
+         logger.info(
+             f"Documentation analysis complete: {completeness_percentage:.1f}% "
+             f"({documented_items}/{total_items} items documented)"
+         )
+
+         return DocumentationReport(
+             completeness_percentage=completeness_percentage,
+             total_functions=total_functions,
+             documented_functions=documented_functions,
+             total_classes=total_classes,
+             documented_classes=documented_classes,
+             missing_docstrings=missing_docstrings[:10],  # Limit to first 10
+             passed=passed,
+         )
+
+
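Usage sketch, same import assumption; only public (non-underscore) functions and classes are counted, and the missing list is capped at ten entries:

    from empathy_os.orchestration.real_tools import RealDocumentationAnalyzer

    docs = RealDocumentationAnalyzer().analyze("src")
    print(f"{docs.completeness_percentage:.1f}% documented "
          f"({docs.documented_functions}/{docs.total_functions} functions)")
    for item in docs.missing_docstrings:  # first 10 undocumented items
        print(item)
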
+ # Tool registry for agents
+ REAL_TOOLS = {
+     "coverage_analyzer": RealCoverageAnalyzer,
+     "test_generator": RealTestGenerator,
+     "test_validator": RealTestValidator,
+     "security_auditor": RealSecurityAuditor,
+     "code_quality_analyzer": RealCodeQualityAnalyzer,
+     "documentation_analyzer": RealDocumentationAnalyzer,
+ }
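
The registry maps stable string keys to tool classes so agent code can resolve tools by name; a minimal sketch, same import assumption:

    from empathy_os.orchestration.real_tools import REAL_TOOLS

    tool_cls = REAL_TOOLS["coverage_analyzer"]     # -> RealCoverageAnalyzer
    report = tool_cls(project_root=".").analyze()  # same API as direct construction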