empathy-framework 4.9.1__py3-none-any.whl → 5.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {empathy_framework-4.9.1.dist-info → empathy_framework-5.0.1.dist-info}/METADATA +1 -1
  2. {empathy_framework-4.9.1.dist-info → empathy_framework-5.0.1.dist-info}/RECORD +47 -26
  3. empathy_os/__init__.py +1 -1
  4. empathy_os/cache/hash_only.py +6 -3
  5. empathy_os/cache/hybrid.py +6 -3
  6. empathy_os/cli_legacy.py +27 -1
  7. empathy_os/cli_minimal.py +512 -15
  8. empathy_os/cli_router.py +145 -113
  9. empathy_os/cli_unified.py +25 -0
  10. empathy_os/dashboard/__init__.py +42 -0
  11. empathy_os/dashboard/app.py +512 -0
  12. empathy_os/dashboard/simple_server.py +403 -0
  13. empathy_os/dashboard/standalone_server.py +536 -0
  14. empathy_os/memory/__init__.py +19 -5
  15. empathy_os/memory/short_term.py +4 -70
  16. empathy_os/memory/types.py +2 -2
  17. empathy_os/models/__init__.py +3 -0
  18. empathy_os/models/adaptive_routing.py +437 -0
  19. empathy_os/models/registry.py +4 -4
  20. empathy_os/socratic/ab_testing.py +1 -1
  21. empathy_os/telemetry/__init__.py +29 -1
  22. empathy_os/telemetry/agent_coordination.py +478 -0
  23. empathy_os/telemetry/agent_tracking.py +350 -0
  24. empathy_os/telemetry/approval_gates.py +563 -0
  25. empathy_os/telemetry/event_streaming.py +405 -0
  26. empathy_os/telemetry/feedback_loop.py +557 -0
  27. empathy_os/vscode_bridge 2.py +173 -0
  28. empathy_os/workflows/__init__.py +4 -4
  29. empathy_os/workflows/base.py +495 -43
  30. empathy_os/workflows/history.py +3 -5
  31. empathy_os/workflows/output.py +410 -0
  32. empathy_os/workflows/progress.py +324 -22
  33. empathy_os/workflows/progressive/README 2.md +454 -0
  34. empathy_os/workflows/progressive/__init__ 2.py +92 -0
  35. empathy_os/workflows/progressive/cli 2.py +242 -0
  36. empathy_os/workflows/progressive/core 2.py +488 -0
  37. empathy_os/workflows/progressive/orchestrator 2.py +701 -0
  38. empathy_os/workflows/progressive/reports 2.py +528 -0
  39. empathy_os/workflows/progressive/telemetry 2.py +280 -0
  40. empathy_os/workflows/progressive/test_gen 2.py +514 -0
  41. empathy_os/workflows/progressive/workflow 2.py +628 -0
  42. empathy_os/workflows/routing.py +5 -0
  43. empathy_os/workflows/security_audit.py +189 -0
  44. {empathy_framework-4.9.1.dist-info → empathy_framework-5.0.1.dist-info}/WHEEL +0 -0
  45. {empathy_framework-4.9.1.dist-info → empathy_framework-5.0.1.dist-info}/entry_points.txt +0 -0
  46. {empathy_framework-4.9.1.dist-info → empathy_framework-5.0.1.dist-info}/licenses/LICENSE +0 -0
  47. {empathy_framework-4.9.1.dist-info → empathy_framework-5.0.1.dist-info}/top_level.txt +0 -0
empathy_os/workflows/security_audit.py +189 -0
@@ -342,11 +342,29 @@ class SecurityAuditWorkflow(BaseWorkflow):
342
342
  if self._is_detection_code(line_content, match.group()):
343
343
  continue
344
344
 
345
+ # Phase 2: Skip safe SQL parameterization patterns
346
+ if vuln_type == "sql_injection":
347
+ if self._is_safe_sql_parameterization(
348
+ line_content,
349
+ match.group(),
350
+ content,
351
+ ):
352
+ continue
353
+
345
354
  # Skip fake/test credentials
346
355
  if vuln_type == "hardcoded_secret":
347
356
  if self._is_fake_credential(match.group()):
348
357
  continue
349
358
 
359
+ # Phase 2: Skip safe random usage (tests, demos, documented)
360
+ if vuln_type == "insecure_random":
361
+ if self._is_safe_random_usage(
362
+ line_content,
363
+ file_name,
364
+ content,
365
+ ):
366
+ continue
367
+
350
368
  # Skip command_injection in documentation strings
351
369
  if vuln_type == "command_injection":
352
370
  if self._is_documentation_or_string(
@@ -380,6 +398,29 @@ class SecurityAuditWorkflow(BaseWorkflow):
380
398
  except OSError:
381
399
  continue
382
400
 
401
+ # Phase 3: Apply AST-based filtering for command injection
402
+ try:
403
+ from .security_audit_phase3 import apply_phase3_filtering
404
+
405
+ # Separate command injection findings
406
+ cmd_findings = [f for f in findings if f["type"] == "command_injection"]
407
+ other_findings = [f for f in findings if f["type"] != "command_injection"]
408
+
409
+ # Apply Phase 3 filtering to command injection
410
+ filtered_cmd = apply_phase3_filtering(cmd_findings)
411
+
412
+ # Combine back
413
+ findings = other_findings + filtered_cmd
414
+
415
+ logger.info(
416
+ f"Phase 3: Filtered command_injection from {len(cmd_findings)} to {len(filtered_cmd)} "
417
+ f"({len(cmd_findings) - len(filtered_cmd)} false positives removed)"
418
+ )
419
+ except ImportError:
420
+ logger.debug("Phase 3 module not available, skipping AST-based filtering")
421
+ except Exception as e:
422
+ logger.warning(f"Phase 3 filtering failed: {e}")
423
+
383
424
  input_tokens = len(str(input_data)) // 4
384
425
  output_tokens = len(str(findings)) // 4
385
426
 
@@ -541,6 +582,154 @@ class SecurityAuditWorkflow(BaseWorkflow):
541
582
 
542
583
  return False
543
584
 
585
+ def _is_safe_sql_parameterization(self, line_content: str, match_text: str, file_content: str) -> bool:
586
+ """Check if SQL query uses safe parameterization despite f-string usage.
587
+
588
+ Phase 2 Enhancement: Detects safe patterns like:
589
+ - placeholders = ",".join("?" * len(ids))
590
+ - cursor.execute(f"... IN ({placeholders})", ids)
591
+
592
+ This prevents false positives for the SQLite-recommended pattern
593
+ of building dynamic placeholder strings.
594
+
595
+ Args:
596
+ line_content: The line containing the match (may be incomplete for multi-line)
597
+ match_text: The matched text
598
+ file_content: Full file content for context analysis
599
+
600
+ Returns:
601
+ True if this is safe parameterized SQL, False otherwise
602
+ """
603
+ # Get the position of the match in the full file content
604
+ match_pos = file_content.find(match_text)
605
+ if match_pos == -1:
606
+ # Try to find cursor.execute
607
+ match_pos = file_content.find("cursor.execute")
608
+ if match_pos == -1:
609
+ return False
610
+
611
+ # Extract a larger context (next 200 chars after match)
612
+ context = file_content[match_pos:match_pos + 200]
613
+
614
+ # Also get lines before the match for placeholder detection
615
+ lines_before = file_content[:match_pos].split("\n")
616
+ recent_lines = lines_before[-10:] if len(lines_before) > 10 else lines_before
617
+
618
+ # Pattern 1: Check if this is a placeholder-based parameterized query
619
+ # Look for: cursor.execute(f"... IN ({placeholders})", params)
620
+ if "placeholders" in context or any("placeholders" in line for line in recent_lines[-5:]):
621
+ # Check if context has both f-string and separate parameters
622
+ # Pattern: f"...{placeholders}..." followed by comma and params
623
+ if re.search(r'f["\'][^"\']*\{placeholders\}[^"\']*["\']\s*,\s*\w+', context):
624
+ return True # Safe - has separate parameters
625
+
626
+ # Also check if recent lines built the placeholders
627
+ for prev_line in reversed(recent_lines):
628
+ if "placeholders" in prev_line and '"?"' in prev_line and "join" in prev_line:
629
+ # Found placeholder construction
630
+ # Now check if the execute has separate parameters
631
+ if "," in context and any(param in context for param in ["run_ids", "ids", "params", "values", ")"]):
632
+ return True
633
+
634
+ # Pattern 2: Check if f-string only builds SQL structure with constants
635
+ # Example: f"SELECT * FROM {TABLE_NAME}" where TABLE_NAME is a constant
636
+ f_string_vars = re.findall(r'\{(\w+)\}', context)
637
+ if f_string_vars:
638
+ # Check if all variables are constants (UPPERCASE or table/column names)
639
+ all_constants = all(
640
+ var.isupper() or "TABLE" in var.upper() or "COLUMN" in var.upper()
641
+ for var in f_string_vars
642
+ )
643
+ if all_constants:
644
+ return True # Safe - using constants, not user data
645
+
646
+ # Pattern 3: Check for security note comments nearby
647
+ # If developers added security notes, it's likely safe
648
+ for prev_line in reversed(recent_lines[-3:]):
649
+ if "security note" in prev_line.lower() and "safe" in prev_line.lower():
650
+ return True
651
+
652
+ return False
653
+
654
+ def _is_safe_random_usage(self, line_content: str, file_path: str, file_content: str) -> bool:
655
+ """Check if random usage is in a safe context (tests, simulations, non-crypto).
656
+
657
+ Phase 2 Enhancement: Reduces false positives for random module usage
658
+ in test fixtures, A/B testing simulations, and demo code.
659
+
660
+ Args:
661
+ line_content: The line containing the match
662
+ file_path: Path to the file being scanned
663
+ file_content: Full file content for context analysis
664
+
665
+ Returns:
666
+ True if random usage is safe/documented, False if potentially insecure
667
+ """
668
+ # Check if file is a test file
669
+ is_test = any(pattern in file_path.lower() for pattern in ["/test", "test_", "conftest"])
670
+
671
+ # Check for explicit security notes nearby
672
+ lines = file_content.split("\n")
673
+ line_index = None
674
+ for i, line in enumerate(lines):
675
+ if line_content.strip() in line:
676
+ line_index = i
677
+ break
678
+
679
+ if line_index is not None:
680
+ # Check 5 lines before and after for security notes
681
+ context_start = max(0, line_index - 5)
682
+ context_end = min(len(lines), line_index + 5)
683
+ context = "\n".join(lines[context_start:context_end]).lower()
684
+
685
+ # Look for clarifying comments
686
+ safe_indicators = [
687
+ "security note",
688
+ "not cryptographic",
689
+ "not for crypto",
690
+ "test data",
691
+ "demo data",
692
+ "simulation",
693
+ "reproducible",
694
+ "deterministic",
695
+ "fixed seed",
696
+ "not used for security",
697
+ "not used for secrets",
698
+ "not used for tokens",
699
+ ]
700
+
701
+ if any(indicator in context for indicator in safe_indicators):
702
+ return True # Documented as safe
703
+
704
+ # Check for common safe random patterns
705
+ line_lower = line_content.lower()
706
+
707
+ # Pattern 1: Fixed seed (reproducible tests)
708
+ if "random.seed(" in line_lower:
709
+ return True # Fixed seed is for reproducibility, not security
710
+
711
+ # Pattern 2: A/B testing, simulations, demos
712
+ safe_contexts = [
713
+ "simulation",
714
+ "demo",
715
+ "a/b test",
716
+ "ab_test",
717
+ "fixture",
718
+ "mock",
719
+ "example",
720
+ "sample",
721
+ ]
722
+ if any(context in file_path.lower() for context in safe_contexts):
723
+ return True
724
+
725
+ # If it's a test file without crypto indicators, it's probably safe
726
+ if is_test:
727
+ crypto_indicators = ["password", "secret", "token", "key", "crypto", "auth"]
728
+ if not any(indicator in file_path.lower() for indicator in crypto_indicators):
729
+ return True
730
+
731
+ return False
732
+
544
733
  async def _assess(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
545
734
  """Risk scoring and severity classification.
546
735