PyPI - empathy-framework - Versions diffs - 4.7.0__py3-none-any.whl → 4.8.0__py3-none-any.whl - Mend

empathy-framework 4.7.0py3-none-any.whl → 4.8.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

empathy_framework-4.8.0.dist-info/METADATA +753 -0
{empathy_framework-4.7.0.dist-info → empathy_framework-4.8.0.dist-info}/RECORD +83 -37
{empathy_framework-4.7.0.dist-info → empathy_framework-4.8.0.dist-info}/WHEEL +1 -1
{empathy_framework-4.7.0.dist-info → empathy_framework-4.8.0.dist-info}/entry_points.txt +2 -1
empathy_os/__init__.py +2 -0
empathy_os/cache/hash_only.py +6 -3
empathy_os/cache/hybrid.py +6 -3
empathy_os/cli/__init__.py +128 -238
empathy_os/cli/__main__.py +5 -33
empathy_os/cli/commands/__init__.py +1 -8
empathy_os/cli/commands/help.py +331 -0
empathy_os/cli/commands/info.py +140 -0
empathy_os/cli/commands/inspect.py +437 -0
empathy_os/cli/commands/metrics.py +92 -0
empathy_os/cli/commands/orchestrate.py +184 -0
empathy_os/cli/commands/patterns.py +207 -0
empathy_os/cli/commands/provider.py +93 -81
empathy_os/cli/commands/setup.py +96 -0
empathy_os/cli/commands/status.py +235 -0
empathy_os/cli/commands/sync.py +166 -0
empathy_os/cli/commands/tier.py +121 -0
empathy_os/cli/commands/workflow.py +574 -0
empathy_os/cli/parsers/__init__.py +62 -0
empathy_os/cli/parsers/help.py +41 -0
empathy_os/cli/parsers/info.py +26 -0
empathy_os/cli/parsers/inspect.py +66 -0
empathy_os/cli/parsers/metrics.py +42 -0
empathy_os/cli/parsers/orchestrate.py +61 -0
empathy_os/cli/parsers/patterns.py +54 -0
empathy_os/cli/parsers/provider.py +40 -0
empathy_os/cli/parsers/setup.py +42 -0
empathy_os/cli/parsers/status.py +47 -0
empathy_os/cli/parsers/sync.py +31 -0
empathy_os/cli/parsers/tier.py +33 -0
empathy_os/cli/parsers/workflow.py +77 -0
empathy_os/cli/utils/__init__.py +1 -0
empathy_os/cli/utils/data.py +242 -0
empathy_os/cli/utils/helpers.py +68 -0
empathy_os/{cli.py → cli_legacy.py} +27 -27
empathy_os/cli_minimal.py +662 -0
empathy_os/cli_router.py +384 -0
empathy_os/cli_unified.py +38 -2
empathy_os/memory/__init__.py +19 -5
empathy_os/memory/short_term.py +14 -404
empathy_os/memory/types.py +437 -0
empathy_os/memory/unified.py +61 -48
empathy_os/models/fallback.py +1 -1
empathy_os/models/provider_config.py +59 -344
empathy_os/models/registry.py +31 -180
empathy_os/monitoring/alerts.py +14 -20
empathy_os/monitoring/alerts_cli.py +24 -7
empathy_os/project_index/__init__.py +2 -0
empathy_os/project_index/index.py +210 -5
empathy_os/project_index/scanner.py +45 -14
empathy_os/project_index/scanner_parallel.py +291 -0
empathy_os/socratic/ab_testing.py +1 -1
empathy_os/vscode_bridge 2.py +173 -0
empathy_os/workflows/__init__.py +31 -2
empathy_os/workflows/base.py +349 -325
empathy_os/workflows/bug_predict.py +8 -0
empathy_os/workflows/builder.py +273 -0
empathy_os/workflows/caching.py +253 -0
empathy_os/workflows/code_review_pipeline.py +1 -0
empathy_os/workflows/history.py +510 -0
empathy_os/workflows/output.py +410 -0
empathy_os/workflows/perf_audit.py +125 -19
empathy_os/workflows/progress.py +324 -22
empathy_os/workflows/progressive/README 2.md +454 -0
empathy_os/workflows/progressive/__init__ 2.py +92 -0
empathy_os/workflows/progressive/cli 2.py +242 -0
empathy_os/workflows/progressive/core 2.py +488 -0
empathy_os/workflows/progressive/orchestrator 2.py +701 -0
empathy_os/workflows/progressive/reports 2.py +528 -0
empathy_os/workflows/progressive/telemetry 2.py +280 -0
empathy_os/workflows/progressive/test_gen 2.py +514 -0
empathy_os/workflows/progressive/workflow 2.py +628 -0
empathy_os/workflows/routing.py +168 -0
empathy_os/workflows/secure_release.py +1 -0
empathy_os/workflows/security_audit.py +190 -0
empathy_os/workflows/security_audit_phase3.py +328 -0
empathy_os/workflows/telemetry_mixin.py +269 -0
empathy_framework-4.7.0.dist-info/METADATA +0 -1598
empathy_os/dashboard/__init__.py +0 -15
empathy_os/dashboard/server.py +0 -941
{empathy_framework-4.7.0.dist-info → empathy_framework-4.8.0.dist-info}/licenses/LICENSE +0 -0
{empathy_framework-4.7.0.dist-info → empathy_framework-4.8.0.dist-info}/top_level.txt +0 -0

empathy_os/project_index/index.py CHANGED Viewed

@@ -17,6 +17,7 @@ from empathy_os.config import _validate_file_path
 from .models import FileRecord, IndexConfig, ProjectSummary
 from .scanner import ProjectScanner
+from .scanner_parallel import ParallelProjectScanner
 logger = logging.getLogger(__name__)
@@ -39,10 +40,27 @@ class ProjectIndex:
         project_root: str,
         config: IndexConfig | None = None,
         redis_client: Any | None = None,
+        workers: int | None = None,
+        use_parallel: bool = True,
     ):
+        """Initialize ProjectIndex.
+        Args:
+            project_root: Root directory of the project
+            config: Optional index configuration
+            redis_client: Optional Redis client for real-time sync
+            workers: Number of worker processes for parallel scanning.
+                None (default): Use all CPU cores
+                1: Sequential processing
+                N: Use N worker processes
+            use_parallel: Whether to use parallel scanner (default: True).
+                Set to False to force sequential processing.
+        """
         self.project_root = Path(project_root)
         self.config = config or IndexConfig()
         self.redis_client = redis_client
+        self.workers = workers
+        self.use_parallel = use_parallel
         # In-memory state
         self._records: dict[str, FileRecord] = {}
@@ -174,15 +192,34 @@ class ProjectIndex:
     # ===== Index Operations =====
-    def refresh(self) -> None:
+    def refresh(self, analyze_dependencies: bool = True) -> None:
         """Refresh the entire index by scanning the project.
-        This rebuilds the index from scratch.
+        This rebuilds the index from scratch using parallel processing when enabled.
+        Args:
+            analyze_dependencies: Whether to analyze import dependencies.
+                Set to False for faster scans when dependency graph not needed.
+                Default: True.
+        Performance:
+            - Sequential: ~3.6s for 3,472 files
+            - Parallel (12 workers): ~1.8s for 3,472 files
+            - Parallel without deps: ~1.0s for 3,472 files
         """
         logger.info(f"Refreshing index for {self.project_root}")
-        scanner = ProjectScanner(str(self.project_root), self.config)
-        records, summary = scanner.scan()
+        # Use parallel scanner by default for better performance
+        if self.use_parallel and (self.workers is None or self.workers > 1):
+            logger.info(f"Using parallel scanner (workers: {self.workers or 'auto'})")
+            scanner = ParallelProjectScanner(
+                str(self.project_root), self.config, workers=self.workers
+            )
+        else:
+            logger.info("Using sequential scanner")
+            scanner = ProjectScanner(str(self.project_root), self.config)
+        records, summary = scanner.scan(analyze_dependencies=analyze_dependencies)
         # Update internal state
         self._records = {r.path: r for r in records}
@@ -193,9 +230,177 @@ class ProjectIndex:
         self.save()
         logger.info(
-            f"Index refreshed: {len(self._records)} files, {summary.files_needing_attention} need attention",
+            f"Index refreshed: {len(self._records)} files, "
+            f"{summary.files_needing_attention} need attention"
         )
+    def refresh_incremental(
+        self, analyze_dependencies: bool = True, base_ref: str = "HEAD"
+    ) -> tuple[int, int]:
+        """Incrementally refresh index by scanning only changed files.
+        Uses git diff to identify changed files since last index generation.
+        This is significantly faster than full refresh for small changes.
+        Args:
+            analyze_dependencies: Whether to rebuild dependency graph.
+                Note: Even if True, only changed files are re-scanned.
+                Default: True.
+            base_ref: Git ref to diff against (default: "HEAD").
+                Use "HEAD~1" for changes since last commit,
+                "origin/main" for changes vs remote, etc.
+        Returns:
+            Tuple of (files_updated, files_removed)
+        Performance:
+            - Small change (10 files): ~0.1s vs ~1.0s full refresh (10x faster)
+            - Medium change (100 files): ~0.3s vs ~1.0s full refresh (3x faster)
+            - Large change (1000+ files): Similar to full refresh
+        Raises:
+            RuntimeError: If not in a git repository
+            ValueError: If no previous index exists
+        Example:
+            >>> index = ProjectIndex(".")
+            >>> index.load()
+            >>> updated, removed = index.refresh_incremental()
+            >>> print(f"Updated {updated} files, removed {removed}")
+        """
+        import subprocess
+        # Ensure we have a previous index to update
+        if not self._records:
+            raise ValueError(
+                "No existing index to update. Run refresh() first to create initial index."
+            )
+        # Get changed files from git
+        try:
+            # Get untracked files
+            result_untracked = subprocess.run(
+                ["git", "ls-files", "--others", "--exclude-standard"],
+                cwd=self.project_root,
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            untracked_files = (
+                set(result_untracked.stdout.strip().split("\n"))
+                if result_untracked.stdout.strip()
+                else set()
+            )
+            # Get modified/added files since base_ref
+            result_modified = subprocess.run(
+                ["git", "diff", "--name-only", base_ref],
+                cwd=self.project_root,
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            modified_files = (
+                set(result_modified.stdout.strip().split("\n"))
+                if result_modified.stdout.strip()
+                else set()
+            )
+            # Get deleted files
+            result_deleted = subprocess.run(
+                ["git", "diff", "--name-only", "--diff-filter=D", base_ref],
+                cwd=self.project_root,
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            deleted_files = (
+                set(result_deleted.stdout.strip().split("\n"))
+                if result_deleted.stdout.strip()
+                else set()
+            )
+        except subprocess.CalledProcessError as e:
+            raise RuntimeError(f"Git command failed: {e}. Are you in a git repository?")
+        except FileNotFoundError:
+            raise RuntimeError("Git not found. Incremental refresh requires git.")
+        # Combine untracked and modified
+        changed_files = untracked_files | modified_files
+        # Filter out files that don't match our patterns
+        changed_paths = []
+        for file_str in changed_files:
+            if not file_str:  # Skip empty strings
+                continue
+            file_path = self.project_root / file_str
+            if file_path.exists() and not self._is_excluded(file_path):
+                changed_paths.append(file_path)
+        logger.info(
+            f"Incremental refresh: {len(changed_paths)} changed, {len(deleted_files)} deleted"
+        )
+        # If no changes, nothing to do
+        if not changed_paths and not deleted_files:
+            logger.info("No changes detected, index is up to date")
+            return 0, 0
+        # Re-scan changed files using appropriate scanner
+        if changed_paths:
+            if self.use_parallel and len(changed_paths) > 100:
+                # Use parallel scanner for large change sets
+                scanner = ParallelProjectScanner(
+                    str(self.project_root), self.config, workers=self.workers
+                )
+                # Monkey-patch _discover_files to return only changed files
+                scanner._discover_files = lambda: changed_paths
+            else:
+                # Use sequential scanner for small change sets
+                scanner = ProjectScanner(str(self.project_root), self.config)
+                scanner._discover_files = lambda: changed_paths
+            # Scan only changed files (without dependency analysis yet)
+            new_records, _ = scanner.scan(analyze_dependencies=False)
+            # Update records
+            for record in new_records:
+                self._records[record.path] = record
+        # Remove deleted files
+        files_removed = 0
+        for deleted_file in deleted_files:
+            if deleted_file and deleted_file in self._records:
+                del self._records[deleted_file]
+                files_removed += 1
+        # Rebuild dependency graph if requested
+        if analyze_dependencies:
+            scanner = ProjectScanner(str(self.project_root), self.config)
+            all_records = list(self._records.values())
+            scanner._analyze_dependencies(all_records)
+            scanner._calculate_impact_scores(all_records)
+        # Rebuild summary
+        scanner = ProjectScanner(str(self.project_root), self.config)
+        self._summary = scanner._build_summary(list(self._records.values()))
+        self._generated_at = datetime.now()
+        # Save to disk
+        self.save()
+        files_updated = len(changed_paths)
+        logger.info(
+            f"Incremental refresh complete: {files_updated} updated, {files_removed} removed"
+        )
+        return files_updated, files_removed
+    def _is_excluded(self, path: Path) -> bool:
+        """Check if a path should be excluded from indexing."""
+        scanner = ProjectScanner(str(self.project_root), self.config)
+        return scanner._is_excluded(path)
     def update_file(self, path: str, **updates: Any) -> bool:
         """Update metadata for a specific file.

empathy_os/project_index/scanner.py CHANGED Viewed

@@ -119,9 +119,14 @@ class ProjectScanner:
         except (SyntaxError, ValueError, OSError):
             return None
-    def scan(self) -> tuple[list[FileRecord], ProjectSummary]:
+    def scan(self, analyze_dependencies: bool = True) -> tuple[list[FileRecord], ProjectSummary]:
         """Scan the entire project and return file records and summary.
+        Args:
+            analyze_dependencies: Whether to analyze import dependencies.
+                Set to False to skip expensive dependency graph analysis (saves ~2s).
+                Default: True for backwards compatibility.
         Returns:
             Tuple of (list of FileRecords, ProjectSummary)
@@ -140,11 +145,12 @@ class ProjectScanner:
             if record:
                 records.append(record)
-        # Third pass: build dependency graph
-        self._analyze_dependencies(records)
+        # Third pass: build dependency graph (optional - saves ~2s when skipped)
+        if analyze_dependencies:
+            self._analyze_dependencies(records)
-        # Calculate impact scores
-        self._calculate_impact_scores(records)
+            # Calculate impact scores (depends on dependency graph)
+            self._calculate_impact_scores(records)
         # Determine attention needs
         self._determine_attention_needs(records)
@@ -320,8 +326,8 @@ class ProjectScanner:
                 staleness_days = (last_modified - tests_last_modified).days
                 is_stale = staleness_days >= self.config.staleness_threshold_days
-        # Analyze code metrics
-        metrics = self._analyze_code_metrics(file_path, language)
+        # Analyze code metrics (skip expensive AST analysis for test files)
+        metrics = self._analyze_code_metrics(file_path, language, category)
         return FileRecord(
             path=rel_path,
@@ -426,11 +432,21 @@ class ProjectScanner:
         return TestRequirement.REQUIRED
-    def _analyze_code_metrics(self, path: Path, language: str) -> dict[str, Any]:
+    def _analyze_code_metrics(
+        self, path: Path, language: str, category: FileCategory = FileCategory.SOURCE
+    ) -> dict[str, Any]:
         """Analyze code metrics for a file with caching.
         Uses cached AST parsing for Python files to avoid re-parsing
         unchanged files during incremental scans.
+        Optimization: Skips expensive AST analysis for test files since they
+        don't need complexity scoring (saves ~30% of AST traversal time).
+        Args:
+            path: Path to file to analyze
+            language: Programming language of the file
+            category: File category (SOURCE, TEST, etc.)
         """
         metrics: dict[str, Any] = {
             "lines_of_code": 0,
@@ -458,13 +474,28 @@ class ProjectScanner:
                 [line for line in lines if line.strip() and not line.strip().startswith("#")],
             )
-            # Use cached AST parsing for Python files
-            file_path_str = str(path)
-            file_hash = self._hash_file(file_path_str)
-            tree = self._parse_python_cached(file_path_str, file_hash)
+            # Optimization: Skip expensive AST analysis for test files
+            # Test files don't need complexity scoring, docstring/type hint checks
+            # This saves ~30% of AST traversal time (1+ seconds on large codebases)
+            if category == FileCategory.TEST:
+                # For test files, just count test functions with simple regex
+                import re
+                test_func_pattern = re.compile(r"^\s*def\s+test_\w+\(")
+                metrics["test_count"] = sum(
+                    1 for line in lines if test_func_pattern.match(line)
+                )
+                # Mark as having test functions (for test file records)
+                if metrics["test_count"] > 0:
+                    metrics["lines_of_test"] = metrics["lines_of_code"]
+            else:
+                # Use cached AST parsing for source files only
+                file_path_str = str(path)
+                file_hash = self._hash_file(file_path_str)
+                tree = self._parse_python_cached(file_path_str, file_hash)
-            if tree:
-                metrics.update(self._analyze_python_ast(tree))
+                if tree:
+                    metrics.update(self._analyze_python_ast(tree))
         except OSError:
             pass

empathy_os/project_index/scanner_parallel.py ADDED Viewed

@@ -0,0 +1,291 @@
+"""Parallel Project Scanner - Multi-core optimized file scanning.
+This module provides a parallel implementation of ProjectScanner using
+multiprocessing to distribute file analysis across CPU cores.
+Expected speedup: 3-4x on quad-core machines for large codebases (>1000 files).
+Usage:
+    from empathy_os.project_index.scanner_parallel import ParallelProjectScanner
+    scanner = ParallelProjectScanner(project_root=".", workers=4)
+    records, summary = scanner.scan()
+Copyright 2025 Smart AI Memory, LLC
+Licensed under Fair Source 0.9
+"""
+import multiprocessing as mp
+from functools import partial
+from pathlib import Path
+from typing import Any
+from .models import FileRecord, IndexConfig, ProjectSummary
+from .scanner import ProjectScanner
+def _analyze_file_worker(
+    file_path_str: str,
+    project_root_str: str,
+    config_dict: dict[str, Any],
+    test_file_map: dict[str, str],
+) -> FileRecord | None:
+    """Worker function to analyze a single file in parallel.
+    This function is designed to be pickled and sent to worker processes.
+    It reconstructs necessary objects from serialized data.
+    Args:
+        file_path_str: String path to file to analyze
+        project_root_str: String path to project root
+        config_dict: Serialized IndexConfig as dict
+        test_file_map: Mapping of source files to test files
+    Returns:
+        FileRecord for the analyzed file, or None if analysis fails
+    """
+    from pathlib import Path
+    # Reconstruct objects
+    file_path = Path(file_path_str)
+    project_root = Path(project_root_str)
+    # Create a temporary scanner instance for this worker
+    # (Each worker gets its own scanner to avoid shared state issues)
+    config = IndexConfig(**config_dict)
+    scanner = ProjectScanner(project_root=project_root, config=config)
+    scanner._test_file_map = test_file_map
+    # Analyze the file
+    return scanner._analyze_file(file_path)
+class ParallelProjectScanner(ProjectScanner):
+    """Parallel implementation of ProjectScanner using multiprocessing.
+    Uses multiple CPU cores to analyze files concurrently, providing
+    significant speedup for large codebases.
+    Attributes:
+        workers: Number of worker processes (default: CPU count)
+    Performance:
+        - Sequential: ~9.2s for 3,469 files (375 files/sec)
+        - Parallel (4 workers): ~2.5s expected (1,387 files/sec)
+        - Speedup: 3.7x on quad-core machines
+    Memory:
+        - Each worker creates its own scanner instance
+        - Peak memory scales with worker count
+        - Expected: 2x-3x memory usage vs sequential
+    Example:
+        >>> scanner = ParallelProjectScanner(project_root=".", workers=4)
+        >>> records, summary = scanner.scan()
+        >>> print(f"Scanned {summary.total_files} files")
+    """
+    def __init__(
+        self,
+        project_root: str,
+        config: IndexConfig | None = None,
+        workers: int | None = None,
+    ):
+        """Initialize parallel scanner.
+        Args:
+            project_root: Root directory of project to scan
+            config: Optional configuration (uses defaults if not provided)
+            workers: Number of worker processes.
+                None (default): Use all available CPUs
+                1: Sequential processing (same as ProjectScanner)
+                N: Use N worker processes
+        """
+        super().__init__(project_root, config)
+        self.workers = workers or mp.cpu_count()
+    def scan(
+        self,
+        analyze_dependencies: bool = True,
+        use_parallel: bool = True,
+    ) -> tuple[list[FileRecord], ProjectSummary]:
+        """Scan the entire project using parallel processing.
+        Args:
+            analyze_dependencies: Whether to analyze import dependencies.
+                Set to False to skip expensive dependency graph analysis.
+                Default: True for backwards compatibility.
+            use_parallel: Whether to use parallel processing.
+                Set to False to use sequential processing.
+                Default: True.
+        Returns:
+            Tuple of (list of FileRecords, ProjectSummary)
+        Note:
+            Dependency analysis is always sequential (after file analysis).
+            Parallel processing only applies to file analysis phase.
+        """
+        records: list[FileRecord] = []
+        # First pass: discover all files (sequential - fast)
+        all_files = self._discover_files()
+        # Build test file mapping (sequential - fast)
+        self._build_test_mapping(all_files)
+        # Second pass: analyze each file (PARALLEL - slow)
+        if use_parallel and self.workers > 1:
+            records = self._analyze_files_parallel(all_files)
+        else:
+            # Fall back to sequential for debugging or single worker
+            for file_path in all_files:
+                record = self._analyze_file(file_path)
+                if record:
+                    records.append(record)
+        # Third pass: build dependency graph (sequential - already optimized)
+        if analyze_dependencies:
+            self._analyze_dependencies(records)
+            # Calculate impact scores (sequential - fast)
+            self._calculate_impact_scores(records)
+        # Determine attention needs (sequential - fast)
+        self._determine_attention_needs(records)
+        # Build summary (sequential - fast)
+        summary = self._build_summary(records)
+        return records, summary
+    def _analyze_files_parallel(self, all_files: list[Path]) -> list[FileRecord]:
+        """Analyze files in parallel using multiprocessing.
+        Args:
+            all_files: List of file paths to analyze
+        Returns:
+            List of FileRecords (order not guaranteed)
+        Note:
+            Uses multiprocessing.Pool with chunksize optimization.
+            Chunksize is calculated to balance overhead vs parallelism.
+        """
+        # Serialize configuration for workers
+        config_dict = {
+            "exclude_patterns": list(self.config.exclude_patterns),
+            "no_test_patterns": list(self.config.no_test_patterns),
+            "staleness_threshold_days": self.config.staleness_threshold_days,
+        }
+        # Create partial function with fixed arguments
+        analyze_func = partial(
+            _analyze_file_worker,
+            project_root_str=str(self.project_root),
+            config_dict=config_dict,
+            test_file_map=self._test_file_map,
+        )
+        # Calculate optimal chunksize
+        # Too small: overhead from process communication
+        # Too large: poor load balancing
+        total_files = len(all_files)
+        chunksize = max(1, total_files // (self.workers * 4))
+        # Process files in parallel
+        records: list[FileRecord] = []
+        with mp.Pool(processes=self.workers) as pool:
+            # Map file paths to string for pickling
+            file_path_strs = [str(f) for f in all_files]
+            # Process files in chunks
+            results = pool.map(analyze_func, file_path_strs, chunksize=chunksize)
+            # Filter out None results
+            records = [r for r in results if r is not None]
+        return records
+def compare_sequential_vs_parallel(project_root: str = ".", workers: int = 4) -> dict[str, Any]:
+    """Benchmark sequential vs parallel scanner performance.
+    Args:
+        project_root: Root directory to scan
+        workers: Number of worker processes for parallel version
+    Returns:
+        Dictionary with benchmark results:
+            - sequential_time: Time taken by sequential scan
+            - parallel_time: Time taken by parallel scan
+            - speedup: Ratio of sequential to parallel time
+            - files_scanned: Number of files scanned
+            - workers: Number of workers used
+    Example:
+        >>> results = compare_sequential_vs_parallel(workers=4)
+        >>> print(f"Speedup: {results['speedup']:.2f}x")
+        Speedup: 3.74x
+    """
+    import time
+    # Sequential scan
+    print("Running sequential scan...")
+    start = time.perf_counter()
+    scanner_seq = ProjectScanner(project_root=project_root)
+    records_seq, summary_seq = scanner_seq.scan()
+    sequential_time = time.perf_counter() - start
+    print(f"  Sequential: {sequential_time:.4f}s")
+    # Parallel scan
+    print(f"Running parallel scan ({workers} workers)...")
+    start = time.perf_counter()
+    scanner_par = ParallelProjectScanner(project_root=project_root, workers=workers)
+    records_par, summary_par = scanner_par.scan()
+    parallel_time = time.perf_counter() - start
+    print(f"  Parallel: {parallel_time:.4f}s")
+    speedup = sequential_time / parallel_time if parallel_time > 0 else 0
+    return {
+        "sequential_time": sequential_time,
+        "parallel_time": parallel_time,
+        "speedup": speedup,
+        "improvement_pct": ((sequential_time - parallel_time) / sequential_time * 100)
+        if sequential_time > 0
+        else 0,
+        "files_scanned": summary_seq.total_files,
+        "workers": workers,
+    }
+if __name__ == "__main__":
+    # Example usage and benchmark
+    print("=" * 70)
+    print("PARALLEL PROJECT SCANNER - Benchmark")
+    print("=" * 70)
+    # Run benchmark
+    results = compare_sequential_vs_parallel(workers=4)
+    print("\n" + "=" * 70)
+    print("BENCHMARK RESULTS")
+    print("=" * 70)
+    print(f"Files scanned: {results['files_scanned']:,}")
+    print(f"Workers: {results['workers']}")
+    print(f"\nSequential time: {results['sequential_time']:.4f}s")
+    print(f"Parallel time: {results['parallel_time']:.4f}s")
+    print(f"\nSpeedup: {results['speedup']:.2f}x")
+    print(f"Improvement: {results['improvement_pct']:.1f}%")
+    if results['speedup'] >= 2.0:
+        print("\n✅ Parallel processing is highly effective!")
+    elif results['speedup'] >= 1.5:
+        print("\n✅ Parallel processing provides moderate benefit")
+    else:
+        print("\n⚠️  Parallel processing may not be worth the overhead")
+    print("=" * 70)

empathy_os/socratic/ab_testing.py CHANGED Viewed

@@ -20,7 +20,7 @@ import hashlib
 import json
 import logging
 import math
-import random
+import random  # Security Note: For A/B test simulation data, not cryptographic use
 import time
 from dataclasses import dataclass, field
 from datetime import datetime

empathy-framework 4.7.0__py3-none-any.whl → 4.8.0__py3-none-any.whl

empathy-framework 4.7.0py3-none-any.whl → 4.8.0py3-none-any.whl