empathy-framework 5.0.3-py3-none-any.whl → 5.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/METADATA +259 -142
  2. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/RECORD +58 -28
  3. empathy_framework-5.1.1.dist-info/licenses/LICENSE +201 -0
  4. empathy_framework-5.1.1.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
  5. empathy_os/__init__.py +1 -1
  6. empathy_os/cli/commands/batch.py +5 -5
  7. empathy_os/cli/commands/routing.py +1 -1
  8. empathy_os/cli/commands/workflow.py +2 -1
  9. empathy_os/cli/parsers/cache 2.py +65 -0
  10. empathy_os/cli_minimal.py +3 -3
  11. empathy_os/cli_router 2.py +416 -0
  12. empathy_os/cli_router.py +12 -0
  13. empathy_os/dashboard/__init__.py +1 -2
  14. empathy_os/dashboard/app 2.py +512 -0
  15. empathy_os/dashboard/app.py +1 -1
  16. empathy_os/dashboard/simple_server 2.py +403 -0
  17. empathy_os/dashboard/standalone_server 2.py +536 -0
  18. empathy_os/memory/types 2.py +441 -0
  19. empathy_os/meta_workflows/intent_detector.py +71 -0
  20. empathy_os/models/__init__.py +19 -0
  21. empathy_os/models/adaptive_routing 2.py +437 -0
  22. empathy_os/models/auth_cli.py +444 -0
  23. empathy_os/models/auth_strategy.py +450 -0
  24. empathy_os/project_index/scanner_parallel 2.py +291 -0
  25. empathy_os/telemetry/agent_coordination 2.py +478 -0
  26. empathy_os/telemetry/agent_coordination.py +3 -3
  27. empathy_os/telemetry/agent_tracking 2.py +350 -0
  28. empathy_os/telemetry/agent_tracking.py +1 -2
  29. empathy_os/telemetry/approval_gates 2.py +563 -0
  30. empathy_os/telemetry/event_streaming 2.py +405 -0
  31. empathy_os/telemetry/event_streaming.py +3 -3
  32. empathy_os/telemetry/feedback_loop 2.py +557 -0
  33. empathy_os/telemetry/feedback_loop.py +1 -1
  34. empathy_os/vscode_bridge 2.py +173 -0
  35. empathy_os/workflows/__init__.py +8 -0
  36. empathy_os/workflows/autonomous_test_gen.py +569 -0
  37. empathy_os/workflows/bug_predict.py +45 -0
  38. empathy_os/workflows/code_review.py +92 -22
  39. empathy_os/workflows/document_gen.py +594 -62
  40. empathy_os/workflows/llm_base.py +363 -0
  41. empathy_os/workflows/perf_audit.py +69 -0
  42. empathy_os/workflows/progressive/README 2.md +454 -0
  43. empathy_os/workflows/progressive/__init__ 2.py +92 -0
  44. empathy_os/workflows/progressive/cli 2.py +242 -0
  45. empathy_os/workflows/progressive/core 2.py +488 -0
  46. empathy_os/workflows/progressive/orchestrator 2.py +701 -0
  47. empathy_os/workflows/progressive/reports 2.py +528 -0
  48. empathy_os/workflows/progressive/telemetry 2.py +280 -0
  49. empathy_os/workflows/progressive/test_gen 2.py +514 -0
  50. empathy_os/workflows/progressive/workflow 2.py +628 -0
  51. empathy_os/workflows/release_prep.py +54 -0
  52. empathy_os/workflows/security_audit.py +154 -79
  53. empathy_os/workflows/test_gen.py +60 -0
  54. empathy_os/workflows/test_gen_behavioral.py +477 -0
  55. empathy_os/workflows/test_gen_parallel.py +341 -0
  56. empathy_framework-5.0.3.dist-info/licenses/LICENSE +0 -139
  57. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/WHEEL +0 -0
  58. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/entry_points.txt +0 -0
  59. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/top_level.txt +0 -0
empathy_os/project_index/scanner_parallel 2.py (new file)
@@ -0,0 +1,291 @@
+"""Parallel Project Scanner - Multi-core optimized file scanning.
+
+This module provides a parallel implementation of ProjectScanner using
+multiprocessing to distribute file analysis across CPU cores.
+
+Expected speedup: 3-4x on quad-core machines for large codebases (>1000 files).
+
+Usage:
+    from empathy_os.project_index.scanner_parallel import ParallelProjectScanner
+
+    scanner = ParallelProjectScanner(project_root=".", workers=4)
+    records, summary = scanner.scan()
+
+Copyright 2025 Smart AI Memory, LLC
+Licensed under Fair Source 0.9
+"""
+
+import multiprocessing as mp
+from functools import partial
+from pathlib import Path
+from typing import Any
+
+from .models import FileRecord, IndexConfig, ProjectSummary
+from .scanner import ProjectScanner
+
+
+def _analyze_file_worker(
+    file_path_str: str,
+    project_root_str: str,
+    config_dict: dict[str, Any],
+    test_file_map: dict[str, str],
+) -> FileRecord | None:
+    """Worker function to analyze a single file in parallel.
+
+    This function is designed to be pickled and sent to worker processes.
+    It reconstructs necessary objects from serialized data.
+
+    Args:
+        file_path_str: String path to file to analyze
+        project_root_str: String path to project root
+        config_dict: Serialized IndexConfig as dict
+        test_file_map: Mapping of source files to test files
+
+    Returns:
+        FileRecord for the analyzed file, or None if analysis fails
+    """
+    from pathlib import Path
+
+    # Reconstruct objects
+    file_path = Path(file_path_str)
+    project_root = Path(project_root_str)
+
+    # Create a temporary scanner instance for this worker
+    # (Each worker gets its own scanner to avoid shared state issues)
+    config = IndexConfig(**config_dict)
+    scanner = ProjectScanner(project_root=project_root, config=config)
+    scanner._test_file_map = test_file_map
+
+    # Analyze the file
+    return scanner._analyze_file(file_path)
+
+
+class ParallelProjectScanner(ProjectScanner):
+    """Parallel implementation of ProjectScanner using multiprocessing.
+
+    Uses multiple CPU cores to analyze files concurrently, providing
+    significant speedup for large codebases.
+
+    Attributes:
+        workers: Number of worker processes (default: CPU count)
+
+    Performance:
+        - Sequential: ~9.2s for 3,469 files (375 files/sec)
+        - Parallel (4 workers): ~2.5s expected (1,387 files/sec)
+        - Speedup: 3.7x on quad-core machines
+
+    Memory:
+        - Each worker creates its own scanner instance
+        - Peak memory scales with worker count
+        - Expected: 2x-3x memory usage vs sequential
+
+    Example:
+        >>> scanner = ParallelProjectScanner(project_root=".", workers=4)
+        >>> records, summary = scanner.scan()
+        >>> print(f"Scanned {summary.total_files} files")
+    """
+
+    def __init__(
+        self,
+        project_root: str,
+        config: IndexConfig | None = None,
+        workers: int | None = None,
+    ):
+        """Initialize parallel scanner.
+
+        Args:
+            project_root: Root directory of project to scan
+            config: Optional configuration (uses defaults if not provided)
+            workers: Number of worker processes.
+                None (default): Use all available CPUs
+                1: Sequential processing (same as ProjectScanner)
+                N: Use N worker processes
+        """
+        super().__init__(project_root, config)
+        self.workers = workers or mp.cpu_count()
+
+    def scan(
+        self,
+        analyze_dependencies: bool = True,
+        use_parallel: bool = True,
+    ) -> tuple[list[FileRecord], ProjectSummary]:
+        """Scan the entire project using parallel processing.
+
+        Args:
+            analyze_dependencies: Whether to analyze import dependencies.
+                Set to False to skip expensive dependency graph analysis.
+                Default: True for backwards compatibility.
+            use_parallel: Whether to use parallel processing.
+                Set to False to use sequential processing.
+                Default: True.
+
+        Returns:
+            Tuple of (list of FileRecords, ProjectSummary)
+
+        Note:
+            Dependency analysis is always sequential (after file analysis).
+            Parallel processing only applies to file analysis phase.
+        """
+        records: list[FileRecord] = []
+
+        # First pass: discover all files (sequential - fast)
+        all_files = self._discover_files()
+
+        # Build test file mapping (sequential - fast)
+        self._build_test_mapping(all_files)
+
+        # Second pass: analyze each file (PARALLEL - slow)
+        if use_parallel and self.workers > 1:
+            records = self._analyze_files_parallel(all_files)
+        else:
+            # Fall back to sequential for debugging or single worker
+            for file_path in all_files:
+                record = self._analyze_file(file_path)
+                if record:
+                    records.append(record)
+
+        # Third pass: build dependency graph (sequential - already optimized)
+        if analyze_dependencies:
+            self._analyze_dependencies(records)
+
+        # Calculate impact scores (sequential - fast)
+        self._calculate_impact_scores(records)
+
+        # Determine attention needs (sequential - fast)
+        self._determine_attention_needs(records)
+
+        # Build summary (sequential - fast)
+        summary = self._build_summary(records)
+
+        return records, summary
+
+    def _analyze_files_parallel(self, all_files: list[Path]) -> list[FileRecord]:
+        """Analyze files in parallel using multiprocessing.
+
+        Args:
+            all_files: List of file paths to analyze
+
+        Returns:
+            List of FileRecords (order not guaranteed)
+
+        Note:
+            Uses multiprocessing.Pool with chunksize optimization.
+            Chunksize is calculated to balance overhead vs parallelism.
+        """
+        # Serialize configuration for workers
+        config_dict = {
+            "exclude_patterns": list(self.config.exclude_patterns),
+            "no_test_patterns": list(self.config.no_test_patterns),
+            "staleness_threshold_days": self.config.staleness_threshold_days,
+        }
+
+        # Create partial function with fixed arguments
+        analyze_func = partial(
+            _analyze_file_worker,
+            project_root_str=str(self.project_root),
+            config_dict=config_dict,
+            test_file_map=self._test_file_map,
+        )
+
+        # Calculate optimal chunksize
+        # Too small: overhead from process communication
+        # Too large: poor load balancing
+        total_files = len(all_files)
+        chunksize = max(1, total_files // (self.workers * 4))
+
+        # Process files in parallel
+        records: list[FileRecord] = []
+
+        with mp.Pool(processes=self.workers) as pool:
+            # Map file paths to string for pickling
+            file_path_strs = [str(f) for f in all_files]
+
+            # Process files in chunks
+            results = pool.map(analyze_func, file_path_strs, chunksize=chunksize)
+
+            # Filter out None results
+            records = [r for r in results if r is not None]
+
+        return records
+
+
+def compare_sequential_vs_parallel(project_root: str = ".", workers: int = 4) -> dict[str, Any]:
+    """Benchmark sequential vs parallel scanner performance.
+
+    Args:
+        project_root: Root directory to scan
+        workers: Number of worker processes for parallel version
+
+    Returns:
+        Dictionary with benchmark results:
+        - sequential_time: Time taken by sequential scan
+        - parallel_time: Time taken by parallel scan
+        - speedup: Ratio of sequential to parallel time
+        - files_scanned: Number of files scanned
+        - workers: Number of workers used
+
+    Example:
+        >>> results = compare_sequential_vs_parallel(workers=4)
+        >>> print(f"Speedup: {results['speedup']:.2f}x")
+        Speedup: 3.74x
+    """
+    import time
+
+    # Sequential scan
+    print("Running sequential scan...")
+    start = time.perf_counter()
+    scanner_seq = ProjectScanner(project_root=project_root)
+    records_seq, summary_seq = scanner_seq.scan()
+    sequential_time = time.perf_counter() - start
+    print(f"  Sequential: {sequential_time:.4f}s")
+
+    # Parallel scan
+    print(f"Running parallel scan ({workers} workers)...")
+    start = time.perf_counter()
+    scanner_par = ParallelProjectScanner(project_root=project_root, workers=workers)
+    records_par, summary_par = scanner_par.scan()
+    parallel_time = time.perf_counter() - start
+    print(f"  Parallel: {parallel_time:.4f}s")
+
+    speedup = sequential_time / parallel_time if parallel_time > 0 else 0
+
+    return {
+        "sequential_time": sequential_time,
+        "parallel_time": parallel_time,
+        "speedup": speedup,
+        "improvement_pct": ((sequential_time - parallel_time) / sequential_time * 100)
+        if sequential_time > 0
+        else 0,
+        "files_scanned": summary_seq.total_files,
+        "workers": workers,
+    }
+
+
+if __name__ == "__main__":
+
+    # Example usage and benchmark
+    print("=" * 70)
+    print("PARALLEL PROJECT SCANNER - Benchmark")
+    print("=" * 70)
+
+    # Run benchmark
+    results = compare_sequential_vs_parallel(workers=4)
+
+    print("\n" + "=" * 70)
+    print("BENCHMARK RESULTS")
+    print("=" * 70)
+    print(f"Files scanned: {results['files_scanned']:,}")
+    print(f"Workers: {results['workers']}")
+    print(f"\nSequential time: {results['sequential_time']:.4f}s")
+    print(f"Parallel time: {results['parallel_time']:.4f}s")
+    print(f"\nSpeedup: {results['speedup']:.2f}x")
+    print(f"Improvement: {results['improvement_pct']:.1f}%")
+
+    if results['speedup'] >= 2.0:
+        print("\n✅ Parallel processing is highly effective!")
+    elif results['speedup'] >= 1.5:
+        print("\n✅ Parallel processing provides moderate benefit")
+    else:
+        print("\n⚠️ Parallel processing may not be worth the overhead")
+
+    print("=" * 70)