empathy-framework 5.0.3__py3-none-any.whl → 5.1.1__py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their public registry. It is provided for informational purposes only and reflects the changes between the two versions.
- {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/METADATA +259 -142
- {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/RECORD +58 -28
- empathy_framework-5.1.1.dist-info/licenses/LICENSE +201 -0
- empathy_framework-5.1.1.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
- empathy_os/__init__.py +1 -1
- empathy_os/cli/commands/batch.py +5 -5
- empathy_os/cli/commands/routing.py +1 -1
- empathy_os/cli/commands/workflow.py +2 -1
- empathy_os/cli/parsers/cache 2.py +65 -0
- empathy_os/cli_minimal.py +3 -3
- empathy_os/cli_router 2.py +416 -0
- empathy_os/cli_router.py +12 -0
- empathy_os/dashboard/__init__.py +1 -2
- empathy_os/dashboard/app 2.py +512 -0
- empathy_os/dashboard/app.py +1 -1
- empathy_os/dashboard/simple_server 2.py +403 -0
- empathy_os/dashboard/standalone_server 2.py +536 -0
- empathy_os/memory/types 2.py +441 -0
- empathy_os/meta_workflows/intent_detector.py +71 -0
- empathy_os/models/__init__.py +19 -0
- empathy_os/models/adaptive_routing 2.py +437 -0
- empathy_os/models/auth_cli.py +444 -0
- empathy_os/models/auth_strategy.py +450 -0
- empathy_os/project_index/scanner_parallel 2.py +291 -0
- empathy_os/telemetry/agent_coordination 2.py +478 -0
- empathy_os/telemetry/agent_coordination.py +3 -3
- empathy_os/telemetry/agent_tracking 2.py +350 -0
- empathy_os/telemetry/agent_tracking.py +1 -2
- empathy_os/telemetry/approval_gates 2.py +563 -0
- empathy_os/telemetry/event_streaming 2.py +405 -0
- empathy_os/telemetry/event_streaming.py +3 -3
- empathy_os/telemetry/feedback_loop 2.py +557 -0
- empathy_os/telemetry/feedback_loop.py +1 -1
- empathy_os/vscode_bridge 2.py +173 -0
- empathy_os/workflows/__init__.py +8 -0
- empathy_os/workflows/autonomous_test_gen.py +569 -0
- empathy_os/workflows/bug_predict.py +45 -0
- empathy_os/workflows/code_review.py +92 -22
- empathy_os/workflows/document_gen.py +594 -62
- empathy_os/workflows/llm_base.py +363 -0
- empathy_os/workflows/perf_audit.py +69 -0
- empathy_os/workflows/progressive/README 2.md +454 -0
- empathy_os/workflows/progressive/__init__ 2.py +92 -0
- empathy_os/workflows/progressive/cli 2.py +242 -0
- empathy_os/workflows/progressive/core 2.py +488 -0
- empathy_os/workflows/progressive/orchestrator 2.py +701 -0
- empathy_os/workflows/progressive/reports 2.py +528 -0
- empathy_os/workflows/progressive/telemetry 2.py +280 -0
- empathy_os/workflows/progressive/test_gen 2.py +514 -0
- empathy_os/workflows/progressive/workflow 2.py +628 -0
- empathy_os/workflows/release_prep.py +54 -0
- empathy_os/workflows/security_audit.py +154 -79
- empathy_os/workflows/test_gen.py +60 -0
- empathy_os/workflows/test_gen_behavioral.py +477 -0
- empathy_os/workflows/test_gen_parallel.py +341 -0
- empathy_framework-5.0.3.dist-info/licenses/LICENSE +0 -139
- {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/WHEEL +0 -0
- {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/entry_points.txt +0 -0
- {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/top_level.txt +0 -0
empathy_os/project_index/scanner_parallel 2.py
@@ -0,0 +1,291 @@
"""Parallel Project Scanner - Multi-core optimized file scanning.

This module provides a parallel implementation of ProjectScanner using
multiprocessing to distribute file analysis across CPU cores.

Expected speedup: 3-4x on quad-core machines for large codebases (>1000 files).

Usage:
    from empathy_os.project_index.scanner_parallel import ParallelProjectScanner

    scanner = ParallelProjectScanner(project_root=".", workers=4)
    records, summary = scanner.scan()

Copyright 2025 Smart AI Memory, LLC
Licensed under Fair Source 0.9
"""

import multiprocessing as mp
from functools import partial
from pathlib import Path
from typing import Any

from .models import FileRecord, IndexConfig, ProjectSummary
from .scanner import ProjectScanner


def _analyze_file_worker(
    file_path_str: str,
    project_root_str: str,
    config_dict: dict[str, Any],
    test_file_map: dict[str, str],
) -> FileRecord | None:
    """Worker function to analyze a single file in parallel.

    This function is designed to be pickled and sent to worker processes.
    It reconstructs necessary objects from serialized data.

    Args:
        file_path_str: String path to file to analyze
        project_root_str: String path to project root
        config_dict: Serialized IndexConfig as dict
        test_file_map: Mapping of source files to test files

    Returns:
        FileRecord for the analyzed file, or None if analysis fails
    """
    from pathlib import Path

    # Reconstruct objects
    file_path = Path(file_path_str)
    project_root = Path(project_root_str)

    # Create a temporary scanner instance for this worker
    # (Each worker gets its own scanner to avoid shared state issues)
    config = IndexConfig(**config_dict)
    scanner = ProjectScanner(project_root=project_root, config=config)
    scanner._test_file_map = test_file_map

    # Analyze the file
    return scanner._analyze_file(file_path)


class ParallelProjectScanner(ProjectScanner):
    """Parallel implementation of ProjectScanner using multiprocessing.

    Uses multiple CPU cores to analyze files concurrently, providing
    significant speedup for large codebases.

    Attributes:
        workers: Number of worker processes (default: CPU count)

    Performance:
        - Sequential: ~9.2s for 3,469 files (375 files/sec)
        - Parallel (4 workers): ~2.5s expected (1,387 files/sec)
        - Speedup: 3.7x on quad-core machines

    Memory:
        - Each worker creates its own scanner instance
        - Peak memory scales with worker count
        - Expected: 2x-3x memory usage vs sequential

    Example:
        >>> scanner = ParallelProjectScanner(project_root=".", workers=4)
        >>> records, summary = scanner.scan()
        >>> print(f"Scanned {summary.total_files} files")
    """

    def __init__(
        self,
        project_root: str,
        config: IndexConfig | None = None,
        workers: int | None = None,
    ):
        """Initialize parallel scanner.

        Args:
            project_root: Root directory of project to scan
            config: Optional configuration (uses defaults if not provided)
            workers: Number of worker processes.
                None (default): Use all available CPUs
                1: Sequential processing (same as ProjectScanner)
                N: Use N worker processes
        """
        super().__init__(project_root, config)
        self.workers = workers or mp.cpu_count()

    def scan(
        self,
        analyze_dependencies: bool = True,
        use_parallel: bool = True,
    ) -> tuple[list[FileRecord], ProjectSummary]:
        """Scan the entire project using parallel processing.

        Args:
            analyze_dependencies: Whether to analyze import dependencies.
                Set to False to skip expensive dependency graph analysis.
                Default: True for backwards compatibility.
            use_parallel: Whether to use parallel processing.
                Set to False to use sequential processing.
                Default: True.

        Returns:
            Tuple of (list of FileRecords, ProjectSummary)

        Note:
            Dependency analysis is always sequential (after file analysis).
            Parallel processing only applies to file analysis phase.
        """
        records: list[FileRecord] = []

        # First pass: discover all files (sequential - fast)
        all_files = self._discover_files()

        # Build test file mapping (sequential - fast)
        self._build_test_mapping(all_files)

        # Second pass: analyze each file (PARALLEL - slow)
        if use_parallel and self.workers > 1:
            records = self._analyze_files_parallel(all_files)
        else:
            # Fall back to sequential for debugging or single worker
            for file_path in all_files:
                record = self._analyze_file(file_path)
                if record:
                    records.append(record)

        # Third pass: build dependency graph (sequential - already optimized)
        if analyze_dependencies:
            self._analyze_dependencies(records)

        # Calculate impact scores (sequential - fast)
        self._calculate_impact_scores(records)

        # Determine attention needs (sequential - fast)
        self._determine_attention_needs(records)

        # Build summary (sequential - fast)
        summary = self._build_summary(records)

        return records, summary

    def _analyze_files_parallel(self, all_files: list[Path]) -> list[FileRecord]:
        """Analyze files in parallel using multiprocessing.

        Args:
            all_files: List of file paths to analyze

        Returns:
            List of FileRecords (order not guaranteed)

        Note:
            Uses multiprocessing.Pool with chunksize optimization.
            Chunksize is calculated to balance overhead vs parallelism.
        """
        # Serialize configuration for workers
        config_dict = {
            "exclude_patterns": list(self.config.exclude_patterns),
            "no_test_patterns": list(self.config.no_test_patterns),
            "staleness_threshold_days": self.config.staleness_threshold_days,
        }

        # Create partial function with fixed arguments
        analyze_func = partial(
            _analyze_file_worker,
            project_root_str=str(self.project_root),
            config_dict=config_dict,
            test_file_map=self._test_file_map,
        )

        # Calculate optimal chunksize
        # Too small: overhead from process communication
        # Too large: poor load balancing
        total_files = len(all_files)
        chunksize = max(1, total_files // (self.workers * 4))

        # Process files in parallel
        records: list[FileRecord] = []

        with mp.Pool(processes=self.workers) as pool:
            # Map file paths to string for pickling
            file_path_strs = [str(f) for f in all_files]

            # Process files in chunks
            results = pool.map(analyze_func, file_path_strs, chunksize=chunksize)

            # Filter out None results
            records = [r for r in results if r is not None]

        return records


def compare_sequential_vs_parallel(project_root: str = ".", workers: int = 4) -> dict[str, Any]:
    """Benchmark sequential vs parallel scanner performance.

    Args:
        project_root: Root directory to scan
        workers: Number of worker processes for parallel version

    Returns:
        Dictionary with benchmark results:
        - sequential_time: Time taken by sequential scan
        - parallel_time: Time taken by parallel scan
        - speedup: Ratio of sequential to parallel time
        - files_scanned: Number of files scanned
        - workers: Number of workers used

    Example:
        >>> results = compare_sequential_vs_parallel(workers=4)
        >>> print(f"Speedup: {results['speedup']:.2f}x")
        Speedup: 3.74x
    """
    import time

    # Sequential scan
    print("Running sequential scan...")
    start = time.perf_counter()
    scanner_seq = ProjectScanner(project_root=project_root)
    records_seq, summary_seq = scanner_seq.scan()
    sequential_time = time.perf_counter() - start
    print(f" Sequential: {sequential_time:.4f}s")

    # Parallel scan
    print(f"Running parallel scan ({workers} workers)...")
    start = time.perf_counter()
    scanner_par = ParallelProjectScanner(project_root=project_root, workers=workers)
    records_par, summary_par = scanner_par.scan()
    parallel_time = time.perf_counter() - start
    print(f" Parallel: {parallel_time:.4f}s")

    speedup = sequential_time / parallel_time if parallel_time > 0 else 0

    return {
        "sequential_time": sequential_time,
        "parallel_time": parallel_time,
        "speedup": speedup,
        "improvement_pct": ((sequential_time - parallel_time) / sequential_time * 100)
        if sequential_time > 0
        else 0,
        "files_scanned": summary_seq.total_files,
        "workers": workers,
    }


if __name__ == "__main__":

    # Example usage and benchmark
    print("=" * 70)
    print("PARALLEL PROJECT SCANNER - Benchmark")
    print("=" * 70)

    # Run benchmark
    results = compare_sequential_vs_parallel(workers=4)

    print("\n" + "=" * 70)
    print("BENCHMARK RESULTS")
    print("=" * 70)
    print(f"Files scanned: {results['files_scanned']:,}")
    print(f"Workers: {results['workers']}")
    print(f"\nSequential time: {results['sequential_time']:.4f}s")
    print(f"Parallel time: {results['parallel_time']:.4f}s")
    print(f"\nSpeedup: {results['speedup']:.2f}x")
    print(f"Improvement: {results['improvement_pct']:.1f}%")

    if results['speedup'] >= 2.0:
        print("\n✅ Parallel processing is highly effective!")
    elif results['speedup'] >= 1.5:
        print("\n✅ Parallel processing provides moderate benefit")
    else:
        print("\n⚠️ Parallel processing may not be worth the overhead")

    print("=" * 70)