crackerjack 0.32.0__py3-none-any.whl → 0.33.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crackerjack might be problematic.
- crackerjack/core/enhanced_container.py +67 -0
- crackerjack/core/phase_coordinator.py +183 -44
- crackerjack/core/workflow_orchestrator.py +459 -138
- crackerjack/managers/publish_manager.py +22 -5
- crackerjack/managers/test_command_builder.py +4 -2
- crackerjack/managers/test_manager.py +15 -4
- crackerjack/mcp/server_core.py +162 -34
- crackerjack/mcp/tools/core_tools.py +1 -1
- crackerjack/mcp/tools/execution_tools.py +8 -3
- crackerjack/mixins/__init__.py +5 -0
- crackerjack/mixins/error_handling.py +214 -0
- crackerjack/models/config.py +9 -0
- crackerjack/models/protocols.py +69 -0
- crackerjack/models/task.py +3 -0
- crackerjack/security/__init__.py +1 -1
- crackerjack/security/audit.py +92 -78
- crackerjack/services/config.py +3 -2
- crackerjack/services/config_merge.py +11 -5
- crackerjack/services/coverage_ratchet.py +22 -0
- crackerjack/services/git.py +37 -24
- crackerjack/services/initialization.py +25 -9
- crackerjack/services/memory_optimizer.py +477 -0
- crackerjack/services/parallel_executor.py +474 -0
- crackerjack/services/performance_benchmarks.py +292 -577
- crackerjack/services/performance_cache.py +443 -0
- crackerjack/services/performance_monitor.py +633 -0
- crackerjack/services/security.py +63 -0
- crackerjack/services/security_logger.py +9 -1
- crackerjack/services/terminal_utils.py +0 -0
- {crackerjack-0.32.0.dist-info → crackerjack-0.33.0.dist-info}/METADATA +2 -2
- {crackerjack-0.32.0.dist-info → crackerjack-0.33.0.dist-info}/RECORD +34 -27
- {crackerjack-0.32.0.dist-info → crackerjack-0.33.0.dist-info}/WHEEL +0 -0
- {crackerjack-0.32.0.dist-info → crackerjack-0.33.0.dist-info}/entry_points.txt +0 -0
- {crackerjack-0.32.0.dist-info → crackerjack-0.33.0.dist-info}/licenses/LICENSE +0 -0
crackerjack/services/performance_monitor.py
ADDED
@@ -0,0 +1,633 @@
"""Performance monitoring and profiling service for crackerjack workflows.

This module provides comprehensive performance tracking, benchmarking,
and analysis capabilities for the workflow execution system.
"""

import json
import typing as t
from collections import defaultdict, deque
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from threading import Lock
from typing import Any

from crackerjack.services.logging import get_logger
from crackerjack.services.memory_optimizer import MemoryOptimizer
from crackerjack.services.performance_cache import get_performance_cache


@dataclass
class PerformanceMetric:
    """Individual performance metric."""

    name: str
    value: float
    unit: str
    timestamp: datetime = field(default_factory=datetime.now)
    metadata: dict[str, Any] = field(default_factory=dict)


@dataclass
class PhasePerformance:
    """Performance data for a workflow phase."""

    phase_name: str
    start_time: datetime
    end_time: datetime | None = None
    duration_seconds: float = 0.0
    memory_start_mb: float = 0.0
    memory_peak_mb: float = 0.0
    memory_end_mb: float = 0.0
    cache_hits: int = 0
    cache_misses: int = 0
    parallel_operations: int = 0
    sequential_operations: int = 0
    success: bool = True
    metrics: list[PerformanceMetric] = field(default_factory=list)

    def finalize(self, end_time: datetime | None = None) -> None:
        """Finalize phase timing."""
        self.end_time = end_time or datetime.now()
        self.duration_seconds = (self.end_time - self.start_time).total_seconds()


@dataclass
class WorkflowPerformance:
    """Complete workflow performance data."""

    workflow_id: str
    start_time: datetime
    end_time: datetime | None = None
    total_duration_seconds: float = 0.0
    phases: list[PhasePerformance] = field(default_factory=list)
    overall_success: bool = True
    performance_score: float = 0.0

    def add_phase(self, phase: PhasePerformance) -> None:
        """Add phase performance data."""
        self.phases.append(phase)

    def finalize(self, success: bool = True) -> None:
        """Finalize workflow timing and calculate performance score."""
        self.end_time = datetime.now()
        self.total_duration_seconds = (self.end_time - self.start_time).total_seconds()
        self.overall_success = success
        self.performance_score = self._calculate_performance_score()

    def _calculate_performance_score(self) -> float:
        """Calculate overall performance score (0-100)."""
        if not self.phases:
            return 0.0

        # Base score from duration (faster = higher score)
        duration_score = max(
            0, 100 - (self.total_duration_seconds / 10)
        )  # Penalize >10s

        # Cache efficiency score
        total_hits = sum(p.cache_hits for p in self.phases)
        total_misses = sum(p.cache_misses for p in self.phases)
        cache_ratio = (
            total_hits / (total_hits + total_misses)
            if total_hits + total_misses > 0
            else 0
        )
        cache_score = cache_ratio * 20  # Max 20 points for cache efficiency

        # Parallelization score
        total_parallel = sum(p.parallel_operations for p in self.phases)
        total_sequential = sum(p.sequential_operations for p in self.phases)
        parallel_ratio = (
            total_parallel / (total_parallel + total_sequential)
            if total_parallel + total_sequential > 0
            else 0
        )
        parallel_score = parallel_ratio * 15  # Max 15 points for parallelization

        # Memory efficiency score (lower memory usage = higher score)
        max_memory = max((p.memory_peak_mb for p in self.phases), default=0)
        memory_score = max(0, 15 - (max_memory / 50))  # Penalize >50MB usage

        # Success bonus
        success_score = 10 if self.overall_success else 0

        return min(
            100,
            duration_score
            + cache_score
            + parallel_score
            + memory_score
            + success_score,
        )


@dataclass
class PerformanceBenchmark:
    """Performance benchmark data."""

    operation_name: str
    baseline_duration_seconds: float
    current_duration_seconds: float
    improvement_percentage: float = 0.0
    regression: bool = False

    def __post_init__(self) -> None:
        """Calculate improvement metrics."""
        if self.baseline_duration_seconds > 0:
            self.improvement_percentage = (
                (self.baseline_duration_seconds - self.current_duration_seconds)
                / self.baseline_duration_seconds
                * 100
            )
            self.regression = self.improvement_percentage < 0


class PerformanceMonitor:
    """Central performance monitoring service."""

    def __init__(
        self,
        data_retention_days: int = 30,
        benchmark_history_size: int = 100,
    ):
        self.data_retention_days = data_retention_days
        self.benchmark_history_size = benchmark_history_size
        self._initialize_data_structures(benchmark_history_size)
        self._initialize_services()
        self._initialize_thresholds()

    def _initialize_data_structures(self, history_size: int) -> None:
        """Initialize performance data structures."""
        self._active_workflows: dict[str, WorkflowPerformance] = {}
        self._active_phases: dict[str, PhasePerformance] = {}
        self._completed_workflows: deque[WorkflowPerformance] = deque(
            maxlen=history_size
        )
        self._benchmarks: dict[str, deque[float]] = defaultdict(
            lambda: deque(maxlen=history_size)
        )

    def _initialize_services(self) -> None:
        """Initialize external services and utilities."""
        self._lock = Lock()
        self._logger = get_logger("crackerjack.performance_monitor")
        self._memory_optimizer = MemoryOptimizer.get_instance()
        self._cache = get_performance_cache()

    def _initialize_thresholds(self) -> None:
        """Initialize performance warning thresholds."""
        self._warning_thresholds = {
            "duration_seconds": 30.0,
            "memory_mb": 100.0,
            "cache_hit_ratio": 0.5,
        }

    def start_workflow(self, workflow_id: str) -> None:
        """Start monitoring a workflow."""
        with self._lock:
            if workflow_id in self._active_workflows:
                self._logger.warning(f"Workflow {workflow_id} already being monitored")
                return

            workflow = WorkflowPerformance(
                workflow_id=workflow_id,
                start_time=datetime.now(),
            )

            self._active_workflows[workflow_id] = workflow
            self._logger.debug(f"Started monitoring workflow: {workflow_id}")

        # Start memory profiling
        self._memory_optimizer.start_profiling()

    def end_workflow(
        self, workflow_id: str, success: bool = True
    ) -> WorkflowPerformance:
        """End workflow monitoring and return performance data."""
        with self._lock:
            if workflow_id not in self._active_workflows:
                self._logger.warning(f"Workflow {workflow_id} not found for ending")
                return WorkflowPerformance(
                    workflow_id=workflow_id, start_time=datetime.now()
                )

            workflow = self._active_workflows.pop(workflow_id)
            workflow.finalize(success)

            # Add to completed workflows for analysis
            self._completed_workflows.append(workflow)

            self._logger.info(
                f"Completed workflow {workflow_id}: "
                f"{workflow.total_duration_seconds:.2f}s, "
                f"score: {workflow.performance_score:.1f}, "
                f"phases: {len(workflow.phases)}"
            )

            # Check for performance warnings
            self._check_performance_warnings(workflow)

            return workflow

    def start_phase(self, workflow_id: str, phase_name: str) -> None:
        """Start monitoring a workflow phase."""
        phase_key = f"{workflow_id}:{phase_name}"

        with self._lock:
            if phase_key in self._active_phases:
                self._logger.warning(f"Phase {phase_key} already being monitored")
                return

            # Record memory checkpoint
            memory_mb = self._memory_optimizer.record_checkpoint(f"{phase_name}_start")

            phase = PhasePerformance(
                phase_name=phase_name,
                start_time=datetime.now(),
                memory_start_mb=memory_mb,
            )

            self._active_phases[phase_key] = phase
            self._logger.debug(f"Started monitoring phase: {phase_key}")

    def end_phase(
        self, workflow_id: str, phase_name: str, success: bool = True
    ) -> PhasePerformance:
        """End phase monitoring and attach to workflow."""
        phase_key = f"{workflow_id}:{phase_name}"

        with self._lock:
            if phase_key not in self._active_phases:
                self._logger.warning(f"Phase {phase_key} not found for ending")
                return PhasePerformance(
                    phase_name=phase_name, start_time=datetime.now()
                )

            phase = self._active_phases.pop(phase_key)
            phase.success = success

            # Record final memory usage
            phase.memory_end_mb = self._memory_optimizer.record_checkpoint(
                f"{phase_name}_end"
            )

            # Get cache statistics
            cache_stats = self._cache.get_stats()
            phase.cache_hits = cache_stats.hits
            phase.cache_misses = cache_stats.misses

            phase.finalize()

            # Add to workflow if it exists
            if workflow_id in self._active_workflows:
                self._active_workflows[workflow_id].add_phase(phase)

            self._logger.debug(
                f"Completed phase {phase_key}: {phase.duration_seconds:.2f}s"
            )

            return phase

    def record_metric(
        self,
        workflow_id: str,
        phase_name: str,
        metric_name: str,
        value: float,
        unit: str = "",
        metadata: dict[str, t.Any] | None = None,
    ) -> None:
        """Record a performance metric."""
        metric = PerformanceMetric(
            name=metric_name,
            value=value,
            unit=unit,
            metadata=metadata or {},
        )

        phase_key = f"{workflow_id}:{phase_name}"

        with self._lock:
            if phase_key in self._active_phases:
                self._active_phases[phase_key].metrics.append(metric)
            else:
                self._logger.warning(
                    f"Phase {phase_key} not found for metric {metric_name}"
                )

    def record_parallel_operation(self, workflow_id: str, phase_name: str) -> None:
        """Record a parallel operation."""
        phase_key = f"{workflow_id}:{phase_name}"

        with self._lock:
            if phase_key in self._active_phases:
                self._active_phases[phase_key].parallel_operations += 1

    def record_sequential_operation(self, workflow_id: str, phase_name: str) -> None:
        """Record a sequential operation."""
        phase_key = f"{workflow_id}:{phase_name}"

        with self._lock:
            if phase_key in self._active_phases:
                self._active_phases[phase_key].sequential_operations += 1

    def benchmark_operation(
        self, operation_name: str, duration_seconds: float
    ) -> PerformanceBenchmark:
        """Benchmark an operation against historical data."""
        with self._lock:
            history = self._benchmarks[operation_name]
            history.append(duration_seconds)

            if len(history) > 1:
                # Use median as baseline to avoid outlier skew
                sorted_history = sorted(history)
                baseline = sorted_history[len(sorted_history) // 2]

                return PerformanceBenchmark(
                    operation_name=operation_name,
                    baseline_duration_seconds=baseline,
                    current_duration_seconds=duration_seconds,
                )
            else:
                # First measurement, no baseline
                return PerformanceBenchmark(
                    operation_name=operation_name,
                    baseline_duration_seconds=duration_seconds,
                    current_duration_seconds=duration_seconds,
                )

    def get_performance_summary(self, last_n_workflows: int = 10) -> dict[str, Any]:
        """Get performance summary for recent workflows."""
        with self._lock:
            recent_workflows = list(self._completed_workflows)[-last_n_workflows:]

            if not recent_workflows:
                return {"message": "No completed workflows to analyze"}

            # Calculate aggregate statistics using helper methods
            basic_stats = self._calculate_basic_workflow_stats(recent_workflows)
            cache_stats = self._calculate_cache_statistics(recent_workflows)
            parallel_stats = self._calculate_parallelization_statistics(
                recent_workflows
            )

            return (
                {
                    "workflows_analyzed": len(recent_workflows),
                }
                | basic_stats
                | cache_stats
                | parallel_stats
                | {}
            )

    def _calculate_basic_workflow_stats(
        self, workflows: list[WorkflowPerformance]
    ) -> dict[str, Any]:
        """Calculate basic workflow statistics (duration, score, success rate)."""
        total_duration = sum(w.total_duration_seconds for w in workflows)
        avg_duration = total_duration / len(workflows)
        avg_score = sum(w.performance_score for w in workflows) / len(workflows)
        success_rate = sum(1 for w in workflows if w.overall_success) / len(workflows)

        return {
            "avg_duration_seconds": round(avg_duration, 2),
            "avg_performance_score": round(avg_score, 1),
            "success_rate": round(success_rate, 2),
        }

    def _calculate_cache_statistics(
        self, workflows: list[WorkflowPerformance]
    ) -> dict[str, Any]:
        """Calculate cache hit/miss statistics across workflows."""
        total_cache_hits = sum(sum(p.cache_hits for p in w.phases) for w in workflows)
        total_cache_misses = sum(
            sum(p.cache_misses for p in w.phases) for w in workflows
        )

        cache_hit_ratio = (
            total_cache_hits / (total_cache_hits + total_cache_misses)
            if total_cache_hits + total_cache_misses > 0
            else 0
        )

        return {
            "cache_hit_ratio": round(cache_hit_ratio, 2),
            "total_cache_hits": total_cache_hits,
            "total_cache_misses": total_cache_misses,
        }

    def _calculate_parallelization_statistics(
        self, workflows: list[WorkflowPerformance]
    ) -> dict[str, Any]:
        """Calculate parallelization statistics across workflows."""
        total_parallel = sum(
            sum(p.parallel_operations for p in w.phases) for w in workflows
        )
        total_sequential = sum(
            sum(p.sequential_operations for p in w.phases) for w in workflows
        )

        parallel_ratio = (
            total_parallel / (total_parallel + total_sequential)
            if total_parallel + total_sequential > 0
            else 0
        )

        return {
            "parallel_operation_ratio": round(parallel_ratio, 2),
            "total_parallel_operations": total_parallel,
            "total_sequential_operations": total_sequential,
        }

    def get_benchmark_trends(self) -> dict[str, dict[str, Any]]:
        """Get benchmark trends for all operations."""
        trends = {}

        with self._lock:
            for operation_name, history in self._benchmarks.items():
                if len(history) < 2:
                    continue

                history_list = list(history)
                basic_stats = self._calculate_benchmark_basic_stats(history_list)
                trend_percentage = self._calculate_trend_percentage(history_list)

                trends[operation_name] = basic_stats | {
                    "trend_percentage": round(trend_percentage, 1),
                    "sample_count": len(history_list),
                }

        return trends

    def _calculate_benchmark_basic_stats(
        self, history_list: list[float]
    ) -> dict[str, float]:
        """Calculate basic statistics for benchmark history."""
        avg_duration = sum(history_list) / len(history_list)
        min_duration = min(history_list)
        max_duration = max(history_list)

        return {
            "avg_duration_seconds": round(avg_duration, 3),
            "min_duration_seconds": round(min_duration, 3),
            "max_duration_seconds": round(max_duration, 3),
        }

    def _calculate_trend_percentage(self, history_list: list[float]) -> float:
        """Calculate trend percentage for benchmark improvement."""
        if len(history_list) < 5:
            return 0.0

        recent_avg = sum(history_list[-5:]) / 5
        older_avg = (
            sum(history_list[:-5]) / len(history_list[:-5])
            if len(history_list) > 5
            else recent_avg
        )

        return ((older_avg - recent_avg) / older_avg * 100) if older_avg > 0 else 0.0

    def export_performance_data(self, output_path: Path) -> None:
        """Export performance data to JSON file."""
        with self._lock:
            data = {
                "export_timestamp": datetime.now().isoformat(),
                "completed_workflows": [
                    {
                        "workflow_id": w.workflow_id,
                        "start_time": w.start_time.isoformat(),
                        "end_time": w.end_time.isoformat() if w.end_time else None,
                        "duration_seconds": w.total_duration_seconds,
                        "performance_score": w.performance_score,
                        "success": w.overall_success,
                        "phases": [
                            {
                                "name": p.phase_name,
                                "duration_seconds": p.duration_seconds,
                                "memory_peak_mb": p.memory_peak_mb,
                                "cache_hits": p.cache_hits,
                                "cache_misses": p.cache_misses,
                                "parallel_operations": p.parallel_operations,
                                "sequential_operations": p.sequential_operations,
                                "success": p.success,
                            }
                            for p in w.phases
                        ],
                    }
                    for w in self._completed_workflows
                ],
                "benchmarks": {
                    name: list(history) for name, history in self._benchmarks.items()
                },
                "summary": self.get_performance_summary(),
                "trends": self.get_benchmark_trends(),
            }

        with output_path.open("w") as f:
            json.dump(data, f, indent=2)

        self._logger.info(f"Exported performance data to {output_path}")

    def _check_performance_warnings(self, workflow: WorkflowPerformance) -> None:
        """Check for performance warnings and log them only in debug mode."""
        warnings = []

        # Collect warnings from different checks
        warnings.extend(self._check_duration_warning(workflow))
        warnings.extend(self._check_memory_warning(workflow))
        warnings.extend(self._check_cache_warning(workflow))

        # Log all warnings at debug level to avoid console spam
        for warning in warnings:
            self._logger.debug(
                f"Performance warning for {workflow.workflow_id}: {warning}"
            )

    def _check_duration_warning(self, workflow: WorkflowPerformance) -> list[str]:
        """Check for duration-based warnings."""
        if (
            workflow.total_duration_seconds
            > self._warning_thresholds["duration_seconds"]
        ):
            return [
                f"Slow workflow duration: {workflow.total_duration_seconds:.1f}s "
                f"(threshold: {self._warning_thresholds['duration_seconds']}s)"
            ]
        return []

    def _check_memory_warning(self, workflow: WorkflowPerformance) -> list[str]:
        """Check for memory usage warnings."""
        max_memory = max((p.memory_peak_mb for p in workflow.phases), default=0)
        if max_memory > self._warning_thresholds["memory_mb"]:
            return [
                f"High memory usage: {max_memory:.1f}MB "
                f"(threshold: {self._warning_thresholds['memory_mb']}MB)"
            ]
        return []

    def _check_cache_warning(self, workflow: WorkflowPerformance) -> list[str]:
        """Check for cache efficiency warnings."""
        total_hits = sum(p.cache_hits for p in workflow.phases)
        total_misses = sum(p.cache_misses for p in workflow.phases)

        if total_hits + total_misses > 0:
            hit_ratio = total_hits / (total_hits + total_misses)
            if hit_ratio < self._warning_thresholds["cache_hit_ratio"]:
                return [
                    f"Low cache hit ratio: {hit_ratio:.2f} "
                    f"(threshold: {self._warning_thresholds['cache_hit_ratio']})"
                ]
        return []


# Global monitor instance
_global_monitor: PerformanceMonitor | None = None
_monitor_lock = Lock()


def get_performance_monitor() -> PerformanceMonitor:
    """Get global performance monitor instance."""
    global _global_monitor
    with _monitor_lock:
        if _global_monitor is None:
            _global_monitor = PerformanceMonitor()
        return _global_monitor


# Context manager for easy phase monitoring
class phase_monitor:
    """Context manager for phase performance monitoring."""

    def __init__(self, workflow_id: str, phase_name: str):
        self.workflow_id = workflow_id
        self.phase_name = phase_name
        self.monitor = get_performance_monitor()

    def __enter__(self):
        self.monitor.start_phase(self.workflow_id, self.phase_name)
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: object | None,
    ) -> None:
        success = exc_type is None
        self.monitor.end_phase(self.workflow_id, self.phase_name, success)

    def record_parallel_op(self):
        """Record a parallel operation in this phase."""
        self.monitor.record_parallel_operation(self.workflow_id, self.phase_name)

    def record_sequential_op(self):
        """Record a sequential operation in this phase."""
        self.monitor.record_sequential_operation(self.workflow_id, self.phase_name)

    def record_metric(self, name: str, value: float, unit: str = ""):
        """Record a custom metric in this phase."""
        self.monitor.record_metric(self.workflow_id, self.phase_name, name, value, unit)
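Note: the module above exposes a global accessor and a context manager, so a caller can time a phase without calling start_phase/end_phase directly. A minimal usage sketch based only on the API shown in this diff; the workflow and phase names ("example-workflow", "fast_hooks") and the output path are illustrative, not taken from the crackerjack codebase:

```python
from pathlib import Path

from crackerjack.services.performance_monitor import (
    get_performance_monitor,
    phase_monitor,
)

monitor = get_performance_monitor()
monitor.start_workflow("example-workflow")  # hypothetical workflow id

# phase_monitor wraps start_phase/end_phase; success is inferred from
# whether the block raises an exception.
with phase_monitor("example-workflow", "fast_hooks") as phase:
    phase.record_parallel_op()
    phase.record_metric("files_processed", 42, unit="files")

result = monitor.end_workflow("example-workflow", success=True)
print(f"score: {result.performance_score:.1f}")

# Optionally dump workflows, benchmarks, summary, and trends to JSON.
monitor.export_performance_data(Path("performance_report.json"))
```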
crackerjack/services/security.py
CHANGED
@@ -1,5 +1,6 @@
 import os
 import tempfile
+import typing as t
 from contextlib import suppress
 from pathlib import Path
 
@@ -198,3 +199,65 @@ class SecurityService:
             secure_env.pop(var, None)
 
         return secure_env
+
+    def validate_file_safety(self, path: str | Path) -> bool:
+        """Protocol method: Validate file safety."""
+        try:
+            file_path = Path(path)
+            # Check if path exists and is safe
+            if not file_path.exists():
+                return False
+            # Basic safety checks
+            if file_path.is_symlink():
+                return False
+            return True
+        except Exception:
+            return False
+
+    def check_hardcoded_secrets(self, content: str) -> list[dict[str, t.Any]]:
+        """Protocol method: Check for hardcoded secrets."""
+        secrets = []
+        # Basic patterns for common secrets
+        patterns = {
+            "api_key": r'api[_-]?key["\s]*[:=]["\s]*([a-zA-Z0-9_-]{20,})',
+            "password": r'password["\s]*[:=]["\s]*([^\s"]{8,})',
+            "token": r'token["\s]*[:=]["\s]*([a-zA-Z0-9_-]{20,})',
+        }
+
+        import re
+
+        for secret_type, pattern in patterns.items():
+            matches = re.finditer(pattern, content, re.IGNORECASE)
+            for match in matches:
+                secrets.append(
+                    {
+                        "type": secret_type,
+                        "value": match.group(1)[:10] + "...",  # Truncated for safety
+                        "line": content[: match.start()].count("\n") + 1,
+                    }
+                )
+        return secrets
+
+    def is_safe_subprocess_call(self, cmd: list[str]) -> bool:
+        """Protocol method: Check if subprocess call is safe."""
+        if not cmd:
+            return False
+
+        dangerous_commands = {
+            "rm",
+            "rmdir",
+            "del",
+            "format",
+            "fdisk",
+            "sudo",
+            "su",
+            "chmod",
+            "chown",
+            "curl",
+            "wget",
+            "nc",
+            "netcat",
+        }
+
+        command = cmd[0].split("/")[-1]  # Get base command name
+        return command not in dangerous_commands