crackerjack 0.31.18__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crackerjack might be problematic. Click here for more details.

Files changed (43):
  1. crackerjack/CLAUDE.md +71 -452
  2. crackerjack/__main__.py +1 -1
  3. crackerjack/agents/refactoring_agent.py +67 -46
  4. crackerjack/cli/handlers.py +7 -7
  5. crackerjack/config/hooks.py +36 -6
  6. crackerjack/core/async_workflow_orchestrator.py +2 -2
  7. crackerjack/core/enhanced_container.py +67 -0
  8. crackerjack/core/phase_coordinator.py +211 -44
  9. crackerjack/core/workflow_orchestrator.py +723 -72
  10. crackerjack/dynamic_config.py +1 -25
  11. crackerjack/managers/publish_manager.py +22 -5
  12. crackerjack/managers/test_command_builder.py +19 -13
  13. crackerjack/managers/test_manager.py +15 -4
  14. crackerjack/mcp/server_core.py +162 -34
  15. crackerjack/mcp/tools/core_tools.py +1 -1
  16. crackerjack/mcp/tools/execution_tools.py +16 -3
  17. crackerjack/mcp/tools/workflow_executor.py +130 -40
  18. crackerjack/mixins/__init__.py +5 -0
  19. crackerjack/mixins/error_handling.py +214 -0
  20. crackerjack/models/config.py +9 -0
  21. crackerjack/models/protocols.py +114 -0
  22. crackerjack/models/task.py +3 -0
  23. crackerjack/security/__init__.py +1 -0
  24. crackerjack/security/audit.py +226 -0
  25. crackerjack/services/config.py +3 -2
  26. crackerjack/services/config_merge.py +11 -5
  27. crackerjack/services/coverage_ratchet.py +22 -0
  28. crackerjack/services/git.py +121 -22
  29. crackerjack/services/initialization.py +25 -9
  30. crackerjack/services/memory_optimizer.py +477 -0
  31. crackerjack/services/parallel_executor.py +474 -0
  32. crackerjack/services/performance_benchmarks.py +292 -577
  33. crackerjack/services/performance_cache.py +443 -0
  34. crackerjack/services/performance_monitor.py +633 -0
  35. crackerjack/services/security.py +63 -0
  36. crackerjack/services/security_logger.py +9 -1
  37. crackerjack/services/terminal_utils.py +0 -0
  38. crackerjack/tools/validate_regex_patterns.py +14 -0
  39. {crackerjack-0.31.18.dist-info → crackerjack-0.33.0.dist-info}/METADATA +2 -2
  40. {crackerjack-0.31.18.dist-info → crackerjack-0.33.0.dist-info}/RECORD +43 -34
  41. {crackerjack-0.31.18.dist-info → crackerjack-0.33.0.dist-info}/WHEEL +0 -0
  42. {crackerjack-0.31.18.dist-info → crackerjack-0.33.0.dist-info}/entry_points.txt +0 -0
  43. {crackerjack-0.31.18.dist-info → crackerjack-0.33.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,633 @@
1
+ """Performance monitoring and profiling service for crackerjack workflows.
2
+
3
+ This module provides comprehensive performance tracking, benchmarking,
4
+ and analysis capabilities for the workflow execution system.
5
+ """
6
+
7
+ import json
8
+ import typing as t
9
+ from collections import defaultdict, deque
10
+ from dataclasses import dataclass, field
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+ from threading import Lock
14
+ from typing import Any
15
+
16
+ from crackerjack.services.logging import get_logger
17
+ from crackerjack.services.memory_optimizer import MemoryOptimizer
18
+ from crackerjack.services.performance_cache import get_performance_cache
19
+
20
+
21
@dataclass
class PerformanceMetric:
    """Individual performance metric.

    A single named measurement captured during a workflow phase,
    stamped with its creation time and optional free-form metadata.
    """

    name: str  # metric identifier
    value: float  # measured value, expressed in `unit`
    unit: str  # unit label for `value`; may be "" when unitless
    timestamp: datetime = field(default_factory=datetime.now)  # capture time
    metadata: dict[str, Any] = field(default_factory=dict)  # extra context
30
+
31
+
32
@dataclass
class PhasePerformance:
    """Performance data for a workflow phase.

    Populated incrementally by PerformanceMonitor: memory fields come
    from memory-optimizer checkpoints, cache counters from the
    performance cache, and operation counters from explicit
    record_parallel/sequential calls.
    """

    phase_name: str
    start_time: datetime
    end_time: datetime | None = None  # set by finalize()
    duration_seconds: float = 0.0  # derived from start/end in finalize()
    memory_start_mb: float = 0.0
    memory_peak_mb: float = 0.0
    memory_end_mb: float = 0.0
    cache_hits: int = 0
    cache_misses: int = 0
    parallel_operations: int = 0
    sequential_operations: int = 0
    success: bool = True
    metrics: list[PerformanceMetric] = field(default_factory=list)

    def finalize(self, end_time: datetime | None = None) -> None:
        """Close the phase: set end_time (now if omitted) and compute duration."""
        self.end_time = end_time or datetime.now()
        self.duration_seconds = (self.end_time - self.start_time).total_seconds()
54
+
55
+
56
@dataclass
class WorkflowPerformance:
    """Complete workflow performance data."""

    workflow_id: str
    start_time: datetime
    end_time: datetime | None = None  # set by finalize()
    total_duration_seconds: float = 0.0
    phases: list[PhasePerformance] = field(default_factory=list)
    overall_success: bool = True
    performance_score: float = 0.0  # 0-100 composite, computed in finalize()

    def add_phase(self, phase: PhasePerformance) -> None:
        """Add phase performance data."""
        self.phases.append(phase)

    def finalize(self, success: bool = True) -> None:
        """Finalize workflow timing and calculate performance score."""
        self.end_time = datetime.now()
        self.total_duration_seconds = (self.end_time - self.start_time).total_seconds()
        self.overall_success = success
        self.performance_score = self._calculate_performance_score()

    def _calculate_performance_score(self) -> float:
        """Calculate overall performance score (0-100).

        Components (per the arithmetic below):
          - duration: up to 100, losing 0.1 point per second of runtime
          - cache efficiency: up to 20 (hit ratio * 20)
          - parallelization: up to 15 (parallel-op ratio * 15)
          - memory: up to 15, losing 1 point per 50MB of peak usage
          - success bonus: 10
        The sum is clamped to 100. Returns 0.0 when no phases exist.
        """
        if not self.phases:
            return 0.0

        # Base score from duration (faster = higher score)
        duration_score = max(
            0, 100 - (self.total_duration_seconds / 10)
        )  # Penalize >10s

        # Cache efficiency score
        total_hits = sum(p.cache_hits for p in self.phases)
        total_misses = sum(p.cache_misses for p in self.phases)
        cache_ratio = (
            total_hits / (total_hits + total_misses)
            if total_hits + total_misses > 0
            else 0
        )
        cache_score = cache_ratio * 20  # Max 20 points for cache efficiency

        # Parallelization score
        total_parallel = sum(p.parallel_operations for p in self.phases)
        total_sequential = sum(p.sequential_operations for p in self.phases)
        parallel_ratio = (
            total_parallel / (total_parallel + total_sequential)
            if total_parallel + total_sequential > 0
            else 0
        )
        parallel_score = parallel_ratio * 15  # Max 15 points for parallelization

        # Memory efficiency score (lower memory usage = higher score)
        max_memory = max((p.memory_peak_mb for p in self.phases), default=0)
        memory_score = max(0, 15 - (max_memory / 50))  # Penalize >50MB usage

        # Success bonus
        success_score = 10 if self.overall_success else 0

        return min(
            100,
            duration_score
            + cache_score
            + parallel_score
            + memory_score
            + success_score,
        )
124
+
125
+
126
@dataclass
class PerformanceBenchmark:
    """Performance benchmark data.

    improvement_percentage and regression are derived automatically from
    the two durations after construction; a non-positive baseline leaves
    both at their zero/False defaults.
    """

    operation_name: str
    baseline_duration_seconds: float
    current_duration_seconds: float
    improvement_percentage: float = 0.0
    regression: bool = False

    def __post_init__(self) -> None:
        """Derive improvement percentage and regression flag."""
        baseline = self.baseline_duration_seconds
        if baseline <= 0:
            # No meaningful baseline to compare against.
            return
        saved = baseline - self.current_duration_seconds
        self.improvement_percentage = saved / baseline * 100
        self.regression = self.improvement_percentage < 0
145
+
146
+
147
class PerformanceMonitor:
    """Central performance monitoring service.

    Tracks in-flight workflows and their phases, aggregates memory,
    cache, and parallelism data per phase, keeps bounded histories of
    completed workflows and per-operation benchmarks, and can export
    the collected data as JSON.

    Thread-safety: all shared state is guarded by a single NON-reentrant
    ``threading.Lock``. Methods that hold the lock must never call other
    public methods that also acquire it.
    """

    def __init__(
        self,
        data_retention_days: int = 30,
        benchmark_history_size: int = 100,
    ):
        """Initialize the monitor.

        Args:
            data_retention_days: retention hint; stored but not enforced
                by this class itself.
            benchmark_history_size: maximum benchmark samples kept per
                operation and maximum completed workflows retained.
        """
        self.data_retention_days = data_retention_days
        self.benchmark_history_size = benchmark_history_size
        self._initialize_data_structures(benchmark_history_size)
        self._initialize_services()
        self._initialize_thresholds()

    def _initialize_data_structures(self, history_size: int) -> None:
        """Initialize performance data structures."""
        self._active_workflows: dict[str, WorkflowPerformance] = {}
        self._active_phases: dict[str, PhasePerformance] = {}
        # Bounded histories: oldest entries fall off automatically.
        self._completed_workflows: deque[WorkflowPerformance] = deque(
            maxlen=history_size
        )
        self._benchmarks: dict[str, deque[float]] = defaultdict(
            lambda: deque(maxlen=history_size)
        )

    def _initialize_services(self) -> None:
        """Initialize external services and utilities."""
        # Non-reentrant by design; see class docstring.
        self._lock = Lock()
        self._logger = get_logger("crackerjack.performance_monitor")
        self._memory_optimizer = MemoryOptimizer.get_instance()
        self._cache = get_performance_cache()

    def _initialize_thresholds(self) -> None:
        """Initialize performance warning thresholds."""
        self._warning_thresholds = {
            "duration_seconds": 30.0,
            "memory_mb": 100.0,
            "cache_hit_ratio": 0.5,
        }

    def start_workflow(self, workflow_id: str) -> None:
        """Start monitoring a workflow (warns and no-ops on duplicates)."""
        with self._lock:
            if workflow_id in self._active_workflows:
                self._logger.warning(f"Workflow {workflow_id} already being monitored")
                return

            workflow = WorkflowPerformance(
                workflow_id=workflow_id,
                start_time=datetime.now(),
            )

            self._active_workflows[workflow_id] = workflow
            self._logger.debug(f"Started monitoring workflow: {workflow_id}")

            # Start memory profiling
            self._memory_optimizer.start_profiling()

    def end_workflow(
        self, workflow_id: str, success: bool = True
    ) -> WorkflowPerformance:
        """End workflow monitoring and return performance data.

        Unknown workflow ids return a fresh, empty WorkflowPerformance
        rather than raising.
        """
        with self._lock:
            if workflow_id not in self._active_workflows:
                self._logger.warning(f"Workflow {workflow_id} not found for ending")
                return WorkflowPerformance(
                    workflow_id=workflow_id, start_time=datetime.now()
                )

            workflow = self._active_workflows.pop(workflow_id)
            workflow.finalize(success)

            # Add to completed workflows for analysis
            self._completed_workflows.append(workflow)

            self._logger.info(
                f"Completed workflow {workflow_id}: "
                f"{workflow.total_duration_seconds:.2f}s, "
                f"score: {workflow.performance_score:.1f}, "
                f"phases: {len(workflow.phases)}"
            )

            # Check for performance warnings (log-only; safe under the lock)
            self._check_performance_warnings(workflow)

            return workflow

    def start_phase(self, workflow_id: str, phase_name: str) -> None:
        """Start monitoring a workflow phase (warns and no-ops on duplicates)."""
        phase_key = f"{workflow_id}:{phase_name}"

        with self._lock:
            if phase_key in self._active_phases:
                self._logger.warning(f"Phase {phase_key} already being monitored")
                return

            # Record memory checkpoint
            memory_mb = self._memory_optimizer.record_checkpoint(f"{phase_name}_start")

            phase = PhasePerformance(
                phase_name=phase_name,
                start_time=datetime.now(),
                memory_start_mb=memory_mb,
            )

            self._active_phases[phase_key] = phase
            self._logger.debug(f"Started monitoring phase: {phase_key}")

    def end_phase(
        self, workflow_id: str, phase_name: str, success: bool = True
    ) -> PhasePerformance:
        """End phase monitoring and attach the phase to its workflow.

        Unknown phase keys return a fresh, empty PhasePerformance rather
        than raising.
        """
        phase_key = f"{workflow_id}:{phase_name}"

        with self._lock:
            if phase_key not in self._active_phases:
                self._logger.warning(f"Phase {phase_key} not found for ending")
                return PhasePerformance(
                    phase_name=phase_name, start_time=datetime.now()
                )

            phase = self._active_phases.pop(phase_key)
            phase.success = success

            # Record final memory usage
            phase.memory_end_mb = self._memory_optimizer.record_checkpoint(
                f"{phase_name}_end"
            )

            # Get cache statistics
            # NOTE(review): these appear to be cumulative cache-wide counters,
            # not per-phase deltas — confirm against the cache implementation.
            cache_stats = self._cache.get_stats()
            phase.cache_hits = cache_stats.hits
            phase.cache_misses = cache_stats.misses

            phase.finalize()

            # Add to workflow if it exists
            if workflow_id in self._active_workflows:
                self._active_workflows[workflow_id].add_phase(phase)

            self._logger.debug(
                f"Completed phase {phase_key}: {phase.duration_seconds:.2f}s"
            )

            return phase

    def record_metric(
        self,
        workflow_id: str,
        phase_name: str,
        metric_name: str,
        value: float,
        unit: str = "",
        metadata: dict[str, t.Any] | None = None,
    ) -> None:
        """Record a performance metric on an active phase (warns if absent)."""
        metric = PerformanceMetric(
            name=metric_name,
            value=value,
            unit=unit,
            metadata=metadata or {},
        )

        phase_key = f"{workflow_id}:{phase_name}"

        with self._lock:
            if phase_key in self._active_phases:
                self._active_phases[phase_key].metrics.append(metric)
            else:
                self._logger.warning(
                    f"Phase {phase_key} not found for metric {metric_name}"
                )

    def record_parallel_operation(self, workflow_id: str, phase_name: str) -> None:
        """Record a parallel operation (silently ignored if phase inactive)."""
        phase_key = f"{workflow_id}:{phase_name}"

        with self._lock:
            if phase_key in self._active_phases:
                self._active_phases[phase_key].parallel_operations += 1

    def record_sequential_operation(self, workflow_id: str, phase_name: str) -> None:
        """Record a sequential operation (silently ignored if phase inactive)."""
        phase_key = f"{workflow_id}:{phase_name}"

        with self._lock:
            if phase_key in self._active_phases:
                self._active_phases[phase_key].sequential_operations += 1

    def benchmark_operation(
        self, operation_name: str, duration_seconds: float
    ) -> PerformanceBenchmark:
        """Benchmark an operation against its historical median duration."""
        with self._lock:
            history = self._benchmarks[operation_name]
            history.append(duration_seconds)

            if len(history) > 1:
                # Use median as baseline to avoid outlier skew
                sorted_history = sorted(history)
                baseline = sorted_history[len(sorted_history) // 2]

                return PerformanceBenchmark(
                    operation_name=operation_name,
                    baseline_duration_seconds=baseline,
                    current_duration_seconds=duration_seconds,
                )
            # First measurement: compare against itself (0% improvement).
            return PerformanceBenchmark(
                operation_name=operation_name,
                baseline_duration_seconds=duration_seconds,
                current_duration_seconds=duration_seconds,
            )

    def get_performance_summary(self, last_n_workflows: int = 10) -> dict[str, Any]:
        """Get performance summary for recent completed workflows."""
        with self._lock:
            recent_workflows = list(self._completed_workflows)[-last_n_workflows:]

            if not recent_workflows:
                return {"message": "No completed workflows to analyze"}

            # Calculate aggregate statistics using helper methods
            basic_stats = self._calculate_basic_workflow_stats(recent_workflows)
            cache_stats = self._calculate_cache_statistics(recent_workflows)
            parallel_stats = self._calculate_parallelization_statistics(
                recent_workflows
            )

            return (
                {
                    "workflows_analyzed": len(recent_workflows),
                }
                | basic_stats
                | cache_stats
                | parallel_stats
            )

    def _calculate_basic_workflow_stats(
        self, workflows: list[WorkflowPerformance]
    ) -> dict[str, Any]:
        """Calculate basic workflow statistics (duration, score, success rate)."""
        total_duration = sum(w.total_duration_seconds for w in workflows)
        avg_duration = total_duration / len(workflows)
        avg_score = sum(w.performance_score for w in workflows) / len(workflows)
        success_rate = sum(1 for w in workflows if w.overall_success) / len(workflows)

        return {
            "avg_duration_seconds": round(avg_duration, 2),
            "avg_performance_score": round(avg_score, 1),
            "success_rate": round(success_rate, 2),
        }

    def _calculate_cache_statistics(
        self, workflows: list[WorkflowPerformance]
    ) -> dict[str, Any]:
        """Calculate cache hit/miss statistics across workflows."""
        total_cache_hits = sum(sum(p.cache_hits for p in w.phases) for w in workflows)
        total_cache_misses = sum(
            sum(p.cache_misses for p in w.phases) for w in workflows
        )

        cache_hit_ratio = (
            total_cache_hits / (total_cache_hits + total_cache_misses)
            if total_cache_hits + total_cache_misses > 0
            else 0
        )

        return {
            "cache_hit_ratio": round(cache_hit_ratio, 2),
            "total_cache_hits": total_cache_hits,
            "total_cache_misses": total_cache_misses,
        }

    def _calculate_parallelization_statistics(
        self, workflows: list[WorkflowPerformance]
    ) -> dict[str, Any]:
        """Calculate parallelization statistics across workflows."""
        total_parallel = sum(
            sum(p.parallel_operations for p in w.phases) for w in workflows
        )
        total_sequential = sum(
            sum(p.sequential_operations for p in w.phases) for w in workflows
        )

        parallel_ratio = (
            total_parallel / (total_parallel + total_sequential)
            if total_parallel + total_sequential > 0
            else 0
        )

        return {
            "parallel_operation_ratio": round(parallel_ratio, 2),
            "total_parallel_operations": total_parallel,
            "total_sequential_operations": total_sequential,
        }

    def get_benchmark_trends(self) -> dict[str, dict[str, Any]]:
        """Get benchmark trends for all operations with >= 2 samples."""
        trends = {}

        with self._lock:
            for operation_name, history in self._benchmarks.items():
                if len(history) < 2:
                    continue

                history_list = list(history)
                basic_stats = self._calculate_benchmark_basic_stats(history_list)
                trend_percentage = self._calculate_trend_percentage(history_list)

                trends[operation_name] = basic_stats | {
                    "trend_percentage": round(trend_percentage, 1),
                    "sample_count": len(history_list),
                }

        return trends

    def _calculate_benchmark_basic_stats(
        self, history_list: list[float]
    ) -> dict[str, float]:
        """Calculate basic statistics for benchmark history."""
        avg_duration = sum(history_list) / len(history_list)
        min_duration = min(history_list)
        max_duration = max(history_list)

        return {
            "avg_duration_seconds": round(avg_duration, 3),
            "min_duration_seconds": round(min_duration, 3),
            "max_duration_seconds": round(max_duration, 3),
        }

    def _calculate_trend_percentage(self, history_list: list[float]) -> float:
        """Calculate trend percentage (positive = recent runs got faster)."""
        if len(history_list) < 5:
            return 0.0

        recent_avg = sum(history_list[-5:]) / 5
        older_avg = (
            sum(history_list[:-5]) / len(history_list[:-5])
            if len(history_list) > 5
            else recent_avg
        )

        return ((older_avg - recent_avg) / older_avg * 100) if older_avg > 0 else 0.0

    def export_performance_data(self, output_path: Path) -> None:
        """Export performance data to JSON file.

        BUGFIX: the summary and trends are computed BEFORE acquiring
        self._lock. Both helper methods acquire the same non-reentrant
        Lock, so calling them while holding it (as the previous version
        did) deadlocked on every export.
        """
        summary = self.get_performance_summary()
        trends = self.get_benchmark_trends()

        with self._lock:
            data = {
                "export_timestamp": datetime.now().isoformat(),
                "completed_workflows": [
                    {
                        "workflow_id": w.workflow_id,
                        "start_time": w.start_time.isoformat(),
                        "end_time": w.end_time.isoformat() if w.end_time else None,
                        "duration_seconds": w.total_duration_seconds,
                        "performance_score": w.performance_score,
                        "success": w.overall_success,
                        "phases": [
                            {
                                "name": p.phase_name,
                                "duration_seconds": p.duration_seconds,
                                "memory_peak_mb": p.memory_peak_mb,
                                "cache_hits": p.cache_hits,
                                "cache_misses": p.cache_misses,
                                "parallel_operations": p.parallel_operations,
                                "sequential_operations": p.sequential_operations,
                                "success": p.success,
                            }
                            for p in w.phases
                        ],
                    }
                    for w in self._completed_workflows
                ],
                "benchmarks": {
                    name: list(history) for name, history in self._benchmarks.items()
                },
                "summary": summary,
                "trends": trends,
            }

            with output_path.open("w") as f:
                json.dump(data, f, indent=2)

            self._logger.info(f"Exported performance data to {output_path}")

    def _check_performance_warnings(self, workflow: WorkflowPerformance) -> None:
        """Check for performance warnings and log them only at debug level."""
        warnings = []

        # Collect warnings from different checks
        warnings.extend(self._check_duration_warning(workflow))
        warnings.extend(self._check_memory_warning(workflow))
        warnings.extend(self._check_cache_warning(workflow))

        # Log all warnings at debug level to avoid console spam
        for warning in warnings:
            self._logger.debug(
                f"Performance warning for {workflow.workflow_id}: {warning}"
            )

    def _check_duration_warning(self, workflow: WorkflowPerformance) -> list[str]:
        """Check for duration-based warnings."""
        if (
            workflow.total_duration_seconds
            > self._warning_thresholds["duration_seconds"]
        ):
            return [
                f"Slow workflow duration: {workflow.total_duration_seconds:.1f}s "
                f"(threshold: {self._warning_thresholds['duration_seconds']}s)"
            ]
        return []

    def _check_memory_warning(self, workflow: WorkflowPerformance) -> list[str]:
        """Check for memory usage warnings."""
        max_memory = max((p.memory_peak_mb for p in workflow.phases), default=0)
        if max_memory > self._warning_thresholds["memory_mb"]:
            return [
                f"High memory usage: {max_memory:.1f}MB "
                f"(threshold: {self._warning_thresholds['memory_mb']}MB)"
            ]
        return []

    def _check_cache_warning(self, workflow: WorkflowPerformance) -> list[str]:
        """Check for cache efficiency warnings."""
        total_hits = sum(p.cache_hits for p in workflow.phases)
        total_misses = sum(p.cache_misses for p in workflow.phases)

        if total_hits + total_misses > 0:
            hit_ratio = total_hits / (total_hits + total_misses)
            if hit_ratio < self._warning_thresholds["cache_hit_ratio"]:
                return [
                    f"Low cache hit ratio: {hit_ratio:.2f} "
                    f"(threshold: {self._warning_thresholds['cache_hit_ratio']})"
                ]
        return []
585
+
586
+
587
# Lazily-created process-wide monitor singleton.
_global_monitor: PerformanceMonitor | None = None
_monitor_lock = Lock()


def get_performance_monitor() -> PerformanceMonitor:
    """Return the global PerformanceMonitor, creating it on first use.

    Creation and lookup happen under a module-level lock so concurrent
    first calls still yield a single shared instance.
    """
    global _global_monitor
    with _monitor_lock:
        monitor = _global_monitor
        if monitor is None:
            monitor = PerformanceMonitor()
            _global_monitor = monitor
        return monitor
599
+
600
+
601
# Context manager for easy phase monitoring
class phase_monitor:
    """Context manager for phase performance monitoring.

    Wraps PerformanceMonitor.start_phase/end_phase around a `with`
    block; the phase is marked failed if the block raises. Exceptions
    are NOT suppressed (``__exit__`` returns None).
    """

    def __init__(self, workflow_id: str, phase_name: str):
        self.workflow_id = workflow_id
        self.phase_name = phase_name
        # Uses the process-wide singleton monitor.
        self.monitor = get_performance_monitor()

    def __enter__(self) -> "phase_monitor":
        self.monitor.start_phase(self.workflow_id, self.phase_name)
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: object | None,
    ) -> None:
        # success iff the with-block exited without an exception
        success = exc_type is None
        self.monitor.end_phase(self.workflow_id, self.phase_name, success)

    def record_parallel_op(self) -> None:
        """Record a parallel operation in this phase."""
        self.monitor.record_parallel_operation(self.workflow_id, self.phase_name)

    def record_sequential_op(self) -> None:
        """Record a sequential operation in this phase."""
        self.monitor.record_sequential_operation(self.workflow_id, self.phase_name)

    def record_metric(self, name: str, value: float, unit: str = "") -> None:
        """Record a custom metric in this phase."""
        self.monitor.record_metric(self.workflow_id, self.phase_name, name, value, unit)
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  import tempfile
3
+ import typing as t
3
4
  from contextlib import suppress
4
5
  from pathlib import Path
5
6
 
@@ -198,3 +199,65 @@ class SecurityService:
198
199
  secure_env.pop(var, None)
199
200
 
200
201
  return secure_env
202
+
203
+ def validate_file_safety(self, path: str | Path) -> bool:
204
+ """Protocol method: Validate file safety."""
205
+ try:
206
+ file_path = Path(path)
207
+ # Check if path exists and is safe
208
+ if not file_path.exists():
209
+ return False
210
+ # Basic safety checks
211
+ if file_path.is_symlink():
212
+ return False
213
+ return True
214
+ except Exception:
215
+ return False
216
+
217
+ def check_hardcoded_secrets(self, content: str) -> list[dict[str, t.Any]]:
218
+ """Protocol method: Check for hardcoded secrets."""
219
+ secrets = []
220
+ # Basic patterns for common secrets
221
+ patterns = {
222
+ "api_key": r'api[_-]?key["\s]*[:=]["\s]*([a-zA-Z0-9_-]{20,})',
223
+ "password": r'password["\s]*[:=]["\s]*([^\s"]{8,})',
224
+ "token": r'token["\s]*[:=]["\s]*([a-zA-Z0-9_-]{20,})',
225
+ }
226
+
227
+ import re
228
+
229
+ for secret_type, pattern in patterns.items():
230
+ matches = re.finditer(pattern, content, re.IGNORECASE)
231
+ for match in matches:
232
+ secrets.append(
233
+ {
234
+ "type": secret_type,
235
+ "value": match.group(1)[:10] + "...", # Truncated for safety
236
+ "line": content[: match.start()].count("\n") + 1,
237
+ }
238
+ )
239
+ return secrets
240
+
241
+ def is_safe_subprocess_call(self, cmd: list[str]) -> bool:
242
+ """Protocol method: Check if subprocess call is safe."""
243
+ if not cmd:
244
+ return False
245
+
246
+ dangerous_commands = {
247
+ "rm",
248
+ "rmdir",
249
+ "del",
250
+ "format",
251
+ "fdisk",
252
+ "sudo",
253
+ "su",
254
+ "chmod",
255
+ "chown",
256
+ "curl",
257
+ "wget",
258
+ "nc",
259
+ "netcat",
260
+ }
261
+
262
+ command = cmd[0].split("/")[-1] # Get base command name
263
+ return command not in dangerous_commands