mcp-souschef 2.0.1__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
souschef/profiling.py ADDED
@@ -0,0 +1,568 @@
+ """
+ Performance profiling and optimization utilities for SousChef.
+
+ This module provides tools for profiling parsing operations, identifying bottlenecks,
+ and generating performance reports for large cookbook migrations.
+ """
+
+ import cProfile
+ import io
+ import pstats
+ import time
+ import tracemalloc
+ from collections.abc import Callable, Iterator
+ from contextlib import contextmanager
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Any
+
+ from souschef.core.errors import SousChefError
+
+
+ @dataclass
+ class ProfileResult:
+     """
+     Results from a profiling operation.
+
+     Attributes:
+         operation_name: Name of the operation profiled.
+         execution_time: Total execution time in seconds.
+         peak_memory: Peak memory usage in bytes.
+         function_stats: Statistics for individual function calls.
+         context: Additional context about the operation.
+
+     """
+
+     operation_name: str
+     execution_time: float
+     peak_memory: int
+     function_stats: dict[str, Any] = field(default_factory=dict)
+     context: dict[str, Any] = field(default_factory=dict)
+
+     def __str__(self) -> str:
+         """
+         Format profile result as readable string.
+
+         Returns:
+             Formatted profile summary.
+
+         """
+         return f"""
+ Profile: {self.operation_name}
+ Execution Time: {self.execution_time:.3f}s
+ Peak Memory: {self.peak_memory / 1024 / 1024:.2f}MB
+ Context: {self.context}
+ """
+
+
+ @dataclass
+ class PerformanceReport:
+     """
+     Comprehensive performance report for cookbook migration.
+
+     Attributes:
+         cookbook_name: Name of the cookbook profiled.
+         total_time: Total processing time in seconds.
+         total_memory: Total peak memory usage in bytes.
+         operation_results: Results for individual operations.
+         recommendations: Performance optimization recommendations.
+
+     """
+
+     cookbook_name: str
+     total_time: float
+     total_memory: int
+     operation_results: list[ProfileResult] = field(default_factory=list)
+     recommendations: list[str] = field(default_factory=list)
+
+     def add_result(self, result: ProfileResult) -> None:
+         """
+         Add a profile result to the report.
+
+         Args:
+             result: Profile result to add.
+
+         """
+         self.operation_results.append(result)
+
+     def add_recommendation(self, recommendation: str) -> None:
+         """
+         Add a performance recommendation.
+
+         Args:
+             recommendation: Recommendation text.
+
+         """
+         self.recommendations.append(recommendation)
+
+     def __str__(self) -> str:
+         """
+         Format performance report as readable string.
+
+         Returns:
+             Formatted report.
+
+         """
+         report_lines = [
+             f"\n{'=' * 80}",
+             f"Performance Report: {self.cookbook_name}",
+             f"{'=' * 80}",
+             f"Total Time: {self.total_time:.3f}s",
+             f"Total Peak Memory: {self.total_memory / 1024 / 1024:.2f}MB",
+             "\nOperation Breakdown:",
+             f"{'-' * 80}",
+         ]
+
+         for result in self.operation_results:
+             report_lines.extend(
+                 [
+                     f"\n{result.operation_name}:",
+                     (
+                         f" Time: {result.execution_time:.3f}s "
+                         f"({result.execution_time / self.total_time * 100:.1f}%)"
+                     ),
+                     f" Memory: {result.peak_memory / 1024 / 1024:.2f}MB",
+                 ]
+             )
+             if result.context:
+                 for key, value in result.context.items():
+                     report_lines.append(f" {key}: {value}")
+
+         if self.recommendations:
+             report_lines.extend([f"\n{'-' * 80}", "Recommendations:", f"{'-' * 80}"])
+             for i, rec in enumerate(self.recommendations, 1):
+                 report_lines.append(f"{i}. {rec}")
+
+         report_lines.append(f"{'=' * 80}\n")
+         return "\n".join(report_lines)
+
+
+ @contextmanager
+ def profile_operation(
+     operation_name: str, context: dict[str, Any] | None = None
+ ) -> Iterator[ProfileResult]:
+     """
+     Context manager for profiling an operation.
+
+     Args:
+         operation_name: Name of the operation being profiled.
+         context: Additional context information.
+
+     Yields:
+         ProfileResult that will be populated with profiling data.
+
+     Example:
+         >>> with profile_operation("parse_recipe", {"file": "default.rb"}) as result:
+         ...     parse_recipe("/path/to/recipe.rb")
+         >>> print(result.execution_time)
+
+     """
+     result = ProfileResult(
+         operation_name=operation_name,
+         execution_time=0.0,
+         peak_memory=0,
+         context=context or {},
+     )
+
+     tracemalloc.start()
+     start_time = time.perf_counter()
+
+     try:
+         yield result
+     finally:
+         end_time = time.perf_counter()
+         _, peak = tracemalloc.get_traced_memory()
+         tracemalloc.stop()
+
+         result.execution_time = end_time - start_time
+         result.peak_memory = peak
+
+
+ def profile_function(
+     func: Callable[..., Any],
+     *args: Any,
+     operation_name: str | None = None,
+     **kwargs: Any,
+ ) -> tuple[Any, ProfileResult]:
+     """
+     Profile a function call and return result with profiling data.
+
+     Args:
+         func: Function to profile.
+         *args: Positional arguments for the function.
+         operation_name: Name for the operation (defaults to function name).
+         **kwargs: Keyword arguments for the function.
+
+     Returns:
+         Tuple of (function_result, profile_result).
+
+     Example:
+         >>> result, profile = profile_function(parse_recipe, "/path/to/recipe.rb")
+         >>> print(f"Took {profile.execution_time:.3f}s")
+
+     """
+     op_name = operation_name or func.__name__
+
+     with profile_operation(op_name) as profile:
+         result = func(*args, **kwargs)
+
+     return result, profile
+
+
+ def detailed_profile_function(
+     func: Callable[..., Any],
+     *args: Any,
+     operation_name: str | None = None,
+     top_n: int = 20,
+     **kwargs: Any,
+ ) -> tuple[Any, ProfileResult]:
+     """
+     Profile a function with detailed call statistics.
+
+     Args:
+         func: Function to profile.
+         *args: Positional arguments for the function.
+         operation_name: Name for the operation (defaults to function name).
+         top_n: Number of top functions to include in stats.
+         **kwargs: Keyword arguments for the function.
+
+     Returns:
+         Tuple of (function_result, profile_result with detailed stats).
+
+     Example:
+         >>> result, profile = detailed_profile_function(parse_recipe, path)
+         >>> print(profile.function_stats["total_calls"])
+
+     """
+     op_name = operation_name or func.__name__
+
+     # Memory and time profiling
+     tracemalloc.start()
+     start_time = time.perf_counter()
+
+     # Function call profiling
+     profiler = cProfile.Profile()
+     profiler.enable()
+
+     try:
+         result = func(*args, **kwargs)
+     finally:
+         profiler.disable()
+         end_time = time.perf_counter()
+         _, peak = tracemalloc.get_traced_memory()
+         tracemalloc.stop()
+
+     # Extract statistics
+     stats_buffer = io.StringIO()
+     stats = pstats.Stats(profiler, stream=stats_buffer)
+     stats.strip_dirs()
+     stats.sort_stats("cumulative")
+     stats.print_stats(top_n)
+
+     # Get call counts. pstats.Stats exposes a stats dict at runtime (mypy does
+     # not recognize it, hence getattr); each value is a (cc, nc, tt, ct, callers)
+     # tuple, where cc is the primitive call count and nc the total call count.
+     stats_dict = getattr(stats, "stats", {})
+     total_calls = sum(st[1] for st in stats_dict.values()) if stats_dict else 0
+     primitive_calls = sum(st[0] for st in stats_dict.values()) if stats_dict else 0
+
+     profile_result = ProfileResult(
+         operation_name=op_name,
+         execution_time=end_time - start_time,
+         peak_memory=peak,
+         function_stats={
+             "total_calls": total_calls,
+             "primitive_calls": primitive_calls,
+             "top_functions": stats_buffer.getvalue(),
+         },
+     )
+
+     return result, profile_result
+
+
+ def _profile_directory_files(
+     directory: Path,
+     parse_func: Callable[[str], str],
+     operation_name: str,
+     file_pattern: str = "*.rb",
+ ) -> ProfileResult | None:
+     """
+     Profile parsing operations for files in a directory.
+
+     Args:
+         directory: Directory containing files to parse.
+         parse_func: Function to parse individual files.
+         operation_name: Name for the profiling operation.
+         file_pattern: Glob pattern to match files.
+
+     Returns:
+         ProfileResult with aggregated stats, or None if no files found.
+
+     """
+     if not directory.exists():
+         return None
+
+     file_count = 0
+     total_time = 0.0
+
+     for file_path in directory.glob(file_pattern):
+         with profile_operation(operation_name, {"file": file_path.name}) as result:
+             parse_func(str(file_path))
+         file_count += 1
+         total_time += result.execution_time
+
+     if file_count > 0:
+         return ProfileResult(
+             operation_name=f"{operation_name} (total: {file_count})",
+             execution_time=total_time,
+             peak_memory=0,
+             context={"avg_per_file": total_time / file_count},
+         )
+
+     return None
+
+
+ def generate_cookbook_performance_report(
+     cookbook_path: str,
+ ) -> PerformanceReport:
+     """
+     Generate comprehensive performance report for a cookbook.
+
+     Profiles all parsing operations for a cookbook and provides
+     optimization recommendations based on the results.
+
+     Args:
+         cookbook_path: Path to the cookbook to profile.
+
+     Returns:
+         PerformanceReport with detailed profiling information.
+
+     Raises:
+         SousChefError: If cookbook path is invalid or profiling fails.
+
+     Example:
+         >>> report = generate_cookbook_performance_report("/path/to/cookbook")
+         >>> print(report)
+
+     """
+     from souschef.parsers.attributes import parse_attributes
+     from souschef.parsers.metadata import (
+         list_cookbook_structure,
+         read_cookbook_metadata,
+     )
+     from souschef.parsers.recipe import parse_recipe
+     from souschef.parsers.resource import parse_custom_resource
+     from souschef.parsers.template import parse_template
+
+     path = Path(cookbook_path)
+     if not path.exists():
+         raise SousChefError(
+             f"Cookbook path not found: {cookbook_path}",
+             suggestion="Verify the cookbook path exists and is accessible",
+         )
+
+     report = PerformanceReport(cookbook_name=path.name, total_time=0.0, total_memory=0)
+
+     overall_start = time.perf_counter()
+     overall_memory_start = 0
+
+     tracemalloc.start()
+     overall_memory_start = tracemalloc.get_traced_memory()[1]
+
+     try:
+         # Profile cookbook structure analysis
+         with profile_operation(
+             "list_cookbook_structure", {"path": cookbook_path}
+         ) as result:
+             list_cookbook_structure(cookbook_path)
+         report.add_result(result)
+
+         # Profile metadata parsing
+         metadata_path = path / "metadata.rb"
+         if metadata_path.exists():
+             with profile_operation(
+                 "read_cookbook_metadata", {"file": str(metadata_path)}
+             ) as result:
+                 read_cookbook_metadata(str(metadata_path))
+             report.add_result(result)
+
+         # Profile recipe parsing
+         recipe_result = _profile_directory_files(
+             path / "recipes", parse_recipe, "parse_recipes"
+         )
+         if recipe_result:
+             report.add_result(recipe_result)
+
+         # Profile attribute parsing
+         attr_result = _profile_directory_files(
+             path / "attributes", parse_attributes, "parse_attributes"
+         )
+         if attr_result:
+             report.add_result(attr_result)
+
+         # Profile custom resource parsing
+         resource_result = _profile_directory_files(
+             path / "resources", parse_custom_resource, "parse_custom_resources"
+         )
+         if resource_result:
+             report.add_result(resource_result)
+
+         # Profile template parsing
+         templates_dir = path / "templates"
+         if templates_dir.exists():
+             template_count = 0
+             template_total_time = 0.0
+
+             for template_file in templates_dir.rglob("*.erb"):
+                 with profile_operation(
+                     "parse_template", {"file": template_file.name}
+                 ) as result:
+                     parse_template(str(template_file))
+                 template_count += 1
+                 template_total_time += result.execution_time
+
+             if template_count > 0:
+                 report.add_result(
+                     ProfileResult(
+                         operation_name=f"parse_templates (total: {template_count})",
+                         execution_time=template_total_time,
+                         peak_memory=0,
+                         context={
+                             "avg_per_template": template_total_time / template_count
+                         },
+                     )
+                 )
+
+     finally:
+         _, overall_peak = tracemalloc.get_traced_memory()
+         tracemalloc.stop()
+
+     # Total memory is the peak observed during profiling, clamped to non-negative
+     report.total_time = time.perf_counter() - overall_start
+     report.total_memory = max(0, overall_peak - overall_memory_start)
+
+     # Generate recommendations
+     _add_performance_recommendations(report)
+
+     return report
+
+
+ def _add_performance_recommendations(report: PerformanceReport) -> None:
+     """
+     Add performance recommendations based on profiling results.
+
+     Args:
+         report: Performance report to add recommendations to.
+
+     """
+     # Check for slow operations
+     slow_threshold = 1.0  # 1 second
+     for result in report.operation_results:
+         if result.execution_time > slow_threshold:
+             report.add_recommendation(
+                 f"Operation '{result.operation_name}' took "
+                 f"{result.execution_time:.2f}s. "
+                 f"Consider optimizing or parallelizing this operation."
+             )
+
+     # Check for high memory usage
+     memory_threshold = 100 * 1024 * 1024  # 100MB
+     if report.total_memory > memory_threshold:
+         report.add_recommendation(
+             f"Peak memory usage is {report.total_memory / 1024 / 1024:.0f}MB. "
+             "Consider processing files in batches or implementing streaming "
+             "for large cookbooks."
+         )
+
+     # Check for many files
+     for result in report.operation_results:
+         if "total:" in result.operation_name:
+             # Extract count from operation name like "parse_recipes (total: 50)"
+             if "parse_recipes" in result.operation_name and result.execution_time > 5.0:
+                 report.add_recommendation(
+                     "Large number of recipes detected. Consider using parallel "
+                     "processing or caching intermediate results for faster re-runs."
+                 )
+             elif (
+                 "parse_templates" in result.operation_name
+                 and result.execution_time > 3.0
+             ):
+                 report.add_recommendation(
+                     "Template parsing is time-consuming. Consider caching parsed "
+                     "templates or using a template compilation cache."
+                 )
+
+     # Always provide general recommendations
+     report.add_recommendation(
+         "Use the --cache option (if available) to cache parsing results for "
+         "faster subsequent runs."
+     )
+     report.add_recommendation(
+         "For very large cookbooks, consider splitting into smaller, focused cookbooks."
+     )
+
+
+ def compare_performance(
+     before: ProfileResult | PerformanceReport,
+     after: ProfileResult | PerformanceReport,
+ ) -> str:
+     """
+     Compare performance before and after optimization.
+
+     Args:
+         before: Performance metrics before optimization.
+         after: Performance metrics after optimization.
+
+     Returns:
+         Formatted comparison report.
+
+     Example:
+         >>> comparison = compare_performance(before_profile, after_profile)
+         >>> print(comparison)
+
+     """
+     time_before = (
+         before.total_time
+         if isinstance(before, PerformanceReport)
+         else before.execution_time
+     )
+     time_after = (
+         after.total_time
+         if isinstance(after, PerformanceReport)
+         else after.execution_time
+     )
+
+     memory_before = (
+         before.total_memory
+         if isinstance(before, PerformanceReport)
+         else before.peak_memory
+     )
+     memory_after = (
+         after.total_memory
+         if isinstance(after, PerformanceReport)
+         else after.peak_memory
+     )
+
+     time_improvement = (
+         ((time_before - time_after) / time_before * 100) if time_before > 0 else 0
+     )
+     memory_improvement = (
+         ((memory_before - memory_after) / memory_before * 100)
+         if memory_before > 0
+         else 0
+     )
+
+     return f"""
+ Performance Comparison
+ {"=" * 80}
+ Execution Time:
+ Before: {time_before:.3f}s
+ After: {time_after:.3f}s
+ Change: {time_improvement:+.1f}% {"(faster)" if time_improvement > 0 else "(slower)"}
+
+ Memory Usage:
+ Before: {memory_before / 1024 / 1024:.2f}MB
+ After: {memory_after / 1024 / 1024:.2f}MB
+ Change: {memory_improvement:+.1f}% {"(less)" if memory_improvement > 0 else "(more)"}
+ {"=" * 80}
+ """