mcp-souschef 3.2.0__py3-none-any.whl → 3.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,344 @@
1
+ """
2
+ Structured logging configuration for SousChef.
3
+
4
+ This module provides structured logging with JSON output support,
5
+ contextual information, and integration with monitoring systems.
6
+ """
7
+
8
+ import logging
9
+ import sys
10
+ from contextvars import ContextVar
11
+ from typing import Any, Literal
12
+
13
# Context variables for structured logging.
# Each one defaults to None; StructuredFormatter.format() reads them and
# copies their current values onto every log record it formats.
request_id_var: ContextVar[str | None] = ContextVar("request_id", default=None)
operation_var: ContextVar[str | None] = ContextVar("operation", default=None)
cookbook_var: ContextVar[str | None] = ContextVar("cookbook", default=None)
17
+
18
+
19
+ class StructuredFormatter(logging.Formatter):
20
+ """
21
+ Formatter that outputs structured log records.
22
+
23
+ Supports both JSON and human-readable text formats.
24
+ """
25
+
26
+ def __init__(
27
+ self,
28
+ fmt: str | None = None,
29
+ datefmt: str | None = None,
30
+ style: Literal["%", "{", "$"] = "%",
31
+ json_format: bool = False,
32
+ ):
33
+ """
34
+ Initialise structured formatter.
35
+
36
+ Args:
37
+ fmt: Log format string (ignored if json_format=True).
38
+ datefmt: Date format string.
39
+ style: Format style ('%', '{', or '$').
40
+ json_format: Whether to output JSON format.
41
+
42
+ """
43
+ super().__init__(fmt, datefmt, style)
44
+ self.json_format = json_format
45
+
46
+ def format(self, record: logging.LogRecord) -> str:
47
+ """
48
+ Format log record as structured output.
49
+
50
+ Args:
51
+ record: Log record to format.
52
+
53
+ Returns:
54
+ Formatted log string (JSON or text).
55
+
56
+ """
57
+ # Add context variables to record
58
+ record.request_id = request_id_var.get()
59
+ record.operation = operation_var.get()
60
+ record.cookbook = cookbook_var.get()
61
+
62
+ if self.json_format:
63
+ return self._format_json(record)
64
+ else:
65
+ return self._format_text(record)
66
+
67
+ def _format_json(self, record: logging.LogRecord) -> str:
68
+ """Format record as JSON."""
69
+ import json
70
+
71
+ log_data = {
72
+ "timestamp": self.formatTime(record, self.datefmt),
73
+ "level": record.levelname,
74
+ "logger": record.name,
75
+ "message": record.getMessage(),
76
+ "module": record.module,
77
+ "function": record.funcName,
78
+ "line": record.lineno,
79
+ }
80
+
81
+ # Add context if available
82
+ request_id = getattr(record, "request_id", None)
83
+ operation = getattr(record, "operation", None)
84
+ cookbook = getattr(record, "cookbook", None)
85
+
86
+ if request_id:
87
+ log_data["request_id"] = request_id
88
+ if operation:
89
+ log_data["operation"] = operation
90
+ if cookbook:
91
+ log_data["cookbook"] = cookbook
92
+
93
+ # Add exception info if present
94
+ if record.exc_info:
95
+ log_data["exception"] = self.formatException(record.exc_info)
96
+
97
+ # Add extra fields
98
+ for key, value in record.__dict__.items():
99
+ if key not in {
100
+ "name",
101
+ "msg",
102
+ "args",
103
+ "created",
104
+ "msecs",
105
+ "levelname",
106
+ "levelno",
107
+ "pathname",
108
+ "filename",
109
+ "module",
110
+ "exc_info",
111
+ "exc_text",
112
+ "stack_info",
113
+ "lineno",
114
+ "funcName",
115
+ "processName",
116
+ "process",
117
+ "threadName",
118
+ "thread",
119
+ "request_id",
120
+ "operation",
121
+ "cookbook",
122
+ "message",
123
+ "asctime",
124
+ "relativeCreated",
125
+ } and not key.startswith("_"):
126
+ log_data[key] = value
127
+
128
+ return json.dumps(log_data, default=str)
129
+
130
+ def _format_text(self, record: logging.LogRecord) -> str:
131
+ """Format record as human-readable text."""
132
+ # Use parent formatter for base formatting
133
+ base_msg = super().format(record)
134
+
135
+ # Add context if available
136
+ context_parts = []
137
+ request_id = getattr(record, "request_id", None)
138
+ operation = getattr(record, "operation", None)
139
+ cookbook = getattr(record, "cookbook", None)
140
+
141
+ if request_id:
142
+ context_parts.append(f"request_id={request_id}")
143
+ if operation:
144
+ context_parts.append(f"operation={operation}")
145
+ if cookbook:
146
+ context_parts.append(f"cookbook={cookbook}")
147
+
148
+ if context_parts:
149
+ context_str = " [" + ", ".join(context_parts) + "]"
150
+ return base_msg + context_str
151
+
152
+ return base_msg
153
+
154
+
155
def configure_logging(
    level: str = "INFO",
    json_format: bool = False,
    log_file: str | None = None,
) -> None:
    """
    Configure structured logging for SousChef.

    Replaces every handler on the root logger with a console handler (and an
    optional file handler) that use ``StructuredFormatter``.

    Args:
        level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
            Unknown names fall back to INFO.
        json_format: Whether to output JSON format.
        log_file: Optional file path for log output.

    """
    # Convert string level to logging constant. The isinstance guard rejects
    # names that resolve to a non-level attribute of the logging module
    # (e.g. "basic_format"), which would otherwise break setLevel().
    numeric_level = getattr(logging, level.upper(), None)
    if not isinstance(numeric_level, int):
        numeric_level = logging.INFO

    # Create formatter
    if json_format:
        formatter = StructuredFormatter(json_format=True)
    else:
        formatter = StructuredFormatter(
            fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )

    # Configure root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)

    # Remove existing handlers and close them, so that reconfiguring does not
    # leave earlier log files open.
    for stale_handler in list(root_logger.handlers):
        root_logger.removeHandler(stale_handler)
        stale_handler.close()

    # Console handler.
    # NOTE(review): if this process also speaks a protocol over stdout (for
    # example a stdio transport), log lines would interleave with it —
    # confirm stdout is the intended stream.
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(numeric_level)
    console_handler.setFormatter(formatter)
    root_logger.addHandler(console_handler)

    # File handler if specified. An explicit encoding keeps the file content
    # independent of the platform's locale default.
    if log_file:
        file_handler = logging.FileHandler(log_file, encoding="utf-8")
        file_handler.setLevel(numeric_level)
        file_handler.setFormatter(formatter)
        root_logger.addHandler(file_handler)

    # Configure SousChef logger
    souschef_logger = logging.getLogger("souschef")
    souschef_logger.setLevel(numeric_level)
204
+
205
+
206
def get_logger(name: str) -> logging.Logger:
    """
    Look up the standard-library logger registered under ``name``.

    Args:
        name: Logger name (typically __name__).

    Returns:
        The logger returned by ``logging.getLogger`` for that name.

    """
    named_logger = logging.getLogger(name)
    return named_logger
218
+
219
+
220
def set_context(
    request_id: str | None = None,
    operation: str | None = None,
    cookbook: str | None = None,
) -> None:
    """
    Update the logging context variables.

    Only arguments that are not None are applied; a None argument leaves the
    corresponding context variable untouched.

    Args:
        request_id: Unique request/operation ID.
        operation: Current operation name.
        cookbook: Cookbook being processed.

    """
    pending_updates = (
        (request_id_var, request_id),
        (operation_var, operation),
        (cookbook_var, cookbook),
    )
    for context_var, new_value in pending_updates:
        if new_value is None:
            continue
        context_var.set(new_value)
240
+
241
+
242
def clear_context() -> None:
    """Reset the request ID, operation and cookbook context variables to None."""
    for context_var in (request_id_var, operation_var, cookbook_var):
        context_var.set(None)
247
+
248
+
249
class LogContext:
    """
    Context manager for temporary logging context.

    On entry the current values of all three context variables are
    snapshotted and the requested values applied; on exit the snapshot is
    restored. Snapshots are kept on a stack, so entering the same instance
    again while it is already active restores correctly at each level.

    Example:
        with LogContext(operation="convert_recipe", cookbook="apache"):
            logger.info("Converting recipe")

    """

    def __init__(
        self,
        request_id: str | None = None,
        operation: str | None = None,
        cookbook: str | None = None,
    ):
        """
        Initialise log context.

        Args:
            request_id: Unique request/operation ID.
            operation: Current operation name.
            cookbook: Cookbook being processed.

        """
        self.request_id = request_id
        self.operation = operation
        self.cookbook = cookbook
        # Most recent snapshot; kept as a public attribute for compatibility.
        self.previous_context: dict[str, Any] = {}
        # One snapshot per active ``with`` level of this instance.
        self._snapshots: list[dict[str, Any]] = []

    def __enter__(self) -> "LogContext":
        """Enter context and save previous values."""
        snapshot = {
            "request_id": request_id_var.get(),
            "operation": operation_var.get(),
            "cookbook": cookbook_var.get(),
        }
        self._snapshots.append(snapshot)
        self.previous_context = snapshot
        set_context(
            request_id=self.request_id,
            operation=self.operation,
            cookbook=self.cookbook,
        )
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Exit context and restore the values saved by the matching enter."""
        # Fall back to previous_context if no snapshot was pushed, which
        # mirrors the behaviour of the single-slot implementation.
        snapshot = self._snapshots.pop() if self._snapshots else self.previous_context
        request_id_var.set(snapshot["request_id"])
        operation_var.set(snapshot["operation"])
        cookbook_var.set(snapshot["cookbook"])
298
+
299
+
300
def log_operation(operation_name: str):
    """
    Decorate functions to log operations with structured context.

    The wrapped call runs inside ``LogContext(operation=operation_name)`` and
    emits a "Starting" record, then either a "Completed" record or a "Failed"
    record with traceback (the exception is re-raised). Coroutine functions
    are supported: they are awaited inside the context, so completion and
    failure are logged when the coroutine actually finishes.

    Args:
        operation_name: Name of the operation being logged.

    Returns:
        A decorator that wraps the target function with the logging above.

    Example:
        @log_operation("convert_recipe")
        def convert_recipe(recipe_path: str) -> str:
            # Operation is logged with context
            return playbook_content

    """

    def decorator(func):
        import functools
        import inspect

        logger = get_logger(func.__module__)
        # Passed as ``extra`` on every record; logging copies it, never
        # mutates it, so a single dict can be shared across calls.
        extra = {"function": getattr(func, "__name__", repr(func))}

        if inspect.iscoroutinefunction(func):

            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs):
                with LogContext(operation=operation_name):
                    logger.info("Starting %s", operation_name, extra=extra)
                    try:
                        result = await func(*args, **kwargs)
                    except Exception as exc:
                        logger.exception(
                            "Failed %s: %s", operation_name, exc, extra=extra
                        )
                        raise
                    logger.info("Completed %s", operation_name, extra=extra)
                    return result

            return async_wrapper

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            with LogContext(operation=operation_name):
                logger.info("Starting %s", operation_name, extra=extra)
                try:
                    result = func(*args, **kwargs)
                except Exception as exc:
                    logger.exception(
                        "Failed %s: %s", operation_name, exc, extra=extra
                    )
                    raise
                logger.info("Completed %s", operation_name, extra=extra)
                return result

        return wrapper

    return decorator
souschef/core/metrics.py CHANGED
@@ -33,49 +33,116 @@ class EffortMetrics:
33
33
  - Base unit: person-days (with decimal precision)
34
34
  - Derived: hours, weeks with consistent conversion factors
35
35
  - Ranges: For display purposes, converting days to week ranges
36
+ - WITH/WITHOUT SousChef: Shows effort reduction with AI assistance
36
37
 
37
38
  Ensures all components (migration planning, dependency mapping,
38
39
  validation reports) use the same underlying numbers.
39
40
  """
40
41
 
41
42
  estimated_days: float
42
- """Base unit: person-days (e.g., 2.5, 5.0, 10.0)"""
43
+ """Base unit: person-days WITHOUT SousChef assistance (manual migration)"""
44
+
45
@property
def estimated_days_with_souschef(self) -> float:
    """
    Effort in person-days WITH SousChef AI assistance.

    Rationale given for the reduction:
    - SousChef handles 60-70% of boilerplate conversion automatically
    - Human still needed for validation, custom logic, testing
    - Overall reduction: 40-50% of manual effort

    The implementation applies a flat 50% factor to ``estimated_days`` and
    rounds the result to one decimal place.
    """
    assisted_days = self.estimated_days * 0.5
    return round(assisted_days, 1)
56
+
57
@property
def time_saved(self) -> float:
    """Days saved by using SousChef: manual minus assisted, to one decimal."""
    manual_days = self.estimated_days
    assisted_days = self.estimated_days_with_souschef
    return round(manual_days - assisted_days, 1)
61
+
62
@property
def efficiency_gain_percent(self) -> int:
    """
    Efficiency gain from using SousChef, as a whole-number percentage.

    Returns 0 when ``estimated_days`` is 0, which avoids dividing by zero.
    """
    manual_days = self.estimated_days
    if manual_days == 0:
        return 0
    saved_fraction = self.time_saved / manual_days
    return round(saved_fraction * 100)
43
68
 
44
69
@property
def estimated_hours(self) -> float:
    """Manual (WITHOUT SousChef) effort in hours, at 8 hours per workday."""
    hours_per_day = 8
    return self.estimated_days * hours_per_day
48
73
 
74
@property
def estimated_hours_with_souschef(self) -> float:
    """Assisted (WITH SousChef) effort in hours, at 8 hours per workday."""
    hours_per_day = 8
    return self.estimated_days_with_souschef * hours_per_day
78
+
49
79
@property
def estimated_weeks_low(self) -> int:
    """
    Lower bound of the manual (WITHOUT SousChef) estimate in whole weeks.

    Computed as ``estimated_days / 7`` truncated to an integer, never less
    than 1. Described upstream as assuming optimal parallelization.
    """
    whole_weeks = int(self.estimated_days / 7)
    return whole_weeks if whole_weeks > 1 else 1
53
83
 
54
84
@property
def estimated_weeks_high(self) -> int:
    """
    Upper bound of the manual (WITHOUT SousChef) estimate in whole weeks.

    Computed as ``estimated_days / 3.5`` truncated to an integer, never less
    than 1. Described upstream as assuming sequential/limited parallelization.
    """
    whole_weeks = int(self.estimated_days / 3.5)
    return whole_weeks if whole_weeks > 1 else 1
58
88
 
89
@property
def estimated_weeks_low_with_souschef(self) -> int:
    """
    Lower bound of the assisted (WITH SousChef) estimate in whole weeks.

    Computed as ``estimated_days_with_souschef / 7`` truncated to an
    integer, never less than 1.
    """
    whole_weeks = int(self.estimated_days_with_souschef / 7)
    return whole_weeks if whole_weeks > 1 else 1
93
+
94
@property
def estimated_weeks_high_with_souschef(self) -> int:
    """
    Upper bound of the assisted (WITH SousChef) estimate in whole weeks.

    Computed as ``estimated_days_with_souschef / 3.5`` truncated to an
    integer, never less than 1.
    """
    whole_weeks = int(self.estimated_days_with_souschef / 3.5)
    return whole_weeks if whole_weeks > 1 else 1
98
+
59
99
@property
def estimated_weeks_range(self) -> str:
    """
    Human-readable week range WITHOUT SousChef (e.g., '2-4 weeks').

    When both bounds are equal a single value is shown, with "week"
    pluralised unless that value is 1.
    """
    low, high = self.estimated_weeks_low, self.estimated_weeks_high
    if low != high:
        return f"{low}-{high} weeks"
    plural_suffix = "" if low == 1 else "s"
    return f"{low} week{plural_suffix}"
67
107
 
108
@property
def estimated_weeks_range_with_souschef(self) -> str:
    """
    Human-readable week range WITH SousChef (e.g., '1-2 weeks').

    When both bounds are equal a single value is shown, with "week"
    pluralised unless that value is 1.
    """
    low = self.estimated_weeks_low_with_souschef
    high = self.estimated_weeks_high_with_souschef
    if low != high:
        return f"{low}-{high} weeks"
    plural_suffix = "" if low == 1 else "s"
    return f"{low} week{plural_suffix}"
116
+
68
117
@property
def estimated_days_formatted(self) -> str:
    """
    Formatted manual (WITHOUT SousChef) estimate in days.

    Whole-number estimates are shown without decimals and use the singular
    "day" for exactly 1; fractional estimates are shown with one decimal
    place. Non-finite values are rendered with the one-decimal form instead
    of raising.
    """
    days = self.estimated_days
    # float(...).is_integer() is False for inf/nan, whereas int() on them
    # raises OverflowError/ValueError.
    if float(days).is_integer():
        whole_days = int(days)
        unit = "day" if whole_days == 1 else "days"
        return f"{whole_days} {unit}"
    return f"{days:.1f} days"
74
123
 
124
@property
def estimated_days_formatted_with_souschef(self) -> str:
    """
    Formatted assisted (WITH SousChef) estimate in days.

    Whole-number estimates are shown without decimals and use the singular
    "day" for exactly 1; fractional estimates are shown with one decimal
    place. Non-finite values are rendered with the one-decimal form instead
    of raising.
    """
    # Read the property once; it recomputes the rounded value on each access.
    days = self.estimated_days_with_souschef
    if float(days).is_integer():
        whole_days = int(days)
        unit = "day" if whole_days == 1 else "days"
        return f"{whole_days} {unit}"
    return f"{days:.1f} days"
130
+
75
131
def __str__(self) -> str:
    """Render as '<formatted days> (<week range>)' for the manual estimate."""
    days_text = self.estimated_days_formatted
    weeks_text = self.estimated_weeks_range
    return "{} ({})".format(days_text, weeks_text)
78
134
 
135
def get_comparison_summary(self) -> str:
    """
    Format a three-line comparison of manual vs SousChef-assisted effort.

    Returns:
        Newline-separated text: the manual estimate, the assisted estimate,
        and the time saved with the efficiency gain percentage.
    """
    manual_line = (
        f"Without SousChef: {self.estimated_days_formatted} "
        f"({self.estimated_weeks_range})"
    )
    assisted_line = (
        f"With SousChef: {self.estimated_days_formatted_with_souschef} "
        f"({self.estimated_weeks_range_with_souschef})"
    )
    savings_line = (
        f"Time Saved: {self.time_saved} days "
        f"({self.efficiency_gain_percent}% faster)"
    )
    return "\n".join((manual_line, assisted_line, savings_line))
145
+
79
146
 
80
147
  class TeamRecommendation(NamedTuple):
81
148
  """Team composition and timeline recommendation."""
@@ -0,0 +1,230 @@
1
+ """URL validation utilities for user-provided endpoints."""
2
+
3
+ import ipaddress
4
+ import os
5
+ from collections.abc import Iterable
6
+ from urllib.parse import urlparse, urlunparse
7
+
8
# Name of the environment variable read for the comma-separated hostname
# allowlist when the caller does not pass a different variable name.
DEFAULT_ALLOWLIST_ENV = "SOUSCHEF_ALLOWED_HOSTNAMES"
9
+
10
+
11
def _split_allowlist(env_value: str) -> set[str]:
    """
    Parse a comma-separated allowlist value into hostnames.

    Args:
        env_value: Raw environment value containing hostnames.

    Returns:
        A set of lower-cased hostnames with surrounding whitespace removed;
        blank entries are dropped.

    """
    hostnames: set[str] = set()
    for raw_entry in env_value.split(","):
        cleaned = raw_entry.strip()
        if cleaned:
            hostnames.add(cleaned.lower())
    return hostnames
23
+
24
+
25
def _matches_allowlist(hostname: str, allowlist: Iterable[str]) -> bool:
    """
    Check whether a hostname matches the allowlist.

    Entries are compared after lower-casing and trimming. An entry of the
    form ``*.example.com`` matches any hostname ending in ``.example.com``
    (but not ``example.com`` itself); any other entry must equal the
    hostname exactly. The hostname is compared as given.

    Args:
        hostname: Hostname to validate.
        allowlist: Iterable of allowlist entries.

    Returns:
        True if the hostname matches the allowlist.

    """

    def _entry_matches(raw_entry: str) -> bool:
        pattern = raw_entry.lower().strip()
        if not pattern:
            return False
        if not pattern.startswith("*."):
            return hostname == pattern
        # Drop the "*" but keep the leading dot so only subdomains match.
        dotted_suffix = pattern[1:]
        return hostname.endswith(dotted_suffix) and hostname != dotted_suffix.lstrip(
            "."
        )

    return any(_entry_matches(candidate) for candidate in allowlist)
48
+
49
+
50
def _is_private_hostname(hostname: str) -> bool:
    """
    Determine whether a hostname is a private or local address.

    This check only validates IP literals and well-known local hostnames; it
    performs no DNS resolution. The hostname is lower-cased and any trailing
    root dot is removed first, so ``localhost.`` and ``127.0.0.1.`` are
    treated like their dot-less forms.

    Args:
        hostname: Hostname to inspect.

    Returns:
        True if the hostname is private or local.

    """
    # A trailing dot is the fully-qualified spelling of the same host and
    # must not let a local name slip past the checks below.
    candidate = hostname.lower().rstrip(".")

    local_suffixes = (".localhost", ".local", ".localdomain", ".internal")
    if candidate == "localhost" or candidate.endswith(local_suffixes):
        return True

    try:
        ip_address = ipaddress.ip_address(candidate)
    except ValueError:
        # Not an IP literal: an ordinary hostname that is not known-local.
        return False

    return bool(
        ip_address.is_private
        or ip_address.is_loopback
        or ip_address.is_link_local
        or ip_address.is_reserved
        or ip_address.is_multicast
        or ip_address.is_unspecified
        # Also rejects non-public ranges that is_private does not cover,
        # such as the 100.64.0.0/10 shared address space.
        or not ip_address.is_global
    )
80
+
81
+
82
def _is_ip_literal(hostname: str) -> bool:
    """
    Report whether the hostname parses as an IPv4 or IPv6 address.

    Args:
        hostname: Hostname to inspect.

    Returns:
        True if ``ipaddress.ip_address`` accepts the hostname.

    """
    try:
        ipaddress.ip_address(hostname)
    except ValueError:
        parsed_ok = False
    else:
        parsed_ok = True
    return parsed_ok
98
+
99
+
100
+ def _normalise_url_value(base_url: str, default_url: str | None) -> str:
101
+ """
102
+ Normalise the input URL value.
103
+
104
+ Args:
105
+ base_url: URL provided by the user.
106
+ default_url: Default URL to use when base_url is empty.
107
+
108
+ Returns:
109
+ Normalised URL string.
110
+
111
+ """
112
+ url_value = str(base_url).strip()
113
+ if not url_value:
114
+ if default_url is None:
115
+ raise ValueError("Base URL is required.")
116
+ url_value = default_url
117
+
118
+ if "://" not in url_value:
119
+ url_value = f"https://{url_value}"
120
+
121
+ return url_value
122
+
123
+
124
def _validate_scheme(parsed_url) -> None:
    """
    Require the parsed URL to use the HTTPS scheme.

    Args:
        parsed_url: Parsed URL object.

    Raises:
        ValueError: If the scheme, compared case-insensitively, is not https.

    """
    scheme = parsed_url.scheme.lower()
    if scheme == "https":
        return
    raise ValueError("Base URL must use HTTPS.")
134
+
135
+
136
def _validate_hostname(
    hostname: str,
    allowlist: set[str],
    allowed_hosts: set[str] | None,
) -> None:
    """
    Validate hostname using allowlist and public host rules.

    The hostname is lower-cased and any trailing root dot is removed before
    the checks run, so ``localhost.`` is judged exactly like ``localhost``.
    A host that matches the allowlist skips the public-host and
    fully-qualified-name checks.

    Args:
        hostname: Hostname to validate.
        allowlist: Allowlisted hostnames.
        allowed_hosts: Provider-specific allowed hostnames.

    Raises:
        ValueError: If the host is not permitted, not in a non-empty
            allowlist, private/local, or not fully qualified.

    """
    # "host." and "host" name the same machine; normalising here stops the
    # dotted spelling from bypassing the private-host and FQDN checks.
    hostname = hostname.lower().rstrip(".")

    if allowed_hosts and hostname not in allowed_hosts:
        raise ValueError("Base URL host is not permitted.")

    allowlist_match = bool(allowlist) and _matches_allowlist(hostname, allowlist)
    if allowlist and not allowlist_match:
        raise ValueError("Base URL host is not in the allowlist.")

    if allowlist_match:
        # Explicitly allowlisted hosts may be internal or single-label.
        return

    if _is_private_hostname(hostname):
        raise ValueError("Base URL host must be a public hostname.")

    if "." not in hostname and not _is_ip_literal(hostname):
        raise ValueError("Base URL host must be a fully qualified domain name.")
165
+
166
+
167
def _normalise_parsed_url(parsed_url, strip_path: bool) -> str:
    """
    Normalise a parsed URL into a string.

    Params, query and fragment are always removed. The path is removed only
    when ``strip_path`` is true. Trailing slashes are trimmed from the
    result.

    Args:
        parsed_url: Parsed URL object.
        strip_path: Whether to also remove the path.

    Returns:
        Normalised URL string.

    """
    cleared_fields = {"params": "", "query": "", "fragment": ""}
    if strip_path:
        cleared_fields["path"] = ""

    rebuilt = urlunparse(parsed_url._replace(**cleared_fields))
    return str(rebuilt).rstrip("/")
184
+
185
+
186
def validate_user_provided_url(
    base_url: str,
    *,
    default_url: str | None = None,
    allowlist_env_var: str = DEFAULT_ALLOWLIST_ENV,
    allowed_hosts: set[str] | None = None,
    strip_path: bool = False,
) -> str:
    """
    Validate a user-provided URL for outbound requests.

    The URL must use HTTPS, include a hostname, carry no user information,
    have a valid port if one is given, and pass the hostname rules
    (provider hosts, environment allowlist, public-host checks).

    Args:
        base_url: URL provided by the user.
        default_url: Default URL to use when base_url is empty.
        allowlist_env_var: Environment variable containing allowed hostnames.
        allowed_hosts: Explicit host allowlist for provider-specific endpoints.
        strip_path: Whether to strip the path from the returned URL.

    Returns:
        Validated and normalised URL string.

    Raises:
        ValueError: If the URL is invalid or fails security validation.

    """
    url_value = _normalise_url_value(base_url, default_url)
    parsed = urlparse(url_value)

    _validate_scheme(parsed)

    if not parsed.hostname:
        raise ValueError("Base URL must include a hostname.")

    # username is None only when the authority has no "@" at all, so this
    # also rejects empty user information such as "https://@host".
    if parsed.username is not None or parsed.password is not None:
        raise ValueError("Base URL must not include user credentials.")

    # Accessing .port parses it; malformed or out-of-range ports raise
    # ValueError here instead of surviving into the returned URL.
    try:
        _ = parsed.port
    except ValueError as err:
        raise ValueError("Base URL includes an invalid port.") from err

    allowlist_value = os.environ.get(allowlist_env_var, "")
    allowlist = _split_allowlist(allowlist_value)
    normalised_allowed_hosts = (
        {host.lower() for host in allowed_hosts} if allowed_hosts else None
    )

    _validate_hostname(parsed.hostname, allowlist, normalised_allowed_hosts)

    return _normalise_parsed_url(parsed, strip_path)