mcp-souschef 3.2.0__py3-none-any.whl → 3.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mcp_souschef-3.2.0.dist-info → mcp_souschef-3.5.2.dist-info}/METADATA +159 -30
- {mcp_souschef-3.2.0.dist-info → mcp_souschef-3.5.2.dist-info}/RECORD +19 -14
- {mcp_souschef-3.2.0.dist-info → mcp_souschef-3.5.2.dist-info}/WHEEL +1 -1
- souschef/assessment.py +81 -25
- souschef/cli.py +265 -6
- souschef/converters/playbook.py +413 -156
- souschef/converters/template.py +122 -5
- souschef/core/ai_schemas.py +81 -0
- souschef/core/http_client.py +394 -0
- souschef/core/logging.py +344 -0
- souschef/core/metrics.py +73 -6
- souschef/core/url_validation.py +230 -0
- souschef/server.py +130 -0
- souschef/ui/app.py +20 -6
- souschef/ui/pages/ai_settings.py +151 -30
- souschef/ui/pages/chef_server_settings.py +300 -0
- souschef/ui/pages/cookbook_analysis.py +66 -10
- {mcp_souschef-3.2.0.dist-info → mcp_souschef-3.5.2.dist-info}/entry_points.txt +0 -0
- {mcp_souschef-3.2.0.dist-info → mcp_souschef-3.5.2.dist-info}/licenses/LICENSE +0 -0
souschef/core/logging.py
ADDED
@@ -0,0 +1,344 @@
"""
Structured logging configuration for SousChef.

This module provides structured logging with JSON output support,
contextual information, and integration with monitoring systems.
"""

import logging
import sys
from contextvars import ContextVar
from typing import Any, Literal

# Context variables for structured logging
request_id_var: ContextVar[str | None] = ContextVar("request_id", default=None)
operation_var: ContextVar[str | None] = ContextVar("operation", default=None)
cookbook_var: ContextVar[str | None] = ContextVar("cookbook", default=None)


class StructuredFormatter(logging.Formatter):
    """
    Formatter that outputs structured log records.

    Supports both JSON and human-readable text formats.
    """

    def __init__(
        self,
        fmt: str | None = None,
        datefmt: str | None = None,
        style: Literal["%", "{", "$"] = "%",
        json_format: bool = False,
    ):
        """
        Initialise structured formatter.

        Args:
            fmt: Log format string (ignored if json_format=True).
            datefmt: Date format string.
            style: Format style ('%', '{', or '$').
            json_format: Whether to output JSON format.

        """
        super().__init__(fmt, datefmt, style)
        self.json_format = json_format

    def format(self, record: logging.LogRecord) -> str:
        """
        Format log record as structured output.

        Args:
            record: Log record to format.

        Returns:
            Formatted log string (JSON or text).

        """
        # Add context variables to record
        record.request_id = request_id_var.get()
        record.operation = operation_var.get()
        record.cookbook = cookbook_var.get()

        if self.json_format:
            return self._format_json(record)
        else:
            return self._format_text(record)

    def _format_json(self, record: logging.LogRecord) -> str:
        """Format record as JSON."""
        import json

        log_data = {
            "timestamp": self.formatTime(record, self.datefmt),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
            "module": record.module,
            "function": record.funcName,
            "line": record.lineno,
        }

        # Add context if available
        request_id = getattr(record, "request_id", None)
        operation = getattr(record, "operation", None)
        cookbook = getattr(record, "cookbook", None)

        if request_id:
            log_data["request_id"] = request_id
        if operation:
            log_data["operation"] = operation
        if cookbook:
            log_data["cookbook"] = cookbook

        # Add exception info if present
        if record.exc_info:
            log_data["exception"] = self.formatException(record.exc_info)

        # Add extra fields
        for key, value in record.__dict__.items():
            if key not in {
                "name",
                "msg",
                "args",
                "created",
                "msecs",
                "levelname",
                "levelno",
                "pathname",
                "filename",
                "module",
                "exc_info",
                "exc_text",
                "stack_info",
                "lineno",
                "funcName",
                "processName",
                "process",
                "threadName",
                "thread",
                "request_id",
                "operation",
                "cookbook",
                "message",
                "asctime",
                "relativeCreated",
            } and not key.startswith("_"):
                log_data[key] = value

        return json.dumps(log_data, default=str)

    def _format_text(self, record: logging.LogRecord) -> str:
        """Format record as human-readable text."""
        # Use parent formatter for base formatting
        base_msg = super().format(record)

        # Add context if available
        context_parts = []
        request_id = getattr(record, "request_id", None)
        operation = getattr(record, "operation", None)
        cookbook = getattr(record, "cookbook", None)

        if request_id:
            context_parts.append(f"request_id={request_id}")
        if operation:
            context_parts.append(f"operation={operation}")
        if cookbook:
            context_parts.append(f"cookbook={cookbook}")

        if context_parts:
            context_str = " [" + ", ".join(context_parts) + "]"
            return base_msg + context_str

        return base_msg


def configure_logging(
    level: str = "INFO",
    json_format: bool = False,
    log_file: str | None = None,
) -> None:
    """
    Configure structured logging for SousChef.

    Args:
        level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
        json_format: Whether to output JSON format.
        log_file: Optional file path for log output.

    """
    # Convert string level to logging constant
    numeric_level = getattr(logging, level.upper(), logging.INFO)

    # Create formatter
    if json_format:
        formatter = StructuredFormatter(json_format=True)
    else:
        formatter = StructuredFormatter(
            fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )

    # Configure root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)

    # Remove existing handlers
    root_logger.handlers.clear()

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(numeric_level)
    console_handler.setFormatter(formatter)
    root_logger.addHandler(console_handler)

    # File handler if specified
    if log_file:
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(numeric_level)
        file_handler.setFormatter(formatter)
        root_logger.addHandler(file_handler)

    # Configure SousChef logger
    souschef_logger = logging.getLogger("souschef")
    souschef_logger.setLevel(numeric_level)


def get_logger(name: str) -> logging.Logger:
    """
    Get a logger instance for the given name.

    Args:
        name: Logger name (typically __name__).

    Returns:
        Configured logger instance.

    """
    return logging.getLogger(name)


def set_context(
    request_id: str | None = None,
    operation: str | None = None,
    cookbook: str | None = None,
) -> None:
    """
    Set context variables for structured logging.

    Args:
        request_id: Unique request/operation ID.
        operation: Current operation name.
        cookbook: Cookbook being processed.

    """
    if request_id is not None:
        request_id_var.set(request_id)
    if operation is not None:
        operation_var.set(operation)
    if cookbook is not None:
        cookbook_var.set(cookbook)


def clear_context() -> None:
    """Clear all context variables."""
    request_id_var.set(None)
    operation_var.set(None)
    cookbook_var.set(None)


class LogContext:
    """
    Context manager for temporary logging context.

    Example:
        with LogContext(operation="convert_recipe", cookbook="apache"):
            logger.info("Converting recipe")

    """

    def __init__(
        self,
        request_id: str | None = None,
        operation: str | None = None,
        cookbook: str | None = None,
    ):
        """
        Initialise log context.

        Args:
            request_id: Unique request/operation ID.
            operation: Current operation name.
            cookbook: Cookbook being processed.

        """
        self.request_id = request_id
        self.operation = operation
        self.cookbook = cookbook
        self.previous_context: dict[str, Any] = {}

    def __enter__(self) -> "LogContext":
        """Enter context and save previous values."""
        self.previous_context = {
            "request_id": request_id_var.get(),
            "operation": operation_var.get(),
            "cookbook": cookbook_var.get(),
        }
        set_context(
            request_id=self.request_id,
            operation=self.operation,
            cookbook=self.cookbook,
        )
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Exit context and restore previous values."""
        request_id_var.set(self.previous_context["request_id"])
        operation_var.set(self.previous_context["operation"])
        cookbook_var.set(self.previous_context["cookbook"])


def log_operation(operation_name: str):
    """
    Decorate functions to log operations with structured context.

    Args:
        operation_name: Name of the operation being logged.

    Example:
        @log_operation("convert_recipe")
        def convert_recipe(recipe_path: str) -> str:
            # Operation is logged with context
            return playbook_content

    """

    def decorator(func):
        import functools

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            logger = get_logger(func.__module__)

            with LogContext(operation=operation_name):
                logger.info(
                    f"Starting {operation_name}",
                    extra={"function": func.__name__},
                )
                try:
                    result = func(*args, **kwargs)
                    logger.info(
                        f"Completed {operation_name}",
                        extra={"function": func.__name__},
                    )
                    return result
                except Exception as e:
                    logger.error(
                        f"Failed {operation_name}: {e}",
                        extra={"function": func.__name__},
                        exc_info=True,
                    )
                    raise

        return wrapper

    return decorator
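For orientation, here is a minimal usage sketch of the module above. It is an illustration based on the docstrings, not documented API; the import path is assumed to mirror the file location souschef/core/logging.py.

    from souschef.core.logging import (  # import path assumed from the file location
        LogContext,
        configure_logging,
        get_logger,
        log_operation,
    )

    # Emit JSON log lines to stdout; request_id/operation/cookbook fields
    # appear in each record while the context is set.
    configure_logging(level="INFO", json_format=True)
    logger = get_logger(__name__)

    with LogContext(request_id="req-123", operation="assess", cookbook="apache"):
        logger.info("Assessing cookbook")  # record carries the three context fields

    @log_operation("convert_recipe")
    def convert_recipe(recipe_path: str) -> str:
        # Start/complete/failure messages are logged around this call automatically.
        return recipe_path.upper()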
souschef/core/metrics.py
CHANGED
@@ -33,49 +33,116 @@ class EffortMetrics:
     - Base unit: person-days (with decimal precision)
     - Derived: hours, weeks with consistent conversion factors
     - Ranges: For display purposes, converting days to week ranges
+    - WITH/WITHOUT SousChef: Shows effort reduction with AI assistance
 
     Ensures all components (migration planning, dependency mapping,
     validation reports) use the same underlying numbers.
     """
 
     estimated_days: float
-    """Base unit: person-days
+    """Base unit: person-days WITHOUT SousChef assistance (manual migration)"""
+
+    @property
+    def estimated_days_with_souschef(self) -> float:
+        """
+        Effort WITH SousChef AI assistance.
+
+        Realistic reduction factors based on complexity:
+        - SousChef handles 60-70% of boilerplate conversion automatically
+        - Human still needed for validation, custom logic, testing
+        - Overall reduction: 40-50% of manual effort
+        """
+        return round(self.estimated_days * 0.5, 1)
+
+    @property
+    def time_saved(self) -> float:
+        """Time saved by using SousChef (in days)."""
+        return round(self.estimated_days - self.estimated_days_with_souschef, 1)
+
+    @property
+    def efficiency_gain_percent(self) -> int:
+        """Efficiency gain percentage from using SousChef."""
+        if self.estimated_days == 0:
+            return 0
+        return round((self.time_saved / self.estimated_days) * 100)
 
     @property
     def estimated_hours(self) -> float:
-        """Convert days to hours using standard 8-hour workday."""
+        """Convert days to hours using standard 8-hour workday (WITHOUT SousChef)."""
         return self.estimated_days * 8
 
+    @property
+    def estimated_hours_with_souschef(self) -> float:
+        """Convert days to hours using standard 8-hour workday (WITH SousChef)."""
+        return self.estimated_days_with_souschef * 8
+
     @property
     def estimated_weeks_low(self) -> int:
-        """Conservative estimate: assumes optimal parallelization."""
+        """Conservative estimate: assumes optimal parallelization (WITHOUT SousChef)."""
         return max(1, int(self.estimated_days / 7))
 
     @property
     def estimated_weeks_high(self) -> int:
-        """Realistic estimate:
+        """Realistic estimate: sequential parallelization (WITHOUT SousChef)."""
         return max(1, int(self.estimated_days / 3.5))
 
+    @property
+    def estimated_weeks_low_with_souschef(self) -> int:
+        """Conservative estimate: assumes optimal parallelization (WITH SousChef)."""
+        return max(1, int(self.estimated_days_with_souschef / 7))
+
+    @property
+    def estimated_weeks_high_with_souschef(self) -> int:
+        """Realistic estimate: sequential parallelization (WITH SousChef)."""
+        return max(1, int(self.estimated_days_with_souschef / 3.5))
+
     @property
     def estimated_weeks_range(self) -> str:
-        """Human-readable week range (e.g., '2-4 weeks')."""
+        """Human-readable week range WITHOUT SousChef (e.g., '2-4 weeks')."""
         low = self.estimated_weeks_low
         high = self.estimated_weeks_high
         if low == high:
             return f"{low} week{'s' if low != 1 else ''}"
         return f"{low}-{high} weeks"
 
+    @property
+    def estimated_weeks_range_with_souschef(self) -> str:
+        """Human-readable week range WITH SousChef (e.g., '1-2 weeks')."""
+        low = self.estimated_weeks_low_with_souschef
+        high = self.estimated_weeks_high_with_souschef
+        if low == high:
+            return f"{low} week{'s' if low != 1 else ''}"
+        return f"{low}-{high} weeks"
+
     @property
     def estimated_days_formatted(self) -> str:
-        """Formatted days with appropriate precision."""
+        """Formatted days with appropriate precision (WITHOUT SousChef)."""
         if self.estimated_days == int(self.estimated_days):
             return f"{int(self.estimated_days)} days"
         return f"{self.estimated_days:.1f} days"
 
+    @property
+    def estimated_days_formatted_with_souschef(self) -> str:
+        """Formatted days with appropriate precision (WITH SousChef)."""
+        if self.estimated_days_with_souschef == int(self.estimated_days_with_souschef):
+            return f"{int(self.estimated_days_with_souschef)} days"
+        return f"{self.estimated_days_with_souschef:.1f} days"
+
     def __str__(self) -> str:
         """Return a string representation of effort metrics."""
         return f"{self.estimated_days_formatted} ({self.estimated_weeks_range})"
 
+    def get_comparison_summary(self) -> str:
+        """Format comparison of manual vs SousChef-assisted effort."""
+        return (
+            f"Without SousChef: {self.estimated_days_formatted} "
+            f"({self.estimated_weeks_range})\n"
+            f"With SousChef: {self.estimated_days_formatted_with_souschef} "
+            f"({self.estimated_weeks_range_with_souschef})\n"
+            f"Time Saved: {self.time_saved} days "
+            f"({self.efficiency_gain_percent}% faster)"
+        )
+
 
 class TeamRecommendation(NamedTuple):
     """Team composition and timeline recommendation."""
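A worked example of the new comparison properties, assuming EffortMetrics can be constructed from estimated_days alone (it is shown only as a bare annotated field here, suggesting a dataclass-style class; any other required fields would also need values):

    metrics = EffortMetrics(estimated_days=10.0)  # hypothetical construction

    metrics.estimated_days_with_souschef          # 5.0  (10.0 * 0.5)
    metrics.time_saved                            # 5.0 days
    metrics.efficiency_gain_percent               # 50
    metrics.estimated_weeks_range                 # "1-2 weeks" (max(1, int(10/7)) to int(10/3.5))
    metrics.estimated_weeks_range_with_souschef   # "1 week"

    print(metrics.get_comparison_summary())
    # Without SousChef: 10 days (1-2 weeks)
    # With SousChef: 5 days (1 week)
    # Time Saved: 5.0 days (50% faster)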
souschef/core/url_validation.py
ADDED

@@ -0,0 +1,230 @@
"""URL validation utilities for user-provided endpoints."""

import ipaddress
import os
from collections.abc import Iterable
from urllib.parse import urlparse, urlunparse

DEFAULT_ALLOWLIST_ENV = "SOUSCHEF_ALLOWED_HOSTNAMES"


def _split_allowlist(env_value: str) -> set[str]:
    """
    Split an allowlist environment variable into hostnames.

    Args:
        env_value: Raw environment value containing hostnames.

    Returns:
        A set of normalised hostnames.

    """
    return {entry.strip().lower() for entry in env_value.split(",") if entry.strip()}


def _matches_allowlist(hostname: str, allowlist: Iterable[str]) -> bool:
    """
    Check whether a hostname matches the allowlist.

    Args:
        hostname: Hostname to validate.
        allowlist: Iterable of allowlist entries.

    Returns:
        True if the hostname matches the allowlist.

    """
    for entry in allowlist:
        entry = entry.lower().strip()
        if not entry:
            continue
        if entry.startswith("*."):
            suffix = entry[1:]
            if hostname.endswith(suffix) and hostname != suffix.lstrip("."):
                return True
        elif hostname == entry:
            return True
    return False


def _is_private_hostname(hostname: str) -> bool:
    """
    Determine whether a hostname resolves to a private or local address.

    This check only validates IP literals and well-known local hostnames.

    Args:
        hostname: Hostname to inspect.

    Returns:
        True if the hostname is private or local.

    """
    local_suffixes = (".localhost", ".local", ".localdomain", ".internal")
    if hostname in {"localhost"} or hostname.endswith(local_suffixes):
        return True

    try:
        ip_address = ipaddress.ip_address(hostname)
    except ValueError:
        return False

    return bool(
        ip_address.is_private
        or ip_address.is_loopback
        or ip_address.is_link_local
        or ip_address.is_reserved
        or ip_address.is_multicast
        or ip_address.is_unspecified
    )


def _is_ip_literal(hostname: str) -> bool:
    """
    Check whether the hostname is an IP literal.

    Args:
        hostname: Hostname to inspect.

    Returns:
        True if the hostname is an IP literal.

    """
    try:
        ipaddress.ip_address(hostname)
    except ValueError:
        return False
    return True


def _normalise_url_value(base_url: str, default_url: str | None) -> str:
    """
    Normalise the input URL value.

    Args:
        base_url: URL provided by the user.
        default_url: Default URL to use when base_url is empty.

    Returns:
        Normalised URL string.

    """
    url_value = str(base_url).strip()
    if not url_value:
        if default_url is None:
            raise ValueError("Base URL is required.")
        url_value = default_url

    if "://" not in url_value:
        url_value = f"https://{url_value}"

    return url_value


def _validate_scheme(parsed_url) -> None:
    """
    Validate URL scheme.

    Args:
        parsed_url: Parsed URL object.

    """
    if parsed_url.scheme.lower() != "https":
        raise ValueError("Base URL must use HTTPS.")


def _validate_hostname(
    hostname: str,
    allowlist: set[str],
    allowed_hosts: set[str] | None,
) -> None:
    """
    Validate hostname using allowlist and public host rules.

    Args:
        hostname: Hostname to validate.
        allowlist: Allowlisted hostnames.
        allowed_hosts: Provider-specific allowed hostnames.

    """
    hostname = hostname.lower()
    is_ip_literal = _is_ip_literal(hostname)

    if allowed_hosts and hostname not in allowed_hosts:
        raise ValueError("Base URL host is not permitted.")

    allowlist_match = _matches_allowlist(hostname, allowlist) if allowlist else False
    if allowlist and not allowlist_match:
        raise ValueError("Base URL host is not in the allowlist.")

    if not allowlist_match and _is_private_hostname(hostname):
        raise ValueError("Base URL host must be a public hostname.")

    if not allowlist_match and "." not in hostname and not is_ip_literal:
        raise ValueError("Base URL host must be a fully qualified domain name.")


def _normalise_parsed_url(parsed_url, strip_path: bool) -> str:
    """
    Normalise a parsed URL into a string.

    Args:
        parsed_url: Parsed URL object.
        strip_path: Whether to strip paths, queries, and fragments.

    Returns:
        Normalised URL string.

    """
    cleaned = parsed_url._replace(params="", query="", fragment="")
    if strip_path:
        cleaned = cleaned._replace(path="")

    return str(urlunparse(cleaned)).rstrip("/")


def validate_user_provided_url(
    base_url: str,
    *,
    default_url: str | None = None,
    allowlist_env_var: str = DEFAULT_ALLOWLIST_ENV,
    allowed_hosts: set[str] | None = None,
    strip_path: bool = False,
) -> str:
    """
    Validate a user-provided URL for outbound requests.

    Args:
        base_url: URL provided by the user.
        default_url: Default URL to use when base_url is empty.
        allowlist_env_var: Environment variable containing allowed hostnames.
        allowed_hosts: Explicit host allowlist for provider-specific endpoints.
        strip_path: Whether to strip paths, queries, and fragments.

    Returns:
        Validated and normalised URL string.

    Raises:
        ValueError: If the URL is invalid or fails security validation.

    """
    url_value = _normalise_url_value(base_url, default_url)
    parsed = urlparse(url_value)

    _validate_scheme(parsed)

    if not parsed.hostname:
        raise ValueError("Base URL must include a hostname.")

    if parsed.username or parsed.password:
        raise ValueError("Base URL must not include user credentials.")

    allowlist_value = os.environ.get(allowlist_env_var, "")
    allowlist = _split_allowlist(allowlist_value)
    normalised_allowed_hosts = (
        {host.lower() for host in allowed_hosts} if allowed_hosts else None
    )

    _validate_hostname(parsed.hostname, allowlist, normalised_allowed_hosts)

    return _normalise_parsed_url(parsed, strip_path)
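A hedged usage sketch of the validator above, based on the code as shown (import path assumed to mirror the file location; the SOUSCHEF_ALLOWED_HOSTNAMES allowlist is optional):

    from souschef.core.url_validation import validate_user_provided_url  # path assumed

    # Bare hostnames are upgraded to https:// and, with strip_path=True,
    # reduced to scheme + host.
    validate_user_provided_url("api.example.com/v1", strip_path=True)
    # -> "https://api.example.com"

    # Private, loopback, or single-label hosts are rejected unless allowlisted
    # via SOUSCHEF_ALLOWED_HOSTNAMES or an explicit allowed_hosts set.
    try:
        validate_user_provided_url("https://192.168.1.10")
    except ValueError as exc:
        print(exc)  # "Base URL host must be a public hostname."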