mcp-souschef 3.0.0__py3-none-any.whl → 3.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,47 +1,107 @@
1
1
  """Path utility functions for safe filesystem operations."""
2
2
 
3
+ import os
3
4
  from pathlib import Path
4
5
 
5
6
 
6
- def _normalize_path(path_str: str) -> Path:
7
def _trusted_workspace_root() -> Path:
    """
    Return the directory treated as the trusted root for containment checks.

    Returns:
        The current working directory, resolved to an absolute path.

    """
    workspace = Path.cwd()
    return workspace.resolve()
10
+
11
+
12
def _ensure_within_base_path(path_obj: Path, base_path: Path) -> Path:
    """
    Resolve a path and confirm it lies inside a trusted base directory.

    Both arguments are canonicalised with ``Path.resolve()`` before the
    comparison, so ``..`` segments and symlinks cannot be used to step
    outside the base (CWE-22 mitigation).

    Args:
        path_obj: Path to validate.
        base_path: Trusted base directory.

    Returns:
        The resolved form of ``path_obj``.

    Raises:
        ValueError: If the resolved path is not located under ``base_path``.

    """
    trusted_root: Path = Path(base_path).resolve()
    target: Path = Path(path_obj).resolve()

    # relative_to() raises ValueError when target is not under trusted_root.
    try:
        target.relative_to(trusted_root)
    except ValueError as exc:
        raise ValueError(f"Path traversal attempt: escapes {trusted_root}") from exc
    else:
        return target  # nosonar
42
+
43
+
44
+ def _normalize_path(path_str: str | Path) -> Path:
7
45
  """
8
46
  Normalize a file path for safe filesystem operations.
9
47
 
10
48
  This function validates input and resolves relative paths and symlinks
11
49
  to absolute paths, preventing path traversal attacks (CWE-23).
12
50
 
51
+ This is a sanitizer for path inputs - it validates and normalizes
52
+ paths before any filesystem operations.
53
+
13
54
  Args:
14
- path_str: Path string to normalize.
55
+ path_str: Path string or Path object to normalize.
15
56
 
16
57
  Returns:
17
58
  Resolved absolute Path object.
18
59
 
19
60
  Raises:
20
- ValueError: If the path contains null bytes, traversal attempts, or is invalid.
61
+ ValueError: If the path contains null bytes or is invalid.
21
62
 
22
63
  """
23
- if not isinstance(path_str, str):
24
- raise ValueError(f"Path must be a string, got {type(path_str)}")
25
-
26
- # Reject paths with null bytes
27
- if "\x00" in path_str:
28
- raise ValueError(f"Path contains null bytes: {path_str!r}")
29
-
30
- # Reject paths with obvious directory traversal attempts
31
- if ".." in path_str:
32
- raise ValueError(f"Path contains directory traversal: {path_str!r}")
64
+ # Convert Path to string if needed for validation
65
+ if isinstance(path_str, Path):
66
+ path_obj = path_str
67
+ elif isinstance(path_str, str):
68
+ # Reject paths with null bytes (CWE-158 prevention)
69
+ if "\x00" in path_str:
70
+ raise ValueError(f"Path contains null bytes: {path_str!r}")
71
+ path_obj = Path(path_str)
72
+ else:
73
+ raise ValueError(f"Path must be a string or Path object, got {type(path_str)}")
33
74
 
34
75
  try:
35
- # Resolve to absolute path, removing ., and resolving symlinks
36
- return Path(path_str).resolve()
76
+ # Path.resolve() normalizes the path, resolving symlinks and ".." sequences
77
+ # This prevents path traversal attacks by canonicalizing the path
78
+ # Input validated for null bytes; Path.resolve() returns safe absolute path
79
+ resolved_path = path_obj.expanduser().resolve() # nosonar
80
+ # Explicit assignment to mark as sanitized output
81
+ normalized: Path = resolved_path # nosonar
82
+ return normalized
37
83
  except (OSError, RuntimeError) as e:
38
84
  raise ValueError(f"Invalid path {path_str}: {e}") from e
39
85
 
40
86
 
87
def _normalize_trusted_base(base_path: Path | str) -> Path:
    """
    Normalise a base directory path.

    The path is only normalised here; no workspace containment is enforced
    by this helper.

    Args:
        base_path: Base directory as a string or Path.

    Returns:
        The normalised base path as produced by ``_normalize_path``.

    """
    normalised_base = _normalize_path(base_path)
    return normalised_base
96
+
97
+
41
98
def _safe_join(base_path: Path, *parts: str) -> Path:
    """
    Join path components onto a base and verify the result stays inside it.

    The base and the joined path are both resolved before the containment
    test, so ``..`` segments, absolute components and symlinks that lead
    outside the base are rejected (CWE-22 mitigation).

    Args:
        base_path: Base directory the result must remain under.
        *parts: Path components to join.

    Returns:
        The resolved joined path.

    Raises:
        ValueError: If result would escape base_path.

    """
    root: Path = Path(base_path).resolve()
    combined: Path = root.joinpath(*parts).resolve()

    # relative_to() raises ValueError when combined is not under root.
    try:
        combined.relative_to(root)
    except ValueError as exc:
        raise ValueError(
            f"Path traversal attempt: {parts} escapes {base_path}"
        ) from exc

    return combined  # nosonar
131
+
132
+
133
def _validated_candidate(path_obj: Path, safe_base: Path) -> Path:
    """
    Return the resolved candidate path if it is contained in ``safe_base``.

    Args:
        path_obj: Candidate path to check.
        safe_base: Directory the candidate must remain under.

    Returns:
        The resolved candidate path.

    Raises:
        ValueError: If the resolved candidate is not under ``safe_base``.

    """
    anchor: Path = Path(safe_base).resolve()
    resolved: Path = Path(path_obj).resolve()

    try:
        # Raises ValueError when resolved is not located under anchor.
        resolved.relative_to(anchor)
    except ValueError as exc:
        message = f"Path traversal attempt: escapes {anchor}"
        raise ValueError(message) from exc

    return resolved  # nosonar
152
+
153
+
154
def safe_exists(path_obj: Path, base_path: Path) -> bool:
    """
    Report whether a path exists, after enforcing base containment.

    Args:
        path_obj: Path to test.
        base_path: Trusted base directory for the containment check.

    Returns:
        True if the validated path exists.

    """
    trusted_base = _normalize_trusted_base(base_path)
    return _validated_candidate(path_obj, trusted_base).exists()
159
+
160
+
161
def safe_is_dir(path_obj: Path, base_path: Path) -> bool:
    """
    Report whether a path is a directory, after enforcing base containment.

    Args:
        path_obj: Path to test.
        base_path: Trusted base directory for the containment check.

    Returns:
        True if the validated path is a directory.

    """
    trusted_base = _normalize_trusted_base(base_path)
    return _validated_candidate(path_obj, trusted_base).is_dir()
166
+
167
+
168
def safe_is_file(path_obj: Path, base_path: Path) -> bool:
    """
    Report whether a path is a regular file, after enforcing base containment.

    Args:
        path_obj: Path to test.
        base_path: Trusted base directory for the containment check.

    Returns:
        True if the validated path is a file.

    """
    trusted_base = _normalize_trusted_base(base_path)
    return _validated_candidate(path_obj, trusted_base).is_file()
173
+
174
+
175
def safe_glob(dir_path: Path, pattern: str, base_path: Path) -> list[Path]:
    """
    Glob inside a directory after enforcing containment.

    Only literal patterns provided by code should be used for ``pattern``.

    Args:
        dir_path: Directory to search in.
        pattern: Relative glob pattern; must not contain ``..`` or start
            with a path separator.
        base_path: Trusted base directory for the containment check.

    Returns:
        Resolved paths of all matches, each validated against the base.

    Raises:
        ValueError: If the pattern is unsafe or absolute, or if the
            directory or any match resolves outside ``base_path``.

    """
    if ".." in pattern:
        msg = f"Unsafe glob pattern detected: {pattern!r}"
        raise ValueError(msg)
    # Test both separators explicitly: os.sep is "\\" on Windows, so relying
    # on it alone would let a leading "/" through on that platform.
    if pattern.startswith(("/", "\\")):
        msg = f"Absolute glob patterns are not allowed: {pattern!r}"
        raise ValueError(msg)

    safe_base = _normalize_trusted_base(base_path)
    safe_dir: Path = _validated_candidate(_normalize_path(dir_path), safe_base)

    # Re-validate every match: a symlink inside the directory can resolve
    # to a location outside the base.
    return [
        _validated_candidate(Path(match), safe_base)
        for match in safe_dir.glob(pattern)  # nosonar
    ]
198
+
199
+
200
def safe_mkdir(
    path_obj: Path, base_path: Path, parents: bool = False, exist_ok: bool = False
) -> None:
    """
    Create a directory after enforcing base containment.

    Args:
        path_obj: Directory to create.
        base_path: Trusted base directory for the containment check.
        parents: Forwarded to ``Path.mkdir``.
        exist_ok: Forwarded to ``Path.mkdir``.

    Raises:
        ValueError: If the path escapes the base directory.

    """
    trusted_base = _normalize_trusted_base(base_path)
    target_dir = _validated_candidate(_normalize_path(path_obj), trusted_base)
    target_dir.mkdir(parents=parents, exist_ok=exist_ok)  # nosonar
208
+
209
+
210
def safe_read_text(path_obj: Path, base_path: Path, encoding: str = "utf-8") -> str:
    """
    Read a text file after enforcing base containment.

    Args:
        path_obj: Path to the file to read.
        base_path: Trusted base directory for containment check.
        encoding: Text encoding (default: 'utf-8').

    Returns:
        File contents as string.

    Raises:
        ValueError: If the path escapes the base directory.

    """
    trusted_base = _normalize_trusted_base(base_path)
    source_file = _validated_candidate(_normalize_path(path_obj), trusted_base)
    contents = source_file.read_text(encoding=encoding)  # nosonar
    return contents
230
+
231
+
232
def safe_write_text(
    path_obj: Path, base_path: Path, text: str, encoding: str = "utf-8"
) -> None:
    """
    Write text to a file after enforcing base containment.

    Args:
        path_obj: Path to the file to write.
        base_path: Trusted base directory for containment check.
        text: Text content to write.
        encoding: Text encoding (default: 'utf-8').

    Raises:
        ValueError: If the path escapes the base directory.

    """
    trusted_base = _normalize_trusted_base(base_path)
    destination = _validated_candidate(_normalize_path(path_obj), trusted_base)
    destination.write_text(text, encoding=encoding)  # nosonar
249
+
250
+
251
def safe_iterdir(path_obj: Path, base_path: Path) -> list[Path]:
    """
    List a directory's entries after enforcing base containment.

    Args:
        path_obj: Directory path to iterate.
        base_path: Trusted base directory for containment check.

    Returns:
        List of validated (resolved) paths within the directory.

    Raises:
        ValueError: If the directory or any entry resolves outside the
            base directory.

    """
    trusted_base = _normalize_trusted_base(base_path)
    directory = _validated_candidate(_normalize_path(path_obj), trusted_base)

    # Each entry is validated too: an entry may be a symlink that resolves
    # outside the base.
    return [
        _validated_candidate(entry, trusted_base)
        for entry in directory.iterdir()  # nosonar
    ]
@@ -0,0 +1,230 @@
1
+ """URL validation utilities for user-provided endpoints."""
2
+
3
+ import ipaddress
4
+ import os
5
+ from collections.abc import Iterable
6
+ from urllib.parse import urlparse, urlunparse
7
+
8
+ DEFAULT_ALLOWLIST_ENV = "SOUSCHEF_ALLOWED_HOSTNAMES"
9
+
10
+
11
def _split_allowlist(env_value: str) -> set[str]:
    """
    Parse a comma-separated allowlist value into hostnames.

    Args:
        env_value: Raw environment value containing hostnames.

    Returns:
        A set of lower-cased, whitespace-trimmed hostnames; blank entries
        are dropped.

    """
    hostnames: set[str] = set()
    for raw_entry in env_value.split(","):
        trimmed = raw_entry.strip()
        if trimmed:
            hostnames.add(trimmed.lower())
    return hostnames
23
+
24
+
25
def _matches_allowlist(hostname: str, allowlist: Iterable[str]) -> bool:
    """
    Check whether a hostname matches any allowlist entry.

    Entries are lower-cased and trimmed before comparison. An entry of the
    form ``*.example.com`` matches any hostname ending in ``.example.com``
    but not ``example.com`` itself; any other entry must equal the
    hostname exactly.

    Args:
        hostname: Hostname to validate.
        allowlist: Iterable of allowlist entries.

    Returns:
        True if the hostname matches the allowlist.

    """

    def _entry_matches(raw_entry: str) -> bool:
        pattern = raw_entry.lower().strip()
        if not pattern:
            return False
        if not pattern.startswith("*."):
            return hostname == pattern
        # Drop the "*" and keep the leading dot, e.g. ".example.com".
        suffix = pattern[1:]
        return hostname.endswith(suffix) and hostname != suffix.lstrip(".")

    return any(_entry_matches(item) for item in allowlist)
48
+
49
+
50
def _is_private_hostname(hostname: str) -> bool:
    """
    Determine whether a hostname names a private or local address.

    This check only validates IP literals and well-known local hostnames;
    it performs no DNS resolution. A trailing root dot is ignored, so
    ``localhost.`` and ``127.0.0.1.`` are classified the same way as
    ``localhost`` and ``127.0.0.1``.

    Args:
        hostname: Hostname to inspect.

    Returns:
        True if the hostname is private or local.

    """
    # A fully-qualified name may end with "."; strip it so that form cannot
    # slip past the comparisons below.
    candidate = hostname.rstrip(".").lower()

    local_suffixes = (".localhost", ".local", ".localdomain", ".internal")
    if candidate == "localhost" or candidate.endswith(local_suffixes):
        return True

    try:
        ip_address = ipaddress.ip_address(candidate)
    except ValueError:
        # Not an IP literal and not a known local name.
        return False

    return bool(
        ip_address.is_private
        or ip_address.is_loopback
        or ip_address.is_link_local
        or ip_address.is_reserved
        or ip_address.is_multicast
        or ip_address.is_unspecified
    )
80
+
81
+
82
def _is_ip_literal(hostname: str) -> bool:
    """
    Tell whether a hostname parses as an IPv4 or IPv6 address.

    Args:
        hostname: Hostname to inspect.

    Returns:
        True if the hostname is an IP literal.

    """
    parsed_ok = True
    try:
        ipaddress.ip_address(hostname)
    except ValueError:
        parsed_ok = False
    return parsed_ok
98
+
99
+
100
+ def _normalise_url_value(base_url: str, default_url: str | None) -> str:
101
+ """
102
+ Normalise the input URL value.
103
+
104
+ Args:
105
+ base_url: URL provided by the user.
106
+ default_url: Default URL to use when base_url is empty.
107
+
108
+ Returns:
109
+ Normalised URL string.
110
+
111
+ """
112
+ url_value = str(base_url).strip()
113
+ if not url_value:
114
+ if default_url is None:
115
+ raise ValueError("Base URL is required.")
116
+ url_value = default_url
117
+
118
+ if "://" not in url_value:
119
+ url_value = f"https://{url_value}"
120
+
121
+ return url_value
122
+
123
+
124
def _validate_scheme(parsed_url) -> None:
    """
    Require that a parsed URL uses the HTTPS scheme.

    Args:
        parsed_url: Parsed URL object exposing a ``scheme`` attribute.

    Raises:
        ValueError: If the scheme is not ``https`` (case-insensitive).

    """
    scheme = parsed_url.scheme.lower()
    if scheme == "https":
        return
    raise ValueError("Base URL must use HTTPS.")
134
+
135
+
136
def _validate_hostname(
    hostname: str,
    allowlist: set[str],
    allowed_hosts: set[str] | None,
) -> None:
    """
    Validate a hostname against the allowlist and public-host rules.

    Checks run in this order: provider-specific ``allowed_hosts`` (when
    non-empty), then the allowlist (when non-empty). A hostname that
    matches the allowlist skips the remaining checks; otherwise it must not
    be private/local and must either contain a dot or be an IP literal.

    Args:
        hostname: Hostname to validate.
        allowlist: Allowlisted hostnames.
        allowed_hosts: Provider-specific allowed hostnames.

    Raises:
        ValueError: If any of the checks fails.

    """
    host = hostname.lower()

    if allowed_hosts and host not in allowed_hosts:
        raise ValueError("Base URL host is not permitted.")

    in_allowlist = bool(allowlist) and _matches_allowlist(host, allowlist)
    if allowlist and not in_allowlist:
        raise ValueError("Base URL host is not in the allowlist.")

    if in_allowlist:
        # Allowlisted hosts are exempt from the private/FQDN restrictions.
        return

    if _is_private_hostname(host):
        raise ValueError("Base URL host must be a public hostname.")

    if "." not in host and not _is_ip_literal(host):
        raise ValueError("Base URL host must be a fully qualified domain name.")
165
+
166
+
167
def _normalise_parsed_url(parsed_url, strip_path: bool) -> str:
    """
    Rebuild a parsed URL as a string without params, query or fragment.

    Params, query and fragment are always removed; the path is removed
    only when ``strip_path`` is true. Trailing slashes are stripped from
    the result.

    Args:
        parsed_url: Parsed URL object.
        strip_path: Whether to also drop the path component.

    Returns:
        Normalised URL string.

    """
    overrides = {"params": "", "query": "", "fragment": ""}
    if strip_path:
        overrides["path"] = ""

    rebuilt = urlunparse(parsed_url._replace(**overrides))
    return str(rebuilt).rstrip("/")
184
+
185
+
186
def validate_user_provided_url(
    base_url: str,
    *,
    default_url: str | None = None,
    allowlist_env_var: str = DEFAULT_ALLOWLIST_ENV,
    allowed_hosts: set[str] | None = None,
    strip_path: bool = False,
) -> str:
    """
    Validate a user-provided URL for outbound requests.

    The URL must use HTTPS, include a hostname, carry no embedded
    credentials, and pass the hostname checks. The hostname allowlist is
    read from the environment variable named by ``allowlist_env_var``.

    Args:
        base_url: URL provided by the user.
        default_url: Default URL to use when base_url is empty.
        allowlist_env_var: Environment variable containing allowed hostnames.
        allowed_hosts: Explicit host allowlist for provider-specific endpoints.
        strip_path: Whether to drop the path from the returned URL.

    Returns:
        Validated and normalised URL string.

    Raises:
        ValueError: If the URL is invalid or fails security validation.

    """
    parsed = urlparse(_normalise_url_value(base_url, default_url))
    _validate_scheme(parsed)

    host = parsed.hostname
    if not host:
        raise ValueError("Base URL must include a hostname.")

    if parsed.username or parsed.password:
        raise ValueError("Base URL must not include user credentials.")

    env_allowlist = _split_allowlist(os.environ.get(allowlist_env_var, ""))

    # An empty or missing allowed_hosts means "no provider restriction".
    provider_hosts = None
    if allowed_hosts:
        provider_hosts = {entry.lower() for entry in allowed_hosts}

    _validate_hostname(host, env_allowlist, provider_hosts)

    return _normalise_parsed_url(parsed, strip_path)
souschef/deployment.py CHANGED
@@ -10,6 +10,7 @@ import json
10
10
  import re
11
11
  from pathlib import Path
12
12
  from typing import Any
13
+ from urllib.parse import urlparse
13
14
 
14
15
  from souschef.core.constants import (
15
16
  CHEF_RECIPE_PREFIX,
@@ -258,10 +259,11 @@ def generate_awx_inventory_source_from_chef(
258
259
  "(e.g., https://chef.example.com)"
259
260
  )
260
261
 
261
- if not chef_server_url.startswith("https://"):
262
+ parsed_url = urlparse(chef_server_url)
263
+ if parsed_url.scheme != "https" or not parsed_url.netloc:
262
264
  return (
263
265
  f"Error: Invalid Chef server URL: {chef_server_url}\n\n"
264
- "Suggestion: URL must use HTTPS protocol for security "
266
+ "Suggestion: URL must use HTTPS protocol with a valid host "
265
267
  "(e.g., https://chef.example.com)"
266
268
  )
267
269
 
@@ -983,7 +985,12 @@ def main():
983
985
  # Chef server configuration
984
986
  chef_server_url = os.environ.get('CHEF_SERVER_URL', '{chef_server_url}')
985
987
  client_name = os.environ.get('CHEF_NODE_NAME', 'admin')
986
- client_key = os.environ.get('CHEF_CLIENT_KEY', '/etc/chef/client.pem')
988
+ # Client key path should be customizable - use environment variable with
989
+ # home directory default instead of hardcoded /etc/chef/client.pem
990
+ client_key = os.environ.get(
991
+ 'CHEF_CLIENT_KEY',
992
+ os.path.expanduser('~/.chef/client.pem')
993
+ )
987
994
 
988
995
  # Initialize Chef API
989
996
  try:
@@ -0,0 +1,13 @@
1
+ """Ansible artifact generators."""
2
+
3
+ from souschef.generators.repo import (
4
+ RepoType,
5
+ analyse_conversion_output,
6
+ generate_ansible_repository,
7
+ )
8
+
9
+ __all__ = [
10
+ "RepoType",
11
+ "analyse_conversion_output",
12
+ "generate_ansible_repository",
13
+ ]