mcp-souschef 3.0.0__py3-none-any.whl → 3.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@ inventory scripts.
  """

  import json
+ import os
  import re
  import shutil
  import subprocess
@@ -31,20 +32,25 @@ from souschef.core.constants import (
      REGEX_WHITESPACE_QUOTE,
      VALUE_PREFIX,
  )
- from souschef.core.path_utils import _normalize_path, _safe_join
+ from souschef.core.path_utils import (
+     _normalize_path,
+     _safe_join,
+     safe_exists,
+     safe_glob,
+     safe_read_text,
+ )
+ from souschef.core.url_validation import validate_user_provided_url
  from souschef.parsers.attributes import parse_attributes
  from souschef.parsers.recipe import parse_recipe

  # Optional AI provider imports
  try:
-     import requests  # type: ignore[import-untyped]
+     import requests
  except ImportError:
-     requests = None
+     requests = None  # type: ignore[assignment]

  try:
-     from ibm_watsonx_ai import (  # type: ignore[import-not-found]
-         APIClient,
-     )
+     from ibm_watsonx_ai import APIClient  # type: ignore[import-not-found]
  except ImportError:
      APIClient = None

@@ -52,12 +58,13 @@ except ImportError:
  MAX_GUARD_LENGTH = 500


- def generate_playbook_from_recipe(recipe_path: str) -> str:
+ def generate_playbook_from_recipe(recipe_path: str, cookbook_path: str = "") -> str:
      """
      Generate a complete Ansible playbook from a Chef recipe.

      Args:
          recipe_path: Path to the Chef recipe (.rb) file.
+         cookbook_path: Optional path to the cookbook root for path validation.

      Returns:
          Complete Ansible playbook in YAML format with tasks, handlers, and
@@ -73,10 +80,18 @@ def generate_playbook_from_recipe(recipe_path: str) -> str:

      # Parse the raw recipe file for advanced features
      recipe_file = _normalize_path(recipe_path)
-     if not recipe_file.exists():
-         return f"{ERROR_PREFIX} Recipe file does not exist: {recipe_path}"

-     raw_content = recipe_file.read_text()
+     # Validate path if cookbook_path provided
+     base_path = (
+         Path(cookbook_path).resolve() if cookbook_path else recipe_file.parent
+     )
+
+     try:
+         if not safe_exists(recipe_file, base_path):
+             return f"{ERROR_PREFIX} Recipe file does not exist: {recipe_path}"
+         raw_content = safe_read_text(recipe_file, base_path)
+     except ValueError:
+         return f"{ERROR_PREFIX} Path traversal attempt detected: {recipe_path}"

      # Generate playbook structure
      playbook: str = _generate_playbook_structure(
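Note: safe_exists and safe_read_text come from souschef.core.path_utils and their bodies are not part of this diff. A minimal sketch of the containment check they presumably perform (the helper name below is illustrative only, not the package's implementation):

    from pathlib import Path

    def sketch_safe_read_text(candidate: Path, base: Path) -> str:
        # Resolve both paths, refuse anything that escapes the base directory
        # (e.g. "../../etc/passwd"), then read the file as text.
        resolved = candidate.resolve()
        base = base.resolve()
        if not resolved.is_relative_to(base):  # assumption: Python 3.9+ semantics
            raise ValueError(f"{resolved} escapes {base}")
        return resolved.read_text()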
@@ -99,6 +114,7 @@ def generate_playbook_from_recipe_with_ai(
      project_id: str = "",
      base_url: str = "",
      project_recommendations: dict | None = None,
+     cookbook_path: str = "",
  ) -> str:
      """
      Generate an AI-enhanced Ansible playbook from a Chef recipe.
@@ -119,6 +135,7 @@ def generate_playbook_from_recipe_with_ai(
          base_url: Custom base URL for the AI provider.
          project_recommendations: Dictionary containing project-level analysis
              and recommendations from cookbook assessment.
+         cookbook_path: Optional path to the cookbook root for path validation.

      Returns:
          AI-generated Ansible playbook in YAML format.
@@ -127,10 +144,18 @@ def generate_playbook_from_recipe_with_ai(
      try:
          # Parse the recipe file
          recipe_file = _normalize_path(recipe_path)
-         if not recipe_file.exists():
-             return f"{ERROR_PREFIX} Recipe file does not exist: {recipe_path}"

-         raw_content = recipe_file.read_text()
+         # Validate path if cookbook_path provided
+         base_path = (
+             Path(cookbook_path).resolve() if cookbook_path else recipe_file.parent
+         )
+
+         try:
+             if not safe_exists(recipe_file, base_path):
+                 return f"{ERROR_PREFIX} Recipe file does not exist: {recipe_path}"
+             raw_content = safe_read_text(recipe_file, base_path)
+         except ValueError:
+             return f"{ERROR_PREFIX} Path traversal attempt detected: {recipe_path}"

          # Get basic recipe parsing for context
          parsed_content = parse_recipe(recipe_path)
@@ -220,18 +245,36 @@ def _initialize_ai_client(
          if APIClient is None:
              return f"{ERROR_PREFIX} ibm_watsonx_ai library not available"

+         try:
+             validated_url = validate_user_provided_url(
+                 base_url,
+                 default_url="https://us-south.ml.cloud.ibm.com",
+             )
+         except ValueError as exc:
+             return f"{ERROR_PREFIX} Invalid Watsonx base URL: {exc}"
+
          return APIClient(
              api_key=api_key,
              project_id=project_id,
-             url=base_url or "https://us-south.ml.cloud.ibm.com",
+             url=validated_url,
          )
      elif ai_provider.lower() == "lightspeed":
          if requests is None:
              return f"{ERROR_PREFIX} requests library not available"

+         try:
+             validated_url = validate_user_provided_url(
+                 base_url,
+                 default_url="https://api.redhat.com",
+                 allowed_hosts={"api.redhat.com"},
+                 strip_path=True,
+             )
+         except ValueError as exc:
+             return f"{ERROR_PREFIX} Invalid Lightspeed base URL: {exc}"
+
          return {
              "api_key": api_key,
-             "base_url": base_url or "https://api.redhat.com",
+             "base_url": validated_url,
          }
      elif ai_provider.lower() == "github_copilot":
          return (
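validate_user_provided_url is imported from souschef.core.url_validation; only its call sites appear in this diff. A rough sketch of the checks implied by the arguments used above (default_url, allowed_hosts, strip_path), written as an assumption rather than the package's actual code:

    from urllib.parse import urlparse, urlunparse

    def sketch_validate_url(url, default_url, allowed_hosts=None, strip_path=False):
        # Fall back to the default, require https and a hostname,
        # optionally pin the host and drop any path/query component.
        parsed = urlparse(url or default_url)
        if parsed.scheme != "https" or not parsed.hostname:
            raise ValueError("expected an https:// URL with a hostname")
        if allowed_hosts and parsed.hostname not in allowed_hosts:
            raise ValueError(f"host {parsed.hostname!r} is not allowed")
        if strip_path:
            parsed = parsed._replace(path="", params="", query="", fragment="")
        return urlunparse(parsed)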
@@ -245,98 +288,227 @@ def _initialize_ai_client(
      return f"{ERROR_PREFIX} Unsupported AI provider: {ai_provider}"


- def _call_ai_api(
+ def _call_anthropic_api(
      client: Any,
-     ai_provider: str,
      prompt: str,
      model: str,
      temperature: float,
      max_tokens: int,
+     response_format: dict[str, Any] | None = None,
  ) -> str:
-     """Call the appropriate AI API based on provider."""
-     if ai_provider.lower() == "anthropic":
+     """Call Anthropic API with optional structured output via tool calling."""
+     if response_format and response_format.get("type") == "json_object":
+         # Use tool calling for structured JSON responses
          response = client.messages.create(
              model=model,
              max_tokens=max_tokens,
              temperature=temperature,
              messages=[{"role": "user", "content": prompt}],
+             tools=[
+                 {
+                     "name": "format_response",
+                     "description": "Format the response as structured JSON",
+                     "input_schema": {
+                         "type": "object",
+                         "properties": {
+                             "response": {
+                                 "type": "string",
+                                 "description": "The formatted response",
+                             }
+                         },
+                         "required": ["response"],
+                     },
+                 }
+             ],
          )
+         # Extract from tool use or fallback to text
+         for block in response.content:
+             if hasattr(block, "type") and block.type == "tool_use":
+                 return str(block.input.get("response", ""))
+         # Fallback to text content
          return str(response.content[0].text)
-     elif ai_provider.lower() == "watson":
-         response = client.generate_text(
-             model_id=model,
-             input=prompt,
-             parameters={
-                 "max_new_tokens": max_tokens,
-                 "temperature": temperature,
-                 "min_new_tokens": 1,
-             },
+     else:
+         # Standard text response
+         response = client.messages.create(
+             model=model,
+             max_tokens=max_tokens,
+             temperature=temperature,
+             messages=[{"role": "user", "content": prompt}],
          )
-         return str(response["results"][0]["generated_text"])
-     elif ai_provider.lower() == "lightspeed":
-         if requests is None:
-             return f"{ERROR_PREFIX} requests library not available"
+         return str(response.content[0].text)

-         headers = {
-             "Authorization": f"Bearer {client['api_key']}",
-             "Content-Type": "application/json",
-         }
-         payload = {
-             "model": model,
-             "prompt": prompt,
-             "max_tokens": max_tokens,
+
+ def _call_watson_api(
+     client: Any,
+     prompt: str,
+     model: str,
+     temperature: float,
+     max_tokens: int,
+ ) -> str:
+     """Call IBM Watsonx API."""
+     response = client.generate_text(
+         model_id=model,
+         input=prompt,
+         parameters={
+             "max_new_tokens": max_tokens,
              "temperature": temperature,
-         }
-         response = requests.post(
-             f"{client['base_url']}/v1/completions",
-             headers=headers,
-             json=payload,
-             timeout=60,
+             "min_new_tokens": 1,
+         },
+     )
+     return str(response["results"][0]["generated_text"])
+
+
+ def _call_lightspeed_api(
+     client: dict[str, str],
+     prompt: str,
+     model: str,
+     temperature: float,
+     max_tokens: int,
+     response_format: dict[str, Any] | None = None,
+ ) -> str:
+     """Call Red Hat Lightspeed API."""
+     if requests is None:
+         return f"{ERROR_PREFIX} requests library not available"
+
+     headers = {
+         "Authorization": f"Bearer {client['api_key']}",
+         "Content-Type": "application/json",
+     }
+     payload = {
+         "model": model,
+         "prompt": prompt,
+         "max_tokens": max_tokens,
+         "temperature": temperature,
+     }
+     if response_format:
+         payload["response_format"] = response_format
+
+     response = requests.post(
+         f"{client['base_url']}/v1/completions",
+         headers=headers,
+         json=payload,
+         timeout=60,
+     )
+     if response.status_code == 200:
+         return str(response.json()["choices"][0]["text"])
+     else:
+         return (
+             f"{ERROR_PREFIX} Red Hat Lightspeed API error: "
+             f"{response.status_code} - {response.text}"
          )
-         if response.status_code == 200:
-             return str(response.json()["choices"][0]["text"])
-         else:
-             return (
-                 f"{ERROR_PREFIX} Red Hat Lightspeed API error: "
-                 f"{response.status_code} - {response.text}"
-             )
-     elif ai_provider.lower() == "github_copilot":
-         if requests is None:
-             return f"{ERROR_PREFIX} requests library not available"

-         headers = {
-             "Authorization": f"Bearer {client['api_key']}",
-             "Content-Type": "application/json",
-             "User-Agent": "SousChef/1.0",
-         }
-         payload = {
-             "model": model,
-             "messages": [{"role": "user", "content": prompt}],
-             "max_tokens": max_tokens,
-             "temperature": temperature,
-         }
-         # GitHub Copilot uses OpenAI-compatible chat completions endpoint
-         response = requests.post(
-             f"{client['base_url']}/copilot/chat/completions",
-             headers=headers,
-             json=payload,
-             timeout=60,
+
+ def _call_github_copilot_api(
+     client: dict[str, str],
+     prompt: str,
+     model: str,
+     temperature: float,
+     max_tokens: int,
+     response_format: dict[str, Any] | None = None,
+ ) -> str:
+     """Call GitHub Copilot API."""
+     if requests is None:
+         return f"{ERROR_PREFIX} requests library not available"
+
+     headers = {
+         "Authorization": f"Bearer {client['api_key']}",
+         "Content-Type": "application/json",
+         "User-Agent": "SousChef/1.0",
+     }
+     payload = {
+         "model": model,
+         "messages": [{"role": "user", "content": prompt}],
+         "max_tokens": max_tokens,
+         "temperature": temperature,
+     }
+     if response_format:
+         payload["response_format"] = response_format
+
+     # GitHub Copilot uses OpenAI-compatible chat completions endpoint
+     response = requests.post(
+         f"{client['base_url']}/copilot/chat/completions",
+         headers=headers,
+         json=payload,
+         timeout=60,
+     )
+     if response.status_code == 200:
+         return str(response.json()["choices"][0]["message"]["content"])
+     else:
+         return (
+             f"{ERROR_PREFIX} GitHub Copilot API error: "
+             f"{response.status_code} - {response.text}"
+         )
+
+
+ def _call_openai_api(
+     client: Any,
+     prompt: str,
+     model: str,
+     temperature: float,
+     max_tokens: int,
+     response_format: dict[str, Any] | None = None,
+ ) -> str:
+     """Call OpenAI API."""
+     kwargs = {
+         "model": model,
+         "max_tokens": max_tokens,
+         "temperature": temperature,
+         "messages": [{"role": "user", "content": prompt}],
+     }
+     if response_format:
+         kwargs["response_format"] = response_format
+
+     response = client.chat.completions.create(**kwargs)
+     return str(response.choices[0].message.content)
+
+
+ def _call_ai_api(
+     client: Any,
+     ai_provider: str,
+     prompt: str,
+     model: str,
+     temperature: float,
+     max_tokens: int,
+     response_format: dict[str, Any] | None = None,
+ ) -> str:
+     """
+     Call the appropriate AI API based on provider.
+
+     Args:
+         client: Initialized AI client.
+         ai_provider: AI provider name.
+         prompt: Prompt text.
+         model: Model identifier.
+         temperature: Sampling temperature.
+         max_tokens: Maximum tokens in response.
+         response_format: Optional response format specification for structured
+             outputs. For OpenAI: {"type": "json_object"}. For Anthropic: Use
+             tool calling instead.
+
+     Returns:
+         AI-generated response text.
+
+     """
+     provider = ai_provider.lower()
+
+     if provider == "anthropic":
+         return _call_anthropic_api(
+             client, prompt, model, temperature, max_tokens, response_format
+         )
+     elif provider == "watson":
+         return _call_watson_api(client, prompt, model, temperature, max_tokens)
+     elif provider == "lightspeed":
+         return _call_lightspeed_api(
+             client, prompt, model, temperature, max_tokens, response_format
+         )
+     elif provider == "github_copilot":
+         return _call_github_copilot_api(
+             client, prompt, model, temperature, max_tokens, response_format
          )
-         if response.status_code == 200:
-             return str(response.json()["choices"][0]["message"]["content"])
-         else:
-             return (
-                 f"{ERROR_PREFIX} GitHub Copilot API error: "
-                 f"{response.status_code} - {response.text}"
-             )
      else:  # OpenAI
-         response = client.chat.completions.create(
-             model=model,
-             max_tokens=max_tokens,
-             temperature=temperature,
-             messages=[{"role": "user", "content": prompt}],
+         return _call_openai_api(
+             client, prompt, model, temperature, max_tokens, response_format
          )
-         return str(response.choices[0].message.content)


  def _create_ai_conversion_prompt(
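With the provider-specific helpers split out, _call_ai_api becomes a thin dispatcher. A hedged usage sketch of the new response_format parameter (the client object and model id below are placeholders):

    # Request structured JSON: OpenAI-compatible providers get response_format
    # in the payload, while the Anthropic helper switches to tool calling.
    playbook_json = _call_ai_api(
        client=client,                     # whatever _initialize_ai_client returned
        ai_provider="openai",
        prompt="Convert this Chef resource to an Ansible task; answer as JSON.",
        model="gpt-4o",                    # placeholder model id
        temperature=0.2,
        max_tokens=1024,
        response_format={"type": "json_object"},
    )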
@@ -499,6 +671,10 @@ def _build_conversion_requirements_parts() -> list[str]:
          "",
          "7. **Conditionals**: Convert Chef guards (only_if/not_if) to Ansible when",
          " conditions.",
+         " - For file or directory checks, add a stat task with register,",
+         " then use a boolean when expression like 'stat_result.stat.exists'.",
+         " - Do NOT put module names or task mappings under when.",
+         " - Keep when expressions as valid YAML scalars (strings or lists).",
          "",
          "8. **Notifications**: Convert Chef notifications to Ansible handlers",
          " where appropriate.",
@@ -594,7 +770,7 @@ def _build_output_format_parts() -> list[str]:


  def _clean_ai_playbook_response(ai_response: str) -> str:
-     """Clean and validate the AI-generated playbook response."""
+     """Clean the AI-generated playbook response."""
      if not ai_response or not ai_response.strip():
          return f"{ERROR_PREFIX} AI returned empty response"

@@ -606,15 +782,19 @@ def _clean_ai_playbook_response(ai_response: str) -> str:
      if not cleaned.startswith("---") and not cleaned.startswith("- name:"):
          return f"{ERROR_PREFIX} AI response does not appear to be valid YAML playbook"

-     # Try to parse as YAML to validate structure
+     return cleaned
+
+
+ def _validate_playbook_yaml(playbook_content: str) -> str | None:
+     """Validate YAML syntax and return an error message if invalid."""
      try:
          import yaml

-         yaml.safe_load(cleaned)
-     except Exception as e:
-         return f"{ERROR_PREFIX} AI generated invalid YAML: {e}"
+         yaml.safe_load(playbook_content)
+     except Exception as exc:
+         return str(exc)

-     return cleaned
+     return None


  def _validate_and_fix_playbook(
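_validate_playbook_yaml returns None on success and the parser message on failure, so callers decide how to wrap the error. A small usage sketch mirroring the call sites later in this diff:

    error = _validate_playbook_yaml(playbook_content)
    if error:
        # Surface the YAML problem before spending time on ansible-lint
        return f"{ERROR_PREFIX} AI generated invalid YAML: {error}"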
@@ -629,7 +809,13 @@ def _validate_and_fix_playbook(
      if playbook_content.startswith(ERROR_PREFIX):
          return playbook_content

-     validation_error = _run_ansible_lint(playbook_content)
+     yaml_error = _validate_playbook_yaml(playbook_content)
+     validation_error: str | None
+     if yaml_error:
+         validation_error = f"YAML parse error: {yaml_error}"
+     else:
+         validation_error = _run_ansible_lint(playbook_content)
+
      if not validation_error:
          return playbook_content

@@ -663,6 +849,10 @@ Just the YAML content.
              # rather than returning an error string
              return playbook_content

+         fixed_yaml_error = _validate_playbook_yaml(cleaned_response)
+         if fixed_yaml_error:
+             return f"{ERROR_PREFIX} AI generated invalid YAML: {fixed_yaml_error}"
+
          return cleaned_response
      except Exception:
          # If fix fails, return original with warning (or original error)
@@ -677,9 +867,16 @@ def _run_ansible_lint(playbook_content: str) -> str | None:

      tmp_path = None
      try:
-         with tempfile.NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as tmp:
-             tmp.write(playbook_content)
-             tmp_path = tmp.name
+         # Create temp file with secure permissions (0o600 = rw-------)
+         # Use os.open with secure flags instead of NamedTemporaryFile for better control
+         tmp_fd, tmp_path = tempfile.mkstemp(suffix=".yml", text=True)
+         try:
+             # Write content to file descriptor (atomic operation)
+             with os.fdopen(tmp_fd, "w") as tmp:
+                 tmp.write(playbook_content)
+         except Exception:
+             os.close(tmp_fd)
+             raise

          # Run ansible-lint
          # We ignore return code because we want to capture output even on failure
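tempfile.mkstemp creates the file with mode 0o600 and returns an already-open descriptor, which is why the write goes through os.fdopen. A standalone sketch of the same pattern, including the cleanup the surrounding function is responsible for:

    import os
    import tempfile

    fd, path = tempfile.mkstemp(suffix=".yml", text=True)
    try:
        with os.fdopen(fd, "w") as handle:  # takes ownership of fd and closes it
            handle.write("---\n- hosts: all\n  tasks: []\n")
        # ... run ansible-lint against `path` here ...
    finally:
        os.unlink(path)  # always remove the temporary file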
@@ -768,8 +965,9 @@ def analyse_chef_search_patterns(recipe_or_cookbook_path: str) -> str:
      path_obj = _normalize_path(recipe_or_cookbook_path)

      if path_obj.is_file():
-         # Single recipe file
-         search_patterns = _extract_search_patterns_from_file(path_obj)
+         # Single recipe file - use parent directory as base path
+         base_path = path_obj.parent
+         search_patterns = _extract_search_patterns_from_file(path_obj, base_path)
      elif path_obj.is_dir():
          # Cookbook directory
          search_patterns = _extract_search_patterns_from_cookbook(path_obj)
@@ -1048,7 +1246,18 @@ def _generate_ansible_inventory_from_search(

  def _generate_inventory_script_content(queries_data: list[dict[str, str]]) -> str:
      """Generate Python dynamic inventory script content."""
-     script_template = '''#!/usr/bin/env python3
+     # Convert queries_data to JSON string for embedding
+     queries_json = json.dumps(  # nosonar
+         {
+             item.get("group_name", f"group_{i}"): (
+                 item.get("search_query") or item.get("query", "")
+             )
+             for i, item in enumerate(queries_data)
+         },
+         indent=4,
+     )
+
+     script_template = f'''#!/usr/bin/env python3
  """Dynamic Ansible Inventory Script.

  Generated from Chef search queries by SousChef
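Because the template is now an f-string, {queries_json} is interpolated once when the script is generated, while every brace that should survive into the generated script has to be doubled ({{ and }}). A tiny sketch of that escaping rule (sample data is made up):

    import json

    queries_json = json.dumps({"web_servers": "role:web"})
    header = f"SEARCH_QUERIES = {queries_json}"             # interpolated once
    body = f'inventory = {{"_meta": {{"hostvars": {{}}}}}}'  # doubled braces -> literal braces
    print(header)  # SEARCH_QUERIES = {"web_servers": "role:web"}
    print(body)    # inventory = {"_meta": {"hostvars": {}}}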
@@ -1057,96 +1266,118 @@ This script converts Chef search queries to Ansible inventory groups.
  Requires: python-requests (for Chef server API)
  """
  import json
+ import os
  import sys
  import argparse
+ import ipaddress
+ from urllib.parse import urlparse, urlunparse
  from typing import Dict, List, Any

- # Chef server configuration
- CHEF_SERVER_URL = "https://your-chef-server"
- CLIENT_NAME = "your-client-name"
- CLIENT_KEY_PATH = "/path/to/client.pem"
-
  # Search query to group mappings
- SEARCH_QUERIES = {search_queries_json}
+ SEARCH_QUERIES = {queries_json}

+ def validate_chef_server_url(server_url: str) -> str:
+     """Validate Chef Server URL to avoid unsafe requests."""
+     url_value = str(server_url).strip()
+     if not url_value:
+         raise ValueError("Chef Server URL is required")

- def get_chef_nodes(search_query: str) -> List[Dict[str, Any]]:
-     """Query Chef server for nodes matching search criteria.
+     if "://" not in url_value:
+         url_value = f"https://{{url_value}}"

-     Args:
-         search_query: Chef search query string
+     parsed = urlparse(url_value)
+     if parsed.scheme.lower() != "https":
+         raise ValueError("Chef Server URL must use HTTPS")

-     Returns:
-         List of node objects from Chef server
-     """
-     # TODO: Implement Chef server API client
-     # This is a placeholder - implement Chef server communication
-     # using python-chef library or direct API calls
+     if not parsed.hostname:
+         raise ValueError("Chef Server URL must include a hostname")

-     # Example structure of what this should return:
-     return [
-         {
-             "name": "web01.example.com",
-             "roles": ["web"],
-             "environment": "production",
-             "platform": "ubuntu",
-             "ipaddress": "10.0.1.10"
-         }
-     ]
+     hostname = parsed.hostname.lower()
+     local_suffixes = (".localhost", ".local", ".localdomain", ".internal")
+     if hostname == "localhost" or hostname.endswith(local_suffixes):
+         raise ValueError("Chef Server URL must use a public hostname")

+     try:
+         ip_address = ipaddress.ip_address(hostname)
+     except ValueError:
+         ip_address = None
+
+     if ip_address and (
+         ip_address.is_private
+         or ip_address.is_loopback
+         or ip_address.is_link_local
+         or ip_address.is_reserved
+         or ip_address.is_multicast
+         or ip_address.is_unspecified
+     ):
+         raise ValueError("Chef Server URL must use a public hostname")

- def build_inventory() -> Dict[str, Any]:
-     """Build Ansible inventory from Chef searches.
+     cleaned = parsed._replace(params="", query="", fragment="")
+     return urlunparse(cleaned).rstrip("/")

-     Returns:
-         Ansible inventory dictionary
-     """
-     inventory = {
-         "_meta": {
-             "hostvars": {}
-         }
-     }
+ def get_chef_nodes(search_query: str) -> List[Dict[str, Any]]:
+     """Query Chef server for nodes matching search criteria."""
+     import requests
+
+     chef_server_url = os.environ.get("CHEF_SERVER_URL", "").rstrip("/")
+     if not chef_server_url:
+         return []
+
+     try:
+         chef_server_url = validate_chef_server_url(chef_server_url)
+     except ValueError:
+         return []
+
+     try:
+         search_url = f"{{chef_server_url}}/search/node?q={{search_query}}"
+         response = requests.get(search_url, timeout=10)
+         response.raise_for_status()
+         search_result = response.json()
+         nodes_data = []
+
+         for row in search_result.get("rows", []):
+             node_obj = {{
+                 "name": row.get("name", "unknown"),
+                 "roles": row.get("run_list", []),
+                 "environment": row.get("chef_environment", "_default"),
+                 "platform": row.get("platform", "unknown"),
+                 "ipaddress": row.get("ipaddress", ""),
+                 "fqdn": row.get("fqdn", ""),
+             }}
+             nodes_data.append(node_obj)
+         return nodes_data
+     except Exception:
+         return []
+
+ def build_inventory() -> Dict[str, Any]:
+     """Build Ansible inventory from Chef searches."""
+     inventory = {{"_meta": {{"hostvars": {{}}}}}}

      for group_name, search_query in SEARCH_QUERIES.items():
-         inventory[group_name] = {
+         inventory[group_name] = {{
              "hosts": [],
-             "vars": {
-                 "chef_search_query": search_query
-             }
-         }
-
+             "vars": {{"chef_search_query": search_query}},
+         }}
          try:
              nodes = get_chef_nodes(search_query)
-
              for node in nodes:
                  hostname = node.get("name", node.get("fqdn", "unknown"))
                  inventory[group_name]["hosts"].append(hostname)
-
-                 # Add host variables
-                 inventory["_meta"]["hostvars"][hostname] = {
+                 inventory["_meta"]["hostvars"][hostname] = {{
                      "chef_roles": node.get("roles", []),
                      "chef_environment": node.get("environment", ""),
                      "chef_platform": node.get("platform", ""),
                      "ansible_host": node.get("ipaddress", hostname)
-                 }
-
-         except Exception as e:
-             print(
-                 f"Error querying Chef server for group {group_name}: {e}",
-                 file=sys.stderr,
-             )
+                 }}
+         except Exception:
+             pass

      return inventory

-
  def main():
      """Main entry point for dynamic inventory script."""
-     parser = argparse.ArgumentParser(
-         description="Dynamic Ansible Inventory from Chef"
-     )
-     parser.add_argument(
-         "--list", action="store_true", help="List all groups and hosts"
-     )
+     parser = argparse.ArgumentParser(description="Dynamic Ansible Inventory from Chef")
+     parser.add_argument("--list", action="store_true", help="List all groups")
      parser.add_argument("--host", help="Get variables for specific host")

      args = parser.parse_args()
@@ -1155,65 +1386,149 @@ def main():
          inventory = build_inventory()
          print(json.dumps(inventory, indent=2))
      elif args.host:
-         # Return empty dict for host-specific queries
-         # All host vars are included in _meta/hostvars
-         print(json.dumps({}))
+         print(json.dumps({{}}))
      else:
          parser.print_help()

-
  if __name__ == "__main__":
      main()
  '''
+     return script_template

-     # Convert queries_data to JSON string for embedding
-     queries_json = json.dumps(
-         {
-             item.get("group_name", f"group_{i}"): item.get("search_query", "")
-             for i, item in enumerate(queries_data)
-         },
-         indent=4,
-     )

-     return script_template.replace("{search_queries_json}", queries_json)
+ def get_chef_nodes(search_query: str) -> list[dict[str, Any]]:
+     """
+     Query Chef server for nodes matching search criteria.
+
+     Communicates with Chef server API to search for nodes.
+     Falls back to empty list if Chef server is unavailable.
+
+     Args:
+         search_query: Chef search query string
+
+     Returns:
+         List of node objects from Chef server
+
+     """
+     if not requests:
+         return []
+
+     chef_server_url = os.environ.get("CHEF_SERVER_URL", "").rstrip("/")
+
+     if not chef_server_url:
+         # Chef server not configured - return empty list
+         return []
+
+     try:
+         chef_server_url = validate_user_provided_url(chef_server_url)
+     except ValueError:
+         return []
+
+     try:
+         # Using Chef Server REST API search endpoint
+         # Search endpoint: GET /search/node?q=<query>
+         search_url = f"{chef_server_url}/search/node?q={search_query}"
+
+         # Note: Proper authentication requires Chef API signing
+         # For unauthenticated access, this may work on open Chef servers
+         # For production, use python-chef library for proper authentication
+         response = requests.get(search_url, timeout=10)
+         response.raise_for_status()
+
+         search_result = response.json()
+         nodes_data = []
+
+         for row in search_result.get("rows", []):
+             node_obj = {
+                 "name": row.get("name", "unknown"),
+                 "roles": row.get("run_list", []),
+                 "environment": row.get("chef_environment", "_default"),
+                 "platform": row.get("platform", "unknown"),
+                 "ipaddress": row.get("ipaddress", ""),
+                 "fqdn": row.get("fqdn", ""),
+                 "automatic": row.get("automatic", {}),
+             }
+             nodes_data.append(node_obj)
+
+         return nodes_data
+
+     except requests.exceptions.Timeout:
+         # Chef server not responding within timeout
+         return []
+     except requests.exceptions.ConnectionError:
+         # Cannot reach Chef server
+         return []
+     except requests.exceptions.HTTPError:
+         # HTTP error (404, 403, 500, etc.)
+         return []
+     except Exception:
+         # Fallback for any other errors
+         return []


  # Search pattern extraction


- def _extract_search_patterns_from_file(file_path: Path) -> list[dict[str, str]]:
-     """Extract Chef search patterns from a single recipe file."""
+ def _extract_search_patterns_from_file(
+     file_path: Path, base_path: Path
+ ) -> list[dict[str, str]]:
+     """
+     Extract Chef search patterns from a single recipe file.
+
+     Args:
+         file_path: Path to the file to parse.
+         base_path: Base directory for path validation.
+
+     Returns:
+         List of search patterns found in the file.
+
+     """
      try:
-         content = file_path.read_text()
+         content = safe_read_text(file_path, base_path)
          return _find_search_patterns_in_content(content, str(file_path))
      except Exception:
          return []


  def _extract_search_patterns_from_cookbook(cookbook_path: Path) -> list[dict[str, str]]:
-     """Extract Chef search patterns from all files in a cookbook."""
+     """
+     Extract Chef search patterns from all files in a cookbook.
+
+     Args:
+         cookbook_path: Path to the cookbook directory.
+
+     Returns:
+         List of all search patterns found in the cookbook.
+
+     """
      patterns = []

-     # Search in recipes directory
+     # Search in recipes directory using safe_glob
      recipes_dir = _safe_join(cookbook_path, "recipes")
-     if recipes_dir.exists():
-         for recipe_file in recipes_dir.glob("*.rb"):
-             file_patterns = _extract_search_patterns_from_file(recipe_file)
-             patterns.extend(file_patterns)
+     if safe_exists(recipes_dir, cookbook_path):
+         for recipe_file in safe_glob(recipes_dir, "*.rb", cookbook_path):
+             patterns_found = _extract_search_patterns_from_file(
+                 recipe_file, cookbook_path
+             )
+             patterns.extend(patterns_found)

-     # Search in libraries directory
+     # Search in libraries directory using safe_glob
      libraries_dir = _safe_join(cookbook_path, "libraries")
-     if libraries_dir.exists():
-         for library_file in libraries_dir.glob("*.rb"):
-             file_patterns = _extract_search_patterns_from_file(library_file)
-             patterns.extend(file_patterns)
+     if safe_exists(libraries_dir, cookbook_path):
+         for library_file in safe_glob(libraries_dir, "*.rb", cookbook_path):
+             patterns_found = _extract_search_patterns_from_file(
+                 library_file, cookbook_path
+             )
+             patterns.extend(patterns_found)

-     # Search in resources directory
+     # Search in resources directory using safe_glob
      resources_dir = _safe_join(cookbook_path, "resources")
-     if resources_dir.exists():
-         for resource_file in resources_dir.glob("*.rb"):
-             file_patterns = _extract_search_patterns_from_file(resource_file)
-             patterns.extend(file_patterns)
+     if safe_exists(resources_dir, cookbook_path):
+         for resource_file in safe_glob(resources_dir, "*.rb", cookbook_path):
+             patterns_found = _extract_search_patterns_from_file(
+                 resource_file, cookbook_path
+             )
+             patterns.extend(patterns_found)

      return patterns

@@ -1430,19 +1745,32 @@ def _build_playbook_header(recipe_name: str) -> list[str]:
  def _add_playbook_variables(
      playbook_lines: list[str], raw_content: str, recipe_file: Path
  ) -> None:
-     """Extract and add variables section to playbook."""
+     """
+     Extract and add variables section to playbook.
+
+     Args:
+         playbook_lines: List of playbook lines to add variables to.
+         raw_content: Raw recipe file content.
+         recipe_file: Path to the recipe file, normalized and contained within cookbook.
+
+     """
      variables = _extract_recipe_variables(raw_content)

-     # Try to parse attributes file
-     attributes_path = recipe_file.parent.parent / "attributes" / "default.rb"
-     if attributes_path.exists():
-         attributes_content = parse_attributes(str(attributes_path))
-         if not attributes_content.startswith(
-             "Error:"
-         ) and not attributes_content.startswith("Warning:"):
-             # Parse the resolved attributes
-             attr_vars = _extract_attribute_variables(attributes_content)
-             variables.update(attr_vars)
+     # Try to parse attributes file - validate it stays within cookbook
+     cookbook_path = recipe_file.parent.parent
+     attributes_path = _safe_join(cookbook_path, "attributes", "default.rb")
+     try:
+         if safe_exists(attributes_path, cookbook_path):
+             attributes_content = parse_attributes(str(attributes_path))
+             if not attributes_content.startswith(
+                 "Error:"
+             ) and not attributes_content.startswith("Warning:"):
+                 # Parse the resolved attributes
+                 attr_vars = _extract_attribute_variables(attributes_content)
+                 variables.update(attr_vars)
+     except ValueError:
+         # Path traversal attempt detected - skip safely
+         pass

      for var_name, var_value in variables.items():
          playbook_lines.append(f" {var_name}: {var_value}")