mcp-souschef 2.0.1__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,342 @@
1
+ """Chef ERB template parser and Jinja2 converter."""
2
+
3
+ import json
4
+ import re
5
+
6
+ from souschef.core.constants import (
7
+ ERB_PATTERNS,
8
+ ERROR_FILE_NOT_FOUND,
9
+ ERROR_IS_DIRECTORY,
10
+ ERROR_PERMISSION_DENIED,
11
+ JINJA2_VAR_REPLACEMENT,
12
+ NODE_PREFIX,
13
+ REGEX_ERB_OUTPUT,
14
+ REGEX_RUBY_INTERPOLATION,
15
+ REGEX_WORD_SYMBOLS,
16
+ )
17
+ from souschef.core.path_utils import _normalize_path
18
+
19
# Maximum length for variable names in ERB template parsing.
# Used as the upper bound of the \w{1,N} quantifiers in the ".each do |x|"
# pattern built by _extract_iterator_variables.
MAX_VARIABLE_NAME_LENGTH = 100

# Maximum length for code block content in regex matching.
# Used as the upper bound of the [^%]{1,N} quantifier in the "<% ... %>"
# pattern built by _extract_code_block_variables.
MAX_CODE_BLOCK_LENGTH = 500
24
+
25
+
26
def parse_template(path: str) -> str:
    """
    Read a Chef ERB template and describe it as JSON.

    Args:
        path: Location of the ERB template file.

    Returns:
        A pretty-printed JSON string holding the normalized file path, the
        sorted variable names found in the template and the template
        converted to Jinja2. If anything goes wrong, an error message
        string is returned instead of raising.

    """
    try:
        template_file = _normalize_path(path)
        erb_source = template_file.read_text(encoding="utf-8")

        # Collect the variable names first, then translate the syntax.
        found_names = _extract_template_variables(erb_source)
        converted = _convert_erb_to_jinja2(erb_source)

        return json.dumps(
            {
                "original_file": str(template_file),
                "variables": sorted(found_names),
                "jinja2_template": converted,
            },
            indent=2,
        )
    except FileNotFoundError:
        return ERROR_FILE_NOT_FOUND.format(path=path)
    except IsADirectoryError:
        return ERROR_IS_DIRECTORY.format(path=path)
    except PermissionError:
        return ERROR_PERMISSION_DENIED.format(path=path)
    except UnicodeDecodeError:
        return f"Error: Unable to decode {path} as UTF-8 text"
    except Exception as exc:
        # Last-resort boundary: report any other failure as text.
        return f"An error occurred: {exc}"
65
+
66
+
67
+ def _strip_ruby_comments(content: str) -> str:
68
+ """
69
+ Remove Ruby comments from code.
70
+
71
+ Args:
72
+ content: Ruby code content.
73
+
74
+ Returns:
75
+ Content with comments removed.
76
+
77
+ """
78
+ # Remove single-line comments but preserve strings
79
+ lines = []
80
+ for line in content.split("\n"):
81
+ # Skip if line is only a comment
82
+ if line.strip().startswith("#"):
83
+ continue
84
+ # Remove inline comments (simple approach - doesn't handle # in strings)
85
+ comment_pos = line.find("#")
86
+ if comment_pos > 0:
87
+ # Check if # is inside a string by counting quotes before it
88
+ before_comment = line[:comment_pos]
89
+ single_quotes = before_comment.count("'") - before_comment.count("\\'")
90
+ double_quotes = before_comment.count('"') - before_comment.count('\\"')
91
+ # If odd number of quotes, # is inside a string
92
+ if single_quotes % 2 == 0 and double_quotes % 2 == 0:
93
+ line = line[:comment_pos]
94
+ lines.append(line)
95
+ return "\n".join(lines)
96
+
97
+
98
def _extract_output_variables(content: str, variables: set[str]) -> None:
    """
    Collect variable names referenced inside <%= %> output tags.

    Args:
        content: Raw ERB template content.
        variables: Set that receives the discovered names (mutated in place).

    """
    for raw_expr in re.findall(REGEX_ERB_OUTPUT, content):
        expr = raw_expr.strip()

        if expr.startswith(NODE_PREFIX):
            # Node attribute reference: keep the attribute path, if any.
            node_path = _extract_node_attribute_path(expr)
            if node_path:
                variables.add(node_path)
            continue

        if expr.startswith("@"):
            # Instance variables: @var -> var
            variables.add(expr[1:])
            continue

        # Anything else: keep only the leading word of the expression.
        leading_word = re.match(r"(\w+)", expr)
        if leading_word is not None:
            variables.add(leading_word.group(1))
122
+
123
+
124
+ def _extract_node_attribute_path(node_ref: str) -> str:
125
+ """
126
+ Extract attribute path from a node reference.
127
+
128
+ Args:
129
+ node_ref: Node reference like "node['attr']['subattr']".
130
+
131
+ Returns:
132
+ Cleaned attribute path like "attr']['subattr".
133
+
134
+ """
135
+ # Extract the full attribute path
136
+ attr_path = node_ref[5:] # Remove 'node['
137
+ # Remove the leading quote if present
138
+ if attr_path and attr_path[0] in ("'", '"'):
139
+ attr_path = attr_path[1:]
140
+ # Remove the trailing ] and quote if present
141
+ if attr_path and (attr_path.endswith("']") or attr_path.endswith('"]')):
142
+ attr_path = attr_path[:-2]
143
+ elif attr_path and attr_path[-1] == "]":
144
+ attr_path = attr_path[:-1]
145
+ return attr_path
146
+
147
+
148
def _extract_interpolated_variables(code: str, variables: set[str]) -> None:
    """
    Collect variable names used in Ruby string interpolation.

    Args:
        code: Code block content.
        variables: Set that receives the discovered names (mutated in place).

    """
    # Match the word/symbol pattern at the start of every interpolated
    # expression and keep the ones that match.
    candidates = (
        re.match(REGEX_WORD_SYMBOLS, expression.strip())
        for expression in re.findall(REGEX_RUBY_INTERPOLATION, code)
    )
    variables.update(hit.group() for hit in candidates if hit)
162
+
163
+
164
def _extract_node_attributes(code: str, variables: set[str]) -> None:
    """
    Extract node attribute references from code.

    Each reference is a ``node`` followed by one or more bracket subscripts
    (``node['a']['b']``). Subscripts are matched individually so that a
    reference stops at its own last ``]`` instead of swallowing the rest of
    the line up to some later, unrelated ``]``.

    Args:
        code: Code block content.
        variables: Set to add found variables to (modified in place).

    """
    if NODE_PREFIX not in code:
        return

    # A chain of [...] groups, none of which contains another bracket.
    for match in re.finditer(r"node(?:\[[^\[\]]+\])+", code):
        attr_path = _extract_node_attribute_path(match.group())
        if attr_path:
            variables.add(attr_path)
179
+
180
+
181
+ def _extract_conditional_variables(code: str, variables: set[str]) -> None:
182
+ """
183
+ Extract variables from conditional statements.
184
+
185
+ Args:
186
+ code: Code block content.
187
+ variables: Set to add found variables to (modified in place).
188
+
189
+ """
190
+ if code.startswith(("if ", "unless ", "elsif ")):
191
+ var_refs = re.findall(r"\b(\w+)", code)
192
+ for var in var_refs:
193
+ if var not in ["if", "unless", "elsif", "end", "do", "node"]:
194
+ variables.add(var)
195
+
196
+
197
def _extract_iterator_variables(code: str, variables: set[str]) -> None:
    """
    Collect the collection and block variable of a ".each do |x|" loop.

    Args:
        code: Code block content.
        variables: Set that receives the discovered names (mutated in place).

    """
    if ".each" not in code:
        return

    # Both names share the same length-bounded word pattern.
    bounded_name = rf"(\w{{1,{MAX_VARIABLE_NAME_LENGTH}}})"
    loop = re.search(
        bounded_name + r"\.each\s+do\s+\|" + bounded_name + r"\|",
        code,
    )
    if loop:
        # Group 1 is the array variable, group 2 the iterator variable.
        variables.update(loop.groups())
215
+
216
+
217
def _extract_code_block_variables(content: str, variables: set[str]) -> None:
    """
    Collect variables from every <% %> code block in a template.

    Args:
        content: Raw ERB template content.
        variables: Set that receives the discovered names (mutated in place).

    """
    block_pattern = rf"<%\s+([^%]{{1,{MAX_CODE_BLOCK_LENGTH}}}?)\s+%>"

    # Each extractor looks at one kind of construct; they run in this order.
    extractors = (
        _extract_interpolated_variables,
        _extract_node_attributes,
        _extract_conditional_variables,
        _extract_iterator_variables,
    )
    for snippet in re.findall(block_pattern, content, re.DOTALL):
        for extract in extractors:
            extract(snippet, variables)
234
+
235
+
236
def _extract_template_variables(content: str) -> set[str]:
    """
    Gather every variable name referenced by an ERB template.

    Args:
        content: Raw ERB template content.

    Returns:
        Set of variable names found in output tags and code blocks.

    """
    found: set[str] = set()

    # Output tags are scanned first, then code blocks; both fill `found`.
    for collect in (_extract_output_variables, _extract_code_block_variables):
        collect(content, found)

    return found
256
+
257
+
258
def _convert_erb_to_jinja2(content: str) -> str:
    """
    Convert ERB template syntax to Jinja2.

    The patterns from ERB_PATTERNS are applied in a fixed order (node
    attributes, each loops, conditionals), then every ``<% end %>`` is
    turned into the closing tag of the block it actually terminates, and
    finally output tags and ``@`` instance-variable prefixes are rewritten.

    Args:
        content: Raw ERB template content.

    Returns:
        Template content converted to Jinja2 syntax.

    """
    result = content

    # Apply each conversion pattern in order, most specific first. The
    # generic output pattern is applied further down so that it cannot
    # interfere with these.
    for key in ("node_attr", "each", "unless", "elsif", "if_start", "else"):
        result = re.sub(ERB_PATTERNS[key][0], ERB_PATTERNS[key][1], result)

    # Resolve every <% end %> against a stack of the blocks opened so far.
    # Tracking open blocks (instead of comparing the positions of the last
    # "{% if" and "{% for") keeps nested structures correct: an `if` inside
    # a `for` closes with endif, and the loop then closes with endfor.
    open_blocks: list[str] = []

    def _track_or_close(match: re.Match) -> str:
        opener = match.group(1)
        if opener:
            # An opening {% if ... / {% for ... tag: remember it, keep text.
            open_blocks.append(opener)
            return match.group(0)
        # An ERB end tag: close the innermost open block. With nothing open
        # fall back to endif.
        closed = open_blocks.pop() if open_blocks else "if"
        return "{% endfor %}" if closed == "for" else "{% endif %}"

    result = re.sub(r"\{% (if|for)\b|<%\s*end\s*%>", _track_or_close, result)

    # Convert variable output (do this last to not interfere with other patterns)
    result = re.sub(ERB_PATTERNS["output"][0], ERB_PATTERNS["output"][1], result)

    # Clean up instance variables: @var -> var
    result = re.sub(r"\{\{\s*@(\w+)\s*\}\}", JINJA2_VAR_REPLACEMENT, result)
    # Clean up @var in conditionals and other control structures.
    # NOTE(review): this runs over the whole template, so "@word" in literal
    # text (for example an e-mail address) loses its "@" too - confirm that
    # this is acceptable.
    result = re.sub(r"@(\w+)", r"\1", result)

    return result
322
+
323
+
324
+ def _extract_heredoc_strings(content: str) -> dict[str, str]:
325
+ """
326
+ Extract heredoc strings from Ruby code.
327
+
328
+ Args:
329
+ content: Ruby code content.
330
+
331
+ Returns:
332
+ Dictionary mapping heredoc markers to their content.
333
+
334
+ """
335
+ heredocs = {}
336
+ # Match heredoc patterns: <<-MARKER or <<MARKER
337
+ heredoc_pattern = r"<<-?(\w+)\s*\n((?:(?!^\s*\1\s*$).)*?)^\s*\1\s*$"
338
+ for match in re.finditer(heredoc_pattern, content, re.DOTALL | re.MULTILINE):
339
+ marker = match.group(1)
340
+ content_text = match.group(2)
341
+ heredocs[marker] = content_text
342
+ return heredocs