py-adtools 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adtools/__init__.py +1 -0
- adtools/cli.py +61 -0
- adtools/evaluator/__init__.py +2 -0
- adtools/evaluator/auto_server.py +258 -0
- adtools/evaluator/py_evaluator.py +170 -0
- adtools/evaluator/py_evaluator_ray.py +110 -0
- adtools/lm/__init__.py +4 -0
- adtools/lm/lm_base.py +63 -0
- adtools/lm/openai_api.py +118 -0
- adtools/lm/sglang_server.py +423 -0
- adtools/lm/vllm_server.py +452 -0
- adtools/py_code.py +577 -0
- adtools/sandbox/__init__.py +2 -0
- adtools/sandbox/sandbox_executor.py +244 -0
- adtools/sandbox/sandbox_executor_ray.py +194 -0
- adtools/sandbox/utils.py +32 -0
- py_adtools-0.3.2.dist-info/METADATA +567 -0
- py_adtools-0.3.2.dist-info/RECORD +22 -0
- py_adtools-0.3.2.dist-info/WHEEL +5 -0
- py_adtools-0.3.2.dist-info/entry_points.txt +2 -0
- py_adtools-0.3.2.dist-info/licenses/LICENSE +21 -0
- py_adtools-0.3.2.dist-info/top_level.txt +1 -0
adtools/py_code.py
ADDED
|
@@ -0,0 +1,577 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright (c) 2025 Rui Zhang <rzhang.cs@gmail.com>
|
|
3
|
+
|
|
4
|
+
NOTICE: This code is under MIT license. This code is intended for academic/research purposes only.
|
|
5
|
+
Commercial use of this software or its derivatives requires prior written permission.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import ast
|
|
9
|
+
import dataclasses
|
|
10
|
+
import textwrap
|
|
11
|
+
import tokenize
|
|
12
|
+
from typing import List, Optional, Union, Set, Any
|
|
13
|
+
from io import BytesIO
|
|
14
|
+
|
|
15
|
+
__all__ = ["PyCodeBlock", "PyFunction", "PyClass", "PyProgram"]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclasses.dataclass
class PyCodeBlock:
    """A chunk of raw Python source that is neither a function nor a class.

    Typical examples are module-level statements sitting between definitions,
    or loose statements found inside a class body.
    """

    code: str

    def _to_str(self, indent_str=""):
        # Re-indent the block for embedding (e.g. in a class body); the helper
        # leaves continuation lines of multi-line string literals untouched.
        return _indent_code_skip_multi_line_str(self.code, indent_str)

    def __repr__(self) -> str:
        # Same text as str(), followed by a single newline.
        return "".join((self.__str__(), "\n"))

    def __str__(self) -> str:
        return self.code
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Adapted from: https://github.com/google-deepmind/funsearch/blob/main/implementation/code_manipulation.py
|
|
37
|
+
@dataclasses.dataclass
class PyFunction:
    """A parsed Python function.

    Attributes:
        decorator: Source text of the decorator line(s); None or "" when absent.
        name: Function name.
        args: Parameter list as source text (what goes between the parentheses).
        body: Body source, expected to be already indented relative to the
            ``def`` line. Leading/trailing newlines are stripped on assignment.
        return_type: Return annotation as source text, or None.
        docstring: Docstring content without the surrounding triple quotes
            (any ``\"\"\"`` sequences are removed on assignment).
        is_async: True when the function is declared with ``async def``.
    """

    decorator: Optional[str]
    name: str
    args: str
    body: str
    return_type: Optional[str] = None
    docstring: Optional[str] = None
    is_async: bool = False

    def _signature(self) -> str:
        """Return decorator line(s) plus the ``def`` line, without trailing newline."""
        decorator = f"{self.decorator}\n" if self.decorator else ""
        prefix = "async def" if self.is_async else "def"
        return_type = f" -> {self.return_type}" if self.return_type else ""
        return f"{decorator}{prefix} {self.name}({self.args}){return_type}:"

    def _docstring_literal(self) -> str:
        """Return the docstring as a triple-quoted literal indented by 4 spaces.

        The generated code assumes the standard 4-space indentation so the
        literal lines up with the (already indented) function body.
        """
        # A multi-line docstring gets its closing quotes on a separate line.
        is_multi_line_doc = len(self.docstring.splitlines()) > 1
        docstring_end = "\n    " if is_multi_line_doc else ""
        return f'    """{self.docstring}{docstring_end}"""'

    def _to_str(self, indent_str=""):
        """Render the function with every line prefixed by `indent_str`.

        Used when the function is emitted as a method inside a class. Body
        lines that continue a multi-line string literal are not re-indented.
        """
        function_def = textwrap.indent(self._signature(), indent_str) + "\n"

        if self.docstring:
            function_def += textwrap.indent(self._docstring_literal(), indent_str)
            # Separate docstring and body only when there is a body.
            if self.body:
                function_def += "\n"

        function_def += _indent_code_skip_multi_line_str(self.body, indent_str)
        return function_def

    def __str__(self) -> str:
        """Render the function as top-level source (no extra indentation)."""
        function_def = self._signature() + "\n"

        if self.docstring:
            function_def += self._docstring_literal()
            if self.body:
                function_def += "\n"

        # The body is emitted verbatim; it is expected to carry its own indent.
        function_def += self.body
        return function_def

    def __repr__(self) -> str:
        return self.__str__() + "\n\n"

    def __setattr__(self, name: str, value: Any) -> None:
        """Normalize `body` and `docstring` whenever they are assigned."""
        # Ensure there aren't leading & trailing new lines in `body`
        if name == "body" and isinstance(value, str):
            value = value.strip("\n")
        # Ensure there aren't leading & trailing quotes in `docstring`
        if name == "docstring" and value is not None:
            if '"""' in value:
                value = value.strip()
                value = value.replace('"""', "")
        super().__setattr__(name, value)

    @classmethod
    def extract_first_function_from_text(cls, text: str) -> "PyFunction":
        """Parse `text` and return the first top-level function found.

        Raises:
            SyntaxError: If `text` is not valid Python (from ``ast.parse``).
            ValueError: If `text` contains no top-level function.
        """
        tree = ast.parse(text)
        visitor = _ProgramVisitor(text)
        visitor.visit(tree)
        program = visitor.return_program()
        if not program.functions:
            raise ValueError("No functions found in the provided text.")
        return program.functions[0]

    @classmethod
    def extract_all_functions_from_text(cls, text: str) -> List["PyFunction"]:
        """Parse `text` and return all top-level functions found (possibly empty).

        Raises:
            SyntaxError: If `text` is not valid Python (from ``ast.parse``).
        """
        tree = ast.parse(text)
        visitor = _ProgramVisitor(text)
        visitor.visit(tree)
        program = visitor.return_program()
        return program.functions
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@dataclasses.dataclass
class PyClass:
    """A parsed Python class.

    Attributes:
        decorator: Source text of the decorator line(s); None or "" when absent.
        name: Class name.
        bases: Text placed between the parentheses of the class header,
            or None/"" when the class has no parenthesized header arguments.
        statements: Raw code blocks (variables, assignments) found in the
            class body, or None.
        docstring: Docstring content without the surrounding triple quotes.
        functions: Methods defined in the class body.
        body: Everything in source order (methods + statements + gaps), or None.
    """

    decorator: Optional[str]
    name: str
    bases: Optional[str]

    # Holds raw code blocks (variables, assignments) found in the class body.
    statements: Optional[List["PyCodeBlock"]] = None
    docstring: Optional[str] = None
    functions: List["PyFunction"] = dataclasses.field(default_factory=list)
    # Holds everything in order (Methods + Statements + Gaps).
    body: Optional[List[Union["PyCodeBlock", "PyFunction"]]] = None

    def __str__(self) -> str:
        """Render the class as source code using 4-space indentation."""
        parts = []
        if self.decorator:
            parts.append(f"{self.decorator}\n")
        parts.append(f"class {self.name}")
        if self.bases:
            parts.append(f"({self.bases})")
        parts.append(":\n")

        if self.docstring:
            # A multi-line docstring gets its closing quotes on a separate line.
            is_multi_line_doc = len(self.docstring.splitlines()) > 1
            docstring_end = "\n    " if is_multi_line_doc else ""
            parts.append(f'    """{self.docstring}{docstring_end}"""\n\n')

        if not self.body:
            # Keep the generated class syntactically valid when it has no members.
            parts.append("    pass")
            return "".join(parts)

        last_index = len(self.body) - 1
        previous = None
        for i, item in enumerate(self.body):
            # Insert a blank line between members, except between two
            # consecutive raw code blocks, which stay adjacent.
            both_code_blocks = isinstance(previous, PyCodeBlock) and isinstance(
                item, PyCodeBlock
            )
            if previous is not None and not both_code_blocks:
                parts.append("\n")

            # Use item._to_str() to indent each item in the class
            assert isinstance(item, (PyCodeBlock, PyFunction))
            parts.append(str(item._to_str(indent_str="    ")))
            if i != last_index:
                parts.append("\n")
            previous = item

        return "".join(parts)

    def __repr__(self):
        return self.__str__() + "\n\n"

    def __setattr__(self, name: str, value: Any) -> None:
        """Normalize `body` (when given as a string) and `docstring` on assignment."""
        if name == "body" and isinstance(value, str):
            value = value.strip("\n")
        # Drop surrounding whitespace and any triple quotes from the docstring.
        if name == "docstring" and value is not None:
            if '"""' in value:
                value = value.strip()
                value = value.replace('"""', "")
        super().__setattr__(name, value)

    @classmethod
    def extract_first_class_from_text(cls, text: str) -> "PyClass":
        """Parse `text` and return the first top-level class found.

        Raises:
            SyntaxError: If `text` is not valid Python (from ``ast.parse``).
            ValueError: If `text` contains no top-level class.
        """
        tree = ast.parse(text)
        visitor = _ProgramVisitor(text)
        visitor.visit(tree)
        program = visitor.return_program()
        if not program.classes:
            raise ValueError("No classes found in the provided text.")
        return program.classes[0]

    @classmethod
    def extract_all_classes_from_text(cls, text: str) -> List["PyClass"]:
        """Parse `text` and return all top-level classes found (possibly empty).

        Raises:
            SyntaxError: If `text` is not valid Python (from ``ast.parse``).
        """
        tree = ast.parse(text)
        visitor = _ProgramVisitor(text)
        visitor.visit(tree)
        program = visitor.return_program()
        return program.classes
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
@dataclasses.dataclass
class PyProgram:
    """A parsed Python program containing scripts, functions, and classes."""

    scripts: List["PyCodeBlock"]  # Top-level code not in classes/functions
    functions: List["PyFunction"]  # Top-level functions
    classes: List["PyClass"]  # Top-level classes
    elements: List[
        Union["PyFunction", "PyClass", "PyCodeBlock"]
    ]  # Complete sequence of the file elements.

    def __str__(self) -> str:
        """Render all elements in order, separated by one blank line."""
        return "\n\n".join(str(item) for item in self.elements).strip()

    @classmethod
    def from_text(cls, text: str, debug=False) -> Optional["PyProgram"]:
        """Parse `text` into a PyProgram object.

        Args:
            text: Python source code.
            debug: When true, re-raise the parsing error instead of returning None.

        Returns:
            The parsed program, or None when parsing fails and `debug` is false.
        """
        try:
            tree = ast.parse(text)
            visitor = _ProgramVisitor(text)
            visitor.visit(tree)
            return visitor.return_program()
        except Exception:
            # Deliberately not a bare `except`: KeyboardInterrupt and
            # SystemExit must never be turned into a silent `None`.
            if debug:
                raise
            return None

    @classmethod
    def remove_comments(cls, py_code: Union[str, Any]) -> str:
        """Remove all comments from the given Python code.

        The input is converted with ``str()`` first. The `tokenize` module is
        used to drop every comment token (# ...) while the remaining tokens
        keep their original positions, so the layout of the code is
        preserved (whitespace that preceded a comment may remain).

        Returns:
            The code without comments, or the unchanged (stringified) code
            when it cannot be tokenized.
        """
        py_code = str(py_code)
        try:
            # Use tokenize to accurately identify and remove comments
            io_obj = BytesIO(py_code.encode("utf-8"))
            tokens = tokenize.tokenize(io_obj.readline)
            filtered_tokens = [t for t in tokens if t.type != tokenize.COMMENT]
            return tokenize.untokenize(filtered_tokens).decode("utf-8")
        except (tokenize.TokenError, IndentationError):
            # Return original code if tokenization fails
            return py_code
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _indent_code_skip_multi_line_str(code: str, indent_str: str) -> str:
|
|
260
|
+
"""Indents code by `indent_str`, but skips lines that are inside
|
|
261
|
+
multiline strings to preserve their internal formatting.
|
|
262
|
+
"""
|
|
263
|
+
lines = code.splitlines()
|
|
264
|
+
string_lines = set()
|
|
265
|
+
|
|
266
|
+
# Identify lines belonging to multiline strings
|
|
267
|
+
tokens = tokenize.tokenize(BytesIO(code.encode("utf-8")).readline)
|
|
268
|
+
for token in tokens:
|
|
269
|
+
if token.type == tokenize.STRING:
|
|
270
|
+
start_line, _ = token.start
|
|
271
|
+
end_line, _ = token.end
|
|
272
|
+
|
|
273
|
+
# If it is a multiline string
|
|
274
|
+
if end_line > start_line:
|
|
275
|
+
# We protect the content (start+1 to end)
|
|
276
|
+
# We also protect the end_line because usually the closing quotes
|
|
277
|
+
# are already positioned correctly in the source string
|
|
278
|
+
for i in range(start_line + 1, end_line + 1):
|
|
279
|
+
string_lines.add(i)
|
|
280
|
+
|
|
281
|
+
result = []
|
|
282
|
+
for i, line in enumerate(lines):
|
|
283
|
+
lineno = i + 1
|
|
284
|
+
# If the line is inside a multiline string, append it as-is (no indent)
|
|
285
|
+
if lineno in string_lines:
|
|
286
|
+
result.append(line)
|
|
287
|
+
else:
|
|
288
|
+
# Otherwise, apply indentation
|
|
289
|
+
# We strip whitespace to avoid indenting empty lines (mimicking textwrap behavior)
|
|
290
|
+
if line.strip():
|
|
291
|
+
result.append(indent_str + line)
|
|
292
|
+
else:
|
|
293
|
+
result.append("")
|
|
294
|
+
|
|
295
|
+
return "\n".join(result)
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
class _ProgramVisitor(ast.NodeVisitor):
|
|
299
|
+
"""Parses code to collect all required information to produce a `PyProgram`.
|
|
300
|
+
Handles scripts, functions, and classes with robust indentation handling.
|
|
301
|
+
"""
|
|
302
|
+
|
|
303
|
+
def __init__(self, sourcecode: str):
|
|
304
|
+
self._codelines: List[str] = sourcecode.splitlines()
|
|
305
|
+
self._scripts: List[PyCodeBlock] = []
|
|
306
|
+
self._functions: List[PyFunction] = []
|
|
307
|
+
self._classes: List[PyClass] = []
|
|
308
|
+
self._elements: List[Union[PyFunction, PyClass, PyCodeBlock]] = []
|
|
309
|
+
self._last_script_end = 0
|
|
310
|
+
# Pre-process to identify all lines that are part of a multiline string.
|
|
311
|
+
self._multiline_string_lines: Set[int] = self._detect_multiline_strings(
|
|
312
|
+
sourcecode
|
|
313
|
+
)
|
|
314
|
+
|
|
315
|
+
def _detect_multiline_strings(self, sourcecode: str) -> Set[int]:
|
|
316
|
+
"""Scans the source code using tokenize to identify line numbers
|
|
317
|
+
that belong to the body of multiline strings. These lines are not indented or dedented.
|
|
318
|
+
"""
|
|
319
|
+
string_lines = set()
|
|
320
|
+
# Tokenize the source code
|
|
321
|
+
tokens = tokenize.tokenize(BytesIO(sourcecode.encode("utf-8")).readline)
|
|
322
|
+
for token in tokens:
|
|
323
|
+
if token.type == tokenize.STRING:
|
|
324
|
+
start_line, _ = token.start
|
|
325
|
+
end_line, _ = token.end
|
|
326
|
+
|
|
327
|
+
# If start_line != end_line, it is a multiline string
|
|
328
|
+
if end_line > start_line:
|
|
329
|
+
# Mark the lines strictly between start and end as string body
|
|
330
|
+
# The start line usually contains the assignment variable or key,
|
|
331
|
+
# so the indent lines is between [start_line + 1, end_line],
|
|
332
|
+
# or [start_line + 1, end_line + 1)
|
|
333
|
+
for i in range(start_line + 1, end_line + 1):
|
|
334
|
+
string_lines.add(i)
|
|
335
|
+
|
|
336
|
+
return string_lines
|
|
337
|
+
|
|
338
|
+
def _get_code(self, start_line: int, end_line: int, remove_indent: int = 0) -> str:
|
|
339
|
+
"""Get code between start_line and end_line.
|
|
340
|
+
|
|
341
|
+
Args:
|
|
342
|
+
remove_indent: The number of spaces to strip from the beginning of each line.
|
|
343
|
+
This corresponds to the column offset of the function/class definition.
|
|
344
|
+
"""
|
|
345
|
+
if start_line >= end_line:
|
|
346
|
+
return ""
|
|
347
|
+
|
|
348
|
+
lines = self._codelines[start_line:end_line]
|
|
349
|
+
|
|
350
|
+
if remove_indent > 0:
|
|
351
|
+
dedented_lines = []
|
|
352
|
+
|
|
353
|
+
for idx, line in enumerate(lines):
|
|
354
|
+
# Calculate the 1-based line number in the original source file
|
|
355
|
+
current_lineno = start_line + idx + 1
|
|
356
|
+
|
|
357
|
+
if current_lineno in self._multiline_string_lines:
|
|
358
|
+
# Check if the current line is inside a multiline string
|
|
359
|
+
# If the line is in the multiline string, we preserve it exactly as is
|
|
360
|
+
dedented_lines.append(line)
|
|
361
|
+
else:
|
|
362
|
+
# For normal code, we allow stripping if the line is empty (isspace),
|
|
363
|
+
# even if it doesn't have the full indentation length
|
|
364
|
+
if len(line) >= remove_indent and line[:remove_indent].isspace():
|
|
365
|
+
dedented_lines.append(line[remove_indent:])
|
|
366
|
+
else:
|
|
367
|
+
dedented_lines.append(line)
|
|
368
|
+
|
|
369
|
+
return "\n".join(dedented_lines).rstrip()
|
|
370
|
+
else:
|
|
371
|
+
# For top-level functions (remove_indent=0), return raw code
|
|
372
|
+
return "\n".join(lines).rstrip()
|
|
373
|
+
|
|
374
|
+
def _add_script_segment(self, start_line: int, end_line: int):
|
|
375
|
+
"""Add a script segment (gap between functions/classes) from the code."""
|
|
376
|
+
if start_line >= end_line:
|
|
377
|
+
return
|
|
378
|
+
script_code = self._get_code(start_line, end_line).strip()
|
|
379
|
+
if script_code:
|
|
380
|
+
script = PyCodeBlock(code=script_code)
|
|
381
|
+
self._scripts.append(script)
|
|
382
|
+
self._elements.append(script)
|
|
383
|
+
|
|
384
|
+
def _extract_function_info(
|
|
385
|
+
self, node: Union[ast.FunctionDef, ast.AsyncFunctionDef]
|
|
386
|
+
) -> PyFunction:
|
|
387
|
+
"""Shared logic to extract information from FunctionDef or AsyncFunctionDef."""
|
|
388
|
+
# Extract decorators
|
|
389
|
+
if hasattr(node, "decorator_list") and node.decorator_list:
|
|
390
|
+
dec_start = min(d.lineno for d in node.decorator_list)
|
|
391
|
+
decorator = self._get_code(
|
|
392
|
+
dec_start - 1, node.lineno - 1, remove_indent=node.col_offset
|
|
393
|
+
)
|
|
394
|
+
else:
|
|
395
|
+
decorator = None
|
|
396
|
+
|
|
397
|
+
# Extract docstring
|
|
398
|
+
if isinstance(node.body[0], ast.Expr) and isinstance(
|
|
399
|
+
node.body[0].value, ast.Constant
|
|
400
|
+
):
|
|
401
|
+
docstring = ast.literal_eval(ast.unparse(node.body[0])).strip()
|
|
402
|
+
# Dedent docstring based on the node offset
|
|
403
|
+
dedented_docstring_lines = []
|
|
404
|
+
# For top-level functions, the node.col_offset is 0, docstring is not modified
|
|
405
|
+
# For class methods, the node.col_offset is 4, we dedent docstring for 4 spaces (the class indent)
|
|
406
|
+
remove_indent = node.col_offset
|
|
407
|
+
|
|
408
|
+
for idx, line in enumerate(docstring.splitlines()):
|
|
409
|
+
if len(line) >= remove_indent and line[:remove_indent].isspace():
|
|
410
|
+
line = line[remove_indent:]
|
|
411
|
+
dedented_docstring_lines.append(line)
|
|
412
|
+
|
|
413
|
+
docstring = "\n".join(dedented_docstring_lines)
|
|
414
|
+
else:
|
|
415
|
+
docstring = None
|
|
416
|
+
|
|
417
|
+
# Determine where the actual code body starts
|
|
418
|
+
if docstring and len(node.body) > 1:
|
|
419
|
+
body_start_line = node.body[1].lineno - 1
|
|
420
|
+
elif docstring:
|
|
421
|
+
body_start_line = node.end_lineno
|
|
422
|
+
else:
|
|
423
|
+
body_start_line = node.body[0].lineno - 1
|
|
424
|
+
|
|
425
|
+
# Extract body, and apply critical indentation:
|
|
426
|
+
# For top-level functions, col_offset is 0, the body is not modified
|
|
427
|
+
# For class methods, col_offset is 4, the dy loses exactly 4 spaces (the class indent)
|
|
428
|
+
body = self._get_code(
|
|
429
|
+
body_start_line, node.end_lineno, remove_indent=node.col_offset
|
|
430
|
+
)
|
|
431
|
+
is_async = isinstance(node, ast.AsyncFunctionDef)
|
|
432
|
+
|
|
433
|
+
return PyFunction(
|
|
434
|
+
decorator=decorator,
|
|
435
|
+
name=node.name,
|
|
436
|
+
args=ast.unparse(node.args),
|
|
437
|
+
return_type=ast.unparse(node.returns) if node.returns else None,
|
|
438
|
+
docstring=docstring,
|
|
439
|
+
body=body,
|
|
440
|
+
is_async=is_async,
|
|
441
|
+
)
|
|
442
|
+
|
|
443
|
+
def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
|
|
444
|
+
"""Handles top-level synchronous functions."""
|
|
445
|
+
if node.col_offset == 0:
|
|
446
|
+
start_line = node.lineno - 1
|
|
447
|
+
if hasattr(node, "decorator_list") and node.decorator_list:
|
|
448
|
+
start_line = min(d.lineno for d in node.decorator_list) - 1
|
|
449
|
+
|
|
450
|
+
self._add_script_segment(self._last_script_end, start_line)
|
|
451
|
+
self._last_script_end = node.end_lineno
|
|
452
|
+
|
|
453
|
+
func = self._extract_function_info(node)
|
|
454
|
+
self._functions.append(func)
|
|
455
|
+
self._elements.append(func)
|
|
456
|
+
|
|
457
|
+
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
|
|
458
|
+
"""Handles top-level asynchronous functions."""
|
|
459
|
+
if node.col_offset == 0:
|
|
460
|
+
start_line = node.lineno - 1
|
|
461
|
+
if hasattr(node, "decorator_list") and node.decorator_list:
|
|
462
|
+
start_line = min(d.lineno for d in node.decorator_list) - 1
|
|
463
|
+
|
|
464
|
+
self._add_script_segment(self._last_script_end, start_line)
|
|
465
|
+
self._last_script_end = node.end_lineno
|
|
466
|
+
|
|
467
|
+
func = self._extract_function_info(node)
|
|
468
|
+
self._functions.append(func)
|
|
469
|
+
self._elements.append(func)
|
|
470
|
+
|
|
471
|
+
def visit_ClassDef(self, node: ast.ClassDef) -> None:
|
|
472
|
+
"""Handles top-level classes."""
|
|
473
|
+
if node.col_offset == 0:
|
|
474
|
+
# Handle decorators and preceding script
|
|
475
|
+
start_line = node.lineno - 1
|
|
476
|
+
if hasattr(node, "decorator_list") and node.decorator_list:
|
|
477
|
+
start_line = min(d.lineno for d in node.decorator_list) - 1
|
|
478
|
+
decorator_code = self._get_code(start_line, node.lineno - 1)
|
|
479
|
+
else:
|
|
480
|
+
decorator_code = None
|
|
481
|
+
|
|
482
|
+
self._add_script_segment(self._last_script_end, start_line)
|
|
483
|
+
self._last_script_end = node.end_lineno
|
|
484
|
+
|
|
485
|
+
# Extract docstring
|
|
486
|
+
if isinstance(node.body[0], ast.Expr) and isinstance(
|
|
487
|
+
node.body[0].value, ast.Constant
|
|
488
|
+
):
|
|
489
|
+
docstring = ast.literal_eval(ast.unparse(node.body[0])).strip()
|
|
490
|
+
else:
|
|
491
|
+
docstring = None
|
|
492
|
+
|
|
493
|
+
# Extract class basic info
|
|
494
|
+
bases = (
|
|
495
|
+
", ".join([ast.unparse(base) for base in node.bases])
|
|
496
|
+
if node.bases
|
|
497
|
+
else None
|
|
498
|
+
)
|
|
499
|
+
|
|
500
|
+
# Process class body contents
|
|
501
|
+
methods = []
|
|
502
|
+
statements = []
|
|
503
|
+
class_body = []
|
|
504
|
+
last_inner_end = node.lineno
|
|
505
|
+
body_nodes = node.body
|
|
506
|
+
|
|
507
|
+
if docstring:
|
|
508
|
+
if len(body_nodes) > 0:
|
|
509
|
+
last_inner_end = body_nodes[0].end_lineno
|
|
510
|
+
body_nodes = body_nodes[1:]
|
|
511
|
+
|
|
512
|
+
for item in body_nodes:
|
|
513
|
+
# Default start is the definition line
|
|
514
|
+
item_start_line = item.lineno
|
|
515
|
+
|
|
516
|
+
# If the item has decorators (Function or Class), the visual start is the first decorator
|
|
517
|
+
if hasattr(item, "decorator_list") and item.decorator_list:
|
|
518
|
+
item_start_line = min(d.lineno for d in item.decorator_list)
|
|
519
|
+
|
|
520
|
+
# Capture Gaps (Use item_start_line instead of item.lineno)
|
|
521
|
+
gap_code = self._get_code(
|
|
522
|
+
last_inner_end, item_start_line - 1, remove_indent=item.col_offset
|
|
523
|
+
).strip()
|
|
524
|
+
if gap_code:
|
|
525
|
+
gap_block = PyCodeBlock(code=gap_code)
|
|
526
|
+
statements.append(gap_block)
|
|
527
|
+
class_body.append(gap_block)
|
|
528
|
+
|
|
529
|
+
# Process the Item
|
|
530
|
+
if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
531
|
+
method_func = self._extract_function_info(item)
|
|
532
|
+
methods.append(method_func)
|
|
533
|
+
class_body.append(method_func)
|
|
534
|
+
else:
|
|
535
|
+
code_text = self._get_code(
|
|
536
|
+
item_start_line - 1,
|
|
537
|
+
item.end_lineno,
|
|
538
|
+
remove_indent=item.col_offset,
|
|
539
|
+
)
|
|
540
|
+
block = PyCodeBlock(code=code_text)
|
|
541
|
+
statements.append(block)
|
|
542
|
+
class_body.append(block)
|
|
543
|
+
|
|
544
|
+
last_inner_end = item.end_lineno
|
|
545
|
+
|
|
546
|
+
class_obj = PyClass(
|
|
547
|
+
decorator=decorator_code,
|
|
548
|
+
name=node.name,
|
|
549
|
+
bases=bases,
|
|
550
|
+
docstring=docstring,
|
|
551
|
+
statements=statements if statements else None,
|
|
552
|
+
functions=methods,
|
|
553
|
+
body=class_body if class_body else None,
|
|
554
|
+
)
|
|
555
|
+
self._classes.append(class_obj)
|
|
556
|
+
self._elements.append(class_obj)
|
|
557
|
+
|
|
558
|
+
self.generic_visit(node)
|
|
559
|
+
|
|
560
|
+
def return_program(self) -> PyProgram:
|
|
561
|
+
"""Finalizes parsing and returns the PyProgram object."""
|
|
562
|
+
self._add_script_segment(self._last_script_end, len(self._codelines))
|
|
563
|
+
|
|
564
|
+
return PyProgram(
|
|
565
|
+
scripts=self._scripts,
|
|
566
|
+
functions=self._functions,
|
|
567
|
+
classes=self._classes,
|
|
568
|
+
elements=self._elements,
|
|
569
|
+
)
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
if __name__ == "__main__":
    # Self-check: parse this very file and print the reconstructed program.
    # The encoding is explicit so the result does not depend on the locale.
    with open(__file__, encoding="utf-8") as f:
        code = f.read()

    # debug=True re-raises parsing errors instead of returning None.
    code = PyProgram.from_text(code, debug=True)
    print(code)
|