asterrdetection 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ # This file is part of ast_error_detection.
2
+ # Copyright (C) 2025 Badmavasan.
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or any later version.
7
+ #
8
+ # You should have received a copy of the GNU Affero General Public License
9
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
10
+
11
+ from .error_diagnosis import get_code_errors
12
+
13
# Names exported by ``from <package> import *``.
__all__ = ["get_code_errors"]

# Version string of this release.
__version__ = "0.1.0"
@@ -0,0 +1,173 @@
1
+ # This file is part of ast_error_detection.
2
+ # Copyright (C) 2025 Badmavasan.
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or any later version.
7
+ #
8
+ # You should have received a copy of the GNU Affero General Public License
9
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
10
+
11
+
12
class AnnotatedTree:
    """
    Post-order annotation of a tree, as needed by the Zhang-Shasha tree edit distance algorithm.

    Building an instance walks the tree once in post-order and records, for every node:

    - its post-order ID,
    - the ID of its Leftmost Leaf Descendant (LMD),
    - the value returned by its ``get_path()`` method.

    From the LMDs it then derives the keyroots, i.e. for each distinct LMD the highest post-order
    index that has this LMD.

    Children are always obtained through the ``get_children`` callable supplied by the caller; the
    nodes themselves only need a ``get_path()`` method (used while building) and a ``label``
    attribute (used by `print_tree_structure`).

    Attributes:
        get_children (callable): Given a node, returns an iterable of its children in order.
        root (Node): The root node of the tree.
        nodes (list): The nodes of the tree in post-order.
        ids (list): Post-order integer IDs, parallel to `nodes` (``ids[i] == i``).
        lmds (list): For each node (post-order), the ID of its leftmost leaf descendant.
        keyroots (list): Sorted keyroot indices.
        nodes_path (list): For each node (post-order), the result of ``node.get_path()``.
    """

    def __init__(self, root, get_children):
        """
        Annotate the tree rooted at `root`.

        Args:
            root (Node): The root node of the tree.
            get_children (callable): Takes a node and returns its children in order.
        """
        self.get_children = get_children
        self.root = root
        self.nodes = []
        self.ids = []
        self.lmds = []
        self.keyroots = []
        self.nodes_path = []  # Result of get_path() for each node, in post-order
        self._build(root)

    def _build(self, node):
        """
        Populate `nodes`, `ids`, `lmds` and `nodes_path` from `node`, then compute `keyroots`.

        The ID counter and the ID -> LMD mapping are reset first; the result lists are not cleared,
        so this is meant to be called once, from `__init__`.
        """
        self._id_counter = 0
        self._lmd_mapping = {}
        self._compute_post_order(node)
        self.keyroots = self._compute_keyroots()

    def _compute_post_order(self, node):
        """
        Recursively visit `node` in post-order, recording its ID, LMD and path.

        Args:
            node (Node): The current node being processed.

        Returns:
            int: The post-order ID of the leftmost leaf descendant of `node`.
        """
        lmd = None

        # Post-order: children first. The LMD of a node is the LMD of its first child.
        for child in self.get_children(node):
            child_lmd = self._compute_post_order(child)
            if lmd is None:
                lmd = child_lmd

        # All descendants are numbered; the next free ID belongs to this node.
        node_id = self._id_counter
        self._id_counter += 1

        # A node without children is a leaf and therefore its own LMD.
        if lmd is None:
            lmd = node_id

        self.nodes.append(node)
        self.nodes_path.append(node.get_path())
        self.ids.append(node_id)
        self.lmds.append(lmd)
        self._lmd_mapping[node_id] = lmd

        return lmd

    def _compute_keyroots(self):
        """
        Compute the keyroots of the tree.

        For every distinct LMD value in `self.lmds`, the last (highest) index carrying that value is
        a keyroot.

        Returns:
            list: The keyroot indices in ascending order.
        """
        # Later indices overwrite earlier ones, leaving the last occurrence of each LMD.
        last_index_by_lmd = {lmd: index for index, lmd in enumerate(self.lmds)}
        return sorted(last_index_by_lmd.values())

    def print_tree_structure(self, name):
        """
        Print a human-readable, one-line-per-node dump of the annotated tree (debugging aid).

        Each line shows the node's post-order index, its current ``get_path()`` value, its label and
        the labels of its children. Children are looked up with the same ``get_children`` callable
        the tree was built with, so the dump also works for node types that do not expose a
        ``children`` attribute.

        Args:
            name (str): A descriptive name for the tree, used as a heading in the output.

        Example:
            If the tree structure is:
                Module
                    For
                        Condition:
                            Var: i
                            Call: range
                                Const: 5
                        Body:
                            Call: print
                                Const: 'hello'

            The output might look like:
                --- Code1 Tree Structure ---
                Node 0: Path: ['Module', 'For[0]', 'Condition:[0]', 'Var: i[0]'] | Label: 'Var: i' | Children Order: []
                Node 1: Path: ['Module', 'For[0]', 'Condition:[0]', 'Call: range[1]'] | Label: 'Call: range' | Children Order: ['Const: 5']
                ...
        """
        print(f"--- {name} Tree Structure ---")
        for idx, node in enumerate(self.nodes):
            path = node.get_path()
            label = node.label
            children_labels = [child.label for child in self.get_children(node)]
            print(f"Node {idx}: Path: {path} | Label: '{label}' | Children Order: {children_labels}")
173
+
@@ -0,0 +1,245 @@
1
+ # This file is part of ast_error_detection.
2
+ # Copyright (C) 2025 Badmavasan.
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or any later version.
7
+ #
8
+ # You should have received a copy of the GNU Affero General Public License
9
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
10
+
11
+ import ast
12
+ from node import Node
13
+
14
+
15
def handle_comparison(comparison_node):
    """
    Convert an ``ast.Compare`` node into a one-element list holding a custom Node.

    Only the first operator (``ops[0]``) and the first comparator (``comparators[0]``) are read, so
    the remaining links of a chained comparison are not represented. Operators missing from the
    symbol table below yield an empty symbol.

    Returns:
        list: ``[Node]`` labelled "Call: <symbol>" for membership tests (``in`` / ``not in``) and
        "Compare: <symbol>" otherwise, with the converted left operand followed by the converted
        first comparator as children.
    """
    # AST operator class -> symbol used in the node label.
    symbols = {
        ast.Eq: '==',
        ast.NotEq: '!=',
        ast.Lt: '<',
        ast.LtE: '<=',
        ast.Gt: '>',
        ast.GtE: '>=',
        ast.In: 'In',
        ast.NotIn: 'Not in',
    }

    first_op = comparison_node.ops[0]
    symbol = symbols.get(type(first_op), '')

    operands = ast_to_custom_node(comparison_node.left)
    operands = operands + ast_to_custom_node(comparison_node.comparators[0])

    # Membership tests are labelled like calls; every other operator is a plain comparison.
    if symbol in ('In', 'Not in'):
        label = f"Call: {symbol}"
    else:
        label = f"Compare: {symbol}"

    return [Node(label, children=operands)]
42
+
43
+
44
def process_child_nodes(ast_node):
    """
    Convert every direct child of `ast_node` and return the results as one flat list.

    Children are taken in ``ast.iter_child_nodes`` order; a child whose conversion is empty
    contributes nothing.
    """
    converted_groups = map(ast_to_custom_node, ast.iter_child_nodes(ast_node))
    return [node for group in converted_groups if group for node in group]
51
+
52
+
53
# Symbols used to label ``ast.BinOp`` / ``ast.AugAssign`` operators. An operator missing from this
# table is labelled with the fallback 'Operation'.
_BINARY_OP_LABELS = {
    ast.Mod: '%',
    ast.Add: '+',
    ast.Sub: '-',
    ast.Mult: '*',
    ast.Div: '/',
    ast.FloorDiv: '//',
    ast.Pow: '**',
}


def _convert_all(ast_nodes):
    """Convert each AST node in order and return the concatenated custom nodes."""
    converted = []
    for item in ast_nodes:
        converted.extend(ast_to_custom_node(item))
    return converted


def _convert_assign(assign_node):
    """Convert an ``ast.Assign`` node into a list of "Assign" nodes (only ``targets[0]`` is read)."""
    target = assign_node.targets[0]
    value = assign_node.value

    # ``a, b = x, y`` is split into one "Assign" node per (target, value) pair.
    if isinstance(target, ast.Tuple) and isinstance(value, ast.Tuple):
        assigns = []
        for sub_target, sub_value in zip(target.elts, value.elts):
            target_nodes = ast_to_custom_node(sub_target)
            if isinstance(sub_value, ast.Constant):
                # In this position constants are labelled with their raw value (strings are not
                # quoted, unlike the generic constant handling); kept as-is for compatibility.
                value_nodes = [Node(f"Const: {sub_value.value}")]
            else:
                value_nodes = ast_to_custom_node(sub_value)
            assigns.append(Node("Assign", children=target_nodes + value_nodes))
        return assigns

    # Converting the target generically yields "Var: <name>" for plain names and also copes with
    # attribute, subscript and tuple targets, which have no ``id`` attribute.
    return [Node("Assign", children=ast_to_custom_node(target) + ast_to_custom_node(value))]


def ast_to_custom_node(ast_node):
    """
    Transform an AST node into a list of custom ``Node`` trees.

    A list is returned because one AST node may map to zero nodes (e.g. ``Load``/``Store``
    contexts), to one node, or to several (e.g. a tuple assignment or a subscript).

    Known simplifications, kept on purpose so existing output stays unchanged:
    - A unary operation is only represented when it is a minus applied directly to a constant or a
      name; any other unary operation converts to an empty list.
    - List elements that are neither constants nor names are left out of the list label.
    - Only positional arguments of a call are converted; a callee without an ``id`` attribute is
      labelled "Unknown".
    - Only the first target of an assignment is used.

    Args:
        ast_node: An ``ast`` node, or None for a missing optional child.

    Returns:
        list: The custom nodes representing `ast_node` (possibly empty).
    """
    # A missing optional child (e.g. the value of a bare ``return``) has nothing to convert.
    if ast_node is None:
        return []

    node_type = type(ast_node).__name__

    # Expression contexts carry no structure of their own.
    if node_type in {'Load', 'Store'}:
        return []

    # An expression statement is replaced by its content.
    if node_type == 'Expr':
        return process_child_nodes(ast_node)

    # Negative literals / negated names are folded into a single leaf.
    if node_type == 'UnaryOp':
        if isinstance(ast_node.op, ast.USub):
            if isinstance(ast_node.operand, ast.Constant):
                return [Node(f"Const: -{ast_node.operand.value}")]
            if isinstance(ast_node.operand, ast.Name):
                return [Node(f"Var: -{ast_node.operand.id}")]
        return []

    # Function definitions.
    if isinstance(ast_node, ast.FunctionDef):
        return [Node(f"Function: {ast_node.name}", children=process_child_nodes(ast_node))]

    # A single function parameter.
    if isinstance(ast_node, ast.arg):
        return [Node(f"Arg: {ast_node.arg}")]

    # Parameter lists; omitted entirely when there is nothing inside.
    if isinstance(ast_node, ast.arguments):
        children = process_child_nodes(ast_node)
        if children:
            return [Node("arguments", children=children)]
        return []

    # Binary operations and augmented assignments share the "Operation: <op>" label.
    if isinstance(ast_node, (ast.BinOp, ast.AugAssign)):
        op_label = _BINARY_OP_LABELS.get(type(ast_node.op), 'Operation')
        if isinstance(ast_node, ast.BinOp):
            operands = [ast_node.left, ast_node.right]
        else:
            operands = [ast_node.target, ast_node.value]
        return [Node(f"Operation: {op_label}", children=_convert_all(operands))]

    # 'For' loops: the loop variable and the iterable form the "Condition:" subtree.
    if isinstance(ast_node, ast.For):
        condition_node = Node("Condition:", children=_convert_all([ast_node.target, ast_node.iter]))
        body_node = Node("Body:", children=_convert_all(ast_node.body))
        return [Node("For", children=[condition_node, body_node])]

    # Variable assignments.
    if isinstance(ast_node, ast.Assign):
        return _convert_assign(ast_node)

    # Conditional statements ('If') and loops ('While').
    if isinstance(ast_node, (ast.If, ast.While)):
        children = []
        test = ast_node.test

        if isinstance(test, ast.BoolOp):
            # Comparisons among the operands are converted by the generic path, which delegates
            # to handle_comparison.
            op_name = type(test.op).__name__
            children.append(Node(f"Cond. set: {op_name}", children=_convert_all(test.values)))
        elif (
            isinstance(test, ast.UnaryOp)
            and isinstance(test.op, ast.Not)
            and isinstance(test.operand, ast.Compare)
        ):
            condition_nodes = handle_comparison(test.operand)
            children.append(Node("Condition:", children=[Node("Call: Not")] + condition_nodes))
        else:
            # Covers plain comparisons as well as any other test expression; a test that converts
            # to nothing produces no "Condition:" node.
            test_nodes = ast_to_custom_node(test)
            if test_nodes:
                children.append(Node("Condition:", children=test_nodes))

        body_nodes = _convert_all(ast_node.body)
        if body_nodes:
            children.append(Node("Body:", children=body_nodes))

        else_nodes = _convert_all(ast_node.orelse)
        if else_nodes:
            children.append(Node("Else:", children=else_nodes))

        return [Node(node_type, children=children)]

    # List literals become a single constant leaf listing their simple elements.
    if isinstance(ast_node, ast.List):
        elements = []
        for elt in ast_node.elts:
            if isinstance(elt, ast.Constant):
                elements.append(str(elt.value))
            elif isinstance(elt, ast.Name):
                elements.append(elt.id)
        elements_str = ', '.join(elements)
        return [Node(f"Const: [{elements_str}]")]

    # Subscripts: the subscripted value followed by a "Sliced by" node holding the index.
    if isinstance(ast_node, ast.Subscript):
        value_nodes = ast_to_custom_node(ast_node.value)
        slice_nodes = ast_to_custom_node(ast_node.slice)
        return value_nodes + [Node("Sliced by", children=slice_nodes)]

    # Comparisons.
    if isinstance(ast_node, ast.Compare):
        return handle_comparison(ast_node)

    # Constants: strings are shown quoted.
    if isinstance(ast_node, ast.Constant):
        value = ast_node.value
        formatted_value = f"'{value}'" if isinstance(value, str) else str(value)
        return [Node(f"Const: {formatted_value}")]

    # Calls: labelled with the callee's name, positional arguments as children.
    if isinstance(ast_node, ast.Call):
        func_name = getattr(ast_node.func, "id", "Unknown")
        return [Node(f"Call: {func_name}", children=_convert_all(ast_node.args))]

    if isinstance(ast_node, ast.Name):
        return [Node(f"Var: {ast_node.id}")]

    if isinstance(ast_node, ast.Module):
        return [Node("Module", children=process_child_nodes(ast_node))]

    if isinstance(ast_node, ast.Return):
        # ``ast_node.value`` is None for a bare ``return``; that converts to no children.
        return [Node("Return", children=ast_to_custom_node(ast_node.value))]

    if isinstance(ast_node, ast.alias):
        return [Node(f"alias: {ast_node.name}")]

    if isinstance(ast_node, ast.ImportFrom):
        return [Node(f"ImportFrom: {ast_node.module}", children=_convert_all(ast_node.names))]

    # Any other node type: label it with its class name and convert its children generically.
    return [Node(node_type, children=process_child_nodes(ast_node))]