gabion 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ from __future__ import annotations
2
+
3
+ from gabion.schema import AnalysisResponse
4
+
5
+
6
class GabionEngine:
    """Engine object exposing a single ``analyze`` entry point."""

    def analyze(self) -> AnalysisResponse:
        """Build and return an ``AnalysisResponse`` with no bundles and no stats."""
        empty_bundles: list = []
        empty_stats: dict = {}
        return AnalysisResponse(bundles=empty_bundles, stats=empty_stats)
@@ -0,0 +1,45 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Dict, List, Optional, Set, Tuple
5
+
6
+
7
@dataclass
class SymbolTable:
    """Record of the names that import statements bring into each module.

    Both mappings default to empty dictionaries, so ``SymbolTable()`` can be
    created up front and filled in while modules are scanned.
    """

    # Map: (module_name, local_name) -> fully_qualified_name
    imports: Dict[Tuple[str, str], str] = field(default_factory=dict)
    # Map: importing module name -> set of source modules named in
    # ``from <source> import *``. Import visitors fill this with
    # ``table.star_imports.setdefault(module, set()).add(source)``, so the
    # attribute has to exist on the table they are handed.
    star_imports: Dict[str, Set[str]] = field(default_factory=dict)
11
+
12
+
13
@dataclass
class ClassInfo:
    """Plain record holding what is known about one class."""

    # presumably the qualified name of the class — confirm against producers
    qual: str
    # base-class names, kept as strings in list order
    bases: List[str]
    # set of method names
    methods: Set[str]
18
+
19
+
20
@dataclass
class DispatchTable:
    """Plain record pairing a table name with a set of target names."""

    # name of the table
    name: str
    # set of target names (strings) associated with the table
    targets: Set[str]
24
+
25
+
26
@dataclass(frozen=True)
class CallArgs:
    """Immutable record describing the arguments of one call expression.

    The dataclass is frozen, so attributes cannot be reassigned after
    construction; the contained dicts and list are still ordinary mutable
    objects.
    """

    # the callee as written at the call site, kept as a string
    callee_expr: str
    # string-keyed map for positional arguments
    # (presumably slot -> parameter name; confirm against the producer)
    pos_map: Dict[str, str]
    # string-keyed map for keyword arguments
    # (presumably keyword -> parameter name; confirm against the producer)
    kw_map: Dict[str, str]
    # defaults to a fresh empty list per instance
    resolved_targets: List[str] = field(default_factory=list)
32
+
33
+
34
@dataclass
class FunctionInfo:
    """Plain record holding what is known about one function."""

    # presumably the qualified name of the function — confirm against producers
    qual: str
    # parameter names, in list order
    params: List[str]
    # one ``CallArgs`` record per entry
    calls: List[CallArgs]
39
+
40
+
41
@dataclass
class ParamUse:
    """Mutable record of how a single parameter has been used."""

    # set of string pairs, one per recorded direct forward
    direct_forward: Set[Tuple[str, str]]
    # flag set once the parameter is used in some non-forwarding way
    non_forward: bool
    # names currently standing in for the parameter
    current_aliases: Set[str]
@@ -0,0 +1,402 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ from typing import Callable, TYPE_CHECKING
5
+
6
+ if TYPE_CHECKING:
7
+ from gabion.analysis.dataflow_audit import CallArgs, ParamUse
8
+
9
+
10
class ProjectVisitor(ast.NodeVisitor):
    """Subclass of ``ast.NodeVisitor`` that adds no behaviour of its own."""
12
+
13
+
14
class ParentAnnotator(ast.NodeVisitor):
    """Visitor that remembers which node directly contains each visited node."""

    def __init__(self) -> None:
        # child node -> the node it was reached from; filled in during visit()
        self.parents: dict[ast.AST, ast.AST] = {}

    def generic_visit(self, node: ast.AST) -> None:
        """Record ``node`` as the parent of each direct child, then descend."""
        for kid in ast.iter_child_nodes(node):
            # Register the link before recursing into the child.
            self.parents.__setitem__(kid, node)
            self.visit(kid)
22
+
23
+
24
class ImportVisitor(ast.NodeVisitor):
    """Record the import bindings of one module in a shared table.

    ``table`` must expose an ``imports`` mapping keyed by
    ``(module_name, local_name)`` and a ``star_imports`` mapping from module
    name to a set of source module names.
    """

    def __init__(self, module_name: str, table) -> None:
        # dataflow-bundle: module_name, table
        self.module = module_name
        self.table = table

    def visit_Import(self, node: ast.Import) -> None:
        """Handle ``import x`` / ``import x as y``.

        The local name is the ``as`` name when present, otherwise the dotted
        name exactly as written; it maps to the imported dotted name.
        """
        for entry in node.names:
            bound_as = entry.asname if entry.asname else entry.name
            self.table.imports[(self.module, bound_as)] = entry.name

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """Handle ``from <source> import ...``, resolving relative sources.

        Relative imports are resolved by dropping ``level`` trailing
        components from this visitor's module name; an import that climbs
        above the first component is ignored. ``*`` imports are recorded in
        ``star_imports``; every other name is stored in ``imports`` as
        ``<source>.<name>`` (or just ``<name>`` when the source is empty).
        """
        if not node.module and node.level == 0:
            return

        if node.level > 0:
            segments = self.module.split(".")
            if len(segments) < node.level:
                return
            prefix = segments[: len(segments) - node.level]
            if node.module:
                prefix.append(node.module)
            source = ".".join(prefix)
        else:
            source = node.module or ""

        for entry in node.names:
            if entry.name == "*":
                self.table.star_imports.setdefault(self.module, set()).add(source)
            else:
                bound_as = entry.asname or entry.name
                qualified = f"{source}.{entry.name}" if source else entry.name
                self.table.imports[(self.module, bound_as)] = qualified
55
+
56
+
57
class UseVisitor(ast.NodeVisitor):
    """Classify every use of a function's parameters and of their aliases.

    While visiting statements the visitor maintains ``alias_to_param`` (local
    name -> parameter it currently stands for) plus two private tables for
    ``name.attr`` and ``name["key"]`` aliases. For each parameter it updates
    the matching ``use_map`` entry:

    * ``direct_forward`` receives ``(callee, slot)`` pairs when the value is
      passed straight into a call, as reported by ``call_context``;
    * ``non_forward`` is set to ``True`` for any other use, and when an alias
      is overwritten;
    * ``current_aliases`` is kept in step with ``alias_to_param``.

    Every call expression is additionally summarised through
    ``call_args_factory`` and appended to ``call_args``.
    """

    def __init__(
        self,
        *,
        parents: dict[ast.AST, ast.AST],
        use_map: dict[str, ParamUse],
        call_args: list[CallArgs],
        alias_to_param: dict[str, str],
        is_test: bool,
        strictness: str,
        const_repr: Callable[[ast.AST], str | None],
        callee_name: Callable[[ast.Call], str],
        call_args_factory: Callable[..., CallArgs],
        call_context: Callable[[ast.AST, dict[ast.AST, ast.AST]], tuple[ast.Call | None, bool]],
    ) -> None:
        # dataflow-bundle: alias_to_param, call_args, call_args_factory, call_context, callee_name, const_repr, is_test, parents, strictness, use_map
        self.parents = parents
        self.use_map = use_map
        self.call_args = call_args
        self.alias_to_param = alias_to_param
        self.is_test = is_test
        self.strictness = strictness
        self.const_repr = const_repr
        self.callee_name = callee_name
        self.call_args_factory = call_args_factory
        self.call_context = call_context
        # Parameters whose reads are ignored while the right-hand side of an
        # aliasing assignment is being visited.
        self._suspend_non_forward: set[str] = set()
        # (base name, attribute) -> parameter stored through ``base.attr = p``
        self._attr_alias_to_param: dict[tuple[str, str], str] = {}
        # (base name, string key) -> parameter stored through ``base["k"] = p``
        self._key_alias_to_param: dict[tuple[str, str], str] = {}

    # ------------------------------------------------------------------
    # small shared helpers
    # ------------------------------------------------------------------

    def _aliased_param(self, expr: ast.AST) -> str | None:
        """Return the parameter ``expr`` stands for if it is an aliased bare name."""
        if isinstance(expr, ast.Name) and expr.id in self.alias_to_param:
            return self.alias_to_param[expr.id]
        return None

    def _flag_if_tracked(self, param: str | None) -> None:
        """Set ``non_forward`` for ``param`` when it has a ``use_map`` entry."""
        if param in self.use_map:
            self.use_map[param].non_forward = True

    def _flag_unless_suspended(self, param: str) -> None:
        """Set ``non_forward`` for ``param`` unless its reads are suspended."""
        if param not in self._suspend_non_forward:
            self.use_map[param].non_forward = True

    def _bind_alias(self, name: str, param: str) -> None:
        """Make ``name`` an alias of ``param`` and keep alias sets in step.

        If ``name`` previously stood for a different parameter it is removed
        from that parameter's ``current_aliases`` so no stale entry remains.
        """
        previous = self.alias_to_param.get(name)
        if previous is not None and previous != param and previous in self.use_map:
            self.use_map[previous].current_aliases.discard(name)
        self.alias_to_param[name] = param
        if param in self.use_map:
            self.use_map[param].current_aliases.add(name)

    @staticmethod
    def _slot_for(call: ast.Call, node: ast.AST) -> str:
        """Name the argument slot of ``call`` that is exactly ``node``."""
        for idx, arg in enumerate(call.args):
            if arg is node:
                return f"arg[{idx}]"
        for kw in call.keywords:
            if kw.value is node and kw.arg is not None:
                return f"kw[{kw.arg}]"
        return "arg[?]"

    def _record_use(self, node: ast.AST, param_name: str) -> None:
        """Record ``node`` as a direct forward of ``param_name`` or flag it."""
        call, direct = self.call_context(node, self.parents)
        if call is None or not direct:
            self.use_map[param_name].non_forward = True
            return
        callee = self.callee_name(call)
        self.use_map[param_name].direct_forward.add((callee, self._slot_for(call, node)))

    def _record_star_use(self, param_name: str, marker: tuple[str, str]) -> None:
        """Record a ``*name`` / ``**name`` use; strictness "high" flags it instead."""
        if self.strictness == "high":
            self.use_map[param_name].non_forward = True
            return
        self.use_map[param_name].direct_forward.add(marker)

    def _visit_assigned_value(self, value: ast.AST, handled_alias: bool) -> None:
        """Visit an assignment's right-hand side.

        When the assignment created an alias, reads of the aliased parameters
        inside ``value`` are suspended so the aliasing itself is not counted
        as a non-forwarding use.
        """
        if not handled_alias:
            self.visit(value)
            return
        sources = self._collect_alias_sources(value)
        self._suspend_non_forward.update(sources)
        self.visit(value)
        self._suspend_non_forward.difference_update(sources)

    def _flag_nested_root(self, node: ast.AST) -> None:
        """Handle ``a.b.c`` / ``a[0].b`` style chains whose base is not a bare name."""
        root_name = self._root_name(node)
        if root_name and root_name in self.alias_to_param:
            self._flag_unless_suspended(self.alias_to_param[root_name])
        self.generic_visit(node)

    # ------------------------------------------------------------------
    # calls
    # ------------------------------------------------------------------

    def visit_Call(self, node: ast.Call) -> None:
        """Summarise the arguments of ``node`` and append the summary to ``call_args``.

        Each positional or keyword argument is classified as a constant (per
        ``const_repr``), a forwarded parameter/alias, a ``*`` / ``**``
        expansion of one, or "non-constant". Children are then visited so the
        individual uses are recorded as well.
        """
        callee = self.callee_name(node)
        pos_map: dict[str, str] = {}
        kw_map: dict[str, str] = {}
        const_pos: dict[str, str] = {}
        const_kw: dict[str, str] = {}
        non_const_pos: set[str] = set()
        non_const_kw: set[str] = set()
        star_pos: list[tuple[int, str]] = []
        star_kw: list[str] = []

        for idx, arg in enumerate(node.args):
            slot = str(idx)
            if isinstance(arg, ast.Starred):
                param = self._aliased_param(arg.value)
                if param is not None:
                    star_pos.append((idx, param))
                else:
                    non_const_pos.add(slot)
                continue
            const = self.const_repr(arg)
            if const is not None:
                const_pos[slot] = const
                continue
            param = self._aliased_param(arg)
            if param is not None:
                pos_map[slot] = param
            else:
                non_const_pos.add(slot)

        for kw in node.keywords:
            if kw.arg is None:
                # ``**mapping`` expansion
                param = self._aliased_param(kw.value)
                if param is not None:
                    star_kw.append(param)
                else:
                    non_const_kw.add("**")
                continue
            const = self.const_repr(kw.value)
            if const is not None:
                const_kw[kw.arg] = const
                continue
            param = self._aliased_param(kw.value)
            if param is not None:
                kw_map[kw.arg] = param
            else:
                non_const_kw.add(kw.arg)

        self.call_args.append(
            self.call_args_factory(
                callee=callee,
                pos_map=pos_map,
                kw_map=kw_map,
                const_pos=const_pos,
                const_kw=const_kw,
                non_const_pos=non_const_pos,
                non_const_kw=non_const_kw,
                star_pos=star_pos,
                star_kw=star_kw,
                is_test=self.is_test,
            )
        )
        self.generic_visit(node)

    # ------------------------------------------------------------------
    # writes and alias creation
    # ------------------------------------------------------------------

    def _check_write(self, target: ast.AST) -> None:
        """Drop every alias that a store to ``target`` overwrites.

        For each name stored inside ``target`` the name alias and any
        attribute/key aliases rooted at that name are removed, and the
        parameters they stood for are flagged ``non_forward``.
        """
        for node in ast.walk(target):
            if not (isinstance(node, ast.Name) and isinstance(node.ctx, ast.Store)):
                continue
            name = node.id
            if name in self.alias_to_param:
                param = self.alias_to_param.pop(name)
                if param in self.use_map:
                    self.use_map[param].current_aliases.discard(name)
                    self.use_map[param].non_forward = True
            for table in (self._attr_alias_to_param, self._key_alias_to_param):
                # Materialise the keys first: the table is mutated below.
                for key in [k for k in table if k[0] == name]:
                    self._flag_if_tracked(table.pop(key, None))

    def _bind_sequence(self, target: ast.AST, rhs: ast.AST) -> bool:
        """Bind ``a, b = x, y`` style assignments element by element.

        Returns ``False`` (doing nothing) unless both sides are tuple/list
        displays of equal length. Right-hand names are resolved against the
        alias map as it was before the statement, because Python evaluates
        the whole right-hand side before binding any target; this keeps
        swaps such as ``a, b = b, a`` correct.
        """
        # dataflow-bundle: target, rhs
        if not isinstance(target, (ast.Tuple, ast.List)):
            return False
        return self._bind_pairs(target, rhs, dict(self.alias_to_param))

    def _bind_pairs(self, target: ast.AST, rhs: ast.AST, before: dict[str, str]) -> bool:
        """Pairwise worker for ``_bind_sequence`` using the ``before`` snapshot."""
        sequence_types = (ast.Tuple, ast.List)
        if not isinstance(target, sequence_types):
            return False
        if not isinstance(rhs, sequence_types):
            return False
        if len(target.elts) != len(rhs.elts):
            return False
        for lhs, rhs_node in zip(target.elts, rhs.elts):
            if isinstance(lhs, sequence_types) and isinstance(rhs_node, sequence_types):
                if not self._bind_pairs(lhs, rhs_node, before):
                    self._check_write(lhs)
                continue
            if (
                isinstance(lhs, ast.Name)
                and isinstance(rhs_node, ast.Name)
                and rhs_node.id in before
            ):
                self._bind_alias(lhs.id, before[rhs_node.id])
            else:
                self._check_write(lhs)
        return True

    def _collect_alias_sources(self, rhs: ast.AST) -> set[str]:
        """Return the parameters named directly by ``rhs`` or by its tuple/list elements."""
        if isinstance(rhs, ast.Name) and rhs.id in self.alias_to_param:
            return {self.alias_to_param[rhs.id]}
        if isinstance(rhs, (ast.Tuple, ast.List)):
            sources: set[str] = set()
            for elt in rhs.elts:
                sources.update(self._collect_alias_sources(elt))
            return sources
        return set()

    def visit_Assign(self, node: ast.Assign) -> None:
        """Track aliases created by an assignment, then visit its value.

        ``x = p`` aliases ``x``; ``obj.attr = p`` and ``obj["key"] = p`` create
        attribute/key aliases when ``obj`` is a bare name; matching tuple/list
        displays are bound pairwise. Any other target with a non-alias value
        is treated as a plain write.
        """
        rhs_param = self._aliased_param(node.value)

        handled_alias = False
        for target in node.targets:
            if self._bind_sequence(target, node.value):
                handled_alias = True
                continue
            if rhs_param and isinstance(target, ast.Name):
                self._bind_alias(target.id, rhs_param)
                handled_alias = True
            elif rhs_param and isinstance(target, ast.Attribute):
                if isinstance(target.value, ast.Name):
                    self._attr_alias_to_param[(target.value.id, target.attr)] = rhs_param
                    handled_alias = True
            elif rhs_param and isinstance(target, ast.Subscript):
                if (
                    isinstance(target.value, ast.Name)
                    and isinstance(target.slice, ast.Constant)
                    and isinstance(target.slice.value, str)
                ):
                    self._key_alias_to_param[
                        (target.value.id, target.slice.value)
                    ] = rhs_param
                    handled_alias = True
            else:
                self._check_write(target)

        self._visit_assigned_value(node.value, handled_alias)

    def visit_AnnAssign(self, node: ast.AnnAssign) -> None:
        """Like ``visit_Assign`` for ``x: T = value``; bare declarations are skipped."""
        if node.value is None:
            return
        rhs_param = self._aliased_param(node.value)
        handled_alias = False
        if isinstance(node.target, ast.Name) and rhs_param:
            self._bind_alias(node.target.id, rhs_param)
            handled_alias = True
        else:
            self._check_write(node.target)
        self._visit_assigned_value(node.value, handled_alias)

    def visit_AugAssign(self, node: ast.AugAssign) -> None:
        """Treat ``x += value`` as a write to ``x`` followed by a read of ``value``."""
        self._check_write(node.target)
        self.visit(node.value)

    # ------------------------------------------------------------------
    # reads
    # ------------------------------------------------------------------

    def visit_Name(self, node: ast.Name) -> None:
        """Classify a read of a parameter or of one of its name aliases."""
        if not isinstance(node.ctx, ast.Load):
            return
        if node.id not in self.alias_to_param:
            return
        param_name = self.alias_to_param[node.id]
        parent = self.parents.get(node)
        if isinstance(parent, ast.Starred):
            self._record_star_use(param_name, ("args[*]", "arg[*]"))
            return
        if isinstance(parent, ast.keyword) and parent.arg is None:
            self._record_star_use(param_name, ("kwargs[*]", "kw[*]"))
            return
        if param_name in self._suspend_non_forward:
            return
        self._record_use(node, param_name)

    def _root_name(self, node: ast.AST) -> str | None:
        """Return the bare name at the base of an attribute/subscript chain, if any."""
        current = node
        while isinstance(current, (ast.Attribute, ast.Subscript)):
            current = current.value
        if isinstance(current, ast.Name):
            return current.id
        return None

    def visit_Attribute(self, node: ast.Attribute) -> None:
        """Classify a read of ``base.attr``.

        A known attribute alias is treated like the aliased parameter itself;
        any other attribute access on a parameter/alias flags it non-forward.
        """
        if not isinstance(node.ctx, ast.Load):
            return
        if not isinstance(node.value, ast.Name):
            self._flag_nested_root(node)
            return
        base = node.value.id
        key = (base, node.attr)
        if key not in self._attr_alias_to_param:
            if base in self.alias_to_param:
                self._flag_unless_suspended(self.alias_to_param[base])
            return
        param_name = self._attr_alias_to_param[key]
        if param_name in self._suspend_non_forward:
            return
        self._record_use(node, param_name)

    def visit_Subscript(self, node: ast.Subscript) -> None:
        """Classify a read of ``base[...]``.

        ``base["key"]`` with a known key alias is treated like the aliased
        parameter itself; any other subscript on a parameter/alias flags it
        non-forward, and the slice expression is visited for nested uses.
        """
        if not isinstance(node.ctx, ast.Load):
            return
        if not isinstance(node.value, ast.Name):
            self._flag_nested_root(node)
            return
        base = node.value.id
        key_value = None
        if isinstance(node.slice, ast.Constant) and isinstance(node.slice.value, str):
            key_value = node.slice.value
        if key_value is None or (base, key_value) not in self._key_alias_to_param:
            if base in self.alias_to_param:
                self._flag_unless_suspended(self.alias_to_param[base])
            self.visit(node.slice)
            return
        param_name = self._key_alias_to_param[(base, key_value)]
        if param_name in self._suspend_non_forward:
            return
        self._record_use(node, param_name)