gabion 0.1.0__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gabion/__init__.py +1 -1
- gabion/analysis/dataflow_audit.py +315 -88
- gabion/analysis/visitors.py +80 -0
- gabion/cli.py +367 -144
- gabion/config.py +8 -0
- gabion/lsp_client.py +3 -2
- gabion/refactor/engine.py +214 -23
- gabion/refactor/model.py +1 -0
- gabion/schema.py +2 -0
- gabion/server.py +10 -7
- gabion/synthesis/merge.py +0 -2
- gabion/synthesis/model.py +1 -0
- {gabion-0.1.0.dist-info → gabion-0.1.5.dist-info}/METADATA +21 -4
- gabion-0.1.5.dist-info/RECORD +26 -0
- gabion-0.1.0.dist-info/RECORD +0 -26
- {gabion-0.1.0.dist-info → gabion-0.1.5.dist-info}/WHEEL +0 -0
- {gabion-0.1.0.dist-info → gabion-0.1.5.dist-info}/entry_points.txt +0 -0
- {gabion-0.1.0.dist-info → gabion-0.1.5.dist-info}/licenses/LICENSE +0 -0
gabion/__init__.py
CHANGED
|
@@ -27,7 +27,7 @@ from typing import Iterable, Iterator
|
|
|
27
27
|
import re
|
|
28
28
|
|
|
29
29
|
from gabion.analysis.visitors import ImportVisitor, ParentAnnotator, UseVisitor
|
|
30
|
-
from gabion.config import dataflow_defaults, merge_payload
|
|
30
|
+
from gabion.config import dataflow_defaults, merge_payload, synthesis_defaults
|
|
31
31
|
from gabion.schema import SynthesisResponse
|
|
32
32
|
from gabion.synthesis import NamingContext, SynthesisConfig, Synthesizer
|
|
33
33
|
from gabion.synthesis.merge import merge_bundles
|
|
@@ -348,8 +348,6 @@ def _param_spans(
|
|
|
348
348
|
def _function_key(scope: Iterable[str], name: str) -> str:
|
|
349
349
|
parts = list(scope)
|
|
350
350
|
parts.append(name)
|
|
351
|
-
if not parts:
|
|
352
|
-
return name
|
|
353
351
|
return ".".join(parts)
|
|
354
352
|
|
|
355
353
|
|
|
@@ -430,6 +428,117 @@ def _param_annotations(
|
|
|
430
428
|
return annots
|
|
431
429
|
|
|
432
430
|
|
|
431
|
+
def _param_defaults(
|
|
432
|
+
fn: ast.FunctionDef | ast.AsyncFunctionDef,
|
|
433
|
+
ignore_params: set[str] | None = None,
|
|
434
|
+
) -> set[str]:
|
|
435
|
+
defaults: set[str] = set()
|
|
436
|
+
args = fn.args.posonlyargs + fn.args.args
|
|
437
|
+
names = [a.arg for a in args]
|
|
438
|
+
if fn.args.defaults:
|
|
439
|
+
defaulted = names[-len(fn.args.defaults) :]
|
|
440
|
+
defaults.update(defaulted)
|
|
441
|
+
for kw_arg, default in zip(fn.args.kwonlyargs, fn.args.kw_defaults):
|
|
442
|
+
if default is not None:
|
|
443
|
+
defaults.add(kw_arg.arg)
|
|
444
|
+
if names and names[0] in {"self", "cls"}:
|
|
445
|
+
defaults.discard(names[0])
|
|
446
|
+
if ignore_params:
|
|
447
|
+
defaults = {name for name in defaults if name not in ignore_params}
|
|
448
|
+
return defaults
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
class _ReturnAliasCollector(ast.NodeVisitor):
|
|
452
|
+
def __init__(self) -> None:
|
|
453
|
+
self.returns: list[ast.AST | None] = []
|
|
454
|
+
|
|
455
|
+
def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
|
|
456
|
+
return
|
|
457
|
+
|
|
458
|
+
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
|
|
459
|
+
return
|
|
460
|
+
|
|
461
|
+
def visit_Lambda(self, node: ast.Lambda) -> None:
|
|
462
|
+
return
|
|
463
|
+
|
|
464
|
+
def visit_Return(self, node: ast.Return) -> None:
|
|
465
|
+
self.returns.append(node.value)
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def _return_aliases(
|
|
469
|
+
fn: ast.FunctionDef | ast.AsyncFunctionDef,
|
|
470
|
+
ignore_params: set[str] | None = None,
|
|
471
|
+
) -> list[str] | None:
|
|
472
|
+
params = _param_names(fn, ignore_params)
|
|
473
|
+
if not params:
|
|
474
|
+
return None
|
|
475
|
+
param_set = set(params)
|
|
476
|
+
collector = _ReturnAliasCollector()
|
|
477
|
+
for stmt in fn.body:
|
|
478
|
+
collector.visit(stmt)
|
|
479
|
+
if not collector.returns:
|
|
480
|
+
return None
|
|
481
|
+
alias: list[str] | None = None
|
|
482
|
+
|
|
483
|
+
def _alias_from_expr(expr: ast.AST | None) -> list[str] | None:
|
|
484
|
+
if expr is None:
|
|
485
|
+
return None
|
|
486
|
+
if isinstance(expr, ast.Name) and expr.id in param_set:
|
|
487
|
+
return [expr.id]
|
|
488
|
+
if isinstance(expr, (ast.Tuple, ast.List)):
|
|
489
|
+
names: list[str] = []
|
|
490
|
+
for elt in expr.elts:
|
|
491
|
+
if isinstance(elt, ast.Name) and elt.id in param_set:
|
|
492
|
+
names.append(elt.id)
|
|
493
|
+
else:
|
|
494
|
+
return None
|
|
495
|
+
return names
|
|
496
|
+
return None
|
|
497
|
+
|
|
498
|
+
for expr in collector.returns:
|
|
499
|
+
candidate = _alias_from_expr(expr)
|
|
500
|
+
if candidate is None:
|
|
501
|
+
return None
|
|
502
|
+
if alias is None:
|
|
503
|
+
alias = candidate
|
|
504
|
+
continue
|
|
505
|
+
if alias != candidate:
|
|
506
|
+
return None
|
|
507
|
+
return alias
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
def _collect_return_aliases(
|
|
511
|
+
funcs: list[ast.FunctionDef | ast.AsyncFunctionDef],
|
|
512
|
+
parents: dict[ast.AST, ast.AST],
|
|
513
|
+
*,
|
|
514
|
+
ignore_params: set[str] | None,
|
|
515
|
+
) -> dict[str, tuple[list[str], list[str]]]:
|
|
516
|
+
aliases: dict[str, tuple[list[str], list[str]]] = {}
|
|
517
|
+
conflicts: set[str] = set()
|
|
518
|
+
for fn in funcs:
|
|
519
|
+
alias = _return_aliases(fn, ignore_params)
|
|
520
|
+
if not alias:
|
|
521
|
+
continue
|
|
522
|
+
params = _param_names(fn, ignore_params)
|
|
523
|
+
class_name = _enclosing_class(fn, parents)
|
|
524
|
+
scopes = _enclosing_scopes(fn, parents)
|
|
525
|
+
keys = {fn.name}
|
|
526
|
+
if class_name:
|
|
527
|
+
keys.add(f"{class_name}.{fn.name}")
|
|
528
|
+
if scopes:
|
|
529
|
+
keys.add(_function_key(scopes, fn.name))
|
|
530
|
+
info = (params, alias)
|
|
531
|
+
for key in keys:
|
|
532
|
+
if key in conflicts:
|
|
533
|
+
continue
|
|
534
|
+
if key in aliases:
|
|
535
|
+
aliases.pop(key, None)
|
|
536
|
+
conflicts.add(key)
|
|
537
|
+
continue
|
|
538
|
+
aliases[key] = info
|
|
539
|
+
return aliases
|
|
540
|
+
|
|
541
|
+
|
|
433
542
|
def _const_repr(node: ast.AST) -> str | None:
|
|
434
543
|
if isinstance(node, ast.Constant):
|
|
435
544
|
return repr(node.value)
|
|
@@ -494,6 +603,7 @@ def _analyze_function(
|
|
|
494
603
|
ignore_params: set[str] | None = None,
|
|
495
604
|
strictness: str = "high",
|
|
496
605
|
class_name: str | None = None,
|
|
606
|
+
return_aliases: dict[str, tuple[list[str], list[str]]] | None = None,
|
|
497
607
|
) -> tuple[dict[str, ParamUse], list[CallArgs]]:
|
|
498
608
|
params = _param_names(fn, ignore_params)
|
|
499
609
|
use_map = {p: ParamUse(set(), False, {p}) for p in params}
|
|
@@ -511,6 +621,7 @@ def _analyze_function(
|
|
|
511
621
|
callee_name=lambda call: _normalize_callee(_callee_name(call), class_name),
|
|
512
622
|
call_args_factory=CallArgs,
|
|
513
623
|
call_context=_call_context,
|
|
624
|
+
return_aliases=return_aliases,
|
|
514
625
|
)
|
|
515
626
|
visitor.visit(fn)
|
|
516
627
|
return use_map, call_args
|
|
@@ -616,6 +727,9 @@ def analyze_file(
|
|
|
616
727
|
is_test = _is_test_path(path)
|
|
617
728
|
|
|
618
729
|
funcs = _collect_functions(tree)
|
|
730
|
+
return_aliases = _collect_return_aliases(
|
|
731
|
+
funcs, parents, ignore_params=config.ignore_params
|
|
732
|
+
)
|
|
619
733
|
fn_param_orders: dict[str, list[str]] = {}
|
|
620
734
|
fn_param_spans: dict[str, dict[str, tuple[int, int, int, int]]] = {}
|
|
621
735
|
fn_use = {}
|
|
@@ -638,6 +752,7 @@ def analyze_file(
|
|
|
638
752
|
ignore_params=config.ignore_params,
|
|
639
753
|
strictness=config.strictness,
|
|
640
754
|
class_name=class_name,
|
|
755
|
+
return_aliases=return_aliases,
|
|
641
756
|
)
|
|
642
757
|
fn_use[fn_key] = use_map
|
|
643
758
|
fn_calls[fn_key] = call_args
|
|
@@ -672,14 +787,6 @@ def analyze_file(
|
|
|
672
787
|
if not effective_scope:
|
|
673
788
|
break
|
|
674
789
|
effective_scope = effective_scope[:-1]
|
|
675
|
-
globals_only = [
|
|
676
|
-
key
|
|
677
|
-
for key in candidates
|
|
678
|
-
if not fn_lexical_scopes.get(key)
|
|
679
|
-
and not (fn_class_names.get(key) and not fn_lexical_scopes.get(key))
|
|
680
|
-
]
|
|
681
|
-
if len(globals_only) == 1:
|
|
682
|
-
return globals_only[0]
|
|
683
790
|
return None
|
|
684
791
|
|
|
685
792
|
for caller_key, calls in list(fn_calls.items()):
|
|
@@ -697,8 +804,6 @@ def analyze_file(
|
|
|
697
804
|
local_functions = set(fn_use.keys())
|
|
698
805
|
|
|
699
806
|
def _resolve_local_method(callee: str) -> str | None:
|
|
700
|
-
if "." not in callee:
|
|
701
|
-
return None
|
|
702
807
|
class_part, method = callee.rsplit(".", 1)
|
|
703
808
|
return _resolve_local_method_in_hierarchy(
|
|
704
809
|
class_part,
|
|
@@ -838,6 +943,7 @@ class FunctionInfo:
|
|
|
838
943
|
annots: dict[str, str | None]
|
|
839
944
|
calls: list[CallArgs]
|
|
840
945
|
unused_params: set[str]
|
|
946
|
+
defaults: set[str] = field(default_factory=set)
|
|
841
947
|
transparent: bool = True
|
|
842
948
|
class_name: str | None = None
|
|
843
949
|
scope: tuple[str, ...] = ()
|
|
@@ -1126,6 +1232,9 @@ def _build_function_index(
|
|
|
1126
1232
|
parents.visit(tree)
|
|
1127
1233
|
parent_map = parents.parents
|
|
1128
1234
|
module = _module_name(path, project_root)
|
|
1235
|
+
return_aliases = _collect_return_aliases(
|
|
1236
|
+
funcs, parent_map, ignore_params=ignore_params
|
|
1237
|
+
)
|
|
1129
1238
|
for fn in funcs:
|
|
1130
1239
|
class_name = _enclosing_class(fn, parent_map)
|
|
1131
1240
|
scopes = _enclosing_scopes(fn, parent_map)
|
|
@@ -1137,6 +1246,7 @@ def _build_function_index(
|
|
|
1137
1246
|
ignore_params=ignore_params,
|
|
1138
1247
|
strictness=strictness,
|
|
1139
1248
|
class_name=class_name,
|
|
1249
|
+
return_aliases=return_aliases,
|
|
1140
1250
|
)
|
|
1141
1251
|
unused_params = _unused_params(use_map)
|
|
1142
1252
|
qual_parts = [module] if module else []
|
|
@@ -1150,6 +1260,7 @@ def _build_function_index(
|
|
|
1150
1260
|
path=path,
|
|
1151
1261
|
params=_param_names(fn, ignore_params),
|
|
1152
1262
|
annots=_param_annotations(fn, ignore_params),
|
|
1263
|
+
defaults=_param_defaults(fn, ignore_params),
|
|
1153
1264
|
calls=call_args,
|
|
1154
1265
|
unused_params=unused_params,
|
|
1155
1266
|
transparent=_decorators_transparent(fn, transparent_decorators),
|
|
@@ -1329,10 +1440,7 @@ def analyze_type_flow_repo_with_map(
|
|
|
1329
1440
|
mapped_params: set[str] = set()
|
|
1330
1441
|
callee_to_caller: dict[str, set[str]] = defaultdict(set)
|
|
1331
1442
|
for pos_idx, param in call.pos_map.items():
|
|
1332
|
-
try:
|
|
1333
|
-
idx = int(pos_idx)
|
|
1334
|
-
except ValueError:
|
|
1335
|
-
continue
|
|
1443
|
+
idx = int(pos_idx)
|
|
1336
1444
|
if idx >= len(callee_params):
|
|
1337
1445
|
continue
|
|
1338
1446
|
callee_param = callee_params[idx]
|
|
@@ -1360,8 +1468,6 @@ def analyze_type_flow_repo_with_map(
|
|
|
1360
1468
|
for caller_param in callers:
|
|
1361
1469
|
downstream[caller_param].add(annot)
|
|
1362
1470
|
for param, annots in downstream.items():
|
|
1363
|
-
if not annots:
|
|
1364
|
-
continue
|
|
1365
1471
|
if len(annots) > 1:
|
|
1366
1472
|
ambiguities.add(
|
|
1367
1473
|
f"{info.path.name}:{info.name}.{param} downstream types conflict: {sorted(annots)}"
|
|
@@ -1445,10 +1551,7 @@ def analyze_constant_flow_repo(
|
|
|
1445
1551
|
callee_params = callee.params
|
|
1446
1552
|
mapped_params = set()
|
|
1447
1553
|
for idx_str in call.pos_map:
|
|
1448
|
-
try:
|
|
1449
|
-
idx = int(idx_str)
|
|
1450
|
-
except ValueError:
|
|
1451
|
-
continue
|
|
1554
|
+
idx = int(idx_str)
|
|
1452
1555
|
if idx >= len(callee_params):
|
|
1453
1556
|
continue
|
|
1454
1557
|
mapped_params.add(callee_params[idx])
|
|
@@ -1458,30 +1561,21 @@ def analyze_constant_flow_repo(
|
|
|
1458
1561
|
remaining = [p for p in callee_params if p not in mapped_params]
|
|
1459
1562
|
|
|
1460
1563
|
for idx_str, value in call.const_pos.items():
|
|
1461
|
-
try:
|
|
1462
|
-
idx = int(idx_str)
|
|
1463
|
-
except ValueError:
|
|
1464
|
-
continue
|
|
1564
|
+
idx = int(idx_str)
|
|
1465
1565
|
if idx >= len(callee_params):
|
|
1466
1566
|
continue
|
|
1467
1567
|
key = (callee.qual, callee_params[idx])
|
|
1468
1568
|
const_values[key].add(value)
|
|
1469
1569
|
call_counts[key] += 1
|
|
1470
1570
|
for idx_str in call.pos_map:
|
|
1471
|
-
try:
|
|
1472
|
-
idx = int(idx_str)
|
|
1473
|
-
except ValueError:
|
|
1474
|
-
continue
|
|
1571
|
+
idx = int(idx_str)
|
|
1475
1572
|
if idx >= len(callee_params):
|
|
1476
1573
|
continue
|
|
1477
1574
|
key = (callee.qual, callee_params[idx])
|
|
1478
1575
|
non_const[key] = True
|
|
1479
1576
|
call_counts[key] += 1
|
|
1480
1577
|
for idx_str in call.non_const_pos:
|
|
1481
|
-
try:
|
|
1482
|
-
idx = int(idx_str)
|
|
1483
|
-
except ValueError:
|
|
1484
|
-
continue
|
|
1578
|
+
idx = int(idx_str)
|
|
1485
1579
|
if idx >= len(callee_params):
|
|
1486
1580
|
continue
|
|
1487
1581
|
key = (callee.qual, callee_params[idx])
|
|
@@ -1523,8 +1617,6 @@ def analyze_constant_flow_repo(
|
|
|
1523
1617
|
for key, values in const_values.items():
|
|
1524
1618
|
if non_const.get(key):
|
|
1525
1619
|
continue
|
|
1526
|
-
if not values:
|
|
1527
|
-
continue
|
|
1528
1620
|
if len(values) == 1:
|
|
1529
1621
|
qual, param = key
|
|
1530
1622
|
info = by_qual.get(qual)
|
|
@@ -1536,6 +1628,110 @@ def analyze_constant_flow_repo(
|
|
|
1536
1628
|
return sorted(smells)
|
|
1537
1629
|
|
|
1538
1630
|
|
|
1631
|
+
def _compute_knob_param_names(
|
|
1632
|
+
*,
|
|
1633
|
+
by_name: dict[str, list[FunctionInfo]],
|
|
1634
|
+
by_qual: dict[str, FunctionInfo],
|
|
1635
|
+
symbol_table: SymbolTable,
|
|
1636
|
+
project_root: Path | None,
|
|
1637
|
+
class_index: dict[str, ClassInfo],
|
|
1638
|
+
strictness: str,
|
|
1639
|
+
) -> set[str]:
|
|
1640
|
+
const_values: dict[tuple[str, str], set[str]] = defaultdict(set)
|
|
1641
|
+
non_const: dict[tuple[str, str], bool] = defaultdict(bool)
|
|
1642
|
+
explicit_passed: dict[tuple[str, str], bool] = defaultdict(bool)
|
|
1643
|
+
call_counts: dict[str, int] = defaultdict(int)
|
|
1644
|
+
for infos in by_name.values():
|
|
1645
|
+
for info in infos:
|
|
1646
|
+
for call in info.calls:
|
|
1647
|
+
if call.is_test:
|
|
1648
|
+
continue
|
|
1649
|
+
callee = _resolve_callee(
|
|
1650
|
+
call.callee,
|
|
1651
|
+
info,
|
|
1652
|
+
by_name,
|
|
1653
|
+
by_qual,
|
|
1654
|
+
symbol_table,
|
|
1655
|
+
project_root,
|
|
1656
|
+
class_index,
|
|
1657
|
+
)
|
|
1658
|
+
if callee is None or not callee.transparent:
|
|
1659
|
+
continue
|
|
1660
|
+
call_counts[callee.qual] += 1
|
|
1661
|
+
callee_params = callee.params
|
|
1662
|
+
remaining = [p for p in callee_params]
|
|
1663
|
+
for idx_str, value in call.const_pos.items():
|
|
1664
|
+
idx = int(idx_str)
|
|
1665
|
+
if idx >= len(callee_params):
|
|
1666
|
+
continue
|
|
1667
|
+
param = callee_params[idx]
|
|
1668
|
+
const_values[(callee.qual, param)].add(value)
|
|
1669
|
+
explicit_passed[(callee.qual, param)] = True
|
|
1670
|
+
if param in remaining:
|
|
1671
|
+
remaining.remove(param)
|
|
1672
|
+
for idx_str in call.pos_map:
|
|
1673
|
+
idx = int(idx_str)
|
|
1674
|
+
if idx >= len(callee_params):
|
|
1675
|
+
continue
|
|
1676
|
+
param = callee_params[idx]
|
|
1677
|
+
non_const[(callee.qual, param)] = True
|
|
1678
|
+
explicit_passed[(callee.qual, param)] = True
|
|
1679
|
+
if param in remaining:
|
|
1680
|
+
remaining.remove(param)
|
|
1681
|
+
for idx_str in call.non_const_pos:
|
|
1682
|
+
idx = int(idx_str)
|
|
1683
|
+
if idx >= len(callee_params):
|
|
1684
|
+
continue
|
|
1685
|
+
param = callee_params[idx]
|
|
1686
|
+
non_const[(callee.qual, param)] = True
|
|
1687
|
+
explicit_passed[(callee.qual, param)] = True
|
|
1688
|
+
if param in remaining:
|
|
1689
|
+
remaining.remove(param)
|
|
1690
|
+
for kw, value in call.const_kw.items():
|
|
1691
|
+
if kw not in callee_params:
|
|
1692
|
+
continue
|
|
1693
|
+
const_values[(callee.qual, kw)].add(value)
|
|
1694
|
+
explicit_passed[(callee.qual, kw)] = True
|
|
1695
|
+
if kw in remaining:
|
|
1696
|
+
remaining.remove(kw)
|
|
1697
|
+
for kw in call.kw_map:
|
|
1698
|
+
if kw not in callee_params:
|
|
1699
|
+
continue
|
|
1700
|
+
non_const[(callee.qual, kw)] = True
|
|
1701
|
+
explicit_passed[(callee.qual, kw)] = True
|
|
1702
|
+
if kw in remaining:
|
|
1703
|
+
remaining.remove(kw)
|
|
1704
|
+
for kw in call.non_const_kw:
|
|
1705
|
+
if kw not in callee_params:
|
|
1706
|
+
continue
|
|
1707
|
+
non_const[(callee.qual, kw)] = True
|
|
1708
|
+
explicit_passed[(callee.qual, kw)] = True
|
|
1709
|
+
if kw in remaining:
|
|
1710
|
+
remaining.remove(kw)
|
|
1711
|
+
if strictness == "low":
|
|
1712
|
+
if len(call.star_pos) == 1:
|
|
1713
|
+
for param in remaining:
|
|
1714
|
+
non_const[(callee.qual, param)] = True
|
|
1715
|
+
explicit_passed[(callee.qual, param)] = True
|
|
1716
|
+
if len(call.star_kw) == 1:
|
|
1717
|
+
for param in remaining:
|
|
1718
|
+
non_const[(callee.qual, param)] = True
|
|
1719
|
+
explicit_passed[(callee.qual, param)] = True
|
|
1720
|
+
knob_names: set[str] = set()
|
|
1721
|
+
for key, values in const_values.items():
|
|
1722
|
+
if non_const.get(key):
|
|
1723
|
+
continue
|
|
1724
|
+
if len(values) == 1:
|
|
1725
|
+
knob_names.add(key[1])
|
|
1726
|
+
for qual, info in by_qual.items():
|
|
1727
|
+
if call_counts.get(qual, 0) == 0:
|
|
1728
|
+
continue
|
|
1729
|
+
for param in info.defaults:
|
|
1730
|
+
if not explicit_passed.get((qual, param), False):
|
|
1731
|
+
knob_names.add(param)
|
|
1732
|
+
return knob_names
|
|
1733
|
+
|
|
1734
|
+
|
|
1539
1735
|
def analyze_unused_arg_flow_repo(
|
|
1540
1736
|
paths: list[Path],
|
|
1541
1737
|
*,
|
|
@@ -1594,10 +1790,7 @@ def analyze_unused_arg_flow_repo(
|
|
|
1594
1790
|
callee_params = callee.params
|
|
1595
1791
|
mapped_params = set()
|
|
1596
1792
|
for idx_str in call.pos_map:
|
|
1597
|
-
try:
|
|
1598
|
-
idx = int(idx_str)
|
|
1599
|
-
except ValueError:
|
|
1600
|
-
continue
|
|
1793
|
+
idx = int(idx_str)
|
|
1601
1794
|
if idx >= len(callee_params):
|
|
1602
1795
|
continue
|
|
1603
1796
|
mapped_params.add(callee_params[idx])
|
|
@@ -1611,10 +1804,7 @@ def analyze_unused_arg_flow_repo(
|
|
|
1611
1804
|
]
|
|
1612
1805
|
|
|
1613
1806
|
for idx_str, caller_param in call.pos_map.items():
|
|
1614
|
-
try:
|
|
1615
|
-
idx = int(idx_str)
|
|
1616
|
-
except ValueError:
|
|
1617
|
-
continue
|
|
1807
|
+
idx = int(idx_str)
|
|
1618
1808
|
if idx >= len(callee_params):
|
|
1619
1809
|
continue
|
|
1620
1810
|
callee_param = callee_params[idx]
|
|
@@ -1628,10 +1818,7 @@ def analyze_unused_arg_flow_repo(
|
|
|
1628
1818
|
)
|
|
1629
1819
|
)
|
|
1630
1820
|
for idx_str in call.non_const_pos:
|
|
1631
|
-
try:
|
|
1632
|
-
idx = int(idx_str)
|
|
1633
|
-
except ValueError:
|
|
1634
|
-
continue
|
|
1821
|
+
idx = int(idx_str)
|
|
1635
1822
|
if idx >= len(callee_params):
|
|
1636
1823
|
continue
|
|
1637
1824
|
callee_param = callee_params[idx]
|
|
@@ -1791,7 +1978,7 @@ def _collect_dataclass_registry(
|
|
|
1791
1978
|
continue
|
|
1792
1979
|
if module:
|
|
1793
1980
|
registry[f"{module}.{node.name}"] = fields
|
|
1794
|
-
else:
|
|
1981
|
+
else: # pragma: no cover - module name is always non-empty for file paths
|
|
1795
1982
|
registry[node.name] = fields
|
|
1796
1983
|
return registry
|
|
1797
1984
|
|
|
@@ -1834,16 +2021,9 @@ def _iter_dataclass_call_bundles(
|
|
|
1834
2021
|
for name, fields in local_dataclasses.items():
|
|
1835
2022
|
if module:
|
|
1836
2023
|
dataclass_registry[f"{module}.{name}"] = fields
|
|
1837
|
-
else:
|
|
2024
|
+
else: # pragma: no cover - module name is always non-empty for file paths
|
|
1838
2025
|
dataclass_registry[name] = fields
|
|
1839
2026
|
|
|
1840
|
-
def _callee_name(call: ast.Call) -> str | None:
|
|
1841
|
-
if isinstance(call.func, ast.Name):
|
|
1842
|
-
return call.func.id
|
|
1843
|
-
if isinstance(call.func, ast.Attribute):
|
|
1844
|
-
return call.func.attr
|
|
1845
|
-
return None
|
|
1846
|
-
|
|
1847
2027
|
def _resolve_fields(call: ast.Call) -> list[str] | None:
|
|
1848
2028
|
if isinstance(call.func, ast.Name):
|
|
1849
2029
|
name = call.func.id
|
|
@@ -2048,8 +2228,6 @@ def _render_mermaid_component(
|
|
|
2048
2228
|
documented_only = sorted(observed_norm & documented)
|
|
2049
2229
|
def _tier(bundle: tuple[str, ...]) -> str:
|
|
2050
2230
|
count = bundle_counts.get(bundle, 1)
|
|
2051
|
-
if bundle in declared_global:
|
|
2052
|
-
return "tier-1"
|
|
2053
2231
|
if count > 1:
|
|
2054
2232
|
return "tier-2"
|
|
2055
2233
|
return "tier-3"
|
|
@@ -2201,6 +2379,27 @@ def _bundle_counts(
|
|
|
2201
2379
|
return counts
|
|
2202
2380
|
|
|
2203
2381
|
|
|
2382
|
+
def _merge_counts_by_knobs(
|
|
2383
|
+
counts: dict[tuple[str, ...], int],
|
|
2384
|
+
knob_names: set[str],
|
|
2385
|
+
) -> dict[tuple[str, ...], int]:
|
|
2386
|
+
if not knob_names:
|
|
2387
|
+
return counts
|
|
2388
|
+
bundles = [set(bundle) for bundle in counts]
|
|
2389
|
+
merged: dict[tuple[str, ...], int] = defaultdict(int)
|
|
2390
|
+
for bundle_key, count in counts.items():
|
|
2391
|
+
bundle = set(bundle_key)
|
|
2392
|
+
target = bundle
|
|
2393
|
+
for other in bundles:
|
|
2394
|
+
if bundle and bundle.issubset(other):
|
|
2395
|
+
extra = set(other) - bundle
|
|
2396
|
+
if extra and extra.issubset(knob_names):
|
|
2397
|
+
if len(other) < len(target) or target == bundle:
|
|
2398
|
+
target = set(other)
|
|
2399
|
+
merged[tuple(sorted(target))] += count
|
|
2400
|
+
return merged
|
|
2401
|
+
|
|
2402
|
+
|
|
2204
2403
|
def _collect_declared_bundles(root: Path) -> set[tuple[str, ...]]:
|
|
2205
2404
|
declared: set[tuple[str, ...]] = set()
|
|
2206
2405
|
file_paths = sorted(root.rglob("*.py"))
|
|
@@ -2218,13 +2417,37 @@ def build_synthesis_plan(
|
|
|
2218
2417
|
max_tier: int = 2,
|
|
2219
2418
|
min_bundle_size: int = 2,
|
|
2220
2419
|
allow_singletons: bool = False,
|
|
2420
|
+
merge_overlap_threshold: float | None = None,
|
|
2221
2421
|
config: AuditConfig | None = None,
|
|
2222
2422
|
) -> dict[str, object]:
|
|
2223
2423
|
audit_config = config or AuditConfig(
|
|
2224
2424
|
project_root=project_root or _infer_root(groups_by_path)
|
|
2225
2425
|
)
|
|
2226
2426
|
root = project_root or audit_config.project_root or _infer_root(groups_by_path)
|
|
2427
|
+
path_list = list(groups_by_path.keys())
|
|
2428
|
+
by_name, by_qual = _build_function_index(
|
|
2429
|
+
path_list,
|
|
2430
|
+
root,
|
|
2431
|
+
audit_config.ignore_params,
|
|
2432
|
+
audit_config.strictness,
|
|
2433
|
+
audit_config.transparent_decorators,
|
|
2434
|
+
)
|
|
2435
|
+
symbol_table = _build_symbol_table(
|
|
2436
|
+
path_list,
|
|
2437
|
+
root,
|
|
2438
|
+
external_filter=audit_config.external_filter,
|
|
2439
|
+
)
|
|
2440
|
+
class_index = _collect_class_index(path_list, root)
|
|
2441
|
+
knob_names = _compute_knob_param_names(
|
|
2442
|
+
by_name=by_name,
|
|
2443
|
+
by_qual=by_qual,
|
|
2444
|
+
symbol_table=symbol_table,
|
|
2445
|
+
project_root=root,
|
|
2446
|
+
class_index=class_index,
|
|
2447
|
+
strictness=audit_config.strictness,
|
|
2448
|
+
)
|
|
2227
2449
|
counts = _bundle_counts(groups_by_path)
|
|
2450
|
+
counts = _merge_counts_by_knobs(counts, knob_names)
|
|
2228
2451
|
if not counts:
|
|
2229
2452
|
response = SynthesisResponse(
|
|
2230
2453
|
protocols=[],
|
|
@@ -2246,7 +2469,19 @@ def build_synthesis_plan(
|
|
|
2246
2469
|
|
|
2247
2470
|
merged_bundle_tiers: dict[frozenset[str], int] = {}
|
|
2248
2471
|
original_bundles = [set(bundle) for bundle in counts]
|
|
2249
|
-
|
|
2472
|
+
synth_config = SynthesisConfig(
|
|
2473
|
+
max_tier=max_tier,
|
|
2474
|
+
min_bundle_size=min_bundle_size,
|
|
2475
|
+
allow_singletons=allow_singletons,
|
|
2476
|
+
merge_overlap_threshold=(
|
|
2477
|
+
merge_overlap_threshold
|
|
2478
|
+
if merge_overlap_threshold is not None
|
|
2479
|
+
else SynthesisConfig().merge_overlap_threshold
|
|
2480
|
+
),
|
|
2481
|
+
)
|
|
2482
|
+
merged_bundles = merge_bundles(
|
|
2483
|
+
original_bundles, min_overlap=synth_config.merge_overlap_threshold
|
|
2484
|
+
)
|
|
2250
2485
|
if merged_bundles:
|
|
2251
2486
|
for merged in merged_bundles:
|
|
2252
2487
|
members = [
|
|
@@ -2264,16 +2499,11 @@ def build_synthesis_plan(
|
|
|
2264
2499
|
bundle_tiers = merged_bundle_tiers
|
|
2265
2500
|
|
|
2266
2501
|
naming_context = NamingContext(frequency=dict(frequency))
|
|
2267
|
-
synth_config = SynthesisConfig(
|
|
2268
|
-
max_tier=max_tier,
|
|
2269
|
-
min_bundle_size=min_bundle_size,
|
|
2270
|
-
allow_singletons=allow_singletons,
|
|
2271
|
-
)
|
|
2272
2502
|
field_types: dict[str, str] = {}
|
|
2273
2503
|
type_warnings: list[str] = []
|
|
2274
2504
|
if bundle_fields:
|
|
2275
2505
|
inferred, _, _ = analyze_type_flow_repo_with_map(
|
|
2276
|
-
|
|
2506
|
+
path_list,
|
|
2277
2507
|
project_root=root,
|
|
2278
2508
|
ignore_params=audit_config.ignore_params,
|
|
2279
2509
|
strictness=audit_config.strictness,
|
|
@@ -2286,19 +2516,6 @@ def build_synthesis_plan(
|
|
|
2286
2516
|
if name not in bundle_fields or not annot:
|
|
2287
2517
|
continue
|
|
2288
2518
|
type_sets[name].add(annot)
|
|
2289
|
-
by_name, by_qual = _build_function_index(
|
|
2290
|
-
list(groups_by_path.keys()),
|
|
2291
|
-
root,
|
|
2292
|
-
audit_config.ignore_params,
|
|
2293
|
-
audit_config.strictness,
|
|
2294
|
-
audit_config.transparent_decorators,
|
|
2295
|
-
)
|
|
2296
|
-
symbol_table = _build_symbol_table(
|
|
2297
|
-
list(groups_by_path.keys()),
|
|
2298
|
-
root,
|
|
2299
|
-
external_filter=audit_config.external_filter,
|
|
2300
|
-
)
|
|
2301
|
-
class_index = _collect_class_index(list(groups_by_path.keys()), root)
|
|
2302
2519
|
for infos in by_name.values():
|
|
2303
2520
|
for info in infos:
|
|
2304
2521
|
for call in info.calls:
|
|
@@ -2317,10 +2534,7 @@ def build_synthesis_plan(
|
|
|
2317
2534
|
continue
|
|
2318
2535
|
callee_params = callee.params
|
|
2319
2536
|
for idx_str, value in call.const_pos.items():
|
|
2320
|
-
try:
|
|
2321
|
-
idx = int(idx_str)
|
|
2322
|
-
except ValueError:
|
|
2323
|
-
continue
|
|
2537
|
+
idx = int(idx_str)
|
|
2324
2538
|
if idx >= len(callee_params):
|
|
2325
2539
|
continue
|
|
2326
2540
|
param = callee_params[idx]
|
|
@@ -2336,8 +2550,6 @@ def build_synthesis_plan(
|
|
|
2336
2550
|
if hint:
|
|
2337
2551
|
type_sets[kw].add(hint)
|
|
2338
2552
|
for name, types in type_sets.items():
|
|
2339
|
-
if not types:
|
|
2340
|
-
continue
|
|
2341
2553
|
combined, conflicted = _combine_type_hints(types)
|
|
2342
2554
|
field_types[name] = combined
|
|
2343
2555
|
if conflicted and len(types) > 1:
|
|
@@ -2517,8 +2729,6 @@ def build_refactor_plan(
|
|
|
2517
2729
|
|
|
2518
2730
|
plans: list[dict[str, object]] = []
|
|
2519
2731
|
for bundle, infos in sorted(bundle_map.items(), key=lambda item: (len(item[0]), item[0])):
|
|
2520
|
-
if not infos:
|
|
2521
|
-
continue
|
|
2522
2732
|
comp = dict(infos)
|
|
2523
2733
|
deps: dict[str, set[str]] = {qual: set() for qual in comp}
|
|
2524
2734
|
for info in infos.values():
|
|
@@ -2942,6 +3152,12 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
2942
3152
|
action="store_true",
|
|
2943
3153
|
help="Allow single-field bundles in synthesis plan.",
|
|
2944
3154
|
)
|
|
3155
|
+
parser.add_argument(
|
|
3156
|
+
"--synthesis-merge-overlap",
|
|
3157
|
+
type=float,
|
|
3158
|
+
default=None,
|
|
3159
|
+
help="Jaccard overlap threshold for merging bundles (0.0-1.0).",
|
|
3160
|
+
)
|
|
2945
3161
|
return parser
|
|
2946
3162
|
|
|
2947
3163
|
|
|
@@ -2985,6 +3201,7 @@ def run(argv: list[str] | None = None) -> int:
|
|
|
2985
3201
|
]
|
|
2986
3202
|
config_path = Path(args.config) if args.config else None
|
|
2987
3203
|
defaults = dataflow_defaults(Path(args.root), config_path)
|
|
3204
|
+
synth_defaults = synthesis_defaults(Path(args.root), config_path)
|
|
2988
3205
|
merged = merge_payload(
|
|
2989
3206
|
{
|
|
2990
3207
|
"exclude": exclude_dirs,
|
|
@@ -3030,6 +3247,15 @@ def run(argv: list[str] | None = None) -> int:
|
|
|
3030
3247
|
config=config,
|
|
3031
3248
|
)
|
|
3032
3249
|
synthesis_plan: dict[str, object] | None = None
|
|
3250
|
+
merge_overlap_threshold = None
|
|
3251
|
+
if args.synthesis_merge_overlap is not None:
|
|
3252
|
+
merge_overlap_threshold = args.synthesis_merge_overlap
|
|
3253
|
+
else:
|
|
3254
|
+
value = synth_defaults.get("merge_overlap_threshold")
|
|
3255
|
+
if isinstance(value, (int, float)):
|
|
3256
|
+
merge_overlap_threshold = float(value)
|
|
3257
|
+
if merge_overlap_threshold is not None:
|
|
3258
|
+
merge_overlap_threshold = max(0.0, min(1.0, merge_overlap_threshold))
|
|
3033
3259
|
if args.synthesis_plan or args.synthesis_report or args.synthesis_protocols:
|
|
3034
3260
|
synthesis_plan = build_synthesis_plan(
|
|
3035
3261
|
analysis.groups_by_path,
|
|
@@ -3037,6 +3263,7 @@ def run(argv: list[str] | None = None) -> int:
|
|
|
3037
3263
|
max_tier=args.synthesis_max_tier,
|
|
3038
3264
|
min_bundle_size=args.synthesis_min_bundle_size,
|
|
3039
3265
|
allow_singletons=args.synthesis_allow_singletons,
|
|
3266
|
+
merge_overlap_threshold=merge_overlap_threshold,
|
|
3040
3267
|
config=config,
|
|
3041
3268
|
)
|
|
3042
3269
|
if args.synthesis_plan:
|