gabion 0.1.0__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gabion/__init__.py CHANGED
@@ -2,4 +2,4 @@
2
2
 
3
3
  __all__ = ["__version__"]
4
4
 
5
- __version__ = "0.1.0"
5
+ __version__ = "0.1.5"
@@ -27,7 +27,7 @@ from typing import Iterable, Iterator
27
27
  import re
28
28
 
29
29
  from gabion.analysis.visitors import ImportVisitor, ParentAnnotator, UseVisitor
30
- from gabion.config import dataflow_defaults, merge_payload
30
+ from gabion.config import dataflow_defaults, merge_payload, synthesis_defaults
31
31
  from gabion.schema import SynthesisResponse
32
32
  from gabion.synthesis import NamingContext, SynthesisConfig, Synthesizer
33
33
  from gabion.synthesis.merge import merge_bundles
@@ -348,8 +348,6 @@ def _param_spans(
348
348
  def _function_key(scope: Iterable[str], name: str) -> str:
349
349
  parts = list(scope)
350
350
  parts.append(name)
351
- if not parts:
352
- return name
353
351
  return ".".join(parts)
354
352
 
355
353
 
@@ -430,6 +428,117 @@ def _param_annotations(
430
428
  return annots
431
429
 
432
430
 
431
+ def _param_defaults(
432
+ fn: ast.FunctionDef | ast.AsyncFunctionDef,
433
+ ignore_params: set[str] | None = None,
434
+ ) -> set[str]:
435
+ defaults: set[str] = set()
436
+ args = fn.args.posonlyargs + fn.args.args
437
+ names = [a.arg for a in args]
438
+ if fn.args.defaults:
439
+ defaulted = names[-len(fn.args.defaults) :]
440
+ defaults.update(defaulted)
441
+ for kw_arg, default in zip(fn.args.kwonlyargs, fn.args.kw_defaults):
442
+ if default is not None:
443
+ defaults.add(kw_arg.arg)
444
+ if names and names[0] in {"self", "cls"}:
445
+ defaults.discard(names[0])
446
+ if ignore_params:
447
+ defaults = {name for name in defaults if name not in ignore_params}
448
+ return defaults
449
+
450
+
451
+ class _ReturnAliasCollector(ast.NodeVisitor):
452
+ def __init__(self) -> None:
453
+ self.returns: list[ast.AST | None] = []
454
+
455
+ def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
456
+ return
457
+
458
+ def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
459
+ return
460
+
461
+ def visit_Lambda(self, node: ast.Lambda) -> None:
462
+ return
463
+
464
+ def visit_Return(self, node: ast.Return) -> None:
465
+ self.returns.append(node.value)
466
+
467
+
468
+ def _return_aliases(
469
+ fn: ast.FunctionDef | ast.AsyncFunctionDef,
470
+ ignore_params: set[str] | None = None,
471
+ ) -> list[str] | None:
472
+ params = _param_names(fn, ignore_params)
473
+ if not params:
474
+ return None
475
+ param_set = set(params)
476
+ collector = _ReturnAliasCollector()
477
+ for stmt in fn.body:
478
+ collector.visit(stmt)
479
+ if not collector.returns:
480
+ return None
481
+ alias: list[str] | None = None
482
+
483
+ def _alias_from_expr(expr: ast.AST | None) -> list[str] | None:
484
+ if expr is None:
485
+ return None
486
+ if isinstance(expr, ast.Name) and expr.id in param_set:
487
+ return [expr.id]
488
+ if isinstance(expr, (ast.Tuple, ast.List)):
489
+ names: list[str] = []
490
+ for elt in expr.elts:
491
+ if isinstance(elt, ast.Name) and elt.id in param_set:
492
+ names.append(elt.id)
493
+ else:
494
+ return None
495
+ return names
496
+ return None
497
+
498
+ for expr in collector.returns:
499
+ candidate = _alias_from_expr(expr)
500
+ if candidate is None:
501
+ return None
502
+ if alias is None:
503
+ alias = candidate
504
+ continue
505
+ if alias != candidate:
506
+ return None
507
+ return alias
508
+
509
+
510
+ def _collect_return_aliases(
511
+ funcs: list[ast.FunctionDef | ast.AsyncFunctionDef],
512
+ parents: dict[ast.AST, ast.AST],
513
+ *,
514
+ ignore_params: set[str] | None,
515
+ ) -> dict[str, tuple[list[str], list[str]]]:
516
+ aliases: dict[str, tuple[list[str], list[str]]] = {}
517
+ conflicts: set[str] = set()
518
+ for fn in funcs:
519
+ alias = _return_aliases(fn, ignore_params)
520
+ if not alias:
521
+ continue
522
+ params = _param_names(fn, ignore_params)
523
+ class_name = _enclosing_class(fn, parents)
524
+ scopes = _enclosing_scopes(fn, parents)
525
+ keys = {fn.name}
526
+ if class_name:
527
+ keys.add(f"{class_name}.{fn.name}")
528
+ if scopes:
529
+ keys.add(_function_key(scopes, fn.name))
530
+ info = (params, alias)
531
+ for key in keys:
532
+ if key in conflicts:
533
+ continue
534
+ if key in aliases:
535
+ aliases.pop(key, None)
536
+ conflicts.add(key)
537
+ continue
538
+ aliases[key] = info
539
+ return aliases
540
+
541
+
433
542
  def _const_repr(node: ast.AST) -> str | None:
434
543
  if isinstance(node, ast.Constant):
435
544
  return repr(node.value)
@@ -494,6 +603,7 @@ def _analyze_function(
494
603
  ignore_params: set[str] | None = None,
495
604
  strictness: str = "high",
496
605
  class_name: str | None = None,
606
+ return_aliases: dict[str, tuple[list[str], list[str]]] | None = None,
497
607
  ) -> tuple[dict[str, ParamUse], list[CallArgs]]:
498
608
  params = _param_names(fn, ignore_params)
499
609
  use_map = {p: ParamUse(set(), False, {p}) for p in params}
@@ -511,6 +621,7 @@ def _analyze_function(
511
621
  callee_name=lambda call: _normalize_callee(_callee_name(call), class_name),
512
622
  call_args_factory=CallArgs,
513
623
  call_context=_call_context,
624
+ return_aliases=return_aliases,
514
625
  )
515
626
  visitor.visit(fn)
516
627
  return use_map, call_args
@@ -616,6 +727,9 @@ def analyze_file(
616
727
  is_test = _is_test_path(path)
617
728
 
618
729
  funcs = _collect_functions(tree)
730
+ return_aliases = _collect_return_aliases(
731
+ funcs, parents, ignore_params=config.ignore_params
732
+ )
619
733
  fn_param_orders: dict[str, list[str]] = {}
620
734
  fn_param_spans: dict[str, dict[str, tuple[int, int, int, int]]] = {}
621
735
  fn_use = {}
@@ -638,6 +752,7 @@ def analyze_file(
638
752
  ignore_params=config.ignore_params,
639
753
  strictness=config.strictness,
640
754
  class_name=class_name,
755
+ return_aliases=return_aliases,
641
756
  )
642
757
  fn_use[fn_key] = use_map
643
758
  fn_calls[fn_key] = call_args
@@ -672,14 +787,6 @@ def analyze_file(
672
787
  if not effective_scope:
673
788
  break
674
789
  effective_scope = effective_scope[:-1]
675
- globals_only = [
676
- key
677
- for key in candidates
678
- if not fn_lexical_scopes.get(key)
679
- and not (fn_class_names.get(key) and not fn_lexical_scopes.get(key))
680
- ]
681
- if len(globals_only) == 1:
682
- return globals_only[0]
683
790
  return None
684
791
 
685
792
  for caller_key, calls in list(fn_calls.items()):
@@ -697,8 +804,6 @@ def analyze_file(
697
804
  local_functions = set(fn_use.keys())
698
805
 
699
806
  def _resolve_local_method(callee: str) -> str | None:
700
- if "." not in callee:
701
- return None
702
807
  class_part, method = callee.rsplit(".", 1)
703
808
  return _resolve_local_method_in_hierarchy(
704
809
  class_part,
@@ -838,6 +943,7 @@ class FunctionInfo:
838
943
  annots: dict[str, str | None]
839
944
  calls: list[CallArgs]
840
945
  unused_params: set[str]
946
+ defaults: set[str] = field(default_factory=set)
841
947
  transparent: bool = True
842
948
  class_name: str | None = None
843
949
  scope: tuple[str, ...] = ()
@@ -1126,6 +1232,9 @@ def _build_function_index(
1126
1232
  parents.visit(tree)
1127
1233
  parent_map = parents.parents
1128
1234
  module = _module_name(path, project_root)
1235
+ return_aliases = _collect_return_aliases(
1236
+ funcs, parent_map, ignore_params=ignore_params
1237
+ )
1129
1238
  for fn in funcs:
1130
1239
  class_name = _enclosing_class(fn, parent_map)
1131
1240
  scopes = _enclosing_scopes(fn, parent_map)
@@ -1137,6 +1246,7 @@ def _build_function_index(
1137
1246
  ignore_params=ignore_params,
1138
1247
  strictness=strictness,
1139
1248
  class_name=class_name,
1249
+ return_aliases=return_aliases,
1140
1250
  )
1141
1251
  unused_params = _unused_params(use_map)
1142
1252
  qual_parts = [module] if module else []
@@ -1150,6 +1260,7 @@ def _build_function_index(
1150
1260
  path=path,
1151
1261
  params=_param_names(fn, ignore_params),
1152
1262
  annots=_param_annotations(fn, ignore_params),
1263
+ defaults=_param_defaults(fn, ignore_params),
1153
1264
  calls=call_args,
1154
1265
  unused_params=unused_params,
1155
1266
  transparent=_decorators_transparent(fn, transparent_decorators),
@@ -1329,10 +1440,7 @@ def analyze_type_flow_repo_with_map(
1329
1440
  mapped_params: set[str] = set()
1330
1441
  callee_to_caller: dict[str, set[str]] = defaultdict(set)
1331
1442
  for pos_idx, param in call.pos_map.items():
1332
- try:
1333
- idx = int(pos_idx)
1334
- except ValueError:
1335
- continue
1443
+ idx = int(pos_idx)
1336
1444
  if idx >= len(callee_params):
1337
1445
  continue
1338
1446
  callee_param = callee_params[idx]
@@ -1360,8 +1468,6 @@ def analyze_type_flow_repo_with_map(
1360
1468
  for caller_param in callers:
1361
1469
  downstream[caller_param].add(annot)
1362
1470
  for param, annots in downstream.items():
1363
- if not annots:
1364
- continue
1365
1471
  if len(annots) > 1:
1366
1472
  ambiguities.add(
1367
1473
  f"{info.path.name}:{info.name}.{param} downstream types conflict: {sorted(annots)}"
@@ -1445,10 +1551,7 @@ def analyze_constant_flow_repo(
1445
1551
  callee_params = callee.params
1446
1552
  mapped_params = set()
1447
1553
  for idx_str in call.pos_map:
1448
- try:
1449
- idx = int(idx_str)
1450
- except ValueError:
1451
- continue
1554
+ idx = int(idx_str)
1452
1555
  if idx >= len(callee_params):
1453
1556
  continue
1454
1557
  mapped_params.add(callee_params[idx])
@@ -1458,30 +1561,21 @@ def analyze_constant_flow_repo(
1458
1561
  remaining = [p for p in callee_params if p not in mapped_params]
1459
1562
 
1460
1563
  for idx_str, value in call.const_pos.items():
1461
- try:
1462
- idx = int(idx_str)
1463
- except ValueError:
1464
- continue
1564
+ idx = int(idx_str)
1465
1565
  if idx >= len(callee_params):
1466
1566
  continue
1467
1567
  key = (callee.qual, callee_params[idx])
1468
1568
  const_values[key].add(value)
1469
1569
  call_counts[key] += 1
1470
1570
  for idx_str in call.pos_map:
1471
- try:
1472
- idx = int(idx_str)
1473
- except ValueError:
1474
- continue
1571
+ idx = int(idx_str)
1475
1572
  if idx >= len(callee_params):
1476
1573
  continue
1477
1574
  key = (callee.qual, callee_params[idx])
1478
1575
  non_const[key] = True
1479
1576
  call_counts[key] += 1
1480
1577
  for idx_str in call.non_const_pos:
1481
- try:
1482
- idx = int(idx_str)
1483
- except ValueError:
1484
- continue
1578
+ idx = int(idx_str)
1485
1579
  if idx >= len(callee_params):
1486
1580
  continue
1487
1581
  key = (callee.qual, callee_params[idx])
@@ -1523,8 +1617,6 @@ def analyze_constant_flow_repo(
1523
1617
  for key, values in const_values.items():
1524
1618
  if non_const.get(key):
1525
1619
  continue
1526
- if not values:
1527
- continue
1528
1620
  if len(values) == 1:
1529
1621
  qual, param = key
1530
1622
  info = by_qual.get(qual)
@@ -1536,6 +1628,110 @@ def analyze_constant_flow_repo(
1536
1628
  return sorted(smells)
1537
1629
 
1538
1630
 
1631
+ def _compute_knob_param_names(
1632
+ *,
1633
+ by_name: dict[str, list[FunctionInfo]],
1634
+ by_qual: dict[str, FunctionInfo],
1635
+ symbol_table: SymbolTable,
1636
+ project_root: Path | None,
1637
+ class_index: dict[str, ClassInfo],
1638
+ strictness: str,
1639
+ ) -> set[str]:
1640
+ const_values: dict[tuple[str, str], set[str]] = defaultdict(set)
1641
+ non_const: dict[tuple[str, str], bool] = defaultdict(bool)
1642
+ explicit_passed: dict[tuple[str, str], bool] = defaultdict(bool)
1643
+ call_counts: dict[str, int] = defaultdict(int)
1644
+ for infos in by_name.values():
1645
+ for info in infos:
1646
+ for call in info.calls:
1647
+ if call.is_test:
1648
+ continue
1649
+ callee = _resolve_callee(
1650
+ call.callee,
1651
+ info,
1652
+ by_name,
1653
+ by_qual,
1654
+ symbol_table,
1655
+ project_root,
1656
+ class_index,
1657
+ )
1658
+ if callee is None or not callee.transparent:
1659
+ continue
1660
+ call_counts[callee.qual] += 1
1661
+ callee_params = callee.params
1662
+ remaining = [p for p in callee_params]
1663
+ for idx_str, value in call.const_pos.items():
1664
+ idx = int(idx_str)
1665
+ if idx >= len(callee_params):
1666
+ continue
1667
+ param = callee_params[idx]
1668
+ const_values[(callee.qual, param)].add(value)
1669
+ explicit_passed[(callee.qual, param)] = True
1670
+ if param in remaining:
1671
+ remaining.remove(param)
1672
+ for idx_str in call.pos_map:
1673
+ idx = int(idx_str)
1674
+ if idx >= len(callee_params):
1675
+ continue
1676
+ param = callee_params[idx]
1677
+ non_const[(callee.qual, param)] = True
1678
+ explicit_passed[(callee.qual, param)] = True
1679
+ if param in remaining:
1680
+ remaining.remove(param)
1681
+ for idx_str in call.non_const_pos:
1682
+ idx = int(idx_str)
1683
+ if idx >= len(callee_params):
1684
+ continue
1685
+ param = callee_params[idx]
1686
+ non_const[(callee.qual, param)] = True
1687
+ explicit_passed[(callee.qual, param)] = True
1688
+ if param in remaining:
1689
+ remaining.remove(param)
1690
+ for kw, value in call.const_kw.items():
1691
+ if kw not in callee_params:
1692
+ continue
1693
+ const_values[(callee.qual, kw)].add(value)
1694
+ explicit_passed[(callee.qual, kw)] = True
1695
+ if kw in remaining:
1696
+ remaining.remove(kw)
1697
+ for kw in call.kw_map:
1698
+ if kw not in callee_params:
1699
+ continue
1700
+ non_const[(callee.qual, kw)] = True
1701
+ explicit_passed[(callee.qual, kw)] = True
1702
+ if kw in remaining:
1703
+ remaining.remove(kw)
1704
+ for kw in call.non_const_kw:
1705
+ if kw not in callee_params:
1706
+ continue
1707
+ non_const[(callee.qual, kw)] = True
1708
+ explicit_passed[(callee.qual, kw)] = True
1709
+ if kw in remaining:
1710
+ remaining.remove(kw)
1711
+ if strictness == "low":
1712
+ if len(call.star_pos) == 1:
1713
+ for param in remaining:
1714
+ non_const[(callee.qual, param)] = True
1715
+ explicit_passed[(callee.qual, param)] = True
1716
+ if len(call.star_kw) == 1:
1717
+ for param in remaining:
1718
+ non_const[(callee.qual, param)] = True
1719
+ explicit_passed[(callee.qual, param)] = True
1720
+ knob_names: set[str] = set()
1721
+ for key, values in const_values.items():
1722
+ if non_const.get(key):
1723
+ continue
1724
+ if len(values) == 1:
1725
+ knob_names.add(key[1])
1726
+ for qual, info in by_qual.items():
1727
+ if call_counts.get(qual, 0) == 0:
1728
+ continue
1729
+ for param in info.defaults:
1730
+ if not explicit_passed.get((qual, param), False):
1731
+ knob_names.add(param)
1732
+ return knob_names
1733
+
1734
+
1539
1735
  def analyze_unused_arg_flow_repo(
1540
1736
  paths: list[Path],
1541
1737
  *,
@@ -1594,10 +1790,7 @@ def analyze_unused_arg_flow_repo(
1594
1790
  callee_params = callee.params
1595
1791
  mapped_params = set()
1596
1792
  for idx_str in call.pos_map:
1597
- try:
1598
- idx = int(idx_str)
1599
- except ValueError:
1600
- continue
1793
+ idx = int(idx_str)
1601
1794
  if idx >= len(callee_params):
1602
1795
  continue
1603
1796
  mapped_params.add(callee_params[idx])
@@ -1611,10 +1804,7 @@ def analyze_unused_arg_flow_repo(
1611
1804
  ]
1612
1805
 
1613
1806
  for idx_str, caller_param in call.pos_map.items():
1614
- try:
1615
- idx = int(idx_str)
1616
- except ValueError:
1617
- continue
1807
+ idx = int(idx_str)
1618
1808
  if idx >= len(callee_params):
1619
1809
  continue
1620
1810
  callee_param = callee_params[idx]
@@ -1628,10 +1818,7 @@ def analyze_unused_arg_flow_repo(
1628
1818
  )
1629
1819
  )
1630
1820
  for idx_str in call.non_const_pos:
1631
- try:
1632
- idx = int(idx_str)
1633
- except ValueError:
1634
- continue
1821
+ idx = int(idx_str)
1635
1822
  if idx >= len(callee_params):
1636
1823
  continue
1637
1824
  callee_param = callee_params[idx]
@@ -1791,7 +1978,7 @@ def _collect_dataclass_registry(
1791
1978
  continue
1792
1979
  if module:
1793
1980
  registry[f"{module}.{node.name}"] = fields
1794
- else:
1981
+ else: # pragma: no cover - module name is always non-empty for file paths
1795
1982
  registry[node.name] = fields
1796
1983
  return registry
1797
1984
 
@@ -1834,16 +2021,9 @@ def _iter_dataclass_call_bundles(
1834
2021
  for name, fields in local_dataclasses.items():
1835
2022
  if module:
1836
2023
  dataclass_registry[f"{module}.{name}"] = fields
1837
- else:
2024
+ else: # pragma: no cover - module name is always non-empty for file paths
1838
2025
  dataclass_registry[name] = fields
1839
2026
 
1840
- def _callee_name(call: ast.Call) -> str | None:
1841
- if isinstance(call.func, ast.Name):
1842
- return call.func.id
1843
- if isinstance(call.func, ast.Attribute):
1844
- return call.func.attr
1845
- return None
1846
-
1847
2027
  def _resolve_fields(call: ast.Call) -> list[str] | None:
1848
2028
  if isinstance(call.func, ast.Name):
1849
2029
  name = call.func.id
@@ -2048,8 +2228,6 @@ def _render_mermaid_component(
2048
2228
  documented_only = sorted(observed_norm & documented)
2049
2229
  def _tier(bundle: tuple[str, ...]) -> str:
2050
2230
  count = bundle_counts.get(bundle, 1)
2051
- if bundle in declared_global:
2052
- return "tier-1"
2053
2231
  if count > 1:
2054
2232
  return "tier-2"
2055
2233
  return "tier-3"
@@ -2201,6 +2379,27 @@ def _bundle_counts(
2201
2379
  return counts
2202
2380
 
2203
2381
 
2382
+ def _merge_counts_by_knobs(
2383
+ counts: dict[tuple[str, ...], int],
2384
+ knob_names: set[str],
2385
+ ) -> dict[tuple[str, ...], int]:
2386
+ if not knob_names:
2387
+ return counts
2388
+ bundles = [set(bundle) for bundle in counts]
2389
+ merged: dict[tuple[str, ...], int] = defaultdict(int)
2390
+ for bundle_key, count in counts.items():
2391
+ bundle = set(bundle_key)
2392
+ target = bundle
2393
+ for other in bundles:
2394
+ if bundle and bundle.issubset(other):
2395
+ extra = set(other) - bundle
2396
+ if extra and extra.issubset(knob_names):
2397
+ if len(other) < len(target) or target == bundle:
2398
+ target = set(other)
2399
+ merged[tuple(sorted(target))] += count
2400
+ return merged
2401
+
2402
+
2204
2403
  def _collect_declared_bundles(root: Path) -> set[tuple[str, ...]]:
2205
2404
  declared: set[tuple[str, ...]] = set()
2206
2405
  file_paths = sorted(root.rglob("*.py"))
@@ -2218,13 +2417,37 @@ def build_synthesis_plan(
2218
2417
  max_tier: int = 2,
2219
2418
  min_bundle_size: int = 2,
2220
2419
  allow_singletons: bool = False,
2420
+ merge_overlap_threshold: float | None = None,
2221
2421
  config: AuditConfig | None = None,
2222
2422
  ) -> dict[str, object]:
2223
2423
  audit_config = config or AuditConfig(
2224
2424
  project_root=project_root or _infer_root(groups_by_path)
2225
2425
  )
2226
2426
  root = project_root or audit_config.project_root or _infer_root(groups_by_path)
2427
+ path_list = list(groups_by_path.keys())
2428
+ by_name, by_qual = _build_function_index(
2429
+ path_list,
2430
+ root,
2431
+ audit_config.ignore_params,
2432
+ audit_config.strictness,
2433
+ audit_config.transparent_decorators,
2434
+ )
2435
+ symbol_table = _build_symbol_table(
2436
+ path_list,
2437
+ root,
2438
+ external_filter=audit_config.external_filter,
2439
+ )
2440
+ class_index = _collect_class_index(path_list, root)
2441
+ knob_names = _compute_knob_param_names(
2442
+ by_name=by_name,
2443
+ by_qual=by_qual,
2444
+ symbol_table=symbol_table,
2445
+ project_root=root,
2446
+ class_index=class_index,
2447
+ strictness=audit_config.strictness,
2448
+ )
2227
2449
  counts = _bundle_counts(groups_by_path)
2450
+ counts = _merge_counts_by_knobs(counts, knob_names)
2228
2451
  if not counts:
2229
2452
  response = SynthesisResponse(
2230
2453
  protocols=[],
@@ -2246,7 +2469,19 @@ def build_synthesis_plan(
2246
2469
 
2247
2470
  merged_bundle_tiers: dict[frozenset[str], int] = {}
2248
2471
  original_bundles = [set(bundle) for bundle in counts]
2249
- merged_bundles = merge_bundles(original_bundles)
2472
+ synth_config = SynthesisConfig(
2473
+ max_tier=max_tier,
2474
+ min_bundle_size=min_bundle_size,
2475
+ allow_singletons=allow_singletons,
2476
+ merge_overlap_threshold=(
2477
+ merge_overlap_threshold
2478
+ if merge_overlap_threshold is not None
2479
+ else SynthesisConfig().merge_overlap_threshold
2480
+ ),
2481
+ )
2482
+ merged_bundles = merge_bundles(
2483
+ original_bundles, min_overlap=synth_config.merge_overlap_threshold
2484
+ )
2250
2485
  if merged_bundles:
2251
2486
  for merged in merged_bundles:
2252
2487
  members = [
@@ -2264,16 +2499,11 @@ def build_synthesis_plan(
2264
2499
  bundle_tiers = merged_bundle_tiers
2265
2500
 
2266
2501
  naming_context = NamingContext(frequency=dict(frequency))
2267
- synth_config = SynthesisConfig(
2268
- max_tier=max_tier,
2269
- min_bundle_size=min_bundle_size,
2270
- allow_singletons=allow_singletons,
2271
- )
2272
2502
  field_types: dict[str, str] = {}
2273
2503
  type_warnings: list[str] = []
2274
2504
  if bundle_fields:
2275
2505
  inferred, _, _ = analyze_type_flow_repo_with_map(
2276
- list(groups_by_path.keys()),
2506
+ path_list,
2277
2507
  project_root=root,
2278
2508
  ignore_params=audit_config.ignore_params,
2279
2509
  strictness=audit_config.strictness,
@@ -2286,19 +2516,6 @@ def build_synthesis_plan(
2286
2516
  if name not in bundle_fields or not annot:
2287
2517
  continue
2288
2518
  type_sets[name].add(annot)
2289
- by_name, by_qual = _build_function_index(
2290
- list(groups_by_path.keys()),
2291
- root,
2292
- audit_config.ignore_params,
2293
- audit_config.strictness,
2294
- audit_config.transparent_decorators,
2295
- )
2296
- symbol_table = _build_symbol_table(
2297
- list(groups_by_path.keys()),
2298
- root,
2299
- external_filter=audit_config.external_filter,
2300
- )
2301
- class_index = _collect_class_index(list(groups_by_path.keys()), root)
2302
2519
  for infos in by_name.values():
2303
2520
  for info in infos:
2304
2521
  for call in info.calls:
@@ -2317,10 +2534,7 @@ def build_synthesis_plan(
2317
2534
  continue
2318
2535
  callee_params = callee.params
2319
2536
  for idx_str, value in call.const_pos.items():
2320
- try:
2321
- idx = int(idx_str)
2322
- except ValueError:
2323
- continue
2537
+ idx = int(idx_str)
2324
2538
  if idx >= len(callee_params):
2325
2539
  continue
2326
2540
  param = callee_params[idx]
@@ -2336,8 +2550,6 @@ def build_synthesis_plan(
2336
2550
  if hint:
2337
2551
  type_sets[kw].add(hint)
2338
2552
  for name, types in type_sets.items():
2339
- if not types:
2340
- continue
2341
2553
  combined, conflicted = _combine_type_hints(types)
2342
2554
  field_types[name] = combined
2343
2555
  if conflicted and len(types) > 1:
@@ -2517,8 +2729,6 @@ def build_refactor_plan(
2517
2729
 
2518
2730
  plans: list[dict[str, object]] = []
2519
2731
  for bundle, infos in sorted(bundle_map.items(), key=lambda item: (len(item[0]), item[0])):
2520
- if not infos:
2521
- continue
2522
2732
  comp = dict(infos)
2523
2733
  deps: dict[str, set[str]] = {qual: set() for qual in comp}
2524
2734
  for info in infos.values():
@@ -2942,6 +3152,12 @@ def _build_parser() -> argparse.ArgumentParser:
2942
3152
  action="store_true",
2943
3153
  help="Allow single-field bundles in synthesis plan.",
2944
3154
  )
3155
+ parser.add_argument(
3156
+ "--synthesis-merge-overlap",
3157
+ type=float,
3158
+ default=None,
3159
+ help="Jaccard overlap threshold for merging bundles (0.0-1.0).",
3160
+ )
2945
3161
  return parser
2946
3162
 
2947
3163
 
@@ -2985,6 +3201,7 @@ def run(argv: list[str] | None = None) -> int:
2985
3201
  ]
2986
3202
  config_path = Path(args.config) if args.config else None
2987
3203
  defaults = dataflow_defaults(Path(args.root), config_path)
3204
+ synth_defaults = synthesis_defaults(Path(args.root), config_path)
2988
3205
  merged = merge_payload(
2989
3206
  {
2990
3207
  "exclude": exclude_dirs,
@@ -3030,6 +3247,15 @@ def run(argv: list[str] | None = None) -> int:
3030
3247
  config=config,
3031
3248
  )
3032
3249
  synthesis_plan: dict[str, object] | None = None
3250
+ merge_overlap_threshold = None
3251
+ if args.synthesis_merge_overlap is not None:
3252
+ merge_overlap_threshold = args.synthesis_merge_overlap
3253
+ else:
3254
+ value = synth_defaults.get("merge_overlap_threshold")
3255
+ if isinstance(value, (int, float)):
3256
+ merge_overlap_threshold = float(value)
3257
+ if merge_overlap_threshold is not None:
3258
+ merge_overlap_threshold = max(0.0, min(1.0, merge_overlap_threshold))
3033
3259
  if args.synthesis_plan or args.synthesis_report or args.synthesis_protocols:
3034
3260
  synthesis_plan = build_synthesis_plan(
3035
3261
  analysis.groups_by_path,
@@ -3037,6 +3263,7 @@ def run(argv: list[str] | None = None) -> int:
3037
3263
  max_tier=args.synthesis_max_tier,
3038
3264
  min_bundle_size=args.synthesis_min_bundle_size,
3039
3265
  allow_singletons=args.synthesis_allow_singletons,
3266
+ merge_overlap_threshold=merge_overlap_threshold,
3040
3267
  config=config,
3041
3268
  )
3042
3269
  if args.synthesis_plan: