redup 0.4.30__tar.gz → 0.4.32__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (117)
  1. {redup-0.4.30/src/redup.egg-info → redup-0.4.32}/PKG-INFO +6 -8
  2. {redup-0.4.30 → redup-0.4.32}/README.md +4 -6
  3. {redup-0.4.30 → redup-0.4.32}/pyproject.toml +4 -7
  4. {redup-0.4.30 → redup-0.4.32}/src/redup/__init__.py +1 -1
  5. {redup-0.4.30 → redup-0.4.32}/src/redup/core/config.py +2 -2
  6. {redup-0.4.30 → redup-0.4.32}/src/redup/core/hasher.py +1 -1
  7. {redup-0.4.30 → redup-0.4.32}/src/redup/core/lazy_grouper.py +30 -4
  8. {redup-0.4.30 → redup-0.4.32}/src/redup/core/pipeline/duplicate_finder.py +91 -0
  9. {redup-0.4.30 → redup-0.4.32}/src/redup/mcp/server.py +11 -4
  10. {redup-0.4.30 → redup-0.4.32/src/redup.egg-info}/PKG-INFO +6 -8
  11. {redup-0.4.30 → redup-0.4.32}/src/redup.egg-info/requires.txt +1 -1
  12. {redup-0.4.30 → redup-0.4.32}/tests/test_hasher.py +14 -0
  13. {redup-0.4.30 → redup-0.4.32}/tests/test_mcp_server.py +1 -0
  14. {redup-0.4.30 → redup-0.4.32}/tests/test_pipeline.py +39 -1
  15. {redup-0.4.30 → redup-0.4.32}/LICENSE +0 -0
  16. {redup-0.4.30 → redup-0.4.32}/setup.cfg +0 -0
  17. {redup-0.4.30 → redup-0.4.32}/src/redup/__main__.py +0 -0
  18. {redup-0.4.30 → redup-0.4.32}/src/redup/analysis_logic.py +0 -0
  19. {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/__init__.py +0 -0
  20. {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/compare_command.py +0 -0
  21. {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/config_builder.py +0 -0
  22. {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/fuzzy_similarity.py +0 -0
  23. {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/intract_commands.py +0 -0
  24. {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/main.py +0 -0
  25. {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/output_writer.py +0 -0
  26. {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/quality_commands.py +0 -0
  27. {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/scan_commands.py +0 -0
  28. {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/scan_helpers.py +0 -0
  29. {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/tasks_command.py +0 -0
  30. {redup-0.4.30 → redup-0.4.32}/src/redup/config.py +0 -0
  31. {redup-0.4.30 → redup-0.4.32}/src/redup/config_handler.py +0 -0
  32. {redup-0.4.30 → redup-0.4.32}/src/redup/core/__init__.py +0 -0
  33. {redup-0.4.30 → redup-0.4.32}/src/redup/core/cache.py +0 -0
  34. {redup-0.4.30 → redup-0.4.32}/src/redup/core/community.py +0 -0
  35. {redup-0.4.30 → redup-0.4.32}/src/redup/core/comparator.py +0 -0
  36. {redup-0.4.30 → redup-0.4.32}/src/redup/core/decision.py +0 -0
  37. {redup-0.4.30 → redup-0.4.32}/src/redup/core/differ.py +0 -0
  38. {redup-0.4.30 → redup-0.4.32}/src/redup/core/fuzzy_similarity.py +0 -0
  39. {redup-0.4.30 → redup-0.4.32}/src/redup/core/grouper.py +0 -0
  40. {redup-0.4.30 → redup-0.4.32}/src/redup/core/hash_cache.py +0 -0
  41. {redup-0.4.30 → redup-0.4.32}/src/redup/core/lsh_matcher.py +0 -0
  42. {redup-0.4.30 → redup-0.4.32}/src/redup/core/matcher.py +0 -0
  43. {redup-0.4.30 → redup-0.4.32}/src/redup/core/models.py +0 -0
  44. {redup-0.4.30 → redup-0.4.32}/src/redup/core/pipeline/__init__.py +0 -0
  45. {redup-0.4.30 → redup-0.4.32}/src/redup/core/pipeline/groups.py +0 -0
  46. {redup-0.4.30 → redup-0.4.32}/src/redup/core/pipeline/phases.py +0 -0
  47. {redup-0.4.30 → redup-0.4.32}/src/redup/core/pipeline_utils.py +0 -0
  48. {redup-0.4.30 → redup-0.4.32}/src/redup/core/planner.py +0 -0
  49. {redup-0.4.30 → redup-0.4.32}/src/redup/core/python_parser.py +0 -0
  50. {redup-0.4.30 → redup-0.4.32}/src/redup/core/refactor_advisor.py +0 -0
  51. {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner/__init__.py +0 -0
  52. {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner.py +0 -0
  53. {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner_cache.py +0 -0
  54. {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner_filters.py +0 -0
  55. {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner_loader.py +0 -0
  56. {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner_models.py +0 -0
  57. {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner_types.py +0 -0
  58. {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner_utils.py +0 -0
  59. {redup-0.4.30 → redup-0.4.32}/src/redup/core/semantic.py +0 -0
  60. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/__init__.py +0 -0
  61. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/config.py +0 -0
  62. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/dispatcher.py +0 -0
  63. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/__init__.py +0 -0
  64. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/base.py +0 -0
  65. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/c_family.py +0 -0
  66. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/dotnet.py +0 -0
  67. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/markup.py +0 -0
  68. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/php.py +0 -0
  69. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/query.py +0 -0
  70. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/ruby.py +0 -0
  71. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/shell.py +0 -0
  72. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/stylesheet.py +0 -0
  73. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/web.py +0 -0
  74. {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/main.py +0 -0
  75. {redup-0.4.30 → redup-0.4.32}/src/redup/core/universal_fuzzy.py +0 -0
  76. {redup-0.4.30 → redup-0.4.32}/src/redup/core/utils/__init__.py +0 -0
  77. {redup-0.4.30 → redup-0.4.32}/src/redup/core/utils/diff_helpers.py +0 -0
  78. {redup-0.4.30 → redup-0.4.32}/src/redup/core/utils/duplicate_finders.py +0 -0
  79. {redup-0.4.30 → redup-0.4.32}/src/redup/core/utils/function_extractor.py +0 -0
  80. {redup-0.4.30 → redup-0.4.32}/src/redup/core/utils/hash_utils.py +0 -0
  81. {redup-0.4.30 → redup-0.4.32}/src/redup/core/utils/language_dispatcher.py +0 -0
  82. {redup-0.4.30 → redup-0.4.32}/src/redup/integrations/__init__.py +0 -0
  83. {redup-0.4.30 → redup-0.4.32}/src/redup/integrations/intract/__init__.py +0 -0
  84. {redup-0.4.30 → redup-0.4.32}/src/redup/integrations/intract/adapter.py +0 -0
  85. {redup-0.4.30 → redup-0.4.32}/src/redup/integrations/intract/policy.py +0 -0
  86. {redup-0.4.30 → redup-0.4.32}/src/redup/integrations/planfile_integration.py +0 -0
  87. {redup-0.4.30 → redup-0.4.32}/src/redup/mcp/__init__.py +0 -0
  88. {redup-0.4.30 → redup-0.4.32}/src/redup/mcp/handlers.py +0 -0
  89. {redup-0.4.30 → redup-0.4.32}/src/redup/mcp/schemas.py +0 -0
  90. {redup-0.4.30 → redup-0.4.32}/src/redup/mcp/utils.py +0 -0
  91. {redup-0.4.30 → redup-0.4.32}/src/redup/mcp_server.py +0 -0
  92. {redup-0.4.30 → redup-0.4.32}/src/redup/mcp_server_clean.py +0 -0
  93. {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/__init__.py +0 -0
  94. {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/code2llm_reporter.py +0 -0
  95. {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/enhanced_reporter.py +0 -0
  96. {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/json_reporter.py +0 -0
  97. {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/markdown_reporter.py +0 -0
  98. {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/toon_reporter.py +0 -0
  99. {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/yaml_reporter.py +0 -0
  100. {redup-0.4.30 → redup-0.4.32}/src/redup/reporters.py +0 -0
  101. {redup-0.4.30 → redup-0.4.32}/src/redup/utils.py +0 -0
  102. {redup-0.4.30 → redup-0.4.32}/src/redup.egg-info/SOURCES.txt +0 -0
  103. {redup-0.4.30 → redup-0.4.32}/src/redup.egg-info/dependency_links.txt +0 -0
  104. {redup-0.4.30 → redup-0.4.32}/src/redup.egg-info/entry_points.txt +0 -0
  105. {redup-0.4.30 → redup-0.4.32}/src/redup.egg-info/top_level.txt +0 -0
  106. {redup-0.4.30 → redup-0.4.32}/tests/test_cli_import_compat.py +0 -0
  107. {redup-0.4.30 → redup-0.4.32}/tests/test_compare.py +0 -0
  108. {redup-0.4.30 → redup-0.4.32}/tests/test_e2e.py +0 -0
  109. {redup-0.4.30 → redup-0.4.32}/tests/test_intent_integration.py +0 -0
  110. {redup-0.4.30 → redup-0.4.32}/tests/test_matcher.py +0 -0
  111. {redup-0.4.30 → redup-0.4.32}/tests/test_models.py +0 -0
  112. {redup-0.4.30 → redup-0.4.32}/tests/test_planfile_integration.py +0 -0
  113. {redup-0.4.30 → redup-0.4.32}/tests/test_planner.py +0 -0
  114. {redup-0.4.30 → redup-0.4.32}/tests/test_quality_commands.py +0 -0
  115. {redup-0.4.30 → redup-0.4.32}/tests/test_reporters.py +0 -0
  116. {redup-0.4.30 → redup-0.4.32}/tests/test_scanner.py +0 -0
  117. {redup-0.4.30 → redup-0.4.32}/tests/test_ts_extractor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: redup
3
- Version: 0.4.30
3
+ Version: 0.4.32
4
4
  Summary: Code duplication analyzer and refactoring planner for LLMs
5
5
  Author-email: Tom Sapletta <tom@sapletta.com>
6
6
  License-Expression: Apache-2.0
@@ -76,7 +76,7 @@ Requires-Dist: datasketch>=1.6; extra == "lsh"
76
76
  Provides-Extra: semantic
77
77
  Requires-Dist: sentence-transformers>=2.0; extra == "semantic"
78
78
  Provides-Extra: intent
79
- Requires-Dist: intract>=0.5.6; extra == "intent"
79
+ Requires-Dist: intract>=0.5.7; extra == "intent"
80
80
  Provides-Extra: dev
81
81
  Requires-Dist: pytest>=7.0; extra == "dev"
82
82
  Requires-Dist: pytest-cov>=4.0; extra == "dev"
@@ -94,18 +94,16 @@ Dynamic: license-file
94
94
  [![PyPI](https://img.shields.io/pypi/v/redup)](https://pypi.org/project/redup/)
95
95
  [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
96
96
  [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://python.org)
97
- [![Version](https://img.shields.io/badge/version-0.4.30-green.svg)](https://pypi.org/project/redup/)
97
+ [![Version](https://img.shields.io/badge/version-0.4.32-green.svg)](https://pypi.org/project/redup/)
98
98
 
99
99
 
100
100
  ## AI Cost Tracking
101
101
 
102
- ![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.4.30-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
103
- ![AI Cost](https://img.shields.io/badge/AI%20Cost-$31.52-orange) ![Human Time](https://img.shields.io/badge/Human%20Time-26.1h-blue) ![Model](https://img.shields.io/badge/Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
102
+ ![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-yellow) ![AI Model](https://img.shields.io/badge/AI%20Model-openrouter%2Fdeep%2Fdeep-v4-pro-lightgrey)
104
103
 
105
- - 🤖 **LLM usage:** $31.5200 (74 commits)
106
- - 👤 **Human dev:** ~$2609 (26.1h @ $100/h, 30min dedup)
104
+ This project uses AI-generated code. Total cost: **$7.5000** with **79** AI commits.
107
105
 
108
- Generated on 2026-05-31 using [openrouter/qwen/qwen3-coder-next](https://openrouter.ai/qwen/qwen3-coder-next)
106
+ Generated on 2026-06-29 using [openrouter/deep/deep-v4-pro](https://openrouter.ai/models/openrouter/deep/deep-v4-pro)
109
107
 
110
108
  ---
111
109
 
@@ -5,18 +5,16 @@
5
5
  [![PyPI](https://img.shields.io/pypi/v/redup)](https://pypi.org/project/redup/)
6
6
  [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
7
7
  [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://python.org)
8
- [![Version](https://img.shields.io/badge/version-0.4.30-green.svg)](https://pypi.org/project/redup/)
8
+ [![Version](https://img.shields.io/badge/version-0.4.32-green.svg)](https://pypi.org/project/redup/)
9
9
 
10
10
 
11
11
  ## AI Cost Tracking
12
12
 
13
- ![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.4.30-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
14
- ![AI Cost](https://img.shields.io/badge/AI%20Cost-$31.52-orange) ![Human Time](https://img.shields.io/badge/Human%20Time-26.1h-blue) ![Model](https://img.shields.io/badge/Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
13
+ ![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-yellow) ![AI Model](https://img.shields.io/badge/AI%20Model-openrouter%2Fdeep%2Fdeep-v4-pro-lightgrey)
15
14
 
16
- - 🤖 **LLM usage:** $31.5200 (74 commits)
17
- - 👤 **Human dev:** ~$2609 (26.1h @ $100/h, 30min dedup)
15
+ This project uses AI-generated code. Total cost: **$7.5000** with **79** AI commits.
18
16
 
19
- Generated on 2026-05-31 using [openrouter/qwen/qwen3-coder-next](https://openrouter.ai/qwen/qwen3-coder-next)
17
+ Generated on 2026-06-29 using [openrouter/deep/deep-v4-pro](https://openrouter.ai/models/openrouter/deep/deep-v4-pro)
20
18
 
21
19
  ---
22
20
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "redup"
7
- version = "0.4.30"
7
+ version = "0.4.32"
8
8
  description = "Code duplication analyzer and refactoring planner for LLMs"
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -75,7 +75,7 @@ ast = [
75
75
  ]
76
76
  lsh = ["datasketch>=1.6"]
77
77
  semantic = ["sentence-transformers>=2.0"]
78
- intent = ["intract>=0.5.6"]
78
+ intent = ["intract>=0.5.7"]
79
79
  dev = [
80
80
  "pytest>=7.0",
81
81
  "pytest-cov>=4.0",
@@ -99,9 +99,6 @@ Documentation = "https://github.com/semcod/redup#readme"
99
99
  [tool.setuptools.packages.find]
100
100
  where = ["src"]
101
101
 
102
- [tool.uv.sources]
103
- intract = { path = "../intract", editable = true }
104
-
105
102
  [tool.ruff]
106
103
  target-version = "py310"
107
104
  line-length = 100
@@ -123,7 +120,7 @@ addopts = "-v --tb=short"
123
120
 
124
121
  [tool.pfix]
125
122
  # Self-healing Python configuration
126
- model = "openrouter/qwen/qwen3-coder-next"
123
+ model = "openrouter/deep/deep-v4-pro"
127
124
  auto_apply = true
128
125
  auto_install_deps = true
129
126
  auto_restart = false
@@ -142,7 +139,7 @@ deduplicate = true
142
139
  badge = true
143
140
  update_readme = true
144
141
  readme_path = "README.md"
145
- default_model = "openrouter/qwen/qwen3-coder-next"
142
+ default_model = "openrouter/deep/deep-v4-pro"
146
143
  analysis_mode = "byok"
147
144
  full_history = true
148
145
  max_commits = 500
@@ -2,7 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "0.4.30"
5
+ __version__ = "0.4.32"
6
6
 
7
7
  # Click compatibility shim for older typer versions
8
8
  # This must run before any typer imports
@@ -118,7 +118,7 @@ def config_to_scan_config(config: dict[str, Any], path: Path) -> ScanConfig:
118
118
  min_similarity=config.get("min_similarity", scan_config.get("min_similarity", 0.85)),
119
119
  include_tests=config.get("include_tests", scan_config.get("include_tests", False)),
120
120
  lsh_enabled=lsh_config.get("enabled", True),
121
- lsh_min_lines=lsh_config.get("min_lines", 50),
121
+ lsh_min_lines=lsh_config.get("min_lines", 12),
122
122
  lsh_threshold=lsh_config.get("threshold", 0.8),
123
123
  )
124
124
 
@@ -147,7 +147,7 @@ include_tests = false
147
147
  # Enable LSH near-duplicate detection for large blocks
148
148
  enabled = true
149
149
  # Minimum block size for LSH analysis
150
- min_lines = 50
150
+ min_lines = 12
151
151
  # Similarity threshold for LSH (0.0-1.0)
152
152
  threshold = 0.8
153
153
 
@@ -70,7 +70,7 @@ def _ast_to_normalized_string(tree: object) -> str:
70
70
  elif isinstance(node, _ast.Attribute):
71
71
  token = "ATTR"
72
72
  elif isinstance(node, _ast.Constant):
73
- token = f"CONST_{type(node.value).__name__}"
73
+ token = "CONST"
74
74
  elif isinstance(node, _ast.BinOp):
75
75
  token = f"BINOP_{type(node.op).__name__}"
76
76
  elif isinstance(node, _ast.Compare):
@@ -37,12 +37,16 @@ def find_exact_duplicates_lazy(index: HashIndex, min_lines: int = 3) -> Iterator
37
37
 
38
38
 
39
39
  def find_structural_duplicates_lazy(
40
- index: HashIndex, min_lines: int = 3
40
+ index: HashIndex,
41
+ min_lines: int = 3,
42
+ covered_locations: set[tuple[str, int]] | None = None,
41
43
  ) -> Iterator[DuplicateGroup]:
42
44
  """Find structural duplicate groups with lazy evaluation and early exit.
43
45
 
44
46
  Similar to find_exact_duplicates_lazy but for structural hashes.
45
47
  """
48
+ covered = covered_locations or set()
49
+
46
50
  for hash_val, blocks in index.structural.items():
47
51
  if len(blocks) < 2:
48
52
  continue
@@ -51,8 +55,18 @@ def find_structural_duplicates_lazy(
51
55
  if not _blocks_from_different_locations(blocks):
52
56
  continue
53
57
 
58
+ uncovered = [
59
+ hb
60
+ for hb in blocks
61
+ if (hb.block.file, hb.block.line_start) not in covered
62
+ ]
63
+ if len(uncovered) < 2:
64
+ continue
65
+ if not _blocks_from_different_locations(uncovered):
66
+ continue
67
+
54
68
  # Create group and check line threshold
55
- group = _create_duplicate_group(hash_val, blocks, DuplicateType.STRUCTURAL)
69
+ group = _create_duplicate_group(hash_val, uncovered, DuplicateType.STRUCTURAL)
56
70
  if group.total_lines >= min_lines:
57
71
  yield group
58
72
 
@@ -106,11 +120,23 @@ def find_all_duplicates_lazy(
106
120
  Yields:
107
121
  DuplicateGroup objects that meet the threshold
108
122
  """
123
+ covered_locations: set[tuple[str, int]] = set()
124
+
109
125
  if include_exact:
110
- yield from find_exact_duplicates_lazy(index, min_lines)
126
+ for group in find_exact_duplicates_lazy(index, min_lines):
127
+ for fragment in group.fragments:
128
+ covered_locations.add((fragment.file, fragment.line_start))
129
+ yield group
111
130
 
112
131
  if include_structural:
113
- yield from find_structural_duplicates_lazy(index, min_lines)
132
+ for group in find_structural_duplicates_lazy(
133
+ index,
134
+ min_lines,
135
+ covered_locations=covered_locations,
136
+ ):
137
+ for fragment in group.fragments:
138
+ covered_locations.add((fragment.file, fragment.line_start))
139
+ yield group
114
140
 
115
141
 
116
142
  class DuplicateGroupCollector:
@@ -17,6 +17,94 @@ from redup.core.models import DuplicateFragment, DuplicateGroup, DuplicateType,
17
17
  from redup.core.scanner_types import CodeBlock
18
18
 
19
19
 
20
+ def find_fuzzy_groups(
21
+ all_blocks: list[CodeBlock],
22
+ config: ScanConfig,
23
+ covered_locations: set[tuple[str, int]] | None = None,
24
+ ) -> list[DuplicateGroup]:
25
+ """Find high-similarity function pairs missed by exact/structural hashing."""
26
+ from redup.core.matcher import sequence_similarity
27
+
28
+ covered = set(covered_locations or ())
29
+ min_lines = config.min_block_lines
30
+ min_similarity = config.min_similarity
31
+
32
+ candidates = [
33
+ block
34
+ for block in all_blocks
35
+ if block.function_name
36
+ and block.line_count >= min_lines
37
+ and (block.file, block.line_start) not in covered
38
+ ]
39
+ if len(candidates) < 2:
40
+ return []
41
+
42
+ candidates.sort(key=lambda block: (block.line_count, block.file, block.line_start))
43
+ used: set[tuple[str, int]] = set()
44
+ groups: list[DuplicateGroup] = []
45
+
46
+ for index, anchor in enumerate(candidates):
47
+ anchor_key = (anchor.file, anchor.line_start)
48
+ if anchor_key in used:
49
+ continue
50
+
51
+ cluster = [anchor]
52
+ for other in candidates[index + 1 :]:
53
+ other_key = (other.file, other.line_start)
54
+ if other_key in used:
55
+ continue
56
+ if abs(other.line_count - anchor.line_count) > 4:
57
+ break
58
+ if other.file == anchor.file and other.line_start == anchor.line_start:
59
+ continue
60
+ similarity = sequence_similarity(anchor.text, other.text)
61
+ if similarity < min_similarity:
62
+ continue
63
+ cluster.append(other)
64
+
65
+ if len(cluster) < 2:
66
+ continue
67
+
68
+ fragments = [
69
+ DuplicateFragment(
70
+ file=block.file,
71
+ line_start=block.line_start,
72
+ line_end=block.line_end,
73
+ text=block.text,
74
+ function_name=block.function_name,
75
+ class_name=block.class_name,
76
+ )
77
+ for block in cluster
78
+ ]
79
+ avg_similarity = sum(
80
+ sequence_similarity(cluster[0].text, block.text) for block in cluster[1:]
81
+ ) / (len(cluster) - 1)
82
+
83
+ groups.append(
84
+ DuplicateGroup(
85
+ id=f"F{len(groups) + 1:04d}",
86
+ duplicate_type=DuplicateType.FUZZY,
87
+ fragments=fragments,
88
+ similarity_score=avg_similarity,
89
+ normalized_hash=f"fuzzy_{anchor.file}_{anchor.line_start}",
90
+ normalized_name=anchor.function_name,
91
+ )
92
+ )
93
+ for block in cluster:
94
+ used.add((block.file, block.line_start))
95
+
96
+ return groups
97
+
98
+
99
+ def _covered_locations(groups: list[DuplicateGroup]) -> set[tuple[str, int]]:
100
+ """Collect file/line locations already assigned to duplicate groups."""
101
+ covered: set[tuple[str, int]] = set()
102
+ for group in groups:
103
+ for fragment in group.fragments:
104
+ covered.add((fragment.file, fragment.line_start))
105
+ return covered
106
+
107
+
20
108
  def _finalize_duplicate_groups(
21
109
  groups: list[DuplicateGroup],
22
110
  all_blocks: list[CodeBlock],
@@ -25,6 +113,9 @@ def _finalize_duplicate_groups(
25
113
  cache: HashCache | None = None,
26
114
  ) -> list[DuplicateGroup]:
27
115
  """Attach near duplicates, sort by impact, and report timing."""
116
+ covered = _covered_locations(groups)
117
+ groups.extend(find_fuzzy_groups(all_blocks, config, covered))
118
+ covered = _covered_locations(groups)
28
119
  groups.extend(find_near_duplicate_groups(all_blocks, config))
29
120
  if getattr(config, "intent_enabled", False):
30
121
  groups.extend(find_intent_groups(all_blocks, config))
@@ -10,14 +10,18 @@ from redup import __version__
10
10
  from redup.mcp.handlers import TOOL_HANDLERS
11
11
  from redup.mcp.schemas import TOOL_SCHEMA_REDUP
12
12
 
13
+ _PROTOCOL_VERSION = "2024-11-05"
14
+ _NOTIFICATIONS = frozenset({"notifications/initialized", "notifications/cancelled"})
13
15
 
14
- def handle_initialize(request_id: Any) -> dict[str, Any]:
16
+
17
+ def handle_initialize(request_id: Any, params: dict[str, Any] | None = None) -> dict[str, Any]:
15
18
  """Handle MCP initialize request."""
19
+ client_version = (params or {}).get("protocolVersion", _PROTOCOL_VERSION)
16
20
  return {
17
21
  "jsonrpc": "2.0",
18
22
  "id": request_id,
19
23
  "result": {
20
- "protocolVersion": "0.1.0",
24
+ "protocolVersion": client_version,
21
25
  "serverInfo": {
22
26
  "name": "redup",
23
27
  "version": __version__,
@@ -86,8 +90,10 @@ def handle_request(request: dict[str, Any]) -> dict[str, Any]:
86
90
  params = request.get("params", {}) or {}
87
91
  request_id = request.get("id")
88
92
 
93
+ if method in _NOTIFICATIONS:
94
+ return {}
89
95
  if method == "initialize":
90
- return handle_initialize(request_id)
96
+ return handle_initialize(request_id, params)
91
97
  if method == "tools/list":
92
98
  return handle_tools_list(request_id)
93
99
  if method == "tools/call":
@@ -117,7 +123,8 @@ def run_server() -> None:
117
123
  try:
118
124
  request = json.loads(line)
119
125
  response = handle_request(request)
120
- print(json.dumps(response), flush=True)
126
+ if response:
127
+ print(json.dumps(response), flush=True)
121
128
  except json.JSONDecodeError as exc:
122
129
  error_response = {
123
130
  "jsonrpc": "2.0",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: redup
3
- Version: 0.4.30
3
+ Version: 0.4.32
4
4
  Summary: Code duplication analyzer and refactoring planner for LLMs
5
5
  Author-email: Tom Sapletta <tom@sapletta.com>
6
6
  License-Expression: Apache-2.0
@@ -76,7 +76,7 @@ Requires-Dist: datasketch>=1.6; extra == "lsh"
76
76
  Provides-Extra: semantic
77
77
  Requires-Dist: sentence-transformers>=2.0; extra == "semantic"
78
78
  Provides-Extra: intent
79
- Requires-Dist: intract>=0.5.6; extra == "intent"
79
+ Requires-Dist: intract>=0.5.7; extra == "intent"
80
80
  Provides-Extra: dev
81
81
  Requires-Dist: pytest>=7.0; extra == "dev"
82
82
  Requires-Dist: pytest-cov>=4.0; extra == "dev"
@@ -94,18 +94,16 @@ Dynamic: license-file
94
94
  [![PyPI](https://img.shields.io/pypi/v/redup)](https://pypi.org/project/redup/)
95
95
  [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
96
96
  [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://python.org)
97
- [![Version](https://img.shields.io/badge/version-0.4.30-green.svg)](https://pypi.org/project/redup/)
97
+ [![Version](https://img.shields.io/badge/version-0.4.32-green.svg)](https://pypi.org/project/redup/)
98
98
 
99
99
 
100
100
  ## AI Cost Tracking
101
101
 
102
- ![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.4.30-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
103
- ![AI Cost](https://img.shields.io/badge/AI%20Cost-$31.52-orange) ![Human Time](https://img.shields.io/badge/Human%20Time-26.1h-blue) ![Model](https://img.shields.io/badge/Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
102
+ ![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-yellow) ![AI Model](https://img.shields.io/badge/AI%20Model-openrouter%2Fdeep%2Fdeep-v4-pro-lightgrey)
104
103
 
105
- - 🤖 **LLM usage:** $31.5200 (74 commits)
106
- - 👤 **Human dev:** ~$2609 (26.1h @ $100/h, 30min dedup)
104
+ This project uses AI-generated code. Total cost: **$7.5000** with **79** AI commits.
107
105
 
108
- Generated on 2026-05-31 using [openrouter/qwen/qwen3-coder-next](https://openrouter.ai/qwen/qwen3-coder-next)
106
+ Generated on 2026-06-29 using [openrouter/deep/deep-v4-pro](https://openrouter.ai/models/openrouter/deep/deep-v4-pro)
109
107
 
110
108
  ---
111
109
 
@@ -61,7 +61,7 @@ pybloom-live>=4.0
61
61
  rapidfuzz>=3.0
62
62
 
63
63
  [intent]
64
- intract>=0.5.6
64
+ intract>=0.5.7
65
65
 
66
66
  [llm]
67
67
  litellm>=1.0
@@ -33,6 +33,20 @@ def test_structural_hash_ignores_literals():
33
33
  assert hash_block_structural(a) == hash_block_structural(b)
34
34
 
35
35
 
36
+ def test_structural_hash_env_var_readers():
37
+ a = '''def read_a() -> str:
38
+ raw = os.environ.get("VAR_A", "").strip()
39
+ if raw.isdigit():
40
+ return raw
41
+ return "28"'''
42
+ b = '''def read_b() -> str:
43
+ raw = os.environ.get("VAR_B", "").strip()
44
+ if raw.isdigit():
45
+ return raw
46
+ return "29"'''
47
+ assert hash_block_structural(a) == hash_block_structural(b)
48
+
49
+
36
50
  def test_structural_hash_different_structure():
37
51
  a = "x = 1\ny = 2"
38
52
  b = "for i in range(10):\n print(i)"
@@ -80,6 +80,7 @@ def _analyze_test_project(root: Path):
80
80
 
81
81
  def test_initialize_and_tools_list() -> None:
82
82
  init_response = handle_request({"jsonrpc": "2.0", "method": "initialize", "id": 1})
83
+ assert init_response["result"]["protocolVersion"] == "2024-11-05"
83
84
  assert init_response["result"]["serverInfo"]["name"] == "redup"
84
85
  assert init_response["result"]["serverInfo"]["version"]
85
86
 
@@ -4,8 +4,10 @@ import sqlite3
4
4
  import tempfile
5
5
  from pathlib import Path
6
6
 
7
- from redup.core.models import ScanConfig
7
+ from redup.core.models import DuplicateType, ScanConfig
8
8
  from redup.core.pipeline import analyze, analyze_optimized
9
+ from redup.core.pipeline.duplicate_finder import find_fuzzy_groups
10
+ from redup.core.scanner import CodeBlock
9
11
 
10
12
 
11
13
  def _create_test_project(root: Path) -> None:
@@ -157,3 +159,39 @@ def test_analyze_optimized_stores_incremental_cache():
157
159
  file_count = conn.execute("SELECT COUNT(*) FROM file_hashes").fetchone()[0]
158
160
 
159
161
  assert file_count >= 1
162
+
163
+
164
+ def test_find_fuzzy_groups_detects_renamed_env_readers():
165
+ blocks = [
166
+ CodeBlock(
167
+ file="a.py",
168
+ line_start=1,
169
+ line_end=6,
170
+ text=(
171
+ 'def read_enter() -> str:\n'
172
+ ' raw = os.environ.get("ENTER", "").strip()\n'
173
+ ' if raw.isdigit():\n'
174
+ ' return raw\n'
175
+ ' return "28"\n'
176
+ ),
177
+ function_name="read_enter",
178
+ ),
179
+ CodeBlock(
180
+ file="b.py",
181
+ line_start=1,
182
+ line_end=6,
183
+ text=(
184
+ 'def read_ctrl() -> str:\n'
185
+ ' raw = os.environ.get("CTRL", "").strip()\n'
186
+ ' if raw.isdigit():\n'
187
+ ' return raw\n'
188
+ ' return "29"\n'
189
+ ),
190
+ function_name="read_ctrl",
191
+ ),
192
+ ]
193
+ config = ScanConfig(min_block_lines=3, min_similarity=0.85)
194
+ groups = find_fuzzy_groups(blocks, config)
195
+ assert len(groups) == 1
196
+ assert groups[0].duplicate_type == DuplicateType.FUZZY
197
+ assert groups[0].occurrences == 2
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes