redup 0.4.30__tar.gz → 0.4.32__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {redup-0.4.30/src/redup.egg-info → redup-0.4.32}/PKG-INFO +6 -8
- {redup-0.4.30 → redup-0.4.32}/README.md +4 -6
- {redup-0.4.30 → redup-0.4.32}/pyproject.toml +4 -7
- {redup-0.4.30 → redup-0.4.32}/src/redup/__init__.py +1 -1
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/config.py +2 -2
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/hasher.py +1 -1
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/lazy_grouper.py +30 -4
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/pipeline/duplicate_finder.py +91 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/mcp/server.py +11 -4
- {redup-0.4.30 → redup-0.4.32/src/redup.egg-info}/PKG-INFO +6 -8
- {redup-0.4.30 → redup-0.4.32}/src/redup.egg-info/requires.txt +1 -1
- {redup-0.4.30 → redup-0.4.32}/tests/test_hasher.py +14 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_mcp_server.py +1 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_pipeline.py +39 -1
- {redup-0.4.30 → redup-0.4.32}/LICENSE +0 -0
- {redup-0.4.30 → redup-0.4.32}/setup.cfg +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/__main__.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/analysis_logic.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/__init__.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/compare_command.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/config_builder.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/fuzzy_similarity.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/intract_commands.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/main.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/output_writer.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/quality_commands.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/scan_commands.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/scan_helpers.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/cli_app/tasks_command.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/config.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/config_handler.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/__init__.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/cache.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/community.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/comparator.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/decision.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/differ.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/fuzzy_similarity.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/grouper.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/hash_cache.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/lsh_matcher.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/matcher.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/models.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/pipeline/__init__.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/pipeline/groups.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/pipeline/phases.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/pipeline_utils.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/planner.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/python_parser.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/refactor_advisor.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner/__init__.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner_cache.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner_filters.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner_loader.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner_models.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner_types.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/scanner_utils.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/semantic.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/__init__.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/config.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/dispatcher.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/__init__.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/base.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/c_family.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/dotnet.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/markup.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/php.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/query.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/ruby.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/shell.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/stylesheet.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/extractors/web.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/ts_extractor/main.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/universal_fuzzy.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/utils/__init__.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/utils/diff_helpers.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/utils/duplicate_finders.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/utils/function_extractor.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/utils/hash_utils.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/core/utils/language_dispatcher.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/integrations/__init__.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/integrations/intract/__init__.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/integrations/intract/adapter.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/integrations/intract/policy.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/integrations/planfile_integration.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/mcp/__init__.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/mcp/handlers.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/mcp/schemas.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/mcp/utils.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/mcp_server.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/mcp_server_clean.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/__init__.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/code2llm_reporter.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/enhanced_reporter.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/json_reporter.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/markdown_reporter.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/toon_reporter.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/reporters/yaml_reporter.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/reporters.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup/utils.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup.egg-info/SOURCES.txt +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup.egg-info/dependency_links.txt +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup.egg-info/entry_points.txt +0 -0
- {redup-0.4.30 → redup-0.4.32}/src/redup.egg-info/top_level.txt +0 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_cli_import_compat.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_compare.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_e2e.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_intent_integration.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_matcher.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_models.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_planfile_integration.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_planner.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_quality_commands.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_reporters.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_scanner.py +0 -0
- {redup-0.4.30 → redup-0.4.32}/tests/test_ts_extractor.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: redup
|
|
3
|
-
Version: 0.4.30
|
|
3
|
+
Version: 0.4.32
|
|
4
4
|
Summary: Code duplication analyzer and refactoring planner for LLMs
|
|
5
5
|
Author-email: Tom Sapletta <tom@sapletta.com>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -76,7 +76,7 @@ Requires-Dist: datasketch>=1.6; extra == "lsh"
|
|
|
76
76
|
Provides-Extra: semantic
|
|
77
77
|
Requires-Dist: sentence-transformers>=2.0; extra == "semantic"
|
|
78
78
|
Provides-Extra: intent
|
|
79
|
-
Requires-Dist: intract>=0.5.
|
|
79
|
+
Requires-Dist: intract>=0.5.7; extra == "intent"
|
|
80
80
|
Provides-Extra: dev
|
|
81
81
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
82
82
|
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
@@ -94,18 +94,16 @@ Dynamic: license-file
|
|
|
94
94
|
[](https://pypi.org/project/redup/)
|
|
95
95
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
96
96
|
[](https://python.org)
|
|
97
|
-
[](https://pypi.org/project/redup/)
|
|
98
98
|
|
|
99
99
|
|
|
100
100
|
## AI Cost Tracking
|
|
101
101
|
|
|
102
|
-
  
|
|
102
|
+
 
|
|
104
103
|
|
|
105
|
-
-
|
|
106
|
-
- 👤 **Human dev:** ~$2609 (26.1h @ $100/h, 30min dedup)
|
|
104
|
+
This project uses AI-generated code. Total cost: **$7.5000** with **79** AI commits.
|
|
107
105
|
|
|
108
|
-
Generated on 2026-
|
|
106
|
+
Generated on 2026-06-29 using [openrouter/deep/deep-v4-pro](https://openrouter.ai/models/openrouter/deep/deep-v4-pro)
|
|
109
107
|
|
|
110
108
|
---
|
|
111
109
|
|
|
@@ -5,18 +5,16 @@
|
|
|
5
5
|
[](https://pypi.org/project/redup/)
|
|
6
6
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
7
7
|
[](https://python.org)
|
|
8
|
-
[](https://pypi.org/project/redup/)
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
## AI Cost Tracking
|
|
12
12
|
|
|
13
|
-
  
|
|
13
|
+
 
|
|
15
14
|
|
|
16
|
-
-
|
|
17
|
-
- 👤 **Human dev:** ~$2609 (26.1h @ $100/h, 30min dedup)
|
|
15
|
+
This project uses AI-generated code. Total cost: **$7.5000** with **79** AI commits.
|
|
18
16
|
|
|
19
|
-
Generated on 2026-
|
|
17
|
+
Generated on 2026-06-29 using [openrouter/deep/deep-v4-pro](https://openrouter.ai/models/openrouter/deep/deep-v4-pro)
|
|
20
18
|
|
|
21
19
|
---
|
|
22
20
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "redup"
|
|
7
|
-
version = "0.4.30"
|
|
7
|
+
version = "0.4.32"
|
|
8
8
|
description = "Code duplication analyzer and refactoring planner for LLMs"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "Apache-2.0"
|
|
@@ -75,7 +75,7 @@ ast = [
|
|
|
75
75
|
]
|
|
76
76
|
lsh = ["datasketch>=1.6"]
|
|
77
77
|
semantic = ["sentence-transformers>=2.0"]
|
|
78
|
-
intent = ["intract>=0.5.
|
|
78
|
+
intent = ["intract>=0.5.7"]
|
|
79
79
|
dev = [
|
|
80
80
|
"pytest>=7.0",
|
|
81
81
|
"pytest-cov>=4.0",
|
|
@@ -99,9 +99,6 @@ Documentation = "https://github.com/semcod/redup#readme"
|
|
|
99
99
|
[tool.setuptools.packages.find]
|
|
100
100
|
where = ["src"]
|
|
101
101
|
|
|
102
|
-
[tool.uv.sources]
|
|
103
|
-
intract = { path = "../intract", editable = true }
|
|
104
|
-
|
|
105
102
|
[tool.ruff]
|
|
106
103
|
target-version = "py310"
|
|
107
104
|
line-length = 100
|
|
@@ -123,7 +120,7 @@ addopts = "-v --tb=short"
|
|
|
123
120
|
|
|
124
121
|
[tool.pfix]
|
|
125
122
|
# Self-healing Python configuration
|
|
126
|
-
model = "openrouter/
|
|
123
|
+
model = "openrouter/deep/deep-v4-pro"
|
|
127
124
|
auto_apply = true
|
|
128
125
|
auto_install_deps = true
|
|
129
126
|
auto_restart = false
|
|
@@ -142,7 +139,7 @@ deduplicate = true
|
|
|
142
139
|
badge = true
|
|
143
140
|
update_readme = true
|
|
144
141
|
readme_path = "README.md"
|
|
145
|
-
default_model = "openrouter/
|
|
142
|
+
default_model = "openrouter/deep/deep-v4-pro"
|
|
146
143
|
analysis_mode = "byok"
|
|
147
144
|
full_history = true
|
|
148
145
|
max_commits = 500
|
|
@@ -118,7 +118,7 @@ def config_to_scan_config(config: dict[str, Any], path: Path) -> ScanConfig:
|
|
|
118
118
|
min_similarity=config.get("min_similarity", scan_config.get("min_similarity", 0.85)),
|
|
119
119
|
include_tests=config.get("include_tests", scan_config.get("include_tests", False)),
|
|
120
120
|
lsh_enabled=lsh_config.get("enabled", True),
|
|
121
|
-
lsh_min_lines=lsh_config.get("min_lines",
|
|
121
|
+
lsh_min_lines=lsh_config.get("min_lines", 12),
|
|
122
122
|
lsh_threshold=lsh_config.get("threshold", 0.8),
|
|
123
123
|
)
|
|
124
124
|
|
|
@@ -147,7 +147,7 @@ include_tests = false
|
|
|
147
147
|
# Enable LSH near-duplicate detection for large blocks
|
|
148
148
|
enabled = true
|
|
149
149
|
# Minimum block size for LSH analysis
|
|
150
|
-
min_lines =
|
|
150
|
+
min_lines = 12
|
|
151
151
|
# Similarity threshold for LSH (0.0-1.0)
|
|
152
152
|
threshold = 0.8
|
|
153
153
|
|
|
@@ -70,7 +70,7 @@ def _ast_to_normalized_string(tree: object) -> str:
|
|
|
70
70
|
elif isinstance(node, _ast.Attribute):
|
|
71
71
|
token = "ATTR"
|
|
72
72
|
elif isinstance(node, _ast.Constant):
|
|
73
|
-
token =
|
|
73
|
+
token = "CONST"
|
|
74
74
|
elif isinstance(node, _ast.BinOp):
|
|
75
75
|
token = f"BINOP_{type(node.op).__name__}"
|
|
76
76
|
elif isinstance(node, _ast.Compare):
|
|
@@ -37,12 +37,16 @@ def find_exact_duplicates_lazy(index: HashIndex, min_lines: int = 3) -> Iterator
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
def find_structural_duplicates_lazy(
|
|
40
|
-
index: HashIndex,
|
|
40
|
+
index: HashIndex,
|
|
41
|
+
min_lines: int = 3,
|
|
42
|
+
covered_locations: set[tuple[str, int]] | None = None,
|
|
41
43
|
) -> Iterator[DuplicateGroup]:
|
|
42
44
|
"""Find structural duplicate groups with lazy evaluation and early exit.
|
|
43
45
|
|
|
44
46
|
Similar to find_exact_duplicates_lazy but for structural hashes.
|
|
45
47
|
"""
|
|
48
|
+
covered = covered_locations or set()
|
|
49
|
+
|
|
46
50
|
for hash_val, blocks in index.structural.items():
|
|
47
51
|
if len(blocks) < 2:
|
|
48
52
|
continue
|
|
@@ -51,8 +55,18 @@ def find_structural_duplicates_lazy(
|
|
|
51
55
|
if not _blocks_from_different_locations(blocks):
|
|
52
56
|
continue
|
|
53
57
|
|
|
58
|
+
uncovered = [
|
|
59
|
+
hb
|
|
60
|
+
for hb in blocks
|
|
61
|
+
if (hb.block.file, hb.block.line_start) not in covered
|
|
62
|
+
]
|
|
63
|
+
if len(uncovered) < 2:
|
|
64
|
+
continue
|
|
65
|
+
if not _blocks_from_different_locations(uncovered):
|
|
66
|
+
continue
|
|
67
|
+
|
|
54
68
|
# Create group and check line threshold
|
|
55
|
-
group = _create_duplicate_group(hash_val,
|
|
69
|
+
group = _create_duplicate_group(hash_val, uncovered, DuplicateType.STRUCTURAL)
|
|
56
70
|
if group.total_lines >= min_lines:
|
|
57
71
|
yield group
|
|
58
72
|
|
|
@@ -106,11 +120,23 @@ def find_all_duplicates_lazy(
|
|
|
106
120
|
Yields:
|
|
107
121
|
DuplicateGroup objects that meet the threshold
|
|
108
122
|
"""
|
|
123
|
+
covered_locations: set[tuple[str, int]] = set()
|
|
124
|
+
|
|
109
125
|
if include_exact:
|
|
110
|
-
|
|
126
|
+
for group in find_exact_duplicates_lazy(index, min_lines):
|
|
127
|
+
for fragment in group.fragments:
|
|
128
|
+
covered_locations.add((fragment.file, fragment.line_start))
|
|
129
|
+
yield group
|
|
111
130
|
|
|
112
131
|
if include_structural:
|
|
113
|
-
|
|
132
|
+
for group in find_structural_duplicates_lazy(
|
|
133
|
+
index,
|
|
134
|
+
min_lines,
|
|
135
|
+
covered_locations=covered_locations,
|
|
136
|
+
):
|
|
137
|
+
for fragment in group.fragments:
|
|
138
|
+
covered_locations.add((fragment.file, fragment.line_start))
|
|
139
|
+
yield group
|
|
114
140
|
|
|
115
141
|
|
|
116
142
|
class DuplicateGroupCollector:
|
|
@@ -17,6 +17,94 @@ from redup.core.models import DuplicateFragment, DuplicateGroup, DuplicateType,
|
|
|
17
17
|
from redup.core.scanner_types import CodeBlock
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
def find_fuzzy_groups(
|
|
21
|
+
all_blocks: list[CodeBlock],
|
|
22
|
+
config: ScanConfig,
|
|
23
|
+
covered_locations: set[tuple[str, int]] | None = None,
|
|
24
|
+
) -> list[DuplicateGroup]:
|
|
25
|
+
"""Find high-similarity function pairs missed by exact/structural hashing."""
|
|
26
|
+
from redup.core.matcher import sequence_similarity
|
|
27
|
+
|
|
28
|
+
covered = set(covered_locations or ())
|
|
29
|
+
min_lines = config.min_block_lines
|
|
30
|
+
min_similarity = config.min_similarity
|
|
31
|
+
|
|
32
|
+
candidates = [
|
|
33
|
+
block
|
|
34
|
+
for block in all_blocks
|
|
35
|
+
if block.function_name
|
|
36
|
+
and block.line_count >= min_lines
|
|
37
|
+
and (block.file, block.line_start) not in covered
|
|
38
|
+
]
|
|
39
|
+
if len(candidates) < 2:
|
|
40
|
+
return []
|
|
41
|
+
|
|
42
|
+
candidates.sort(key=lambda block: (block.line_count, block.file, block.line_start))
|
|
43
|
+
used: set[tuple[str, int]] = set()
|
|
44
|
+
groups: list[DuplicateGroup] = []
|
|
45
|
+
|
|
46
|
+
for index, anchor in enumerate(candidates):
|
|
47
|
+
anchor_key = (anchor.file, anchor.line_start)
|
|
48
|
+
if anchor_key in used:
|
|
49
|
+
continue
|
|
50
|
+
|
|
51
|
+
cluster = [anchor]
|
|
52
|
+
for other in candidates[index + 1 :]:
|
|
53
|
+
other_key = (other.file, other.line_start)
|
|
54
|
+
if other_key in used:
|
|
55
|
+
continue
|
|
56
|
+
if abs(other.line_count - anchor.line_count) > 4:
|
|
57
|
+
break
|
|
58
|
+
if other.file == anchor.file and other.line_start == anchor.line_start:
|
|
59
|
+
continue
|
|
60
|
+
similarity = sequence_similarity(anchor.text, other.text)
|
|
61
|
+
if similarity < min_similarity:
|
|
62
|
+
continue
|
|
63
|
+
cluster.append(other)
|
|
64
|
+
|
|
65
|
+
if len(cluster) < 2:
|
|
66
|
+
continue
|
|
67
|
+
|
|
68
|
+
fragments = [
|
|
69
|
+
DuplicateFragment(
|
|
70
|
+
file=block.file,
|
|
71
|
+
line_start=block.line_start,
|
|
72
|
+
line_end=block.line_end,
|
|
73
|
+
text=block.text,
|
|
74
|
+
function_name=block.function_name,
|
|
75
|
+
class_name=block.class_name,
|
|
76
|
+
)
|
|
77
|
+
for block in cluster
|
|
78
|
+
]
|
|
79
|
+
avg_similarity = sum(
|
|
80
|
+
sequence_similarity(cluster[0].text, block.text) for block in cluster[1:]
|
|
81
|
+
) / (len(cluster) - 1)
|
|
82
|
+
|
|
83
|
+
groups.append(
|
|
84
|
+
DuplicateGroup(
|
|
85
|
+
id=f"F{len(groups) + 1:04d}",
|
|
86
|
+
duplicate_type=DuplicateType.FUZZY,
|
|
87
|
+
fragments=fragments,
|
|
88
|
+
similarity_score=avg_similarity,
|
|
89
|
+
normalized_hash=f"fuzzy_{anchor.file}_{anchor.line_start}",
|
|
90
|
+
normalized_name=anchor.function_name,
|
|
91
|
+
)
|
|
92
|
+
)
|
|
93
|
+
for block in cluster:
|
|
94
|
+
used.add((block.file, block.line_start))
|
|
95
|
+
|
|
96
|
+
return groups
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _covered_locations(groups: list[DuplicateGroup]) -> set[tuple[str, int]]:
|
|
100
|
+
"""Collect file/line locations already assigned to duplicate groups."""
|
|
101
|
+
covered: set[tuple[str, int]] = set()
|
|
102
|
+
for group in groups:
|
|
103
|
+
for fragment in group.fragments:
|
|
104
|
+
covered.add((fragment.file, fragment.line_start))
|
|
105
|
+
return covered
|
|
106
|
+
|
|
107
|
+
|
|
20
108
|
def _finalize_duplicate_groups(
|
|
21
109
|
groups: list[DuplicateGroup],
|
|
22
110
|
all_blocks: list[CodeBlock],
|
|
@@ -25,6 +113,9 @@ def _finalize_duplicate_groups(
|
|
|
25
113
|
cache: HashCache | None = None,
|
|
26
114
|
) -> list[DuplicateGroup]:
|
|
27
115
|
"""Attach near duplicates, sort by impact, and report timing."""
|
|
116
|
+
covered = _covered_locations(groups)
|
|
117
|
+
groups.extend(find_fuzzy_groups(all_blocks, config, covered))
|
|
118
|
+
covered = _covered_locations(groups)
|
|
28
119
|
groups.extend(find_near_duplicate_groups(all_blocks, config))
|
|
29
120
|
if getattr(config, "intent_enabled", False):
|
|
30
121
|
groups.extend(find_intent_groups(all_blocks, config))
|
|
@@ -10,14 +10,18 @@ from redup import __version__
|
|
|
10
10
|
from redup.mcp.handlers import TOOL_HANDLERS
|
|
11
11
|
from redup.mcp.schemas import TOOL_SCHEMA_REDUP
|
|
12
12
|
|
|
13
|
+
_PROTOCOL_VERSION = "2024-11-05"
|
|
14
|
+
_NOTIFICATIONS = frozenset({"notifications/initialized", "notifications/cancelled"})
|
|
13
15
|
|
|
14
|
-
|
|
16
|
+
|
|
17
|
+
def handle_initialize(request_id: Any, params: dict[str, Any] | None = None) -> dict[str, Any]:
|
|
15
18
|
"""Handle MCP initialize request."""
|
|
19
|
+
client_version = (params or {}).get("protocolVersion", _PROTOCOL_VERSION)
|
|
16
20
|
return {
|
|
17
21
|
"jsonrpc": "2.0",
|
|
18
22
|
"id": request_id,
|
|
19
23
|
"result": {
|
|
20
|
-
"protocolVersion":
|
|
24
|
+
"protocolVersion": client_version,
|
|
21
25
|
"serverInfo": {
|
|
22
26
|
"name": "redup",
|
|
23
27
|
"version": __version__,
|
|
@@ -86,8 +90,10 @@ def handle_request(request: dict[str, Any]) -> dict[str, Any]:
|
|
|
86
90
|
params = request.get("params", {}) or {}
|
|
87
91
|
request_id = request.get("id")
|
|
88
92
|
|
|
93
|
+
if method in _NOTIFICATIONS:
|
|
94
|
+
return {}
|
|
89
95
|
if method == "initialize":
|
|
90
|
-
return handle_initialize(request_id)
|
|
96
|
+
return handle_initialize(request_id, params)
|
|
91
97
|
if method == "tools/list":
|
|
92
98
|
return handle_tools_list(request_id)
|
|
93
99
|
if method == "tools/call":
|
|
@@ -117,7 +123,8 @@ def run_server() -> None:
|
|
|
117
123
|
try:
|
|
118
124
|
request = json.loads(line)
|
|
119
125
|
response = handle_request(request)
|
|
120
|
-
|
|
126
|
+
if response:
|
|
127
|
+
print(json.dumps(response), flush=True)
|
|
121
128
|
except json.JSONDecodeError as exc:
|
|
122
129
|
error_response = {
|
|
123
130
|
"jsonrpc": "2.0",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: redup
|
|
3
|
-
Version: 0.4.30
|
|
3
|
+
Version: 0.4.32
|
|
4
4
|
Summary: Code duplication analyzer and refactoring planner for LLMs
|
|
5
5
|
Author-email: Tom Sapletta <tom@sapletta.com>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -76,7 +76,7 @@ Requires-Dist: datasketch>=1.6; extra == "lsh"
|
|
|
76
76
|
Provides-Extra: semantic
|
|
77
77
|
Requires-Dist: sentence-transformers>=2.0; extra == "semantic"
|
|
78
78
|
Provides-Extra: intent
|
|
79
|
-
Requires-Dist: intract>=0.5.
|
|
79
|
+
Requires-Dist: intract>=0.5.7; extra == "intent"
|
|
80
80
|
Provides-Extra: dev
|
|
81
81
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
82
82
|
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
@@ -94,18 +94,16 @@ Dynamic: license-file
|
|
|
94
94
|
[](https://pypi.org/project/redup/)
|
|
95
95
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
96
96
|
[](https://python.org)
|
|
97
|
-
[](https://pypi.org/project/redup/)
|
|
98
98
|
|
|
99
99
|
|
|
100
100
|
## AI Cost Tracking
|
|
101
101
|
|
|
102
|
-
  
|
|
102
|
+
 
|
|
104
103
|
|
|
105
|
-
-
|
|
106
|
-
- 👤 **Human dev:** ~$2609 (26.1h @ $100/h, 30min dedup)
|
|
104
|
+
This project uses AI-generated code. Total cost: **$7.5000** with **79** AI commits.
|
|
107
105
|
|
|
108
|
-
Generated on 2026-
|
|
106
|
+
Generated on 2026-06-29 using [openrouter/deep/deep-v4-pro](https://openrouter.ai/models/openrouter/deep/deep-v4-pro)
|
|
109
107
|
|
|
110
108
|
---
|
|
111
109
|
|
|
@@ -33,6 +33,20 @@ def test_structural_hash_ignores_literals():
|
|
|
33
33
|
assert hash_block_structural(a) == hash_block_structural(b)
|
|
34
34
|
|
|
35
35
|
|
|
36
|
+
def test_structural_hash_env_var_readers():
|
|
37
|
+
a = '''def read_a() -> str:
|
|
38
|
+
raw = os.environ.get("VAR_A", "").strip()
|
|
39
|
+
if raw.isdigit():
|
|
40
|
+
return raw
|
|
41
|
+
return "28"'''
|
|
42
|
+
b = '''def read_b() -> str:
|
|
43
|
+
raw = os.environ.get("VAR_B", "").strip()
|
|
44
|
+
if raw.isdigit():
|
|
45
|
+
return raw
|
|
46
|
+
return "29"'''
|
|
47
|
+
assert hash_block_structural(a) == hash_block_structural(b)
|
|
48
|
+
|
|
49
|
+
|
|
36
50
|
def test_structural_hash_different_structure():
|
|
37
51
|
a = "x = 1\ny = 2"
|
|
38
52
|
b = "for i in range(10):\n print(i)"
|
|
@@ -80,6 +80,7 @@ def _analyze_test_project(root: Path):
|
|
|
80
80
|
|
|
81
81
|
def test_initialize_and_tools_list() -> None:
|
|
82
82
|
init_response = handle_request({"jsonrpc": "2.0", "method": "initialize", "id": 1})
|
|
83
|
+
assert init_response["result"]["protocolVersion"] == "2024-11-05"
|
|
83
84
|
assert init_response["result"]["serverInfo"]["name"] == "redup"
|
|
84
85
|
assert init_response["result"]["serverInfo"]["version"]
|
|
85
86
|
|
|
@@ -4,8 +4,10 @@ import sqlite3
|
|
|
4
4
|
import tempfile
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
|
|
7
|
-
from redup.core.models import ScanConfig
|
|
7
|
+
from redup.core.models import DuplicateType, ScanConfig
|
|
8
8
|
from redup.core.pipeline import analyze, analyze_optimized
|
|
9
|
+
from redup.core.pipeline.duplicate_finder import find_fuzzy_groups
|
|
10
|
+
from redup.core.scanner import CodeBlock
|
|
9
11
|
|
|
10
12
|
|
|
11
13
|
def _create_test_project(root: Path) -> None:
|
|
@@ -157,3 +159,39 @@ def test_analyze_optimized_stores_incremental_cache():
|
|
|
157
159
|
file_count = conn.execute("SELECT COUNT(*) FROM file_hashes").fetchone()[0]
|
|
158
160
|
|
|
159
161
|
assert file_count >= 1
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def test_find_fuzzy_groups_detects_renamed_env_readers():
|
|
165
|
+
blocks = [
|
|
166
|
+
CodeBlock(
|
|
167
|
+
file="a.py",
|
|
168
|
+
line_start=1,
|
|
169
|
+
line_end=6,
|
|
170
|
+
text=(
|
|
171
|
+
'def read_enter() -> str:\n'
|
|
172
|
+
' raw = os.environ.get("ENTER", "").strip()\n'
|
|
173
|
+
' if raw.isdigit():\n'
|
|
174
|
+
' return raw\n'
|
|
175
|
+
' return "28"\n'
|
|
176
|
+
),
|
|
177
|
+
function_name="read_enter",
|
|
178
|
+
),
|
|
179
|
+
CodeBlock(
|
|
180
|
+
file="b.py",
|
|
181
|
+
line_start=1,
|
|
182
|
+
line_end=6,
|
|
183
|
+
text=(
|
|
184
|
+
'def read_ctrl() -> str:\n'
|
|
185
|
+
' raw = os.environ.get("CTRL", "").strip()\n'
|
|
186
|
+
' if raw.isdigit():\n'
|
|
187
|
+
' return raw\n'
|
|
188
|
+
' return "29"\n'
|
|
189
|
+
),
|
|
190
|
+
function_name="read_ctrl",
|
|
191
|
+
),
|
|
192
|
+
]
|
|
193
|
+
config = ScanConfig(min_block_lines=3, min_similarity=0.85)
|
|
194
|
+
groups = find_fuzzy_groups(blocks, config)
|
|
195
|
+
assert len(groups) == 1
|
|
196
|
+
assert groups[0].duplicate_type == DuplicateType.FUZZY
|
|
197
|
+
assert groups[0].occurrences == 2
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|