codeclone 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeclone/__init__.py +1 -1
- codeclone/baseline.py +44 -14
- codeclone/blockhash.py +1 -1
- codeclone/blocks.py +4 -3
- codeclone/cache.py +154 -17
- codeclone/cfg.py +128 -38
- codeclone/cfg_model.py +47 -0
- codeclone/cli.py +524 -100
- codeclone/errors.py +27 -0
- codeclone/extractor.py +101 -24
- codeclone/html_report.py +230 -691
- codeclone/normalize.py +43 -13
- codeclone/py.typed +0 -0
- codeclone/report.py +23 -12
- codeclone/scanner.py +66 -3
- codeclone/templates.py +1262 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.1.dist-info}/METADATA +62 -34
- codeclone-1.2.1.dist-info/RECORD +23 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.1.dist-info}/WHEEL +1 -1
- codeclone-1.1.0.dist-info/RECORD +0 -19
- {codeclone-1.1.0.dist-info → codeclone-1.2.1.dist-info}/entry_points.txt +0 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.1.dist-info}/top_level.txt +0 -0
codeclone/cli.py
CHANGED
|
@@ -1,25 +1,65 @@
|
|
|
1
|
-
"""
|
|
2
|
-
CodeClone — AST and CFG-based code clone detector for Python
|
|
3
|
-
focused on architectural duplication.
|
|
4
|
-
|
|
5
|
-
Copyright (c) 2026 Den Rozhnovskiy
|
|
6
|
-
Licensed under the MIT License.
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
1
|
from __future__ import annotations
|
|
10
2
|
|
|
11
3
|
import argparse
|
|
12
|
-
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
7
|
+
from dataclasses import asdict, dataclass
|
|
13
8
|
from pathlib import Path
|
|
9
|
+
from typing import Any, cast
|
|
10
|
+
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
from rich.panel import Panel
|
|
13
|
+
from rich.progress import (
|
|
14
|
+
BarColumn,
|
|
15
|
+
Progress,
|
|
16
|
+
SpinnerColumn,
|
|
17
|
+
TextColumn,
|
|
18
|
+
TimeElapsedColumn,
|
|
19
|
+
)
|
|
20
|
+
from rich.table import Table
|
|
21
|
+
from rich.theme import Theme
|
|
14
22
|
|
|
15
23
|
from .baseline import Baseline
|
|
16
|
-
from .cache import Cache, file_stat_signature
|
|
24
|
+
from .cache import Cache, CacheEntry, FileStat, file_stat_signature
|
|
25
|
+
from .errors import CacheError
|
|
17
26
|
from .extractor import extract_units_from_source
|
|
18
27
|
from .html_report import build_html_report
|
|
19
28
|
from .normalize import NormalizationConfig
|
|
20
|
-
from .report import
|
|
29
|
+
from .report import build_block_groups, build_groups, to_json_report, to_text
|
|
21
30
|
from .scanner import iter_py_files, module_name_from_path
|
|
22
31
|
|
|
32
|
+
# Custom theme for Rich
|
|
33
|
+
custom_theme = Theme(
|
|
34
|
+
{
|
|
35
|
+
"info": "cyan",
|
|
36
|
+
"warning": "yellow",
|
|
37
|
+
"error": "bold red",
|
|
38
|
+
"success": "bold green",
|
|
39
|
+
"dim": "dim",
|
|
40
|
+
}
|
|
41
|
+
)
|
|
42
|
+
console = Console(theme=custom_theme, width=200)
|
|
43
|
+
|
|
44
|
+
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
|
|
45
|
+
BATCH_SIZE = 100
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(slots=True)
|
|
49
|
+
class ProcessingResult:
|
|
50
|
+
"""Result of processing a single file."""
|
|
51
|
+
|
|
52
|
+
filepath: str
|
|
53
|
+
success: bool
|
|
54
|
+
error: str | None = None
|
|
55
|
+
units: list[Any] | None = None
|
|
56
|
+
blocks: list[Any] | None = None
|
|
57
|
+
stat: FileStat | None = None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def expand_path(p: str) -> Path:
|
|
61
|
+
return Path(p).expanduser().resolve()
|
|
62
|
+
|
|
23
63
|
|
|
24
64
|
def process_file(
|
|
25
65
|
filepath: str,
|
|
@@ -27,96 +67,480 @@ def process_file(
|
|
|
27
67
|
cfg: NormalizationConfig,
|
|
28
68
|
min_loc: int,
|
|
29
69
|
min_stmt: int,
|
|
30
|
-
) ->
|
|
70
|
+
) -> ProcessingResult:
|
|
71
|
+
"""
|
|
72
|
+
Process a single Python file with comprehensive error handling.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
filepath: Absolute path to the file
|
|
76
|
+
root: Root directory of the scan
|
|
77
|
+
cfg: Normalization configuration
|
|
78
|
+
min_loc: Minimum lines of code to consider a function
|
|
79
|
+
min_stmt: Minimum statements to consider a function
|
|
80
|
+
|
|
81
|
+
Returns:
|
|
82
|
+
ProcessingResult object indicating success/failure and containing
|
|
83
|
+
extracted units/blocks if successful.
|
|
84
|
+
"""
|
|
85
|
+
|
|
31
86
|
try:
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
min_stmt=min_stmt,
|
|
46
|
-
)
|
|
87
|
+
# Check file size
|
|
88
|
+
try:
|
|
89
|
+
st_size = os.path.getsize(filepath)
|
|
90
|
+
if st_size > MAX_FILE_SIZE:
|
|
91
|
+
return ProcessingResult(
|
|
92
|
+
filepath=filepath,
|
|
93
|
+
success=False,
|
|
94
|
+
error=f"File too large: {st_size} bytes (max {MAX_FILE_SIZE})",
|
|
95
|
+
)
|
|
96
|
+
except OSError as e:
|
|
97
|
+
return ProcessingResult(
|
|
98
|
+
filepath=filepath, success=False, error=f"Cannot stat file: {e}"
|
|
99
|
+
)
|
|
47
100
|
|
|
48
|
-
|
|
101
|
+
try:
|
|
102
|
+
source = Path(filepath).read_text("utf-8")
|
|
103
|
+
except UnicodeDecodeError as e:
|
|
104
|
+
return ProcessingResult(
|
|
105
|
+
filepath=filepath, success=False, error=f"Encoding error: {e}"
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
stat = file_stat_signature(filepath)
|
|
109
|
+
module_name = module_name_from_path(root, filepath)
|
|
110
|
+
|
|
111
|
+
units, blocks = extract_units_from_source(
|
|
112
|
+
source=source,
|
|
113
|
+
filepath=filepath,
|
|
114
|
+
module_name=module_name,
|
|
115
|
+
cfg=cfg,
|
|
116
|
+
min_loc=min_loc,
|
|
117
|
+
min_stmt=min_stmt,
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
return ProcessingResult(
|
|
121
|
+
filepath=filepath,
|
|
122
|
+
success=True,
|
|
123
|
+
units=units,
|
|
124
|
+
blocks=blocks,
|
|
125
|
+
stat=stat,
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
except Exception as e:
|
|
129
|
+
return ProcessingResult(
|
|
130
|
+
filepath=filepath,
|
|
131
|
+
success=False,
|
|
132
|
+
error=f"Unexpected error: {type(e).__name__}: {e}",
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def print_banner() -> None:
|
|
137
|
+
console.print(
|
|
138
|
+
Panel.fit(
|
|
139
|
+
"[bold white]CodeClone[/bold white] [dim]v1.2.1[/dim]\n"
|
|
140
|
+
"[italic]Architectural duplication detector[/italic]",
|
|
141
|
+
border_style="blue",
|
|
142
|
+
padding=(0, 2),
|
|
143
|
+
)
|
|
144
|
+
)
|
|
49
145
|
|
|
50
146
|
|
|
51
147
|
def main() -> None:
|
|
52
|
-
ap = argparse.ArgumentParser(
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
ap.
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
148
|
+
ap = argparse.ArgumentParser(
|
|
149
|
+
prog="codeclone",
|
|
150
|
+
description="AST and CFG-based code clone detector for Python.",
|
|
151
|
+
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
# Core Arguments
|
|
155
|
+
core_group = ap.add_argument_group("Target")
|
|
156
|
+
core_group.add_argument(
|
|
157
|
+
"root",
|
|
158
|
+
nargs="?",
|
|
159
|
+
default=".",
|
|
160
|
+
help="Project root directory to scan.",
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
# Tuning
|
|
164
|
+
tune_group = ap.add_argument_group("Analysis Tuning")
|
|
165
|
+
tune_group.add_argument(
|
|
166
|
+
"--min-loc",
|
|
167
|
+
type=int,
|
|
168
|
+
default=15,
|
|
169
|
+
help="Minimum Lines of Code (LOC) to consider.",
|
|
170
|
+
)
|
|
171
|
+
tune_group.add_argument(
|
|
172
|
+
"--min-stmt",
|
|
173
|
+
type=int,
|
|
174
|
+
default=6,
|
|
175
|
+
help="Minimum AST statements to consider.",
|
|
176
|
+
)
|
|
177
|
+
tune_group.add_argument(
|
|
178
|
+
"--processes",
|
|
179
|
+
type=int,
|
|
180
|
+
default=4,
|
|
181
|
+
help="Number of parallel worker processes.",
|
|
182
|
+
)
|
|
183
|
+
tune_group.add_argument(
|
|
184
|
+
"--cache-dir",
|
|
185
|
+
default="~/.cache/codeclone/cache.json",
|
|
186
|
+
help="Path to the cache file to speed up subsequent runs.",
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
# Baseline & CI
|
|
190
|
+
ci_group = ap.add_argument_group("Baseline & CI/CD")
|
|
191
|
+
ci_group.add_argument(
|
|
192
|
+
"--baseline",
|
|
193
|
+
default="codeclone.baseline.json",
|
|
194
|
+
help="Path to the baseline file (stored in repo).",
|
|
195
|
+
)
|
|
196
|
+
ci_group.add_argument(
|
|
197
|
+
"--update-baseline",
|
|
198
|
+
action="store_true",
|
|
199
|
+
help="Overwrite the baseline file with current results.",
|
|
200
|
+
)
|
|
201
|
+
ci_group.add_argument(
|
|
202
|
+
"--fail-on-new",
|
|
203
|
+
action="store_true",
|
|
204
|
+
help="Exit with error if NEW clones (not in baseline) are detected.",
|
|
205
|
+
)
|
|
206
|
+
ci_group.add_argument(
|
|
207
|
+
"--fail-threshold",
|
|
208
|
+
type=int,
|
|
209
|
+
default=-1,
|
|
210
|
+
metavar="MAX_CLONES",
|
|
211
|
+
help="Exit with error if total clone groups exceed this number.",
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
# Output
|
|
215
|
+
out_group = ap.add_argument_group("Reporting")
|
|
216
|
+
out_group.add_argument(
|
|
217
|
+
"--html",
|
|
218
|
+
dest="html_out",
|
|
219
|
+
metavar="FILE",
|
|
220
|
+
help="Generate an HTML report to FILE.",
|
|
221
|
+
)
|
|
222
|
+
out_group.add_argument(
|
|
223
|
+
"--json",
|
|
224
|
+
dest="json_out",
|
|
225
|
+
metavar="FILE",
|
|
226
|
+
help="Generate a JSON report to FILE.",
|
|
227
|
+
)
|
|
228
|
+
out_group.add_argument(
|
|
229
|
+
"--text",
|
|
230
|
+
dest="text_out",
|
|
231
|
+
metavar="FILE",
|
|
232
|
+
help="Generate a text report to FILE.",
|
|
233
|
+
)
|
|
234
|
+
out_group.add_argument(
|
|
235
|
+
"--no-progress",
|
|
236
|
+
action="store_true",
|
|
237
|
+
help="Disable the progress bar (recommended for CI logs).",
|
|
238
|
+
)
|
|
65
239
|
|
|
66
240
|
args = ap.parse_args()
|
|
67
241
|
|
|
68
|
-
|
|
242
|
+
print_banner()
|
|
69
243
|
|
|
70
|
-
|
|
71
|
-
|
|
244
|
+
try:
|
|
245
|
+
root_path = Path(args.root).resolve()
|
|
246
|
+
if not root_path.exists():
|
|
247
|
+
console.print(f"[error]Root path does not exist: {root_path}[/error]")
|
|
248
|
+
sys.exit(1)
|
|
249
|
+
except Exception as e:
|
|
250
|
+
console.print(f"[error]Invalid root path: {e}[/error]")
|
|
251
|
+
sys.exit(1)
|
|
252
|
+
|
|
253
|
+
console.print(f"[info]Scanning root:[/info] {root_path}")
|
|
72
254
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
255
|
+
# Initialize Cache
|
|
256
|
+
cfg = NormalizationConfig()
|
|
257
|
+
cache_path = Path(args.cache_dir).expanduser()
|
|
258
|
+
cache = Cache(cache_path)
|
|
259
|
+
cache.load()
|
|
260
|
+
if cache.load_warning:
|
|
261
|
+
console.print(f"[warning]{cache.load_warning}[/warning]")
|
|
76
262
|
|
|
263
|
+
all_units: list[dict[str, Any]] = []
|
|
264
|
+
all_blocks: list[dict[str, Any]] = []
|
|
265
|
+
changed_files_count = 0
|
|
77
266
|
files_to_process: list[str] = []
|
|
78
267
|
|
|
79
|
-
|
|
80
|
-
|
|
268
|
+
def _get_cached_entry(
|
|
269
|
+
fp: str,
|
|
270
|
+
) -> tuple[FileStat | None, CacheEntry | None, str | None]:
|
|
271
|
+
try:
|
|
272
|
+
stat = file_stat_signature(fp)
|
|
273
|
+
except OSError as e:
|
|
274
|
+
return None, None, f"[warning]Skipping file {fp}: {e}[/warning]"
|
|
81
275
|
cached = cache.get_file_entry(fp)
|
|
276
|
+
return stat, cached, None
|
|
82
277
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
else:
|
|
87
|
-
files_to_process.append(fp)
|
|
88
|
-
|
|
89
|
-
with ProcessPoolExecutor(max_workers=args.processes) as executor:
|
|
90
|
-
futures = [
|
|
91
|
-
executor.submit(
|
|
92
|
-
process_file,
|
|
278
|
+
def _safe_process_file(fp: str) -> ProcessingResult | None:
|
|
279
|
+
try:
|
|
280
|
+
return process_file(
|
|
93
281
|
fp,
|
|
94
|
-
|
|
282
|
+
str(root_path),
|
|
95
283
|
cfg,
|
|
96
284
|
args.min_loc,
|
|
97
285
|
args.min_stmt,
|
|
98
286
|
)
|
|
99
|
-
|
|
100
|
-
|
|
287
|
+
except Exception as e:
|
|
288
|
+
console.print(f"[warning]Worker failed: {e}[/warning]")
|
|
289
|
+
return None
|
|
290
|
+
|
|
291
|
+
def _safe_future_result(future: Any) -> tuple[ProcessingResult | None, str | None]:
|
|
292
|
+
try:
|
|
293
|
+
return future.result(), None
|
|
294
|
+
except Exception as e:
|
|
295
|
+
return None, str(e)
|
|
296
|
+
|
|
297
|
+
# Discovery phase
|
|
298
|
+
with console.status("[bold green]Discovering Python files...", spinner="dots"):
|
|
299
|
+
try:
|
|
300
|
+
for fp in iter_py_files(str(root_path)):
|
|
301
|
+
stat, cached, warn = _get_cached_entry(fp)
|
|
302
|
+
if warn:
|
|
303
|
+
console.print(warn)
|
|
304
|
+
continue
|
|
305
|
+
if cached and cached.get("stat") == stat:
|
|
306
|
+
all_units.extend(
|
|
307
|
+
cast(
|
|
308
|
+
list[dict[str, Any]],
|
|
309
|
+
cast(object, cached.get("units", [])),
|
|
310
|
+
)
|
|
311
|
+
)
|
|
312
|
+
all_blocks.extend(
|
|
313
|
+
cast(
|
|
314
|
+
list[dict[str, Any]],
|
|
315
|
+
cast(object, cached.get("blocks", [])),
|
|
316
|
+
)
|
|
317
|
+
)
|
|
318
|
+
else:
|
|
319
|
+
files_to_process.append(fp)
|
|
320
|
+
except Exception as e:
|
|
321
|
+
console.print(f"[error]Scan failed: {e}[/error]")
|
|
322
|
+
sys.exit(1)
|
|
323
|
+
|
|
324
|
+
total_files = len(files_to_process)
|
|
325
|
+
failed_files = []
|
|
326
|
+
|
|
327
|
+
# Processing phase
|
|
328
|
+
if total_files > 0:
|
|
329
|
+
|
|
330
|
+
def handle_result(result: ProcessingResult) -> None:
|
|
331
|
+
nonlocal changed_files_count
|
|
332
|
+
if result.success and result.stat:
|
|
333
|
+
cache.put_file_entry(
|
|
334
|
+
result.filepath,
|
|
335
|
+
result.stat,
|
|
336
|
+
result.units or [],
|
|
337
|
+
result.blocks or [],
|
|
338
|
+
)
|
|
339
|
+
changed_files_count += 1
|
|
340
|
+
if result.units:
|
|
341
|
+
all_units.extend([asdict(u) for u in result.units])
|
|
342
|
+
if result.blocks:
|
|
343
|
+
all_blocks.extend([asdict(b) for b in result.blocks])
|
|
344
|
+
else:
|
|
345
|
+
failed_files.append(f"{result.filepath}: {result.error}")
|
|
346
|
+
|
|
347
|
+
def process_sequential(with_progress: bool) -> None:
|
|
348
|
+
if with_progress:
|
|
349
|
+
with Progress(
|
|
350
|
+
SpinnerColumn(),
|
|
351
|
+
TextColumn("[progress.description]{task.description}"),
|
|
352
|
+
BarColumn(),
|
|
353
|
+
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
|
|
354
|
+
TimeElapsedColumn(),
|
|
355
|
+
console=console,
|
|
356
|
+
) as progress:
|
|
357
|
+
task = progress.add_task(
|
|
358
|
+
f"Analyzing {total_files} files...", total=total_files
|
|
359
|
+
)
|
|
360
|
+
for fp in files_to_process:
|
|
361
|
+
result = _safe_process_file(fp)
|
|
362
|
+
if result is not None:
|
|
363
|
+
handle_result(result)
|
|
364
|
+
progress.advance(task)
|
|
365
|
+
else:
|
|
366
|
+
console.print(f"[info]Processing {total_files} changed files...[/info]")
|
|
367
|
+
for fp in files_to_process:
|
|
368
|
+
result = _safe_process_file(fp)
|
|
369
|
+
if result is not None:
|
|
370
|
+
handle_result(result)
|
|
371
|
+
|
|
372
|
+
try:
|
|
373
|
+
with ProcessPoolExecutor(max_workers=args.processes) as executor:
|
|
374
|
+
if args.no_progress:
|
|
375
|
+
console.print(
|
|
376
|
+
f"[info]Processing {total_files} changed files...[/info]"
|
|
377
|
+
)
|
|
378
|
+
|
|
379
|
+
# Process in batches to manage memory
|
|
380
|
+
for i in range(0, total_files, BATCH_SIZE):
|
|
381
|
+
batch = files_to_process[i : i + BATCH_SIZE]
|
|
382
|
+
futures = [
|
|
383
|
+
executor.submit(
|
|
384
|
+
process_file,
|
|
385
|
+
fp,
|
|
386
|
+
str(root_path),
|
|
387
|
+
cfg,
|
|
388
|
+
args.min_loc,
|
|
389
|
+
args.min_stmt,
|
|
390
|
+
)
|
|
391
|
+
for fp in batch
|
|
392
|
+
]
|
|
393
|
+
|
|
394
|
+
for future in as_completed(futures):
|
|
395
|
+
result, err = _safe_future_result(future)
|
|
396
|
+
if result is not None:
|
|
397
|
+
handle_result(result)
|
|
398
|
+
elif err is not None:
|
|
399
|
+
console.print(
|
|
400
|
+
"[warning]Failed to process batch item: "
|
|
401
|
+
f"{err}[/warning]"
|
|
402
|
+
)
|
|
403
|
+
|
|
404
|
+
else:
|
|
405
|
+
with Progress(
|
|
406
|
+
SpinnerColumn(),
|
|
407
|
+
TextColumn("[progress.description]{task.description}"),
|
|
408
|
+
BarColumn(),
|
|
409
|
+
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
|
|
410
|
+
TimeElapsedColumn(),
|
|
411
|
+
console=console,
|
|
412
|
+
) as progress:
|
|
413
|
+
task = progress.add_task(
|
|
414
|
+
f"Analyzing {total_files} files...", total=total_files
|
|
415
|
+
)
|
|
416
|
+
|
|
417
|
+
# Process in batches
|
|
418
|
+
for i in range(0, total_files, BATCH_SIZE):
|
|
419
|
+
batch = files_to_process[i : i + BATCH_SIZE]
|
|
420
|
+
futures = [
|
|
421
|
+
executor.submit(
|
|
422
|
+
process_file,
|
|
423
|
+
fp,
|
|
424
|
+
str(root_path),
|
|
425
|
+
cfg,
|
|
426
|
+
args.min_loc,
|
|
427
|
+
args.min_stmt,
|
|
428
|
+
)
|
|
429
|
+
for fp in batch
|
|
430
|
+
]
|
|
431
|
+
|
|
432
|
+
for future in as_completed(futures):
|
|
433
|
+
result, err = _safe_future_result(future)
|
|
434
|
+
if result is not None:
|
|
435
|
+
handle_result(result)
|
|
436
|
+
elif err is not None:
|
|
437
|
+
# Should rarely happen due to try/except
|
|
438
|
+
# in process_file.
|
|
439
|
+
console.print(
|
|
440
|
+
f"[warning]Worker failed: {err}[/warning]"
|
|
441
|
+
)
|
|
442
|
+
progress.advance(task)
|
|
443
|
+
except (OSError, RuntimeError, PermissionError) as e:
|
|
444
|
+
console.print(
|
|
445
|
+
"[warning]Parallel processing unavailable, "
|
|
446
|
+
f"falling back to sequential: {e}[/warning]"
|
|
447
|
+
)
|
|
448
|
+
process_sequential(with_progress=not args.no_progress)
|
|
449
|
+
|
|
450
|
+
if failed_files:
|
|
451
|
+
console.print(
|
|
452
|
+
f"\n[warning]⚠ {len(failed_files)} files failed to process:[/warning]"
|
|
453
|
+
)
|
|
454
|
+
for failure in failed_files[:10]:
|
|
455
|
+
console.print(f" • {failure}")
|
|
456
|
+
if len(failed_files) > 10:
|
|
457
|
+
console.print(f" ... and {len(failed_files) - 10} more")
|
|
458
|
+
|
|
459
|
+
# Analysis phase
|
|
460
|
+
with console.status("[bold green]Grouping clones...", spinner="dots"):
|
|
461
|
+
func_groups = build_groups(all_units)
|
|
462
|
+
block_groups = build_block_groups(all_blocks)
|
|
463
|
+
try:
|
|
464
|
+
cache.save()
|
|
465
|
+
except CacheError as e:
|
|
466
|
+
console.print(f"[warning]Failed to save cache: {e}[/warning]")
|
|
467
|
+
|
|
468
|
+
# Reporting
|
|
469
|
+
func_clones_count = len(func_groups)
|
|
470
|
+
block_clones_count = len(block_groups)
|
|
471
|
+
|
|
472
|
+
# Baseline Logic
|
|
473
|
+
baseline_path = Path(args.baseline).expanduser().resolve()
|
|
474
|
+
|
|
475
|
+
# If user didn't specify path and default logic applies, baseline_path
|
|
476
|
+
# is now ./codeclone_baseline.json
|
|
477
|
+
|
|
478
|
+
baseline = Baseline(baseline_path)
|
|
479
|
+
baseline_exists = baseline_path.exists()
|
|
480
|
+
|
|
481
|
+
if baseline_exists:
|
|
482
|
+
baseline.load()
|
|
483
|
+
if not args.update_baseline and baseline.python_version:
|
|
484
|
+
current_version = f"{sys.version_info.major}.{sys.version_info.minor}"
|
|
485
|
+
if baseline.python_version != current_version:
|
|
486
|
+
console.print(
|
|
487
|
+
"[warning]Baseline Python version mismatch.[/warning]\n"
|
|
488
|
+
f"Baseline was generated with Python {baseline.python_version}.\n"
|
|
489
|
+
f"Current interpreter: Python {current_version}."
|
|
490
|
+
)
|
|
491
|
+
if args.fail_on_new:
|
|
492
|
+
console.print(
|
|
493
|
+
"[error]Baseline checks require the same Python version to "
|
|
494
|
+
"ensure deterministic results. Please regenerate the baseline "
|
|
495
|
+
"using the current interpreter.[/error]"
|
|
496
|
+
)
|
|
497
|
+
sys.exit(2)
|
|
498
|
+
else:
|
|
499
|
+
if not args.update_baseline:
|
|
500
|
+
console.print(
|
|
501
|
+
"[warning]Baseline file not found at: [bold]"
|
|
502
|
+
f"{baseline_path}"
|
|
503
|
+
"[/bold][/warning]\n"
|
|
504
|
+
"[dim]Comparing against an empty baseline. "
|
|
505
|
+
"Use --update-baseline to create it.[/dim]"
|
|
506
|
+
)
|
|
101
507
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
508
|
+
if args.update_baseline:
|
|
509
|
+
new_baseline = Baseline.from_groups(
|
|
510
|
+
func_groups,
|
|
511
|
+
block_groups,
|
|
512
|
+
path=baseline_path,
|
|
513
|
+
python_version=f"{sys.version_info.major}.{sys.version_info.minor}",
|
|
514
|
+
)
|
|
515
|
+
new_baseline.save()
|
|
516
|
+
console.print(f"[success]✔ Baseline updated:[/success] {baseline_path}")
|
|
517
|
+
# When updating, we don't fail on new, we just saved the new state.
|
|
518
|
+
# But we might still want to print the summary.
|
|
106
519
|
|
|
107
|
-
|
|
520
|
+
# Diff
|
|
521
|
+
new_func, new_block = baseline.diff(func_groups, block_groups)
|
|
522
|
+
new_clones_count = len(new_func) + len(new_block)
|
|
108
523
|
|
|
109
|
-
|
|
110
|
-
|
|
524
|
+
# Summary Table
|
|
525
|
+
table = Table(title="Analysis Summary", border_style="blue")
|
|
526
|
+
table.add_column("Metric", style="cyan")
|
|
527
|
+
table.add_column("Value", style="bold white")
|
|
111
528
|
|
|
112
|
-
|
|
113
|
-
|
|
529
|
+
table.add_row("Files Processed", str(changed_files_count))
|
|
530
|
+
table.add_row("Total Function Clones", str(func_clones_count))
|
|
531
|
+
table.add_row("Total Block Clones", str(block_clones_count))
|
|
114
532
|
|
|
115
|
-
|
|
116
|
-
|
|
533
|
+
if baseline_exists:
|
|
534
|
+
style = "error" if new_clones_count > 0 else "success"
|
|
535
|
+
table.add_row(
|
|
536
|
+
"New Clones (vs Baseline)", f"[{style}]{new_clones_count}[/{style}]"
|
|
537
|
+
)
|
|
538
|
+
|
|
539
|
+
console.print(table)
|
|
117
540
|
|
|
541
|
+
# Outputs
|
|
118
542
|
if args.html_out:
|
|
119
|
-
out = Path(args.html_out)
|
|
543
|
+
out = Path(args.html_out).expanduser().resolve()
|
|
120
544
|
out.parent.mkdir(parents=True, exist_ok=True)
|
|
121
545
|
out.write_text(
|
|
122
546
|
build_html_report(
|
|
@@ -128,29 +552,19 @@ def main() -> None:
|
|
|
128
552
|
),
|
|
129
553
|
"utf-8",
|
|
130
554
|
)
|
|
131
|
-
|
|
132
|
-
baseline = Baseline(args.baseline)
|
|
133
|
-
baseline.load()
|
|
134
|
-
|
|
135
|
-
if args.update_baseline:
|
|
136
|
-
new_baseline = Baseline.from_groups(func_groups, block_groups)
|
|
137
|
-
new_baseline.path = Path(args.baseline)
|
|
138
|
-
new_baseline.save()
|
|
139
|
-
print(f"Baseline updated: {args.baseline}")
|
|
140
|
-
return
|
|
141
|
-
|
|
142
|
-
new_func, new_block = baseline.diff(func_groups, block_groups)
|
|
555
|
+
console.print(f"[info]HTML report saved:[/info] {out}")
|
|
143
556
|
|
|
144
557
|
if args.json_out:
|
|
145
|
-
out = Path(args.json_out)
|
|
558
|
+
out = Path(args.json_out).expanduser().resolve()
|
|
146
559
|
out.parent.mkdir(parents=True, exist_ok=True)
|
|
147
560
|
out.write_text(
|
|
148
|
-
|
|
561
|
+
to_json_report(func_groups, block_groups),
|
|
149
562
|
"utf-8",
|
|
150
563
|
)
|
|
564
|
+
console.print(f"[info]JSON report saved:[/info] {out}")
|
|
151
565
|
|
|
152
566
|
if args.text_out:
|
|
153
|
-
out = Path(args.text_out)
|
|
567
|
+
out = Path(args.text_out).expanduser().resolve()
|
|
154
568
|
out.parent.mkdir(parents=True, exist_ok=True)
|
|
155
569
|
out.write_text(
|
|
156
570
|
"FUNCTION CLONES\n"
|
|
@@ -159,20 +573,30 @@ def main() -> None:
|
|
|
159
573
|
+ to_text(block_groups),
|
|
160
574
|
"utf-8",
|
|
161
575
|
)
|
|
576
|
+
console.print(f"[info]Text report saved:[/info] {out}")
|
|
162
577
|
|
|
163
|
-
|
|
164
|
-
print(f"Changed files parsed: {changed}")
|
|
165
|
-
print(f"Function clone groups: {len(func_groups)}")
|
|
166
|
-
print(f"Block clone groups: {len(block_groups)}")
|
|
167
|
-
|
|
578
|
+
# Exit Codes
|
|
168
579
|
if args.fail_on_new and (new_func or new_block):
|
|
169
|
-
print("\n❌ New code clones detected
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
580
|
+
console.print("\n[error]❌ FAILED: New code clones detected![/error]")
|
|
581
|
+
if new_func:
|
|
582
|
+
console.print(f" New Functions: {', '.join(sorted(new_func))}")
|
|
583
|
+
if new_block:
|
|
584
|
+
console.print(f" New Blocks: {', '.join(sorted(new_block))}")
|
|
585
|
+
sys.exit(3)
|
|
586
|
+
|
|
587
|
+
if 0 <= args.fail_threshold < (func_clones_count + block_clones_count):
|
|
588
|
+
total = func_clones_count + block_clones_count
|
|
589
|
+
console.print(
|
|
590
|
+
f"\n[error]❌ FAILED: Total clones ({total}) "
|
|
591
|
+
f"exceed threshold ({args.fail_threshold})![/error]"
|
|
592
|
+
)
|
|
593
|
+
sys.exit(2)
|
|
173
594
|
|
|
174
|
-
if
|
|
175
|
-
|
|
595
|
+
if not args.update_baseline and not args.fail_on_new and new_clones_count > 0:
|
|
596
|
+
console.print(
|
|
597
|
+
"\n[warning]New clones detected but --fail-on-new not set.[/warning]\n"
|
|
598
|
+
"Run with --update-baseline to accept them as technical debt."
|
|
599
|
+
)
|
|
176
600
|
|
|
177
601
|
|
|
178
602
|
if __name__ == "__main__":
|