codeclone 1.1.0-py3-none-any.whl → 1.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeclone/baseline.py +13 -9
- codeclone/cache.py +12 -6
- codeclone/cfg.py +166 -1
- codeclone/cli.py +305 -75
- codeclone/html_report.py +569 -586
- codeclone/normalize.py +29 -6
- codeclone/report.py +6 -6
- {codeclone-1.1.0.dist-info → codeclone-1.2.0.dist-info}/METADATA +15 -5
- codeclone-1.2.0.dist-info/RECORD +19 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.0.dist-info}/WHEEL +1 -1
- codeclone-1.1.0.dist-info/RECORD +0 -19
- {codeclone-1.1.0.dist-info → codeclone-1.2.0.dist-info}/entry_points.txt +0 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.0.dist-info}/top_level.txt +0 -0
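The bulk of this release is the rewrite of codeclone/cli.py shown below: the plain argparse-and-print driver is replaced with a Rich console and progress bar, a ProcessPoolExecutor worker pool, a stat-signature cache, and baseline-aware exit codes. As a reading aid, here is a minimal, self-contained sketch of the worker-pool-plus-progress pattern the new CLI adopts; the analyze() function and file list are illustrative placeholders, not codeclone APIs.

# Sketch only: submit per-file work to a process pool and advance a Rich
# progress bar as each future completes (the pattern used by the new cli.py).
from concurrent.futures import ProcessPoolExecutor, as_completed

from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn


def analyze(path: str) -> int:
    # Placeholder for per-file work (parsing, normalization, hashing).
    return len(path)


def run(files: list[str], workers: int = 4) -> list[int]:
    results: list[int] = []
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TimeElapsedColumn(),
    ) as progress:
        task = progress.add_task(f"Analyzing {len(files)} files...", total=len(files))
        with ProcessPoolExecutor(max_workers=workers) as executor:
            futures = [executor.submit(analyze, fp) for fp in files]
            for future in as_completed(futures):
                try:
                    results.append(future.result())
                finally:
                    # Keep the bar moving even if a worker raised.
                    progress.advance(task)
    return results


if __name__ == "__main__":
    print(run(["a.py", "b.py", "c.py"]))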
codeclone/cli.py
CHANGED
@@ -9,9 +9,22 @@ Licensed under the MIT License.
 from __future__ import annotations

 import argparse
-
+import sys
+from concurrent.futures import ProcessPoolExecutor, as_completed
 from pathlib import Path

+from rich.console import Console
+from rich.panel import Panel
+from rich.progress import (
+    Progress,
+    SpinnerColumn,
+    TextColumn,
+    BarColumn,
+    TimeElapsedColumn,
+)
+from rich.table import Table
+from rich.theme import Theme
+
 from .baseline import Baseline
 from .cache import Cache, file_stat_signature
 from .extractor import extract_units_from_source
@@ -20,6 +33,22 @@ from .normalize import NormalizationConfig
 from .report import build_groups, build_block_groups, to_json, to_text
 from .scanner import iter_py_files, module_name_from_path

+# Custom theme for Rich
+custom_theme = Theme(
+    {
+        "info": "cyan",
+        "warning": "yellow",
+        "error": "bold red",
+        "success": "bold green",
+        "dim": "dim",
+    }
+)
+console = Console(theme=custom_theme, width=200)
+
+
+def expand_path(p: str) -> Path:
+    return Path(p).expanduser().resolve()
+

 def process_file(
     filepath: str,
@@ -48,75 +77,277 @@ def process_file(
     return filepath, stat, units, blocks


+def print_banner():
+    console.print(
+        Panel.fit(
+            "[bold white]CodeClone[/bold white] [dim]v1.2.0[/dim]\n"
+            "[italic]Architectural duplication detector[/italic]",
+            border_style="blue",
+            padding=(0, 2),
+        )
+    )
+
+
 def main() -> None:
-    ap = argparse.ArgumentParser(
-
-
-
-
-
-    ap.
-
-
-
-
-
+    ap = argparse.ArgumentParser(
+        prog="codeclone",
+        description="AST and CFG-based code clone detector for Python.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+
+    # Core Arguments
+    core_group = ap.add_argument_group("Target")
+    core_group.add_argument(
+        "root",
+        nargs="?",
+        default=".",
+        help="Project root directory to scan.",
+    )
+
+    # Tuning
+    tune_group = ap.add_argument_group("Analysis Tuning")
+    tune_group.add_argument(
+        "--min-loc",
+        type=int,
+        default=15,
+        help="Minimum Lines of Code (LOC) to consider.",
+    )
+    tune_group.add_argument(
+        "--min-stmt",
+        type=int,
+        default=6,
+        help="Minimum AST statements to consider.",
+    )
+    tune_group.add_argument(
+        "--processes",
+        type=int,
+        default=4,
+        help="Number of parallel worker processes.",
+    )
+    tune_group.add_argument(
+        "--cache-dir",
+        default="~/.cache/codeclone/cache.json",
+        help="Path to the cache file to speed up subsequent runs.",
+    )
+
+    # Baseline & CI
+    ci_group = ap.add_argument_group("Baseline & CI/CD")
+    ci_group.add_argument(
+        "--baseline",
+        default="codeclone.baseline.json",
+        help="Path to the baseline file (stored in repo).",
+    )
+    ci_group.add_argument(
+        "--update-baseline",
+        action="store_true",
+        help="Overwrite the baseline file with current results.",
+    )
+    ci_group.add_argument(
+        "--fail-on-new",
+        action="store_true",
+        help="Exit with error if NEW clones (not in baseline) are detected.",
+    )
+    ci_group.add_argument(
+        "--fail-threshold",
+        type=int,
+        default=-1,
+        metavar="MAX_CLONES",
+        help="Exit with error if total clone groups exceed this number.",
+    )
+
+    # Output
+    out_group = ap.add_argument_group("Reporting")
+    out_group.add_argument(
+        "--html",
+        dest="html_out",
+        metavar="FILE",
+        help="Generate an HTML report to FILE.",
+    )
+    out_group.add_argument(
+        "--json",
+        dest="json_out",
+        metavar="FILE",
+        help="Generate a JSON report to FILE.",
+    )
+    out_group.add_argument(
+        "--text",
+        dest="text_out",
+        metavar="FILE",
+        help="Generate a text report to FILE.",
+    )
+    out_group.add_argument(
+        "--no-progress",
+        action="store_true",
+        help="Disable the progress bar (recommended for CI logs).",
+    )

     args = ap.parse_args()

-
+    print_banner()

-
+    root_path = Path(args.root).resolve()
+    if not root_path.exists():
+        console.print(f"[error]Root path does not exist: {root_path}[/error]")
+        sys.exit(1)
+
+    console.print(f"[info]Scanning root:[/info] {root_path}")
+
+    # Initialize Cache
+    cfg = NormalizationConfig()
+    cache_path = Path(args.cache_dir).expanduser()
+    cache = Cache(cache_path)
     cache.load()

     all_units: list[dict] = []
     all_blocks: list[dict] = []
-
-
+    changed_files_count = 0
     files_to_process: list[str] = []

-
-
-
-
-
-
-
+    # Discovery phase
+    with console.status("[bold green]Discovering Python files...", spinner="dots"):
+        for fp in iter_py_files(str(root_path)):
+            stat = file_stat_signature(fp)
+            cached = cache.get_file_entry(fp)
+            if cached and cached.get("stat") == stat:
+                all_units.extend(cached.get("units", []))
+                all_blocks.extend(cached.get("blocks", []))
+            else:
+                files_to_process.append(fp)
+
+    total_files = len(files_to_process)
+
+    # Processing phase
+    if total_files > 0:
+        if args.no_progress:
+            console.print(f"[info]Processing {total_files} changed files...[/info]")
+            with ProcessPoolExecutor(max_workers=args.processes) as executor:
+                futures = [
+                    executor.submit(
+                        process_file,
+                        fp,
+                        str(root_path),
+                        cfg,
+                        args.min_loc,
+                        args.min_stmt,
+                    )
+                    for fp in files_to_process
+                ]
+                for future in as_completed(futures):
+                    try:
+                        result = future.result()
+                    except Exception as e:
+                        console.print(f"[warning]Failed to process file: {e}[/warning]")
+                        continue
+
+                    if result:
+                        fp, stat, units, blocks = result
+                        cache.put_file_entry(fp, stat, units, blocks)
+                        changed_files_count += 1
+                        all_units.extend([u.__dict__ for u in units])
+                        all_blocks.extend([b.__dict__ for b in blocks])
         else:
-
-
-
-
-
-
-
-
-
-
-
+            with Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+                TimeElapsedColumn(),
+                console=console,
+            ) as progress:
+                task = progress.add_task(
+                    f"Analyzing {total_files} files...", total=total_files
+                )
+                with ProcessPoolExecutor(max_workers=args.processes) as executor:
+                    futures = [
+                        executor.submit(
+                            process_file,
+                            fp,
+                            str(root_path),
+                            cfg,
+                            args.min_loc,
+                            args.min_stmt,
+                        )
+                        for fp in files_to_process
+                    ]
+                    for future in as_completed(futures):
+                        try:
+                            result = future.result()
+                        except Exception:
+                            # Log error but keep progress bar moving?
+                            # console.print might break progress bar layout, better to rely on rich logging or just skip
+                            # console.print(f"[warning]Failed to process file: {e}[/warning]")
+                            continue
+                        finally:
+                            progress.advance(task)
+
+                        if result:
+                            fp, stat, units, blocks = result
+                            cache.put_file_entry(fp, stat, units, blocks)
+                            changed_files_count += 1
+                            all_units.extend([u.__dict__ for u in units])
+                            all_blocks.extend([b.__dict__ for b in blocks])
+
+    # Analysis phase
+    with console.status("[bold green]Grouping clones...", spinner="dots"):
+        func_groups = build_groups(all_units)
+        block_groups = build_block_groups(all_blocks)
+    cache.save()
+
+    # Reporting
+    func_clones_count = len(func_groups)
+    block_clones_count = len(block_groups)
+
+    # Baseline Logic
+    baseline_path = Path(args.baseline).expanduser().resolve()
+
+    # If user didn't specify path, and default logic applies, baseline_path is now ./codeclone_baseline.json
+
+    baseline = Baseline(baseline_path)
+    baseline_exists = baseline_path.exists()
+
+    if baseline_exists:
+        baseline.load()
+    else:
+        if not args.update_baseline:
+            console.print(
+                f"[warning]Baseline file not found at: [bold]{baseline_path}[/bold][/warning]\n"
+                "[dim]Comparing against an empty baseline. "
+                "Use --update-baseline to create it.[/dim]"
             )
-            for fp in files_to_process
-        ]

-
-
-
-
+    if args.update_baseline:
+        new_baseline = Baseline.from_groups(
+            func_groups, block_groups, path=baseline_path
+        )
+        new_baseline.save()
+        console.print(f"[success]✔ Baseline updated:[/success] {baseline_path}")
+        # When updating, we don't fail on new, we just saved the new state.
+        # But we might still want to print the summary.
+
+    # Diff
+    new_func, new_block = baseline.diff(func_groups, block_groups)
+    new_clones_count = len(new_func) + len(new_block)

-
+    # Summary Table
+    table = Table(title="Analysis Summary", border_style="blue")
+    table.add_column("Metric", style="cyan")
+    table.add_column("Value", style="bold white")

-
-
+    table.add_row("Files Processed", str(changed_files_count))
+    table.add_row("Total Function Clones", str(func_clones_count))
+    table.add_row("Total Block Clones", str(block_clones_count))

-
-
+    if baseline_exists:
+        style = "error" if new_clones_count > 0 else "success"
+        table.add_row(
+            "New Clones (vs Baseline)", f"[{style}]{new_clones_count}[/{style}]"
+        )

-
-    block_groups = build_block_groups(all_blocks)
+    console.print(table)

+    # Outputs
     if args.html_out:
-        out = Path(args.html_out)
+        out = Path(args.html_out).expanduser().resolve()
         out.parent.mkdir(parents=True, exist_ok=True)
         out.write_text(
             build_html_report(
@@ -128,29 +359,19 @@ def main() -> None:
             ),
             "utf-8",
         )
-
-    baseline = Baseline(args.baseline)
-    baseline.load()
-
-    if args.update_baseline:
-        new_baseline = Baseline.from_groups(func_groups, block_groups)
-        new_baseline.path = Path(args.baseline)
-        new_baseline.save()
-        print(f"Baseline updated: {args.baseline}")
-        return
-
-    new_func, new_block = baseline.diff(func_groups, block_groups)
+        console.print(f"[info]HTML report saved:[/info] {out}")

     if args.json_out:
-        out = Path(args.json_out)
+        out = Path(args.json_out).expanduser().resolve()
         out.parent.mkdir(parents=True, exist_ok=True)
         out.write_text(
             to_json({"functions": func_groups, "blocks": block_groups}),
             "utf-8",
         )
+        console.print(f"[info]JSON report saved:[/info] {out}")

     if args.text_out:
-        out = Path(args.text_out)
+        out = Path(args.text_out).expanduser().resolve()
         out.parent.mkdir(parents=True, exist_ok=True)
         out.write_text(
             "FUNCTION CLONES\n"
@@ -159,20 +380,29 @@ def main() -> None:
             + to_text(block_groups),
             "utf-8",
         )
+        console.print(f"[info]Text report saved:[/info] {out}")

-
-    print(f"Changed files parsed: {changed}")
-    print(f"Function clone groups: {len(func_groups)}")
-    print(f"Block clone groups: {len(block_groups)}")
-
+    # Exit Codes
     if args.fail_on_new and (new_func or new_block):
-        print("\n❌ New code clones detected
-
-
-
+        console.print("\n[error]❌ FAILED: New code clones detected![/error]")
+        if new_func:
+            console.print(f"  New Functions: {', '.join(sorted(new_func))}")
+        if new_block:
+            console.print(f"  New Blocks: {', '.join(sorted(new_block))}")
+        sys.exit(3)
+
+    if 0 <= args.fail_threshold < (func_clones_count + block_clones_count):
+        console.print(
+            f"\n[error]❌ FAILED: Total clones ({func_clones_count + block_clones_count}) "
+            f"exceed threshold ({args.fail_threshold})![/error]"
+        )
+        sys.exit(2)

-    if
-
+    if not args.update_baseline and not args.fail_on_new and new_clones_count > 0:
+        console.print(
+            "\n[warning]New clones detected but --fail-on-new not set.[/warning]\n"
+            "Run with --update-baseline to accept them as technical debt."
+        )


 if __name__ == "__main__":
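Because the 1.2.0 cli.py turns clone regressions into distinct exit codes (3 when --fail-on-new finds clones not in the baseline, 2 when --fail-threshold is exceeded) instead of plain prints, a CI step can branch on the result. A minimal sketch of one way to consume those codes; the threshold value and project path are illustrative, the flags and baseline filename come from the diff above.

# Sketch only: wrap the codeclone CLI in CI and map its exit codes to messages.
import subprocess
import sys

proc = subprocess.run(
    [
        "codeclone", ".",
        "--baseline", "codeclone.baseline.json",
        "--fail-on-new",
        "--fail-threshold", "50",  # illustrative limit; default (-1) disables the check
        "--no-progress",
    ]
)

if proc.returncode == 3:
    print("New clones detected; run with --update-baseline to accept them.")
elif proc.returncode == 2:
    print("Total clone count exceeded the configured threshold.")
sys.exit(proc.returncode)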