codeclone 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codeclone/cli.py CHANGED
@@ -9,9 +9,22 @@ Licensed under the MIT License.
9
9
  from __future__ import annotations
10
10
 
11
11
  import argparse
12
- from concurrent.futures import ProcessPoolExecutor
12
+ import sys
13
+ from concurrent.futures import ProcessPoolExecutor, as_completed
13
14
  from pathlib import Path
14
15
 
16
+ from rich.console import Console
17
+ from rich.panel import Panel
18
+ from rich.progress import (
19
+ Progress,
20
+ SpinnerColumn,
21
+ TextColumn,
22
+ BarColumn,
23
+ TimeElapsedColumn,
24
+ )
25
+ from rich.table import Table
26
+ from rich.theme import Theme
27
+
15
28
  from .baseline import Baseline
16
29
  from .cache import Cache, file_stat_signature
17
30
  from .extractor import extract_units_from_source
@@ -20,6 +33,22 @@ from .normalize import NormalizationConfig
20
33
  from .report import build_groups, build_block_groups, to_json, to_text
21
34
  from .scanner import iter_py_files, module_name_from_path
22
35
 
36
+ # Custom theme for Rich
37
+ custom_theme = Theme(
38
+ {
39
+ "info": "cyan",
40
+ "warning": "yellow",
41
+ "error": "bold red",
42
+ "success": "bold green",
43
+ "dim": "dim",
44
+ }
45
+ )
46
+ console = Console(theme=custom_theme, width=200)
47
+
48
+
49
+ def expand_path(p: str) -> Path:
50
+ return Path(p).expanduser().resolve()
51
+
23
52
 
24
53
  def process_file(
25
54
  filepath: str,
@@ -48,75 +77,277 @@ def process_file(
48
77
  return filepath, stat, units, blocks
49
78
 
50
79
 
80
+ def print_banner():
81
+ console.print(
82
+ Panel.fit(
83
+ "[bold white]CodeClone[/bold white] [dim]v1.2.0[/dim]\n"
84
+ "[italic]Architectural duplication detector[/italic]",
85
+ border_style="blue",
86
+ padding=(0, 2),
87
+ )
88
+ )
89
+
90
+
51
91
  def main() -> None:
52
- ap = argparse.ArgumentParser("codeclone")
53
- ap.add_argument("root", help="Project root")
54
- ap.add_argument("--processes", type=int, default=4)
55
- ap.add_argument("--cache", default="~/.cache/codeclone/")
56
- ap.add_argument("--min-loc", type=int, default=15)
57
- ap.add_argument("--min-stmt", type=int, default=6)
58
- ap.add_argument("--json-out", default="")
59
- ap.add_argument("--text-out", default="")
60
- ap.add_argument("--html-out", default="")
61
- ap.add_argument("--fail-if-groups", type=int, default=-1)
62
- ap.add_argument("--baseline", default="~/.config/codeclone/baseline.json")
63
- ap.add_argument("--update-baseline", action="store_true")
64
- ap.add_argument("--fail-on-new", action="store_true")
92
+ ap = argparse.ArgumentParser(
93
+ prog="codeclone",
94
+ description="AST and CFG-based code clone detector for Python.",
95
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
96
+ )
97
+
98
+ # Core Arguments
99
+ core_group = ap.add_argument_group("Target")
100
+ core_group.add_argument(
101
+ "root",
102
+ nargs="?",
103
+ default=".",
104
+ help="Project root directory to scan.",
105
+ )
106
+
107
+ # Tuning
108
+ tune_group = ap.add_argument_group("Analysis Tuning")
109
+ tune_group.add_argument(
110
+ "--min-loc",
111
+ type=int,
112
+ default=15,
113
+ help="Minimum Lines of Code (LOC) to consider.",
114
+ )
115
+ tune_group.add_argument(
116
+ "--min-stmt",
117
+ type=int,
118
+ default=6,
119
+ help="Minimum AST statements to consider.",
120
+ )
121
+ tune_group.add_argument(
122
+ "--processes",
123
+ type=int,
124
+ default=4,
125
+ help="Number of parallel worker processes.",
126
+ )
127
+ tune_group.add_argument(
128
+ "--cache-dir",
129
+ default="~/.cache/codeclone/cache.json",
130
+ help="Path to the cache file to speed up subsequent runs.",
131
+ )
132
+
133
+ # Baseline & CI
134
+ ci_group = ap.add_argument_group("Baseline & CI/CD")
135
+ ci_group.add_argument(
136
+ "--baseline",
137
+ default="codeclone.baseline.json",
138
+ help="Path to the baseline file (stored in repo).",
139
+ )
140
+ ci_group.add_argument(
141
+ "--update-baseline",
142
+ action="store_true",
143
+ help="Overwrite the baseline file with current results.",
144
+ )
145
+ ci_group.add_argument(
146
+ "--fail-on-new",
147
+ action="store_true",
148
+ help="Exit with error if NEW clones (not in baseline) are detected.",
149
+ )
150
+ ci_group.add_argument(
151
+ "--fail-threshold",
152
+ type=int,
153
+ default=-1,
154
+ metavar="MAX_CLONES",
155
+ help="Exit with error if total clone groups exceed this number.",
156
+ )
157
+
158
+ # Output
159
+ out_group = ap.add_argument_group("Reporting")
160
+ out_group.add_argument(
161
+ "--html",
162
+ dest="html_out",
163
+ metavar="FILE",
164
+ help="Generate an HTML report to FILE.",
165
+ )
166
+ out_group.add_argument(
167
+ "--json",
168
+ dest="json_out",
169
+ metavar="FILE",
170
+ help="Generate a JSON report to FILE.",
171
+ )
172
+ out_group.add_argument(
173
+ "--text",
174
+ dest="text_out",
175
+ metavar="FILE",
176
+ help="Generate a text report to FILE.",
177
+ )
178
+ out_group.add_argument(
179
+ "--no-progress",
180
+ action="store_true",
181
+ help="Disable the progress bar (recommended for CI logs).",
182
+ )
65
183
 
66
184
  args = ap.parse_args()
67
185
 
68
- cfg = NormalizationConfig()
186
+ print_banner()
69
187
 
70
- cache = Cache(args.cache)
188
+ root_path = Path(args.root).resolve()
189
+ if not root_path.exists():
190
+ console.print(f"[error]Root path does not exist: {root_path}[/error]")
191
+ sys.exit(1)
192
+
193
+ console.print(f"[info]Scanning root:[/info] {root_path}")
194
+
195
+ # Initialize Cache
196
+ cfg = NormalizationConfig()
197
+ cache_path = Path(args.cache_dir).expanduser()
198
+ cache = Cache(cache_path)
71
199
  cache.load()
72
200
 
73
201
  all_units: list[dict] = []
74
202
  all_blocks: list[dict] = []
75
- changed = 0
76
-
203
+ changed_files_count = 0
77
204
  files_to_process: list[str] = []
78
205
 
79
- for fp in iter_py_files(args.root):
80
- stat = file_stat_signature(fp)
81
- cached = cache.get_file_entry(fp)
82
-
83
- if cached and cached.get("stat") == stat:
84
- all_units.extend(cached.get("units", []))
85
- all_blocks.extend(cached.get("blocks", []))
206
+ # Discovery phase
207
+ with console.status("[bold green]Discovering Python files...", spinner="dots"):
208
+ for fp in iter_py_files(str(root_path)):
209
+ stat = file_stat_signature(fp)
210
+ cached = cache.get_file_entry(fp)
211
+ if cached and cached.get("stat") == stat:
212
+ all_units.extend(cached.get("units", []))
213
+ all_blocks.extend(cached.get("blocks", []))
214
+ else:
215
+ files_to_process.append(fp)
216
+
217
+ total_files = len(files_to_process)
218
+
219
+ # Processing phase
220
+ if total_files > 0:
221
+ if args.no_progress:
222
+ console.print(f"[info]Processing {total_files} changed files...[/info]")
223
+ with ProcessPoolExecutor(max_workers=args.processes) as executor:
224
+ futures = [
225
+ executor.submit(
226
+ process_file,
227
+ fp,
228
+ str(root_path),
229
+ cfg,
230
+ args.min_loc,
231
+ args.min_stmt,
232
+ )
233
+ for fp in files_to_process
234
+ ]
235
+ for future in as_completed(futures):
236
+ try:
237
+ result = future.result()
238
+ except Exception as e:
239
+ console.print(f"[warning]Failed to process file: {e}[/warning]")
240
+ continue
241
+
242
+ if result:
243
+ fp, stat, units, blocks = result
244
+ cache.put_file_entry(fp, stat, units, blocks)
245
+ changed_files_count += 1
246
+ all_units.extend([u.__dict__ for u in units])
247
+ all_blocks.extend([b.__dict__ for b in blocks])
86
248
  else:
87
- files_to_process.append(fp)
88
-
89
- with ProcessPoolExecutor(max_workers=args.processes) as executor:
90
- futures = [
91
- executor.submit(
92
- process_file,
93
- fp,
94
- args.root,
95
- cfg,
96
- args.min_loc,
97
- args.min_stmt,
249
+ with Progress(
250
+ SpinnerColumn(),
251
+ TextColumn("[progress.description]{task.description}"),
252
+ BarColumn(),
253
+ TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
254
+ TimeElapsedColumn(),
255
+ console=console,
256
+ ) as progress:
257
+ task = progress.add_task(
258
+ f"Analyzing {total_files} files...", total=total_files
259
+ )
260
+ with ProcessPoolExecutor(max_workers=args.processes) as executor:
261
+ futures = [
262
+ executor.submit(
263
+ process_file,
264
+ fp,
265
+ str(root_path),
266
+ cfg,
267
+ args.min_loc,
268
+ args.min_stmt,
269
+ )
270
+ for fp in files_to_process
271
+ ]
272
+ for future in as_completed(futures):
273
+ try:
274
+ result = future.result()
275
+ except Exception:
276
+ # Log error but keep progress bar moving?
277
+ # console.print might break progress bar layout, better to rely on rich logging or just skip
278
+ # console.print(f"[warning]Failed to process file: {e}[/warning]")
279
+ continue
280
+ finally:
281
+ progress.advance(task)
282
+
283
+ if result:
284
+ fp, stat, units, blocks = result
285
+ cache.put_file_entry(fp, stat, units, blocks)
286
+ changed_files_count += 1
287
+ all_units.extend([u.__dict__ for u in units])
288
+ all_blocks.extend([b.__dict__ for b in blocks])
289
+
290
+ # Analysis phase
291
+ with console.status("[bold green]Grouping clones...", spinner="dots"):
292
+ func_groups = build_groups(all_units)
293
+ block_groups = build_block_groups(all_blocks)
294
+ cache.save()
295
+
296
+ # Reporting
297
+ func_clones_count = len(func_groups)
298
+ block_clones_count = len(block_groups)
299
+
300
+ # Baseline Logic
301
+ baseline_path = Path(args.baseline).expanduser().resolve()
302
+
303
+ # If user didn't specify path, and default logic applies, baseline_path is now ./codeclone_baseline.json
304
+
305
+ baseline = Baseline(baseline_path)
306
+ baseline_exists = baseline_path.exists()
307
+
308
+ if baseline_exists:
309
+ baseline.load()
310
+ else:
311
+ if not args.update_baseline:
312
+ console.print(
313
+ f"[warning]Baseline file not found at: [bold]{baseline_path}[/bold][/warning]\n"
314
+ "[dim]Comparing against an empty baseline. "
315
+ "Use --update-baseline to create it.[/dim]"
98
316
  )
99
- for fp in files_to_process
100
- ]
101
317
 
102
- for future in futures:
103
- result = future.result()
104
- if result is None:
105
- continue
318
+ if args.update_baseline:
319
+ new_baseline = Baseline.from_groups(
320
+ func_groups, block_groups, path=baseline_path
321
+ )
322
+ new_baseline.save()
323
+ console.print(f"[success]✔ Baseline updated:[/success] {baseline_path}")
324
+ # When updating, we don't fail on new, we just saved the new state.
325
+ # But we might still want to print the summary.
326
+
327
+ # Diff
328
+ new_func, new_block = baseline.diff(func_groups, block_groups)
329
+ new_clones_count = len(new_func) + len(new_block)
106
330
 
107
- fp, stat, units, blocks = result
331
+ # Summary Table
332
+ table = Table(title="Analysis Summary", border_style="blue")
333
+ table.add_column("Metric", style="cyan")
334
+ table.add_column("Value", style="bold white")
108
335
 
109
- cache.put_file_entry(fp, stat, units, blocks)
110
- changed += 1
336
+ table.add_row("Files Processed", str(changed_files_count))
337
+ table.add_row("Total Function Clones", str(func_clones_count))
338
+ table.add_row("Total Block Clones", str(block_clones_count))
111
339
 
112
- all_units.extend([u.__dict__ for u in units])
113
- all_blocks.extend([b.__dict__ for b in blocks])
340
+ if baseline_exists:
341
+ style = "error" if new_clones_count > 0 else "success"
342
+ table.add_row(
343
+ "New Clones (vs Baseline)", f"[{style}]{new_clones_count}[/{style}]"
344
+ )
114
345
 
115
- func_groups = build_groups(all_units)
116
- block_groups = build_block_groups(all_blocks)
346
+ console.print(table)
117
347
 
348
+ # Outputs
118
349
  if args.html_out:
119
- out = Path(args.html_out)
350
+ out = Path(args.html_out).expanduser().resolve()
120
351
  out.parent.mkdir(parents=True, exist_ok=True)
121
352
  out.write_text(
122
353
  build_html_report(
@@ -128,29 +359,19 @@ def main() -> None:
128
359
  ),
129
360
  "utf-8",
130
361
  )
131
-
132
- baseline = Baseline(args.baseline)
133
- baseline.load()
134
-
135
- if args.update_baseline:
136
- new_baseline = Baseline.from_groups(func_groups, block_groups)
137
- new_baseline.path = Path(args.baseline)
138
- new_baseline.save()
139
- print(f"Baseline updated: {args.baseline}")
140
- return
141
-
142
- new_func, new_block = baseline.diff(func_groups, block_groups)
362
+ console.print(f"[info]HTML report saved:[/info] {out}")
143
363
 
144
364
  if args.json_out:
145
- out = Path(args.json_out)
365
+ out = Path(args.json_out).expanduser().resolve()
146
366
  out.parent.mkdir(parents=True, exist_ok=True)
147
367
  out.write_text(
148
368
  to_json({"functions": func_groups, "blocks": block_groups}),
149
369
  "utf-8",
150
370
  )
371
+ console.print(f"[info]JSON report saved:[/info] {out}")
151
372
 
152
373
  if args.text_out:
153
- out = Path(args.text_out)
374
+ out = Path(args.text_out).expanduser().resolve()
154
375
  out.parent.mkdir(parents=True, exist_ok=True)
155
376
  out.write_text(
156
377
  "FUNCTION CLONES\n"
@@ -159,20 +380,29 @@ def main() -> None:
159
380
  + to_text(block_groups),
160
381
  "utf-8",
161
382
  )
383
+ console.print(f"[info]Text report saved:[/info] {out}")
162
384
 
163
- print(f"Scanned root: {args.root}")
164
- print(f"Changed files parsed: {changed}")
165
- print(f"Function clone groups: {len(func_groups)}")
166
- print(f"Block clone groups: {len(block_groups)}")
167
-
385
+ # Exit Codes
168
386
  if args.fail_on_new and (new_func or new_block):
169
- print("\n❌ New code clones detected\n")
170
- raise SystemExit(3)
171
-
172
- cache.save()
387
+ console.print("\n[error]FAILED: New code clones detected![/error]")
388
+ if new_func:
389
+ console.print(f" New Functions: {', '.join(sorted(new_func))}")
390
+ if new_block:
391
+ console.print(f" New Blocks: {', '.join(sorted(new_block))}")
392
+ sys.exit(3)
393
+
394
+ if 0 <= args.fail_threshold < (func_clones_count + block_clones_count):
395
+ console.print(
396
+ f"\n[error]❌ FAILED: Total clones ({func_clones_count + block_clones_count}) "
397
+ f"exceed threshold ({args.fail_threshold})![/error]"
398
+ )
399
+ sys.exit(2)
173
400
 
174
- if 0 <= args.fail_if_groups < len(func_groups):
175
- raise SystemExit(2)
401
+ if not args.update_baseline and not args.fail_on_new and new_clones_count > 0:
402
+ console.print(
403
+ "\n[warning]New clones detected but --fail-on-new not set.[/warning]\n"
404
+ "Run with --update-baseline to accept them as technical debt."
405
+ )
176
406
 
177
407
 
178
408
  if __name__ == "__main__":