codeclone 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codeclone/cli.py CHANGED
@@ -1,105 +1,377 @@
1
+ """
2
+ CodeClone — AST and CFG-based code clone detector for Python
3
+ focused on architectural duplication.
4
+
5
+ Copyright (c) 2026 Den Rozhnovskiy
6
+ Licensed under the MIT License.
7
+ """
8
+
1
9
  from __future__ import annotations
2
10
 
3
11
  import argparse
12
+ import sys
13
+ from concurrent.futures import ProcessPoolExecutor, as_completed
4
14
  from pathlib import Path
5
15
 
16
+ from rich.console import Console
17
+ from rich.panel import Panel
18
+ from rich.progress import (
19
+ Progress,
20
+ SpinnerColumn,
21
+ TextColumn,
22
+ BarColumn,
23
+ TimeElapsedColumn,
24
+ )
25
+ from rich.table import Table
26
+ from rich.theme import Theme
27
+
6
28
  from .baseline import Baseline
7
29
  from .cache import Cache, file_stat_signature
8
30
  from .extractor import extract_units_from_source
31
+ from .html_report import build_html_report
9
32
  from .normalize import NormalizationConfig
10
33
  from .report import build_groups, build_block_groups, to_json, to_text
11
34
  from .scanner import iter_py_files, module_name_from_path
12
35
 
36
+ # Custom theme for Rich
37
+ custom_theme = Theme(
38
+ {
39
+ "info": "cyan",
40
+ "warning": "yellow",
41
+ "error": "bold red",
42
+ "success": "bold green",
43
+ "dim": "dim",
44
+ }
45
+ )
46
+ console = Console(theme=custom_theme, width=200)
47
+
48
+
49
+ def expand_path(p: str) -> Path:
50
+ return Path(p).expanduser().resolve()
51
+
52
+
53
+ def process_file(
54
+ filepath: str,
55
+ root: str,
56
+ cfg: NormalizationConfig,
57
+ min_loc: int,
58
+ min_stmt: int,
59
+ ) -> tuple[str, dict, list, list] | None:
60
+ try:
61
+ source = Path(filepath).read_text("utf-8")
62
+ except UnicodeDecodeError:
63
+ return None
64
+
65
+ stat = file_stat_signature(filepath)
66
+ module_name = module_name_from_path(root, filepath)
67
+
68
+ units, blocks = extract_units_from_source(
69
+ source=source,
70
+ filepath=filepath,
71
+ module_name=module_name,
72
+ cfg=cfg,
73
+ min_loc=min_loc,
74
+ min_stmt=min_stmt,
75
+ )
76
+
77
+ return filepath, stat, units, blocks
78
+
79
+
80
+ def print_banner():
81
+ console.print(
82
+ Panel.fit(
83
+ "[bold white]CodeClone[/bold white] [dim]v1.2.0[/dim]\n"
84
+ "[italic]Architectural duplication detector[/italic]",
85
+ border_style="blue",
86
+ padding=(0, 2),
87
+ )
88
+ )
13
89
 
14
- def main():
15
- ap = argparse.ArgumentParser("codeclone")
16
- ap.add_argument("root", help="Project root")
17
- ap.add_argument("--cache", default="~/.cache/codeclone/")
18
- ap.add_argument("--min-loc", type=int, default=15)
19
- ap.add_argument("--min-stmt", type=int, default=6)
20
- ap.add_argument("--json-out", default="")
21
- ap.add_argument("--text-out", default="")
22
- ap.add_argument("--fail-if-groups", type=int, default=-1)
23
- ap.add_argument("--baseline", default="~/.config/codeclone/baseline.json")
24
- ap.add_argument("--update-baseline", action="store_true",
25
- help="Write current clones as baseline")
26
- ap.add_argument("--fail-on-new", action="store_true",
27
- help="Fail if new clones appear vs baseline")
28
- args = ap.parse_args()
29
90
 
30
- cfg = NormalizationConfig(
31
- ignore_docstrings=True,
32
- ignore_type_annotations=True,
33
- normalize_attributes=True,
34
- normalize_constants=True,
35
- normalize_names=True,
91
+ def main() -> None:
92
+ ap = argparse.ArgumentParser(
93
+ prog="codeclone",
94
+ description="AST and CFG-based code clone detector for Python.",
95
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
96
+ )
97
+
98
+ # Core Arguments
99
+ core_group = ap.add_argument_group("Target")
100
+ core_group.add_argument(
101
+ "root",
102
+ nargs="?",
103
+ default=".",
104
+ help="Project root directory to scan.",
105
+ )
106
+
107
+ # Tuning
108
+ tune_group = ap.add_argument_group("Analysis Tuning")
109
+ tune_group.add_argument(
110
+ "--min-loc",
111
+ type=int,
112
+ default=15,
113
+ help="Minimum Lines of Code (LOC) to consider.",
114
+ )
115
+ tune_group.add_argument(
116
+ "--min-stmt",
117
+ type=int,
118
+ default=6,
119
+ help="Minimum AST statements to consider.",
120
+ )
121
+ tune_group.add_argument(
122
+ "--processes",
123
+ type=int,
124
+ default=4,
125
+ help="Number of parallel worker processes.",
126
+ )
127
+ tune_group.add_argument(
128
+ "--cache-dir",
129
+ default="~/.cache/codeclone/cache.json",
130
+ help="Path to the cache file to speed up subsequent runs.",
131
+ )
132
+
133
+ # Baseline & CI
134
+ ci_group = ap.add_argument_group("Baseline & CI/CD")
135
+ ci_group.add_argument(
136
+ "--baseline",
137
+ default="codeclone.baseline.json",
138
+ help="Path to the baseline file (stored in repo).",
139
+ )
140
+ ci_group.add_argument(
141
+ "--update-baseline",
142
+ action="store_true",
143
+ help="Overwrite the baseline file with current results.",
144
+ )
145
+ ci_group.add_argument(
146
+ "--fail-on-new",
147
+ action="store_true",
148
+ help="Exit with error if NEW clones (not in baseline) are detected.",
149
+ )
150
+ ci_group.add_argument(
151
+ "--fail-threshold",
152
+ type=int,
153
+ default=-1,
154
+ metavar="MAX_CLONES",
155
+ help="Exit with error if total clone groups exceed this number.",
156
+ )
157
+
158
+ # Output
159
+ out_group = ap.add_argument_group("Reporting")
160
+ out_group.add_argument(
161
+ "--html",
162
+ dest="html_out",
163
+ metavar="FILE",
164
+ help="Generate an HTML report to FILE.",
165
+ )
166
+ out_group.add_argument(
167
+ "--json",
168
+ dest="json_out",
169
+ metavar="FILE",
170
+ help="Generate a JSON report to FILE.",
171
+ )
172
+ out_group.add_argument(
173
+ "--text",
174
+ dest="text_out",
175
+ metavar="FILE",
176
+ help="Generate a text report to FILE.",
36
177
  )
178
+ out_group.add_argument(
179
+ "--no-progress",
180
+ action="store_true",
181
+ help="Disable the progress bar (recommended for CI logs).",
182
+ )
183
+
184
+ args = ap.parse_args()
185
+
186
+ print_banner()
37
187
 
38
- cache = Cache(args.cache)
188
+ root_path = Path(args.root).resolve()
189
+ if not root_path.exists():
190
+ console.print(f"[error]Root path does not exist: {root_path}[/error]")
191
+ sys.exit(1)
192
+
193
+ console.print(f"[info]Scanning root:[/info] {root_path}")
194
+
195
+ # Initialize Cache
196
+ cfg = NormalizationConfig()
197
+ cache_path = Path(args.cache_dir).expanduser()
198
+ cache = Cache(cache_path)
39
199
  cache.load()
40
200
 
41
201
  all_units: list[dict] = []
42
202
  all_blocks: list[dict] = []
43
- changed = 0
44
-
45
- for fp in iter_py_files(args.root):
46
- stat = file_stat_signature(fp)
47
- cached = cache.get_file_entry(fp)
48
-
49
- if cached and cached.get("stat") == stat:
50
- all_units.extend(cached.get("units", []))
51
- all_blocks.extend(cached.get("blocks", []))
52
- continue
53
-
54
- try:
55
- source = Path(fp).read_text("utf-8")
56
- except UnicodeDecodeError:
57
- continue
58
-
59
- module_name = module_name_from_path(args.root, fp)
60
- units, blocks = extract_units_from_source(
61
- source=source,
62
- filepath=fp,
63
- module_name=module_name,
64
- cfg=cfg,
65
- min_loc=args.min_loc,
66
- min_stmt=args.min_stmt,
67
- )
203
+ changed_files_count = 0
204
+ files_to_process: list[str] = []
205
+
206
+ # Discovery phase
207
+ with console.status("[bold green]Discovering Python files...", spinner="dots"):
208
+ for fp in iter_py_files(str(root_path)):
209
+ stat = file_stat_signature(fp)
210
+ cached = cache.get_file_entry(fp)
211
+ if cached and cached.get("stat") == stat:
212
+ all_units.extend(cached.get("units", []))
213
+ all_blocks.extend(cached.get("blocks", []))
214
+ else:
215
+ files_to_process.append(fp)
68
216
 
69
- cache.put_file_entry(fp, stat, units, blocks)
70
- changed += 1
217
+ total_files = len(files_to_process)
71
218
 
72
- all_units.extend([u.__dict__ for u in units])
73
- all_blocks.extend([b.__dict__ for b in blocks])
219
+ # Processing phase
220
+ if total_files > 0:
221
+ if args.no_progress:
222
+ console.print(f"[info]Processing {total_files} changed files...[/info]")
223
+ with ProcessPoolExecutor(max_workers=args.processes) as executor:
224
+ futures = [
225
+ executor.submit(
226
+ process_file,
227
+ fp,
228
+ str(root_path),
229
+ cfg,
230
+ args.min_loc,
231
+ args.min_stmt,
232
+ )
233
+ for fp in files_to_process
234
+ ]
235
+ for future in as_completed(futures):
236
+ try:
237
+ result = future.result()
238
+ except Exception as e:
239
+ console.print(f"[warning]Failed to process file: {e}[/warning]")
240
+ continue
74
241
 
75
- func_groups = build_groups(all_units)
76
- block_groups = build_block_groups(all_blocks)
242
+ if result:
243
+ fp, stat, units, blocks = result
244
+ cache.put_file_entry(fp, stat, units, blocks)
245
+ changed_files_count += 1
246
+ all_units.extend([u.__dict__ for u in units])
247
+ all_blocks.extend([b.__dict__ for b in blocks])
248
+ else:
249
+ with Progress(
250
+ SpinnerColumn(),
251
+ TextColumn("[progress.description]{task.description}"),
252
+ BarColumn(),
253
+ TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
254
+ TimeElapsedColumn(),
255
+ console=console,
256
+ ) as progress:
257
+ task = progress.add_task(
258
+ f"Analyzing {total_files} files...", total=total_files
259
+ )
260
+ with ProcessPoolExecutor(max_workers=args.processes) as executor:
261
+ futures = [
262
+ executor.submit(
263
+ process_file,
264
+ fp,
265
+ str(root_path),
266
+ cfg,
267
+ args.min_loc,
268
+ args.min_stmt,
269
+ )
270
+ for fp in files_to_process
271
+ ]
272
+ for future in as_completed(futures):
273
+ try:
274
+ result = future.result()
275
+ except Exception:
276
+ # Intentionally swallow the error so the progress bar keeps moving.
277
+ # console.print here could corrupt the live progress-bar layout; prefer rich logging, or simply skip the message.
278
+ # console.print(f"[warning]Failed to process file: {e}[/warning]")
279
+ continue
280
+ finally:
281
+ progress.advance(task)
77
282
 
78
- baseline = Baseline(args.baseline)
79
- baseline.load()
283
+ if result:
284
+ fp, stat, units, blocks = result
285
+ cache.put_file_entry(fp, stat, units, blocks)
286
+ changed_files_count += 1
287
+ all_units.extend([u.__dict__ for u in units])
288
+ all_blocks.extend([b.__dict__ for b in blocks])
289
+
290
+ # Analysis phase
291
+ with console.status("[bold green]Grouping clones...", spinner="dots"):
292
+ func_groups = build_groups(all_units)
293
+ block_groups = build_block_groups(all_blocks)
294
+ cache.save()
295
+
296
+ # Reporting
297
+ func_clones_count = len(func_groups)
298
+ block_clones_count = len(block_groups)
299
+
300
+ # Baseline Logic
301
+ baseline_path = Path(args.baseline).expanduser().resolve()
302
+
303
+ # If the user didn't specify a path, the default applies and baseline_path is now ./codeclone.baseline.json
304
+
305
+ baseline = Baseline(baseline_path)
306
+ baseline_exists = baseline_path.exists()
307
+
308
+ if baseline_exists:
309
+ baseline.load()
310
+ else:
311
+ if not args.update_baseline:
312
+ console.print(
313
+ f"[warning]Baseline file not found at: [bold]{baseline_path}[/bold][/warning]\n"
314
+ "[dim]Comparing against an empty baseline. "
315
+ "Use --update-baseline to create it.[/dim]"
316
+ )
80
317
 
81
318
  if args.update_baseline:
82
- new_baseline = Baseline.from_groups(func_groups, block_groups)
83
- new_baseline.path = Path(args.baseline)
319
+ new_baseline = Baseline.from_groups(
320
+ func_groups, block_groups, path=baseline_path
321
+ )
84
322
  new_baseline.save()
85
- print(f"Baseline updated: {args.baseline}")
86
- return
323
+ console.print(f"[success]✔ Baseline updated:[/success] {baseline_path}")
324
+ # When updating the baseline we don't fail on new clones; the new state has just been saved.
325
+ # But we might still want to print the summary.
87
326
 
327
+ # Diff
88
328
  new_func, new_block = baseline.diff(func_groups, block_groups)
329
+ new_clones_count = len(new_func) + len(new_block)
330
+
331
+ # Summary Table
332
+ table = Table(title="Analysis Summary", border_style="blue")
333
+ table.add_column("Metric", style="cyan")
334
+ table.add_column("Value", style="bold white")
335
+
336
+ table.add_row("Files Processed", str(changed_files_count))
337
+ table.add_row("Total Function Clones", str(func_clones_count))
338
+ table.add_row("Total Block Clones", str(block_clones_count))
339
+
340
+ if baseline_exists:
341
+ style = "error" if new_clones_count > 0 else "success"
342
+ table.add_row(
343
+ "New Clones (vs Baseline)", f"[{style}]{new_clones_count}[/{style}]"
344
+ )
345
+
346
+ console.print(table)
347
+
348
+ # Outputs
349
+ if args.html_out:
350
+ out = Path(args.html_out).expanduser().resolve()
351
+ out.parent.mkdir(parents=True, exist_ok=True)
352
+ out.write_text(
353
+ build_html_report(
354
+ func_groups=func_groups,
355
+ block_groups=block_groups,
356
+ title="CodeClone Report",
357
+ context_lines=3,
358
+ max_snippet_lines=220,
359
+ ),
360
+ "utf-8",
361
+ )
362
+ console.print(f"[info]HTML report saved:[/info] {out}")
89
363
 
90
364
  if args.json_out:
91
- out = Path(args.json_out)
365
+ out = Path(args.json_out).expanduser().resolve()
92
366
  out.parent.mkdir(parents=True, exist_ok=True)
93
367
  out.write_text(
94
- to_json({
95
- "functions": func_groups,
96
- "blocks": block_groups,
97
- }),
368
+ to_json({"functions": func_groups, "blocks": block_groups}),
98
369
  "utf-8",
99
370
  )
371
+ console.print(f"[info]JSON report saved:[/info] {out}")
100
372
 
101
373
  if args.text_out:
102
- out = Path(args.text_out)
374
+ out = Path(args.text_out).expanduser().resolve()
103
375
  out.parent.mkdir(parents=True, exist_ok=True)
104
376
  out.write_text(
105
377
  "FUNCTION CLONES\n"
@@ -108,37 +380,29 @@ def main():
108
380
  + to_text(block_groups),
109
381
  "utf-8",
110
382
  )
383
+ console.print(f"[info]Text report saved:[/info] {out}")
111
384
 
112
- print(f"Scanned root: {args.root}")
113
- print(f"Changed files parsed: {changed}")
114
- print(f"Function clone groups: {len(func_groups)}")
115
- print(f"Block clone groups: {len(block_groups)}")
116
-
117
- if args.fail_on_new:
118
- if new_func or new_block:
119
- print("\n❌ New code clones detected\n")
120
-
121
- if new_func:
122
- print(f"New FUNCTION clone groups: {len(new_func)}")
123
- for k in sorted(new_func):
124
- print(f" - {k}")
125
-
126
- if new_block:
127
- print(f"New BLOCK clone groups: {len(new_block)}")
128
- for k in sorted(new_block):
129
- print(f" - {k}")
385
+ # Exit Codes
386
+ if args.fail_on_new and (new_func or new_block):
387
+ console.print("\n[error]❌ FAILED: New code clones detected![/error]")
388
+ if new_func:
389
+ console.print(f" New Functions: {', '.join(sorted(new_func))}")
390
+ if new_block:
391
+ console.print(f" New Blocks: {', '.join(sorted(new_block))}")
392
+ sys.exit(3)
130
393
 
131
- raise SystemExit(3)
132
-
133
- print(f"Baseline function clones: {len(baseline.functions)}")
134
- print(f"Baseline block clones: {len(baseline.blocks)}")
135
- print(f"New function clones: {len(new_func)}")
136
- print(f"New block clones: {len(new_block)}")
137
-
138
- cache.save()
394
+ if 0 <= args.fail_threshold < (func_clones_count + block_clones_count):
395
+ console.print(
396
+ f"\n[error]❌ FAILED: Total clones ({func_clones_count + block_clones_count}) "
397
+ f"exceed threshold ({args.fail_threshold})![/error]"
398
+ )
399
+ sys.exit(2)
139
400
 
140
- if 0 <= args.fail_if_groups < len(func_groups):
141
- raise SystemExit(2)
401
+ if not args.update_baseline and not args.fail_on_new and new_clones_count > 0:
402
+ console.print(
403
+ "\n[warning]New clones detected but --fail-on-new not set.[/warning]\n"
404
+ "Run with --update-baseline to accept them as technical debt."
405
+ )
142
406
 
143
407
 
144
408
  if __name__ == "__main__":
codeclone/extractor.py CHANGED
@@ -1,11 +1,26 @@
1
+ """
2
+ CodeClone — AST and CFG-based code clone detector for Python
3
+ focused on architectural duplication.
4
+
5
+ Copyright (c) 2026 Den Rozhnovskiy
6
+ Licensed under the MIT License.
7
+ """
8
+
1
9
  from __future__ import annotations
2
10
 
3
11
  import ast
4
12
  from dataclasses import dataclass
13
+ from typing import Sequence
5
14
 
6
15
  from .blocks import extract_blocks, BlockUnit
16
+ from .cfg import CFGBuilder
7
17
  from .fingerprint import sha1, bucket_loc
8
- from .normalize import NormalizationConfig, normalized_ast_dump
18
+ from .normalize import NormalizationConfig, normalized_ast_dump_from_list
19
+
20
+
21
+ # =========================
22
+ # Data structures
23
+ # =========================
9
24
 
10
25
 
11
26
  @dataclass(frozen=True)
@@ -20,37 +35,83 @@ class Unit:
20
35
  loc_bucket: str
21
36
 
22
37
 
38
+ # =========================
39
+ # Helpers
40
+ # =========================
41
+
42
+
23
43
  def _stmt_count(node: ast.AST) -> int:
24
44
  body = getattr(node, "body", None)
25
45
  return len(body) if isinstance(body, list) else 0
26
46
 
27
47
 
28
48
  class _QualnameBuilder(ast.NodeVisitor):
29
- def __init__(self):
49
+ def __init__(self) -> None:
30
50
  self.stack: list[str] = []
31
- self.units: list[tuple[str, ast.AST]] = []
51
+ self.units: list[tuple[str, ast.FunctionDef | ast.AsyncFunctionDef]] = []
32
52
 
33
- def visit_ClassDef(self, node: ast.ClassDef):
53
+ def visit_ClassDef(self, node: ast.ClassDef) -> None:
34
54
  self.stack.append(node.name)
35
55
  self.generic_visit(node)
36
56
  self.stack.pop()
37
57
 
38
- def visit_FunctionDef(self, node: ast.FunctionDef):
58
+ def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
39
59
  name = ".".join(self.stack + [node.name]) if self.stack else node.name
40
60
  self.units.append((name, node))
41
61
 
42
- def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
62
+ def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
43
63
  name = ".".join(self.stack + [node.name]) if self.stack else node.name
44
64
  self.units.append((name, node))
45
65
 
46
66
 
67
+ # =========================
68
+ # CFG fingerprinting
69
+ # =========================
70
+
71
+
72
+ def get_cfg_fingerprint(
73
+ node: ast.FunctionDef | ast.AsyncFunctionDef,
74
+ cfg: NormalizationConfig,
75
+ qualname: str,
76
+ ) -> str:
77
+ """
78
+ Build CFG, normalize it into a canonical form, and hash it.
79
+ """
80
+ builder = CFGBuilder()
81
+ graph = builder.build(qualname, node)
82
+
83
+ parts: list[str] = []
84
+
85
+ # Stable order for deterministic hash
86
+ for block in sorted(graph.blocks, key=lambda b: b.id):
87
+ # NOTE: normalized_ast_dump_from_list must accept Sequence[ast.AST] (covariant),
88
+ # but even if it still accepts list[ast.AST], passing list[ast.stmt] will fail
89
+ # due to invariance. We pass as Sequence[ast.AST] via a typed view.
90
+ stmts_as_ast: Sequence[ast.AST] = block.statements
91
+ normalized_stmts = normalized_ast_dump_from_list(stmts_as_ast, cfg)
92
+
93
+ successor_ids = sorted(succ.id for succ in block.successors)
94
+
95
+ parts.append(
96
+ f"BLOCK[{block.id}]:{normalized_stmts}"
97
+ f"|SUCCESSORS:{','.join(map(str, successor_ids))}"
98
+ )
99
+
100
+ return sha1("|".join(parts))
101
+
102
+
103
+ # =========================
104
+ # Public API
105
+ # =========================
106
+
107
+
47
108
  def extract_units_from_source(
48
- source: str,
49
- filepath: str,
50
- module_name: str,
51
- cfg: NormalizationConfig,
52
- min_loc: int,
53
- min_stmt: int,
109
+ source: str,
110
+ filepath: str,
111
+ module_name: str,
112
+ cfg: NormalizationConfig,
113
+ min_loc: int,
114
+ min_stmt: int,
54
115
  ) -> tuple[list[Unit], list[BlockUnit]]:
55
116
  try:
56
117
  tree = ast.parse(source)
@@ -66,6 +127,7 @@ def extract_units_from_source(
66
127
  for local_name, node in qb.units:
67
128
  start = getattr(node, "lineno", None)
68
129
  end = getattr(node, "end_lineno", None)
130
+
69
131
  if not start or not end or end < start:
70
132
  continue
71
133
 
@@ -76,26 +138,24 @@ def extract_units_from_source(
76
138
  continue
77
139
 
78
140
  qualname = f"{module_name}:{local_name}"
79
- dump = normalized_ast_dump(node, cfg)
80
- fp = sha1(dump)
81
-
82
- # ✅ __init__ INCLUDED as function-level unit
83
- units.append(Unit(
84
- qualname=qualname,
85
- filepath=filepath,
86
- start_line=start,
87
- end_line=end,
88
- loc=loc,
89
- stmt_count=stmt_count,
90
- fingerprint=fp,
91
- loc_bucket=bucket_loc(loc),
92
- ))
93
-
94
- if (
95
- not local_name.endswith("__init__")
96
- and loc >= 40
97
- and stmt_count >= 10
98
- ):
141
+ fingerprint = get_cfg_fingerprint(node, cfg, qualname)
142
+
143
+ # Function-level unit (including __init__)
144
+ units.append(
145
+ Unit(
146
+ qualname=qualname,
147
+ filepath=filepath,
148
+ start_line=start,
149
+ end_line=end,
150
+ loc=loc,
151
+ stmt_count=stmt_count,
152
+ fingerprint=fingerprint,
153
+ loc_bucket=bucket_loc(loc),
154
+ )
155
+ )
156
+
157
+ # Block-level units (exclude __init__)
158
+ if not local_name.endswith("__init__") and loc >= 40 and stmt_count >= 10:
99
159
  blocks = extract_blocks(
100
160
  node,
101
161
  filepath=filepath,