codeclone 1.4.1__tar.gz → 1.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. {codeclone-1.4.1 → codeclone-1.4.2}/PKG-INFO +1 -1
  2. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/blocks.py +19 -2
  3. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/cache.py +5 -5
  4. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/cli.py +39 -62
  5. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/extractor.py +38 -23
  6. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/scanner.py +12 -8
  7. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone.egg-info/PKG-INFO +1 -1
  8. {codeclone-1.4.1 → codeclone-1.4.2}/pyproject.toml +1 -1
  9. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cli_unit.py +7 -3
  10. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_extractor.py +105 -0
  11. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_security.py +22 -6
  12. {codeclone-1.4.1 → codeclone-1.4.2}/LICENSE +0 -0
  13. {codeclone-1.4.1 → codeclone-1.4.2}/README.md +0 -0
  14. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/__init__.py +0 -0
  15. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_cli_args.py +0 -0
  16. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_cli_meta.py +0 -0
  17. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_cli_paths.py +0 -0
  18. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_cli_summary.py +0 -0
  19. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_html_escape.py +0 -0
  20. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_html_snippets.py +0 -0
  21. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_blocks.py +0 -0
  22. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_explain.py +0 -0
  23. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_explain_contract.py +0 -0
  24. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_grouping.py +0 -0
  25. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_segments.py +0 -0
  26. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_serialize.py +0 -0
  27. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_types.py +0 -0
  28. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/baseline.py +0 -0
  29. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/blockhash.py +0 -0
  30. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/cfg.py +0 -0
  31. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/cfg_model.py +0 -0
  32. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/contracts.py +0 -0
  33. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/errors.py +0 -0
  34. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/fingerprint.py +0 -0
  35. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/html_report.py +0 -0
  36. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/meta_markers.py +0 -0
  37. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/normalize.py +0 -0
  38. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/py.typed +0 -0
  39. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/report.py +0 -0
  40. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/templates.py +0 -0
  41. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/ui_messages.py +0 -0
  42. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone.egg-info/SOURCES.txt +0 -0
  43. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone.egg-info/dependency_links.txt +0 -0
  44. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone.egg-info/entry_points.txt +0 -0
  45. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone.egg-info/requires.txt +0 -0
  46. {codeclone-1.4.1 → codeclone-1.4.2}/codeclone.egg-info/top_level.txt +0 -0
  47. {codeclone-1.4.1 → codeclone-1.4.2}/setup.cfg +0 -0
  48. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_baseline.py +0 -0
  49. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_blockhash.py +0 -0
  50. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_blocks.py +0 -0
  51. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cache.py +0 -0
  52. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cfg.py +0 -0
  53. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cfg_model.py +0 -0
  54. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cli_inprocess.py +0 -0
  55. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cli_main_guard.py +0 -0
  56. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cli_main_guard_runpy.py +0 -0
  57. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cli_smoke.py +0 -0
  58. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_detector_golden.py +0 -0
  59. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_fingerprint.py +0 -0
  60. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_html_report.py +0 -0
  61. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_init.py +0 -0
  62. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_normalize.py +0 -0
  63. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_report.py +0 -0
  64. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_report_explain.py +0 -0
  65. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_scanner_extra.py +0 -0
  66. {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_segments.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeclone
3
- Version: 1.4.1
3
+ Version: 1.4.2
4
4
  Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
5
5
  Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
6
6
  Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
@@ -9,6 +9,7 @@ Licensed under the MIT License.
9
9
  from __future__ import annotations
10
10
 
11
11
  import ast
12
+ from collections.abc import Sequence
12
13
  from dataclasses import dataclass
13
14
 
14
15
  from .blockhash import stmt_hash
@@ -45,12 +46,20 @@ def extract_blocks(
45
46
  cfg: NormalizationConfig,
46
47
  block_size: int,
47
48
  max_blocks: int,
49
+ precomputed_hashes: Sequence[str] | None = None,
48
50
  ) -> list[BlockUnit]:
49
51
  body = getattr(func_node, "body", None)
50
52
  if not isinstance(body, list) or len(body) < block_size:
51
53
  return []
52
54
 
53
- stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body]
55
+ if precomputed_hashes is not None:
56
+ assert len(precomputed_hashes) == len(body), (
57
+ f"precomputed_hashes length {len(precomputed_hashes)} "
58
+ f"!= body length {len(body)}"
59
+ )
60
+ stmt_hashes = precomputed_hashes
61
+ else:
62
+ stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body]
54
63
 
55
64
  blocks: list[BlockUnit] = []
56
65
  last_start: int | None = None
@@ -94,12 +103,20 @@ def extract_segments(
94
103
  cfg: NormalizationConfig,
95
104
  window_size: int,
96
105
  max_segments: int,
106
+ precomputed_hashes: Sequence[str] | None = None,
97
107
  ) -> list[SegmentUnit]:
98
108
  body = getattr(func_node, "body", None)
99
109
  if not isinstance(body, list) or len(body) < window_size:
100
110
  return []
101
111
 
102
- stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body]
112
+ if precomputed_hashes is not None:
113
+ assert len(precomputed_hashes) == len(body), (
114
+ f"precomputed_hashes length {len(precomputed_hashes)} "
115
+ f"!= body length {len(body)}"
116
+ )
117
+ stmt_hashes = precomputed_hashes
118
+ else:
119
+ stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body]
103
120
 
104
121
  segments: list[SegmentUnit] = []
105
122
 
@@ -344,14 +344,14 @@ class Cache:
344
344
  try:
345
345
  self.path.parent.mkdir(parents=True, exist_ok=True)
346
346
  wire_files: dict[str, object] = {}
347
- for runtime_path in sorted(
348
- self.data["files"], key=self._wire_filepath_from_runtime
349
- ):
347
+ wire_map = {
348
+ rp: self._wire_filepath_from_runtime(rp) for rp in self.data["files"]
349
+ }
350
+ for runtime_path in sorted(self.data["files"], key=wire_map.__getitem__):
350
351
  entry = self.get_file_entry(runtime_path)
351
352
  if entry is None:
352
353
  continue
353
- wire_path = self._wire_filepath_from_runtime(runtime_path)
354
- wire_files[wire_path] = _encode_wire_file_entry(entry)
354
+ wire_files[wire_map[runtime_path]] = _encode_wire_file_entry(entry)
355
355
 
356
356
  payload: dict[str, object] = {
357
357
  "py": current_python_tag(),
@@ -122,14 +122,14 @@ def process_file(
122
122
  """
123
123
 
124
124
  try:
125
- # Check file size
125
+ # Single os.stat() for both size check and cache signature
126
126
  try:
127
- st_size = os.path.getsize(filepath)
128
- if st_size > MAX_FILE_SIZE:
127
+ st = os.stat(filepath)
128
+ if st.st_size > MAX_FILE_SIZE:
129
129
  return ProcessingResult(
130
130
  filepath=filepath,
131
131
  success=False,
132
- error=f"File too large: {st_size} bytes (max {MAX_FILE_SIZE})",
132
+ error=f"File too large: {st.st_size} bytes (max {MAX_FILE_SIZE})",
133
133
  error_kind="file_too_large",
134
134
  )
135
135
  except OSError as e:
@@ -140,6 +140,8 @@ def process_file(
140
140
  error_kind="stat_error",
141
141
  )
142
142
 
143
+ stat: FileStat = {"mtime_ns": st.st_mtime_ns, "size": st.st_size}
144
+
143
145
  try:
144
146
  source = Path(filepath).read_text("utf-8")
145
147
  except UnicodeDecodeError as e:
@@ -157,7 +159,6 @@ def process_file(
157
159
  error_kind="source_read_error",
158
160
  )
159
161
 
160
- stat = file_stat_signature(filepath)
161
162
  module_name = module_name_from_path(root, filepath)
162
163
 
163
164
  units, blocks, segments = extract_units_from_source(
@@ -355,68 +356,44 @@ def _main_impl() -> None:
355
356
  return None, str(e)
356
357
 
357
358
  # Discovery phase
358
- try:
359
- if args.quiet:
360
- for fp in iter_py_files(str(root_path)):
361
- files_found += 1
362
- stat, cached, warn = _get_cached_entry(fp)
363
- if warn:
364
- console.print(warn)
365
- files_skipped += 1
366
- continue
367
- if cached and cached.get("stat") == stat:
368
- cache_hits += 1
369
- all_units.extend(
370
- cast(
371
- list[GroupItem],
372
- cast(object, cached.get("units", [])),
373
- )
359
+ def _discover_files() -> None:
360
+ nonlocal files_found, cache_hits, files_skipped
361
+ for fp in iter_py_files(str(root_path)):
362
+ files_found += 1
363
+ stat, cached, warn = _get_cached_entry(fp)
364
+ if warn:
365
+ console.print(warn)
366
+ files_skipped += 1
367
+ continue
368
+ if cached and cached.get("stat") == stat:
369
+ cache_hits += 1
370
+ all_units.extend(
371
+ cast(
372
+ list[GroupItem],
373
+ cast(object, cached.get("units", [])),
374
374
  )
375
- all_blocks.extend(
376
- cast(
377
- list[GroupItem],
378
- cast(object, cached.get("blocks", [])),
379
- )
375
+ )
376
+ all_blocks.extend(
377
+ cast(
378
+ list[GroupItem],
379
+ cast(object, cached.get("blocks", [])),
380
380
  )
381
- all_segments.extend(
382
- cast(
383
- list[GroupItem],
384
- cast(object, cached.get("segments", [])),
385
- )
381
+ )
382
+ all_segments.extend(
383
+ cast(
384
+ list[GroupItem],
385
+ cast(object, cached.get("segments", [])),
386
386
  )
387
- else:
388
- files_to_process.append(fp)
387
+ )
388
+ else:
389
+ files_to_process.append(fp)
390
+
391
+ try:
392
+ if args.quiet:
393
+ _discover_files()
389
394
  else:
390
395
  with console.status(ui.STATUS_DISCOVERING, spinner="dots"):
391
- for fp in iter_py_files(str(root_path)):
392
- files_found += 1
393
- stat, cached, warn = _get_cached_entry(fp)
394
- if warn:
395
- console.print(warn)
396
- files_skipped += 1
397
- continue
398
- if cached and cached.get("stat") == stat:
399
- cache_hits += 1
400
- all_units.extend(
401
- cast(
402
- list[GroupItem],
403
- cast(object, cached.get("units", [])),
404
- )
405
- )
406
- all_blocks.extend(
407
- cast(
408
- list[GroupItem],
409
- cast(object, cached.get("blocks", [])),
410
- )
411
- )
412
- all_segments.extend(
413
- cast(
414
- list[GroupItem],
415
- cast(object, cached.get("segments", [])),
416
- )
417
- )
418
- else:
419
- files_to_process.append(fp)
396
+ _discover_files()
420
397
  except OSError as e:
421
398
  console.print(ui.fmt_contract_error(ui.ERR_SCAN_FAILED.format(error=e)))
422
399
  sys.exit(ExitCode.CONTRACT_ERROR)
@@ -16,6 +16,7 @@ from collections.abc import Iterator
16
16
  from contextlib import contextmanager
17
17
  from dataclasses import dataclass
18
18
 
19
+ from .blockhash import stmt_hash
19
20
  from .blocks import BlockUnit, SegmentUnit, extract_blocks, extract_segments
20
21
  from .cfg import CFGBuilder
21
22
  from .errors import ParseError
@@ -250,28 +251,42 @@ def extract_units_from_source(
250
251
  )
251
252
  )
252
253
 
253
- # Block-level units (exclude __init__)
254
- if not local_name.endswith("__init__") and loc >= 40 and stmt_count >= 10:
255
- blocks = extract_blocks(
256
- node,
257
- filepath=filepath,
258
- qualname=qualname,
259
- cfg=cfg,
260
- block_size=4,
261
- max_blocks=15,
262
- )
263
- block_units.extend(blocks)
264
-
265
- # Segment-level units (windows within functions, for internal clones)
266
- if loc >= 30 and stmt_count >= 12:
267
- segments = extract_segments(
268
- node,
269
- filepath=filepath,
270
- qualname=qualname,
271
- cfg=cfg,
272
- window_size=6,
273
- max_segments=60,
274
- )
275
- segment_units.extend(segments)
254
+ # Block-level and segment-level units share statement hashes
255
+ needs_blocks = (
256
+ not local_name.endswith("__init__") and loc >= 40 and stmt_count >= 10
257
+ )
258
+ needs_segments = loc >= 30 and stmt_count >= 12
259
+
260
+ if needs_blocks or needs_segments:
261
+ body = getattr(node, "body", None)
262
+ hashes: list[str] | None = None
263
+ if isinstance(body, list):
264
+ hashes = [stmt_hash(stmt, cfg) for stmt in body]
265
+
266
+ if needs_blocks:
267
+ block_units.extend(
268
+ extract_blocks(
269
+ node,
270
+ filepath=filepath,
271
+ qualname=qualname,
272
+ cfg=cfg,
273
+ block_size=4,
274
+ max_blocks=15,
275
+ precomputed_hashes=hashes,
276
+ )
277
+ )
278
+
279
+ if needs_segments:
280
+ segment_units.extend(
281
+ extract_segments(
282
+ node,
283
+ filepath=filepath,
284
+ qualname=qualname,
285
+ cfg=cfg,
286
+ window_size=6,
287
+ max_segments=60,
288
+ precomputed_hashes=hashes,
289
+ )
290
+ )
276
291
 
277
292
  return units, block_units, segment_units
@@ -77,8 +77,9 @@ def iter_py_files(
77
77
  if root_str.startswith(sensitive + "/"):
78
78
  raise ValidationError(f"Cannot scan under sensitive directory: {root}")
79
79
 
80
- file_count = 0
81
- for p in sorted(rootp.rglob("*.py"), key=lambda path: str(path)):
80
+ # Collect and filter first, then sort — avoids sorting excluded paths
81
+ candidates: list[Path] = []
82
+ for p in rootp.rglob("*.py"):
82
83
  # Verify path is actually under root (prevent symlink attacks)
83
84
  try:
84
85
  p.resolve().relative_to(rootp)
@@ -90,12 +91,15 @@ def iter_py_files(
90
91
  if any(ex in parts for ex in excludes):
91
92
  continue
92
93
 
93
- file_count += 1
94
- if file_count > max_files:
95
- raise ValidationError(
96
- f"File count exceeds limit of {max_files}. "
97
- "Use more specific root or increase limit."
98
- )
94
+ candidates.append(p)
95
+
96
+ if len(candidates) > max_files:
97
+ raise ValidationError(
98
+ f"File count exceeds limit of {max_files}. "
99
+ "Use more specific root or increase limit."
100
+ )
101
+
102
+ for p in sorted(candidates, key=lambda path: str(path)):
99
103
  yield str(p)
100
104
 
101
105
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeclone
3
- Version: 1.4.1
3
+ Version: 1.4.2
4
4
  Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
5
5
  Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
6
6
  Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codeclone"
7
- version = "1.4.1"
7
+ version = "1.4.2"
8
8
  description = "AST and CFG-based code clone detector for Python focused on architectural duplication"
9
9
  readme = { file = "README.md", content-type = "text/markdown" }
10
10
  license = { text = "MIT" }
@@ -28,10 +28,14 @@ def test_process_file_stat_error(
28
28
  src = tmp_path / "a.py"
29
29
  src.write_text("def f():\n return 1\n", "utf-8")
30
30
 
31
- def _boom(_path: str) -> int:
32
- raise OSError("nope")
31
+ _original_stat = os.stat
33
32
 
34
- monkeypatch.setattr(os.path, "getsize", _boom)
33
+ def _boom(path: str, *args: object, **kwargs: object) -> os.stat_result:
34
+ if str(path) == str(src):
35
+ raise OSError("nope")
36
+ return _original_stat(path, *args, **kwargs) # type: ignore[arg-type]
37
+
38
+ monkeypatch.setattr(os, "stat", _boom)
35
39
  result = process_file(str(src), str(tmp_path), NormalizationConfig(), 1, 1)
36
40
  assert result.success is False
37
41
  assert result.error is not None
@@ -374,6 +374,111 @@ def f():
374
374
  assert segments == []
375
375
 
376
376
 
377
+ def test_extract_generates_segments_without_blocks_when_only_segment_gate_met() -> None:
378
+ lines = ["def f():"]
379
+ for i in range(12):
380
+ lines.append(f" x{i} = {i}")
381
+ lines.append("")
382
+ lines.append("")
383
+ src = "\n".join(lines)
384
+
385
+ units, blocks, segments = extract_units_from_source(
386
+ source=src,
387
+ filepath="x.py",
388
+ module_name="mod",
389
+ cfg=NormalizationConfig(),
390
+ min_loc=1,
391
+ min_stmt=1,
392
+ )
393
+
394
+ assert units
395
+ assert blocks == []
396
+ assert segments
397
+
398
+
399
+ def test_extract_generates_blocks_without_segments_when_only_block_gate_met() -> None:
400
+ lines = ["def f():"]
401
+ for i in range(10):
402
+ lines.append(f" x{i} = {i}")
403
+ lines.append("")
404
+ lines.append("")
405
+ lines.append("")
406
+ lines.append("")
407
+ src = "\n".join(lines)
408
+
409
+ units, blocks, segments = extract_units_from_source(
410
+ source=src,
411
+ filepath="x.py",
412
+ module_name="mod",
413
+ cfg=NormalizationConfig(),
414
+ min_loc=1,
415
+ min_stmt=1,
416
+ )
417
+
418
+ assert units
419
+ assert blocks
420
+ assert segments == []
421
+
422
+
423
+ def test_extract_handles_non_list_function_body_for_hash_reuse(
424
+ monkeypatch: pytest.MonkeyPatch,
425
+ ) -> None:
426
+ lines = ["def f():"]
427
+ for i in range(12):
428
+ lines.append(f" x{i} = {i}")
429
+ lines.append("")
430
+ lines.append("")
431
+ tree = ast.parse("\n".join(lines))
432
+ func = tree.body[0]
433
+ assert isinstance(func, ast.FunctionDef)
434
+ func.body = tuple(func.body) # type: ignore[assignment]
435
+
436
+ captured_hashes: dict[str, object] = {}
437
+
438
+ def _fake_parse(_source: str, _timeout_s: int) -> ast.AST:
439
+ return tree
440
+
441
+ def _fake_fingerprint(
442
+ _node: ast.FunctionDef | ast.AsyncFunctionDef,
443
+ _cfg: NormalizationConfig,
444
+ _qualname: str,
445
+ ) -> str:
446
+ return "f" * 40
447
+
448
+ def _fake_extract_segments(
449
+ _node: ast.FunctionDef | ast.AsyncFunctionDef,
450
+ filepath: str,
451
+ qualname: str,
452
+ cfg: NormalizationConfig,
453
+ window_size: int = 6,
454
+ max_segments: int = 60,
455
+ *,
456
+ precomputed_hashes: list[str] | None = None,
457
+ ) -> list[object]:
458
+ del filepath, qualname, cfg, window_size, max_segments
459
+ captured_hashes["value"] = precomputed_hashes
460
+ return []
461
+
462
+ monkeypatch.setattr(extractor, "_parse_with_limits", _fake_parse)
463
+ monkeypatch.setattr(extractor, "_stmt_count", lambda _node: 12)
464
+ monkeypatch.setattr(extractor, "get_cfg_fingerprint", _fake_fingerprint)
465
+ monkeypatch.setattr(extractor, "extract_segments", _fake_extract_segments)
466
+
467
+ units, blocks, segments = extract_units_from_source(
468
+ source="def f():\n pass\n",
469
+ filepath="x.py",
470
+ module_name="mod",
471
+ cfg=NormalizationConfig(),
472
+ min_loc=1,
473
+ min_stmt=1,
474
+ )
475
+
476
+ assert len(units) == 1
477
+ assert blocks == []
478
+ assert segments == []
479
+ assert captured_hashes["value"] is None
480
+
481
+
377
482
  def test_extract_skips_invalid_positions(monkeypatch: pytest.MonkeyPatch) -> None:
378
483
  tree = ast.parse(
379
484
  """
@@ -30,18 +30,34 @@ def test_process_file_size_limit() -> None:
30
30
 
31
31
  try:
32
32
  cfg = NormalizationConfig()
33
+ real_stat = os.stat(tmp_path)
33
34
 
34
- # Mock os.path.getsize to return huge size
35
- with patch("os.path.getsize", return_value=MAX_FILE_SIZE + 1):
35
+ # Mock os.stat to return huge st_size
36
+ def _huge_stat(path: str, *args: object, **kwargs: object) -> os.stat_result:
37
+ return os.stat_result(
38
+ (
39
+ real_stat.st_mode,
40
+ real_stat.st_ino,
41
+ real_stat.st_dev,
42
+ real_stat.st_nlink,
43
+ real_stat.st_uid,
44
+ real_stat.st_gid,
45
+ MAX_FILE_SIZE + 1, # st_size
46
+ int(real_stat.st_atime),
47
+ int(real_stat.st_mtime),
48
+ int(real_stat.st_ctime),
49
+ )
50
+ )
51
+
52
+ with patch("os.stat", side_effect=_huge_stat):
36
53
  result = process_file(tmp_path, os.path.dirname(tmp_path), cfg, 0, 0)
37
54
  assert result.success is False
38
55
  assert result.error is not None
39
56
  assert "File too large" in result.error
40
57
 
41
- # Normal size should pass
42
- with patch("os.path.getsize", return_value=10):
43
- result = process_file(tmp_path, os.path.dirname(tmp_path), cfg, 0, 0)
44
- assert result.success is True
58
+ # Normal size should pass (no mock — real stat)
59
+ result = process_file(tmp_path, os.path.dirname(tmp_path), cfg, 0, 0)
60
+ assert result.success is True
45
61
 
46
62
  finally:
47
63
  os.remove(tmp_path)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes