codeclone 1.4.2__tar.gz → 1.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. {codeclone-1.4.2 → codeclone-1.4.3}/PKG-INFO +9 -8
  2. {codeclone-1.4.2 → codeclone-1.4.3}/README.md +8 -7
  3. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/cache.py +59 -0
  4. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/cli.py +2 -0
  5. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/contracts.py +1 -1
  6. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone.egg-info/PKG-INFO +9 -8
  7. {codeclone-1.4.2 → codeclone-1.4.3}/pyproject.toml +1 -1
  8. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cache.py +93 -26
  9. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cli_inprocess.py +117 -1
  10. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_html_report.py +2 -2
  11. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_report.py +3 -3
  12. {codeclone-1.4.2 → codeclone-1.4.3}/LICENSE +0 -0
  13. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/__init__.py +0 -0
  14. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_cli_args.py +0 -0
  15. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_cli_meta.py +0 -0
  16. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_cli_paths.py +0 -0
  17. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_cli_summary.py +0 -0
  18. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_html_escape.py +0 -0
  19. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_html_snippets.py +0 -0
  20. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_blocks.py +0 -0
  21. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_explain.py +0 -0
  22. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_explain_contract.py +0 -0
  23. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_grouping.py +0 -0
  24. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_segments.py +0 -0
  25. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_serialize.py +0 -0
  26. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_types.py +0 -0
  27. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/baseline.py +0 -0
  28. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/blockhash.py +0 -0
  29. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/blocks.py +0 -0
  30. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/cfg.py +0 -0
  31. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/cfg_model.py +0 -0
  32. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/errors.py +0 -0
  33. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/extractor.py +0 -0
  34. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/fingerprint.py +0 -0
  35. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/html_report.py +0 -0
  36. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/meta_markers.py +0 -0
  37. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/normalize.py +0 -0
  38. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/py.typed +0 -0
  39. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/report.py +0 -0
  40. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/scanner.py +0 -0
  41. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/templates.py +0 -0
  42. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/ui_messages.py +0 -0
  43. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone.egg-info/SOURCES.txt +0 -0
  44. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone.egg-info/dependency_links.txt +0 -0
  45. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone.egg-info/entry_points.txt +0 -0
  46. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone.egg-info/requires.txt +0 -0
  47. {codeclone-1.4.2 → codeclone-1.4.3}/codeclone.egg-info/top_level.txt +0 -0
  48. {codeclone-1.4.2 → codeclone-1.4.3}/setup.cfg +0 -0
  49. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_baseline.py +0 -0
  50. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_blockhash.py +0 -0
  51. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_blocks.py +0 -0
  52. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cfg.py +0 -0
  53. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cfg_model.py +0 -0
  54. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cli_main_guard.py +0 -0
  55. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cli_main_guard_runpy.py +0 -0
  56. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cli_smoke.py +0 -0
  57. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cli_unit.py +0 -0
  58. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_detector_golden.py +0 -0
  59. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_extractor.py +0 -0
  60. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_fingerprint.py +0 -0
  61. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_init.py +0 -0
  62. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_normalize.py +0 -0
  63. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_report_explain.py +0 -0
  64. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_scanner_extra.py +0 -0
  65. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_security.py +0 -0
  66. {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_segments.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeclone
3
- Version: 1.4.2
3
+ Version: 1.4.3
4
4
  Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
5
5
  Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
6
6
  Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
@@ -158,12 +158,12 @@ Full contract details: [`docs/book/06-baseline.md`](docs/book/06-baseline.md)
158
158
 
159
159
  CodeClone uses a deterministic exit code contract:
160
160
 
161
- | Code | Meaning |
162
- |------|-----------------------------------------------------------------------------|
163
- | `0` | Success — run completed without gating failures |
161
+ | Code | Meaning |
162
+ |------|-------------------------------------------------------------------------------------------------------------------------------------|
163
+ | `0` | Success — run completed without gating failures |
164
164
  | `2` | Contract error — baseline missing/untrusted, invalid output extensions, incompatible versions, unreadable source files in CI/gating |
165
- | `3` | Gating failure — new clones detected or threshold exceeded |
166
- | `5` | Internal error — unexpected exception |
165
+ | `3` | Gating failure — new clones detected or threshold exceeded |
166
+ | `5` | Internal error — unexpected exception |
167
167
 
168
168
  **Priority:** Contract errors (`2`) override gating failures (`3`) when both occur.
169
169
 
@@ -223,7 +223,7 @@ Canonical report contract: [`docs/book/08-report.md`](docs/book/08-report.md)
223
223
  "cache_path": "/path/to/.cache/codeclone/cache.json",
224
224
  "cache_used": true,
225
225
  "cache_status": "ok",
226
- "cache_schema_version": "1.2",
226
+ "cache_schema_version": "1.3",
227
227
  "files_skipped_source_io": 0,
228
228
  "groups_counts": {
229
229
  "functions": {
@@ -304,7 +304,8 @@ Canonical report contract: [`docs/book/08-report.md`](docs/book/08-report.md)
304
304
  Cache is an optimization layer only and is never a source of truth.
305
305
 
306
306
  - Default path: `<root>/.cache/codeclone/cache.json`
307
- - Schema version: **v1.2**
307
+ - Schema version: **v1.3**
308
+ - Compatibility includes analysis profile (`min_loc`, `min_stmt`)
308
309
  - Invalid or oversized cache is ignored with warning and rebuilt (fail-open)
309
310
 
310
311
  Full contract details: [`docs/book/07-cache.md`](docs/book/07-cache.md)
@@ -117,12 +117,12 @@ Full contract details: [`docs/book/06-baseline.md`](docs/book/06-baseline.md)
117
117
 
118
118
  CodeClone uses a deterministic exit code contract:
119
119
 
120
- | Code | Meaning |
121
- |------|-----------------------------------------------------------------------------|
122
- | `0` | Success — run completed without gating failures |
120
+ | Code | Meaning |
121
+ |------|-------------------------------------------------------------------------------------------------------------------------------------|
122
+ | `0` | Success — run completed without gating failures |
123
123
  | `2` | Contract error — baseline missing/untrusted, invalid output extensions, incompatible versions, unreadable source files in CI/gating |
124
- | `3` | Gating failure — new clones detected or threshold exceeded |
125
- | `5` | Internal error — unexpected exception |
124
+ | `3` | Gating failure — new clones detected or threshold exceeded |
125
+ | `5` | Internal error — unexpected exception |
126
126
 
127
127
  **Priority:** Contract errors (`2`) override gating failures (`3`) when both occur.
128
128
 
@@ -182,7 +182,7 @@ Canonical report contract: [`docs/book/08-report.md`](docs/book/08-report.md)
182
182
  "cache_path": "/path/to/.cache/codeclone/cache.json",
183
183
  "cache_used": true,
184
184
  "cache_status": "ok",
185
- "cache_schema_version": "1.2",
185
+ "cache_schema_version": "1.3",
186
186
  "files_skipped_source_io": 0,
187
187
  "groups_counts": {
188
188
  "functions": {
@@ -263,7 +263,8 @@ Canonical report contract: [`docs/book/08-report.md`](docs/book/08-report.md)
263
263
  Cache is an optimization layer only and is never a source of truth.
264
264
 
265
265
  - Default path: `<root>/.cache/codeclone/cache.json`
266
- - Schema version: **v1.2**
266
+ - Schema version: **v1.3**
267
+ - Compatibility includes analysis profile (`min_loc`, `min_stmt`)
267
268
  - Invalid or oversized cache is ignored with warning and rebuilt (fail-open)
268
269
 
269
270
  Full contract details: [`docs/book/07-cache.md`](docs/book/07-cache.md)
@@ -39,6 +39,7 @@ class CacheStatus(str, Enum):
39
39
  VERSION_MISMATCH = "version_mismatch"
40
40
  PYTHON_TAG_MISMATCH = "python_tag_mismatch"
41
41
  FINGERPRINT_MISMATCH = "mismatch_fingerprint_version"
42
+ ANALYSIS_PROFILE_MISMATCH = "analysis_profile_mismatch"
42
43
  INTEGRITY_FAILED = "integrity_failed"
43
44
 
44
45
 
@@ -84,15 +85,22 @@ class CacheEntry(TypedDict):
84
85
  segments: list[SegmentDict]
85
86
 
86
87
 
88
+ class AnalysisProfile(TypedDict):
89
+ min_loc: int
90
+ min_stmt: int
91
+
92
+
87
93
  class CacheData(TypedDict):
88
94
  version: str
89
95
  python_tag: str
90
96
  fingerprint_version: str
97
+ analysis_profile: AnalysisProfile
91
98
  files: dict[str, CacheEntry]
92
99
 
93
100
 
94
101
  class Cache:
95
102
  __slots__ = (
103
+ "analysis_profile",
96
104
  "cache_schema_version",
97
105
  "data",
98
106
  "fingerprint_version",
@@ -112,14 +120,21 @@ class Cache:
112
120
  *,
113
121
  root: str | Path | None = None,
114
122
  max_size_bytes: int | None = None,
123
+ min_loc: int = 15,
124
+ min_stmt: int = 6,
115
125
  ):
116
126
  self.path = Path(path)
117
127
  self.root = _resolve_root(root)
118
128
  self.fingerprint_version = BASELINE_FINGERPRINT_VERSION
129
+ self.analysis_profile: AnalysisProfile = {
130
+ "min_loc": min_loc,
131
+ "min_stmt": min_stmt,
132
+ }
119
133
  self.data: CacheData = _empty_cache_data(
120
134
  version=self._CACHE_VERSION,
121
135
  python_tag=current_python_tag(),
122
136
  fingerprint_version=self.fingerprint_version,
137
+ analysis_profile=self.analysis_profile,
123
138
  )
124
139
  self.legacy_secret_warning = self._detect_legacy_secret_warning()
125
140
  self.cache_schema_version: str | None = None
@@ -164,6 +179,7 @@ class Cache:
164
179
  version=self._CACHE_VERSION,
165
180
  python_tag=current_python_tag(),
166
181
  fingerprint_version=self.fingerprint_version,
182
+ analysis_profile=self.analysis_profile,
167
183
  )
168
184
 
169
185
  def _sign_data(self, data: Mapping[str, object]) -> str:
@@ -309,6 +325,28 @@ class Cache:
309
325
  )
310
326
  return None
311
327
 
328
+ analysis_profile = _as_analysis_profile(payload.get("ap"))
329
+ if analysis_profile is None:
330
+ self._ignore_cache(
331
+ "Cache format invalid; ignoring cache.",
332
+ status=CacheStatus.INVALID_TYPE,
333
+ schema_version=version,
334
+ )
335
+ return None
336
+
337
+ if analysis_profile != self.analysis_profile:
338
+ self._ignore_cache(
339
+ "Cache analysis profile mismatch "
340
+ f"(found min_loc={analysis_profile['min_loc']}, "
341
+ f"min_stmt={analysis_profile['min_stmt']}; "
342
+ f"expected min_loc={self.analysis_profile['min_loc']}, "
343
+ f"min_stmt={self.analysis_profile['min_stmt']}); "
344
+ "ignoring cache.",
345
+ status=CacheStatus.ANALYSIS_PROFILE_MISMATCH,
346
+ schema_version=version,
347
+ )
348
+ return None
349
+
312
350
  files_obj = payload.get("files")
313
351
  files_dict = _as_str_dict(files_obj)
314
352
  if files_dict is None:
@@ -337,6 +375,7 @@ class Cache:
337
375
  "version": self._CACHE_VERSION,
338
376
  "python_tag": runtime_tag,
339
377
  "fingerprint_version": self.fingerprint_version,
378
+ "analysis_profile": self.analysis_profile,
340
379
  "files": parsed_files,
341
380
  }
342
381
 
@@ -356,6 +395,7 @@ class Cache:
356
395
  payload: dict[str, object] = {
357
396
  "py": current_python_tag(),
358
397
  "fp": self.fingerprint_version,
398
+ "ap": self.analysis_profile,
359
399
  "files": wire_files,
360
400
  }
361
401
  signed_doc = {
@@ -371,6 +411,7 @@ class Cache:
371
411
  self.data["version"] = self._CACHE_VERSION
372
412
  self.data["python_tag"] = current_python_tag()
373
413
  self.data["fingerprint_version"] = self.fingerprint_version
414
+ self.data["analysis_profile"] = self.analysis_profile
374
415
 
375
416
  except OSError as e:
376
417
  raise CacheError(f"Failed to save cache: {e}") from e
@@ -508,11 +549,13 @@ def _empty_cache_data(
508
549
  version: str,
509
550
  python_tag: str,
510
551
  fingerprint_version: str,
552
+ analysis_profile: AnalysisProfile,
511
553
  ) -> CacheData:
512
554
  return {
513
555
  "version": version,
514
556
  "python_tag": python_tag,
515
557
  "fingerprint_version": fingerprint_version,
558
+ "analysis_profile": analysis_profile,
516
559
  "files": {},
517
560
  }
518
561
 
@@ -542,6 +585,22 @@ def _as_str_dict(value: object) -> dict[str, object] | None:
542
585
  return value
543
586
 
544
587
 
588
+ def _as_analysis_profile(value: object) -> AnalysisProfile | None:
589
+ obj = _as_str_dict(value)
590
+ if obj is None:
591
+ return None
592
+
593
+ if set(obj.keys()) != {"min_loc", "min_stmt"}:
594
+ return None
595
+
596
+ min_loc = _as_int(obj.get("min_loc"))
597
+ min_stmt = _as_int(obj.get("min_stmt"))
598
+ if min_loc is None or min_stmt is None:
599
+ return None
600
+
601
+ return {"min_loc": min_loc, "min_stmt": min_stmt}
602
+
603
+
545
604
  def _decode_wire_file_entry(value: object, filepath: str) -> CacheEntry | None:
546
605
  obj = _as_str_dict(value)
547
606
  if obj is None:
@@ -310,6 +310,8 @@ def _main_impl() -> None:
310
310
  cache_path,
311
311
  root=root_path,
312
312
  max_size_bytes=args.max_cache_size_mb * 1024 * 1024,
313
+ min_loc=args.min_loc,
314
+ min_stmt=args.min_stmt,
313
315
  )
314
316
  cache.load()
315
317
  if cache.load_warning:
@@ -14,7 +14,7 @@ from typing import Final
14
14
  BASELINE_SCHEMA_VERSION: Final = "1.0"
15
15
  BASELINE_FINGERPRINT_VERSION: Final = "1"
16
16
 
17
- CACHE_VERSION: Final = "1.2"
17
+ CACHE_VERSION: Final = "1.3"
18
18
  REPORT_SCHEMA_VERSION: Final = "1.1"
19
19
 
20
20
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeclone
3
- Version: 1.4.2
3
+ Version: 1.4.3
4
4
  Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
5
5
  Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
6
6
  Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
@@ -158,12 +158,12 @@ Full contract details: [`docs/book/06-baseline.md`](docs/book/06-baseline.md)
158
158
 
159
159
  CodeClone uses a deterministic exit code contract:
160
160
 
161
- | Code | Meaning |
162
- |------|-----------------------------------------------------------------------------|
163
- | `0` | Success — run completed without gating failures |
161
+ | Code | Meaning |
162
+ |------|-------------------------------------------------------------------------------------------------------------------------------------|
163
+ | `0` | Success — run completed without gating failures |
164
164
  | `2` | Contract error — baseline missing/untrusted, invalid output extensions, incompatible versions, unreadable source files in CI/gating |
165
- | `3` | Gating failure — new clones detected or threshold exceeded |
166
- | `5` | Internal error — unexpected exception |
165
+ | `3` | Gating failure — new clones detected or threshold exceeded |
166
+ | `5` | Internal error — unexpected exception |
167
167
 
168
168
  **Priority:** Contract errors (`2`) override gating failures (`3`) when both occur.
169
169
 
@@ -223,7 +223,7 @@ Canonical report contract: [`docs/book/08-report.md`](docs/book/08-report.md)
223
223
  "cache_path": "/path/to/.cache/codeclone/cache.json",
224
224
  "cache_used": true,
225
225
  "cache_status": "ok",
226
- "cache_schema_version": "1.2",
226
+ "cache_schema_version": "1.3",
227
227
  "files_skipped_source_io": 0,
228
228
  "groups_counts": {
229
229
  "functions": {
@@ -304,7 +304,8 @@ Canonical report contract: [`docs/book/08-report.md`](docs/book/08-report.md)
304
304
  Cache is an optimization layer only and is never a source of truth.
305
305
 
306
306
  - Default path: `<root>/.cache/codeclone/cache.json`
307
- - Schema version: **v1.2**
307
+ - Schema version: **v1.3**
308
+ - Compatibility includes analysis profile (`min_loc`, `min_stmt`)
308
309
  - Invalid or oversized cache is ignored with warning and rebuilt (fail-open)
309
310
 
310
311
  Full contract details: [`docs/book/07-cache.md`](docs/book/07-cache.md)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codeclone"
7
- version = "1.4.2"
7
+ version = "1.4.3"
8
8
  description = "AST and CFG-based code clone detector for Python focused on architectural duplication"
9
9
  readme = { file = "README.md", content-type = "text/markdown" }
10
10
  license = { text = "MIT" }
@@ -50,6 +50,15 @@ def _make_segment(filepath: str) -> SegmentUnit:
50
50
  )
51
51
 
52
52
 
53
+ def _analysis_payload(cache: Cache, *, files: object) -> dict[str, object]:
54
+ return {
55
+ "py": cache.data["python_tag"],
56
+ "fp": cache.data["fingerprint_version"],
57
+ "ap": cache.data["analysis_profile"],
58
+ "files": files,
59
+ }
60
+
61
+
53
62
  def test_cache_roundtrip(tmp_path: Path) -> None:
54
63
  cache_path = tmp_path / "cache.json"
55
64
  cache = Cache(cache_path)
@@ -97,7 +106,7 @@ def test_get_file_entry_missing_after_fallback_returns_none(tmp_path: Path) -> N
97
106
  assert cache.get_file_entry(str(root / "pkg" / "missing.py")) is None
98
107
 
99
108
 
100
- def test_cache_v12_uses_relpaths_when_root_set(tmp_path: Path) -> None:
109
+ def test_cache_v13_uses_relpaths_when_root_set(tmp_path: Path) -> None:
101
110
  project_root = tmp_path / "project"
102
111
  target = project_root / "pkg" / "module.py"
103
112
  target.parent.mkdir(parents=True, exist_ok=True)
@@ -121,14 +130,10 @@ def test_cache_v12_uses_relpaths_when_root_set(tmp_path: Path) -> None:
121
130
  assert str(target) not in files
122
131
 
123
132
 
124
- def test_cache_v12_missing_optional_sections_default_empty(tmp_path: Path) -> None:
133
+ def test_cache_v13_missing_optional_sections_default_empty(tmp_path: Path) -> None:
125
134
  cache_path = tmp_path / "cache.json"
126
135
  cache = Cache(cache_path)
127
- payload = {
128
- "py": cache.data["python_tag"],
129
- "fp": cache.data["fingerprint_version"],
130
- "files": {"x.py": {"st": [1, 2]}},
131
- }
136
+ payload = _analysis_payload(cache, files={"x.py": {"st": [1, 2]}})
132
137
  signature = cache._sign_data(payload)
133
138
  cache_path.write_text(
134
139
  json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": signature}),
@@ -201,11 +206,7 @@ def test_cache_version_mismatch_warns(tmp_path: Path) -> None:
201
206
  def test_cache_v_field_version_mismatch_warns(tmp_path: Path) -> None:
202
207
  cache_path = tmp_path / "cache.json"
203
208
  cache = Cache(cache_path)
204
- payload = {
205
- "py": cache.data["python_tag"],
206
- "fp": cache.data["fingerprint_version"],
207
- "files": {},
208
- }
209
+ payload = _analysis_payload(cache, files={})
209
210
  signature = cache._sign_data(payload)
210
211
  cache_path.write_text(
211
212
  json.dumps({"v": "0.0", "payload": payload, "sig": signature}), "utf-8"
@@ -527,11 +528,7 @@ def test_cache_load_unreadable_read_graceful_ignore(
527
528
  def test_cache_load_invalid_files_type(tmp_path: Path) -> None:
528
529
  cache_path = tmp_path / "cache.json"
529
530
  cache = Cache(cache_path)
530
- payload = {
531
- "py": cache.data["python_tag"],
532
- "fp": cache.data["fingerprint_version"],
533
- "files": [],
534
- }
531
+ payload = _analysis_payload(cache, files=[])
535
532
  signature = cache._sign_data(payload)
536
533
  cache_path.write_text(
537
534
  json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": signature}),
@@ -644,11 +641,7 @@ def test_cache_load_invalid_top_level_type(tmp_path: Path) -> None:
644
641
  def test_cache_load_missing_v_field(tmp_path: Path) -> None:
645
642
  cache_path = tmp_path / "cache.json"
646
643
  cache = Cache(cache_path)
647
- payload = {
648
- "py": cache.data["python_tag"],
649
- "fp": cache.data["fingerprint_version"],
650
- "files": {},
651
- }
644
+ payload = _analysis_payload(cache, files={})
652
645
  sig = cache._sign_data(payload)
653
646
  cache_path.write_text(json.dumps({"payload": payload, "sig": sig}), "utf-8")
654
647
  cache.load()
@@ -683,7 +676,12 @@ def test_cache_load_missing_python_tag_in_payload(tmp_path: Path) -> None:
683
676
  def test_cache_load_python_tag_mismatch(tmp_path: Path) -> None:
684
677
  cache_path = tmp_path / "cache.json"
685
678
  cache = Cache(cache_path)
686
- payload = {"py": "cp999", "fp": cache.data["fingerprint_version"], "files": {}}
679
+ payload = {
680
+ "py": "cp999",
681
+ "fp": cache.data["fingerprint_version"],
682
+ "ap": cache.data["analysis_profile"],
683
+ "files": {},
684
+ }
687
685
  sig = cache._sign_data(payload)
688
686
  cache_path.write_text(
689
687
  json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8"
@@ -709,7 +707,12 @@ def test_cache_load_missing_fingerprint_version(tmp_path: Path) -> None:
709
707
  def test_cache_load_fingerprint_version_mismatch(tmp_path: Path) -> None:
710
708
  cache_path = tmp_path / "cache.json"
711
709
  cache = Cache(cache_path)
712
- payload = {"py": cache.data["python_tag"], "fp": "old", "files": {}}
710
+ payload = {
711
+ "py": cache.data["python_tag"],
712
+ "fp": "old",
713
+ "ap": cache.data["analysis_profile"],
714
+ "files": {},
715
+ }
713
716
  sig = cache._sign_data(payload)
714
717
  cache_path.write_text(
715
718
  json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8"
@@ -719,18 +722,82 @@ def test_cache_load_fingerprint_version_mismatch(tmp_path: Path) -> None:
719
722
  assert "fingerprint version mismatch" in cache.load_warning
720
723
 
721
724
 
722
- def test_cache_load_invalid_wire_file_entry(tmp_path: Path) -> None:
725
+ def test_cache_load_analysis_profile_mismatch(tmp_path: Path) -> None:
726
+ cache_path = tmp_path / "cache.json"
727
+ cache = Cache(cache_path, min_loc=1, min_stmt=1)
728
+ cache.put_file_entry("x.py", {"mtime_ns": 1, "size": 10}, [], [], [])
729
+ cache.save()
730
+
731
+ loaded = Cache(cache_path, min_loc=15, min_stmt=6)
732
+ loaded.load()
733
+
734
+ assert loaded.load_warning is not None
735
+ assert "analysis profile mismatch" in loaded.load_warning
736
+ assert loaded.data["files"] == {}
737
+ assert loaded.load_status == CacheStatus.ANALYSIS_PROFILE_MISMATCH
738
+ assert loaded.cache_schema_version == Cache._CACHE_VERSION
739
+
740
+
741
+ def test_cache_load_missing_analysis_profile_in_payload(tmp_path: Path) -> None:
742
+ cache_path = tmp_path / "cache.json"
743
+ cache = Cache(cache_path)
744
+ payload = {
745
+ "py": cache.data["python_tag"],
746
+ "fp": cache.data["fingerprint_version"],
747
+ "files": {},
748
+ }
749
+ sig = cache._sign_data(payload)
750
+ cache_path.write_text(
751
+ json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8"
752
+ )
753
+
754
+ cache.load()
755
+ assert cache.load_warning is not None
756
+ assert "format invalid" in cache.load_warning
757
+ assert cache.load_status == CacheStatus.INVALID_TYPE
758
+ assert cache.cache_schema_version == Cache._CACHE_VERSION
759
+ assert cache.data["files"] == {}
760
+
761
+
762
+ @pytest.mark.parametrize(
763
+ "bad_analysis_profile",
764
+ [
765
+ {"min_loc": 15},
766
+ {"min_loc": "15", "min_stmt": 6},
767
+ ],
768
+ )
769
+ def test_cache_load_invalid_analysis_profile_payload(
770
+ tmp_path: Path, bad_analysis_profile: object
771
+ ) -> None:
723
772
  cache_path = tmp_path / "cache.json"
724
773
  cache = Cache(cache_path)
725
774
  payload = {
726
775
  "py": cache.data["python_tag"],
727
776
  "fp": cache.data["fingerprint_version"],
728
- "files": {"x.py": {"st": "bad"}},
777
+ "ap": bad_analysis_profile,
778
+ "files": {},
729
779
  }
730
780
  sig = cache._sign_data(payload)
731
781
  cache_path.write_text(
732
782
  json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8"
733
783
  )
784
+
785
+ cache.load()
786
+ assert cache.load_warning is not None
787
+ assert "format invalid" in cache.load_warning
788
+ assert cache.load_status == CacheStatus.INVALID_TYPE
789
+ assert cache.cache_schema_version == Cache._CACHE_VERSION
790
+ assert cache.data["files"] == {}
791
+
792
+
793
+ def test_cache_load_invalid_wire_file_entry(tmp_path: Path) -> None:
794
+ cache_path = tmp_path / "cache.json"
795
+ cache = Cache(cache_path)
796
+ payload = _analysis_payload(cache, files={"x.py": {"st": "bad"}})
797
+ sig = cache._sign_data(payload)
798
+ cache_path.write_text(
799
+ json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8"
800
+ )
734
801
  cache.load()
735
802
  assert cache.load_warning is not None
736
803
  assert "format invalid" in cache.load_warning
@@ -708,7 +708,7 @@ def test_cli_cache_status_string_fallback(
708
708
  def __init__(self, _path: Path, **_kwargs: object) -> None:
709
709
  self.load_warning = load_warning
710
710
  self.load_status = "not-a-cache-status"
711
- self.cache_schema_version = "1.2"
711
+ self.cache_schema_version = CACHE_VERSION
712
712
 
713
713
  def load(self) -> None:
714
714
  return None
@@ -1716,6 +1716,122 @@ def test_cli_reports_cache_meta_when_cache_missing(
1716
1716
  assert meta["cache_schema_version"] is None
1717
1717
 
1718
1718
 
1719
+ @pytest.mark.parametrize(
1720
+ (
1721
+ "first_min_loc",
1722
+ "first_min_stmt",
1723
+ "second_min_loc",
1724
+ "second_min_stmt",
1725
+ "expected_cache_used",
1726
+ "expected_cache_status",
1727
+ "expected_functions_total",
1728
+ "expected_warning",
1729
+ ),
1730
+ [
1731
+ (
1732
+ 1,
1733
+ 1,
1734
+ 15,
1735
+ 6,
1736
+ False,
1737
+ "analysis_profile_mismatch",
1738
+ 0,
1739
+ "analysis profile mismatch",
1740
+ ),
1741
+ (
1742
+ 15,
1743
+ 6,
1744
+ 1,
1745
+ 1,
1746
+ False,
1747
+ "analysis_profile_mismatch",
1748
+ 1,
1749
+ "analysis profile mismatch",
1750
+ ),
1751
+ (1, 1, 1, 1, True, "ok", 1, None),
1752
+ ],
1753
+ )
1754
+ def test_cli_cache_analysis_profile_compatibility(
1755
+ tmp_path: Path,
1756
+ monkeypatch: pytest.MonkeyPatch,
1757
+ capsys: pytest.CaptureFixture[str],
1758
+ first_min_loc: int,
1759
+ first_min_stmt: int,
1760
+ second_min_loc: int,
1761
+ second_min_stmt: int,
1762
+ expected_cache_used: bool,
1763
+ expected_cache_status: str,
1764
+ expected_functions_total: int,
1765
+ expected_warning: str | None,
1766
+ ) -> None:
1767
+ src = tmp_path / "a.py"
1768
+ src.write_text(
1769
+ """
1770
+ def f1():
1771
+ x = 1
1772
+ return x
1773
+
1774
+ def f2():
1775
+ y = 1
1776
+ return y
1777
+ """,
1778
+ "utf-8",
1779
+ )
1780
+ baseline_path = _write_baseline(
1781
+ tmp_path / "baseline.json",
1782
+ python_version=f"{sys.version_info.major}.{sys.version_info.minor}",
1783
+ )
1784
+ cache_path = tmp_path / "cache.json"
1785
+ json_first = tmp_path / "report-first.json"
1786
+ json_second = tmp_path / "report-second.json"
1787
+ _patch_parallel(monkeypatch)
1788
+
1789
+ _run_main(
1790
+ monkeypatch,
1791
+ [
1792
+ str(tmp_path),
1793
+ "--baseline",
1794
+ str(baseline_path),
1795
+ "--cache-path",
1796
+ str(cache_path),
1797
+ "--json",
1798
+ str(json_first),
1799
+ "--min-loc",
1800
+ str(first_min_loc),
1801
+ "--min-stmt",
1802
+ str(first_min_stmt),
1803
+ "--no-progress",
1804
+ ],
1805
+ )
1806
+ capsys.readouterr()
1807
+
1808
+ _run_main(
1809
+ monkeypatch,
1810
+ [
1811
+ str(tmp_path),
1812
+ "--baseline",
1813
+ str(baseline_path),
1814
+ "--cache-path",
1815
+ str(cache_path),
1816
+ "--json",
1817
+ str(json_second),
1818
+ "--min-loc",
1819
+ str(second_min_loc),
1820
+ "--min-stmt",
1821
+ str(second_min_stmt),
1822
+ "--no-progress",
1823
+ ],
1824
+ )
1825
+ out = capsys.readouterr().out
1826
+ payload = json.loads(json_second.read_text("utf-8"))
1827
+ meta = payload["meta"]
1828
+ if expected_warning is not None:
1829
+ assert expected_warning in out
1830
+ assert meta["cache_used"] is expected_cache_used
1831
+ assert meta["cache_status"] == expected_cache_status
1832
+ assert meta["groups_counts"]["functions"]["total"] == expected_functions_total
1833
+
1834
+
1719
1835
  @pytest.mark.parametrize(
1720
1836
  ("flag", "bad_name", "label", "expected"),
1721
1837
  [
@@ -6,7 +6,7 @@ from typing import Any
6
6
 
7
7
  import pytest
8
8
 
9
- from codeclone.contracts import DOCS_URL, ISSUES_URL, REPOSITORY_URL
9
+ from codeclone.contracts import CACHE_VERSION, DOCS_URL, ISSUES_URL, REPOSITORY_URL
10
10
  from codeclone.errors import FileProcessingError
11
11
  from codeclone.html_report import (
12
12
  _FileCache,
@@ -507,7 +507,7 @@ def test_html_report_includes_provenance_metadata(
507
507
  'data-cache-used="true"',
508
508
  "Cache schema",
509
509
  "Cache status",
510
- 'data-cache-schema-version="1.2"',
510
+ f'data-cache-schema-version="{CACHE_VERSION}"',
511
511
  'data-cache-status="ok"',
512
512
  'data-files-skipped-source-io="0"',
513
513
  "Source IO skipped",
@@ -7,7 +7,7 @@ from typing import cast
7
7
  import pytest
8
8
 
9
9
  import codeclone.report as report_mod
10
- from codeclone.contracts import REPORT_SCHEMA_VERSION
10
+ from codeclone.contracts import CACHE_VERSION, REPORT_SCHEMA_VERSION
11
11
  from codeclone.report import (
12
12
  GroupMap,
13
13
  build_block_group_facts,
@@ -276,7 +276,7 @@ def test_report_output_formats(
276
276
  '"baseline_schema_version": 1',
277
277
  f'"baseline_payload_sha256": "{"a" * 64}"',
278
278
  '"baseline_payload_sha256_verified": true',
279
- '"cache_schema_version": "1.2"',
279
+ f'"cache_schema_version": "{CACHE_VERSION}"',
280
280
  '"cache_status": "ok"',
281
281
  '"files_skipped_source_io": 0',
282
282
  ]
@@ -288,7 +288,7 @@ def test_report_output_formats(
288
288
  "Baseline generator name: codeclone",
289
289
  f"Baseline payload sha256: {'a' * 64}",
290
290
  "Baseline payload verified: true",
291
- "Cache schema version: 1.2",
291
+ f"Cache schema version: {CACHE_VERSION}",
292
292
  "Cache status: ok",
293
293
  "Source IO skipped: 0",
294
294
  "FUNCTION CLONES (NEW) (groups=2)",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes