codeclone 1.4.2__tar.gz → 1.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codeclone-1.4.2 → codeclone-1.4.3}/PKG-INFO +9 -8
- {codeclone-1.4.2 → codeclone-1.4.3}/README.md +8 -7
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/cache.py +59 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/cli.py +2 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/contracts.py +1 -1
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone.egg-info/PKG-INFO +9 -8
- {codeclone-1.4.2 → codeclone-1.4.3}/pyproject.toml +1 -1
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cache.py +93 -26
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cli_inprocess.py +117 -1
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_html_report.py +2 -2
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_report.py +3 -3
- {codeclone-1.4.2 → codeclone-1.4.3}/LICENSE +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/__init__.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_cli_args.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_cli_meta.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_cli_paths.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_cli_summary.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_html_escape.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_html_snippets.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_blocks.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_explain.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_explain_contract.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_grouping.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_segments.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_serialize.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/_report_types.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/baseline.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/blockhash.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/blocks.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/cfg.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/cfg_model.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/errors.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/extractor.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/fingerprint.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/html_report.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/meta_markers.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/normalize.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/py.typed +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/report.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/scanner.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/templates.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone/ui_messages.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone.egg-info/SOURCES.txt +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone.egg-info/dependency_links.txt +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone.egg-info/entry_points.txt +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone.egg-info/requires.txt +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/codeclone.egg-info/top_level.txt +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/setup.cfg +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_baseline.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_blockhash.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_blocks.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cfg.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cfg_model.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cli_main_guard.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cli_main_guard_runpy.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cli_smoke.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_cli_unit.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_detector_golden.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_extractor.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_fingerprint.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_init.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_normalize.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_report_explain.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_scanner_extra.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_security.py +0 -0
- {codeclone-1.4.2 → codeclone-1.4.3}/tests/test_segments.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeclone
|
|
3
|
-
Version: 1.4.2
|
|
3
|
+
Version: 1.4.3
|
|
4
4
|
Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
|
|
5
5
|
Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
6
6
|
Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
@@ -158,12 +158,12 @@ Full contract details: [`docs/book/06-baseline.md`](docs/book/06-baseline.md)
|
|
|
158
158
|
|
|
159
159
|
CodeClone uses a deterministic exit code contract:
|
|
160
160
|
|
|
161
|
-
| Code | Meaning
|
|
162
|
-
|
|
163
|
-
| `0` | Success — run completed without gating failures
|
|
161
|
+
| Code | Meaning |
|
|
162
|
+
|------|-------------------------------------------------------------------------------------------------------------------------------------|
|
|
163
|
+
| `0` | Success — run completed without gating failures |
|
|
164
164
|
| `2` | Contract error — baseline missing/untrusted, invalid output extensions, incompatible versions, unreadable source files in CI/gating |
|
|
165
|
-
| `3` | Gating failure — new clones detected or threshold exceeded
|
|
166
|
-
| `5` | Internal error — unexpected exception
|
|
165
|
+
| `3` | Gating failure — new clones detected or threshold exceeded |
|
|
166
|
+
| `5` | Internal error — unexpected exception |
|
|
167
167
|
|
|
168
168
|
**Priority:** Contract errors (`2`) override gating failures (`3`) when both occur.
|
|
169
169
|
|
|
@@ -223,7 +223,7 @@ Canonical report contract: [`docs/book/08-report.md`](docs/book/08-report.md)
|
|
|
223
223
|
"cache_path": "/path/to/.cache/codeclone/cache.json",
|
|
224
224
|
"cache_used": true,
|
|
225
225
|
"cache_status": "ok",
|
|
226
|
-
"cache_schema_version": "1.
|
|
226
|
+
"cache_schema_version": "1.3",
|
|
227
227
|
"files_skipped_source_io": 0,
|
|
228
228
|
"groups_counts": {
|
|
229
229
|
"functions": {
|
|
@@ -304,7 +304,8 @@ Canonical report contract: [`docs/book/08-report.md`](docs/book/08-report.md)
|
|
|
304
304
|
Cache is an optimization layer only and is never a source of truth.
|
|
305
305
|
|
|
306
306
|
- Default path: `<root>/.cache/codeclone/cache.json`
|
|
307
|
-
- Schema version: **v1.2**
|
|
307
|
+
- Schema version: **v1.3**
|
|
308
|
+
- Compatibility includes analysis profile (`min_loc`, `min_stmt`)
|
|
308
309
|
- Invalid or oversized cache is ignored with warning and rebuilt (fail-open)
|
|
309
310
|
|
|
310
311
|
Full contract details: [`docs/book/07-cache.md`](docs/book/07-cache.md)
|
|
@@ -117,12 +117,12 @@ Full contract details: [`docs/book/06-baseline.md`](docs/book/06-baseline.md)
|
|
|
117
117
|
|
|
118
118
|
CodeClone uses a deterministic exit code contract:
|
|
119
119
|
|
|
120
|
-
| Code | Meaning
|
|
121
|
-
|
|
122
|
-
| `0` | Success — run completed without gating failures
|
|
120
|
+
| Code | Meaning |
|
|
121
|
+
|------|-------------------------------------------------------------------------------------------------------------------------------------|
|
|
122
|
+
| `0` | Success — run completed without gating failures |
|
|
123
123
|
| `2` | Contract error — baseline missing/untrusted, invalid output extensions, incompatible versions, unreadable source files in CI/gating |
|
|
124
|
-
| `3` | Gating failure — new clones detected or threshold exceeded
|
|
125
|
-
| `5` | Internal error — unexpected exception
|
|
124
|
+
| `3` | Gating failure — new clones detected or threshold exceeded |
|
|
125
|
+
| `5` | Internal error — unexpected exception |
|
|
126
126
|
|
|
127
127
|
**Priority:** Contract errors (`2`) override gating failures (`3`) when both occur.
|
|
128
128
|
|
|
@@ -182,7 +182,7 @@ Canonical report contract: [`docs/book/08-report.md`](docs/book/08-report.md)
|
|
|
182
182
|
"cache_path": "/path/to/.cache/codeclone/cache.json",
|
|
183
183
|
"cache_used": true,
|
|
184
184
|
"cache_status": "ok",
|
|
185
|
-
"cache_schema_version": "1.
|
|
185
|
+
"cache_schema_version": "1.3",
|
|
186
186
|
"files_skipped_source_io": 0,
|
|
187
187
|
"groups_counts": {
|
|
188
188
|
"functions": {
|
|
@@ -263,7 +263,8 @@ Canonical report contract: [`docs/book/08-report.md`](docs/book/08-report.md)
|
|
|
263
263
|
Cache is an optimization layer only and is never a source of truth.
|
|
264
264
|
|
|
265
265
|
- Default path: `<root>/.cache/codeclone/cache.json`
|
|
266
|
-
- Schema version: **v1.2**
|
|
266
|
+
- Schema version: **v1.3**
|
|
267
|
+
- Compatibility includes analysis profile (`min_loc`, `min_stmt`)
|
|
267
268
|
- Invalid or oversized cache is ignored with warning and rebuilt (fail-open)
|
|
268
269
|
|
|
269
270
|
Full contract details: [`docs/book/07-cache.md`](docs/book/07-cache.md)
|
|
@@ -39,6 +39,7 @@ class CacheStatus(str, Enum):
|
|
|
39
39
|
VERSION_MISMATCH = "version_mismatch"
|
|
40
40
|
PYTHON_TAG_MISMATCH = "python_tag_mismatch"
|
|
41
41
|
FINGERPRINT_MISMATCH = "mismatch_fingerprint_version"
|
|
42
|
+
ANALYSIS_PROFILE_MISMATCH = "analysis_profile_mismatch"
|
|
42
43
|
INTEGRITY_FAILED = "integrity_failed"
|
|
43
44
|
|
|
44
45
|
|
|
@@ -84,15 +85,22 @@ class CacheEntry(TypedDict):
|
|
|
84
85
|
segments: list[SegmentDict]
|
|
85
86
|
|
|
86
87
|
|
|
88
|
+
class AnalysisProfile(TypedDict):
|
|
89
|
+
min_loc: int
|
|
90
|
+
min_stmt: int
|
|
91
|
+
|
|
92
|
+
|
|
87
93
|
class CacheData(TypedDict):
|
|
88
94
|
version: str
|
|
89
95
|
python_tag: str
|
|
90
96
|
fingerprint_version: str
|
|
97
|
+
analysis_profile: AnalysisProfile
|
|
91
98
|
files: dict[str, CacheEntry]
|
|
92
99
|
|
|
93
100
|
|
|
94
101
|
class Cache:
|
|
95
102
|
__slots__ = (
|
|
103
|
+
"analysis_profile",
|
|
96
104
|
"cache_schema_version",
|
|
97
105
|
"data",
|
|
98
106
|
"fingerprint_version",
|
|
@@ -112,14 +120,21 @@ class Cache:
|
|
|
112
120
|
*,
|
|
113
121
|
root: str | Path | None = None,
|
|
114
122
|
max_size_bytes: int | None = None,
|
|
123
|
+
min_loc: int = 15,
|
|
124
|
+
min_stmt: int = 6,
|
|
115
125
|
):
|
|
116
126
|
self.path = Path(path)
|
|
117
127
|
self.root = _resolve_root(root)
|
|
118
128
|
self.fingerprint_version = BASELINE_FINGERPRINT_VERSION
|
|
129
|
+
self.analysis_profile: AnalysisProfile = {
|
|
130
|
+
"min_loc": min_loc,
|
|
131
|
+
"min_stmt": min_stmt,
|
|
132
|
+
}
|
|
119
133
|
self.data: CacheData = _empty_cache_data(
|
|
120
134
|
version=self._CACHE_VERSION,
|
|
121
135
|
python_tag=current_python_tag(),
|
|
122
136
|
fingerprint_version=self.fingerprint_version,
|
|
137
|
+
analysis_profile=self.analysis_profile,
|
|
123
138
|
)
|
|
124
139
|
self.legacy_secret_warning = self._detect_legacy_secret_warning()
|
|
125
140
|
self.cache_schema_version: str | None = None
|
|
@@ -164,6 +179,7 @@ class Cache:
|
|
|
164
179
|
version=self._CACHE_VERSION,
|
|
165
180
|
python_tag=current_python_tag(),
|
|
166
181
|
fingerprint_version=self.fingerprint_version,
|
|
182
|
+
analysis_profile=self.analysis_profile,
|
|
167
183
|
)
|
|
168
184
|
|
|
169
185
|
def _sign_data(self, data: Mapping[str, object]) -> str:
|
|
@@ -309,6 +325,28 @@ class Cache:
|
|
|
309
325
|
)
|
|
310
326
|
return None
|
|
311
327
|
|
|
328
|
+
analysis_profile = _as_analysis_profile(payload.get("ap"))
|
|
329
|
+
if analysis_profile is None:
|
|
330
|
+
self._ignore_cache(
|
|
331
|
+
"Cache format invalid; ignoring cache.",
|
|
332
|
+
status=CacheStatus.INVALID_TYPE,
|
|
333
|
+
schema_version=version,
|
|
334
|
+
)
|
|
335
|
+
return None
|
|
336
|
+
|
|
337
|
+
if analysis_profile != self.analysis_profile:
|
|
338
|
+
self._ignore_cache(
|
|
339
|
+
"Cache analysis profile mismatch "
|
|
340
|
+
f"(found min_loc={analysis_profile['min_loc']}, "
|
|
341
|
+
f"min_stmt={analysis_profile['min_stmt']}; "
|
|
342
|
+
f"expected min_loc={self.analysis_profile['min_loc']}, "
|
|
343
|
+
f"min_stmt={self.analysis_profile['min_stmt']}); "
|
|
344
|
+
"ignoring cache.",
|
|
345
|
+
status=CacheStatus.ANALYSIS_PROFILE_MISMATCH,
|
|
346
|
+
schema_version=version,
|
|
347
|
+
)
|
|
348
|
+
return None
|
|
349
|
+
|
|
312
350
|
files_obj = payload.get("files")
|
|
313
351
|
files_dict = _as_str_dict(files_obj)
|
|
314
352
|
if files_dict is None:
|
|
@@ -337,6 +375,7 @@ class Cache:
|
|
|
337
375
|
"version": self._CACHE_VERSION,
|
|
338
376
|
"python_tag": runtime_tag,
|
|
339
377
|
"fingerprint_version": self.fingerprint_version,
|
|
378
|
+
"analysis_profile": self.analysis_profile,
|
|
340
379
|
"files": parsed_files,
|
|
341
380
|
}
|
|
342
381
|
|
|
@@ -356,6 +395,7 @@ class Cache:
|
|
|
356
395
|
payload: dict[str, object] = {
|
|
357
396
|
"py": current_python_tag(),
|
|
358
397
|
"fp": self.fingerprint_version,
|
|
398
|
+
"ap": self.analysis_profile,
|
|
359
399
|
"files": wire_files,
|
|
360
400
|
}
|
|
361
401
|
signed_doc = {
|
|
@@ -371,6 +411,7 @@ class Cache:
|
|
|
371
411
|
self.data["version"] = self._CACHE_VERSION
|
|
372
412
|
self.data["python_tag"] = current_python_tag()
|
|
373
413
|
self.data["fingerprint_version"] = self.fingerprint_version
|
|
414
|
+
self.data["analysis_profile"] = self.analysis_profile
|
|
374
415
|
|
|
375
416
|
except OSError as e:
|
|
376
417
|
raise CacheError(f"Failed to save cache: {e}") from e
|
|
@@ -508,11 +549,13 @@ def _empty_cache_data(
|
|
|
508
549
|
version: str,
|
|
509
550
|
python_tag: str,
|
|
510
551
|
fingerprint_version: str,
|
|
552
|
+
analysis_profile: AnalysisProfile,
|
|
511
553
|
) -> CacheData:
|
|
512
554
|
return {
|
|
513
555
|
"version": version,
|
|
514
556
|
"python_tag": python_tag,
|
|
515
557
|
"fingerprint_version": fingerprint_version,
|
|
558
|
+
"analysis_profile": analysis_profile,
|
|
516
559
|
"files": {},
|
|
517
560
|
}
|
|
518
561
|
|
|
@@ -542,6 +585,22 @@ def _as_str_dict(value: object) -> dict[str, object] | None:
|
|
|
542
585
|
return value
|
|
543
586
|
|
|
544
587
|
|
|
588
|
+
def _as_analysis_profile(value: object) -> AnalysisProfile | None:
|
|
589
|
+
obj = _as_str_dict(value)
|
|
590
|
+
if obj is None:
|
|
591
|
+
return None
|
|
592
|
+
|
|
593
|
+
if set(obj.keys()) != {"min_loc", "min_stmt"}:
|
|
594
|
+
return None
|
|
595
|
+
|
|
596
|
+
min_loc = _as_int(obj.get("min_loc"))
|
|
597
|
+
min_stmt = _as_int(obj.get("min_stmt"))
|
|
598
|
+
if min_loc is None or min_stmt is None:
|
|
599
|
+
return None
|
|
600
|
+
|
|
601
|
+
return {"min_loc": min_loc, "min_stmt": min_stmt}
|
|
602
|
+
|
|
603
|
+
|
|
545
604
|
def _decode_wire_file_entry(value: object, filepath: str) -> CacheEntry | None:
|
|
546
605
|
obj = _as_str_dict(value)
|
|
547
606
|
if obj is None:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeclone
|
|
3
|
-
Version: 1.4.2
|
|
3
|
+
Version: 1.4.3
|
|
4
4
|
Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
|
|
5
5
|
Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
6
6
|
Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
@@ -158,12 +158,12 @@ Full contract details: [`docs/book/06-baseline.md`](docs/book/06-baseline.md)
|
|
|
158
158
|
|
|
159
159
|
CodeClone uses a deterministic exit code contract:
|
|
160
160
|
|
|
161
|
-
| Code | Meaning
|
|
162
|
-
|
|
163
|
-
| `0` | Success — run completed without gating failures
|
|
161
|
+
| Code | Meaning |
|
|
162
|
+
|------|-------------------------------------------------------------------------------------------------------------------------------------|
|
|
163
|
+
| `0` | Success — run completed without gating failures |
|
|
164
164
|
| `2` | Contract error — baseline missing/untrusted, invalid output extensions, incompatible versions, unreadable source files in CI/gating |
|
|
165
|
-
| `3` | Gating failure — new clones detected or threshold exceeded
|
|
166
|
-
| `5` | Internal error — unexpected exception
|
|
165
|
+
| `3` | Gating failure — new clones detected or threshold exceeded |
|
|
166
|
+
| `5` | Internal error — unexpected exception |
|
|
167
167
|
|
|
168
168
|
**Priority:** Contract errors (`2`) override gating failures (`3`) when both occur.
|
|
169
169
|
|
|
@@ -223,7 +223,7 @@ Canonical report contract: [`docs/book/08-report.md`](docs/book/08-report.md)
|
|
|
223
223
|
"cache_path": "/path/to/.cache/codeclone/cache.json",
|
|
224
224
|
"cache_used": true,
|
|
225
225
|
"cache_status": "ok",
|
|
226
|
-
"cache_schema_version": "1.
|
|
226
|
+
"cache_schema_version": "1.3",
|
|
227
227
|
"files_skipped_source_io": 0,
|
|
228
228
|
"groups_counts": {
|
|
229
229
|
"functions": {
|
|
@@ -304,7 +304,8 @@ Canonical report contract: [`docs/book/08-report.md`](docs/book/08-report.md)
|
|
|
304
304
|
Cache is an optimization layer only and is never a source of truth.
|
|
305
305
|
|
|
306
306
|
- Default path: `<root>/.cache/codeclone/cache.json`
|
|
307
|
-
- Schema version: **v1.2**
|
|
307
|
+
- Schema version: **v1.3**
|
|
308
|
+
- Compatibility includes analysis profile (`min_loc`, `min_stmt`)
|
|
308
309
|
- Invalid or oversized cache is ignored with warning and rebuilt (fail-open)
|
|
309
310
|
|
|
310
311
|
Full contract details: [`docs/book/07-cache.md`](docs/book/07-cache.md)
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "codeclone"
|
|
7
|
-
version = "1.4.2"
|
|
7
|
+
version = "1.4.3"
|
|
8
8
|
description = "AST and CFG-based code clone detector for Python focused on architectural duplication"
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -50,6 +50,15 @@ def _make_segment(filepath: str) -> SegmentUnit:
|
|
|
50
50
|
)
|
|
51
51
|
|
|
52
52
|
|
|
53
|
+
def _analysis_payload(cache: Cache, *, files: object) -> dict[str, object]:
|
|
54
|
+
return {
|
|
55
|
+
"py": cache.data["python_tag"],
|
|
56
|
+
"fp": cache.data["fingerprint_version"],
|
|
57
|
+
"ap": cache.data["analysis_profile"],
|
|
58
|
+
"files": files,
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
|
|
53
62
|
def test_cache_roundtrip(tmp_path: Path) -> None:
|
|
54
63
|
cache_path = tmp_path / "cache.json"
|
|
55
64
|
cache = Cache(cache_path)
|
|
@@ -97,7 +106,7 @@ def test_get_file_entry_missing_after_fallback_returns_none(tmp_path: Path) -> N
|
|
|
97
106
|
assert cache.get_file_entry(str(root / "pkg" / "missing.py")) is None
|
|
98
107
|
|
|
99
108
|
|
|
100
|
-
def test_cache_v12_uses_relpaths_when_root_set(tmp_path: Path) -> None:
|
|
109
|
+
def test_cache_v13_uses_relpaths_when_root_set(tmp_path: Path) -> None:
|
|
101
110
|
project_root = tmp_path / "project"
|
|
102
111
|
target = project_root / "pkg" / "module.py"
|
|
103
112
|
target.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -121,14 +130,10 @@ def test_cache_v12_uses_relpaths_when_root_set(tmp_path: Path) -> None:
|
|
|
121
130
|
assert str(target) not in files
|
|
122
131
|
|
|
123
132
|
|
|
124
|
-
def test_cache_v12_missing_optional_sections_default_empty(tmp_path: Path) -> None:
|
|
133
|
+
def test_cache_v13_missing_optional_sections_default_empty(tmp_path: Path) -> None:
|
|
125
134
|
cache_path = tmp_path / "cache.json"
|
|
126
135
|
cache = Cache(cache_path)
|
|
127
|
-
payload = {
|
|
128
|
-
"py": cache.data["python_tag"],
|
|
129
|
-
"fp": cache.data["fingerprint_version"],
|
|
130
|
-
"files": {"x.py": {"st": [1, 2]}},
|
|
131
|
-
}
|
|
136
|
+
payload = _analysis_payload(cache, files={"x.py": {"st": [1, 2]}})
|
|
132
137
|
signature = cache._sign_data(payload)
|
|
133
138
|
cache_path.write_text(
|
|
134
139
|
json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": signature}),
|
|
@@ -201,11 +206,7 @@ def test_cache_version_mismatch_warns(tmp_path: Path) -> None:
|
|
|
201
206
|
def test_cache_v_field_version_mismatch_warns(tmp_path: Path) -> None:
|
|
202
207
|
cache_path = tmp_path / "cache.json"
|
|
203
208
|
cache = Cache(cache_path)
|
|
204
|
-
payload = {
|
|
205
|
-
"py": cache.data["python_tag"],
|
|
206
|
-
"fp": cache.data["fingerprint_version"],
|
|
207
|
-
"files": {},
|
|
208
|
-
}
|
|
209
|
+
payload = _analysis_payload(cache, files={})
|
|
209
210
|
signature = cache._sign_data(payload)
|
|
210
211
|
cache_path.write_text(
|
|
211
212
|
json.dumps({"v": "0.0", "payload": payload, "sig": signature}), "utf-8"
|
|
@@ -527,11 +528,7 @@ def test_cache_load_unreadable_read_graceful_ignore(
|
|
|
527
528
|
def test_cache_load_invalid_files_type(tmp_path: Path) -> None:
|
|
528
529
|
cache_path = tmp_path / "cache.json"
|
|
529
530
|
cache = Cache(cache_path)
|
|
530
|
-
payload = {
|
|
531
|
-
"py": cache.data["python_tag"],
|
|
532
|
-
"fp": cache.data["fingerprint_version"],
|
|
533
|
-
"files": [],
|
|
534
|
-
}
|
|
531
|
+
payload = _analysis_payload(cache, files=[])
|
|
535
532
|
signature = cache._sign_data(payload)
|
|
536
533
|
cache_path.write_text(
|
|
537
534
|
json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": signature}),
|
|
@@ -644,11 +641,7 @@ def test_cache_load_invalid_top_level_type(tmp_path: Path) -> None:
|
|
|
644
641
|
def test_cache_load_missing_v_field(tmp_path: Path) -> None:
|
|
645
642
|
cache_path = tmp_path / "cache.json"
|
|
646
643
|
cache = Cache(cache_path)
|
|
647
|
-
payload = {
|
|
648
|
-
"py": cache.data["python_tag"],
|
|
649
|
-
"fp": cache.data["fingerprint_version"],
|
|
650
|
-
"files": {},
|
|
651
|
-
}
|
|
644
|
+
payload = _analysis_payload(cache, files={})
|
|
652
645
|
sig = cache._sign_data(payload)
|
|
653
646
|
cache_path.write_text(json.dumps({"payload": payload, "sig": sig}), "utf-8")
|
|
654
647
|
cache.load()
|
|
@@ -683,7 +676,12 @@ def test_cache_load_missing_python_tag_in_payload(tmp_path: Path) -> None:
|
|
|
683
676
|
def test_cache_load_python_tag_mismatch(tmp_path: Path) -> None:
|
|
684
677
|
cache_path = tmp_path / "cache.json"
|
|
685
678
|
cache = Cache(cache_path)
|
|
686
|
-
payload = {
|
|
679
|
+
payload = {
|
|
680
|
+
"py": "cp999",
|
|
681
|
+
"fp": cache.data["fingerprint_version"],
|
|
682
|
+
"ap": cache.data["analysis_profile"],
|
|
683
|
+
"files": {},
|
|
684
|
+
}
|
|
687
685
|
sig = cache._sign_data(payload)
|
|
688
686
|
cache_path.write_text(
|
|
689
687
|
json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8"
|
|
@@ -709,7 +707,12 @@ def test_cache_load_missing_fingerprint_version(tmp_path: Path) -> None:
|
|
|
709
707
|
def test_cache_load_fingerprint_version_mismatch(tmp_path: Path) -> None:
|
|
710
708
|
cache_path = tmp_path / "cache.json"
|
|
711
709
|
cache = Cache(cache_path)
|
|
712
|
-
payload = {
|
|
710
|
+
payload = {
|
|
711
|
+
"py": cache.data["python_tag"],
|
|
712
|
+
"fp": "old",
|
|
713
|
+
"ap": cache.data["analysis_profile"],
|
|
714
|
+
"files": {},
|
|
715
|
+
}
|
|
713
716
|
sig = cache._sign_data(payload)
|
|
714
717
|
cache_path.write_text(
|
|
715
718
|
json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8"
|
|
@@ -719,18 +722,82 @@ def test_cache_load_fingerprint_version_mismatch(tmp_path: Path) -> None:
|
|
|
719
722
|
assert "fingerprint version mismatch" in cache.load_warning
|
|
720
723
|
|
|
721
724
|
|
|
722
|
-
def
|
|
725
|
+
def test_cache_load_analysis_profile_mismatch(tmp_path: Path) -> None:
|
|
726
|
+
cache_path = tmp_path / "cache.json"
|
|
727
|
+
cache = Cache(cache_path, min_loc=1, min_stmt=1)
|
|
728
|
+
cache.put_file_entry("x.py", {"mtime_ns": 1, "size": 10}, [], [], [])
|
|
729
|
+
cache.save()
|
|
730
|
+
|
|
731
|
+
loaded = Cache(cache_path, min_loc=15, min_stmt=6)
|
|
732
|
+
loaded.load()
|
|
733
|
+
|
|
734
|
+
assert loaded.load_warning is not None
|
|
735
|
+
assert "analysis profile mismatch" in loaded.load_warning
|
|
736
|
+
assert loaded.data["files"] == {}
|
|
737
|
+
assert loaded.load_status == CacheStatus.ANALYSIS_PROFILE_MISMATCH
|
|
738
|
+
assert loaded.cache_schema_version == Cache._CACHE_VERSION
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
def test_cache_load_missing_analysis_profile_in_payload(tmp_path: Path) -> None:
|
|
742
|
+
cache_path = tmp_path / "cache.json"
|
|
743
|
+
cache = Cache(cache_path)
|
|
744
|
+
payload = {
|
|
745
|
+
"py": cache.data["python_tag"],
|
|
746
|
+
"fp": cache.data["fingerprint_version"],
|
|
747
|
+
"files": {},
|
|
748
|
+
}
|
|
749
|
+
sig = cache._sign_data(payload)
|
|
750
|
+
cache_path.write_text(
|
|
751
|
+
json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8"
|
|
752
|
+
)
|
|
753
|
+
|
|
754
|
+
cache.load()
|
|
755
|
+
assert cache.load_warning is not None
|
|
756
|
+
assert "format invalid" in cache.load_warning
|
|
757
|
+
assert cache.load_status == CacheStatus.INVALID_TYPE
|
|
758
|
+
assert cache.cache_schema_version == Cache._CACHE_VERSION
|
|
759
|
+
assert cache.data["files"] == {}
|
|
760
|
+
|
|
761
|
+
|
|
762
|
+
@pytest.mark.parametrize(
|
|
763
|
+
"bad_analysis_profile",
|
|
764
|
+
[
|
|
765
|
+
{"min_loc": 15},
|
|
766
|
+
{"min_loc": "15", "min_stmt": 6},
|
|
767
|
+
],
|
|
768
|
+
)
|
|
769
|
+
def test_cache_load_invalid_analysis_profile_payload(
|
|
770
|
+
tmp_path: Path, bad_analysis_profile: object
|
|
771
|
+
) -> None:
|
|
723
772
|
cache_path = tmp_path / "cache.json"
|
|
724
773
|
cache = Cache(cache_path)
|
|
725
774
|
payload = {
|
|
726
775
|
"py": cache.data["python_tag"],
|
|
727
776
|
"fp": cache.data["fingerprint_version"],
|
|
728
|
-
"
|
|
777
|
+
"ap": bad_analysis_profile,
|
|
778
|
+
"files": {},
|
|
729
779
|
}
|
|
730
780
|
sig = cache._sign_data(payload)
|
|
731
781
|
cache_path.write_text(
|
|
732
782
|
json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8"
|
|
733
783
|
)
|
|
784
|
+
|
|
785
|
+
cache.load()
|
|
786
|
+
assert cache.load_warning is not None
|
|
787
|
+
assert "format invalid" in cache.load_warning
|
|
788
|
+
assert cache.load_status == CacheStatus.INVALID_TYPE
|
|
789
|
+
assert cache.cache_schema_version == Cache._CACHE_VERSION
|
|
790
|
+
assert cache.data["files"] == {}
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
def test_cache_load_invalid_wire_file_entry(tmp_path: Path) -> None:
|
|
794
|
+
cache_path = tmp_path / "cache.json"
|
|
795
|
+
cache = Cache(cache_path)
|
|
796
|
+
payload = _analysis_payload(cache, files={"x.py": {"st": "bad"}})
|
|
797
|
+
sig = cache._sign_data(payload)
|
|
798
|
+
cache_path.write_text(
|
|
799
|
+
json.dumps({"v": cache._CACHE_VERSION, "payload": payload, "sig": sig}), "utf-8"
|
|
800
|
+
)
|
|
734
801
|
cache.load()
|
|
735
802
|
assert cache.load_warning is not None
|
|
736
803
|
assert "format invalid" in cache.load_warning
|
|
@@ -708,7 +708,7 @@ def test_cli_cache_status_string_fallback(
|
|
|
708
708
|
def __init__(self, _path: Path, **_kwargs: object) -> None:
|
|
709
709
|
self.load_warning = load_warning
|
|
710
710
|
self.load_status = "not-a-cache-status"
|
|
711
|
-
self.cache_schema_version =
|
|
711
|
+
self.cache_schema_version = CACHE_VERSION
|
|
712
712
|
|
|
713
713
|
def load(self) -> None:
|
|
714
714
|
return None
|
|
@@ -1716,6 +1716,122 @@ def test_cli_reports_cache_meta_when_cache_missing(
|
|
|
1716
1716
|
assert meta["cache_schema_version"] is None
|
|
1717
1717
|
|
|
1718
1718
|
|
|
1719
|
+
@pytest.mark.parametrize(
|
|
1720
|
+
(
|
|
1721
|
+
"first_min_loc",
|
|
1722
|
+
"first_min_stmt",
|
|
1723
|
+
"second_min_loc",
|
|
1724
|
+
"second_min_stmt",
|
|
1725
|
+
"expected_cache_used",
|
|
1726
|
+
"expected_cache_status",
|
|
1727
|
+
"expected_functions_total",
|
|
1728
|
+
"expected_warning",
|
|
1729
|
+
),
|
|
1730
|
+
[
|
|
1731
|
+
(
|
|
1732
|
+
1,
|
|
1733
|
+
1,
|
|
1734
|
+
15,
|
|
1735
|
+
6,
|
|
1736
|
+
False,
|
|
1737
|
+
"analysis_profile_mismatch",
|
|
1738
|
+
0,
|
|
1739
|
+
"analysis profile mismatch",
|
|
1740
|
+
),
|
|
1741
|
+
(
|
|
1742
|
+
15,
|
|
1743
|
+
6,
|
|
1744
|
+
1,
|
|
1745
|
+
1,
|
|
1746
|
+
False,
|
|
1747
|
+
"analysis_profile_mismatch",
|
|
1748
|
+
1,
|
|
1749
|
+
"analysis profile mismatch",
|
|
1750
|
+
),
|
|
1751
|
+
(1, 1, 1, 1, True, "ok", 1, None),
|
|
1752
|
+
],
|
|
1753
|
+
)
|
|
1754
|
+
def test_cli_cache_analysis_profile_compatibility(
|
|
1755
|
+
tmp_path: Path,
|
|
1756
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
1757
|
+
capsys: pytest.CaptureFixture[str],
|
|
1758
|
+
first_min_loc: int,
|
|
1759
|
+
first_min_stmt: int,
|
|
1760
|
+
second_min_loc: int,
|
|
1761
|
+
second_min_stmt: int,
|
|
1762
|
+
expected_cache_used: bool,
|
|
1763
|
+
expected_cache_status: str,
|
|
1764
|
+
expected_functions_total: int,
|
|
1765
|
+
expected_warning: str | None,
|
|
1766
|
+
) -> None:
|
|
1767
|
+
src = tmp_path / "a.py"
|
|
1768
|
+
src.write_text(
|
|
1769
|
+
"""
|
|
1770
|
+
def f1():
|
|
1771
|
+
x = 1
|
|
1772
|
+
return x
|
|
1773
|
+
|
|
1774
|
+
def f2():
|
|
1775
|
+
y = 1
|
|
1776
|
+
return y
|
|
1777
|
+
""",
|
|
1778
|
+
"utf-8",
|
|
1779
|
+
)
|
|
1780
|
+
baseline_path = _write_baseline(
|
|
1781
|
+
tmp_path / "baseline.json",
|
|
1782
|
+
python_version=f"{sys.version_info.major}.{sys.version_info.minor}",
|
|
1783
|
+
)
|
|
1784
|
+
cache_path = tmp_path / "cache.json"
|
|
1785
|
+
json_first = tmp_path / "report-first.json"
|
|
1786
|
+
json_second = tmp_path / "report-second.json"
|
|
1787
|
+
_patch_parallel(monkeypatch)
|
|
1788
|
+
|
|
1789
|
+
_run_main(
|
|
1790
|
+
monkeypatch,
|
|
1791
|
+
[
|
|
1792
|
+
str(tmp_path),
|
|
1793
|
+
"--baseline",
|
|
1794
|
+
str(baseline_path),
|
|
1795
|
+
"--cache-path",
|
|
1796
|
+
str(cache_path),
|
|
1797
|
+
"--json",
|
|
1798
|
+
str(json_first),
|
|
1799
|
+
"--min-loc",
|
|
1800
|
+
str(first_min_loc),
|
|
1801
|
+
"--min-stmt",
|
|
1802
|
+
str(first_min_stmt),
|
|
1803
|
+
"--no-progress",
|
|
1804
|
+
],
|
|
1805
|
+
)
|
|
1806
|
+
capsys.readouterr()
|
|
1807
|
+
|
|
1808
|
+
_run_main(
|
|
1809
|
+
monkeypatch,
|
|
1810
|
+
[
|
|
1811
|
+
str(tmp_path),
|
|
1812
|
+
"--baseline",
|
|
1813
|
+
str(baseline_path),
|
|
1814
|
+
"--cache-path",
|
|
1815
|
+
str(cache_path),
|
|
1816
|
+
"--json",
|
|
1817
|
+
str(json_second),
|
|
1818
|
+
"--min-loc",
|
|
1819
|
+
str(second_min_loc),
|
|
1820
|
+
"--min-stmt",
|
|
1821
|
+
str(second_min_stmt),
|
|
1822
|
+
"--no-progress",
|
|
1823
|
+
],
|
|
1824
|
+
)
|
|
1825
|
+
out = capsys.readouterr().out
|
|
1826
|
+
payload = json.loads(json_second.read_text("utf-8"))
|
|
1827
|
+
meta = payload["meta"]
|
|
1828
|
+
if expected_warning is not None:
|
|
1829
|
+
assert expected_warning in out
|
|
1830
|
+
assert meta["cache_used"] is expected_cache_used
|
|
1831
|
+
assert meta["cache_status"] == expected_cache_status
|
|
1832
|
+
assert meta["groups_counts"]["functions"]["total"] == expected_functions_total
|
|
1833
|
+
|
|
1834
|
+
|
|
1719
1835
|
@pytest.mark.parametrize(
|
|
1720
1836
|
("flag", "bad_name", "label", "expected"),
|
|
1721
1837
|
[
|
|
@@ -6,7 +6,7 @@ from typing import Any
|
|
|
6
6
|
|
|
7
7
|
import pytest
|
|
8
8
|
|
|
9
|
-
from codeclone.contracts import DOCS_URL, ISSUES_URL, REPOSITORY_URL
|
|
9
|
+
from codeclone.contracts import CACHE_VERSION, DOCS_URL, ISSUES_URL, REPOSITORY_URL
|
|
10
10
|
from codeclone.errors import FileProcessingError
|
|
11
11
|
from codeclone.html_report import (
|
|
12
12
|
_FileCache,
|
|
@@ -507,7 +507,7 @@ def test_html_report_includes_provenance_metadata(
|
|
|
507
507
|
'data-cache-used="true"',
|
|
508
508
|
"Cache schema",
|
|
509
509
|
"Cache status",
|
|
510
|
-
'data-cache-schema-version="1.2"',
|
|
510
|
+
f'data-cache-schema-version="{CACHE_VERSION}"',
|
|
511
511
|
'data-cache-status="ok"',
|
|
512
512
|
'data-files-skipped-source-io="0"',
|
|
513
513
|
"Source IO skipped",
|
|
@@ -7,7 +7,7 @@ from typing import cast
|
|
|
7
7
|
import pytest
|
|
8
8
|
|
|
9
9
|
import codeclone.report as report_mod
|
|
10
|
-
from codeclone.contracts import REPORT_SCHEMA_VERSION
|
|
10
|
+
from codeclone.contracts import CACHE_VERSION, REPORT_SCHEMA_VERSION
|
|
11
11
|
from codeclone.report import (
|
|
12
12
|
GroupMap,
|
|
13
13
|
build_block_group_facts,
|
|
@@ -276,7 +276,7 @@ def test_report_output_formats(
|
|
|
276
276
|
'"baseline_schema_version": 1',
|
|
277
277
|
f'"baseline_payload_sha256": "{"a" * 64}"',
|
|
278
278
|
'"baseline_payload_sha256_verified": true',
|
|
279
|
-
'"cache_schema_version": "
|
|
279
|
+
f'"cache_schema_version": "{CACHE_VERSION}"',
|
|
280
280
|
'"cache_status": "ok"',
|
|
281
281
|
'"files_skipped_source_io": 0',
|
|
282
282
|
]
|
|
@@ -288,7 +288,7 @@ def test_report_output_formats(
|
|
|
288
288
|
"Baseline generator name: codeclone",
|
|
289
289
|
f"Baseline payload sha256: {'a' * 64}",
|
|
290
290
|
"Baseline payload verified: true",
|
|
291
|
-
"Cache schema version:
|
|
291
|
+
f"Cache schema version: {CACHE_VERSION}",
|
|
292
292
|
"Cache status: ok",
|
|
293
293
|
"Source IO skipped: 0",
|
|
294
294
|
"FUNCTION CLONES (NEW) (groups=2)",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|