clean-code-tools 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/README.md +66 -0
  2. package/configs/eslint.clean-code.recommended.mjs +211 -0
  3. package/configs/python.clean-code.pyproject.toml +143 -0
  4. package/data/clean-code-patterns.jsonl +264 -0
  5. package/data/vector-record.schema.json +77 -0
  6. package/docs/README.md +29 -0
  7. package/docs/eslint-custom-rules.md +74 -0
  8. package/docs/eslint-recommended-config.md +87 -0
  9. package/docs/fastmcp-local-server.md +104 -0
  10. package/docs/publishing.md +125 -0
  11. package/docs/python-lint-recommended-config.md +57 -0
  12. package/docs/python-pylint-custom-rules.md +77 -0
  13. package/docs/semantic-weaviate.md +80 -0
  14. package/docs/static-trigger-semantic-review.md +97 -0
  15. package/evals/clean-code-retrieval.jsonl +13 -0
  16. package/ops/dev/weaviate/README.md +34 -0
  17. package/ops/dev/weaviate/compose.yaml +34 -0
  18. package/ops/dev/weaviate/smoke.sh +28 -0
  19. package/package.json +96 -0
  20. package/pyproject.toml +303 -0
  21. package/sample-apps/README.md +40 -0
  22. package/sample-apps/python-app/pyproject.toml +113 -0
  23. package/sample-apps/python-app/src/clean_pricing.py +10 -0
  24. package/sample-apps/python-app/src/smelly_pricing.py +8 -0
  25. package/sample-apps/ts-backend/eslint.config.mjs +3 -0
  26. package/sample-apps/ts-backend/package.json +18 -0
  27. package/sample-apps/ts-backend/src/clean-handler.ts +19 -0
  28. package/sample-apps/ts-backend/src/smelly-handler.ts +29 -0
  29. package/sample-apps/ts-backend/tsconfig.json +9 -0
  30. package/sample-apps/ts-frontend/eslint.config.mjs +3 -0
  31. package/sample-apps/ts-frontend/package.json +18 -0
  32. package/sample-apps/ts-frontend/src/CleanWidget.tsx +18 -0
  33. package/sample-apps/ts-frontend/src/SmellyWidget.tsx +27 -0
  34. package/sample-apps/ts-frontend/tsconfig.json +10 -0
  35. package/scripts/_mcp_app.py +21 -0
  36. package/scripts/check_clean_code_review_candidates.py +302 -0
  37. package/scripts/check_fastmcp_server.py +106 -0
  38. package/scripts/check_packages.py +137 -0
  39. package/scripts/check_python_config.py +130 -0
  40. package/scripts/check_repo_python_lint.py +46 -0
  41. package/scripts/check_retrieval_evals.py +132 -0
  42. package/scripts/check_sample_apps.py +169 -0
  43. package/scripts/check_semantic_search_tooling.py +102 -0
  44. package/scripts/clean_code_eslint_triggers.py +272 -0
  45. package/scripts/clean_code_mcp_server.py +7 -0
  46. package/scripts/clean_code_python_triggers.py +318 -0
  47. package/scripts/clean_code_review_candidates.py +291 -0
  48. package/scripts/clean_code_review_io.py +36 -0
  49. package/scripts/clean_code_review_models.py +43 -0
  50. package/scripts/clean_code_semantic.py +27 -0
  51. package/scripts/set_package_versions.py +82 -0
  52. package/scripts/weaviate_ingest_clean_code.py +44 -0
  53. package/scripts/weaviate_search_clean_code.py +51 -0
  54. package/skills/clean-code-mcp-reviewer/SKILL.md +209 -0
  55. package/skills/clean-code-mcp-reviewer/evals/evals.json +30 -0
  56. package/src/js/eslint-plugin-clean-code.mjs +758 -0
  57. package/src/python/clean_code_tools_pylint/__init__.py +14 -0
  58. package/src/python/clean_code_tools_pylint/ast_checker.py +122 -0
  59. package/src/python/clean_code_tools_pylint/comments.py +83 -0
  60. package/src/python/clean_code_tools_pylint/helpers.py +196 -0
  61. package/src/python/mcp_server/__init__.py +1 -0
  62. package/src/python/mcp_server/corpus.py +160 -0
  63. package/src/python/mcp_server/markdown.py +126 -0
  64. package/src/python/mcp_server/models.py +73 -0
  65. package/src/python/mcp_server/ranking.py +125 -0
  66. package/src/python/mcp_server/ranking_scoring.py +232 -0
  67. package/src/python/mcp_server/semantic.py +192 -0
  68. package/src/python/mcp_server/server.py +235 -0
  69. package/src/python/mcp_server/server_payloads.py +83 -0
  70. package/src/python/mcp_server/text.py +104 -0
  71. package/src/python/mcp_server/utils/__init__.py +1 -0
  72. package/src/python/mcp_server/utils/httpx_loader.py +14 -0
  73. package/src/python/mcp_server/utils/increment.py +7 -0
  74. package/src/python/mcp_server/utils/sha256_text.py +8 -0
  75. package/src/python/mcp_server/utils/unique_strings.py +15 -0
  76. package/src/python/mcp_server/weaviate.py +182 -0
  77. package/uv.lock +2012 -0
@@ -0,0 +1,106 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ from typing import Any, cast
6
+
7
+ from _mcp_app import load_server_module
8
+
9
+
10
async def main() -> None:
    """Exercise the MCP server in-process through a FastMCP ``Client``.

    Before the client connects, the loaded server module's
    ``search_pattern_records`` attribute is replaced with a stub that always
    returns a single canned CC-043 hit (presumably so the search-backed tools
    can be checked without a live vector store -- confirm against the server).

    Raises:
        SystemExit: If ``fastmcp`` cannot be imported.
        AssertionError: If a tool, resource or payload expectation fails.
    """
    try:
        from fastmcp import Client
    except ImportError as exc:
        raise SystemExit(
            "Install FastMCP to run this check: python3 -m pip install 'fastmcp>=2.0.0'"
        ) from exc

    mcp_server = cast(Any, load_server_module())

    def fake_search_pattern_records(**_: object) -> dict[str, object]:
        # Ignores every keyword argument; a fresh payload is built per call.
        stub_hit: dict[str, object] = {
            "pattern_id": "CC-043",
            "chunk_id": "pattern:CC-043",
            "title": "Flag Arguments",
            "topic": "Chapter 3: Functions",
            "rule_family": "functions",
            "lintability": "high",
            "languages": ["typescript", "python"],
            "aliases": ["flag argument", "boolean parameter"],
            "lint_candidates": ["Replace boolean modes with named functions."],
            "source_kind": "clean_code_pattern",
            "chunk_kind": "pattern_record",
            "score": 0.91,
            "confidence": "high",
            "distance": 0.09,
            "match_reasons": ["matched exact terms: flag argument"],
        }
        return {
            "query": "stub",
            "filters_applied": {},
            "no_strong_match": False,
            "results": [stub_hit],
        }

    mcp_server.search_pattern_records = fake_search_pattern_records

    async with Client(mcp_server.mcp) as client:

        async def call(tool: str, **arguments: object) -> Any:
            # Invoke one tool and hand back only its structured ``data``.
            response = await client.call_tool(tool, arguments)
            return response.data

        expected_tools = {
            "clean_code_corpus_summary",
            "clean_code_weaviate_schema",
            "search_clean_code",
            "search_clean_code_patterns",
            "get_clean_code_pattern",
            "recommend_clean_code_lint_rules",
            "list_clean_code_facets",
        }
        tool_names = {tool.name for tool in await client.list_tools()}
        assert expected_tools <= tool_names

        resource_uris = {str(resource.uri) for resource in await client.list_resources()}
        assert "clean-code://corpus/summary" in resource_uris
        assert "clean-code://weaviate/schema" in resource_uris

        summary_data = await call("clean_code_corpus_summary")
        assert summary_data["chunks"] >= 560
        assert summary_data["by_kind"]["pattern_record"] == 264

        schema_data = await call("clean_code_weaviate_schema")
        assert schema_data["class"] == "CleanCodeChunks"
        assert schema_data["vectorConfig"]["content"]["vectorizer"] == {"none": {}}

        pattern_data = await call("get_clean_code_pattern", pattern_id="CC-043")
        assert pattern_data["id"] == "CC-043"
        assert "flag argument" in " ".join(pattern_data["aliases"])

        facet_data = await call("list_clean_code_facets")
        assert facet_data["rule_families"]["functions"] >= 1
        assert facet_data["languages"]["typescript"] >= 1

        search_data = await call(
            "search_clean_code_patterns",
            query="typescript boolean flag argument",
            language="typescript",
        )
        assert search_data["results"][0]["pattern_id"] == "CC-043"
        assert search_data["no_strong_match"] is False

        recommendation_data = await call(
            "recommend_clean_code_lint_rules",
            query="typescript boolean flag argument",
            language="typescript",
        )
        top_recommendation = recommendation_data["results"][0]
        assert top_recommendation["pattern_id"] == "CC-043"
        assert top_recommendation["targets"] == ["eslint", "semgrep"]

    print("fastmcp_server_check=ok")
103
+
104
+
105
+ if __name__ == "__main__":
106
+ asyncio.run(main())
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import subprocess
6
+ import sys
7
+ import tempfile
8
+ from pathlib import Path
9
+
10
+ ROOT = Path(__file__).resolve().parents[1]
11
+
12
+
13
def run(command: list[str], *, cwd: Path = ROOT, check: bool = True) -> subprocess.CompletedProcess[str]:
    """Run ``command`` and capture stdout and stderr as one text stream.

    Args:
        command: Argument vector passed to ``subprocess.run``.
        cwd: Working directory for the child process.
        check: When true, a non-zero exit prints the captured output and
            terminates this script with the child's exit code.

    Returns:
        The completed process; ``stdout`` holds the merged output.

    Raises:
        SystemExit: If ``check`` is true and the command exits non-zero.
    """
    result = subprocess.run(
        command,
        cwd=cwd,
        check=False,  # failures are handled below so the output can be shown first
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    succeeded = result.returncode == 0
    if succeeded or not check:
        return result
    print(result.stdout)
    raise SystemExit(result.returncode)
26
+
27
+
28
def check_npm_package() -> None:
    """Verify that the npm tarball would contain every required file.

    Runs ``npm pack --dry-run --json`` and compares the ``path`` of each entry
    in the first report's ``files`` list against a fixed set.

    Raises:
        SystemExit: If any required file is absent from the pack report.
    """
    required_files = {
        "src/js/eslint-plugin-clean-code.mjs",
        "configs/eslint.clean-code.recommended.mjs",
        "configs/python.clean-code.pyproject.toml",
        "data/clean-code-patterns.jsonl",
        "data/vector-record.schema.json",
        "src/python/clean_code_tools_pylint/__init__.py",
        "src/python/clean_code_tools_pylint/ast_checker.py",
        "src/python/clean_code_tools_pylint/comments.py",
        "src/python/clean_code_tools_pylint/helpers.py",
    }
    pack_report = json.loads(run(["npm", "pack", "--dry-run", "--json"]).stdout)
    packed_paths = {entry["path"] for entry in pack_report[0]["files"]}
    missing_files = sorted(required_files.difference(packed_paths))
    if not missing_files:
        return
    raise SystemExit(f"npm package missing files: {', '.join(missing_files)}")
45
+
46
+
47
def check_version_parity() -> None:
    """Require package.json and pyproject.toml to declare the same version.

    Both manifests are decoded explicitly as UTF-8: TOML documents are UTF-8
    by specification, and relying on the locale default can mis-decode them
    on platforms whose default encoding differs.

    Raises:
        SystemExit: If ``tomllib`` is unavailable or the two versions differ.
    """
    package_json = json.loads((ROOT / "package.json").read_text(encoding="utf-8"))
    try:
        import tomllib
    except ImportError as exc:  # pragma: no cover - Python 3.12 project
        raise SystemExit("Python 3.11+ is required for tomllib") from exc
    pyproject = tomllib.loads((ROOT / "pyproject.toml").read_text(encoding="utf-8"))
    npm_version = package_json["version"]
    python_version = pyproject["project"]["version"]
    if npm_version == python_version:
        return
    raise SystemExit(
        f"Package versions must match for dual publishing: npm={npm_version}, python={python_version}"
    )
61
+
62
+
63
def venv_executable(venv: Path, executable: str) -> Path:
    """Return the path of ``executable`` inside the virtual environment ``venv``.

    Virtual environments place their entry points in ``Scripts`` on Windows
    and in ``bin`` elsewhere, so the directory is chosen from ``sys.platform``.

    Args:
        venv: Root directory of the virtual environment.
        executable: Entry-point name without a directory, e.g. ``"python"``.

    Returns:
        ``venv/<scripts-dir>/executable``; the path is not checked for existence.
    """
    scripts_dir = "Scripts" if sys.platform == "win32" else "bin"
    return venv / scripts_dir / executable
65
+
66
+
67
def installed_python_config(venv: Path) -> str:
    """Return the config text printed by the venv's interpreter.

    A one-off script is run with the virtual environment's ``python`` so the
    resource ``configs/python.clean-code.pyproject.toml`` is resolved through
    ``importlib.resources`` from the ``clean_code_tools_pylint`` package as
    that interpreter sees it.

    Args:
        venv: Root directory of the virtual environment to query.

    Returns:
        The captured output of the script (the config text as printed).
    """
    resource = "configs/python.clean-code.pyproject.toml"
    script = (
        "from importlib.resources import files\n"
        f"print(files('clean_code_tools_pylint').joinpath('{resource}').read_text())\n"
    )
    interpreter = venv_executable(venv, "python")
    completed = run([str(interpreter), "-c", script], cwd=venv)
    return completed.stdout
74
+
75
+
76
def check_python_package() -> None:
    """Build the wheel, install it into a throwaway venv and lint a fixture.

    The fixture project uses the config returned by
    ``installed_python_config`` and a deliberately smelly module; both Ruff
    and Pylint, run from the venv, must exit non-zero and mention the expected
    rule codes.

    Raises:
        SystemExit: If no wheel is produced, a setup command fails, a linter
            reports nothing, or an expected rule code is missing.
    """
    smelly_source = """
# TODO clean this up
# old_result = calculate_total(order)

def calculate_total(order, include_tax, dry_run, retry, verbose, mode):
    if order.status == "pending":
        order["status"] = "retry"
        calculate_total(order, True, False, False, False, "mode")
        return 5
    return 0
""".lstrip()

    with tempfile.TemporaryDirectory(prefix="clean-code-package-") as raw_tmp:
        tmp = Path(raw_tmp)

        dist = tmp / "dist"
        run(["uv", "build", "--wheel", "--out-dir", str(dist)])
        wheels = sorted(dist.glob("clean_code_tools_python-*.whl"))
        if not wheels:
            raise SystemExit("Expected uv build to create a clean-code-tools-python wheel")
        newest_wheel = wheels[-1]  # last entry of the name-sorted glob

        venv = tmp / ".venv"
        run([sys.executable, "-m", "venv", str(venv)])
        venv_python = str(venv_executable(venv, "python"))
        run([venv_python, "-m", "pip", "install", "--quiet", str(newest_wheel)])

        fixture = tmp / "fixture"
        fixture.mkdir()
        (fixture / "pyproject.toml").write_text(installed_python_config(venv))
        (fixture / "smelly.py").write_text(smelly_source)

        # check=False: a non-zero exit is the expected outcome for the smelly file.
        ruff = run(
            [str(venv_executable(venv, "ruff")), "check", "smelly.py"],
            cwd=fixture,
            check=False,
        )
        pylint = run(
            [str(venv_executable(venv, "pylint")), "--rcfile=pyproject.toml", "smelly.py"],
            cwd=fixture,
            check=False,
        )

        if 0 in (ruff.returncode, pylint.returncode):
            print(ruff.stdout)
            print(pylint.stdout)
            raise SystemExit("Expected installed package lint commands to report findings")

        expectations = (
            ("Ruff", ruff.stdout, ["TD002", "TD003", "ERA001"]),
            ("Pylint", pylint.stdout, ["C9001", "C9002", "C9003", "C9004", "C9007"]),
        )
        for tool, output, codes in expectations:
            for code in codes:
                if code not in output:
                    print(output)
                    raise SystemExit(f"Expected installed {tool} output to include {code}")
128
+
129
def main() -> None:
    """Run the version, npm and Python package checks in order, then report success."""
    for check in (check_version_parity, check_npm_package, check_python_package):
        check()
    print("package_checks=ok")
134
+
135
+
136
+ if __name__ == "__main__":
137
+ main()
@@ -0,0 +1,130 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ import shutil
6
+ import subprocess
7
+ import tempfile
8
+ from pathlib import Path
9
+
10
+ ROOT = Path(__file__).resolve().parents[1]
11
+ PYTHON_SRC = ROOT / "src" / "python"
12
+ CONFIG = ROOT / "configs" / "python.clean-code.pyproject.toml"
13
+
14
+
15
def run(command: list[str], cwd: Path, check: bool = True) -> subprocess.CompletedProcess[str]:
    """Run ``command`` with the project's Python sources on ``PYTHONPATH``.

    ``PYTHON_SRC`` is placed in front of any inherited ``PYTHONPATH``. Entries
    are joined with ``os.pathsep`` rather than a literal ``:`` so the value is
    also valid where the platform separator differs (``;`` on Windows).

    Args:
        command: Argument vector passed to ``subprocess.run``.
        cwd: Working directory for the child process.
        check: When true, a non-zero exit prints the captured output and
            terminates this script with the child's exit code.

    Returns:
        The completed process; ``stdout`` holds merged stdout and stderr.

    Raises:
        SystemExit: If ``check`` is true and the command exits non-zero.
    """
    env = os.environ.copy()
    inherited = env.get("PYTHONPATH", "").rstrip(os.pathsep)
    # filter(None, ...) drops an empty inherited value so no trailing separator is left.
    env["PYTHONPATH"] = os.pathsep.join(filter(None, [str(PYTHON_SRC), inherited]))
    completed = subprocess.run(
        command,
        cwd=cwd,
        env=env,
        check=False,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    if check and completed.returncode != 0:
        print(completed.stdout)
        raise SystemExit(completed.returncode)
    return completed
31
+
32
+
33
def main() -> None:
    """Check the shipped Python lint config against a clean and a smelly file.

    ``CONFIG`` is copied into a temporary directory as ``pyproject.toml``.
    Ruff and Pylint (run through ``uv run --group lint``) must pass on
    ``sample.py``, and must fail on ``smelly.py`` while reporting the required
    rule codes and none of the forbidden ones.

    Raises:
        SystemExit: If a linter fails on the clean file, passes on the smelly
            file, omits a required code, emits a forbidden code, or the
            output shows plugin/option loading problems.
    """
    uv_lint = ["uv", "run", "--project", str(ROOT), "--group", "lint"]
    ruff_check = [*uv_lint, "ruff", "check"]
    pylint_check = [*uv_lint, "pylint", "--rcfile=pyproject.toml"]

    with tempfile.TemporaryDirectory(prefix="clean-code-python-config-") as raw_tmp:
        tmp = Path(raw_tmp)
        shutil.copyfile(CONFIG, tmp / "pyproject.toml")

        (tmp / "sample.py").write_text(
            """
MAX_ATTEMPTS = 5


def can_retry(failed_attempts: int) -> bool:
    return failed_attempts < MAX_ATTEMPTS
""".lstrip(),
        )
        (tmp / "smelly.py").write_text(
            """
# TODO clean this up
# old_result = calculate_total(order)

def calculate_total(order, include_tax, dry_run, retry, verbose, mode):
    if order.status == "pending":
        return 5
    return 0
""".lstrip(),
        )

        # The clean sample must lint without findings (run() exits on failure).
        run([*ruff_check, "sample.py"], cwd=tmp)
        run([*pylint_check, "sample.py"], cwd=tmp)

        # check=False: a non-zero exit is the expected outcome for the smelly file.
        ruff_smelly = run([*ruff_check, "smelly.py"], cwd=tmp, check=False)
        pylint_smelly = run([*pylint_check, "smelly.py"], cwd=tmp, check=False)

        if ruff_smelly.returncode == 0:
            print("Expected Ruff to report clean-code findings for smelly.py")
            raise SystemExit(1)
        if pylint_smelly.returncode == 0:
            print("Expected Pylint to report design findings for smelly.py")
            raise SystemExit(1)

        ruff_output = ruff_smelly.stdout
        pylint_output = pylint_smelly.stdout
        # Built once up front: it does not depend on the loop below and is
        # still needed after it, even if the forbidden list were empty.
        combined = f"{ruff_output}\n{pylint_output}"
        required_ruff_codes = ["TD002", "TD003", "ERA001", "ARG001"]
        required_pylint_codes = ["R0913", "C9001", "C9002", "C9007"]
        forbidden_codes = ["FIX002", "R0917"]

        for code in required_ruff_codes:
            if code not in ruff_output:
                print(ruff_output)
                raise SystemExit(f"Expected Ruff output to include {code}")
        for code in required_pylint_codes:
            if code not in pylint_output:
                print(pylint_output)
                raise SystemExit(f"Expected Pylint output to include {code}")
        for code in forbidden_codes:
            if code in combined:
                print(combined)
                raise SystemExit(f"Did not expect duplicate/noisy code {code}")
        if "bad-plugin-value" in combined or "unknown-option-value" in combined:
            print(combined)
            raise SystemExit("Expected custom Pylint plugin to load cleanly")

    print("python_config_check=ok")
127
+
128
+
129
+ if __name__ == "__main__":
130
+ main()
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ import subprocess
6
+ from pathlib import Path
7
+
8
+ ROOT = Path(__file__).resolve().parents[1]
9
+ PYTHON_SRC = ROOT / "src" / "python"
10
+ PYTHON_TARGETS = [
11
+ "src/python/clean_code_tools_pylint",
12
+ "src/python/mcp_server",
13
+ "scripts",
14
+ ]
15
+ PYLINT_TARGETS = [
16
+ "src/python/clean_code_tools_pylint",
17
+ "src/python/mcp_server",
18
+ ]
19
+
20
+
21
def run(command: list[str], *, cwd: Path = ROOT) -> subprocess.CompletedProcess[str]:
    """Run ``command`` with the project's Python sources on ``PYTHONPATH``.

    ``PYTHON_SRC`` is placed in front of any inherited ``PYTHONPATH``. Entries
    are joined with ``os.pathsep`` rather than a literal ``:`` so the value is
    also valid where the platform separator differs (``;`` on Windows).

    Args:
        command: Argument vector passed to ``subprocess.run``.
        cwd: Working directory for the child process.

    Returns:
        The completed process; ``stdout`` holds merged stdout and stderr.

    Raises:
        SystemExit: With the child's exit code if the command exits non-zero
            (the captured output is printed first).
    """
    env = os.environ.copy()
    inherited = env.get("PYTHONPATH", "").rstrip(os.pathsep)
    # filter(None, ...) drops an empty inherited value so no trailing separator is left.
    env["PYTHONPATH"] = os.pathsep.join(filter(None, [str(PYTHON_SRC), inherited]))
    completed = subprocess.run(
        command,
        cwd=cwd,
        env=env,
        check=False,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    if completed.returncode != 0:
        print(completed.stdout)
        raise SystemExit(completed.returncode)
    return completed
37
+
38
+
39
def main() -> None:
    """Lint the repository's Python code with Ruff, then Pylint, via ``uv run``.

    Ruff covers ``PYTHON_TARGETS``; Pylint covers ``PYLINT_TARGETS``. ``run``
    exits the script on the first failing command.
    """
    uv_lint = ("uv", "run", "--group", "lint")
    run([*uv_lint, "ruff", "check", *PYTHON_TARGETS])
    run([*uv_lint, "pylint", *PYLINT_TARGETS])
    print("repo_python_lint_check=ok")
43
+
44
+
45
+ if __name__ == "__main__":
46
+ main()
@@ -0,0 +1,132 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from _mcp_app import load_semantic_module
9
+
10
+ ROOT = Path(__file__).resolve().parents[1]
11
+ EVALS = ROOT / "evals" / "clean-code-retrieval.jsonl"
12
+
13
+
14
def main() -> None:
    """Run every retrieval eval case plus the ranking checks and report metrics.

    Each non-blank line of ``EVALS`` is parsed as one JSON case. Failures from
    all cases and from ``production_ranking_checks`` are collected first and
    reported together.

    Raises:
        SystemExit: With the newline-joined failure messages if any check failed.
    """
    semantic = load_semantic_module()
    cases = [json.loads(raw) for raw in EVALS.read_text().splitlines() if raw.strip()]

    metrics = empty_metrics()
    failures: list[str] = []
    for case in cases:
        failures += evaluate_case(case, local_search_payload(semantic, case), metrics)
    failures += production_ranking_checks(semantic)

    if failures:
        raise SystemExit("\n".join(failures))

    summary_fields = [
        "retrieval_eval_check=ok",
        f"cases={len(cases)}",
        f"top1={metrics['top1_correct']}",
        f"top5={metrics['top5_recall']}",
        f"strong_calibration={metrics['strong_calibration']}",
        f"duplicate_failures={metrics['duplicate_failures']}",
        f"markdown_pollution={metrics['markdown_pollution']}",
    ]
    print(" ".join(summary_fields))
35
+
36
+
37
def local_search_payload(semantic: Any, case: dict[str, Any]) -> dict[str, Any]:
    """Rank one eval case's mocked vector rows with ``semantic.rank_pattern_rows``.

    Args:
        semantic: Loaded semantic module providing ``rank_pattern_rows``.
        case: Eval case; ``query`` is required, ``language`` defaults to ``"any"``.

    Returns:
        Whatever ``rank_pattern_rows`` returns for up to 8 pattern results.
    """
    query = str(case["query"])
    rows = mock_vector_rows(semantic, case)
    language = str(case.get("language", "any"))
    return semantic.rank_pattern_rows(
        query=query,
        vector_rows=rows,
        limit=8,
        language=language,
        source_kinds=("clean_code_pattern",),
    )
45
+
46
+
47
def mock_vector_rows(semantic: Any, case: dict[str, Any]) -> list[dict[str, Any]]:
    """Build fake vector-search rows for the pattern IDs listed in ``case``.

    Only chunks whose ``chunk_kind`` is ``"pattern_record"`` are eligible,
    keyed by ``record_id``. Each row is the chunk's ``properties`` plus an
    ``_additional`` entry holding its ``object_id`` and a synthetic distance
    of ``0.20 + position * 0.05``. The position counts every entry of
    ``vector_ids``, including IDs that match no chunk and are skipped.

    Args:
        semantic: Loaded semantic module providing ``build_chunks``.
        case: Eval case; ``vector_ids`` is optional and defaults to empty.

    Returns:
        One row per known pattern ID, in ``vector_ids`` order.
    """
    by_record_id = {}
    for chunk in semantic.build_chunks():
        if chunk.chunk_kind == "pattern_record":
            by_record_id[chunk.record_id] = chunk

    return [
        match.properties | {"_additional": {"id": match.object_id, "distance": 0.20 + position * 0.05}}
        for position, pattern_id in enumerate(case.get("vector_ids", []))
        if (match := by_record_id.get(pattern_id)) is not None
    ]
59
+
60
+
61
def empty_metrics() -> dict[str, int]:
    """Return a fresh counter dict with every retrieval metric set to zero."""
    metric_names = (
        "top1_correct",
        "top5_recall",
        "strong_calibration",
        "duplicate_failures",
        "markdown_pollution",
    )
    return dict.fromkeys(metric_names, 0)
69
+
70
+
71
def evaluate_case(
    case: dict[str, Any],
    payload: dict[str, Any],
    metrics: dict[str, int],
) -> list[str]:
    """Score one eval case against its ranked payload.

    Only the first five results are considered. ``metrics`` is updated in
    place; failure messages are returned rather than raised.

    Args:
        case: Eval case with ``id``, ``expected_top_ids`` and
            ``should_strong_match``.
        payload: Ranking output with ``results`` and ``no_strong_match``.
        metrics: Counter dict whose matching entries are incremented.

    Returns:
        Failure messages for this case; empty when it passes.
    """
    top_results = payload["results"][:5]
    ids = [str(item["pattern_id"]) for item in top_results]
    expected = set(case["expected_top_ids"])
    distinct_ids = set(ids)
    problems: list[str] = []

    if expected and ids and ids[0] in expected:
        metrics["top1_correct"] += 1
    recalled = bool(expected & distinct_ids)
    # A case with no expected IDs counts as recalled.
    if recalled or not expected:
        metrics["top5_recall"] += 1

    wants_strong = bool(case["should_strong_match"])
    no_strong = bool(payload["no_strong_match"])
    # Calibrated when the payload's flag is the opposite of the expectation.
    if wants_strong is not no_strong:
        metrics["strong_calibration"] += 1
    if len(distinct_ids) != len(ids):
        metrics["duplicate_failures"] += 1
        problems.append(f"{case['id']}: duplicate pattern IDs in top 5: {ids}")
    # Counted as a metric only; no failure message is produced for it here.
    if not all(item.get("source_kind") == "clean_code_pattern" for item in top_results):
        metrics["markdown_pollution"] += 1

    if wants_strong:
        if expected.isdisjoint(ids[:3]):
            problems.append(f"{case['id']}: expected one of {sorted(expected)} in top 3, got {ids[:3]}")
        if no_strong:
            problems.append(f"{case['id']}: expected strong match")
    else:
        if not no_strong:
            problems.append(f"{case['id']}: expected no_strong_match")
        if expected and not recalled:
            problems.append(f"{case['id']}: expected relevant pattern {sorted(expected)} in top 5, got {ids}")
    return problems
103
+
104
+
105
def production_ranking_checks(semantic: Any) -> list[str]:
    """Check that ranking filters markdown rows and dedupes repeated patterns.

    The ranking is fed one markdown row (closest distance) and the CC-043
    pattern row twice (distances 0.40 and 0.10). The result must contain
    CC-043 exactly once, lead with a ``clean_code_pattern`` row, and report
    the smaller distance.

    A missing fixture chunk or an empty result list is reported as a failure
    message instead of surfacing as ``KeyError``/``StopIteration``/
    ``IndexError``, so it appears alongside the other eval failures.

    Args:
        semantic: Loaded semantic module providing ``build_chunks`` and
            ``rank_pattern_rows``.

    Returns:
        Failure messages; empty when every check passes.
    """
    chunks = {chunk.chunk_id: chunk for chunk in semantic.build_chunks()}
    pattern = chunks.get("pattern:CC-043")
    markdown = next(
        (chunk for chunk in chunks.values() if chunk.source_kind == "markdown_doc"),
        None,
    )
    missing: list[str] = []
    if pattern is None:
        missing.append("production-ranking: corpus has no pattern:CC-043 chunk")
    if markdown is None:
        missing.append("production-ranking: corpus has no markdown_doc chunk")
    if missing:
        return missing

    payload = semantic.rank_pattern_rows(
        query="typescript boolean flag argument",
        vector_rows=[
            markdown.properties | {"_additional": {"id": markdown.object_id, "distance": 0.01}},
            pattern.properties | {"_additional": {"id": pattern.object_id, "distance": 0.40}},
            pattern.properties | {"_additional": {"id": pattern.object_id, "distance": 0.10}},
        ],
        limit=5,
        language="typescript",
        source_kinds=("clean_code_pattern",),
    )
    results = payload["results"]
    ids = [result["pattern_id"] for result in results]
    failures: list[str] = []
    if ids.count("CC-043") != 1:
        failures.append(f"production-ranking: expected deduped CC-043 once, got {ids}")
    if not results:
        # Nothing to inspect for the top-row checks below.
        failures.append("production-ranking: ranking returned no results")
        return failures

    best = results[0]
    if best["source_kind"] != "clean_code_pattern":
        failures.append("production-ranking: markdown vector row was not filtered out")
    # Exact comparison is kept from the original: 0.10 is passed in as a literal above.
    if best["distance"] != 0.1:
        failures.append(f"production-ranking: expected best vector distance 0.1, got {best['distance']}")
    return failures
129
+
130
+
131
+ if __name__ == "__main__":
132
+ main()