clean-code-tools 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/README.md +66 -0
  2. package/configs/eslint.clean-code.recommended.mjs +211 -0
  3. package/configs/python.clean-code.pyproject.toml +143 -0
  4. package/data/clean-code-patterns.jsonl +264 -0
  5. package/data/vector-record.schema.json +77 -0
  6. package/docs/README.md +29 -0
  7. package/docs/eslint-custom-rules.md +74 -0
  8. package/docs/eslint-recommended-config.md +87 -0
  9. package/docs/fastmcp-local-server.md +104 -0
  10. package/docs/publishing.md +125 -0
  11. package/docs/python-lint-recommended-config.md +57 -0
  12. package/docs/python-pylint-custom-rules.md +77 -0
  13. package/docs/semantic-weaviate.md +80 -0
  14. package/docs/static-trigger-semantic-review.md +97 -0
  15. package/evals/clean-code-retrieval.jsonl +13 -0
  16. package/ops/dev/weaviate/README.md +34 -0
  17. package/ops/dev/weaviate/compose.yaml +34 -0
  18. package/ops/dev/weaviate/smoke.sh +28 -0
  19. package/package.json +96 -0
  20. package/pyproject.toml +303 -0
  21. package/sample-apps/README.md +40 -0
  22. package/sample-apps/python-app/pyproject.toml +113 -0
  23. package/sample-apps/python-app/src/clean_pricing.py +10 -0
  24. package/sample-apps/python-app/src/smelly_pricing.py +8 -0
  25. package/sample-apps/ts-backend/eslint.config.mjs +3 -0
  26. package/sample-apps/ts-backend/package.json +18 -0
  27. package/sample-apps/ts-backend/src/clean-handler.ts +19 -0
  28. package/sample-apps/ts-backend/src/smelly-handler.ts +29 -0
  29. package/sample-apps/ts-backend/tsconfig.json +9 -0
  30. package/sample-apps/ts-frontend/eslint.config.mjs +3 -0
  31. package/sample-apps/ts-frontend/package.json +18 -0
  32. package/sample-apps/ts-frontend/src/CleanWidget.tsx +18 -0
  33. package/sample-apps/ts-frontend/src/SmellyWidget.tsx +27 -0
  34. package/sample-apps/ts-frontend/tsconfig.json +10 -0
  35. package/scripts/_mcp_app.py +21 -0
  36. package/scripts/check_clean_code_review_candidates.py +302 -0
  37. package/scripts/check_fastmcp_server.py +106 -0
  38. package/scripts/check_packages.py +137 -0
  39. package/scripts/check_python_config.py +130 -0
  40. package/scripts/check_repo_python_lint.py +46 -0
  41. package/scripts/check_retrieval_evals.py +132 -0
  42. package/scripts/check_sample_apps.py +169 -0
  43. package/scripts/check_semantic_search_tooling.py +102 -0
  44. package/scripts/clean_code_eslint_triggers.py +272 -0
  45. package/scripts/clean_code_mcp_server.py +7 -0
  46. package/scripts/clean_code_python_triggers.py +318 -0
  47. package/scripts/clean_code_review_candidates.py +291 -0
  48. package/scripts/clean_code_review_io.py +36 -0
  49. package/scripts/clean_code_review_models.py +43 -0
  50. package/scripts/clean_code_semantic.py +27 -0
  51. package/scripts/set_package_versions.py +82 -0
  52. package/scripts/weaviate_ingest_clean_code.py +44 -0
  53. package/scripts/weaviate_search_clean_code.py +51 -0
  54. package/skills/clean-code-mcp-reviewer/SKILL.md +209 -0
  55. package/skills/clean-code-mcp-reviewer/evals/evals.json +30 -0
  56. package/src/js/eslint-plugin-clean-code.mjs +758 -0
  57. package/src/python/clean_code_tools_pylint/__init__.py +14 -0
  58. package/src/python/clean_code_tools_pylint/ast_checker.py +122 -0
  59. package/src/python/clean_code_tools_pylint/comments.py +83 -0
  60. package/src/python/clean_code_tools_pylint/helpers.py +196 -0
  61. package/src/python/mcp_server/__init__.py +1 -0
  62. package/src/python/mcp_server/corpus.py +160 -0
  63. package/src/python/mcp_server/markdown.py +126 -0
  64. package/src/python/mcp_server/models.py +73 -0
  65. package/src/python/mcp_server/ranking.py +125 -0
  66. package/src/python/mcp_server/ranking_scoring.py +232 -0
  67. package/src/python/mcp_server/semantic.py +192 -0
  68. package/src/python/mcp_server/server.py +235 -0
  69. package/src/python/mcp_server/server_payloads.py +83 -0
  70. package/src/python/mcp_server/text.py +104 -0
  71. package/src/python/mcp_server/utils/__init__.py +1 -0
  72. package/src/python/mcp_server/utils/httpx_loader.py +14 -0
  73. package/src/python/mcp_server/utils/increment.py +7 -0
  74. package/src/python/mcp_server/utils/sha256_text.py +8 -0
  75. package/src/python/mcp_server/utils/unique_strings.py +15 -0
  76. package/src/python/mcp_server/weaviate.py +182 -0
  77. package/uv.lock +2012 -0
@@ -0,0 +1,182 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import os
6
+ import re
7
+
8
+ from mcp_server.models import DEFAULT_EMBEDDING_MODEL, CleanCodeChunk, JsonDict
9
+ from mcp_server.utils.httpx_loader import require_httpx
10
+
11
# Weaviate collection (class) name used by default throughout this module.
COLLECTION_NAME = "CleanCodeChunks"
# Key of the named vector under which embeddings are declared, stored and searched.
VECTOR_NAME = "content"
# Base URL of the Weaviate instance; the WEAVIATE_URL environment variable overrides the local default.
DEFAULT_WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://127.0.0.1:8080")  # pylint: disable=clean-code-business-policy-literal
# Default number of chunks handled per ingest batch.
DEFAULT_BATCH_SIZE = 64
# HTTP status compared against when probing whether a collection already exists.
HTTP_NOT_FOUND = 404
# Names matching this pattern are accepted for interpolation into GraphQL query text.
GRAPHQL_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
# Exit message used when the fastembed package cannot be imported.
FASTEMBED_INSTALL_MESSAGE = "Install fastembed to embed clean-code chunks: python3 -m pip install fastembed"
18
+
19
+
20
def create_schema_payload(*, collection_name: str = COLLECTION_NAME) -> JsonDict:
    """Build the Weaviate schema definition for the clean-code chunk collection.

    The schema declares one named vector (keyed by ``VECTOR_NAME``) with an
    ``hnsw`` index and the ``none`` vectorizer, followed by the chunk
    properties.

    Args:
        collection_name: Value placed in the schema's ``class`` field.

    Returns:
        A dict with the ``class``, ``vectorConfig`` and ``properties`` keys.
    """
    # (property name, Weaviate data type), kept in the order the schema lists them.
    property_types = (
        ("chunkId", "text"),
        ("sourceFile", "text"),
        ("sourceKind", "text"),
        ("recordId", "text"),
        ("title", "text"),
        ("topic", "text"),
        ("sectionPath", "text[]"),
        ("chunkKind", "text"),
        ("chunkIndex", "number"),
        ("ruleFamily", "text"),
        ("lintability", "text"),
        ("aliases", "text[]"),
        ("languages", "text[]"),
        ("lintCandidates", "text[]"),
        ("contentText", "text"),
        ("embeddingText", "text"),
        ("displayText", "text"),
        ("textHash", "text"),
        ("chunkerVersion", "text"),
        ("embeddingModel", "text"),
        ("embeddingProvider", "text"),
        ("createdAt", "date"),
    )
    named_vector = {"vectorIndexType": "hnsw", "vectorizer": {"none": {}}}
    properties = [{"name": name, "dataType": [data_type]} for name, data_type in property_types]
    return {
        "class": collection_name,
        "vectorConfig": {VECTOR_NAME: named_vector},
        "properties": properties,
    }
54
+
55
+
56
def reset_collection(*, url: str, collection_name: str = COLLECTION_NAME) -> None:
    """Drop the collection when it already exists, then create it from the schema payload.

    Args:
        url: Base URL of the Weaviate instance; a trailing slash is ignored.
        collection_name: Collection to delete (if present) and re-create.

    Raises:
        Whatever ``raise_for_status`` raises when the lookup (other than a
        404), the delete, or the create request returns an error status.
    """
    httpx = require_httpx()
    schema_root = f"{url.rstrip('/')}/v1/schema"
    collection_endpoint = f"{schema_root}/{collection_name}"
    with httpx.Client(timeout=120) as client:
        lookup = client.get(collection_endpoint)
        collection_missing = lookup.status_code == HTTP_NOT_FOUND
        if not collection_missing:
            # Any non-404 answer must itself be a success before the delete is attempted.
            lookup.raise_for_status()
            client.delete(collection_endpoint).raise_for_status()
        schema = create_schema_payload(collection_name=collection_name)
        client.post(schema_root, json=schema).raise_for_status()
70
+
71
+
72
def embed_texts(texts: list[str], *, model_name: str, batch_size: int) -> list[list[float]]:
    """Embed *texts* with a fastembed ``TextEmbedding`` model.

    fastembed is imported inside the function, so the dependency is only
    needed when embeddings are actually computed. A new model instance is
    created on every call.

    Args:
        texts: Strings to embed.
        model_name: Model identifier passed to ``TextEmbedding``.
        batch_size: Batch size forwarded to ``TextEmbedding.embed``.

    Returns:
        One vector per yielded embedding, each converted to a list of
        built-in floats.

    Raises:
        SystemExit: With ``FASTEMBED_INSTALL_MESSAGE`` when fastembed cannot be imported.
    """
    try:
        from fastembed import TextEmbedding  # noqa: PLC0415
    except ImportError as exc:
        raise SystemExit(FASTEMBED_INSTALL_MESSAGE) from exc
    embedder = TextEmbedding(model_name=model_name)
    embeddings: list[list[float]] = []
    for raw_vector in embedder.embed(texts, batch_size=batch_size):
        embeddings.append(list(map(float, raw_vector)))
    return embeddings
79
+
80
+
81
def ingest_chunks(
    *,
    chunks: list[CleanCodeChunk],
    url: str,
    collection_name: str = COLLECTION_NAME,
    model_name: str = DEFAULT_EMBEDDING_MODEL,
    batch_size: int = DEFAULT_BATCH_SIZE,
) -> int:
    """Embed *chunks* and upload them to Weaviate in batches.

    All texts are embedded with a single ``embed_texts`` call so the embedding
    model is constructed once rather than once per upload batch; the same
    ``batch_size`` is forwarded to the embedder and used to slice the uploads.

    Args:
        chunks: Chunks to ingest; each supplies ``embedding_text``,
            ``object_id`` and ``properties``.
        url: Base URL of the Weaviate instance; a trailing slash is ignored.
        collection_name: Collection the objects are written to.
        model_name: Embedding model identifier passed to ``embed_texts``.
        batch_size: Number of objects per ``/v1/batch/objects`` request.

    Returns:
        The number of objects uploaded.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if the embedder
            returns a different number of vectors than there are chunks.
        RuntimeError: If Weaviate reports failed objects for a batch.
    """
    # A negative step would make range() empty and silently ingest nothing.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")  # noqa: TRY003
    httpx = require_httpx()
    if not chunks:
        # Nothing to upload: skip loading the embedding model altogether.
        return 0
    base_url = url.rstrip("/")
    vectors = embed_texts(
        [chunk.embedding_text for chunk in chunks],
        model_name=model_name,
        batch_size=batch_size,
    )
    # strict=True surfaces any mismatch between chunk and vector counts.
    pairs = list(zip(chunks, vectors, strict=True))
    inserted = 0
    with httpx.Client(timeout=120) as client:
        for offset in range(0, len(pairs), batch_size):
            objects = [
                {
                    "class": collection_name,
                    "id": chunk.object_id,
                    "properties": chunk.properties,
                    "vectors": {VECTOR_NAME: vector},
                }
                for chunk, vector in pairs[offset : offset + batch_size]
            ]
            response = client.post(f"{base_url}/v1/batch/objects", json={"objects": objects})
            response.raise_for_status()
            # The request can succeed overall while individual objects are rejected.
            failures = batch_failures(response.json())
            if failures:
                raise RuntimeError(f"Weaviate rejected {len(failures)} objects: {failures[:3]}")  # noqa: TRY003  # pylint: disable=clean-code-business-policy-literal
            inserted += len(objects)
    return inserted
116
+
117
+
118
def search_chunks(
    *,
    query: str,
    url: str,
    collection_name: str = COLLECTION_NAME,
    model_name: str = DEFAULT_EMBEDDING_MODEL,
    limit: int = 8,
) -> list[JsonDict]:
    """Run a vector search for *query* and return the matching rows.

    The query is embedded, wrapped in a GraphQL ``nearVector`` query, sent to
    Weaviate, and the rows for ``collection_name`` are extracted from the
    response.

    Args:
        query: Free-text search query.
        url: Base URL of the Weaviate instance.
        collection_name: Collection to search.
        model_name: Embedding model used for the query vector.
        limit: Maximum number of rows requested.

    Returns:
        The result rows extracted by ``search_rows_from_payload``.
    """
    graphql_query = build_search_graphql_query(
        collection_name=collection_name,
        vector=embed_query(query, model_name=model_name),
        limit=limit,
    )
    return search_rows_from_payload(
        execute_graphql_search(url=url, graphql_query=graphql_query),
        collection_name=collection_name,
    )
134
+
135
+
136
def embed_query(query: str, *, model_name: str) -> list[float]:
    """Return the embedding vector for a single query string."""
    vectors = embed_texts([query], model_name=model_name, batch_size=1)
    return vectors[0]
138
+
139
+
140
def execute_graphql_search(*, url: str, graphql_query: str) -> JsonDict:
    """POST *graphql_query* to the ``/v1/graphql`` endpoint and return the decoded JSON body.

    Args:
        url: Base URL of the Weaviate instance; a trailing slash is ignored.
        graphql_query: Query text sent under the ``query`` key.

    Raises:
        Whatever ``raise_for_status`` raises for an error response.
    """
    httpx = require_httpx()
    endpoint = url.rstrip("/") + "/v1/graphql"
    request_body = {"query": graphql_query}
    response = httpx.post(endpoint, json=request_body, timeout=120)
    response.raise_for_status()
    return response.json()
145
+
146
+
147
def search_rows_from_payload(payload: JsonDict, *, collection_name: str) -> list[JsonDict]:
    """Extract the result rows for *collection_name* from a GraphQL response.

    Args:
        payload: Decoded GraphQL response body.
        collection_name: Key looked up under ``data.Get``.

    Returns:
        The rows stored at ``data.Get.<collection_name>``, or an empty list
        when any level of that path is missing or ``null``.

    Raises:
        RuntimeError: If the payload carries a non-empty ``errors`` entry; the
            errors value is passed as the exception argument.
    """
    errors = payload.get("errors")
    if errors:
        raise RuntimeError(errors)
    # `or` rather than a .get() default: a key that is present with a JSON
    # null value would otherwise bypass the default and break the next lookup.
    data = payload.get("data") or {}
    get_section = data.get("Get") or {}
    return get_section.get(collection_name) or []
151
+
152
+
153
def build_search_graphql_query(
    *,
    collection_name: str,
    vector: list[float],
    limit: int,
) -> str:
    """Render the GraphQL ``Get`` query for a ``nearVector`` search.

    Both values that are interpolated as raw query text (``collection_name``
    and ``limit``) are validated first so that neither can alter the query
    structure; ``vector`` and the target vector name are JSON-encoded.

    Args:
        collection_name: Collection to query; must match ``GRAPHQL_NAME_RE``.
        vector: Query embedding.
        limit: Maximum number of rows to request.

    Returns:
        The GraphQL query text.

    Raises:
        TypeError: If ``limit`` is not an integer.
        ValueError: If ``collection_name`` is not a valid GraphQL identifier.
    """
    # bool is an int subclass but would be rendered as "True"/"False", so it is rejected too.
    if isinstance(limit, bool) or not isinstance(limit, int):
        raise TypeError("limit must be an integer")  # noqa: TRY003
    if not GRAPHQL_NAME_RE.fullmatch(collection_name):
        raise ValueError("collection_name must be a valid GraphQL identifier")  # noqa: TRY003
    return (
        "{ Get { "
        f"{collection_name}("
        f"nearVector: {{vector: {json.dumps(vector)}, targetVectors: [{json.dumps(VECTOR_NAME)}]}}, "
        f"limit: {limit}"
        ") { "
        "chunkId recordId sourceFile sourceKind title topic sectionPath chunkKind "
        "ruleFamily lintability aliases languages lintCandidates contentText textHash "
        "_additional { id distance } "
        "} } }"
    )
172
+
173
+
174
def batch_failures(payload: JsonDict) -> list[JsonDict]:
    """Return the rows of a batch response that did not succeed.

    Args:
        payload: Either a list of result rows, or a dict holding the rows
            under its ``objects`` key.

    Returns:
        Every dict row for which ``is_successful_batch_row`` is false, in
        response order. Rows that are not dicts are skipped.
    """
    if isinstance(payload, list):
        rows = payload
    else:
        rows = payload.get("objects", [])
    failed = []
    for row in rows:
        if not isinstance(row, dict):
            continue
        if not is_successful_batch_row(row):
            failed.append(row)
    return failed
177
+
178
+
179
def is_successful_batch_row(row: JsonDict) -> bool:
    """Tell whether a single batch response row reports success.

    The status is read from ``row["result"]`` when that value is a dict and
    from the row itself otherwise. It counts as success only when it is a
    string equal, ignoring case, to ``SUCCESS`` or ``OK``.
    """
    outcome = row.get("result")
    status_holder = outcome if isinstance(outcome, dict) else row
    status = status_holder.get("status")
    if not isinstance(status, str):
        return False
    return status.upper() in {"SUCCESS", "OK"}  # pylint: disable=clean-code-business-policy-literal