clean-code-tools 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/README.md +66 -0
  2. package/configs/eslint.clean-code.recommended.mjs +211 -0
  3. package/configs/python.clean-code.pyproject.toml +143 -0
  4. package/data/clean-code-patterns.jsonl +264 -0
  5. package/data/vector-record.schema.json +77 -0
  6. package/docs/README.md +29 -0
  7. package/docs/eslint-custom-rules.md +74 -0
  8. package/docs/eslint-recommended-config.md +87 -0
  9. package/docs/fastmcp-local-server.md +104 -0
  10. package/docs/publishing.md +125 -0
  11. package/docs/python-lint-recommended-config.md +57 -0
  12. package/docs/python-pylint-custom-rules.md +77 -0
  13. package/docs/semantic-weaviate.md +80 -0
  14. package/docs/static-trigger-semantic-review.md +97 -0
  15. package/evals/clean-code-retrieval.jsonl +13 -0
  16. package/ops/dev/weaviate/README.md +34 -0
  17. package/ops/dev/weaviate/compose.yaml +34 -0
  18. package/ops/dev/weaviate/smoke.sh +28 -0
  19. package/package.json +96 -0
  20. package/pyproject.toml +303 -0
  21. package/sample-apps/README.md +40 -0
  22. package/sample-apps/python-app/pyproject.toml +113 -0
  23. package/sample-apps/python-app/src/clean_pricing.py +10 -0
  24. package/sample-apps/python-app/src/smelly_pricing.py +8 -0
  25. package/sample-apps/ts-backend/eslint.config.mjs +3 -0
  26. package/sample-apps/ts-backend/package.json +18 -0
  27. package/sample-apps/ts-backend/src/clean-handler.ts +19 -0
  28. package/sample-apps/ts-backend/src/smelly-handler.ts +29 -0
  29. package/sample-apps/ts-backend/tsconfig.json +9 -0
  30. package/sample-apps/ts-frontend/eslint.config.mjs +3 -0
  31. package/sample-apps/ts-frontend/package.json +18 -0
  32. package/sample-apps/ts-frontend/src/CleanWidget.tsx +18 -0
  33. package/sample-apps/ts-frontend/src/SmellyWidget.tsx +27 -0
  34. package/sample-apps/ts-frontend/tsconfig.json +10 -0
  35. package/scripts/_mcp_app.py +21 -0
  36. package/scripts/check_clean_code_review_candidates.py +302 -0
  37. package/scripts/check_fastmcp_server.py +106 -0
  38. package/scripts/check_packages.py +137 -0
  39. package/scripts/check_python_config.py +130 -0
  40. package/scripts/check_repo_python_lint.py +46 -0
  41. package/scripts/check_retrieval_evals.py +132 -0
  42. package/scripts/check_sample_apps.py +169 -0
  43. package/scripts/check_semantic_search_tooling.py +102 -0
  44. package/scripts/clean_code_eslint_triggers.py +272 -0
  45. package/scripts/clean_code_mcp_server.py +7 -0
  46. package/scripts/clean_code_python_triggers.py +318 -0
  47. package/scripts/clean_code_review_candidates.py +291 -0
  48. package/scripts/clean_code_review_io.py +36 -0
  49. package/scripts/clean_code_review_models.py +43 -0
  50. package/scripts/clean_code_semantic.py +27 -0
  51. package/scripts/set_package_versions.py +82 -0
  52. package/scripts/weaviate_ingest_clean_code.py +44 -0
  53. package/scripts/weaviate_search_clean_code.py +51 -0
  54. package/skills/clean-code-mcp-reviewer/SKILL.md +209 -0
  55. package/skills/clean-code-mcp-reviewer/evals/evals.json +30 -0
  56. package/src/js/eslint-plugin-clean-code.mjs +758 -0
  57. package/src/python/clean_code_tools_pylint/__init__.py +14 -0
  58. package/src/python/clean_code_tools_pylint/ast_checker.py +122 -0
  59. package/src/python/clean_code_tools_pylint/comments.py +83 -0
  60. package/src/python/clean_code_tools_pylint/helpers.py +196 -0
  61. package/src/python/mcp_server/__init__.py +1 -0
  62. package/src/python/mcp_server/corpus.py +160 -0
  63. package/src/python/mcp_server/markdown.py +126 -0
  64. package/src/python/mcp_server/models.py +73 -0
  65. package/src/python/mcp_server/ranking.py +125 -0
  66. package/src/python/mcp_server/ranking_scoring.py +232 -0
  67. package/src/python/mcp_server/semantic.py +192 -0
  68. package/src/python/mcp_server/server.py +235 -0
  69. package/src/python/mcp_server/server_payloads.py +83 -0
  70. package/src/python/mcp_server/text.py +104 -0
  71. package/src/python/mcp_server/utils/__init__.py +1 -0
  72. package/src/python/mcp_server/utils/httpx_loader.py +14 -0
  73. package/src/python/mcp_server/utils/increment.py +7 -0
  74. package/src/python/mcp_server/utils/sha256_text.py +8 -0
  75. package/src/python/mcp_server/utils/unique_strings.py +15 -0
  76. package/src/python/mcp_server/weaviate.py +182 -0
  77. package/uv.lock +2012 -0
@@ -0,0 +1,318 @@
1
+ from __future__ import annotations
2
+
3
+ from clean_code_review_models import TriggerRule
4
+
5
+ PYLINT_TRIGGERS = {
6
+ "clean-code-boolean-flag-argument": TriggerRule(
7
+ questions=(
8
+ "Does the boolean select behavior rather than represent plain data?",
9
+ "Would intention-revealing functions or an explicit options object make call sites clearer?",
10
+ ),
11
+ mcp_query="python boolean flag argument intention revealing functions options object",
12
+ ),
13
+ "clean-code-business-policy-literal": TriggerRule(
14
+ questions=(
15
+ "Does this literal encode business policy or domain state?",
16
+ "Would a named constant or typed domain value make changes safer?",
17
+ ),
18
+ mcp_query="python business policy literal named constant domain state",
19
+ ),
20
+ "clean-code-commented-out-code": TriggerRule(
21
+ questions=(
22
+ "Is this commented code obsolete implementation detail?",
23
+ "Can it be removed in favor of version control history or a tracked task?",
24
+ ),
25
+ mcp_query="python commented out code remove obsolete implementation",
26
+ ),
27
+ "clean-code-noisy-comment": TriggerRule(
28
+ questions=(
29
+ "Is the comment adding constraint/context or just visual noise?",
30
+ "Would deleting or replacing it with a precise explanation improve scanability?",
31
+ ),
32
+ mcp_query="python noisy comments remove visual separators byline comments",
33
+ ),
34
+ "clean-code-output-argument-mutation": TriggerRule(
35
+ questions=(
36
+ "Is mutation hidden behind an output parameter?",
37
+ "Would returning a value or copying data make the contract clearer?",
38
+ ),
39
+ mcp_query="python output argument mutation return value contract",
40
+ ),
41
+ "clean-code-redundant-comment": TriggerRule(
42
+ questions=(
43
+ "Does the comment repeat the next line instead of explaining why?",
44
+ "Can clearer naming or deletion remove the need for the comment?",
45
+ ),
46
+ mcp_query="python redundant comment intention revealing code",
47
+ ),
48
+ "clean-code-todo-format": TriggerRule(
49
+ questions=(
50
+ "Is this TODO actionable and traceable?",
51
+ "Should the comment include a tracked issue or be converted into code/tests?",
52
+ ),
53
+ mcp_query="python todo comment issue id actionable technical debt",
54
+ ),
55
+ "clean-code-train-wreck": TriggerRule(
56
+ questions=(
57
+ "Is this chain exposing too much object structure to the caller?",
58
+ "Would a named query/helper or domain method reduce coupling?",
59
+ ),
60
+ mcp_query="python train wreck law of demeter object navigation",
61
+ ),
62
+ "cyclic-import": TriggerRule(
63
+ questions=(
64
+ "Is the cycle exposing a module boundary problem?",
65
+ "Can dependencies be inverted or shared types/constants moved lower?",
66
+ ),
67
+ mcp_query="python cyclic import module boundary dependency inversion",
68
+ ),
69
+ "duplicate-code": TriggerRule(
70
+ questions=(
71
+ "Is this real domain duplication or a benign framework shape?",
72
+ "Can a shared helper remove policy drift without obscuring the caller?",
73
+ ),
74
+ mcp_query="python duplicate code shared helper policy drift",
75
+ ),
76
+ "too-few-public-methods": TriggerRule(
77
+ questions=(
78
+ "Is this class just a passive data container or unnecessary wrapper?",
79
+ "Would a dataclass, TypedDict, or plain function fit the local style better?",
80
+ ),
81
+ mcp_query="python too few public methods unnecessary class dataclass function",
82
+ ),
83
+ "too-many-ancestors": TriggerRule(
84
+ questions=(
85
+ "Is inheritance making behavior harder to trace?",
86
+ "Would composition or a flatter design reduce coupling?",
87
+ ),
88
+ mcp_query="python too many ancestors inheritance composition coupling",
89
+ ),
90
+ "too-many-arguments": TriggerRule(
91
+ questions=(
92
+ "Do these arguments form a stable concept?",
93
+ "Would a dataclass, TypedDict, or keyword-only boundary clarify the call site?",
94
+ ),
95
+ mcp_query="python too many arguments dataclass typed dict call site clarity",
96
+ ),
97
+ "too-many-boolean-expressions": TriggerRule(
98
+ questions=(
99
+ "Is this condition encoding several decisions at once?",
100
+ "Would named predicates or explicit decision steps make the policy clearer?",
101
+ ),
102
+ mcp_query="python too many boolean expressions named predicates policy clarity",
103
+ ),
104
+ "too-many-branches": TriggerRule(
105
+ questions=(
106
+ "Is branching mixing policy decisions with execution?",
107
+ "Can guard clauses, named predicates, or extracted decisions simplify the flow?",
108
+ ),
109
+ mcp_query="python too many branches guard clauses named predicates",
110
+ ),
111
+ "too-many-instance-attributes": TriggerRule(
112
+ questions=(
113
+ "Is this object accumulating multiple responsibilities or state groups?",
114
+ "Can cohesive state move into smaller value objects or collaborators?",
115
+ ),
116
+ mcp_query="python too many instance attributes class responsibility state groups",
117
+ ),
118
+ "too-many-lines": TriggerRule(
119
+ questions=(
120
+ "Does this module have more than one reason to change?",
121
+ "Are there cohesive extraction boundaries that preserve imports and public APIs?",
122
+ ),
123
+ mcp_query="python large module single responsibility extraction boundaries",
124
+ ),
125
+ "too-many-locals": TriggerRule(
126
+ questions=(
127
+ "Is the function carrying too much intermediate state?",
128
+ "Can parsing, calculation, and side effects be separated?",
129
+ ),
130
+ mcp_query="python too many locals split parsing calculation side effects",
131
+ ),
132
+ "too-many-nested-blocks": TriggerRule(
133
+ questions=(
134
+ "Is nesting hiding the main path through the function?",
135
+ "Can early returns or named predicates flatten the code?",
136
+ ),
137
+ mcp_query="python nested blocks guard clauses main path readability",
138
+ ),
139
+ "too-many-public-methods": TriggerRule(
140
+ questions=(
141
+ "Does this class represent more than one responsibility?",
142
+ "Can cohesive behavior move into smaller collaborators or plain functions?",
143
+ ),
144
+ mcp_query="python class too many public methods single responsibility",
145
+ ),
146
+ "too-many-return-statements": TriggerRule(
147
+ questions=(
148
+ "Are return paths representing distinct outcomes that should be named?",
149
+ "Can guard clauses or result objects make the contract clearer?",
150
+ ),
151
+ mcp_query="python too many return statements guard clauses result contract",
152
+ ),
153
+ "too-many-statements": TriggerRule(
154
+ questions=(
155
+ "Does this function combine validation, transformation, and side effects?",
156
+ "Can smaller named steps preserve behavior while improving scanability?",
157
+ ),
158
+ mcp_query="python too many statements split validation transformation side effects",
159
+ ),
160
+ }
161
+
162
+
163
+ RUFF_TRIGGERS = {
164
+ "ARG001": TriggerRule(
165
+ questions=(
166
+ "Is this unused argument required by a framework or callback contract?",
167
+ "If not, can the function boundary be narrowed?",
168
+ ),
169
+ mcp_query="python unused function argument narrow function boundary",
170
+ ),
171
+ "ARG002": TriggerRule(
172
+ questions=(
173
+ "Is this unused method argument required by inheritance or framework convention?",
174
+ "If not, can the method contract be simplified?",
175
+ ),
176
+ mcp_query="python unused method argument simplify contract",
177
+ ),
178
+ "ERA001": TriggerRule(
179
+ questions=(
180
+ "Is this commented code obsolete implementation detail?",
181
+ "Can it be removed in favor of version control history or a tracked task?",
182
+ ),
183
+ mcp_query="python commented out code remove obsolete implementation",
184
+ ),
185
+ "F401": TriggerRule(
186
+ questions=(
187
+ "Is this unused import dead code or a leftover dependency?",
188
+ "Can removing it clarify the module boundary?",
189
+ ),
190
+ mcp_query="python unused import dead code module boundary",
191
+ ),
192
+ "F841": TriggerRule(
193
+ questions=(
194
+ "Is this unused variable dead code or an incomplete refactor?",
195
+ "Can the unused state or surrounding responsibility be removed?",
196
+ ),
197
+ mcp_query="python unused variable dead code incomplete refactor",
198
+ ),
199
+ "PLR0911": TriggerRule(
200
+ questions=(
201
+ "Are return paths representing distinct outcomes that should be named?",
202
+ "Can guard clauses or result objects make the contract clearer?",
203
+ ),
204
+ mcp_query="python too many return statements guard clauses result contract",
205
+ ),
206
+ "PLR0912": TriggerRule(
207
+ questions=(
208
+ "Is branching mixing policy decisions with execution?",
209
+ "Can guard clauses, named predicates, or extracted decisions simplify the flow?",
210
+ ),
211
+ mcp_query="python too many branches guard clauses named predicates",
212
+ ),
213
+ "PLR0913": TriggerRule(
214
+ questions=(
215
+ "Do these arguments form a stable concept?",
216
+ "Would a dataclass, TypedDict, or keyword-only boundary clarify the call site?",
217
+ ),
218
+ mcp_query="python too many arguments dataclass typed dict call site clarity",
219
+ ),
220
+ "PLR0914": TriggerRule(
221
+ questions=(
222
+ "Is the function carrying too much intermediate state?",
223
+ "Can parsing, calculation, and side effects be separated?",
224
+ ),
225
+ mcp_query="python too many locals split parsing calculation side effects",
226
+ ),
227
+ "PLR0915": TriggerRule(
228
+ questions=(
229
+ "Does this function combine validation, transformation, and side effects?",
230
+ "Can smaller named steps preserve behavior while improving scanability?",
231
+ ),
232
+ mcp_query="python too many statements split validation transformation side effects",
233
+ ),
234
+ "PLR0916": TriggerRule(
235
+ questions=(
236
+ "Is this condition encoding several decisions at once?",
237
+ "Would named predicates or explicit decision steps make the policy clearer?",
238
+ ),
239
+ mcp_query="python too many boolean expressions named predicates policy clarity",
240
+ ),
241
+ "PLR1702": TriggerRule(
242
+ questions=(
243
+ "Is nesting hiding the main path through the function?",
244
+ "Can early returns or named predicates flatten the code?",
245
+ ),
246
+ mcp_query="python nested blocks guard clauses main path readability",
247
+ ),
248
+ "PLR2004": TriggerRule(
249
+ questions=(
250
+ "Does this value encode business policy or a domain threshold?",
251
+ "Would a named constant make the rule searchable and easier to change?",
252
+ ),
253
+ mcp_query="python magic value comparison named constant business policy",
254
+ ),
255
+ "RET505": TriggerRule(
256
+ questions=(
257
+ "Is the else branch unnecessary after a return?",
258
+ "Would flattening the control flow make the main path clearer?",
259
+ ),
260
+ mcp_query="python unnecessary else after return flatten control flow",
261
+ ),
262
+ "RET506": TriggerRule(
263
+ questions=(
264
+ "Is the else branch unnecessary after an exception?",
265
+ "Would flattening the control flow make failure handling clearer?",
266
+ ),
267
+ mcp_query="python unnecessary else after raise flatten error handling",
268
+ ),
269
+ "RET507": TriggerRule(
270
+ questions=(
271
+ "Is the else branch unnecessary after continue?",
272
+ "Would flattening the loop body improve scanability?",
273
+ ),
274
+ mcp_query="python unnecessary else after continue flatten loop",
275
+ ),
276
+ "RET508": TriggerRule(
277
+ questions=(
278
+ "Is the else branch unnecessary after break?",
279
+ "Would flattening the loop body improve scanability?",
280
+ ),
281
+ mcp_query="python unnecessary else after break flatten loop",
282
+ ),
283
+ "SIM102": TriggerRule(
284
+ questions=(
285
+ "Is nested branching hiding a single combined condition?",
286
+ "Would a named predicate or combined guard clarify the decision?",
287
+ ),
288
+ mcp_query="python nested if combined condition named predicate",
289
+ ),
290
+ "SIM103": TriggerRule(
291
+ questions=(
292
+ "Is a boolean branch returning literal booleans instead of the condition?",
293
+ "Would returning a named predicate make intent clearer?",
294
+ ),
295
+ mcp_query="python needless boolean return named predicate",
296
+ ),
297
+ "SIM108": TriggerRule(
298
+ questions=(
299
+ "Is assignment spread across branches obscuring one decision?",
300
+ "Would a named expression or helper make the choice clearer?",
301
+ ),
302
+ mcp_query="python if else assignment decision helper readability",
303
+ ),
304
+ "TD002": TriggerRule(
305
+ questions=(
306
+ "Is this TODO owned by a person or team?",
307
+ "Should the debt be tracked or removed?",
308
+ ),
309
+ mcp_query="python todo owner tracked technical debt",
310
+ ),
311
+ "TD003": TriggerRule(
312
+ questions=(
313
+ "Is this TODO tied to a tracked issue?",
314
+ "Should the comment become an actionable task or be removed?",
315
+ ),
316
+ mcp_query="python todo issue link actionable technical debt",
317
+ ),
318
+ }
@@ -0,0 +1,291 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import json
6
+ from dataclasses import asdict
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from clean_code_eslint_triggers import ESLINT_TRIGGERS
11
+ from clean_code_python_triggers import PYLINT_TRIGGERS, RUFF_TRIGGERS
12
+ from clean_code_review_io import load_json_file, run_json_command
13
+ from clean_code_review_models import LintTrigger, ReviewCandidate, TriggerInput, TriggerRule
14
+
15
+ ROOT = Path(__file__).resolve().parents[1]
16
+ JsonDict = dict[str, Any]
17
+
18
+
19
def eslint_candidates(results: list[JsonDict]) -> list[ReviewCandidate]:
    """Turn ESLint JSON results into review candidates.

    Results without a string ``filePath`` are skipped, as are messages that
    are not dicts or whose ``ruleId`` is not a key of ``ESLINT_TRIGGERS``.
    Every candidate is tagged with language ``"typescript"`` and tool
    ``"eslint"``; it carries no symbol, only a line anchor.
    """
    found: list[ReviewCandidate] = []
    for entry in results:
        source_file = entry.get("filePath")
        if not isinstance(source_file, str):
            continue
        for finding in entry.get("messages", []):
            if not isinstance(finding, dict):
                continue
            rule_name = finding.get("ruleId")
            if isinstance(rule_name, str) and rule_name in ESLINT_TRIGGERS:
                # The line feeds both the anchor and the trigger position.
                line_number = optional_int(finding.get("line"))
                trigger = TriggerInput(
                    language="typescript",
                    file=source_file,
                    symbol=None,
                    anchor=line_anchor(line_number),
                    tool="eslint",
                    rule=rule_name,
                    message=str(finding.get("message", "")),
                    line=line_number,
                    column=optional_int(finding.get("column")),
                )
                found.append(
                    review_candidate(trigger, trigger_rule=ESLINT_TRIGGERS[rule_name])
                )
    return found
48
+
49
+
50
def ruff_candidates(messages: list[JsonDict]) -> list[ReviewCandidate]:
    """Turn Ruff JSON diagnostics into review candidates.

    Diagnostics whose ``code`` is not a string key of ``RUFF_TRIGGERS`` are
    skipped. The position is read from ``location["row"]`` and
    ``location["column"]``; a missing or non-dict ``location`` yields no
    position. Candidates carry no symbol, only a line anchor.
    """
    found: list[ReviewCandidate] = []
    for finding in messages:
        rule_code = finding.get("code")
        if isinstance(rule_code, str) and rule_code in RUFF_TRIGGERS:
            raw_location = finding.get("location", {})
            # Treat anything that is not a mapping as "no location given".
            position = raw_location if isinstance(raw_location, dict) else {}
            line_number = optional_int(position.get("row"))
            trigger = TriggerInput(
                language="python",
                file=str(finding.get("filename", "")),
                symbol=None,
                anchor=line_anchor(line_number),
                tool="ruff",
                rule=rule_code,
                message=str(finding.get("message", "")),
                line=line_number,
                column=optional_int(position.get("column")),
            )
            found.append(
                review_candidate(trigger, trigger_rule=RUFF_TRIGGERS[rule_code])
            )
    return found
77
+
78
+
79
def pylint_candidates(messages: list[JsonDict]) -> list[ReviewCandidate]:
    """Turn Pylint JSON messages into review candidates.

    Messages whose ``symbol`` is not a string key of ``PYLINT_TRIGGERS`` are
    skipped. The candidate's symbol comes from the message's ``obj`` field
    (``None`` when absent or empty) and its file from ``path``.
    """
    found: list[ReviewCandidate] = []
    for finding in messages:
        rule_name = finding.get("symbol")
        if isinstance(rule_name, str) and rule_name in PYLINT_TRIGGERS:
            # The line feeds both the anchor and the trigger position.
            line_number = optional_int(finding.get("line"))
            trigger = TriggerInput(
                language="python",
                file=str(finding.get("path", "")),
                symbol=optional_str(finding.get("obj")),
                anchor=line_anchor(line_number),
                tool="pylint",
                rule=rule_name,
                message=str(finding.get("message", "")),
                line=line_number,
                column=optional_int(finding.get("column")),
            )
            found.append(
                review_candidate(trigger, trigger_rule=PYLINT_TRIGGERS[rule_name])
            )
    return found
102
+
103
+
104
def optional_int(value: object) -> int | None:
    """Return ``value`` when it is a genuine integer, otherwise ``None``.

    ``bool`` is a subclass of ``int``, so ``True``/``False`` are rejected
    explicitly: a JSON boolean must never be reported as a line or column.
    """
    if isinstance(value, bool):
        return None
    return value if isinstance(value, int) else None
106
+
107
+
108
def optional_str(value: object) -> str | None:
    """Return ``value`` if it is a non-empty string; otherwise ``None``."""
    if isinstance(value, str) and value != "":
        return value
    return None
110
+
111
+
112
def line_anchor(line: int | None) -> str | None:
    """Format a line number as the anchor text ``"line N"``.

    ``None`` is passed through so findings without a line get no anchor.
    """
    return None if line is None else f"line {line}"
116
+
117
+
118
def review_candidate(
    trigger_input: TriggerInput,
    *,
    trigger_rule: TriggerRule,
) -> ReviewCandidate:
    """Wrap one lint finding and its rule guidance in a ``ReviewCandidate``.

    The finding becomes the candidate's single trigger, the file path is
    passed through ``normalize_file``, and the rule's questions and query are
    copied into ``semantic_questions`` and ``mcp_queries``.
    """
    lint_trigger = LintTrigger(
        tool=trigger_input.tool,
        rule=trigger_input.rule,
        message=trigger_input.message,
        line=trigger_input.line,
        column=trigger_input.column,
    )
    return ReviewCandidate(
        language=trigger_input.language,
        file=normalize_file(trigger_input.file),
        symbol=trigger_input.symbol,
        anchor=trigger_input.anchor,
        skill="clean-code-mcp-reviewer",
        triggers=(lint_trigger,),
        semantic_questions=trigger_rule.questions,
        mcp_queries=(trigger_rule.mcp_query,),
    )
141
+
142
+
143
def normalize_file(path: str) -> str:
    """Express ``path`` relative to ``ROOT`` when it lies inside it.

    The empty string is returned unchanged. Otherwise the path is resolved
    (a relative path resolves against the current working directory) and the
    ``ROOT``-relative form is returned. If the resolved path is not under
    ``ROOT``, the input is returned exactly as given.
    """
    if path == "":
        return path
    try:
        inside_root = Path(path).resolve().relative_to(ROOT)
    except ValueError:
        # Not located under ROOT: keep the caller's spelling.
        return path
    return str(inside_root)
151
+
152
+
153
def merge_candidates(candidates: list[ReviewCandidate]) -> list[ReviewCandidate]:
    """Combine candidates that point at the same location into one.

    Candidates sharing language, file, symbol, anchor and skill are merged:
    their triggers are concatenated in input order, and their questions and
    queries are de-duplicated with ``unique_items``. The merged list is
    returned sorted by ``candidate_sort_key``.
    """
    buckets: dict[tuple[str, str, str | None, str | None, str], list[ReviewCandidate]] = {}
    for candidate in candidates:
        identity = (
            candidate.language,
            candidate.file,
            candidate.symbol,
            candidate.anchor,
            candidate.skill,
        )
        if identity not in buckets:
            buckets[identity] = []
        buckets[identity].append(candidate)

    merged = [
        ReviewCandidate(
            language=language,
            file=file,
            symbol=symbol,
            anchor=anchor,
            skill=skill,
            triggers=tuple(
                trigger for member in members for trigger in member.triggers
            ),
            semantic_questions=unique_items(
                question for member in members for question in member.semantic_questions
            ),
            mcp_queries=unique_items(
                query for member in members for query in member.mcp_queries
            ),
        )
        for (language, file, symbol, anchor, skill), members in buckets.items()
    ]
    merged.sort(key=candidate_sort_key)
    return merged
185
+
186
+
187
def candidate_sort_key(candidate: ReviewCandidate) -> tuple[str, str, int]:
    """Return the ordering key for a merged review candidate.

    Candidates are ordered by file, then by symbol name (empty when there is
    no symbol), then by the smallest trigger line (``0`` when no trigger has a
    line). The textual anchor is deliberately not part of the key: anchors
    look like ``"line 10"`` and compare lexicographically, which would place
    line 10 before line 2.
    """
    first_line = min(
        (trigger.line for trigger in candidate.triggers if trigger.line is not None),
        default=0,
    )
    return (candidate.file, candidate.symbol or "", first_line)
193
+
194
+
195
def unique_items(items: Any) -> tuple[str, ...]:
    """Return the distinct items of ``items`` in first-seen order."""
    # dict keys are unique and keep insertion order, so this drops repeats
    # while preserving the position of each first occurrence.
    return tuple(dict.fromkeys(items))
203
+
204
+
205
def candidates_from_sources(args: argparse.Namespace) -> list[ReviewCandidate]:
    """Collect candidates from every lint source named in ``args``.

    Tools are processed in the order ESLint, Pylint, Ruff; for each tool a
    saved JSON file is read before a command is run. The combined list is
    passed through ``merge_candidates``.
    """
    sources = (
        (eslint_candidates, args.eslint_json, args.eslint_command),
        (pylint_candidates, args.pylint_json, args.pylint_command),
        (ruff_candidates, args.ruff_json, args.ruff_command),
    )
    collected: list[ReviewCandidate] = []
    for build, json_path, command in sources:
        if json_path:
            collected.extend(build(load_json_file(json_path)))
        if command:
            collected.extend(build(run_json_command(command)))
    return merge_candidates(collected)
220
+
221
+
222
def candidate_payload(candidates: list[ReviewCandidate]) -> JsonDict:
    """Build the JSON-serialisable report for ``candidates``.

    The payload holds a schema identifier, the number of candidates, and
    each candidate converted to a plain dict with ``dataclasses.asdict``.
    """
    total = len(candidates)
    serialised = list(map(asdict, candidates))
    return {
        "schema": "clean-code-review-candidates/v1",
        "candidate_count": total,
        "candidates": serialised,
    }
228
+
229
+
230
def markdown_payload(candidates: list[ReviewCandidate]) -> str:
    """Render review candidates as a Markdown report.

    Returns a single-line notice when ``candidates`` is empty. Otherwise the
    report has one ``##`` section per candidate, headed by the file plus its
    symbol (or, failing that, its line anchor), followed by bullet lists of
    triggers, semantic questions and MCP queries.

    Sub-items are indented by two spaces so that they nest under their parent
    ``- `` bullet; with a smaller indent Markdown treats them as siblings.
    """
    if not candidates:
        return "No clean-code semantic review candidates found.\n"

    lines = ["# Clean-Code Semantic Review Candidates", ""]
    for candidate in candidates:
        # Prefer the symbol for the heading; fall back to the line anchor.
        qualifier = candidate.symbol or candidate.anchor
        heading = f"{candidate.file}::{qualifier}" if qualifier else candidate.file
        lines += [
            f"## {heading}",
            "",
            f"- language: `{candidate.language}`",
            f"- skill: `{candidate.skill}`",
            "- triggers:",
        ]
        for trigger in candidate.triggers:
            bullet = f"  - `{trigger.tool}/{trigger.rule}`"
            if trigger.line is not None:
                bullet += f" at line {trigger.line}"
            if trigger.message:
                bullet += f": {trigger.message}"
            lines.append(bullet)
        lines.append("- semantic questions:")
        lines.extend(f"  - {question}" for question in candidate.semantic_questions)
        lines.append("- MCP queries:")
        lines.extend(f"  - `{query}`" for query in candidate.mcp_queries)
        # Blank line separates sections and yields the trailing newline.
        lines.append("")
    return "\n".join(lines)
265
+
266
+
267
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the candidate generator.

    ESLint, Pylint and Ruff each get a ``--<tool>-json`` option (converted to
    ``Path``) and a ``--<tool>-command`` option (kept as a string).
    ``--format`` selects ``json`` (the default) or ``markdown``.
    """
    parser = argparse.ArgumentParser(
        description="Convert deterministic lint findings into clean-code semantic review candidates."
    )
    for tool in ("eslint", "pylint", "ruff"):
        parser.add_argument(f"--{tool}-json", type=Path)
        parser.add_argument(f"--{tool}-command")
    parser.add_argument("--format", choices=("json", "markdown"), default="json")
    return parser.parse_args()
279
+
280
+
281
def main() -> None:
    """Gather candidates from the requested sources and print the report.

    Prints Markdown when ``--format markdown`` was given; otherwise prints
    the JSON payload indented by two spaces.
    """
    args = parse_args()
    candidates = candidates_from_sources(args)
    report = (
        markdown_payload(candidates)
        if args.format == "markdown"
        else json.dumps(candidate_payload(candidates), indent=2)
    )
    print(report)
288
+
289
+
290
+ if __name__ == "__main__":
291
+ main()
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import shlex
5
+ import subprocess
6
+ import sys
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ ROOT = Path(__file__).resolve().parents[1]
11
+
12
+
13
def parse_json(raw_json: str, *, source: str) -> Any:
    """Decode ``raw_json`` and return the resulting Python value.

    Raises:
        SystemExit: if the text is not valid JSON. The exit message names
            ``source`` and includes the decoder's error.
    """
    try:
        parsed = json.loads(raw_json)
    except json.JSONDecodeError as exc:
        raise SystemExit(f"Could not parse {source} as JSON: {exc}") from exc
    return parsed
19
+
20
+
21
def load_json_file(path: Path) -> Any:
    """Read ``path`` and return its decoded JSON content.

    The file is decoded as UTF-8 explicitly rather than with the platform's
    locale encoding, so non-ASCII lint messages load identically everywhere.

    Raises:
        SystemExit: via ``parse_json`` when the content is not valid JSON.
    """
    return parse_json(path.read_text(encoding="utf-8"), source=str(path))
23
+
24
+
25
def run_json_command(command: str) -> Any:
    """Run a lint command from ``ROOT`` and return its stdout parsed as JSON.

    The command string is split with ``shlex`` and executed as an argument
    list with output captured as text. A non-zero exit status is not treated
    as an error by itself (``check=False``).

    Raises:
        SystemExit: if stdout is blank (stderr is echoed first and the exit
            code is the command's return code, or 1 when that is 0), or via
            ``parse_json`` if stdout is not valid JSON.
    """
    argv = shlex.split(command)
    completed = subprocess.run(  # noqa: S603 - runs caller-provided local lint commands.
        argv,
        cwd=ROOT,
        check=False,
        text=True,
        capture_output=True,
    )
    output = completed.stdout
    if output.strip():
        return parse_json(output, source=command)
    # Nothing to parse: surface the tool's own diagnostics and stop.
    print(completed.stderr, file=sys.stderr)
    raise SystemExit(completed.returncode or 1)
@@ -0,0 +1,43 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
@dataclass(frozen=True)
class TriggerRule:
    """Immutable review guidance attached to one lint rule."""

    # Follow-up questions associated with the rule.
    questions: tuple[str, ...]
    # Query text for the rule (presumably used for an MCP search; confirm).
    mcp_query: str
10
+
11
+
12
@dataclass(frozen=True)
class LintTrigger:
    """Immutable record of one lint finding."""

    # Name of the linter that reported the finding.
    tool: str
    # Identifier of the rule that fired.
    rule: str
    # Message text of the finding.
    message: str
    # Position of the finding; either part may be None.
    line: int | None
    column: int | None
19
+
20
+
21
@dataclass(frozen=True)
class TriggerInput:
    """Immutable bundle of the values describing one lint finding and its location."""

    # Language label and file path of the finding.
    language: str
    file: str
    # Optional symbol name and optional anchor text for the location.
    symbol: str | None
    anchor: str | None
    # Linter name, rule identifier and message text.
    tool: str
    rule: str
    message: str
    # Position of the finding; either part may be None.
    line: int | None
    column: int | None
32
+
33
+
34
@dataclass(frozen=True)
class ReviewCandidate:
    """Immutable review candidate: a location plus the lint triggers and prompts for it."""

    # Language label and file path of the location.
    language: str
    file: str
    # Optional symbol name and optional anchor text for the location.
    symbol: str | None
    anchor: str | None
    # Name of the skill the candidate is addressed to.
    skill: str
    # Lint findings that produced this candidate.
    triggers: tuple[LintTrigger, ...]
    # Questions and query strings attached to the candidate.
    semantic_questions: tuple[str, ...]
    mcp_queries: tuple[str, ...]