clean-code-tools 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +66 -0
- package/configs/eslint.clean-code.recommended.mjs +211 -0
- package/configs/python.clean-code.pyproject.toml +143 -0
- package/data/clean-code-patterns.jsonl +264 -0
- package/data/vector-record.schema.json +77 -0
- package/docs/README.md +29 -0
- package/docs/eslint-custom-rules.md +74 -0
- package/docs/eslint-recommended-config.md +87 -0
- package/docs/fastmcp-local-server.md +104 -0
- package/docs/publishing.md +125 -0
- package/docs/python-lint-recommended-config.md +57 -0
- package/docs/python-pylint-custom-rules.md +77 -0
- package/docs/semantic-weaviate.md +80 -0
- package/docs/static-trigger-semantic-review.md +97 -0
- package/evals/clean-code-retrieval.jsonl +13 -0
- package/ops/dev/weaviate/README.md +34 -0
- package/ops/dev/weaviate/compose.yaml +34 -0
- package/ops/dev/weaviate/smoke.sh +28 -0
- package/package.json +96 -0
- package/pyproject.toml +303 -0
- package/sample-apps/README.md +40 -0
- package/sample-apps/python-app/pyproject.toml +113 -0
- package/sample-apps/python-app/src/clean_pricing.py +10 -0
- package/sample-apps/python-app/src/smelly_pricing.py +8 -0
- package/sample-apps/ts-backend/eslint.config.mjs +3 -0
- package/sample-apps/ts-backend/package.json +18 -0
- package/sample-apps/ts-backend/src/clean-handler.ts +19 -0
- package/sample-apps/ts-backend/src/smelly-handler.ts +29 -0
- package/sample-apps/ts-backend/tsconfig.json +9 -0
- package/sample-apps/ts-frontend/eslint.config.mjs +3 -0
- package/sample-apps/ts-frontend/package.json +18 -0
- package/sample-apps/ts-frontend/src/CleanWidget.tsx +18 -0
- package/sample-apps/ts-frontend/src/SmellyWidget.tsx +27 -0
- package/sample-apps/ts-frontend/tsconfig.json +10 -0
- package/scripts/_mcp_app.py +21 -0
- package/scripts/check_clean_code_review_candidates.py +302 -0
- package/scripts/check_fastmcp_server.py +106 -0
- package/scripts/check_packages.py +137 -0
- package/scripts/check_python_config.py +130 -0
- package/scripts/check_repo_python_lint.py +46 -0
- package/scripts/check_retrieval_evals.py +132 -0
- package/scripts/check_sample_apps.py +169 -0
- package/scripts/check_semantic_search_tooling.py +102 -0
- package/scripts/clean_code_eslint_triggers.py +272 -0
- package/scripts/clean_code_mcp_server.py +7 -0
- package/scripts/clean_code_python_triggers.py +318 -0
- package/scripts/clean_code_review_candidates.py +291 -0
- package/scripts/clean_code_review_io.py +36 -0
- package/scripts/clean_code_review_models.py +43 -0
- package/scripts/clean_code_semantic.py +27 -0
- package/scripts/set_package_versions.py +82 -0
- package/scripts/weaviate_ingest_clean_code.py +44 -0
- package/scripts/weaviate_search_clean_code.py +51 -0
- package/skills/clean-code-mcp-reviewer/SKILL.md +209 -0
- package/skills/clean-code-mcp-reviewer/evals/evals.json +30 -0
- package/src/js/eslint-plugin-clean-code.mjs +758 -0
- package/src/python/clean_code_tools_pylint/__init__.py +14 -0
- package/src/python/clean_code_tools_pylint/ast_checker.py +122 -0
- package/src/python/clean_code_tools_pylint/comments.py +83 -0
- package/src/python/clean_code_tools_pylint/helpers.py +196 -0
- package/src/python/mcp_server/__init__.py +1 -0
- package/src/python/mcp_server/corpus.py +160 -0
- package/src/python/mcp_server/markdown.py +126 -0
- package/src/python/mcp_server/models.py +73 -0
- package/src/python/mcp_server/ranking.py +125 -0
- package/src/python/mcp_server/ranking_scoring.py +232 -0
- package/src/python/mcp_server/semantic.py +192 -0
- package/src/python/mcp_server/server.py +235 -0
- package/src/python/mcp_server/server_payloads.py +83 -0
- package/src/python/mcp_server/text.py +104 -0
- package/src/python/mcp_server/utils/__init__.py +1 -0
- package/src/python/mcp_server/utils/httpx_loader.py +14 -0
- package/src/python/mcp_server/utils/increment.py +7 -0
- package/src/python/mcp_server/utils/sha256_text.py +8 -0
- package/src/python/mcp_server/utils/unique_strings.py +15 -0
- package/src/python/mcp_server/weaviate.py +182 -0
- package/uv.lock +2012 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pylint.lint import PyLinter
|
|
4
|
+
|
|
5
|
+
from .ast_checker import CleanCodeAstChecker
|
|
6
|
+
from .comments import CleanCodeCommentChecker
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def register(linter: PyLinter) -> None:
    """Register the clean-code comment and AST checkers on ``linter``.

    The call is idempotent per linter: a private marker attribute is set on the
    linter the first time, and later calls return without registering again.
    """
    already_registered = getattr(  # pylint: disable=clean-code-boolean-flag-argument
        linter, "_clean_code_tools_registered", False
    )
    if not already_registered:
        # Set the marker before registering so the guard holds from here on.
        linter._clean_code_tools_registered = True
        for checker_class in (CleanCodeCommentChecker, CleanCodeAstChecker):
            linter.register_checker(checker_class(linter))
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import ClassVar
|
|
4
|
+
|
|
5
|
+
from astroid import nodes
|
|
6
|
+
from pylint.checkers import BaseChecker
|
|
7
|
+
from pylint.lint import PyLinter
|
|
8
|
+
|
|
9
|
+
from .helpers import (
|
|
10
|
+
MAX_ATTRIBUTE_CHAIN_DEPTH,
|
|
11
|
+
MUTATOR_METHODS,
|
|
12
|
+
annotation_is_bool,
|
|
13
|
+
attribute_depth,
|
|
14
|
+
is_allowed_literal_context,
|
|
15
|
+
is_policy_literal_context,
|
|
16
|
+
literal_looks_like_policy,
|
|
17
|
+
name_looks_like_selector,
|
|
18
|
+
root_name,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class CleanCodeAstChecker(BaseChecker):
    """Pylint AST checker that reports four clean-code smells.

    Emits C9003 (boolean flag arguments), C9004 (mutating a parameter as an
    output argument), C9007 (hard-coded policy literals) and C9008 (deep
    attribute chains).
    """

    name = "clean-code-ast"
    msgs: ClassVar = {
        "C9003": (
            "Boolean selector argument changes behavior by mode; prefer named operations or an explicit options object.",
            "clean-code-boolean-flag-argument",
            "Discourage boolean selector arguments and boolean mode parameters.",
        ),
        "C9004": (
            "Avoid mutating parameter '%s' as an output argument; return a value or create a local copy instead.",
            "clean-code-output-argument-mutation",
            "Flag parameter mutation that treats arguments as output containers.",
        ),
        "C9007": (
            "Policy literal '%s' should usually be a named constant so the rule is searchable.",
            "clean-code-business-policy-literal",
            "Flag hard-coded policy literals in branch, return, and call expressions.",
        ),
        "C9008": (
            "Deep attribute chain exposes object internals; prefer a named query on the owning object.",
            "clean-code-train-wreck",
            "Flag deep attribute chains that expose transitive object structure.",
        ),
    }

    def __init__(self, linter: PyLinter) -> None:
        """Create the checker with empty function-scope stacks."""
        super().__init__(linter)
        # Parallel stacks with one entry per function currently being visited:
        # that function's parameter names, and the names assigned inside it so far.
        self._function_params: list[set[str]] = []
        self._function_locals: list[set[str]] = []

    def visit_functiondef(self, node: nodes.FunctionDef) -> None:
        """Open a scope for ``node`` and check its parameters for boolean selectors."""
        params = {argument.name for argument in node.args.args + node.args.kwonlyargs}
        self._function_params.append(params)
        self._function_locals.append(set())
        self.check_boolean_params(node)

    visit_asyncfunctiondef = visit_functiondef

    def leave_functiondef(self, _node: nodes.FunctionDef) -> None:
        """Close the scope opened by ``visit_functiondef``."""
        self._function_params.pop()
        self._function_locals.pop()

    leave_asyncfunctiondef = leave_functiondef

    def visit_assignname(self, node: nodes.AssignName) -> None:
        """Record a name bound inside the innermost function being visited."""
        # Parameter declarations are AssignName nodes too; they are not local rebinding.
        if isinstance(node.parent, nodes.Arguments):
            return
        if self._function_locals:
            self._function_locals[-1].add(node.name)

    def visit_call(self, node: nodes.Call) -> None:
        """Report positional boolean literals and mutator-method calls on parameters."""
        for argument in node.args:
            if isinstance(argument, nodes.Const) and isinstance(argument.value, bool):
                self.add_message("clean-code-boolean-flag-argument", node=argument)
        if isinstance(node.func, nodes.Attribute) and node.func.attrname in MUTATOR_METHODS:
            self.report_if_param_mutation(node.func.expr, node.func.expr)

    def visit_assignattr(self, node: nodes.AssignAttr) -> None:
        """Report attribute assignment on a parameter, e.g. ``param.field = value``."""
        # root_name() only walks Attribute/Subscript/Name nodes and an AssignAttr is
        # none of those, so passing the AssignAttr itself never resolved a name.
        # Resolve the owner from the expression the attribute is being set on.
        self.report_if_param_mutation(node, node.expr)

    def visit_assign(self, node: nodes.Assign) -> None:
        """Report assignment targets rooted in a parameter, e.g. ``param[key] = value``."""
        # Attribute targets are reported by visit_assignattr; root_name() does not
        # resolve them here, so they are not reported twice.
        for target in node.targets:
            self.report_if_param_mutation(target, target)

    def visit_augassign(self, node: nodes.AugAssign) -> None:
        """Report augmented assignment whose target is rooted in a parameter."""
        self.report_if_param_mutation(node.target, node.target)

    def visit_const(self, node: nodes.Const) -> None:
        """Report a literal that looks like business policy in a policy context."""
        if (
            literal_looks_like_policy(node.value)
            and is_policy_literal_context(node)
            and not is_allowed_literal_context(node)
        ):
            self.add_message("clean-code-business-policy-literal", node=node, args=(str(node.value),))

    def visit_attribute(self, node: nodes.Attribute) -> None:
        """Report attribute chains deeper than ``MAX_ATTRIBUTE_CHAIN_DEPTH``."""
        # Only the outermost Attribute of a chain is measured, so a chain is reported once.
        if isinstance(node.parent, nodes.Attribute):
            return
        if attribute_depth(node) > MAX_ATTRIBUTE_CHAIN_DEPTH:
            self.add_message("clean-code-train-wreck", node=node)

    def check_boolean_params(self, node: nodes.FunctionDef) -> None:
        """Report ``bool``-annotated parameters whose name looks like a selector."""
        arguments = node.args.args + node.args.kwonlyargs
        annotations = node.args.annotations + node.args.kwonlyargs_annotations
        for argument, annotation in zip(arguments, annotations, strict=False):
            if argument.name in {"self", "cls"}:
                continue
            if annotation_is_bool(annotation) and name_looks_like_selector(argument.name):
                self.add_message("clean-code-boolean-flag-argument", node=argument)

    def report_if_param_mutation(self, node: nodes.NodeNG, expression: nodes.NodeNG) -> None:
        """Emit C9004 at ``node`` when ``expression`` is rooted in a function parameter.

        Scopes are searched innermost first. A name already assigned locally in a
        scope stops the search without a report; ``self`` and ``cls`` are never reported.
        """
        name = root_name(expression)
        if not name or name in {"self", "cls"}:
            return
        scopes = zip(reversed(self._function_params), reversed(self._function_locals), strict=False)
        for params, locals_ in scopes:
            if name in locals_:
                return
            if name in params:
                self.add_message("clean-code-output-argument-mutation", node=node, args=(name,))
                return
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import tokenize
|
|
4
|
+
from io import BytesIO
|
|
5
|
+
from typing import ClassVar
|
|
6
|
+
|
|
7
|
+
from astroid import nodes
|
|
8
|
+
from pylint.checkers import BaseRawFileChecker
|
|
9
|
+
|
|
10
|
+
from .helpers import (
|
|
11
|
+
MIN_SHARED_COMMENT_WORDS,
|
|
12
|
+
REDUNDANT_COMMENT_OVERLAP_RATIO,
|
|
13
|
+
TODO_PATTERN,
|
|
14
|
+
TODO_SEGMENT,
|
|
15
|
+
clean_comment,
|
|
16
|
+
is_byline_or_date,
|
|
17
|
+
is_likely_code_comment,
|
|
18
|
+
is_separator_comment,
|
|
19
|
+
normalized_words,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class CleanCodeCommentChecker(BaseRawFileChecker):
    """Pylint raw-file checker that inspects ``#`` comments.

    Emits C9001 (TODO format), C9002 (commented-out code), C9005 (comment that
    repeats the next line) and C9006 (separator, byline, or date comments).
    """

    name = "clean-code-comments"
    msgs: ClassVar = {
        "C9001": (
            "TODO/FIXME comments should include an owner or issue ID, for example TODO(PROJ-123): remove fallback.",
            "clean-code-todo-format",
            "Require TODO, FIXME, and XXX comments to include an owner or issue identifier.",
        ),
        "C9002": (
            "Remove commented-out code; version history should preserve old implementations.",
            "clean-code-commented-out-code",
            "Flag comments that look like disabled Python code.",
        ),
        "C9005": (
            "Comment mostly repeats the next line; prefer making the code name carry the intent.",
            "clean-code-redundant-comment",
            "Flag comments that mostly repeat the following line of code.",
        ),
        "C9006": (
            "Avoid noisy separator, byline, or date comments; use structure and version control instead.",
            "clean-code-noisy-comment",
            "Flag separator, byline, and date comments.",
        ),
    }

    def process_module(self, node: nodes.Module) -> None:
        """Tokenize the module source and run every comment check on each comment."""
        # Close the module stream once its bytes are read instead of leaking the handle.
        with node.stream() as stream:
            raw_bytes = stream.read()
        lines = raw_bytes.decode("utf-8", errors="replace").splitlines()
        for token in tokenize.tokenize(BytesIO(raw_bytes).readline):
            if token.type != tokenize.COMMENT:
                continue
            text = clean_comment(token.string)
            line_number = token.start[0]
            self.check_todo(text, line_number)
            self.check_comment_shape(text, line_number)
            self.check_redundant_comment(text, line_number, lines)

    def check_todo(self, text: str, line_number: int) -> None:
        """Emit C9001 when any TODO/FIXME/XXX segment does not match ``TODO_PATTERN``."""
        todo_segments = TODO_SEGMENT.findall(text)
        if any(not TODO_PATTERN.match(segment.strip()) for segment in todo_segments):
            self.add_message("clean-code-todo-format", line=line_number)

    def check_comment_shape(self, text: str, line_number: int) -> None:
        """Emit C9002 for code-like comments and C9006 for separator/byline/date comments."""
        # Comments containing a TODO marker are left to check_todo.
        if TODO_SEGMENT.search(text):
            return
        if is_likely_code_comment(text):
            self.add_message("clean-code-commented-out-code", line=line_number)
        if is_separator_comment(text) or is_byline_or_date(text):
            self.add_message("clean-code-noisy-comment", line=line_number)

    def check_redundant_comment(self, text: str, line_number: int, lines: list[str]) -> None:
        """Emit C9005 when most of the comment's words also appear on the following line."""
        comment_words = normalized_words(text)
        if len(comment_words) < MIN_SHARED_COMMENT_WORDS or line_number >= len(lines):
            return
        # line_number is 1-based, so this 0-based index selects the line after the comment.
        next_line_words = set(normalized_words(lines[line_number]))
        shared_words = [word for word in comment_words if word in next_line_words]
        if (
            len(shared_words) >= MIN_SHARED_COMMENT_WORDS
            and len(shared_words) / len(comment_words) >= REDUNDANT_COMMENT_OVERLAP_RATIO
        ):
            self.add_message("clean-code-redundant-comment", line=line_number)
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from astroid import nodes
|
|
7
|
+
|
|
8
|
+
# A well-formed TODO segment: marker, an issue key such as PROJ-123 in
# parentheses, a colon, whitespace, then text. Matching is case-insensitive.
TODO_PATTERN = re.compile(r"^(TODO|FIXME|XXX)\([A-Z][A-Z0-9]+-\d+\):\s+\S", re.IGNORECASE)
# Finds each TODO/FIXME/XXX marker and the text after it up to a newline or ';'.
TODO_SEGMENT = re.compile(r"\b(?:TODO|FIXME|XXX)\b[^\n;]*", re.IGNORECASE)
# Fragments looked for (as substrings) by name_looks_like_selector.
SELECTOR_PARAM_NAMES = (
    "flag",
    "mode",
    "option",
    "type",
    "kind",
    "variant",
    "selector",
    "enabled",
    "disabled",
    "dry_run",
    "verbose",
    "silent",
    "force",
    "skip",
    "include",
    "exclude",
)
# Method names treated as mutating the object they are called on.
MUTATOR_METHODS = {
    "add",
    "append",
    "clear",
    "discard",
    "extend",
    "insert",
    "pop",
    "popitem",
    "remove",
    "reverse",
    "setdefault",
    "sort",
    "update",
}
# Call names under which is_allowed_literal_context accepts a literal.
ALLOWED_LITERAL_CALLS = {
    "bool",
    "bytes",
    "dict",
    "float",
    "int",
    "len",
    "list",
    "print",
    "range",
    "repr",
    "set",
    "str",
    "tuple",
}
# A status word delimited by string start/end, underscore, whitespace, or hyphen.
STATUS_WORD = re.compile(
    r"(?:^|[_\s-])(active|approved|cancelled|canceled|draft|failed|paid|pending|rejected|retry|suspended)(?:$|[_\s-])",
    re.IGNORECASE,
)
# Comments shorter than this are never treated as commented-out code.
MIN_CODE_COMMENT_LENGTH = 4
# Minimum whitespace-free length for a comment to count as a separator.
MIN_SEPARATOR_LENGTH = 8
# Strings shorter than this are never treated as policy literals.
MIN_POLICY_STRING_LENGTH = 2
# Minimum comment word count, and minimum shared word count, for the redundancy check.
MIN_SHARED_COMMENT_WORDS = 2
# Fraction of comment words that must appear on the next line to count as redundant.
REDUNDANT_COMMENT_OVERLAP_RATIO = 0.65
# Attribute chains with more links than this are reported as train wrecks.
MAX_ATTRIBUTE_CHAIN_DEPTH = 3
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def clean_comment(comment: str) -> str:
    """Return the comment text without its leading ``#`` markers and outer whitespace.

    A comment made only of ``#`` characters (for example a ``##########`` banner)
    is returned stripped of whitespace but otherwise intact. Removing every ``#``
    would leave an empty string, and separator detection could then never see
    such a banner.
    """
    stripped = comment.lstrip("#").strip()
    return stripped or comment.strip()
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def normalized_words(value: str) -> list[str]:
    """Split ``value`` into lowercase words of at least two characters.

    Runs of ``_`` and ``$`` act as separators; each word starts with a letter
    and continues with letters or digits.
    """
    spaced = re.sub(r"[_$]+", " ", value)
    lowered = spaced.lower()
    return re.findall(r"[a-z][a-z0-9]+", lowered)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def is_likely_code_comment(text: str) -> bool:
    """Return True when the comment text resembles disabled Python code.

    Text shorter than ``MIN_CODE_COMMENT_LENGTH`` (after stripping) never
    qualifies. Otherwise one matching heuristic pattern is enough.
    """
    if len(text.strip()) < MIN_CODE_COMMENT_LENGTH:
        return False
    code_patterns = (
        # A Python keyword appearing as a whole word.
        r"\b(await|def|class|return|raise|if|for|while|match|import|from|with|try|except)\b",
        # A trailing call such as name(...) or obj.method(...).
        r"(?:^|\s)[\w.]+\([^)]*\)\s*$",
        # A simple assignment taking the whole line.
        r"^\s*[\w.]+\s*=\s*.+$",
        # Any brace or square bracket.
        r"[{}\[\]]",
        # A return-annotation arrow.
        r"->",
    )
    for pattern in code_patterns:
        if re.search(pattern, text):
            return True
    return False
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def is_separator_comment(text: str) -> bool:
    """Return True when the text is a long run of separator punctuation.

    Whitespace is ignored. The remainder must be at least
    ``MIN_SEPARATOR_LENGTH`` characters drawn only from ``- = * _ / #``.
    """
    compact = re.sub(r"\s+", "", text)
    if len(compact) < MIN_SEPARATOR_LENGTH:
        return False
    return re.fullmatch(r"[-=*_/#]+", compact) is not None
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def is_byline_or_date(text: str) -> bool:
    """Return True when the text contains an authorship phrase or a numeric date.

    Authorship phrases are matched case-insensitively as whole words. Dates are
    matched as year-first or year-last digit groups separated by ``-`` or ``/``.
    """
    byline_pattern = r"\b(author|created by|written by|modified by|last modified|since)\b"
    if re.search(byline_pattern, text, re.IGNORECASE):
        return True
    date_pattern = r"\b(?:\d{4}[-/]\d{1,2}[-/]\d{1,2}|\d{1,2}[-/]\d{1,2}[-/]\d{2,4})\b"
    return re.search(date_pattern, text) is not None
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def annotation_is_bool(annotation: nodes.NodeNG | None) -> bool:
    """Return True when the annotation is the bare name ``bool``."""
    if not isinstance(annotation, nodes.Name):
        return False
    return annotation.name == "bool"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def name_looks_like_selector(name: str) -> bool:
    """Return True when the lowercased name contains any ``SELECTOR_PARAM_NAMES`` fragment."""
    candidate = name.lower()
    for fragment in SELECTOR_PARAM_NAMES:
        if fragment in candidate:
            return True
    return False
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def call_name(node: nodes.Call) -> str | None:
    """Return the called name: the identifier for ``f(...)``, the attribute for ``x.f(...)``.

    Returns None when the callee is any other kind of expression.
    """
    callee = node.func
    if isinstance(callee, nodes.Attribute):
        return callee.attrname
    if isinstance(callee, nodes.Name):
        return callee.name
    return None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def root_name(node: nodes.NodeNG) -> str | None:
    """Return the base identifier of an attribute/subscript chain.

    Attribute and Subscript wrappers are peeled until something else is reached.
    The result is that node's name when it is a Name, otherwise None.
    """
    cursor = node
    while True:
        if isinstance(cursor, nodes.Attribute):
            cursor = cursor.expr
        elif isinstance(cursor, nodes.Subscript):
            cursor = cursor.value
        else:
            break
    return cursor.name if isinstance(cursor, nodes.Name) else None
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def is_uppercase_assignment(node: nodes.Const) -> bool:
    """Return True when the literal's parent assignment binds an all-uppercase name.

    Only a direct parent of type Assign or AnnAssign is considered.
    """
    assignment = node.parent
    if isinstance(assignment, nodes.Assign):
        targets = assignment.targets
    elif isinstance(assignment, nodes.AnnAssign):
        targets = [assignment.target]
    else:
        return False
    for target in targets:
        if isinstance(target, nodes.AssignName) and target.name.isupper():
            return True
    return False
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def literal_looks_like_policy(value: Any) -> bool:
    """Return True when a literal value looks like a business rule.

    Booleans never qualify. Numbers qualify unless they are -1, 0, or 1.
    Strings of at least ``MIN_POLICY_STRING_LENGTH`` characters qualify when
    they are an UPPER_SNAKE token, an ISO-style date, or contain a status word.
    Every other value is rejected.
    """
    # bool is a subclass of int, so it must be ruled out before the numeric test.
    if isinstance(value, bool):
        return False
    if isinstance(value, (int, float)):
        return value not in {-1, 0, 1}
    if not isinstance(value, str):
        return False
    if len(value) < MIN_POLICY_STRING_LENGTH:
        return False
    if re.fullmatch(r"[A-Z][A-Z0-9_]+", value):
        return True
    if re.fullmatch(r"\d{4}-\d{2}-\d{2}", value):
        return True
    return STATUS_WORD.search(value) is not None
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def is_allowed_literal_context(node: nodes.Const) -> bool:
    """Return True when the literal sits somewhere a hard-coded value is accepted.

    That is either an assignment to an uppercase name, or anywhere inside a call
    whose name is listed in ``ALLOWED_LITERAL_CALLS``.
    """
    if is_uppercase_assignment(node):
        return True
    ancestor = node.parent
    while ancestor is not None:
        inside_allowed_call = isinstance(ancestor, nodes.Call) and call_name(ancestor) in ALLOWED_LITERAL_CALLS
        if inside_allowed_call:
            return True
        ancestor = ancestor.parent
    return False
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def is_policy_literal_context(node: nodes.Const) -> bool:
    """Return True when the literal is used where a policy decision is expressed.

    Numeric literals must first sit inside a comparison. Ancestors are then
    walked up to the enclosing class, function, or module. A Compare, If,
    Return, or Call ancestor qualifies. The first Assign ancestor decides the
    result by whether it has an attribute or subscript target.
    """
    is_number = isinstance(node.value, (int, float))
    if is_number and not literal_looks_like_named_threshold(node):
        return False

    scope_boundaries = (nodes.ClassDef, nodes.FunctionDef, nodes.AsyncFunctionDef, nodes.Module)
    policy_parents = (nodes.Compare, nodes.If, nodes.Return, nodes.Call)
    stored_targets = (nodes.AssignAttr, nodes.Subscript)

    ancestor = node.parent
    while True:
        if ancestor is None or isinstance(ancestor, scope_boundaries):
            return False
        if isinstance(ancestor, policy_parents):
            return True
        if isinstance(ancestor, nodes.Assign):
            for target in ancestor.targets:
                if isinstance(target, stored_targets):
                    return True
            return False
        ancestor = ancestor.parent
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def literal_looks_like_named_threshold(node: nodes.Const) -> bool:
    """Return True when the literal is nested in a comparison within its own scope.

    Ancestors are walked until a Compare is found or the enclosing class,
    function, or module is reached.
    """
    scope_boundaries = (nodes.ClassDef, nodes.FunctionDef, nodes.AsyncFunctionDef, nodes.Module)
    ancestor = node.parent
    while True:
        if ancestor is None or isinstance(ancestor, scope_boundaries):
            return False
        if isinstance(ancestor, nodes.Compare):
            return True
        ancestor = ancestor.parent
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def attribute_depth(node: nodes.Attribute) -> int:
    """Count the consecutive Attribute links starting at ``node`` and following ``expr``."""
    links = 0
    cursor: nodes.NodeNG = node
    while isinstance(cursor, nodes.Attribute):
        cursor = cursor.expr
        links += 1
    return links
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import uuid
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from mcp_server.markdown import (
|
|
9
|
+
infer_markdown_rule_family,
|
|
10
|
+
markdown_aliases,
|
|
11
|
+
markdown_sections,
|
|
12
|
+
split_section_body,
|
|
13
|
+
)
|
|
14
|
+
from mcp_server.models import CleanCodeChunk, JsonDict
|
|
15
|
+
from mcp_server.text import (
|
|
16
|
+
clean_alias,
|
|
17
|
+
clean_topic,
|
|
18
|
+
clean_topic_text,
|
|
19
|
+
detected_record_id,
|
|
20
|
+
languages_in_text,
|
|
21
|
+
lint_candidates_in_text,
|
|
22
|
+
slug,
|
|
23
|
+
)
|
|
24
|
+
from mcp_server.utils.sha256_text import sha256_text
|
|
25
|
+
|
|
26
|
+
# Fourth ancestor directory of this file's resolved path; every source below is anchored to it.
ROOT = Path(__file__).resolve().parents[3]
# JSONL file holding one clean-code pattern record per non-blank line.
PATTERN_RECORDS = ROOT / "data" / "clean-code-patterns.jsonl"
# Markdown documents that build_chunks splits into section chunks when they exist.
MARKDOWN_SOURCES = (
    ROOT / "README.md",
    ROOT / "docs" / "eslint-custom-rules.md",
    ROOT / "docs" / "eslint-recommended-config.md",
    ROOT / "docs" / "python-lint-recommended-config.md",
    ROOT / "docs" / "python-pylint-custom-rules.md",
    ROOT / "docs" / "static-trigger-semantic-review.md",
)
# Fixed namespace for uuid5, so a given chunk id always maps to the same object id.
CHUNK_ID_NAMESPACE = uuid.UUID("fd1b279f-073e-5aa4-bf70-9f70446a3d8f")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def build_chunks(root: Path = ROOT) -> list[CleanCodeChunk]:
    """Build every chunk under ``root``: pattern records first, then markdown sections.

    Source paths are re-anchored from ``ROOT`` onto ``root``. Markdown sources
    that do not exist under ``root`` are skipped.
    """
    records_path = root / PATTERN_RECORDS.relative_to(ROOT)
    collected = list(pattern_record_chunks(records_path))
    markdown_paths = (root / source.relative_to(ROOT) for source in MARKDOWN_SOURCES)
    for markdown_path in markdown_paths:
        if not markdown_path.exists():
            continue
        collected += markdown_chunks(markdown_path, root=root)
    return collected
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def pattern_record_chunks(path: Path) -> list[CleanCodeChunk]:
    """Convert each JSONL pattern record in ``path`` into a ``CleanCodeChunk``.

    Blank lines are skipped. ``chunk_index`` is the zero-based line position in
    the file, so skipped blank lines still advance it.

    Raises:
        KeyError: when a record lacks one of the required fields.
        json.JSONDecodeError: when a non-blank line is not valid JSON.
    """
    chunks: list[CleanCodeChunk] = []
    # Read as UTF-8 explicitly: the platform default encoding may differ and
    # would mis-decode or reject non-ASCII text in the records.
    with path.open(encoding="utf-8") as handle:
        for index, line in enumerate(handle):
            if not line.strip():
                continue
            record = json.loads(line)
            chunk_id = f"pattern:{record['id']}"
            topic = clean_topic(str(record["topic"]))
            # Drop aliases that clean_alias reduces to an empty value.
            aliases = tuple(
                alias
                for alias in (clean_alias(str(item)) for item in record["aliases"])
                if alias
            )
            embedding_text = clean_topic_text(str(record["embedding_text"]).strip())
            display_text = clean_topic_text(str(record["display_text"]).strip())
            # A language is listed when the record has a good or bad example for it.
            languages = tuple(
                language
                for language in ("typescript", "python")
                if record.get("good_examples", {}).get(language)
                or record.get("bad_examples", {}).get(language)
            )
            chunks.append(
                CleanCodeChunk(
                    chunk_id=chunk_id,
                    object_id=object_id_for(chunk_id),
                    source_file=path.name,
                    source_kind="clean_code_pattern",
                    record_id=str(record["id"]),
                    title=str(record["title"]),
                    topic=topic,
                    section_path=(topic, str(record["title"])),
                    chunk_kind="pattern_record",
                    chunk_index=index,
                    rule_family=str(record["rule_family"]),
                    lintability=str(record["lintability"]),
                    aliases=aliases,
                    languages=languages,
                    lint_candidates=tuple(str(item) for item in record["lint_candidates"]),
                    content_text=display_text,
                    embedding_text=embedding_text,
                    display_text=display_text,
                    text_hash=sha256_text(embedding_text),
                )
            )
    return chunks
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def load_pattern_records(path: Path = PATTERN_RECORDS) -> list[JsonDict]:
    """Load the raw pattern records from a JSONL file.

    Blank lines and lines whose JSON value is not an object are ignored.

    Raises:
        json.JSONDecodeError: when a non-blank line is not valid JSON.
    """
    records: list[JsonDict] = []
    # Read as UTF-8 explicitly rather than relying on the platform default encoding.
    with path.open(encoding="utf-8") as handle:
        for line in handle:
            if not line.strip():
                continue
            record = json.loads(line)
            if isinstance(record, dict):
                records.append(record)
    return records
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_pattern_record(pattern_id: str, *, root: Path = ROOT) -> JsonDict | None:
    """Return the first pattern record whose ``id`` matches ``pattern_id``, or None.

    The comparison ignores case, and surrounding whitespace on ``pattern_id``.
    """
    wanted = pattern_id.strip().upper()
    records = load_pattern_records(root / PATTERN_RECORDS.relative_to(ROOT))
    matches = (record for record in records if str(record.get("id", "")).upper() == wanted)
    return next(matches, None)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def markdown_chunks(path: Path, *, root: Path = ROOT) -> list[CleanCodeChunk]:
    """Turn a markdown file into one ``CleanCodeChunk`` per section body part.

    The first part of a section keeps the plain section id; later parts get a
    ``:<n>`` suffix (starting at 2) and the ``markdown_section_part`` kind.
    """
    relative_path = path.relative_to(root).as_posix()
    collected: list[CleanCodeChunk] = []
    for section_index, section in enumerate(markdown_sections(path, root=root)):
        for split_index, body in enumerate(split_section_body(section.body)):
            is_first_part = split_index == 0
            heading_text = " > ".join(section.section_path)
            record_id = detected_record_id(section.heading)
            base_id = f"md:{relative_path}:{slug(heading_text)}"
            chunk_id = base_id if is_first_part else f"{base_id}:{split_index + 1}"
            content_text = clean_topic_text(body.strip())
            embedding_text = (
                f"Markdown section: {heading_text}\n"
                f"Source: {relative_path}:{section.start_line}-{section.end_line}\n\n"
                f"{content_text}"
            )
            # Fall back to the heading when the section has no path.
            if section.section_path:
                topic = clean_topic(section.section_path[0])
            else:
                topic = clean_topic(section.heading)
            # Keep only aliases that survive cleaning.
            aliases = tuple(
                cleaned
                for cleaned in (clean_alias(alias) for alias in markdown_aliases(section))
                if cleaned
            )
            chunk = CleanCodeChunk(
                chunk_id=chunk_id,
                object_id=object_id_for(chunk_id),
                source_file=relative_path,
                source_kind="markdown_doc",
                record_id=record_id,
                title=section.heading,
                topic=topic,
                section_path=tuple(clean_topic(item) for item in section.section_path),
                chunk_kind="markdown_section" if is_first_part else "markdown_section_part",
                chunk_index=section_index * 100 + split_index,
                rule_family=infer_markdown_rule_family(section),
                lintability="",
                aliases=aliases,
                languages=languages_in_text(content_text),
                lint_candidates=lint_candidates_in_text(content_text),
                content_text=content_text,
                embedding_text=embedding_text,
                display_text=embedding_text,
                text_hash=sha256_text(embedding_text),
            )
            collected.append(chunk)
    return collected
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def object_id_for(chunk_id: str) -> str:
    """Return the name-based (version 5) UUID string for ``chunk_id`` under ``CHUNK_ID_NAMESPACE``."""
    derived = uuid.uuid5(CHUNK_ID_NAMESPACE, chunk_id)
    return str(derived)
|