claude-dev-env 1.50.1 → 1.50.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/_shared/pr-loop/audit-contract.md +3 -3
  2. package/audit-rubrics/category_rubrics/category-e-dead-code.md +3 -2
  3. package/audit-rubrics/prompts/category-a-api-contracts.md +1 -1
  4. package/audit-rubrics/prompts/category-b-selector-engine-compat.md +2 -2
  5. package/audit-rubrics/prompts/category-c-resource-cleanup.md +2 -2
  6. package/audit-rubrics/prompts/category-d-scoping-and-ordering.md +2 -2
  7. package/audit-rubrics/prompts/category-e-dead-code.md +5 -4
  8. package/audit-rubrics/prompts/category-f-silent-failures.md +2 -2
  9. package/audit-rubrics/prompts/category-g-bounds-and-overflow.md +2 -2
  10. package/audit-rubrics/prompts/category-h-security-boundaries.md +2 -2
  11. package/audit-rubrics/prompts/category-i-concurrency.md +2 -2
  12. package/audit-rubrics/prompts/category-j-code-rules-compliance.md +2 -2
  13. package/audit-rubrics/prompts/category-k-codebase-conflicts.md +2 -2
  14. package/docs/CODE_RULES.md +1 -1
  15. package/hooks/blocking/code_rules_annotations_length.py +167 -0
  16. package/hooks/blocking/code_rules_banned_identifiers.py +385 -0
  17. package/hooks/blocking/code_rules_boolean_mustcheck.py +350 -0
  18. package/hooks/blocking/code_rules_comments.py +337 -0
  19. package/hooks/blocking/code_rules_constants_config.py +252 -0
  20. package/hooks/blocking/code_rules_docstrings.py +308 -0
  21. package/hooks/blocking/code_rules_enforcer.py +98 -5807
  22. package/hooks/blocking/code_rules_imports_logging.py +276 -0
  23. package/hooks/blocking/code_rules_magic_values.py +180 -0
  24. package/hooks/blocking/code_rules_mock_completeness.py +295 -0
  25. package/hooks/blocking/code_rules_naming_collection.py +264 -0
  26. package/hooks/blocking/code_rules_optional_params.py +288 -0
  27. package/hooks/blocking/code_rules_paths_syspath.py +186 -0
  28. package/hooks/blocking/code_rules_probe_chains.py +305 -0
  29. package/hooks/blocking/code_rules_probe_detection.py +257 -0
  30. package/hooks/blocking/code_rules_probe_recording.py +225 -0
  31. package/hooks/blocking/code_rules_scope_binding.py +151 -0
  32. package/hooks/blocking/code_rules_shared.py +301 -0
  33. package/hooks/blocking/code_rules_string_magic.py +207 -0
  34. package/hooks/blocking/code_rules_test_assertions.py +226 -0
  35. package/hooks/blocking/code_rules_test_branching_except.py +181 -0
  36. package/hooks/blocking/code_rules_test_isolation.py +341 -0
  37. package/hooks/blocking/code_rules_type_escape.py +341 -0
  38. package/hooks/blocking/code_rules_typeddict_stub.py +305 -0
  39. package/hooks/blocking/code_rules_unused_imports.py +256 -0
  40. package/hooks/blocking/tdd_enforcer.py +31 -0
  41. package/hooks/blocking/test_code_rules_constants_config.py +26 -0
  42. package/hooks/blocking/test_code_rules_enforcer_banned_noun_word.py +5 -2
  43. package/hooks/blocking/test_code_rules_enforcer_cap_meta.py +0 -5
  44. package/hooks/blocking/test_code_rules_enforcer_comment_string_awareness.py +21 -15
  45. package/hooks/blocking/test_code_rules_enforcer_config_path.py +20 -16
  46. package/hooks/blocking/test_code_rules_enforcer_exempt_marker_chained.py +4 -2
  47. package/hooks/blocking/test_code_rules_enforcer_function_length.py +18 -13
  48. package/hooks/blocking/test_code_rules_enforcer_hardcoded_user_path.py +1 -2
  49. package/hooks/blocking/test_code_rules_enforcer_ignored_must_check_return.py +22 -12
  50. package/hooks/blocking/test_code_rules_enforcer_split_annotations_length.py +55 -0
  51. package/hooks/blocking/test_code_rules_enforcer_split_banned.py +170 -0
  52. package/hooks/blocking/test_code_rules_enforcer_split_comments.py +60 -0
  53. package/hooks/blocking/test_code_rules_enforcer_split_config_path.py +52 -0
  54. package/hooks/blocking/test_code_rules_enforcer_split_constants_config.py +236 -0
  55. package/hooks/blocking/test_code_rules_enforcer_split_entry_1.py +296 -0
  56. package/hooks/blocking/test_code_rules_enforcer_split_entry_2.py +238 -0
  57. package/hooks/blocking/test_code_rules_enforcer_split_isolation_1.py +271 -0
  58. package/hooks/blocking/test_code_rules_enforcer_split_isolation_2.py +283 -0
  59. package/hooks/blocking/test_code_rules_enforcer_split_isolation_3.py +268 -0
  60. package/hooks/blocking/test_code_rules_enforcer_split_isolation_4.py +85 -0
  61. package/hooks/blocking/test_code_rules_enforcer_split_mocks_1.py +303 -0
  62. package/hooks/blocking/test_code_rules_enforcer_split_mocks_2.py +111 -0
  63. package/hooks/blocking/test_code_rules_enforcer_split_mustcheck.py +87 -0
  64. package/hooks/blocking/test_code_rules_enforcer_split_naming.py +107 -0
  65. package/hooks/blocking/test_code_rules_enforcer_split_optional_params.py +325 -0
  66. package/hooks/blocking/test_code_rules_enforcer_split_paths_syspath.py +110 -0
  67. package/hooks/blocking/test_code_rules_enforcer_split_shared.py +44 -0
  68. package/hooks/blocking/test_code_rules_enforcer_split_string_magic.py +55 -0
  69. package/hooks/blocking/test_code_rules_enforcer_split_test_assertions.py +56 -0
  70. package/hooks/blocking/test_code_rules_enforcer_todo_markers.py +21 -15
  71. package/hooks/blocking/test_code_rules_paths_syspath.py +26 -0
  72. package/hooks/blocking/test_tdd_enforcer.py +116 -0
  73. package/hooks/hooks_constants/blocking_check_limits.py +3 -0
  74. package/hooks/hooks_constants/code_rules_enforcer_constants.py +8 -0
  75. package/hooks/hooks_constants/sys_path_insert_constants.py +1 -0
  76. package/package.json +1 -1
  77. package/skills/_shared/pr-loop/scripts/build_audit_prompt.py +13 -7
  78. package/skills/_shared/pr-loop/scripts/skills_pr_loop_constants/path_resolver_constants.py +21 -11
  79. package/skills/_shared/pr-loop/scripts/test_build_audit_prompt.py +92 -0
  80. package/skills/bugteam/CONSTRAINTS.md +1 -1
  81. package/skills/bugteam/PROMPTS.md +20 -48
  82. package/skills/bugteam/SKILL.md +5 -5
  83. package/skills/bugteam/reference/audit-and-teammates.md +1 -1
  84. package/skills/bugteam/reference/audit-contract.md +4 -4
  85. package/skills/bugteam/reference/design-rationale.md +1 -1
  86. package/skills/findbugs/SKILL.md +21 -12
  87. package/skills/fixbugs/SKILL.md +1 -1
  88. package/skills/qbug/SKILL.md +5 -5
  89. package/skills/qbug/test_qbug_skill_audit_schema.py +13 -23
  90. package/skills/refine/SKILL.md +1 -1
  91. package/hooks/blocking/test_code_rules_enforcer.py +0 -2669
@@ -0,0 +1,350 @@
1
+ """Boolean naming-prefix and ignored must-check-return checks."""
2
+
3
+ import ast
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ _blocking_directory = str(Path(__file__).resolve().parent)
8
+ _hooks_directory = str(Path(__file__).resolve().parent.parent)
9
+ if _blocking_directory not in sys.path:
10
+ sys.path.insert(0, _blocking_directory)
11
+ if _hooks_directory not in sys.path:
12
+ sys.path.insert(0, _hooks_directory)
13
+
14
+ from code_rules_path_utils import ( # noqa: E402
15
+ is_config_file,
16
+ )
17
+ from code_rules_shared import ( # noqa: E402
18
+ _scope_violations_to_changed_lines,
19
+ is_hook_infrastructure,
20
+ is_test_file,
21
+ is_workflow_registry_file,
22
+ )
23
+
24
+ from hooks_constants.blocking_check_limits import ( # noqa: E402
25
+ MAX_IGNORED_MUST_CHECK_RETURN_ISSUES,
26
+ )
27
+ from hooks_constants.code_rules_enforcer_constants import ( # noqa: E402
28
+ ALL_BOOLEAN_NAME_PREFIXES,
29
+ ALL_MUST_CHECK_RETURN_FUNCTION_NAMES,
30
+ ALL_SELF_AND_CLS_PARAMETER_NAMES,
31
+ UPPER_SNAKE_CONSTANT_PATTERN,
32
+ )
33
+
34
+
35
+ def _is_bool_constant(node: ast.AST) -> bool:
36
+ return isinstance(node, ast.Constant) and isinstance(node.value, bool)
37
+
38
+
39
def _rhs_names_if_all_bool(value_node: ast.AST, target_node: ast.AST) -> list[str]:
    """Return the plain names bound by a tuple assignment of boolean literals.

    Covers statements such as ``valid, permitted = True, False``: both sides
    must be tuples of equal length and every right-hand element must be a
    boolean constant. Target elements that are not plain names are left out
    of the result. Any other shape yields an empty list.
    """
    is_tuple_to_tuple = isinstance(target_node, ast.Tuple) and isinstance(
        value_node, ast.Tuple
    )
    if not is_tuple_to_tuple:
        return []
    if len(target_node.elts) != len(value_node.elts):
        return []
    for each_value in value_node.elts:
        if not _is_bool_constant(each_value):
            return []
    return [
        each_element.id
        for each_element in target_node.elts
        if isinstance(each_element, ast.Name)
    ]
58
+
59
+
60
def _assign_target_names_for_bool(node: ast.Assign) -> list[str]:
    """Return every name that *node* binds to a boolean literal.

    A plain-name target counts when the assigned value is a boolean constant
    (this also covers chained forms such as ``a = b = True``). Every other
    target is handed to ``_rhs_names_if_all_bool`` so tuple unpacking of
    boolean literals is picked up as well.
    """
    is_value_bool_constant = _is_bool_constant(node.value)
    collected_names: list[str] = []
    for each_target in node.targets:
        if is_value_bool_constant and isinstance(each_target, ast.Name):
            collected_names.append(each_target.id)
            continue
        collected_names.extend(_rhs_names_if_all_bool(node.value, each_target))
    return collected_names
70
+
71
+
72
def _annassign_target_name_for_bool(node: ast.AnnAssign) -> list[str]:
    """Return the target name of ``name: bool = <bool literal>``, else nothing.

    The statement qualifies only when the target is a plain name, the
    annotation is the bare ``bool`` name, and a boolean constant is assigned.
    """
    target = node.target
    if not isinstance(target, ast.Name):
        return []
    annotation = node.annotation
    if not (isinstance(annotation, ast.Name) and annotation.id == "bool"):
        return []
    if node.value is None or not _is_bool_constant(node.value):
        return []
    return [target.id]
80
+
81
+
82
def _walrus_name_for_bool(node: ast.NamedExpr) -> list[str]:
    """Return the name bound by ``(name := <bool literal>)``, else an empty list."""
    is_name_bound_to_bool = isinstance(node.target, ast.Name) and _is_bool_constant(
        node.value
    )
    return [node.target.id] if is_name_bound_to_bool else []
88
+
89
+
90
def _collect_boolean_assignments(tree: ast.Module) -> list[tuple[str, int, bool]]:
    """Collect (name, line_number, is_upper_snake_scope) for boolean-literal bindings.

    Plain assignments, annotated assignments, and walrus expressions are all
    inspected. ``is_upper_snake_scope`` is True when the binding statement is a
    direct child of the module body or of a class body, the two places where
    UPPER_SNAKE constants are acceptable; bindings nested anywhere else
    (function and method bodies included) get False.

    Scope membership is tracked by ``id()`` of the statement nodes, gathered in
    a first pass and consulted in a second pass over the same tree. The two
    passes are kept separate because ``ast.walk`` does not promise a visiting
    order. Do not feed this helper a tree rebuilt through an
    ``ast.NodeTransformer``: replaced nodes are fresh instances, so the
    identity-based tagging would silently miss them.
    """
    upper_snake_scope_ids: set[int] = {
        id(each_statement) for each_statement in tree.body
    }
    for each_node in ast.walk(tree):
        if isinstance(each_node, ast.ClassDef):
            upper_snake_scope_ids.update(
                id(each_class_statement) for each_class_statement in each_node.body
            )

    collected: list[tuple[str, int, bool]] = []
    for each_node in ast.walk(tree):
        if isinstance(each_node, ast.Assign):
            bound_names = _assign_target_names_for_bool(each_node)
        elif isinstance(each_node, ast.AnnAssign):
            bound_names = _annassign_target_name_for_bool(each_node)
        elif isinstance(each_node, ast.NamedExpr):
            bound_names = _walrus_name_for_bool(each_node)
        else:
            continue
        if not bound_names:
            continue
        is_in_upper_snake_scope = id(each_node) in upper_snake_scope_ids
        collected.extend(
            (each_name, each_node.lineno, is_in_upper_snake_scope)
            for each_name in bound_names
        )
    return collected
129
+
130
+
131
def _argument_is_boolean(argument_node: ast.arg, default_node: ast.expr | None) -> bool:
    """Return True when a parameter is boolean by annotation or by default value.

    The annotation must be the bare ``bool`` name, or the default (when one
    exists) must be a boolean literal.
    """
    annotation = argument_node.annotation
    if isinstance(annotation, ast.Name) and annotation.id == "bool":
        return True
    return default_node is not None and _is_bool_constant(default_node)
138
+
139
+
140
def _bool_parameters_for_function(
    function_node: ast.FunctionDef | ast.AsyncFunctionDef,
) -> list[tuple[str, int]]:
    """Return (name, line_number) for each boolean parameter of one function.

    Positional parameters (positional-only first, then regular) are paired
    with their defaults by right-aligning ``args.defaults``, since the AST
    stores defaults only for the trailing parameters. Keyword-only parameters
    are paired with ``kw_defaults`` one to one. Parameters named in
    ``ALL_SELF_AND_CLS_PARAMETER_NAMES`` are skipped.
    """
    arguments = function_node.args
    positional_arguments = [*arguments.posonlyargs, *arguments.args]
    missing_default_count = len(positional_arguments) - len(arguments.defaults)
    all_positional_defaults = [None] * missing_default_count + list(arguments.defaults)
    all_argument_default_pairs = [
        *zip(positional_arguments, all_positional_defaults),
        *zip(arguments.kwonlyargs, arguments.kw_defaults),
    ]
    return [
        (each_argument.arg, each_argument.lineno)
        for each_argument, each_default in all_argument_default_pairs
        if each_argument.arg not in ALL_SELF_AND_CLS_PARAMETER_NAMES
        and _argument_is_boolean(each_argument, each_default)
    ]
163
+
164
+
165
def _collect_bool_parameter_names(tree: ast.Module) -> list[tuple[str, int]]:
    """Gather boolean parameters from every function definition in *tree*.

    Both ``def`` and ``async def`` definitions are visited, at any nesting
    depth. What makes a parameter boolean, and which names are skipped, is
    decided by ``_bool_parameters_for_function``.

    Args:
        tree: The parsed module to inspect.

    Returns:
        One (name, line_number) pair per boolean parameter found.
    """
    all_function_node_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    return [
        each_parameter
        for each_node in ast.walk(tree)
        if isinstance(each_node, all_function_node_types)
        for each_parameter in _bool_parameters_for_function(each_node)
    ]
182
+
183
+
184
def check_boolean_naming(
    content: str,
    file_path: str,
    all_changed_lines: set[int] | None = None,
    defer_scope_to_caller: bool = False,
) -> list[str]:
    """Report boolean assignments and parameters that lack a required name prefix.

    *content* is expected to be the full reconstructed file rather than an
    Edit's ``new_string`` fragment, because a fragment rarely parses on its
    own. Findings are then narrowed to *all_changed_lines* so that an edit is
    blocked by the unprefixed boolean it introduces, not by an older violation
    on a line it never touched.

    Test files, hook infrastructure, config files, and workflow registry files
    are exempt. Source that fails to parse produces an advisory on stderr and
    no findings. Single-character names are ignored, as are UPPER_SNAKE names
    bound directly in a module or class body.

    Args:
        content: The source text to inspect, normally the whole post-edit file.
        file_path: Destination path of the source, used for the exemptions.
        all_changed_lines: Post-edit line numbers touched by the current edit,
            or None to treat the entire file as in scope.
        defer_scope_to_caller: When True, every violation is returned so the
            commit/push gate's ``split_violations_by_scope`` can scope by
            added line itself.

    Returns:
        One message per offending assignment, followed by one per offending
        parameter, scoped to the changed lines unless *defer_scope_to_caller*
        is True or *all_changed_lines* is None. No module cap is applied here.
    """
    is_exempt_path = (
        is_test_file(file_path)
        or is_hook_infrastructure(file_path)
        or is_config_file(file_path)
        or is_workflow_registry_file(file_path)
    )
    if is_exempt_path:
        return []
    try:
        tree = ast.parse(content)
    except SyntaxError as parse_error:
        print(
            f"[CODE_RULES advisory] {file_path}: boolean-naming check skipped - "
            f"SyntaxError at line {parse_error.lineno}: {parse_error.msg}",
            file=sys.stderr,
        )
        return []

    boolean_prefix_suffix = "is_/has_/should_/can_/was_/did_"
    all_violations_in_walk_order: list[tuple[range, str]] = []

    for each_name, each_line_number, each_is_in_upper_snake_scope in (
        _collect_boolean_assignments(tree)
    ):
        is_exempt_name = (
            len(each_name) == 1
            or (
                each_is_in_upper_snake_scope
                and UPPER_SNAKE_CONSTANT_PATTERN.match(each_name)
            )
            or each_name.startswith(ALL_BOOLEAN_NAME_PREFIXES)
        )
        if is_exempt_name:
            continue
        all_violations_in_walk_order.append(
            (
                range(each_line_number, each_line_number + 1),
                f"Line {each_line_number}: Boolean {each_name} - prefix with "
                f"{boolean_prefix_suffix}",
            )
        )

    for each_name, each_line_number in _collect_bool_parameter_names(tree):
        is_exempt_name = len(each_name) == 1 or each_name.startswith(
            ALL_BOOLEAN_NAME_PREFIXES
        )
        if is_exempt_name:
            continue
        all_violations_in_walk_order.append(
            (
                range(each_line_number, each_line_number + 1),
                f"Line {each_line_number}: Boolean parameter {each_name} - prefix with "
                f"{boolean_prefix_suffix}",
            )
        )

    return _scope_violations_to_changed_lines(
        all_violations_in_walk_order,
        all_changed_lines,
        defer_scope_to_caller,
    )
265
+
266
+
267
+ def _called_terminal_name(call_node: ast.Call) -> str | None:
268
+ callee = call_node.func
269
+ if isinstance(callee, ast.Name):
270
+ return callee.id
271
+ if isinstance(callee, ast.Attribute):
272
+ return callee.attr
273
+ return None
274
+
275
+
276
def check_ignored_must_check_return(
    content: str,
    file_path: str,
    all_changed_lines: set[int] | None = None,
    defer_scope_to_caller: bool = False,
) -> list[str]:
    """Report statement-level calls that throw away a must-check return value.

    The functions named in ``ALL_MUST_CHECK_RETURN_FUNCTION_NAMES`` signal
    success or failure only through what they return, so calling one as a bare
    statement lets the caller carry on after a failure. A bare expression
    statement is reported whether the call stands alone or is wrapped in
    ``await``. Calls whose result is assigned or branched on are not expression
    statements and are therefore never reported.

    *content* is expected to be the full reconstructed file rather than an
    Edit's ``new_string`` fragment, since a fragment rarely parses on its own.
    Findings are narrowed to *all_changed_lines* so an edit is blocked by the
    discarded return it introduces, not by an older one on an untouched line.
    Test files are exempt, and source that fails to parse yields no findings.

    Args:
        content: The source text to inspect, normally the whole post-edit file.
        file_path: Destination path of the source, used for the exemption.
        all_changed_lines: Post-edit line numbers touched by the current edit,
            or None to treat the entire file as in scope. A violation spans
            every line of its statement.
        defer_scope_to_caller: When True, every violation is returned so the
            commit/push gate's ``split_violations_by_scope`` can scope by
            added line itself.

    Returns:
        One message per discarded must-check return. With
        *defer_scope_to_caller* True the list is returned uncapped so the gate
        can apply its own ceiling; otherwise it is truncated to
        ``MAX_IGNORED_MUST_CHECK_RETURN_ISSUES`` entries.
    """
    if is_test_file(file_path):
        return []
    try:
        tree = ast.parse(content)
    except SyntaxError:
        return []

    all_violations_in_walk_order: list[tuple[range, str]] = []
    for each_statement in ast.walk(tree):
        if not isinstance(each_statement, ast.Expr):
            continue
        candidate_call = each_statement.value
        if isinstance(candidate_call, ast.Await):
            candidate_call = candidate_call.value
        if not isinstance(candidate_call, ast.Call):
            continue
        called_name = _called_terminal_name(candidate_call)
        if called_name is None:
            continue
        if called_name not in ALL_MUST_CHECK_RETURN_FUNCTION_NAMES:
            continue
        first_line_number = each_statement.lineno
        last_line_number = each_statement.end_lineno or first_line_number
        all_violations_in_walk_order.append(
            (
                range(first_line_number, last_line_number + 1),
                f"Line {first_line_number}: return value of {called_name}() is discarded - "
                "assign and check it (the boolean/outcome is the only failure signal)",
            )
        )

    scoped_issues = _scope_violations_to_changed_lines(
        all_violations_in_walk_order,
        all_changed_lines,
        defer_scope_to_caller,
    )
    return (
        scoped_issues
        if defer_scope_to_caller
        else scoped_issues[:MAX_IGNORED_MUST_CHECK_RETURN_ISSUES]
    )
@@ -0,0 +1,337 @@
1
+ """Comment-presence and comment-change checks for Python and JavaScript sources."""
2
+
3
+ import io
4
+ import sys
5
+ import tokenize
6
+ from collections.abc import Iterator
7
+ from pathlib import Path
8
+
9
+ _blocking_directory = str(Path(__file__).resolve().parent)
10
+ _hooks_directory = str(Path(__file__).resolve().parent.parent)
11
+ if _blocking_directory not in sys.path:
12
+ sys.path.insert(0, _blocking_directory)
13
+ if _hooks_directory not in sys.path:
14
+ sys.path.insert(0, _hooks_directory)
15
+
16
+ from code_rules_shared import ( # noqa: E402
17
+ get_file_extension,
18
+ )
19
+
20
+ from hooks_constants.code_rules_enforcer_constants import ( # noqa: E402
21
+ ALL_FREE_FORM_EXEMPT_COMMENT_BODIES,
22
+ ALL_JAVASCRIPT_EXEMPT_COMMENT_PREFIXES,
23
+ ALL_JAVASCRIPT_EXEMPT_INLINE_COMMENT_PREFIXES,
24
+ ALL_JAVASCRIPT_EXTENSIONS,
25
+ ALL_PYTHON_EXTENSIONS,
26
+ ALL_PYTHON_TOKENIZE_FAILURE_EXCEPTIONS,
27
+ ALL_TOKEN_ANCHORED_DIRECTIVE_BOUNDARY_CHARACTERS,
28
+ ALL_TOKEN_ANCHORED_EXEMPT_COMMENT_BODIES,
29
+ CHAINED_INLINE_COMMENT_PATTERN,
30
+ MAX_COMMENT_ISSUES,
31
+ )
32
+
33
+
34
def check_comments_python(content: str) -> list[str]:
    """Report non-exempt comments found in Python source.

    Only genuine ``COMMENT`` tokens are considered, so a hash character that
    sits inside a string literal (a hex colour, a URL fragment, text within an
    f-string) is never mistaken for a comment.

    If the source cannot be tokenized (a partial Edit fragment, invalid
    syntax) the token stream simply ends early; there is no line-by-line
    fallback. Missing a comment in a broken draft is preferred over flagging
    string contents as comments.

    Reporting stops once ``MAX_COMMENT_ISSUES`` messages have been gathered.
    """
    issues: list[str] = []
    for each_comment_token in _comment_tokens(content):
        if not _is_exempt_python_comment(each_comment_token):
            issues.append(
                f"Line {each_comment_token.start[0]}: "
                "Comment found - refactor to self-documenting code"
            )
            if len(issues) >= MAX_COMMENT_ISSUES:
                break
    return issues
61
+
62
+
63
def check_comments_javascript(content: str) -> list[str]:
    """Report comments found in JavaScript/TypeScript source.

    The scan is line based. A line is treated as a comment only when, after
    stripping whitespace, it begins with ``/*`` or ``//``:

    * ``/*`` opens a block comment and is reported unless it opens a ``/**``
      documentation block. Lines inside an unterminated block are skipped
      until a line containing ``*/`` is seen.
    * ``//`` is reported unless the line starts with one of
      ``ALL_JAVASCRIPT_EXEMPT_COMMENT_PREFIXES``.

    The ``MAX_COMMENT_ISSUES`` ceiling is evaluated after every scanned line
    that is not inside a block comment, block-comment openers included, so a
    run of consecutive block comments cannot push the result past the cap.

    Args:
        content: The JavaScript/TypeScript source text to scan.

    Returns:
        One message per reported comment line, in source order, stopping once
        the cap is reached.
    """
    issues: list[str] = []
    is_in_multiline_comment = False

    for each_line_number, each_line in enumerate(content.split("\n"), 1):
        stripped = each_line.strip()
        if not stripped:
            continue

        if is_in_multiline_comment:
            if "*/" in stripped:
                is_in_multiline_comment = False
            continue

        if stripped.startswith("/*"):
            is_in_multiline_comment = "*/" not in stripped
            if not stripped.startswith("/**"):
                issues.append(f"Line {each_line_number}: Block comment found - refactor to self-documenting code")
        elif stripped.startswith("//"):
            if not stripped.startswith(ALL_JAVASCRIPT_EXEMPT_COMMENT_PREFIXES):
                issues.append(f"Line {each_line_number}: Comment found - refactor to self-documenting code")

        if len(issues) >= MAX_COMMENT_ISSUES:
            break

    return issues
94
+
95
+
96
def extract_comment_texts(content: str, file_path: str) -> tuple[set[str], set[str]]:
    """Collect the normalized comment strings of *content* for later comparison.

    The file extension selects the strategy. Python sources are delegated to
    ``_extract_python_comment_sets``. JavaScript sources are scanned line by
    line: a stripped line starting with ``/*`` (but not ``/**``) or with a
    non-exempt ``//`` is a standalone comment, and a ``//`` that follows code
    on the same line is an inline comment unless its text starts with an
    exempt inline prefix. Any other extension, and empty content, yields two
    empty sets.

    Returns:
        Tuple of (inline_comments, standalone_comments). Inline comments
        trail code on the same line; standalone comments occupy the line on
        their own.
    """
    extension = get_file_extension(file_path)
    inline_comments: set[str] = set()
    standalone_comments: set[str] = set()
    if not content:
        return inline_comments, standalone_comments

    if extension in ALL_PYTHON_EXTENSIONS:
        inline_comments, standalone_comments, _ = _extract_python_comment_sets(content)
        return inline_comments, standalone_comments

    if extension not in ALL_JAVASCRIPT_EXTENSIONS:
        return inline_comments, standalone_comments

    is_in_multiline = False
    for each_line in content.split("\n"):
        stripped = each_line.strip()
        if not stripped:
            continue
        if is_in_multiline:
            is_in_multiline = "*/" not in stripped
            continue
        if stripped.startswith("/*"):
            is_in_multiline = "*/" not in stripped
            if not stripped.startswith("/**"):
                standalone_comments.add(stripped)
            continue
        if stripped.startswith("//"):
            if not stripped.startswith(ALL_JAVASCRIPT_EXEMPT_COMMENT_PREFIXES):
                standalone_comments.add(stripped)
            continue
        code_before_comment, comment_marker, comment_body = stripped.partition("//")
        if not comment_marker or not code_before_comment.strip():
            continue
        if comment_body.strip().startswith(ALL_JAVASCRIPT_EXEMPT_INLINE_COMMENT_PREFIXES):
            continue
        inline_comments.add(comment_marker + comment_body)

    return inline_comments, standalone_comments
143
+
144
+
145
def check_comment_changes(old_content: str, new_content: str, file_path: str) -> list[str]:
    """Compare the comments of two versions of a file and report the differences.

    * An inline comment (trailing code on its line) that is new: BLOCK.
    * A standalone comment line that is new: NUDGE, printed to stderr as an
      advisory and not added to the returned issues.
    * A comment that existed before and is gone now: BLOCK, unless the edit
      also dropped more non-blank lines than the number of removed comments,
      which is taken to mean the surrounding code was removed too.

    For Python files, when either version cannot be tokenized (common for
    mid-edit fragments) the comparison is indeterminate: the failing side
    would look comment-free and every comment on the other side would appear
    added or removed. No issues are returned in that case.

    When several comments fall into one category, the quoted example is the
    lexicographically smallest one, so the message is the same on every run
    regardless of set iteration order.

    Args:
        old_content: The file text before the edit.
        new_content: The file text after the edit.
        file_path: Path of the file, used to pick the comment syntax.

    Returns:
        Blocking issue messages; at most one for added inline comments and at
        most one for removed comments. Each quotes up to 60 characters of the
        example comment.
    """
    issues: list[str] = []

    extension = get_file_extension(file_path)
    if extension in ALL_PYTHON_EXTENSIONS:
        old_inline, old_standalone, old_tokenize_ok = _extract_python_comment_sets(old_content)
        new_inline, new_standalone, new_tokenize_ok = _extract_python_comment_sets(new_content)
        if not (old_tokenize_ok and new_tokenize_ok):
            return issues
    else:
        old_inline, old_standalone = extract_comment_texts(old_content, file_path)
        new_inline, new_standalone = extract_comment_texts(new_content, file_path)

    added_inline = new_inline - old_inline
    if added_inline:
        sample = min(added_inline)
        issues.append(f"Inline comment added: {sample[:60]} - refactor to self-documenting code")

    added_standalone = new_standalone - old_standalone
    if added_standalone:
        sample = min(added_standalone)
        print(f"[CODE_RULES advisory] Standalone comment added: {sample[:60]} - prefer self-documenting code", file=sys.stderr)

    all_old = old_inline | old_standalone
    all_new = new_inline | new_standalone
    removed_comments = all_old - all_new
    if removed_comments:
        old_line_count = sum(1 for line in old_content.split("\n") if line.strip())
        new_line_count = sum(1 for line in new_content.split("\n") if line.strip())
        code_was_removed = new_line_count < old_line_count - len(removed_comments)
        if not code_was_removed:
            sample = min(removed_comments)
            issues.append(f"Existing comment removed: {sample[:60]} - NEVER delete existing comments")

    return issues
194
+
195
+
196
+ def _python_tokens(source: str) -> Iterator[tokenize.TokenInfo]:
197
+ """Yield Python tokens from *source* one at a time.
198
+
199
+ Centralizes the ``tokenize.generate_tokens`` entry-point so a future
200
+ change to the API lands in exactly one place. Iteration may raise
201
+ any of ``ALL_PYTHON_TOKENIZE_FAILURE_EXCEPTIONS`` when the source is
202
+ not valid Python (mid-edit Edit fragments, unterminated strings,
203
+ mismatched indentation) — callers handle the exception according to
204
+ their own contract (silently stop, return an indeterminate flag, etc.).
205
+ """
206
+ yield from tokenize.generate_tokens(io.StringIO(source).readline)
207
+
208
+
209
def _comment_tokens(source: str) -> Iterator[tokenize.TokenInfo]:
    """Lazily yield only the COMMENT tokens of *source*.

    Tokens are streamed from ``_python_tokens`` rather than collected first,
    so a consumer that stops early (``check_comments_python`` stops at
    ``MAX_COMMENT_ISSUES``) never pays for the rest of the file. A tokenize
    failure ends the stream quietly: comments seen before the failure have
    already been yielded and nothing after it is produced. No "indeterminate"
    signal is offered here; ``_extract_python_comment_sets``, which needs one,
    reads ``_python_tokens`` directly.
    """
    try:
        for each_token in _python_tokens(source):
            if each_token.type != tokenize.COMMENT:
                continue
            yield each_token
    except ALL_PYTHON_TOKENIZE_FAILURE_EXCEPTIONS:
        return
225
+
226
+
227
def _is_exempt_python_comment(comment_token: tokenize.TokenInfo) -> bool:
    """Decide whether a comment token is exempt from the no-comments rule.

    Exempt cases, checked in this order:

    1. A shebang: the comment starts with ``#!`` and the token begins at
       line 1, column 0. A ``#!`` lookalike anywhere else (after code, or on
       a later line) is not a real shebang and gets no exemption from this
       step.
    2. An empty comment: nothing but whitespace follows the hash.
    3. A free-form marker: the body starts with one of
       ``ALL_FREE_FORM_EXEMPT_COMMENT_BODIES`` (``type:``, ``TODO``,
       ``FIXME``, ``HACK``, ``XXX``). Any trailing prose is accepted, since
       ``# type:`` escapes carry a justification and TODO-family markers
       carry annotation text by convention.
    4. A token-anchored directive (``noqa``, ``pylint:``, ``pragma:``) that
       sits at a real token boundary and carries no chained second comment.
       A second ``#`` after the directive, with or without surrounding
       whitespace (``# noqa: F401#note``, ``# noqa: F401 # re-export``), is
       prose piggybacking on the directive and forfeits the exemption;
       directive bodies themselves never contain a ``#``.

    Whitespace (spaces or tabs) between the hash and the body is ignored for
    steps 2 to 4.
    """
    comment_string = comment_token.string
    is_real_shebang = comment_token.start == (1, 0) and comment_string.startswith("#!")
    if is_real_shebang:
        return True
    directive_body = comment_string[1:].lstrip()
    if not directive_body or directive_body.startswith(ALL_FREE_FORM_EXEMPT_COMMENT_BODIES):
        return True
    if _starts_with_bounded_token_anchored_directive(directive_body):
        return CHAINED_INLINE_COMMENT_PATTERN.search(directive_body) is None
    return False
272
+
273
+
274
def _starts_with_bounded_token_anchored_directive(directive_body: str) -> bool:
    """Return True when *directive_body* begins with a genuine exempt directive.

    A marker from ``ALL_TOKEN_ANCHORED_EXEMPT_COMMENT_BODIES`` (``noqa``,
    ``pylint:``, ``pragma:``) only counts when it ends at a directive
    boundary: the marker itself ends in a boundary character, or it is
    followed by end of string, whitespace, or a boundary character. Prose that
    merely shares the prefix, such as ``noqa-but-not-really: explanation``,
    therefore does not qualify.

    Args:
        directive_body: The comment text with the leading hash and the
            whitespace after it already removed.

    Returns:
        True when some marker matches at a real boundary, False otherwise.
    """
    for each_token in ALL_TOKEN_ANCHORED_EXEMPT_COMMENT_BODIES:
        if not directive_body.startswith(each_token):
            continue
        token_length = len(each_token)
        next_character = directive_body[token_length : token_length + 1]
        is_at_directive_boundary = (
            each_token[-1] in ALL_TOKEN_ANCHORED_DIRECTIVE_BOUNDARY_CHARACTERS
            or not next_character
            or next_character.isspace()
            or next_character in ALL_TOKEN_ANCHORED_DIRECTIVE_BOUNDARY_CHARACTERS
        )
        if is_at_directive_boundary:
            return True
    return False
305
+
306
+
307
def _extract_python_comment_sets(content: str) -> tuple[set[str], set[str], bool]:
    """Return (inline_comments, standalone_comments, tokenize_succeeded).

    *content* is tokenized once through ``_python_tokens`` and exempt comments
    are dropped. A comment is standalone when everything before it on its
    source line is whitespace, and inline otherwise. Comment text is stored
    stripped of surrounding whitespace.

    If tokenizing fails (mid-edit fragment, syntax error) the result is two
    empty sets and ``False``, discarding anything gathered before the failure,
    so callers can tell "could not determine" apart from "no comments".
    """
    inline_comments: set[str] = set()
    standalone_comments: set[str] = set()
    all_source_lines = content.split("\n")
    try:
        for each_token in _python_tokens(content):
            if each_token.type != tokenize.COMMENT or _is_exempt_python_comment(each_token):
                continue
            line_number, column_offset = each_token.start
            line_index = line_number - 1
            source_line = all_source_lines[line_index] if line_index < len(all_source_lines) else ""
            is_standalone = not source_line[:column_offset].strip()
            destination = standalone_comments if is_standalone else inline_comments
            destination.add(each_token.string.strip())
    except ALL_PYTHON_TOKENIZE_FAILURE_EXCEPTIONS:
        return set(), set(), False
    return inline_comments, standalone_comments, True