cursordoctrine 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,405 @@
1
+ """low_density.py - semantic-density scorer (the anti semantic-opacity layer).
2
+
3
+ Shared source of truth for the semantic-density signal. Used in two places:
4
+
5
+ 1. scan_slop.py imports `score_identifiers()` to add a thirteenth signal
6
+ bucket (semantic_density) to its audit-of-record.
7
+ 2. density_scan.py (the per-edit hook wrapper) imports the same functions
8
+ to flag only the identifiers the agent JUST introduced in the diff.
9
+
10
+ One denylist, two execution points, zero drift.
11
+
12
+ THE INVARIANT
13
+ If you cannot predict what a function/class/file does from its name alone,
14
+ there is semantic debt. DataManager, process(), utils.ts, CoreEngine -
15
+ names that exist but communicate no intent. High-density names
16
+ (InvoiceEmailSender, PostgresUserRepository, GenerateMonthlyReport) pass.
17
+
18
+ SCORING MODEL (three tiers, deliberately conservative on FAIL)
19
+ FAIL - the name IS a low-density token, a generic-suffix class with no
20
+ domain noun before it, a 1-2 char id, or a placeholder (DataManager,
21
+ CoreEngine, utils, fn, x1, tempFix). Almost never defensible.
22
+ WARN - the name carries a low-density token but has a domain noun, OR is
23
+ an anemic verb alone (UserManager, handle(), process()).
24
+ Suspicious but defensible in context;
25
+ the model judges.
26
+ OK - none of the above.
27
+
28
+ The Repository/Service/Provider DDD cases are the false-positive risk. They
29
+ land as WARN (not FAIL) when a domain noun precedes them
30
+ (PostgresUserRepository -> WARN, kept), and FAIL only when naked
31
+ (Repository -> FAIL). Calibrated against real DDD code before shipping.
32
+
33
+ Stdlib only; Python 3.9+. REPORTS only - never edits.
34
+ """
35
+ from __future__ import annotations
36
+
37
+ import re
38
+ import sys
39
+ from typing import Any
40
+
41
+ # Resolve sibling scan_slop.py at runtime. The hook wrapper (density_scan.py)
42
+ # is invoked from arbitrary cwds with no package context, and scan_slop imports
43
+ # this module via `from low_density import ...` only when both sit in the same
44
+ # scripts/ dir. The sys.path insert makes both directions work regardless of
45
+ # where Python was launched from.
46
+ _SCRIPT_DIR = re.sub(r"[\\/][^\\/]+$", "", __file__) or "."
47
+ if _SCRIPT_DIR not in sys.path:
48
+ sys.path.insert(0, _SCRIPT_DIR)
49
+
50
+ # Reuse scan_slop's language detection + comment/string stripping rather than
51
+ # duplicating the per-language tokenization that already lives there.
52
+ import scan_slop # noqa: E402 (path set up above)
53
+
54
+ ID = scan_slop.ID
55
+ TYPE_DECL = scan_slop.TYPE_DECL
56
+ FUNC_PATTERNS = scan_slop.FUNC_PATTERNS
57
+ METHOD_JS = scan_slop.METHOD_JS
58
+ METHOD_CSTYLE = scan_slop.METHOD_CSTYLE
59
+ NOT_METHOD = scan_slop.NOT_METHOD
60
+ lang_of = scan_slop.lang_of
61
+ _strip_comments = scan_slop._strip_comments
62
+
63
+ # ---- the denylist (single source of truth) --------------------------------
64
+ # Token stems, lowercased. Matched case-insensitively against identifier
65
+ # tokens. Kept short and high-signal: every entry here is one a senior would
66
+ # flag on sight, and every false positive costs trust in the whole layer.
67
+ LOW_DENSITY_TOKENS = frozenset({
68
+ # generic role nouns - describe a category, not a thing
69
+ "manager", "mgr", "handler", "processor", "controller", "provider",
70
+ "service", "svc", "engine", "framework", "system", "base", "core",
71
+ "common", "shared", "generic", "universal", "global",
72
+ # filler nouns - mean nothing on their own
73
+ "data", "info", "thing", "things", "stuff", "object", "item", "entity",
74
+ "business", "misc", "util", "utils", "utility", "helper", "helpers",
75
+ "tool", "tools",
76
+ # placeholder / temporaries that leaked to prod
77
+ "temp", "tmp", "new", "old", "current", "local", "main", "simple",
78
+ })
79
+
80
+ # Filenames whose bare basename IS the low-density signal. A file named
81
+ # utils.ts, helpers.py, manager.go communicates nothing. The fix is a
82
+ # domain name: invoice_totals.ts, smtp_retry.py.
83
+ LOW_DENSITY_FILENAMES = frozenset({
84
+ "utils", "helpers", "helper", "common", "shared", "manager", "service",
85
+ "provider", "handler", "processor", "engine", "base", "core", "misc",
86
+ "stuff", "things", "temp", "tmp", "generic", "util", "utility",
87
+ "controller", "framework", "system", "business", "global",
88
+ # NOTE: 'main' and 'app' are intentionally EXCLUDED - they are conventional
89
+ # entry-point names (next.js app/, rails application.rb, fastapi main.py).
90
+ # Flagging them would make the hook fire on every new project scaffold.
91
+ })
92
+
93
+ # Verbs that name an action without naming the object of the action. Fine as
94
+ # part of a longer name (GenerateMonthlyReport), suspect alone (process()).
95
+ # Deliberately EXCLUDES concrete action verbs (get/set/send/load/save/fetch/
96
+ # render/format/parse/validate/check/verify) - those name a specific operation
97
+ # and are legitimate as methods on a domain-noun class (InvoiceEmailSender.send).
98
+ # Only the truly content-free verbs (do/run/execute/process/handle/...) stay.
99
+ ANEMIC_VERBS = frozenset({
100
+ "do", "run", "execute", "process", "handle", "manage", "perform",
101
+ "apply", "compute", "calculate", "make", "build", "update",
102
+ "delete", "remove", "add",
103
+ })
104
+
105
+ # Suffixes that, on a class with no domain noun before them, are textbook
106
+ # Meaningless Abstraction: DataProvider, CoreEngine, SystemManager. When a
107
+ # domain noun precedes (PostgresUserRepository) the score stays WARN not FAIL.
108
+ GENERIC_SUFFIXES = re.compile(
109
+ r"(Manager|Handler|Processor|Controller|Provider|Service|Engine"
110
+ r"|Framework|System|Factory|Builder|Wrapper|Adapter|Resolver"
111
+ r"|Strategy|Mediator|Orchestrator|Registry|Repository)$"
112
+ )
113
+
114
# Placeholder fingerprints: tempFix, newThing, finalFinal, test2, x1, fn, foo.
# Only the keyword alternatives are case-insensitive (scoped `(?i:...)`). The
# boundary class after a keyword prefix is deliberately case-SENSITIVE so it
# marks a real word boundary: tempFix / temp_2 / Test2 match, while Template,
# newsletter and copyright (keyword followed by a lowercase letter) do not.
# A global re.I would make [A-Z0-9_] match lowercase letters too and flag
# every name that merely starts with one of the keywords.
# Capture groups are kept at the same positions as the keyword groups had.
PLACEHOLDER = re.compile(
    r"^((?i:temp|tmp|new|old|final|test|fix|copy|backup|draft|wip))"
    r"[A-Z0-9_]"
    r"|^((?i:final)){2,}"
    r"|^(?i:[a-z]{0,2})\d+$"  # x1, fn2: at most two letters, then only digits
    r"|^(?i:[a-z]{1,2})$"  # bare 1-2 char alpha: fn, cb, x, aq - predict nothing
    r"|^((?i:foo|bar|baz|qux|tmp|asdf|qwerty))$",
)
124
+
125
+ # A "domain noun" is an identifier token that is NOT in LOW_DENSITY_TOKENS and
126
+ # NOT an anemic verb. Used to decide whether a generic-suffix class has real
127
+ # subject matter or is pure filler (DataManager vs InvoiceEmailSender).
128
+ def _tokens_of(name: str) -> list[str]:
129
+ """Split a PascalCase / camelCase / snake_case name into lowercased word
130
+ stems. UserEmailSender -> [user, email, sender]. process_data ->
131
+ [process, data]. Single-word names return [self.lower()]."""
132
+ # split on non-alnum, then on case boundaries (aB -> a|B), then trailing digits
133
+ s = re.sub(r"[^A-Za-z0-9]+", " ", name)
134
+ s = re.sub(r"([a-z0-9])([A-Z])", r"\1 \2", s)
135
+ s = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", s) # HTTPSConnection -> HTTPS Connection
136
+ s = re.sub(r"(\d+)$", r" \1", s)
137
+ return [w.lower() for w in s.split() if w]
138
+
139
+
140
def _has_domain_noun(tokens: list[str]) -> bool:
    """Report whether any token carries real subject matter.

    A token counts as a domain noun when it is neither in LOW_DENSITY_TOKENS
    nor in ANEMIC_VERBS. [data, manager] -> False (all filler);
    [invoice, email, sender] -> True ('invoice' qualifies). An empty token
    list yields False.
    """
    return any(
        token not in LOW_DENSITY_TOKENS and token not in ANEMIC_VERBS
        for token in tokens
    )
152
+
153
+
154
+ # Conventional CLI/runtime entrypoint names that are exempt from the
155
+ # low-density signal even when they appear as a bare single token. `main` is
156
+ # THE Python/C convention (`if __name__ == "__main__": main()`); `run` is the
157
+ # common CLI entrypoint in Go binaries and many scripts. These get a pass only
158
+ # as exact single-token function names, never inside larger names.
159
+ ENTRYPOINT_NAMES = frozenset({"main", "run", "cli", "app"})
160
+
161
+
162
+ Finding = dict[str, Any]
163
+
164
+
165
def score_density(name: str) -> tuple[str, list[str]]:
    """Return (severity, reasons) for one identifier name.

    severity is one of "ok", "warn", "fail". reasons is a list of short human
    strings explaining the rule that fired; "ok" always comes with [].

    Rules are evaluated in order and the first match wins:
      0. bare conventional entrypoint name (ENTRYPOINT_NAMES)   -> ok
      1. placeholder / cryptic short id (PLACEHOLDER)           -> fail
      2. single token that is itself a low-density word         -> fail
      3. generic suffix with no domain noun BEFORE the suffix   -> fail
      4. generic suffix preceded by a domain noun               -> warn
      5. single anemic verb                                     -> warn
      6. any low-density token: warn with a domain noun, else fail

    "Anemic verb + filler" names (doStuff, processData) need no rule of their
    own: they always contain a low-density token and no domain noun, so rule 6
    already returns "fail" for them.

    The function is pure and side-effect free; density_scan.py and scan_slop.py
    both rely on that.
    """
    if not name or not name.strip():
        return "ok", []
    name = name.strip()
    lower = name.lower()
    tokens = _tokens_of(name)

    # 0. Conventional CLI/runtime entrypoints get a free pass as bare names.
    if lower in ENTRYPOINT_NAMES:
        return "ok", []

    # 1. Placeholder / temp-leaked-to-prod. Always FAIL - these never belong.
    #    The sub-checks only pick the most specific message.
    if PLACEHOLDER.search(name):
        if re.match(r"^(final){2,}", name, re.I):
            return "fail", ["placeholder name (finalFinal / repeated 'final')"]
        if re.match(r"^[a-z]{0,2}\d+$", name, re.I):
            return "fail", [f"cryptic short id '{name}' - predict nothing"]
        if re.match(r"^[a-z]{1,2}$", name, re.I):
            return "fail", [f"cryptic 1-2 char id '{name}' - predict nothing"]
        return "fail", [f"placeholder name '{name}' - temp/test marker leaked to prod"]

    # 2. Bare low-density single token: the name IS the filler word.
    if len(tokens) == 1 and lower in LOW_DENSITY_TOKENS:
        return "fail", [f"bare low-density token '{name}' - names a category, not a thing"]

    # 3/4. Generic-suffix names. The suffix itself must not count as the
    #      domain noun: several suffixes (Repository, Factory, Builder, ...)
    #      are not in LOW_DENSITY_TOKENS, so judging the full token list would
    #      let a naked "Repository" or "DataRepository" pass as "has a domain
    #      noun". Only the tokens BEFORE the suffix are examined.
    suffix_match = GENERIC_SUFFIXES.search(name)
    if suffix_match:
        suffix = suffix_match.group(1)
        if tokens and tokens[-1] == suffix.lower():
            qualifiers = tokens[:-1]
        else:
            # Tokenization did not isolate the suffix; fall back to all tokens.
            qualifiers = tokens
        if not _has_domain_noun(qualifiers):
            return "fail", [
                f"{suffix} with no domain noun - predict nothing from the name",
                "fix: replace with verb+noun naming the concrete responsibility",
                " (e.g. DataManager -> InvoiceRepository or PersistUserSessions)",
            ]
        return "warn", [f"{suffix} suffix (has domain noun -> defensible, still generic)"]

    # 5. Anemic verb alone (process, handle, do). Function-shaped, empty.
    if len(tokens) == 1 and lower in ANEMIC_VERBS:
        return "warn", [f"anemic verb '{name}()' - names an action without its object"]

    # 6. Any remaining low-density token (multiword): processStuff, DataThing,
    #    userData. WARN if there is any domain noun, FAIL if not.
    low_hits = [t for t in tokens if t in LOW_DENSITY_TOKENS]
    if low_hits:
        joined = ", ".join(sorted(set(low_hits)))
        if _has_domain_noun(tokens):
            return "warn", [f"low-density token(s) [{joined}] but has domain noun -> defensible"]
        return "fail", [f"low-density token(s) [{joined}] and no domain noun -> opaque"]

    return "ok", []
238
+
239
+
240
def score_filename(base: str) -> tuple[str, list[str]]:
    """Score a file basename; the final extension, if any, is ignored.

    utils.ts -> fail, invoice_totals.ts -> ok. Returns the same
    (severity, reasons) shape as score_density.
    """
    if not base:
        return "ok", []

    stem = re.sub(r"\.[A-Za-z0-9]+$", "", base).lower()
    if stem in LOW_DENSITY_FILENAMES:
        return "fail", [f"file named '{base}' - basename is a generic category, not a module"]

    # Conventional entry-point / layout names get a pass.
    conventional = {"index", "mod", "main", "app", "server", "test", "tests",
                    "conftest", "__init__", "setup"}
    if stem in conventional:
        return "ok", []

    # Multi-word names: one specific word is enough to downgrade to a warning;
    # only a name made entirely of generic words fails (e.g. utils_helpers.py).
    parts = [piece for piece in re.split(r"[^a-z0-9]+", stem) if piece]
    generic = [piece for piece in parts if piece in LOW_DENSITY_FILENAMES]
    if not generic:
        return "ok", []
    if len(generic) == len(parts):
        return "fail", [f"file '{base}' - all name parts are generic ({', '.join(generic)})"]
    return "warn", [f"file '{base}' contains generic part(s) ({', '.join(generic)}) but has a specific part"]
263
+
264
+
265
def _def_names_from_patterns(line: str, lang: str) -> list[tuple[str, str]]:
    """Collect the definitions declared on `line` as (kind, name) pairs.

    kind is one of "type", "class", "func", "method". Returns [] when the line
    declares nothing. Function detection uses the FUNC_PATTERNS table shared
    with scan_slop; at most one function/method name is reported per line.
    """
    declared: list[tuple[str, str]] = []

    # type/interface declarations (TS)
    type_match = TYPE_DECL.search(line)
    if type_match:
        declared.append(("type", type_match.group(1)))

    # class/struct/trait/protocol/interface/enum with a capitalized name
    class_match = re.search(
        r"\b(?:class|struct|trait|protocol|interface|enum)\s+([A-Z][A-Za-z0-9_]*)\b",
        line,
    )
    if class_match:
        declared.append(("class", class_match.group(1)))

    # function-shaped declarations: first usable pattern for this language wins
    for pattern in FUNC_PATTERNS.get(lang, []):
        hit = pattern.search(line)
        if not hit:
            continue
        candidate = hit.group(1)
        is_method = pattern in (METHOD_JS, METHOD_CSTYLE)
        # Method patterns also match names listed in NOT_METHOD; skip those
        # and let the remaining patterns try.
        if is_method and candidate in NOT_METHOD:
            continue
        declared.append(("method" if is_method else "func", candidate))
        break
    return declared
296
+
297
+
298
def _file_name_findings(base: str) -> list[Finding]:
    """Return a one-item finding list for file basename `base`, or [] if ok."""
    severity, reasons = score_filename(base)
    if severity == "ok":
        return []
    return [{"name": base, "line": 0, "kind": "file",
             "severity": severity, "reasons": reasons}]


def extract_identifiers(added_lines: list[str], rel: str) -> list[Finding]:
    """Walk `added_lines` and return the non-ok identifier findings:
    newly DECLARED function/class/type/method names plus the filename itself.

    Only declarations count, not references. We are judging what the agent
    chose to NAME, not every token it touched. A call to `processData(x)` is
    not interesting unless the agent also declared `function processData`.

    Declarations are searched in the comment-stripped, string-masked form of
    each line, so neither a comment ("// the Manager class") nor a string
    literal can produce a finding.

    Each finding is a dict with keys name, line, kind, severity, reasons.
    `line` is the 1-based index into `added_lines`; file-name findings use
    line 0 and kind "file". Each declared name is scored once.
    """
    lang = lang_of(rel)
    base = rel.rsplit("/", 1)[-1]
    if lang == "other":
        # Still score the filename even for unknown languages.
        return _file_name_findings(base)

    findings: list[Finding] = []
    seen_names: set[str] = set()

    for i, raw in enumerate(added_lines, start=1):
        if not raw.strip():
            continue
        # string_repl="L" masks string contents; comments are removed. The
        # masked line (indentation kept) is what the declaration patterns see,
        # so text inside comments or strings cannot be read as a declaration.
        masked = _strip_comments(raw, lang, "L")
        if not masked.strip():
            continue  # pure-comment line
        for kind, name in _def_names_from_patterns(masked, lang):
            if name in seen_names:
                continue
            seen_names.add(name)
            sevs, reasons = score_density(name)
            if sevs != "ok":
                findings.append({"name": name, "line": i, "kind": kind,
                                 "severity": sevs, "reasons": reasons})

    # The file's own name is a naming decision too. It is scored on every
    # call (cheap), so a rename to a generic name gets flagged as well.
    if base and base not in seen_names:
        findings.extend(_file_name_findings(base))

    return findings
352
+
353
+
354
def score_identifiers(added_lines: list[str], rel: str) -> list[Finding]:
    """Extract identifier findings for `rel` and keep only warn/fail ones.

    This is the entry point scan_slop.py's signal bucket calls.
    """
    flagged: list[Finding] = []
    for finding in extract_identifiers(added_lines, rel):
        if finding["severity"] != "ok":
            flagged.append(finding)
    return flagged
358
+
359
+
360
def format_for_report(findings: list[Finding]) -> list[str]:
    """Render each finding as the one-line string scan_slop prints.

    Shape: "[SEVERITY] kind 'name' (line N | file name): reason; reason".
    A finding with line 0 is reported as "file name"; a finding with no
    reasons gets the generic text "low semantic density".
    """
    rendered: list[str] = []
    for finding in findings:
        reasons = finding["reasons"]
        if reasons:
            reason = "; ".join(reasons)
        else:
            reason = "low semantic density"
        if finding["line"]:
            loc = f"line {finding['line']}"
        else:
            loc = "file name"
        severity = finding["severity"].upper()
        rendered.append(
            f"[{severity}] {finding['kind']} '{finding['name']}' ({loc}): {reason}"
        )
    return rendered
371
+
372
+
373
if __name__ == "__main__":
    # Smoke entrypoint: score names passed as argv so a human can sanity-check
    # the scorer without writing a test harness.
    #   --self-test   run the built-in expectation table; exit 1 on mismatch
    #   NAME ...      print "<name>: <severity> (<reasons>)" for each name
    #   --json        additionally print a pointer to density_scan.py
    import json
    if len(sys.argv) > 1 and sys.argv[1] == "--self-test":
        cases = [
            ("DataManager", "fail"), ("CoreEngine", "fail"), ("process", "warn"),
            ("InvoiceEmailSender", "ok"), ("PostgresUserRepository", "warn"),
            ("GenerateMonthlyReport", "ok"), ("Helper", "fail"), ("Utils", "fail"),
            ("UserManager", "warn"), ("handle", "warn"), ("doStuff", "fail"),
            ("processData", "fail"), ("x1", "fail"), ("finalFinal", "fail"),
            ("tempFix", "fail"), ("SystemProvider", "fail"),
            ("StripePaymentProvider", "warn"), ("utils.ts", "fail"),
            ("invoice_totals.ts", "ok"), ("SendInvoiceEmail", "ok"),
            ("DiscordWebhookClient", "ok"), ("fn", "fail"), ("Helper", "fail"),
            ("BaseService", "fail"), ("framework", "fail"), ("BusinessProcessor", "fail"),
        ]
        fails = 0
        for name, want in cases:
            # Names containing a dot are treated as file basenames.
            sevs, _ = score_density(name) if "." not in name else score_filename(name)
            mark = "OK " if sevs == want else "BAD"
            if sevs != want:
                fails += 1
            print(f" {mark} {name:<32} got={sevs:<5} want={want}")
        print(f"\n{'PASS' if not fails else f'{fails} FAILURES'}")
        sys.exit(1 if fails else 0)
    # default: score argv names as identifiers. Option flags (--json, ...) are
    # not identifiers, so they are skipped rather than scored and printed.
    for n in sys.argv[1:]:
        if n.startswith("--"):
            continue
        sevs, reasons = score_density(n)
        print(f"{n}: {sevs} ({'; '.join(reasons)})")
    # JSON dump mode for the hook wrapper
    if "--json" in sys.argv:
        print(json.dumps({"note": "use density_scan.py for stdin->json"}))
@@ -13,9 +13,11 @@ Scopes:
13
13
  errors (empty catch, broad except+pass), tautological asserts, pointless
14
14
  async wrappers (await Promise.resolve, async executors), deepening guard
15
15
  chains (the optional-chaining shape), boolean-pair call traps, SELECT *
16
- in .sql files, and Tailwind class soup / magic-px values. All per-file
17
- signals also run in AUDIT scope; only new-dependency detection is diff-only
18
- (every line of an existing manifest would otherwise read as "new").
16
+ in .sql files, Tailwind class soup / magic-px values, and SEMANTIC OPACITY
17
+ (low-density identifiers - DataManager, process(), utils.ts - scored via
18
+ the shared low_density module). All per-file signals also run in AUDIT
19
+ scope; only new-dependency detection is diff-only (every line of an
20
+ existing manifest would otherwise read as "new").
19
21
  * AUDIT (--all, or explicit paths): the WHOLE codebase, with the duplication
20
22
  analysis that catches the isRecord()-class slop:
21
23
  - Clone Proliferation : same function name in multiple files
@@ -432,6 +434,7 @@ _SIGNALS = {
432
434
  "boolean_traps": ("boolean trap ", "boolean-trap"),
433
435
  "select_star": ("SELECT * ", "select-star"),
434
436
  "tailwind_slop": ("tailwind smell ", "tailwind"),
437
+ "semantic_density": ("semantic opacity ", "semantic-density"),
435
438
  }
436
439
  _SIGNAL_KEYS = tuple(_SIGNALS)
437
440
 
@@ -517,6 +520,22 @@ def scan_lines(rel: str, lines: list[str], audit: bool) -> Finding | None:
517
520
  TAILWIND_SOUP.search(ln) or TAILWIND_MAGIC_PX.search(ln)
518
521
  ):
519
522
  found["tailwind_slop"].append(ln.strip()[:100])
523
+ # Semantic opacity: low-density identifiers introduced in this file. Lazy
524
+ # import because low_density imports scan_slop at module load (sibling
525
+ # resolution) - a top-level import here would cycle. Only declarations
526
+ # count, not references, so a CALL to processData(x) does not trip unless
527
+ # the agent also declared function processData on an audited line.
528
+ if is_source:
529
+ try:
530
+ import low_density
531
+ for item in low_density.format_for_report(
532
+ low_density.score_identifiers(lines, rel)):
533
+ found["semantic_density"].append(item[:140])
534
+ except Exception:
535
+ # Never let the density layer break the rest of the scan. If
536
+ # low_density.py is absent (older install) or errors, the other
537
+ # twelve signals still run.
538
+ pass
520
539
  found = {k: _uniq(v) for k, v in found.items()}
521
540
  added_count = sum(1 for ln in lines if ln.strip())
522
541
  substantial = (not audit) and is_source and added_count >= CHECKLIST_LINES
@@ -635,13 +654,15 @@ def collect_defs(rel: str, lines: list[str]) -> list[Finding]:
635
654
  nb = normalize_body(raw, lang)
636
655
  sb = structural_body(raw, lang)
637
656
  # Non-blank lines only: the brace walk pads raw with edge newlines,
638
- # and counting them would let `super(props);` pass the 3-line floor.
657
+ # and counting them would let `super(props);` pad its body_line count.
639
658
  body_lines = sum(1 for s in raw.splitlines() if s.strip()) or 1
640
- # Exact-dup hash needs substance (>=3 lines or >=60 chars): one-line
641
- # boilerplate like `super(props);` is not knowledge worth consolidating.
659
+ # Exact-dup hash needs substance (>=12 normalized chars). An earlier
660
+ # >=3-lines-or->=60-chars floor excluded the skill's own marquee case -
661
+ # tiny predicates like isRecord/isObject (1 line, ~40 chars) whose
662
+ # byte-identical bodies are exactly the duplication worth surfacing.
663
+ # Boilerplate like `return;`/`return x;` stays under the 12-char floor.
642
664
  # A truncated body is a prefix, not the function - never call it exact.
643
- hash_exact = (not truncated and len(nb) >= 12
644
- and (body_lines >= 3 or len(nb) >= 60))
665
+ hash_exact = (not truncated and len(nb) >= 12)
645
666
  defs.append({
646
667
  "name": name, "file": rel, "line": i + 1,
647
668
  "exported": _is_exported(name, ln, lang),