lemmaly 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +238 -0
- package/cli/gen-agents-md.js +60 -0
- package/cli/gen-rule-docs.js +885 -0
- package/cli/lemmaly.js +162 -0
- package/commands/benchmark.md +40 -0
- package/commands/budget.md +53 -0
- package/commands/complexity.md +26 -0
- package/commands/cut.md +27 -0
- package/commands/hotpath.md +22 -0
- package/commands/invariant.md +22 -0
- package/commands/n-plus-one.md +20 -0
- package/commands/profile.md +34 -0
- package/commands/regress.md +43 -0
- package/commands/scale-check.md +37 -0
- package/commands/ship-check.md +26 -0
- package/package.json +48 -0
- package/rules/cpp.json +46 -0
- package/rules/csharp.json +38 -0
- package/rules/go.json +46 -0
- package/rules/java.json +38 -0
- package/rules/javascript.json +102 -0
- package/rules/php.json +38 -0
- package/rules/python.json +62 -0
- package/rules/ruby.json +38 -0
- package/rules/rust.json +38 -0
- package/rules/shell.json +38 -0
- package/rules/sql.json +54 -0
- package/skills/complexity-cuts/SKILL.md +259 -0
- package/skills/invariant-guard/SKILL.md +310 -0
- package/skills/lemmaly/AGENTS.md +1869 -0
- package/skills/lemmaly/SKILL.md +365 -0
- package/skills/lemmaly/references/async.md +135 -0
- package/skills/lemmaly/references/complexity.md +66 -0
- package/skills/lemmaly/references/hot-paths.md +87 -0
- package/skills/lemmaly/references/memory.md +118 -0
- package/skills/lemmaly/references/n-plus-one.md +139 -0
- package/skills/lemmaly/rules/cpp-map-double-lookup.md +38 -0
- package/skills/lemmaly/rules/cpp-range-loop-copy.md +33 -0
- package/skills/lemmaly/rules/cpp-raw-new.md +36 -0
- package/skills/lemmaly/rules/cpp-string-concat-in-loop.md +45 -0
- package/skills/lemmaly/rules/cpp-vector-push-no-reserve.md +40 -0
- package/skills/lemmaly/rules/cs-async-void.md +45 -0
- package/skills/lemmaly/rules/cs-disposable-no-using.md +32 -0
- package/skills/lemmaly/rules/cs-list-contains-in-loop.md +36 -0
- package/skills/lemmaly/rules/cs-string-concat-in-loop.md +42 -0
- package/skills/lemmaly/rules/go-defer-in-loop.md +39 -0
- package/skills/lemmaly/rules/go-err-not-checked.md +38 -0
- package/skills/lemmaly/rules/go-loop-var-capture.md +47 -0
- package/skills/lemmaly/rules/go-slice-append-no-cap.md +39 -0
- package/skills/lemmaly/rules/go-string-concat-in-loop.md +44 -0
- package/skills/lemmaly/rules/java-arraylist-remove-in-for-i.md +44 -0
- package/skills/lemmaly/rules/java-bare-catch-exception.md +42 -0
- package/skills/lemmaly/rules/java-list-contains-in-loop.md +40 -0
- package/skills/lemmaly/rules/java-string-concat-in-loop.md +42 -0
- package/skills/lemmaly/rules/js-anonymous-handler-jsx.md +31 -0
- package/skills/lemmaly/rules/js-array-key-index.md +29 -0
- package/skills/lemmaly/rules/js-async-in-foreach.md +43 -0
- package/skills/lemmaly/rules/js-await-in-for-loop.md +41 -0
- package/skills/lemmaly/rules/js-deep-clone-via-json.md +33 -0
- package/skills/lemmaly/rules/js-helper-call-in-iterator.md +41 -0
- package/skills/lemmaly/rules/js-includes-in-iterator.md +37 -0
- package/skills/lemmaly/rules/js-inline-object-jsx-prop.md +35 -0
- package/skills/lemmaly/rules/js-nested-for-loops.md +45 -0
- package/skills/lemmaly/rules/js-spread-in-reduce.md +38 -0
- package/skills/lemmaly/rules/js-unique-via-indexof.md +35 -0
- package/skills/lemmaly/rules/js-useeffect-missing-deps.md +33 -0
- package/skills/lemmaly/rules/php-count-in-for-condition.md +45 -0
- package/skills/lemmaly/rules/php-in-array-in-loop.md +42 -0
- package/skills/lemmaly/rules/php-loose-equality.md +35 -0
- package/skills/lemmaly/rules/php-query-in-loop.md +47 -0
- package/skills/lemmaly/rules/py-bare-except.md +39 -0
- package/skills/lemmaly/rules/py-django-loop-without-eager.md +42 -0
- package/skills/lemmaly/rules/py-in-list-literal.md +37 -0
- package/skills/lemmaly/rules/py-mutable-default-arg.md +39 -0
- package/skills/lemmaly/rules/py-open-without-with.md +33 -0
- package/skills/lemmaly/rules/py-range-len.md +35 -0
- package/skills/lemmaly/rules/py-string-concat-in-loop.md +43 -0
- package/skills/lemmaly/rules/rb-bare-rescue.md +41 -0
- package/skills/lemmaly/rules/rb-include-in-iterator.md +37 -0
- package/skills/lemmaly/rules/rb-n-plus-one-activerecord.md +39 -0
- package/skills/lemmaly/rules/rb-string-concat-in-loop.md +39 -0
- package/skills/lemmaly/rules/rs-clone-in-loop.md +38 -0
- package/skills/lemmaly/rules/rs-string-push-no-capacity.md +43 -0
- package/skills/lemmaly/rules/rs-unwrap-in-prod.md +36 -0
- package/skills/lemmaly/rules/rs-vec-push-no-capacity.md +42 -0
- package/skills/lemmaly/rules/sh-for-ls.md +41 -0
- package/skills/lemmaly/rules/sh-set-e-no-pipefail.md +37 -0
- package/skills/lemmaly/rules/sh-unquoted-var.md +35 -0
- package/skills/lemmaly/rules/sh-useless-cat-pipe.md +32 -0
- package/skills/lemmaly/rules/sql-leading-wildcard-like.md +34 -0
- package/skills/lemmaly/rules/sql-not-in-subquery.md +38 -0
- package/skills/lemmaly/rules/sql-or-in-where.md +35 -0
- package/skills/lemmaly/rules/sql-select-no-limit.md +37 -0
- package/skills/lemmaly/rules/sql-select-star.md +29 -0
- package/skills/lemmaly/rules/sql-update-no-where.md +35 -0
- package/skills/mathguard/SKILL.md +277 -0
|
@@ -0,0 +1,885 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Generates one MD file per CLI rule into skills/lemmaly/rules/
|
|
3
|
+
// Source of truth: rules/*.json. Examples are hand-authored here.
|
|
4
|
+
|
|
5
|
+
const fs = require('fs');
|
|
6
|
+
const path = require('path');
|
|
7
|
+
|
|
8
|
+
// Parent directory of the directory that contains this script.
const ROOT = path.resolve(__dirname, '..');
|
|
9
|
+
// Directory <ROOT>/skills/lemmaly/rules — the location the header comment names as the target for the generated rule files.
const OUT = path.join(ROOT, 'skills', 'lemmaly', 'rules');
|
|
10
|
+
// Impact label for each severity key (error / warning / info).
// NOTE(review): presumably keyed by the severity field of entries in rules/*.json — the lookup site is outside this view; confirm.
const SEV_TO_IMPACT = { error: 'CRITICAL', warning: 'HIGH', info: 'MEDIUM' };
|
|
11
|
+
// One-line human-readable description for the same three severity keys as SEV_TO_IMPACT.
// NOTE(review): where this text is emitted is not visible in this view; confirm against the generator code.
const SEV_TO_DESC = {
|
|
12
|
+
error: 'Will break or scale-fail in production',
|
|
13
|
+
warning: 'Hot-path or correctness risk at realistic n',
|
|
14
|
+
info: 'Suboptimal; flag when n is large or on a hot path',
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
const EXAMPLES = {
|
|
18
|
+
'js-await-in-for-loop': {
|
|
19
|
+
why: 'Awaiting inside a `for` over independent items serializes wall-clock work into O(n × latency). For network or DB calls this is the N+1 problem.',
|
|
20
|
+
bad: `// Wall-clock: O(n × latency)
|
|
21
|
+
for (const id of ids) {
|
|
22
|
+
const user = await db.users.findById(id);
|
|
23
|
+
results.push(user);
|
|
24
|
+
}`,
|
|
25
|
+
good: `// Wall-clock: O(latency); one bulk query
|
|
26
|
+
const users = await db.users.findMany({ where: { id: { in: ids } } });
|
|
27
|
+
|
|
28
|
+
// Or, if calls are truly independent:
|
|
29
|
+
const results = await Promise.all(ids.map(id => db.users.findById(id)));`,
|
|
30
|
+
lang: 'ts',
|
|
31
|
+
chainTo: 'complexity-cuts',
|
|
32
|
+
},
|
|
33
|
+
'js-async-in-foreach': {
|
|
34
|
+
why: '`Array.prototype.forEach` ignores return values. Passing an `async` function returns promises that are dropped — errors are swallowed and the caller continues before the work finishes.',
|
|
35
|
+
bad: `// Promises dropped; errors silently swallowed
|
|
36
|
+
items.forEach(async (item) => {
|
|
37
|
+
await save(item);
|
|
38
|
+
});
|
|
39
|
+
console.log('done'); // logs before any save completes`,
|
|
40
|
+
good: `// Sequential, with errors propagated
|
|
41
|
+
for (const item of items) {
|
|
42
|
+
await save(item);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// Parallel
|
|
46
|
+
await Promise.all(items.map((item) => save(item)));`,
|
|
47
|
+
lang: 'ts',
|
|
48
|
+
chainTo: 'complexity-cuts',
|
|
49
|
+
},
|
|
50
|
+
'js-deep-clone-via-json': {
|
|
51
|
+
why: '`JSON.parse(JSON.stringify(x))` is slow, allocates twice, and silently loses `Date`, `Map`, `Set`, `undefined`, `BigInt`, `RegExp`, and any non-enumerable property. It is also unsafe on cyclic structures.',
|
|
52
|
+
bad: `const copy = JSON.parse(JSON.stringify(state));
|
|
53
|
+
// state.createdAt was a Date — now a string`,
|
|
54
|
+
good: `const copy = structuredClone(state); // preserves Date, Map, Set, cycles
|
|
55
|
+
|
|
56
|
+
// Or, when you only need a few fields:
|
|
57
|
+
const copy = { id: state.id, name: state.name };`,
|
|
58
|
+
lang: 'ts',
|
|
59
|
+
chainTo: null,
|
|
60
|
+
},
|
|
61
|
+
'js-useeffect-missing-deps': {
|
|
62
|
+
why: 'A `useEffect` with no dependency array runs after every render. If the effect updates state, you can get a render loop or wasted work each frame.',
|
|
63
|
+
bad: `useEffect(() => {
|
|
64
|
+
setUser(fetchUser(id));
|
|
65
|
+
}); // no deps — runs every render`,
|
|
66
|
+
good: `useEffect(() => {
|
|
67
|
+
setUser(fetchUser(id));
|
|
68
|
+
}, [id]); // runs when id changes`,
|
|
69
|
+
lang: 'tsx',
|
|
70
|
+
chainTo: null,
|
|
71
|
+
},
|
|
72
|
+
'js-inline-object-jsx-prop': {
|
|
73
|
+
why: 'An inline object literal in JSX creates a new reference every render. If the child is `React.memo`, this defeats the memoization. If it is a dependency of a hook in the child, it re-fires that hook every render.',
|
|
74
|
+
bad: `<Chart options={{ animated: true, color: 'red' }} />`,
|
|
75
|
+
good: `// Hoist if static
|
|
76
|
+
const CHART_OPTIONS = { animated: true, color: 'red' };
|
|
77
|
+
<Chart options={CHART_OPTIONS} />
|
|
78
|
+
|
|
79
|
+
// Memoize if derived
|
|
80
|
+
const options = useMemo(() => ({ animated, color }), [animated, color]);
|
|
81
|
+
<Chart options={options} />`,
|
|
82
|
+
lang: 'tsx',
|
|
83
|
+
chainTo: null,
|
|
84
|
+
},
|
|
85
|
+
'js-anonymous-handler-jsx': {
|
|
86
|
+
why: 'An anonymous arrow handler is a new function every render. For an ordinary child, this is fine. For a `React.memo` child, it breaks equality and forces a re-render every time.',
|
|
87
|
+
bad: `// Child is React.memo — this defeats it
|
|
88
|
+
<MemoButton onClick={() => save(id)} />`,
|
|
89
|
+
good: `const onClick = useCallback(() => save(id), [id]);
|
|
90
|
+
<MemoButton onClick={onClick} />`,
|
|
91
|
+
lang: 'tsx',
|
|
92
|
+
chainTo: null,
|
|
93
|
+
},
|
|
94
|
+
'js-nested-for-loops': {
|
|
95
|
+
why: 'Two `for` loops that check membership between arrays are O(n × m). Hashing one side into a `Set` makes it O(n + m).',
|
|
96
|
+
bad: `// O(n × m)
|
|
97
|
+
const matches = [];
|
|
98
|
+
for (const a of left) {
|
|
99
|
+
for (const b of right) {
|
|
100
|
+
if (a.id === b.id) matches.push([a, b]);
|
|
101
|
+
}
|
|
102
|
+
}`,
|
|
103
|
+
good: `// O(n + m)
|
|
104
|
+
const rightById = new Map(right.map((b) => [b.id, b]));
|
|
105
|
+
const matches = [];
|
|
106
|
+
for (const a of left) {
|
|
107
|
+
const b = rightById.get(a.id);
|
|
108
|
+
if (b) matches.push([a, b]);
|
|
109
|
+
}`,
|
|
110
|
+
lang: 'ts',
|
|
111
|
+
chainTo: 'complexity-cuts',
|
|
112
|
+
},
|
|
113
|
+
'js-spread-in-reduce': {
|
|
114
|
+
why: 'Object-spread in a reducer copies the accumulator on every iteration — O(n²) work and O(n²) allocation. The result is the same as mutating once.',
|
|
115
|
+
bad: `// O(n²)
|
|
116
|
+
const byId = items.reduce((acc, x) => ({ ...acc, [x.id]: x }), {});`,
|
|
117
|
+
good: `// O(n)
|
|
118
|
+
const byId = Object.fromEntries(items.map((x) => [x.id, x]));
|
|
119
|
+
|
|
120
|
+
// Or mutate the accumulator
|
|
121
|
+
const byId = items.reduce((acc, x) => { acc[x.id] = x; return acc; }, {});`,
|
|
122
|
+
lang: 'ts',
|
|
123
|
+
chainTo: 'complexity-cuts',
|
|
124
|
+
},
|
|
125
|
+
'js-includes-in-iterator': {
|
|
126
|
+
why: '`.includes` on an array is O(n). Calling it inside `.map`/`.filter`/`.forEach` over m items is O(n × m). A `Set` lookup is O(1).',
|
|
127
|
+
bad: `// O(n × m)
|
|
128
|
+
const allowed = ['admin', 'editor', 'owner'];
|
|
129
|
+
const result = users.filter((u) => allowed.includes(u.role));`,
|
|
130
|
+
good: `// O(n + m)
|
|
131
|
+
const allowed = new Set(['admin', 'editor', 'owner']);
|
|
132
|
+
const result = users.filter((u) => allowed.has(u.role));`,
|
|
133
|
+
lang: 'ts',
|
|
134
|
+
chainTo: 'complexity-cuts',
|
|
135
|
+
},
|
|
136
|
+
'js-unique-via-indexof': {
|
|
137
|
+
why: '`.filter((x, i, a) => a.indexOf(x) === i)` is the textbook O(n²) dedupe. A `Set` does it in O(n).',
|
|
138
|
+
bad: `// O(n²)
|
|
139
|
+
const unique = arr.filter((x, i, a) => a.indexOf(x) === i);`,
|
|
140
|
+
good: `// O(n)
|
|
141
|
+
const unique = Array.from(new Set(arr));`,
|
|
142
|
+
lang: 'ts',
|
|
143
|
+
chainTo: 'complexity-cuts',
|
|
144
|
+
},
|
|
145
|
+
'js-helper-call-in-iterator': {
|
|
146
|
+
why: 'A `get*`/`find*`/`fetch*` helper inside an iterator usually means N independent lookups. If the helper hits a DB or network, this is N+1. If it scans an array, it is O(n × m).',
|
|
147
|
+
bad: `// N round trips
|
|
148
|
+
const enriched = orders.map((o) => ({
|
|
149
|
+
...o,
|
|
150
|
+
user: getUserById(o.userId),
|
|
151
|
+
}));`,
|
|
152
|
+
good: `// 1 round trip
|
|
153
|
+
const userIds = [...new Set(orders.map((o) => o.userId))];
|
|
154
|
+
const users = await db.users.findMany({ where: { id: { in: userIds } } });
|
|
155
|
+
const userById = new Map(users.map((u) => [u.id, u]));
|
|
156
|
+
const enriched = orders.map((o) => ({ ...o, user: userById.get(o.userId) }));`,
|
|
157
|
+
lang: 'ts',
|
|
158
|
+
chainTo: 'complexity-cuts',
|
|
159
|
+
},
|
|
160
|
+
'js-array-key-index': {
|
|
161
|
+
why: '`key={index}` is fine for a static list. For a list that reorders, inserts, or deletes in the middle, it forces React to mismatch component state with the wrong row — losing input focus, animation, and local state.',
|
|
162
|
+
bad: `{items.map((item, i) => <Row key={i} item={item} />)}`,
|
|
163
|
+
good: `{items.map((item) => <Row key={item.id} item={item} />)}`,
|
|
164
|
+
lang: 'tsx',
|
|
165
|
+
chainTo: null,
|
|
166
|
+
},
|
|
167
|
+
'py-mutable-default-arg': {
|
|
168
|
+
why: 'Python evaluates default arguments once, at function definition. A mutable default (list, dict, set) is **shared across every call** — calls accumulate state from previous calls.',
|
|
169
|
+
bad: `def collect(item, bucket=[]): # shared across all calls!
|
|
170
|
+
bucket.append(item)
|
|
171
|
+
return bucket`,
|
|
172
|
+
good: `def collect(item, bucket=None):
|
|
173
|
+
if bucket is None:
|
|
174
|
+
bucket = []
|
|
175
|
+
bucket.append(item)
|
|
176
|
+
return bucket`,
|
|
177
|
+
lang: 'python',
|
|
178
|
+
chainTo: 'invariant-guard',
|
|
179
|
+
},
|
|
180
|
+
'py-string-concat-in-loop': {
|
|
181
|
+
why: 'Python strings are immutable. `s += x` in a loop allocates and copies the whole `s` each iteration — O(n²) total work. A list join is O(n).',
|
|
182
|
+
bad: `# O(n²)
|
|
183
|
+
s = ""
|
|
184
|
+
for line in lines:
|
|
185
|
+
s += line + "\\n"`,
|
|
186
|
+
good: `# O(n)
|
|
187
|
+
s = "\\n".join(lines) + "\\n"
|
|
188
|
+
|
|
189
|
+
# Or, for incremental building:
|
|
190
|
+
parts = []
|
|
191
|
+
for line in lines:
|
|
192
|
+
parts.append(line)
|
|
193
|
+
s = "\\n".join(parts)`,
|
|
194
|
+
lang: 'python',
|
|
195
|
+
chainTo: 'complexity-cuts',
|
|
196
|
+
},
|
|
197
|
+
'py-range-len': {
|
|
198
|
+
why: '`for i in range(len(xs))` then indexing `xs[i]` is un-Pythonic, slower, and forces you to think about indices when you usually want the items. `enumerate` is idiomatic and faster.',
|
|
199
|
+
bad: `for i in range(len(xs)):
|
|
200
|
+
process(xs[i])`,
|
|
201
|
+
good: `for x in xs:
|
|
202
|
+
process(x)
|
|
203
|
+
|
|
204
|
+
# When you actually need the index
|
|
205
|
+
for i, x in enumerate(xs):
|
|
206
|
+
process(i, x)`,
|
|
207
|
+
lang: 'python',
|
|
208
|
+
chainTo: null,
|
|
209
|
+
},
|
|
210
|
+
'py-in-list-literal': {
|
|
211
|
+
why: '`x in [a, b, c, ...]` is an O(n) linear scan. Inside a loop over m items, this is O(n × m). A `set` (or a literal `{a, b, c}` for membership) is O(1).',
|
|
212
|
+
bad: `# O(n × m)
|
|
213
|
+
roles = ["admin", "editor", "owner"]
|
|
214
|
+
result = [u for u in users if u.role in roles]`,
|
|
215
|
+
good: `# O(n + m)
|
|
216
|
+
roles = {"admin", "editor", "owner"}
|
|
217
|
+
result = [u for u in users if u.role in roles]`,
|
|
218
|
+
lang: 'python',
|
|
219
|
+
chainTo: 'complexity-cuts',
|
|
220
|
+
},
|
|
221
|
+
'py-django-loop-without-eager': {
|
|
222
|
+
why: 'Iterating a Django QuerySet that touches a related model triggers one extra query per row — classic N+1. `select_related` (foreign key, one query with JOIN) or `prefetch_related` (reverse / many-to-many, two queries) fixes it.',
|
|
223
|
+
bad: `# N+1 queries
|
|
224
|
+
for order in Order.objects.all():
|
|
225
|
+
print(order.user.email) # extra query per order`,
|
|
226
|
+
good: `# 1 query with JOIN
|
|
227
|
+
for order in Order.objects.select_related("user"):
|
|
228
|
+
print(order.user.email)
|
|
229
|
+
|
|
230
|
+
# For reverse FK / M2M:
|
|
231
|
+
for user in User.objects.prefetch_related("orders"):
|
|
232
|
+
for order in user.orders.all():
|
|
233
|
+
...`,
|
|
234
|
+
lang: 'python',
|
|
235
|
+
chainTo: 'complexity-cuts',
|
|
236
|
+
},
|
|
237
|
+
'py-bare-except': {
|
|
238
|
+
why: 'Bare `except:` catches `SystemExit`, `KeyboardInterrupt`, `MemoryError`, and `GeneratorExit` — things you almost never want to swallow. It hides timeouts, OOM, and Ctrl-C.',
|
|
239
|
+
bad: `try:
|
|
240
|
+
do_work()
|
|
241
|
+
except:
|
|
242
|
+
log("failed") # also swallows Ctrl-C, OOM, timeouts`,
|
|
243
|
+
good: `try:
|
|
244
|
+
do_work()
|
|
245
|
+
except Exception as e: # excludes SystemExit, KeyboardInterrupt
|
|
246
|
+
log(f"failed: {e}")`,
|
|
247
|
+
lang: 'python',
|
|
248
|
+
chainTo: 'invariant-guard',
|
|
249
|
+
},
|
|
250
|
+
'py-open-without-with': {
|
|
251
|
+
why: '`open()` returns a file object. Without `with`, an exception between open and close leaks the file descriptor. Long-running processes (servers, workers) eventually exhaust the OS limit.',
|
|
252
|
+
bad: `f = open(path)
|
|
253
|
+
data = f.read()
|
|
254
|
+
f.close() # skipped if read() raises`,
|
|
255
|
+
good: `with open(path) as f:
|
|
256
|
+
data = f.read()
|
|
257
|
+
# f is closed even if read() raises`,
|
|
258
|
+
lang: 'python',
|
|
259
|
+
chainTo: null,
|
|
260
|
+
},
|
|
261
|
+
'sql-select-star': {
|
|
262
|
+
why: '`SELECT *` fetches every column, defeating index-only scans, inflating wire traffic, and breaking downstream code when a column is added/renamed. Project only what you need.',
|
|
263
|
+
bad: `SELECT * FROM users WHERE id = $1;`,
|
|
264
|
+
good: `SELECT id, email, created_at FROM users WHERE id = $1;`,
|
|
265
|
+
lang: 'sql',
|
|
266
|
+
chainTo: null,
|
|
267
|
+
},
|
|
268
|
+
'sql-leading-wildcard-like': {
|
|
269
|
+
why: 'A B-tree index sorts by prefix. `LIKE \'%foo\'` cannot use it — the query scans the table. Use a trigram index (Postgres `pg_trgm`), reverse the column for suffix search, or a full-text search index.',
|
|
270
|
+
bad: `SELECT * FROM products WHERE name LIKE '%phone';`,
|
|
271
|
+
good: `-- Postgres: GIN index on trigrams
|
|
272
|
+
CREATE INDEX products_name_trgm ON products USING gin (name gin_trgm_ops);
|
|
273
|
+
SELECT * FROM products WHERE name ILIKE '%phone%';
|
|
274
|
+
|
|
275
|
+
-- Or full-text:
|
|
276
|
+
SELECT * FROM products WHERE to_tsvector(name) @@ to_tsquery('phone');`,
|
|
277
|
+
lang: 'sql',
|
|
278
|
+
chainTo: null,
|
|
279
|
+
},
|
|
280
|
+
'sql-not-in-subquery': {
|
|
281
|
+
why: '`NOT IN (subquery)` is null-unsafe: if any row in the subquery is NULL, the whole predicate is NULL (not TRUE), and the outer row is dropped. `NOT EXISTS` is null-safe and usually has a better plan.',
|
|
282
|
+
bad: `SELECT * FROM orders
|
|
283
|
+
WHERE user_id NOT IN (SELECT id FROM banned_users);
|
|
284
|
+
-- One NULL in banned_users.id → returns zero rows`,
|
|
285
|
+
good: `SELECT o.* FROM orders o
|
|
286
|
+
WHERE NOT EXISTS (
|
|
287
|
+
SELECT 1 FROM banned_users b WHERE b.id = o.user_id
|
|
288
|
+
);`,
|
|
289
|
+
lang: 'sql',
|
|
290
|
+
chainTo: 'invariant-guard',
|
|
291
|
+
},
|
|
292
|
+
'sql-select-no-limit': {
|
|
293
|
+
why: 'A query with no `LIMIT` returns however many rows match. On a small table this is fine; on a growing one it eventually OOMs the client or the page. Add a bound, or paginate.',
|
|
294
|
+
bad: `SELECT id, email FROM users ORDER BY created_at DESC;`,
|
|
295
|
+
good: `SELECT id, email FROM users
|
|
296
|
+
ORDER BY created_at DESC
|
|
297
|
+
LIMIT 100;
|
|
298
|
+
|
|
299
|
+
-- Keyset pagination for next page:
|
|
300
|
+
SELECT id, email FROM users
|
|
301
|
+
WHERE created_at < $1
|
|
302
|
+
ORDER BY created_at DESC
|
|
303
|
+
LIMIT 100;`,
|
|
304
|
+
lang: 'sql',
|
|
305
|
+
chainTo: null,
|
|
306
|
+
},
|
|
307
|
+
'sql-or-in-where': {
|
|
308
|
+
why: 'A query planner can use an index on one side of an `OR` but often not both, falling back to a sequential scan. `UNION ALL` or `IN (...)` (when both sides are equality on the same column) usually wins.',
|
|
309
|
+
bad: `SELECT * FROM events
|
|
310
|
+
WHERE user_id = $1 OR account_id = $1;`,
|
|
311
|
+
good: `SELECT * FROM events WHERE user_id = $1
|
|
312
|
+
UNION ALL
|
|
313
|
+
SELECT * FROM events WHERE account_id = $1;
|
|
314
|
+
|
|
315
|
+
-- Or, when both sides are the same column:
|
|
316
|
+
SELECT * FROM events WHERE user_id IN ($1, $2, $3);`,
|
|
317
|
+
lang: 'sql',
|
|
318
|
+
chainTo: null,
|
|
319
|
+
},
|
|
320
|
+
'sql-update-no-where': {
|
|
321
|
+
why: 'An `UPDATE` or `DELETE` without a `WHERE` clause rewrites every row in the table. In production this is an incident, not a bug.',
|
|
322
|
+
bad: `UPDATE users SET active = false;
|
|
323
|
+
DELETE FROM sessions;`,
|
|
324
|
+
good: `UPDATE users SET active = false WHERE last_seen_at < NOW() - INTERVAL '90 days';
|
|
325
|
+
DELETE FROM sessions WHERE expires_at < NOW();`,
|
|
326
|
+
lang: 'sql',
|
|
327
|
+
chainTo: 'invariant-guard',
|
|
328
|
+
},
|
|
329
|
+
|
|
330
|
+
// ---------- Java ----------
|
|
331
|
+
'java-string-concat-in-loop': {
|
|
332
|
+
why: 'Java `String` is immutable. `s += x` allocates a fresh `String` (and an underlying `char[]`) each iteration — O(n²) total work. `StringBuilder` reuses one buffer.',
|
|
333
|
+
bad: `// O(n²)
|
|
334
|
+
String s = "";
|
|
335
|
+
for (var line : lines) {
|
|
336
|
+
s += line + "\\n";
|
|
337
|
+
}`,
|
|
338
|
+
good: `// O(n)
|
|
339
|
+
var sb = new StringBuilder(lines.size() * 80);
|
|
340
|
+
for (var line : lines) {
|
|
341
|
+
sb.append(line).append('\\n');
|
|
342
|
+
}
|
|
343
|
+
String s = sb.toString();`,
|
|
344
|
+
lang: 'java',
|
|
345
|
+
chainTo: 'complexity-cuts',
|
|
346
|
+
},
|
|
347
|
+
'java-list-contains-in-loop': {
|
|
348
|
+
why: '`List.contains` is O(n). Used inside a stream/iterator over m items it is O(n·m). A `HashSet` lookup is O(1).',
|
|
349
|
+
bad: `// O(n·m)
|
|
350
|
+
var active = users.stream()
|
|
351
|
+
.filter(u -> !banned.contains(u.id))
|
|
352
|
+
.toList();`,
|
|
353
|
+
good: `// O(n+m)
|
|
354
|
+
var bannedSet = new HashSet<>(banned);
|
|
355
|
+
var active = users.stream()
|
|
356
|
+
.filter(u -> !bannedSet.contains(u.id))
|
|
357
|
+
.toList();`,
|
|
358
|
+
lang: 'java',
|
|
359
|
+
chainTo: 'complexity-cuts',
|
|
360
|
+
},
|
|
361
|
+
'java-arraylist-remove-in-for-i': {
|
|
362
|
+
why: 'Removing from an `ArrayList` inside a `for (int i = 0; i < list.size(); i++)` shifts indices and skips elements — and on a `Collections.synchronizedList` or in concurrent code, it throws `ConcurrentModificationException`.',
|
|
363
|
+
bad: `for (int i = 0; i < list.size(); i++) {
|
|
364
|
+
if (shouldRemove(list.get(i))) {
|
|
365
|
+
list.remove(i); // shifts everything; next element is skipped
|
|
366
|
+
}
|
|
367
|
+
}`,
|
|
368
|
+
good: `// Idiomatic and correct
|
|
369
|
+
list.removeIf(this::shouldRemove);
|
|
370
|
+
|
|
371
|
+
// Or, with an explicit iterator:
|
|
372
|
+
var it = list.iterator();
|
|
373
|
+
while (it.hasNext()) {
|
|
374
|
+
if (shouldRemove(it.next())) it.remove();
|
|
375
|
+
}`,
|
|
376
|
+
lang: 'java',
|
|
377
|
+
chainTo: 'invariant-guard',
|
|
378
|
+
},
|
|
379
|
+
'java-bare-catch-exception': {
|
|
380
|
+
why: '`catch (Exception e)` with an empty body or just `printStackTrace()` swallows the root cause. The bug lives on, the stack trace evaporates, the production incident has no breadcrumbs.',
|
|
381
|
+
bad: `try {
|
|
382
|
+
riskyCall();
|
|
383
|
+
} catch (Exception e) {
|
|
384
|
+
e.printStackTrace(); // swallowed; caller thinks everything is fine
|
|
385
|
+
}`,
|
|
386
|
+
good: `try {
|
|
387
|
+
riskyCall();
|
|
388
|
+
} catch (IOException e) {
|
|
389
|
+
// narrow exception, rethrow as domain error with cause preserved
|
|
390
|
+
throw new ReadFailedException("riskyCall failed for " + ctx, e);
|
|
391
|
+
}`,
|
|
392
|
+
lang: 'java',
|
|
393
|
+
chainTo: 'invariant-guard',
|
|
394
|
+
},
|
|
395
|
+
|
|
396
|
+
// ---------- C# ----------
|
|
397
|
+
'cs-string-concat-in-loop': {
|
|
398
|
+
why: 'C# `string` is immutable. `s += x` allocates a new string each iteration — O(n²). `StringBuilder` reuses one buffer; or use `string.Concat` / `string.Join` for known parts.',
|
|
399
|
+
bad: `// O(n²)
|
|
400
|
+
string s = "";
|
|
401
|
+
foreach (var line in lines) {
|
|
402
|
+
s += line + "\\n";
|
|
403
|
+
}`,
|
|
404
|
+
good: `// O(n)
|
|
405
|
+
var sb = new StringBuilder(lines.Count * 80);
|
|
406
|
+
foreach (var line in lines) {
|
|
407
|
+
sb.Append(line).Append('\\n');
|
|
408
|
+
}
|
|
409
|
+
var s = sb.ToString();`,
|
|
410
|
+
lang: 'csharp',
|
|
411
|
+
chainTo: 'complexity-cuts',
|
|
412
|
+
},
|
|
413
|
+
'cs-list-contains-in-loop': {
|
|
414
|
+
why: '`List<T>.Contains` is O(n). Inside LINQ over m items it is O(n·m). `HashSet<T>` is O(1).',
|
|
415
|
+
bad: `// O(n·m)
|
|
416
|
+
var active = users.Where(u => !banned.Contains(u.Id)).ToList();`,
|
|
417
|
+
good: `// O(n+m)
|
|
418
|
+
var bannedSet = new HashSet<int>(banned);
|
|
419
|
+
var active = users.Where(u => !bannedSet.Contains(u.Id)).ToList();`,
|
|
420
|
+
lang: 'csharp',
|
|
421
|
+
chainTo: 'complexity-cuts',
|
|
422
|
+
},
|
|
423
|
+
'cs-async-void': {
|
|
424
|
+
why: '`async void` cannot be awaited. Exceptions raised inside are unobserved and crash the process — they bypass `try/catch` at the call site. The only legitimate use is true event handlers (e.g. `OnClick`).',
|
|
425
|
+
bad: `public async void DoWork() {
|
|
426
|
+
await Task.Delay(100);
|
|
427
|
+
throw new Exception("boom"); // crashes the process
|
|
428
|
+
}`,
|
|
429
|
+
good: `public async Task DoWork() {
|
|
430
|
+
await Task.Delay(100);
|
|
431
|
+
throw new Exception("boom"); // caller can await + catch
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
// Event handlers stay async void with a try/catch at the boundary:
|
|
435
|
+
private async void OnClick(object s, EventArgs e) {
|
|
436
|
+
try { await DoWorkAsync(); }
|
|
437
|
+
catch (Exception ex) { _log.LogError(ex, "OnClick failed"); }
|
|
438
|
+
}`,
|
|
439
|
+
lang: 'csharp',
|
|
440
|
+
chainTo: 'invariant-guard',
|
|
441
|
+
},
|
|
442
|
+
'cs-disposable-no-using': {
|
|
443
|
+
why: '`IDisposable` resources allocated without `using` leak if an exception fires before `Dispose()`. `using var` ensures cleanup even on throw.',
|
|
444
|
+
bad: `var stream = new FileStream(path, FileMode.Open);
|
|
445
|
+
var data = ReadAll(stream); // if this throws, stream is never disposed
|
|
446
|
+
stream.Dispose();`,
|
|
447
|
+
good: `using var stream = new FileStream(path, FileMode.Open);
|
|
448
|
+
var data = ReadAll(stream); // disposed on every exit path`,
|
|
449
|
+
lang: 'csharp',
|
|
450
|
+
chainTo: null,
|
|
451
|
+
},
|
|
452
|
+
|
|
453
|
+
// ---------- Go ----------
|
|
454
|
+
'go-loop-var-capture': {
|
|
455
|
+
why: 'Before Go 1.22, the loop variable in `for ... := range` was reused across iterations. Goroutines that closed over it all read the *same* (final) value. Go 1.22+ fixes this at the language level, but the pattern still appears in libraries that target older versions.',
|
|
456
|
+
bad: `// Pre-1.22: all goroutines print the last item
|
|
457
|
+
for _, v := range items {
|
|
458
|
+
go func() {
|
|
459
|
+
fmt.Println(v)
|
|
460
|
+
}()
|
|
461
|
+
}`,
|
|
462
|
+
good: `// Pin the variable explicitly
|
|
463
|
+
for _, v := range items {
|
|
464
|
+
v := v
|
|
465
|
+
go func() { fmt.Println(v) }()
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
// Or pass as a parameter
|
|
469
|
+
for _, v := range items {
|
|
470
|
+
go func(v string) { fmt.Println(v) }(v)
|
|
471
|
+
}`,
|
|
472
|
+
lang: 'go',
|
|
473
|
+
chainTo: 'invariant-guard',
|
|
474
|
+
},
|
|
475
|
+
'go-string-concat-in-loop': {
|
|
476
|
+
why: 'Go strings are immutable. `s += x` allocates each iteration — O(n²). `strings.Builder` reuses one backing array.',
|
|
477
|
+
bad: `// O(n²)
|
|
478
|
+
var s string
|
|
479
|
+
for _, line := range lines {
|
|
480
|
+
s += line + "\\n"
|
|
481
|
+
}`,
|
|
482
|
+
good: `// O(n)
|
|
483
|
+
var sb strings.Builder
|
|
484
|
+
sb.Grow(len(lines) * 80)
|
|
485
|
+
for _, line := range lines {
|
|
486
|
+
sb.WriteString(line)
|
|
487
|
+
sb.WriteByte('\\n')
|
|
488
|
+
}
|
|
489
|
+
s := sb.String()`,
|
|
490
|
+
lang: 'go',
|
|
491
|
+
chainTo: 'complexity-cuts',
|
|
492
|
+
},
|
|
493
|
+
'go-defer-in-loop': {
|
|
494
|
+
why: '`defer` fires when the enclosing *function* returns, not when the loop body ends. Deferring inside a loop over N files holds N open handles until the function exits — easy way to exhaust file descriptors.',
|
|
495
|
+
bad: `for _, f := range files {
|
|
496
|
+
fp, _ := os.Open(f)
|
|
497
|
+
defer fp.Close() // accumulates; nothing closes until the outer function returns
|
|
498
|
+
process(fp)
|
|
499
|
+
}`,
|
|
500
|
+
good: `for _, f := range files {
|
|
501
|
+
func() {
|
|
502
|
+
fp, _ := os.Open(f)
|
|
503
|
+
defer fp.Close() // closes at end of this iteration
|
|
504
|
+
process(fp)
|
|
505
|
+
}()
|
|
506
|
+
}`,
|
|
507
|
+
lang: 'go',
|
|
508
|
+
chainTo: null,
|
|
509
|
+
},
|
|
510
|
+
'go-err-not-checked': {
|
|
511
|
+
why: 'Discarding the error return with `_` is silent failure. The function looks like it succeeded; downstream code sees zero values and behaves unpredictably.',
|
|
512
|
+
bad: `data, _ := os.ReadFile(path)
|
|
513
|
+
return parse(data) // parse on empty buffer if file missing`,
|
|
514
|
+
good: `data, err := os.ReadFile(path)
|
|
515
|
+
if err != nil {
|
|
516
|
+
return nil, fmt.Errorf("read %s: %w", path, err)
|
|
517
|
+
}
|
|
518
|
+
return parse(data), nil`,
|
|
519
|
+
lang: 'go',
|
|
520
|
+
chainTo: 'invariant-guard',
|
|
521
|
+
},
|
|
522
|
+
'go-slice-append-no-cap': {
|
|
523
|
+
why: 'A slice grown by repeated `append` reallocates and copies its backing array each time it crosses a capacity boundary. Preallocating with `make([]T, 0, n)` does one allocation total.',
|
|
524
|
+
bad: `var out []int
|
|
525
|
+
for _, x := range in {
|
|
526
|
+
out = append(out, x*2) // grows; reallocates log(n) times
|
|
527
|
+
}`,
|
|
528
|
+
good: `out := make([]int, 0, len(in))
|
|
529
|
+
for _, x := range in {
|
|
530
|
+
out = append(out, x*2)
|
|
531
|
+
}`,
|
|
532
|
+
lang: 'go',
|
|
533
|
+
chainTo: 'complexity-cuts',
|
|
534
|
+
},
|
|
535
|
+
|
|
536
|
+
// ---------- Rust ----------
|
|
537
|
+
'rs-unwrap-in-prod': {
|
|
538
|
+
why: '`.unwrap()` and `.expect()` panic on `None`/`Err`. In production code they crash the process and lose the structured error. Rust gives you `?`, `match`, and `ok_or` to surface the error to the caller.',
|
|
539
|
+
bad: `let value = map.get(&key).unwrap(); // panics if key missing`,
|
|
540
|
+
good: `let value = map.get(&key).ok_or(Error::MissingKey)?;
|
|
541
|
+
|
|
542
|
+
// Or, when None has a meaningful default:
|
|
543
|
+
let value = map.get(&key).copied().unwrap_or_default();`,
|
|
544
|
+
lang: 'rust',
|
|
545
|
+
chainTo: 'invariant-guard',
|
|
546
|
+
},
|
|
547
|
+
'rs-clone-in-loop': {
|
|
548
|
+
why: 'A `.clone()` inside an iterator allocates a fresh copy per element. If a borrow (`&x`) or `Rc::clone` (cheap atomic increment for shared ownership) would do, the deep clone is wasted work.',
|
|
549
|
+
bad: `// Deep clones every element
|
|
550
|
+
let names: Vec<String> = users.iter().map(|u| u.name.clone()).collect();`,
|
|
551
|
+
good: `// Borrow when possible
|
|
552
|
+
let names: Vec<&str> = users.iter().map(|u| u.name.as_str()).collect();
|
|
553
|
+
|
|
554
|
+
// Cheap reference-counted clone when shared ownership is needed
|
|
555
|
+
let shared: Vec<Rc<String>> = users.iter().map(|u| Rc::clone(&u.name)).collect();`,
|
|
556
|
+
lang: 'rust',
|
|
557
|
+
chainTo: 'complexity-cuts',
|
|
558
|
+
},
|
|
559
|
+
'rs-vec-push-no-capacity': {
|
|
560
|
+
why: '`Vec::new()` starts with capacity 0; each `push` past the current capacity reallocates and copies. Preallocating with `Vec::with_capacity(n)` does one allocation.',
|
|
561
|
+
bad: `let mut out = Vec::new();
|
|
562
|
+
for x in input.iter() {
|
|
563
|
+
out.push(transform(x)); // grows; reallocates log(n) times
|
|
564
|
+
}`,
|
|
565
|
+
good: `let mut out = Vec::with_capacity(input.len());
|
|
566
|
+
for x in input.iter() {
|
|
567
|
+
out.push(transform(x));
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
// Or, when transform is pure:
|
|
571
|
+
let out: Vec<_> = input.iter().map(transform).collect();`,
|
|
572
|
+
lang: 'rust',
|
|
573
|
+
chainTo: 'complexity-cuts',
|
|
574
|
+
},
|
|
575
|
+
'rs-string-push-no-capacity': {
|
|
576
|
+
why: '`String::new()` starts at capacity 0. Each `push_str` past capacity reallocates. `with_capacity` or `join` avoids it.',
|
|
577
|
+
bad: `let mut s = String::new();
|
|
578
|
+
for part in parts.iter() {
|
|
579
|
+
s.push_str(part); // reallocates as it grows
|
|
580
|
+
}`,
|
|
581
|
+
good: `let total: usize = parts.iter().map(|p| p.len()).sum();
|
|
582
|
+
let mut s = String::with_capacity(total);
|
|
583
|
+
for part in parts.iter() {
|
|
584
|
+
s.push_str(part);
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
// Or, simplest:
|
|
588
|
+
let s = parts.join("");`,
|
|
589
|
+
lang: 'rust',
|
|
590
|
+
chainTo: 'complexity-cuts',
|
|
591
|
+
},
|
|
592
|
+
|
|
593
|
+
// ---------- C++ ----------
|
|
594
|
+
'cpp-vector-push-no-reserve': {
|
|
595
|
+
why: '`std::vector::push_back` past the current capacity doubles the backing array and copies/moves every element. Calling `reserve(n)` first does one allocation.',
|
|
596
|
+
bad: `std::vector<int> v;
|
|
597
|
+
for (int i = 0; i < n; ++i) {
|
|
598
|
+
v.push_back(compute(i)); // reallocates log(n) times
|
|
599
|
+
}`,
|
|
600
|
+
good: `std::vector<int> v;
|
|
601
|
+
v.reserve(n);
|
|
602
|
+
for (int i = 0; i < n; ++i) {
|
|
603
|
+
v.push_back(compute(i));
|
|
604
|
+
}`,
|
|
605
|
+
lang: 'cpp',
|
|
606
|
+
chainTo: 'complexity-cuts',
|
|
607
|
+
},
|
|
608
|
+
'cpp-string-concat-in-loop': {
|
|
609
|
+
why: '`std::string::operator+=` past capacity reallocates. Without a `reserve`, repeated concatenation is O(n²). `std::ostringstream` or one `reserve` fixes it.',
|
|
610
|
+
bad: `std::string s;
|
|
611
|
+
for (const auto& part : parts) {
|
|
612
|
+
s += part;
|
|
613
|
+
}`,
|
|
614
|
+
good: `std::ostringstream os;
|
|
615
|
+
for (const auto& part : parts) {
|
|
616
|
+
os << part;
|
|
617
|
+
}
|
|
618
|
+
auto s = os.str();
|
|
619
|
+
|
|
620
|
+
// Or, if size known:
|
|
621
|
+
std::string s;
|
|
622
|
+
s.reserve(total_size);
|
|
623
|
+
for (const auto& part : parts) s += part;`,
|
|
624
|
+
lang: 'cpp',
|
|
625
|
+
chainTo: 'complexity-cuts',
|
|
626
|
+
},
|
|
627
|
+
'cpp-raw-new': {
|
|
628
|
+
why: 'Raw `new` requires a matching `delete` on every exit path. If an exception fires between `new` and `delete`, the memory leaks. Smart pointers (`unique_ptr`, `shared_ptr`) destruct automatically.',
|
|
629
|
+
bad: `Widget* w = new Widget(42);
|
|
630
|
+
configure(w); // if this throws, w leaks
|
|
631
|
+
delete w;`,
|
|
632
|
+
good: `auto w = std::make_unique<Widget>(42);
|
|
633
|
+
configure(w.get()); // destructs on every exit path, including throw`,
|
|
634
|
+
lang: 'cpp',
|
|
635
|
+
chainTo: 'invariant-guard',
|
|
636
|
+
},
|
|
637
|
+
'cpp-range-loop-copy': {
|
|
638
|
+
why: '`for (auto x : container)` copies each element into `x`. For non-trivial types (`std::string`, `std::vector<…>`, custom types) this is expensive. `const auto&` borrows, `auto&` mutates in place.',
|
|
639
|
+
bad: `for (auto s : large_strings) {
|
|
640
|
+
process(s); // s is a fresh copy each iteration
|
|
641
|
+
}`,
|
|
642
|
+
good: `for (const auto& s : large_strings) {
|
|
643
|
+
process(s); // reference, no copy
|
|
644
|
+
}`,
|
|
645
|
+
lang: 'cpp',
|
|
646
|
+
chainTo: null,
|
|
647
|
+
},
|
|
648
|
+
'cpp-map-double-lookup': {
|
|
649
|
+
why: '`m.count(k)` then `m[k]` does two hash lookups (and for `std::map`, two binary searches). `find` returns an iterator that gives both presence and value in one lookup.',
|
|
650
|
+
bad: `if (m.count(k)) {
|
|
651
|
+
use(m[k]); // second lookup
|
|
652
|
+
}`,
|
|
653
|
+
good: `auto it = m.find(k);
|
|
654
|
+
if (it != m.end()) {
|
|
655
|
+
use(it->second);
|
|
656
|
+
}`,
|
|
657
|
+
lang: 'cpp',
|
|
658
|
+
chainTo: 'complexity-cuts',
|
|
659
|
+
},
|
|
660
|
+
|
|
661
|
+
// ---------- PHP ----------
|
|
662
|
+
'php-count-in-for-condition': {
|
|
663
|
+
why: 'PHP recomputes the loop condition every iteration. `count($a)` on a 100k-element array, called 100k times, is 10B element traversals. Hoist it.',
|
|
664
|
+
bad: `<?php
|
|
665
|
+
for ($i = 0; $i < count($a); $i++) {
|
|
666
|
+
echo $a[$i];
|
|
667
|
+
}`,
|
|
668
|
+
good: `<?php
|
|
669
|
+
// Hoist the length
|
|
670
|
+
for ($i = 0, $n = count($a); $i < $n; $i++) {
|
|
671
|
+
echo $a[$i];
|
|
672
|
+
}
|
|
673
|
+
|
|
674
|
+
// Or, idiomatic
|
|
675
|
+
foreach ($a as $x) {
|
|
676
|
+
echo $x;
|
|
677
|
+
}`,
|
|
678
|
+
lang: 'php',
|
|
679
|
+
chainTo: 'complexity-cuts',
|
|
680
|
+
},
|
|
681
|
+
'php-in-array-in-loop': {
|
|
682
|
+
why: '`in_array` is a linear scan. Used inside a loop over m items it is O(n·m). `array_flip + isset` is O(1) per check.',
|
|
683
|
+
bad: `<?php
|
|
684
|
+
foreach ($users as $u) {
|
|
685
|
+
if (in_array($u->id, $banned)) continue;
|
|
686
|
+
ship($u);
|
|
687
|
+
}`,
|
|
688
|
+
good: `<?php
|
|
689
|
+
$bannedSet = array_flip($banned); // O(n) once
|
|
690
|
+
foreach ($users as $u) {
|
|
691
|
+
if (isset($bannedSet[$u->id])) continue; // O(1)
|
|
692
|
+
ship($u);
|
|
693
|
+
}`,
|
|
694
|
+
lang: 'php',
|
|
695
|
+
chainTo: 'complexity-cuts',
|
|
696
|
+
},
|
|
697
|
+
'php-query-in-loop': {
|
|
698
|
+
why: 'A SQL query inside a loop is the textbook N+1 problem. 1000 orders, 1000 round-trips to the DB. Batch with `WHERE id IN (...)` or eager-load in your ORM.',
|
|
699
|
+
bad: `<?php
|
|
700
|
+
foreach ($orderIds as $id) {
|
|
701
|
+
$row = $db->query("SELECT * FROM orders WHERE id = $id"); // 1 query per id
|
|
702
|
+
process($row);
|
|
703
|
+
}`,
|
|
704
|
+
good: `<?php
|
|
705
|
+
// One bulk query
|
|
706
|
+
$placeholders = implode(',', array_fill(0, count($orderIds), '?'));
|
|
707
|
+
$stmt = $db->prepare("SELECT * FROM orders WHERE id IN ($placeholders)");
|
|
708
|
+
$stmt->execute($orderIds);
|
|
709
|
+
foreach ($stmt->fetchAll() as $row) {
|
|
710
|
+
process($row);
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
// Eloquent / Doctrine: use eager loading
|
|
714
|
+
$orders = Order::with('items')->whereIn('id', $orderIds)->get();`,
|
|
715
|
+
lang: 'php',
|
|
716
|
+
chainTo: 'complexity-cuts',
|
|
717
|
+
},
|
|
718
|
+
'php-loose-equality': {
|
|
719
|
+
why: 'PHP `==` does type coercion in surprising ways (`"0" == false` is `true`, `"abc" == 0` was `true` before PHP 8). `===` compares value AND type — no surprises.',
|
|
720
|
+
bad: `<?php
|
|
721
|
+
if ($status == 0) { /* matches "", "0", false, null, 0, "abc" pre-PHP 8 */ }`,
|
|
722
|
+
good: `<?php
|
|
723
|
+
if ($status === 0) { /* matches only int 0 */ }`,
|
|
724
|
+
lang: 'php',
|
|
725
|
+
chainTo: 'invariant-guard',
|
|
726
|
+
},
|
|
727
|
+
|
|
728
|
+
// ---------- Ruby ----------
|
|
729
|
+
'rb-include-in-iterator': {
|
|
730
|
+
why: '`Array#include?` is O(n). Used inside an iterator over m items it is O(n·m). `Set#include?` is O(1).',
|
|
731
|
+
bad: `# O(n·m)
|
|
732
|
+
users.select { |u| banned.include?(u.id) }`,
|
|
733
|
+
good: `# O(n+m)
|
|
734
|
+
require 'set'
|
|
735
|
+
banned_set = Set.new(banned)
|
|
736
|
+
users.select { |u| banned_set.include?(u.id) }`,
|
|
737
|
+
lang: 'ruby',
|
|
738
|
+
chainTo: 'complexity-cuts',
|
|
739
|
+
},
|
|
740
|
+
'rb-n-plus-one-activerecord': {
|
|
741
|
+
why: 'Iterating an ActiveRecord scope and touching an association fires one extra query per row. `includes` (preload or eager_load) fetches everything in a constant number of queries.',
|
|
742
|
+
bad: `# N+1 queries
|
|
743
|
+
Post.all.each do |p|
|
|
744
|
+
puts p.author.name # 1 extra query per post
|
|
745
|
+
end`,
|
|
746
|
+
good: `# 2 queries total (or 1 with eager_load)
|
|
747
|
+
Post.includes(:author).each do |p|
|
|
748
|
+
puts p.author.name
|
|
749
|
+
end`,
|
|
750
|
+
lang: 'ruby',
|
|
751
|
+
chainTo: 'complexity-cuts',
|
|
752
|
+
},
|
|
753
|
+
'rb-bare-rescue': {
|
|
754
|
+
why: 'A bare `rescue` (without a class) catches `StandardError` — including `NoMethodError`, `ArgumentError`, and other bugs you almost always want to surface. Catch the specific class.',
|
|
755
|
+
bad: `begin
|
|
756
|
+
fetch_remote
|
|
757
|
+
rescue
|
|
758
|
+
retry # also catches typos and bugs in fetch_remote
|
|
759
|
+
end`,
|
|
760
|
+
good: `begin
|
|
761
|
+
fetch_remote
|
|
762
|
+
rescue Net::ReadTimeout, Net::OpenTimeout => e
|
|
763
|
+
retry
|
|
764
|
+
end`,
|
|
765
|
+
lang: 'ruby',
|
|
766
|
+
chainTo: 'invariant-guard',
|
|
767
|
+
},
|
|
768
|
+
'rb-string-concat-in-loop': {
|
|
769
|
+
why: '`s += x` creates a new string each iteration — O(n²). `<<` mutates in place (O(n) amortized). `Array#join` is O(n) and idiomatic.',
|
|
770
|
+
bad: `s = ""
|
|
771
|
+
parts.each { |p| s += p } # O(n²)`,
|
|
772
|
+
good: `# Mutating concat
|
|
773
|
+
s = String.new
|
|
774
|
+
parts.each { |p| s << p }
|
|
775
|
+
|
|
776
|
+
# Idiomatic
|
|
777
|
+
s = parts.join`,
|
|
778
|
+
lang: 'ruby',
|
|
779
|
+
chainTo: 'complexity-cuts',
|
|
780
|
+
},
|
|
781
|
+
|
|
782
|
+
// ---------- Shell / Bash ----------
|
|
783
|
+
'sh-set-e-no-pipefail': {
|
|
784
|
+
why: '`set -e` exits on a failed command, but a failure in the middle of a pipe is masked — only the *last* command\'s exit status counts. `set -o pipefail` fixes it. `set -u` catches unset variables.',
|
|
785
|
+
bad: `#!/bin/bash
|
|
786
|
+
set -e
|
|
787
|
+
some_failing_step | grep needle # if some_failing_step fails, script keeps going`,
|
|
788
|
+
good: `#!/bin/bash
|
|
789
|
+
set -euo pipefail
|
|
790
|
+
some_failing_step | grep needle # any failure in the pipe aborts the script`,
|
|
791
|
+
lang: 'shell',
|
|
792
|
+
chainTo: 'invariant-guard',
|
|
793
|
+
},
|
|
794
|
+
'sh-unquoted-var': {
|
|
795
|
+
why: 'An unquoted `$var` is subject to word splitting (on `$IFS`) and glob expansion. A path with a space, a tab, or a `*` will silently do the wrong thing.',
|
|
796
|
+
bad: `if [ -d $dir ]; then echo yes; fi
|
|
797
|
+
# If $dir = "/tmp/with space", expands to: [ -d /tmp/with space ] — syntax error`,
|
|
798
|
+
good: `if [ -d "$dir" ]; then echo yes; fi
|
|
799
|
+
# Always quote. For arrays: "\${arr[@]}".`,
|
|
800
|
+
lang: 'shell',
|
|
801
|
+
chainTo: 'invariant-guard',
|
|
802
|
+
},
|
|
803
|
+
'sh-useless-cat-pipe': {
|
|
804
|
+
why: '`cat file | cmd` reads the file and pipes through `cat` just to feed `cmd`. Every command that takes a file argument can read it directly — one fewer process, clearer intent.',
|
|
805
|
+
bad: `cat access.log | grep "500"`,
|
|
806
|
+
good: `grep "500" access.log
|
|
807
|
+
|
|
808
|
+
# Or stdin redirection if the command takes only stdin:
|
|
809
|
+
cmd < access.log`,
|
|
810
|
+
lang: 'shell',
|
|
811
|
+
chainTo: null,
|
|
812
|
+
},
|
|
813
|
+
'sh-for-ls': {
|
|
814
|
+
why: '`for f in $(ls ...)` breaks on filenames with spaces, tabs, newlines, or globs. The output of `ls` is meant for humans, not programs. Use a glob or `find -print0 | xargs -0`.',
|
|
815
|
+
bad: `for f in $(ls *.txt); do
|
|
816
|
+
process "$f" # breaks on "my file.txt"
|
|
817
|
+
done`,
|
|
818
|
+
good: `# Glob directly
|
|
819
|
+
for f in *.txt; do
|
|
820
|
+
process "$f"
|
|
821
|
+
done
|
|
822
|
+
|
|
823
|
+
# Or, when find is necessary
|
|
824
|
+
find . -name '*.txt' -print0 | xargs -0 -n1 process`,
|
|
825
|
+
lang: 'shell',
|
|
826
|
+
chainTo: 'invariant-guard',
|
|
827
|
+
},
|
|
828
|
+
};
|
|
829
|
+
|
|
830
|
+
/**
 * Builds the Markdown document (frontmatter header plus body) for one rule.
 *
 * The authored example for the rule is looked up in `EXAMPLES` by `rule.id`;
 * its `why`, `bad`, `good`, `lang` and `chainTo` fields are combined with the
 * rule's own `title`, `severity` and `fix`.
 *
 * @param {object} rule - Rule record; `id`, `title`, `severity` and `fix` are read.
 * @param {string} lang - Language label written to the `language` field and the tags.
 * @returns {string} The complete Markdown text for the rule.
 * @throws {Error} When `EXAMPLES` has no entry under `rule.id`.
 */
function render(rule, lang) {
  const example = EXAMPLES[rule.id];
  if (!example) {
    throw new Error('No example authored for ' + rule.id);
  }

  // Falsy entries (for instance a null `chainTo`) are left out of the tag list.
  const tags = [lang, rule.severity, example.chainTo]
    .filter((tag) => Boolean(tag))
    .join(', ');

  // The "Escalate to" section is only emitted when the example names a skill to chain to.
  let chain = '';
  if (example.chainTo) {
    chain = `\n## Escalate to\n\nIf this pattern is widespread in the codebase, load **${example.chainTo}** for the corrective workflow.\n`;
  }

  const fence = '```';

  // NOTE(review): double quotes in the title are backslash-escaped, but the
  // value itself is not wrapped in quotes — confirm this matches what the
  // frontmatter consumer expects.
  const frontmatter = [
    '---',
    `id: ${rule.id}`,
    `title: ${rule.title.replace(/"/g, '\\"')}`,
    `severity: ${rule.severity}`,
    `impact: ${SEV_TO_IMPACT[rule.severity]}`,
    `impactDescription: ${SEV_TO_DESC[rule.severity]}`,
    `language: ${lang}`,
    `tags: ${tags}`,
    '---',
  ];

  // Each interpolated value goes through a template literal so that missing
  // fields stringify exactly as they would inside one big template.
  const body = [
    '',
    `# ${rule.title}`,
    '',
    `${example.why}`,
    '',
    '## Fix',
    '',
    `${rule.fix}`,
    '',
    '## Incorrect',
    '',
    `${fence}${example.lang}`,
    `${example.bad}`,
    fence,
    '',
    '## Correct',
    '',
    `${fence}${example.lang}`,
    `${example.good}`,
    fence,
    chain,
  ];

  return [...frontmatter, ...body].join('\n');
}
|
|
868
|
+
|
|
869
|
+
// Render every rule found in rules/*.json to its own Markdown file under OUT,
// then report how many files were written per language file and in total.
const counts = {};
const rulesDir = path.join(ROOT, 'rules');

// Language file names without the `.json` extension.
const LANG_FILES = [];
for (const entry of fs.readdirSync(rulesDir)) {
  if (entry.endsWith('.json')) {
    LANG_FILES.push(entry.replace(/\.json$/, ''));
  }
}

let total = 0;
LANG_FILES.forEach((langFile) => {
  const ruleSet = JSON.parse(fs.readFileSync(path.join(rulesDir, `${langFile}.json`), 'utf8'));
  let written = 0;
  for (const rule of ruleSet.rules) {
    // The output file is named after the rule id.
    fs.writeFileSync(path.join(OUT, `${rule.id}.md`), render(rule, ruleSet.language));
    written += 1;
  }
  counts[langFile] = written;
  total += written;
});

console.log('Generated rule docs:', counts);
console.log('Total:', total);
|