lemmaly 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +238 -0
  3. package/cli/gen-agents-md.js +60 -0
  4. package/cli/gen-rule-docs.js +885 -0
  5. package/cli/lemmaly.js +162 -0
  6. package/commands/benchmark.md +40 -0
  7. package/commands/budget.md +53 -0
  8. package/commands/complexity.md +26 -0
  9. package/commands/cut.md +27 -0
  10. package/commands/hotpath.md +22 -0
  11. package/commands/invariant.md +22 -0
  12. package/commands/n-plus-one.md +20 -0
  13. package/commands/profile.md +34 -0
  14. package/commands/regress.md +43 -0
  15. package/commands/scale-check.md +37 -0
  16. package/commands/ship-check.md +26 -0
  17. package/package.json +48 -0
  18. package/rules/cpp.json +46 -0
  19. package/rules/csharp.json +38 -0
  20. package/rules/go.json +46 -0
  21. package/rules/java.json +38 -0
  22. package/rules/javascript.json +102 -0
  23. package/rules/php.json +38 -0
  24. package/rules/python.json +62 -0
  25. package/rules/ruby.json +38 -0
  26. package/rules/rust.json +38 -0
  27. package/rules/shell.json +38 -0
  28. package/rules/sql.json +54 -0
  29. package/skills/complexity-cuts/SKILL.md +259 -0
  30. package/skills/invariant-guard/SKILL.md +310 -0
  31. package/skills/lemmaly/AGENTS.md +1869 -0
  32. package/skills/lemmaly/SKILL.md +365 -0
  33. package/skills/lemmaly/references/async.md +135 -0
  34. package/skills/lemmaly/references/complexity.md +66 -0
  35. package/skills/lemmaly/references/hot-paths.md +87 -0
  36. package/skills/lemmaly/references/memory.md +118 -0
  37. package/skills/lemmaly/references/n-plus-one.md +139 -0
  38. package/skills/lemmaly/rules/cpp-map-double-lookup.md +38 -0
  39. package/skills/lemmaly/rules/cpp-range-loop-copy.md +33 -0
  40. package/skills/lemmaly/rules/cpp-raw-new.md +36 -0
  41. package/skills/lemmaly/rules/cpp-string-concat-in-loop.md +45 -0
  42. package/skills/lemmaly/rules/cpp-vector-push-no-reserve.md +40 -0
  43. package/skills/lemmaly/rules/cs-async-void.md +45 -0
  44. package/skills/lemmaly/rules/cs-disposable-no-using.md +32 -0
  45. package/skills/lemmaly/rules/cs-list-contains-in-loop.md +36 -0
  46. package/skills/lemmaly/rules/cs-string-concat-in-loop.md +42 -0
  47. package/skills/lemmaly/rules/go-defer-in-loop.md +39 -0
  48. package/skills/lemmaly/rules/go-err-not-checked.md +38 -0
  49. package/skills/lemmaly/rules/go-loop-var-capture.md +47 -0
  50. package/skills/lemmaly/rules/go-slice-append-no-cap.md +39 -0
  51. package/skills/lemmaly/rules/go-string-concat-in-loop.md +44 -0
  52. package/skills/lemmaly/rules/java-arraylist-remove-in-for-i.md +44 -0
  53. package/skills/lemmaly/rules/java-bare-catch-exception.md +42 -0
  54. package/skills/lemmaly/rules/java-list-contains-in-loop.md +40 -0
  55. package/skills/lemmaly/rules/java-string-concat-in-loop.md +42 -0
  56. package/skills/lemmaly/rules/js-anonymous-handler-jsx.md +31 -0
  57. package/skills/lemmaly/rules/js-array-key-index.md +29 -0
  58. package/skills/lemmaly/rules/js-async-in-foreach.md +43 -0
  59. package/skills/lemmaly/rules/js-await-in-for-loop.md +41 -0
  60. package/skills/lemmaly/rules/js-deep-clone-via-json.md +33 -0
  61. package/skills/lemmaly/rules/js-helper-call-in-iterator.md +41 -0
  62. package/skills/lemmaly/rules/js-includes-in-iterator.md +37 -0
  63. package/skills/lemmaly/rules/js-inline-object-jsx-prop.md +35 -0
  64. package/skills/lemmaly/rules/js-nested-for-loops.md +45 -0
  65. package/skills/lemmaly/rules/js-spread-in-reduce.md +38 -0
  66. package/skills/lemmaly/rules/js-unique-via-indexof.md +35 -0
  67. package/skills/lemmaly/rules/js-useeffect-missing-deps.md +33 -0
  68. package/skills/lemmaly/rules/php-count-in-for-condition.md +45 -0
  69. package/skills/lemmaly/rules/php-in-array-in-loop.md +42 -0
  70. package/skills/lemmaly/rules/php-loose-equality.md +35 -0
  71. package/skills/lemmaly/rules/php-query-in-loop.md +47 -0
  72. package/skills/lemmaly/rules/py-bare-except.md +39 -0
  73. package/skills/lemmaly/rules/py-django-loop-without-eager.md +42 -0
  74. package/skills/lemmaly/rules/py-in-list-literal.md +37 -0
  75. package/skills/lemmaly/rules/py-mutable-default-arg.md +39 -0
  76. package/skills/lemmaly/rules/py-open-without-with.md +33 -0
  77. package/skills/lemmaly/rules/py-range-len.md +35 -0
  78. package/skills/lemmaly/rules/py-string-concat-in-loop.md +43 -0
  79. package/skills/lemmaly/rules/rb-bare-rescue.md +41 -0
  80. package/skills/lemmaly/rules/rb-include-in-iterator.md +37 -0
  81. package/skills/lemmaly/rules/rb-n-plus-one-activerecord.md +39 -0
  82. package/skills/lemmaly/rules/rb-string-concat-in-loop.md +39 -0
  83. package/skills/lemmaly/rules/rs-clone-in-loop.md +38 -0
  84. package/skills/lemmaly/rules/rs-string-push-no-capacity.md +43 -0
  85. package/skills/lemmaly/rules/rs-unwrap-in-prod.md +36 -0
  86. package/skills/lemmaly/rules/rs-vec-push-no-capacity.md +42 -0
  87. package/skills/lemmaly/rules/sh-for-ls.md +41 -0
  88. package/skills/lemmaly/rules/sh-set-e-no-pipefail.md +37 -0
  89. package/skills/lemmaly/rules/sh-unquoted-var.md +35 -0
  90. package/skills/lemmaly/rules/sh-useless-cat-pipe.md +32 -0
  91. package/skills/lemmaly/rules/sql-leading-wildcard-like.md +34 -0
  92. package/skills/lemmaly/rules/sql-not-in-subquery.md +38 -0
  93. package/skills/lemmaly/rules/sql-or-in-where.md +35 -0
  94. package/skills/lemmaly/rules/sql-select-no-limit.md +37 -0
  95. package/skills/lemmaly/rules/sql-select-star.md +29 -0
  96. package/skills/lemmaly/rules/sql-update-no-where.md +35 -0
  97. package/skills/mathguard/SKILL.md +277 -0
@@ -0,0 +1,365 @@
1
+ ---
2
+ name: lemmaly
3
+ description: Use whenever writing, editing, or reviewing code that involves loops, collections, lookups, searches, joins, recursion, graphs, queries, or any computation over more than a handful of items. Forces algorithmic thinking BEFORE writing code — names the time/space complexity, the data structure, the algorithm family, and the dominant input dimension. Catches O(n^2) loops, N+1 queries, repeated work, wrong data structures, and lazy brute-force solutions that AI assistants ship by default. Pairs with mathguard (advanced math optimization), invariant-guard (correctness), and complexity-cuts (corrective Big-O fixes).
4
+ metadata:
5
+ priority: 1
6
+ role: gateway
7
+ pathPatterns:
8
+ - '**/*.{js,jsx,ts,tsx,mjs,cjs}'
9
+ - '**/*.py'
10
+ - '**/*.sql'
11
+ - '**/*.java'
12
+ - '**/*.cs'
13
+ - '**/*.go'
14
+ - '**/*.rs'
15
+ - '**/*.{cpp,cc,cxx,hpp,hh,hxx}'
16
+ - '**/*.php'
17
+ - '**/*.rb'
18
+ - '**/*.{sh,bash}'
19
+ importPatterns:
20
+ - 'prisma'
21
+ - 'drizzle-orm'
22
+ - 'sqlalchemy'
23
+ - 'django.db'
24
+ - 'mongoose'
25
+ - 'java.util.stream'
26
+ - 'System.Linq'
27
+ - 'strings.Builder'
28
+ - 'std::vector'
29
+ - 'std::string'
30
+ - 'ActiveRecord'
31
+ chainTo:
32
+ - skill: complexity-cuts
33
+ when: 'existing code already has bad Big-O'
34
+ - skill: invariant-guard
35
+ when: 'algorithm has subtle correctness traps (loop invariants, base cases)'
36
+ - skill: mathguard
37
+ when: 'classical algorithm is at its lower bound and n is large'
38
+ retrieval:
39
+ aliases:
40
+ - algorithm-first
41
+ - big-o-discipline
42
+ - complexity-before-code
43
+ intents:
44
+ - choose the right algorithm
45
+ - state complexity before coding
46
+ - avoid n-squared
47
+ - prevent n-plus-one
48
+ ---
49
+
50
+ # lemmaly — Algorithm-First Proof
51
+
52
+ The model already knows Big-O, hash tables, divide-and-conquer, dynamic programming, sorting, graph algorithms, and amortized analysis. It just does not apply them spontaneously. lemmaly fixes the behavior, not the knowledge.
53
+
54
+ This skill is the gateway. It enforces the hard rules that every other guard in the suite assumes.
55
+
56
+ **Violating the letter of these rules is violating the spirit of the skill.** "Just this once" is how O(n²) ships to production.
57
+
58
+ ## How to use — pick the right skill
59
+
60
+ The suite has four skills. Use this table to route to the right one. When in doubt, **start at lemmaly** — it is the gateway and will tell you when to escalate.
61
+
62
+ | If you are about to… | Use | Why |
63
+ | --- | --- | --- |
64
+ | Write *new* code that loops, queries, joins, recurses, or processes a collection | **lemmaly** | Forces complexity + data structure + algorithm family **before** code is written. |
65
+ | Refactor *existing* code that is already slow, OOMs, times out, or has nested loops / N+1 / repeated work | **complexity-cuts** | Corrective playbook for code that already shipped with bad Big-O. |
66
+ | Implement an algorithm where the obvious version is subtly wrong (binary search variants, in-place dedup, Boyer–Moore, QuickSelect partition, recursion with accumulators, fixed-point / termination concerns) | **invariant-guard** | Forces writing the function contract + loop invariant before code. The trap is in the contract, not the loop body. |
67
+ | Work with n ≥ 10⁶, similarity search, dedup at scale, top-K, streaming analytics, cardinality estimation, embeddings, FFT/NTT, dimensionality reduction, computational geometry, randomized algorithms | **mathguard** | Classical algorithms have hit their lower bound; an approximate or math-heavy technique (Bloom, HLL, Count-Min, MinHash/LSH, FFT, JL projection, sweep line, kd-tree) gives the asymptotic win. |
68
+ | Audit a codebase / PR for known anti-patterns (await-in-loop, .includes inside .filter, string-concat in loop, SELECT *, N+1, etc.) | **lemmaly** + `lemmaly scan` | The rule catalog plus the CLI scanner catches the 59 documented patterns across 11 languages. |
69
+
70
+ ### Routing flow
71
+
72
+ ```text
73
+ Are you writing new code?
74
+ ├── yes → lemmaly (state complexity, structure, family BEFORE coding)
75
+ │ ├── classical algorithm at its lower bound AND n is large? → mathguard
76
+ │ └── subtle correctness trap (invariant, base case, off-by-one)? → invariant-guard
77
+ └── no, refactoring existing slow / OOM / timed-out code → complexity-cuts
78
+ └── still slow after classical fixes? → mathguard
79
+ ```
80
+
81
+ ### One-line mental model
82
+
83
+ - **lemmaly** = think first (prevention).
84
+ - **complexity-cuts** = clean up bad Big-O (correction).
85
+ - **invariant-guard** = prove it's correct (verification).
86
+ - **mathguard** = beat the classical floor (acceleration).
87
+
88
+ ## The Iron Law
89
+
90
+ ```text
91
+ NO NON-TRIVIAL CODE WITHOUT STATED COMPLEXITY, DATA STRUCTURE, AND ALGORITHM FAMILY
92
+ ```
93
+
94
+ Before you write a loop, a recursion, a query, or any computation over more than a handful of items, three things must appear in your message — in this order:
95
+
96
+ 1. `time = O(?)`, `space = O(?)`, with the dominant input dimension named.
97
+ 2. The data structure you will use, with a one-phrase reason.
98
+ 3. The algorithm family (one of: linear scan, divide and conquer, two-pointer, sliding window, binary search, sort+sweep, hash join, BFS/DFS, topo sort, Dijkstra/A*, union-find, DP, greedy, recursion+memo, prefix sum, segment tree, monoid reduction).
99
+
100
+ If you cannot state all three, you do not understand the problem yet. Ask, or read more code. Do not write code.
101
+
102
+ ## Non-negotiable rules
103
+
104
+ 1. **State complexity before writing any non-trivial code.** In one line:
105
+ - `time = O(?)`, `space = O(?)`
106
+ - Dominant input dimension: `n = what`, with realistic magnitude (e.g. `n ~ 10^6 rows`)
107
+ - If you cannot state these, you do not yet understand the problem. Ask, or read more code.
108
+
109
+ 2. **Name the data structure with a one-phrase reason.** Every collection-shaped value gets a deliberate choice from `Array / List / Set / HashMap / TreeMap / Heap / Deque / Trie / Graph / BitSet / Counter / LinkedList` — with the reason: "Set for O(1) membership inside the loop", "Heap for top-K in O(n log k)", "Counter to fold the nested loop into a single pass". Default to hashed structures (`Set`, `Map`) for lookup inside loops. Default to streaming/iterator over materialized list when n is large.
110
+
111
+ 3. **Identify the algorithm family before writing.** Name one of: `linear scan`, `divide and conquer`, `two-pointer`, `sliding window`, `binary search`, `sort + sweep`, `hash join`, `BFS/DFS`, `topological sort`, `Dijkstra/A*`, `union-find`, `dynamic programming`, `greedy`, `recursion + memoization`, `prefix sum`, `segment tree`, `monoid reduction`. If you cannot name a family, you are about to write brute force. Stop and reconsider.
112
+
113
+ 4. **Repeated work in loops is algorithmic waste.** All of these are presumed wrong until justified:
114
+ - I/O inside a loop (database queries, HTTP calls, file reads) — batch with `IN (...)`, `Promise.all`, bulk endpoints, streaming
115
+ - Recomputing the same value in a loop — hoist or memoize
116
+ - Re-sorting / re-grouping inside a loop — sort once outside
117
+ - Linear scan (`.find`, `.indexOf`, `.includes`, `in list`) inside a loop — precompute an index `Map`
118
+ - Allocating fresh structures per iteration when one can be reused — hoist allocation
119
+ - Materializing intermediate collections only to iterate again — fuse into one pass
120
+
121
+ If you must do any of these inside a loop, write one comment line explaining why.
122
+
123
+ 5. **No invented complexity or numbers.** Never write "O(log n) on average" without an argument. Never write "10x faster" or "~3ms" without measuring. If you cannot derive the complexity, write `<complexity: TBD>`. If you have not measured, write `<measured: TBD>`. Move on.
124
+
125
+ ## The flow
126
+
127
+ ```dot
128
+ digraph lemmaly_flow {
129
+ rankdir=LR;
130
+ start [label="About to write\ncode over a collection?", shape=diamond];
131
+ state [label="State: time, space,\nn, structure, family", shape=box, style=filled, fillcolor="#fff3cd"];
132
+ derived [label="Can derive all 5?", shape=diamond];
133
+ ask [label="Stop. Ask, or\nread more code.", shape=box, style=filled, fillcolor="#f8d7da"];
134
+ repeated [label="Loop body has I/O,\n.find, sort, await?", shape=diamond];
135
+ justify [label="Write one-line\njustification", shape=box, style=filled, fillcolor="#fff3cd"];
136
+ code [label="Write code\nmatching claims", shape=box, style=filled, fillcolor="#d4edda"];
137
+ check [label="Verification\nchecklist passes?", shape=diamond];
138
+ escalate [label="Escalate:\ncomplexity-cuts /\ninvariant-guard /\nmathguard", shape=box, style=filled, fillcolor="#cfe2ff"];
139
+ done [label="Ship", shape=ellipse];
140
+
141
+ start -> state [label="yes"];
142
+ start -> done [label="trivial / n<10"];
143
+ state -> derived;
144
+ derived -> ask [label="no"];
145
+ derived -> repeated [label="yes"];
146
+ repeated -> justify [label="yes"];
147
+ repeated -> code [label="no"];
148
+ justify -> code;
149
+ code -> check;
150
+ check -> done [label="yes"];
151
+ check -> escalate [label="no"];
152
+ escalate -> state;
153
+ }
154
+ ```
155
+
156
+ ## The pre-write protocol
157
+
158
+ Before producing non-trivial code, your message must contain — in this order:
159
+
160
+ 1. **Problem shape** — one sentence. ("Given n events with a timestamp, find the longest contiguous window where total weight ≤ K.")
161
+ 2. **Input dimensions** — `n = ?`, realistic magnitude, whether hot path.
162
+ 3. **Target complexity** — `time = O(?)`, `space = O(?)`.
163
+ 4. **Data structures** — name them with a phrase each.
164
+ 5. **Algorithm family** — one phrase.
165
+ 6. **Edge cases you will handle** — empty, singleton, all-equal, n=1, n=max, overflow, duplicates. List the ones that apply.
166
+ 7. **The code.**
167
+
168
+ If any of 1–6 is missing, do not emit code yet.
169
+
170
+ ## When to load references
171
+
172
+ Load only the file you need. Do not bulk-load.
173
+
174
+ - `references/complexity.md` — choosing between O(1) / O(log n) / O(n) / O(n log n) / O(n^2) data structures and algorithms, with the practical n-thresholds where each starts to hurt.
175
+ - `references/n-plus-one.md` — ORM query loops (Prisma, Drizzle, SQLAlchemy, Django, ActiveRecord), `IN`/`join`/`select_related` fixes, batching patterns.
176
+ - `references/memory.md` — closures retaining DOM/state, unbounded caches, event-listener leaks, large-object retention, streaming over buffering.
177
+ - `references/async.md` — `Promise.all` vs sequential, concurrency limits, request coalescing, debouncing vs throttling, AbortController.
178
+ - `references/hot-paths.md` — recognizing hot paths (render functions, request handlers, inner loops, event listeners) and the kinds of work that do not belong in them.
179
+
180
+ ## Rule catalog
181
+
182
+ Every anti-pattern the CLI scanner catches has its own Markdown file under `rules/<rule-id>.md`. Load the specific rule when the pattern appears in code under review. Each rule file contains the why, the Incorrect example, the Correct example, and the sibling skill to escalate to.
183
+
184
+ **Languages covered (59 rules across 11 languages):** JavaScript / TypeScript, Python, SQL, Java, C#, C++, Go, Rust, PHP, Ruby, Shell / Bash.
185
+
186
+ **CRITICAL severity (error in CI):**
187
+
188
+ - `js-await-in-for-loop` — N+1 over network
189
+ - `js-async-in-foreach` — dropped promises
190
+ - `py-mutable-default-arg` — shared default state
191
+ - `sql-update-no-where` — touches every row
192
+ - `java-arraylist-remove-in-for-i` — index shifts; ConcurrentModification
193
+ - `cs-async-void` — exceptions unobserved; crashes the process
194
+ - `go-loop-var-capture` — pre-1.22 race on the last value
195
+ - `php-query-in-loop` — N+1 against the database
196
+
197
+ **HIGH severity (warning in CI):**
198
+
199
+ - `js-deep-clone-via-json` — slow; loses Dates/Maps/undefined
200
+ - `js-useeffect-missing-deps` — runs every render
201
+ - `js-inline-object-jsx-prop` — new ref every render
202
+ - `js-anonymous-handler-jsx` — breaks `React.memo`
203
+ - `js-spread-in-reduce` — O(n²) accumulator copies
204
+ - `js-unique-via-indexof` — O(n²) dedupe
205
+ - `js-helper-call-in-iterator` — N round-trips
206
+ - `py-string-concat-in-loop` — O(n²) string build
207
+ - `py-django-loop-without-eager` — N+1 in Django ORM
208
+ - `py-bare-except` — hides timeouts, OOM, Ctrl-C
209
+ - `sql-select-star` — defeats index-only scans
210
+ - `sql-leading-wildcard-like` — cannot use B-tree index
211
+ - `sql-not-in-subquery` — null-unsafe
212
+ - `java-string-concat-in-loop` — O(n²); use StringBuilder
213
+ - `java-list-contains-in-loop` — O(n·m); use HashSet
214
+ - `java-bare-catch-exception` — swallows root cause
215
+ - `cs-string-concat-in-loop` — O(n²); use StringBuilder
216
+ - `cs-list-contains-in-loop` — O(n·m); use HashSet
217
+ - `cs-disposable-no-using` — leak on exception
218
+ - `go-string-concat-in-loop` — O(n²); use strings.Builder
219
+ - `go-defer-in-loop` — defers accumulate to function exit
220
+ - `go-err-not-checked` — silent failures
221
+ - `rs-unwrap-in-prod` — panics on None/Err
222
+ - `cpp-string-concat-in-loop` — O(n²) without reserve
223
+ - `cpp-raw-new` — manual delete; exception-unsafe
224
+ - `php-count-in-for-condition` — recomputed every iteration
225
+ - `php-in-array-in-loop` — O(n·m); use array_flip + isset
226
+ - `rb-include-in-iterator` — O(n·m); use Set
227
+ - `rb-n-plus-one-activerecord` — eager-load with `includes`
228
+ - `rb-bare-rescue` — catches StandardError; hides bugs
229
+ - `sh-set-e-no-pipefail` — pipe failures masked
230
+ - `sh-unquoted-var` — word splitting / glob expansion
231
+ - `sh-for-ls` — breaks on spaces / newlines in filenames
232
+
233
+ **MEDIUM severity (info in CI):**
234
+
235
+ - `js-nested-for-loops` — O(n·m); hash one side
236
+ - `js-includes-in-iterator` — O(n·m); use a Set
237
+ - `js-array-key-index` — breaks identity for reorderable lists
238
+ - `py-range-len` — un-Pythonic; use `enumerate`
239
+ - `py-in-list-literal` — O(n) membership; use a `set`
240
+ - `py-open-without-with` — leaked file descriptors
241
+ - `sql-select-no-limit` — unbounded result set
242
+ - `sql-or-in-where` — can prevent index use
243
+ - `go-slice-append-no-cap` — repeated reallocation
244
+ - `rs-clone-in-loop` — borrow instead
245
+ - `rs-vec-push-no-capacity` — preallocate
246
+ - `rs-string-push-no-capacity` — preallocate / join
247
+ - `cpp-vector-push-no-reserve` — call reserve(n)
248
+ - `cpp-range-loop-copy` — use `const auto&`
249
+ - `cpp-map-double-lookup` — `find` once
250
+ - `php-loose-equality` — use `===`
251
+ - `rb-string-concat-in-loop` — O(n²) with `+=`
252
+ - `sh-useless-cat-pipe` — pass file directly
253
+
254
+ Run `lemmaly rules` for the same list from the CLI. Run `lemmaly scan <path>` (or `node cli/lemmaly.js scan <path>` from a source checkout) to flag instances.
255
+
256
+ ## When to escalate to sibling skills
257
+
258
+ lemmaly handles classical, day-to-day algorithmic discipline. Escalate when:
259
+
260
+ - **Math-level optimization** (probabilistic data structures, FFT, dimensionality reduction, approximation algorithms, computational geometry) — load **mathguard**.
261
+ - **Algorithm correctness** (loop invariants, termination, recursion base cases, edge cases that tests miss) — load **invariant-guard**.
262
+ - **Existing code with bad complexity that already shipped** — load **complexity-cuts** for the corrective transformation playbook.
263
+
264
+ ## Canonical example — protocol vs no-protocol
265
+
266
+ The same problem with and without the seven-step protocol.
267
+
268
+ **Problem.** Given `users: User[]` and `bannedIds: string[]`, return users whose `id` is not banned. Realistic n: 50k users, 5k banned.
269
+
270
+ <Bad>
271
+
272
+ ```ts
273
+ // No protocol — looks idiomatic, ships O(n·m)
274
+ const active = users.filter((u) => !bannedIds.includes(u.id));
275
+ ```
276
+
277
+ `bannedIds.includes` is O(m) per call. The filter runs it n times → 50k × 5k = 250M comparisons.
278
+
279
+ </Bad>
280
+
281
+ <Good>
282
+
283
+ ```ts
284
+ // Protocol applied:
285
+ // time = O(n + m), space = O(m), n = 50k users, m = 5k banned
286
+ // structure: Set<string> for O(1) membership inside the loop
287
+ // family: linear scan with hashed lookup
288
+ // edge cases: empty users → [], empty bannedIds → users, duplicates in bannedIds → fine (Set dedupes)
289
+ const banned = new Set(bannedIds);
290
+ const active = users.filter((u) => !banned.has(u.id));
291
+ ```
292
+
293
+ </Good>
294
+
295
+ The Bad version is the default an AI ships when asked "filter the active users." The Good version is what the protocol forces — without changing how the code reads.
296
+
297
+ ## Output discipline
298
+
299
+ Code you emit must:
300
+
301
+ - Be preceded by the seven-step pre-write protocol above.
302
+ - Use the data structures you named.
303
+ - Match the complexity you claimed (if it does not, you lied — go back).
304
+ - Handle the edge cases you listed.
305
+
306
+ ## Rationalizations to watch for
307
+
308
+ These are real verbatim thoughts captured from controlled tests where the model shipped O(n·m) code that the seven-step protocol would have prevented:
309
+
310
+ | Excuse | Reality |
311
+ | --- | --- |
312
+ | "`.filter` then `.reduce` is the idiomatic way, ship it." | Idiomatic ≠ correct asymptotic. Idiom-driven coding is how O(n²) ships. |
313
+ | "It's fine for now, we can optimize later." | Later is a different engineer with no context. State the complexity now. |
314
+ | "I'll just use `Array.find` here, it's just one lookup." | One `Array.find` inside a loop over `n` items is `n` linear scans — O(n²) total. Build the `Map` once, outside the loop. |
315
+ | "The data is small in dev — I'll worry about scale when we ship." | Production data is never the size of dev data. The seven-step protocol takes 30 seconds. |
316
+ | "I already understand the problem, the protocol is overhead." | The cases the protocol "wastes time on" are the cases that break in prod. |
317
+
318
+ If any of these sound familiar mid-thought: stop, write the seven steps.
319
+
320
+ ## Red flags — STOP and restart the protocol
321
+
322
+ - About to write a `for` inside a `for` without first stating it is the intended O(n·m).
323
+ - About to call `.find` / `.includes` / `.indexOf` inside a loop body.
324
+ - About to `await` inside `for` / `map` / `forEach` over independent items.
325
+ - About to issue one query per item in a collection.
326
+ - About to recurse without stating the base case or memoization plan.
327
+ - About to write code without having stated complexity.
328
+ - About to claim "this is fast" / "this is efficient" / "this scales" without a derivation.
329
+ - About to copy a brute-force solution from memory because it "should work for now".
330
+
331
+ All of these mean: stop, restart the seven-step protocol, choose a better algorithm or explicitly accept the brute force with a written justification.
332
+
333
+ ## Verification checklist
334
+
335
+ Before claiming the implementation is done:
336
+
337
+ - [ ] Stated `time = O(?)` and `space = O(?)` appear in the message or PR description.
338
+ - [ ] Dominant input dimension is named with a realistic magnitude.
339
+ - [ ] Every collection-shaped value has a deliberate data-structure choice with a one-phrase reason.
340
+ - [ ] The algorithm family is named (not "a loop").
341
+ - [ ] No I/O, `.find` / `.includes` / `.indexOf`, regex compile, sort, or independent `await` sits inside a loop without a one-line justification.
342
+ - [ ] The shipped code matches the complexity that was claimed (re-derive if uncertain).
343
+ - [ ] Edge cases listed in the pre-write protocol each have a corresponding code path or test.
344
+ - [ ] Any "fast" / "efficient" / "scales" claims have either a derivation or a measurement — `<measured: TBD>` is acceptable; an unsupported claim is not.
345
+
346
+ Cannot check every box? You did not run the protocol. Restart from step 1.
347
+
348
+ ## Real-world impact
349
+
350
+ Measured on the `examples/before-after/` reference pair shipped with this repo (same component, default AI output vs protocol-applied output):
351
+
352
+ | Metric | Without protocol (`bad.jsx`) | With protocol (`good.jsx`) |
353
+ |---|---|---|
354
+ | CLI scan findings | **2 errors, 5 warnings, 3 info** (10 total) | **0 errors, 1 warning, 0 info** |
355
+ | Asymptotic complexity of hot path | `O(n·m)` lookup inside render | `O(n+m)` with hoisted index |
356
+ | Async pattern | `await` inside `for` over independent items | `Promise.all` with bulk fetch |
357
+ | Reproduce | `node cli/lemmaly.js scan examples/before-after/bad.jsx` | `…/good.jsx` |
358
+
359
+ The one remaining warning on the good file (`js-anonymous-handler-jsx`) is an inline handler the protocol explicitly accepts when the child is not `React.memo` — it is documented in the example, not an oversight.
360
+
361
+ These are the same anti-patterns the four skills catch before code is written.
362
+
363
+ ## The thesis, in one line
364
+
365
+ > **AI ships algorithmically lazy code by default. lemmaly makes it think first.**
@@ -0,0 +1,135 @@
1
+ # Async reference
2
+
3
+ ## Sequential vs parallel
4
+
5
+ ```js
6
+ // Bad — sequential, total = sum of latencies
7
+ const user = await fetchUser(id);
8
+ const posts = await fetchPosts(id);
9
+ const friends = await fetchFriends(id);
10
+
11
+ // Good — parallel, total = max of latencies
12
+ const [user, posts, friends] = await Promise.all([
13
+ fetchUser(id),
14
+ fetchPosts(id),
15
+ fetchFriends(id),
16
+ ]);
17
+ ```
18
+
19
+ Apply this every time the awaits don't depend on each other. The model often writes the sequential form by reflex.
20
+
21
+ ## When parallel is wrong
22
+
23
+ - Each call mutates shared state (race).
24
+ - Each call needs the previous result.
25
+ - Each call counts against a strict rate limit and you'd burst over it.
26
+ - Each call holds an exclusive lock or DB transaction.
27
+
28
+ In those cases, write the sequential form and leave a one-line comment.
29
+
30
+ ## Concurrency limit ("don't fan out to infinity")
31
+
32
+ `Promise.all(thousand_things.map((x) => fetch(x)))` fires 1000 requests at the server at once (passing `fetch` directly to `.map` would also hand it the element index as its options argument). Cap it:
33
+
34
+ ```js
35
+ // p-limit (npm)
36
+ import pLimit from 'p-limit';
37
+ const limit = pLimit(10); // ten in flight
38
+ const results = await Promise.all(items.map(i => limit(() => fetchOne(i))));
39
+ ```
40
+
41
+ Pick the concurrency to fit the downstream's capacity, not the network's.
42
+
43
+ ## `forEach` is async-hostile
44
+
45
+ ```js
46
+ // Bad — forEach ignores the returned promises; execution continues before any save() resolves
47
+ items.forEach(async i => {
48
+ await save(i);
49
+ });
50
+ console.log('done'); // lies
51
+
52
+ // Good — pick ONE of the two forms below
53
+ for (const i of items) await save(i); // sequential
54
+ await Promise.all(items.map(i => save(i))); // parallel
55
+ ```
56
+
57
+ If you see `async` inside `.forEach(`, it's almost always wrong.
58
+
59
+ ## Cancellation: `AbortController`
60
+
61
+ User clicks away, tab hidden, debounced search supersedes the prior request — all should cancel:
62
+
63
+ ```js
64
+ const ctrl = new AbortController();
65
+ const res = await fetch(url, { signal: ctrl.signal });
66
+ // later
67
+ ctrl.abort();
68
+ ```
69
+
70
+ In React: cancel in the cleanup of `useEffect`. In a search box: cancel the prior controller before issuing the next request.
71
+
72
+ ## Debounce vs throttle vs request coalescing
73
+
74
+ | Pattern | What it does | When |
75
+ |---------|--------------|------|
76
+ | Debounce | Run after user pauses for X ms | Search-as-you-type, autosave |
77
+ | Throttle | Run at most once per X ms | Scroll/resize handlers, drag |
78
+ | Coalesce | Multiple identical concurrent requests → one in-flight request, all callers wait | Same-key fetches inside a tick |
79
+
80
+ Coalesce example:
81
+
82
+ ```js
83
+ const inflight = new Map();
84
+ function getCached(key) {
85
+ if (inflight.has(key)) return inflight.get(key);
86
+ const p = fetchOne(key).finally(() => inflight.delete(key));
87
+ inflight.set(key, p);
88
+ return p;
89
+ }
90
+ ```
91
+
92
+ ## Streaming over buffering
93
+
94
+ If you'll process items one-by-one anyway, don't materialize the whole list first:
95
+
96
+ ```js
97
+ // Bad
98
+ const all = await fetchAll();
99
+ for (const item of all) await process(item);
100
+
101
+ // Good — async iterator / cursor
102
+ for await (const item of fetchPaged()) await process(item);
103
+ ```
104
+
105
+ Saves memory and lets you start processing before the source has finished producing.
106
+
107
+ ## Retries: do them right or not at all
108
+
109
+ A naive `for (let i = 0; i < 3; i++) try { ... }` retry loop turns into a retry storm during an outage — that is how minor incidents become major ones.
110
+
111
+ - Only retry on transient errors (timeouts, 502/503, network reset). Not 4xx.
112
+ - Exponential backoff with jitter: `delay = base * 2^attempt + random()`.
113
+ - Cap total retry time, not just attempt count.
114
+ - Apply at the edge, not at every layer (or you get retry^layers).
115
+
116
+ ## Timeouts
117
+
118
+ Every external call needs an explicit timeout. `fetch` has no timeout option of its own, so wire one up with `AbortController`. Always:
119
+
120
+ ```js
121
+ const ctrl = new AbortController();
122
+ const t = setTimeout(() => ctrl.abort(), 5000);
123
+ try {
124
+ return await fetch(url, { signal: ctrl.signal });
125
+ } finally {
126
+ clearTimeout(t);
127
+ }
128
+ ```
129
+
130
+ ## Promise.all vs Promise.allSettled
131
+
132
+ - `Promise.all` rejects on the first failure — good when partial result is useless.
133
+ - `Promise.allSettled` always resolves with per-item status — good when partial is fine and you want to report failures.
134
+
135
+ Choose deliberately. The default reflex (`all`) is often wrong for "fetch user's widgets from N services where one being down is tolerable".
@@ -0,0 +1,66 @@
1
+ # Complexity reference
2
+
3
+ ## The only chart that matters for shipping
4
+
5
+ | n | O(1) | O(log n) | O(n) | O(n log n) | O(n^2) | O(2^n) |
6
+ |----------|-------|----------|--------|------------|------------|------------|
7
+ | 10 | inst. | inst. | inst. | inst. | inst. | ms |
8
+ | 1,000 | inst. | inst. | inst. | inst. | ms | impossible |
9
+ | 10,000 | inst. | inst. | ms | ms | seconds | — |
10
+ | 100,000 | inst. | inst. | tens ms| ~100 ms | minutes | — |
11
+ | 1,000,000| inst. | inst. | ~100 ms| ~seconds | hours | — |
12
+
13
+ Read as: "if my hot path runs at this `n`, what's my budget?"
14
+
15
+ Rule of thumb in JS/Python: a `for`-loop body of moderate work does ~10^7 ops/sec. Cross-check before shipping.
16
+
17
+ ## Picking a data structure
18
+
19
+ | Need | Pick | Why |
20
+ |-------------------------------|---------------------|------------------------------------------|
21
+ | "Have I seen this value?" | `Set` / `dict` / `frozenset` | O(1) `has` vs O(n) `includes`/`in list` |
22
+ | "Lookup by key" | `Map` / `dict` | O(1) — `Object` works for string keys but coerces every other key type to a string |
23
+ | "Ordered, frequent insertion at head" | `Deque` / linked list | Array `unshift` is O(n) |
24
+ | "Distinct sorted set" | sorted array + bisect, or `SortedSet` | O(log n) lookup; a plain sorted array still pays O(n) per insert |
25
+ | "Stack/queue" | array `push`/`pop`, or deque | `shift` on array is O(n) |
26
+ | "Range query" | sorted array + binary search, or interval tree | Avoid linear scan |
27
+ | "Top-k of stream" | heap (priority queue), size k | O(n log k) vs O(n log n) for full sort |
28
+ | "Approximate membership at scale" | Bloom filter | When set itself doesn't fit memory |
29
+
30
+ ## Algorithm shortcuts
31
+
32
+ - **Sort once, query many** — pre-sort if you'll do many lookups/range scans.
33
+ - **Two pointers / sliding window** — most "find pair/subarray that satisfies X" problems collapse from O(n^2) to O(n).
34
+ - **Hash and intersect** — to find common elements between two arrays, put one in a `Set` first. O(n+m) vs O(n*m).
35
+ - **Memoize pure recursion** — DP via memo turns 2^n into n^2 or n.
36
+ - **Batch I/O** — `IN (...)` query, `Promise.all([...])`, bulk insert/update. Never one-at-a-time inside a loop.
37
+ - **Lazy / streaming** — when n is huge but each consumer only reads a slice (generators, async iterators, cursor-based pagination).
38
+
39
+ ## Amortized vs worst-case
40
+
41
+ Watch for cases the model usually misses:
42
+
43
+ - `array.push` — amortized O(1), worst-case O(n) on resize. Fine for batches.
44
+ - Hash table resize — amortized O(1), occasional O(n) pause. Matters in real-time.
45
+ - `string += s` in Python / JS — repeated copy → O(n^2). Use `''.join(list)` / `arr.join('')`.
46
+ - Recursive memoization — first call O(n), subsequent O(1). Cold-start cost.
47
+
48
+ State amortized AND worst-case when it matters to the caller (e.g. interactive UI, request handler).
49
+
50
+ ## Space matters too
51
+
52
+ Always state space, not just time. Cases that bite:
53
+ - Materializing a full list when you could stream.
54
+ - Deep-cloning a large object to "be safe" — pay O(n) memory + O(n) time on every call.
55
+ - Caching everything forever — unbounded cache is a memory leak.
56
+ - Closures retaining large parents — see `memory.md`.
57
+
58
+ ## "What's n?" — always ask
59
+
60
+ Never optimize without knowing `n` and growth rate. Ask the user, or read the schema/data shape:
61
+
62
+ - Per-request `n`? Per-tenant `n`? Per-user lifetime `n`?
63
+ - Is it bounded (`<= 100` always), or does it grow linearly with users / time / data ingest?
64
+ - Does it appear in a hot path (every request) or a cold path (admin script, once a day)?
65
+
66
+ A nested loop at n=20 is fine forever. The same code at n=50,000 is an outage.
@@ -0,0 +1,87 @@
1
+ # Hot paths reference
2
+
3
+ A "hot path" is code that runs many times per second, per request, or per render. Small constants matter here; nowhere else.
4
+
5
+ ## How to identify one
6
+
7
+ A path is hot if any of:
8
+ - Runs inside React `render` / function-component body / `useEffect` with frequently changing deps.
9
+ - Runs per item in a list of `n > 100`.
10
+ - Runs per request on a server (especially middleware, auth checks).
11
+ - Runs per frame (animation, scroll, drag, resize handlers).
12
+ - Runs per keystroke (search, validation).
13
+ - Runs in a polling/interval loop.
14
+
15
+ If you can't say which of these applies, you don't know if it's hot. Ask the user.
16
+
17
+ ## React: the common re-render causes
18
+
19
+ 1. **New referential identity every render.**
20
+ ```jsx
21
+ // Bad — new array every render, breaks memo
22
+ <Child items={[a, b, c]} options={{ sort: true }} />
23
+
24
+ // Bad — new function every render
25
+ <Child onClick={() => doThing(id)} />
26
+
27
+ // Good
28
+ const items = useMemo(() => [a, b, c], [a, b, c]);
29
+ const onClick = useCallback(() => doThing(id), [id]);
30
+ ```
31
+ But only if `Child` is wrapped in `React.memo` and the cost of re-rendering it actually matters. Don't blanket-memo.
32
+
33
+ 2. **Context that changes too often.** Any consumer of a Context re-renders when its value reference changes. Split contexts; memoize the value object.
34
+
35
+ 3. **Anonymous components in render.**
36
+ ```jsx
37
+ // Bad — new component type every render, full subtree remounts
38
+ function Parent() {
39
+ function Item({ x }) { return <li>{x}</li>; }
40
+ return items.map(x => <Item x={x} />);
41
+ }
42
+ ```
43
+ Hoist `Item` outside.
44
+
45
+ 4. **Inline object/array props to a memoized child.** Same problem as (1). Hoist or `useMemo`.
46
+
47
+ 5. **Effects with object/array deps.** `useEffect(fn, [{ a, b }])` — that object is new every render → infinite loop or constant re-run. Memoize the dep, or list scalars.
48
+
49
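+ 6. **`key={index}` on a reorderable list.** After a reorder, insert, or delete, React matches items by position, so component state (e.g. input values) sticks to the slot instead of the item, and every shifted row re-renders. Use a stable ID as the key.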
50
+
51
+ ## Big lists
52
+
53
+ - **Virtualize at ~200+ items rendered at once.** `react-virtuoso`, `@tanstack/react-virtual`, or native CSS `content-visibility: auto`.
54
+ - **Move heavy work off the main thread.** `Web Worker` for parsing/diffing/searching large data.
55
+ - **Defer non-critical work.** `startTransition` / `useDeferredValue` for filtering. `requestIdleCallback` for analytics.
56
+
57
+ ## Event-loop blocking
58
+
59
+ In JS, a single tick over ~50ms is felt as jank. Over ~200ms, the user thinks the app is frozen.
60
+
61
+ Common offenders to never run synchronously on the main thread:
62
+ - JSON.parse / JSON.stringify on payloads > a few hundred KB.
63
+ - Sort / dedupe / aggregation over > ~50k items.
64
+ - Regex with catastrophic backtracking on untrusted input (ReDoS).
65
+ - Image / video / PDF processing.
66
+ - Sync crypto.
67
+
68
+ Fix: chunk the work and yield between chunks with `setTimeout(fn, 0)` / `MessageChannel`, move it to a Worker, or stream.
69
+
70
+ ## Render budgets (rules of thumb)
71
+
72
+ | Surface | Budget |
73
+ |---------|--------|
74
+ | Largest Contentful Paint (LCP) | < 2.5s |
75
+ | Interaction (INP) | < 200ms |
76
+ | Per-frame work | < 16ms (60fps) or < 8ms (120fps) |
77
+ | Server response (TTFB) | < 200ms warm |
78
+
79
+ State a budget when shipping new UI work. Then measure.
80
+
81
+ ## Server hot paths
82
+
83
+ - Auth/identity middleware runs on every request — must be O(1) or cached.
84
+ - N+1 queries in list endpoints — see `n-plus-one.md`.
85
+ - JSON serialization on huge payloads — paginate or stream.
86
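+ - Synchronous bcrypt/argon2 in a request handler — blocks the thread for the whole hash; fine if rare, costly under load. Prefer the async API, and tune the cost factor deliberately rather than lowering it for speed.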
87
+ - Cold start: minimize module-load work, lazy-import heavy deps.