@booklib/core 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. package/.cursor/rules/booklib-standards.mdc +40 -0
  2. package/.gemini/context.md +372 -0
  3. package/AGENTS.md +166 -0
  4. package/CHANGELOG.md +226 -0
  5. package/CLAUDE.md +81 -0
  6. package/CODE_OF_CONDUCT.md +31 -0
  7. package/CONTRIBUTING.md +304 -0
  8. package/LICENSE +21 -0
  9. package/PLAN.md +28 -0
  10. package/README.ja.md +198 -0
  11. package/README.ko.md +198 -0
  12. package/README.md +503 -0
  13. package/README.pt-BR.md +198 -0
  14. package/README.uk.md +241 -0
  15. package/README.zh-CN.md +198 -0
  16. package/SECURITY.md +9 -0
  17. package/agents/architecture-reviewer.md +136 -0
  18. package/agents/booklib-reviewer.md +90 -0
  19. package/agents/data-reviewer.md +107 -0
  20. package/agents/jvm-reviewer.md +146 -0
  21. package/agents/python-reviewer.md +128 -0
  22. package/agents/rust-reviewer.md +115 -0
  23. package/agents/ts-reviewer.md +110 -0
  24. package/agents/ui-reviewer.md +117 -0
  25. package/assets/logo.svg +36 -0
  26. package/bin/booklib-mcp.js +304 -0
  27. package/bin/booklib.js +1705 -0
  28. package/bin/skills.cjs +1292 -0
  29. package/booklib-router.mdc +36 -0
  30. package/booklib.config.json +19 -0
  31. package/commands/animation-at-work.md +10 -0
  32. package/commands/clean-code-reviewer.md +10 -0
  33. package/commands/data-intensive-patterns.md +10 -0
  34. package/commands/data-pipelines.md +10 -0
  35. package/commands/design-patterns.md +10 -0
  36. package/commands/domain-driven-design.md +10 -0
  37. package/commands/effective-java.md +10 -0
  38. package/commands/effective-kotlin.md +10 -0
  39. package/commands/effective-python.md +10 -0
  40. package/commands/effective-typescript.md +10 -0
  41. package/commands/kotlin-in-action.md +10 -0
  42. package/commands/lean-startup.md +10 -0
  43. package/commands/microservices-patterns.md +10 -0
  44. package/commands/programming-with-rust.md +10 -0
  45. package/commands/refactoring-ui.md +10 -0
  46. package/commands/rust-in-action.md +10 -0
  47. package/commands/skill-router.md +10 -0
  48. package/commands/spring-boot-in-action.md +10 -0
  49. package/commands/storytelling-with-data.md +10 -0
  50. package/commands/system-design-interview.md +10 -0
  51. package/commands/using-asyncio-python.md +10 -0
  52. package/commands/web-scraping-python.md +10 -0
  53. package/community/registry.json +1616 -0
  54. package/hooks/hooks.json +23 -0
  55. package/hooks/posttooluse-capture.mjs +67 -0
  56. package/hooks/suggest.js +153 -0
  57. package/lib/agent-behaviors.js +40 -0
  58. package/lib/agent-detector.js +96 -0
  59. package/lib/config-loader.js +39 -0
  60. package/lib/conflict-resolver.js +148 -0
  61. package/lib/context-builder.js +574 -0
  62. package/lib/discovery-engine.js +298 -0
  63. package/lib/doctor/hook-installer.js +83 -0
  64. package/lib/doctor/usage-tracker.js +87 -0
  65. package/lib/engine/ai-features.js +253 -0
  66. package/lib/engine/auditor.js +103 -0
  67. package/lib/engine/bm25-index.js +178 -0
  68. package/lib/engine/capture.js +120 -0
  69. package/lib/engine/corrections.js +198 -0
  70. package/lib/engine/doctor.js +195 -0
  71. package/lib/engine/graph-injector.js +137 -0
  72. package/lib/engine/graph.js +161 -0
  73. package/lib/engine/handoff.js +405 -0
  74. package/lib/engine/indexer.js +242 -0
  75. package/lib/engine/parser.js +53 -0
  76. package/lib/engine/query-expander.js +42 -0
  77. package/lib/engine/reranker.js +40 -0
  78. package/lib/engine/rrf.js +59 -0
  79. package/lib/engine/scanner.js +151 -0
  80. package/lib/engine/searcher.js +139 -0
  81. package/lib/engine/session-coordinator.js +306 -0
  82. package/lib/engine/session-manager.js +429 -0
  83. package/lib/engine/synthesizer.js +70 -0
  84. package/lib/installer.js +70 -0
  85. package/lib/instinct-block.js +33 -0
  86. package/lib/mcp-config-writer.js +88 -0
  87. package/lib/paths.js +57 -0
  88. package/lib/profiles/design.md +19 -0
  89. package/lib/profiles/general.md +16 -0
  90. package/lib/profiles/research-analysis.md +22 -0
  91. package/lib/profiles/software-development.md +23 -0
  92. package/lib/profiles/writing-content.md +19 -0
  93. package/lib/project-initializer.js +916 -0
  94. package/lib/registry/skills.js +102 -0
  95. package/lib/registry-searcher.js +99 -0
  96. package/lib/rules/rules-manager.js +169 -0
  97. package/lib/skill-fetcher.js +333 -0
  98. package/lib/well-known-builder.js +70 -0
  99. package/lib/wizard/index.js +404 -0
  100. package/lib/wizard/integration-detector.js +41 -0
  101. package/lib/wizard/project-detector.js +100 -0
  102. package/lib/wizard/prompt.js +156 -0
  103. package/lib/wizard/registry-embeddings.js +107 -0
  104. package/lib/wizard/skill-recommender.js +69 -0
  105. package/llms-full.txt +254 -0
  106. package/llms.txt +70 -0
  107. package/package.json +45 -0
  108. package/research-reports/2026-04-01-current-architecture.md +160 -0
  109. package/research-reports/IDEAS.md +93 -0
  110. package/rules/common/clean-code.md +42 -0
  111. package/rules/java/effective-java.md +42 -0
  112. package/rules/kotlin/effective-kotlin.md +37 -0
  113. package/rules/python/effective-python.md +38 -0
  114. package/rules/rust/rust.md +37 -0
  115. package/rules/typescript/effective-typescript.md +42 -0
  116. package/scripts/gen-llms-full.mjs +36 -0
  117. package/scripts/gen-og.mjs +142 -0
  118. package/scripts/validate-frontmatter.js +25 -0
  119. package/skills/animation-at-work/SKILL.md +270 -0
  120. package/skills/animation-at-work/assets/example_asset.txt +1 -0
  121. package/skills/animation-at-work/evals/evals.json +44 -0
  122. package/skills/animation-at-work/evals/results.json +13 -0
  123. package/skills/animation-at-work/examples/after.md +64 -0
  124. package/skills/animation-at-work/examples/before.md +35 -0
  125. package/skills/animation-at-work/references/api_reference.md +369 -0
  126. package/skills/animation-at-work/references/review-checklist.md +79 -0
  127. package/skills/animation-at-work/scripts/audit_animations.py +295 -0
  128. package/skills/animation-at-work/scripts/example.py +1 -0
  129. package/skills/clean-code-reviewer/SKILL.md +444 -0
  130. package/skills/clean-code-reviewer/audit.json +35 -0
  131. package/skills/clean-code-reviewer/evals/evals.json +185 -0
  132. package/skills/clean-code-reviewer/evals/results.json +13 -0
  133. package/skills/clean-code-reviewer/examples/after.md +48 -0
  134. package/skills/clean-code-reviewer/examples/before.md +33 -0
  135. package/skills/clean-code-reviewer/references/api_reference.md +158 -0
  136. package/skills/clean-code-reviewer/references/practices-catalog.md +282 -0
  137. package/skills/clean-code-reviewer/references/review-checklist.md +254 -0
  138. package/skills/clean-code-reviewer/scripts/pre-review.py +206 -0
  139. package/skills/data-intensive-patterns/SKILL.md +267 -0
  140. package/skills/data-intensive-patterns/assets/example_asset.txt +1 -0
  141. package/skills/data-intensive-patterns/evals/evals.json +54 -0
  142. package/skills/data-intensive-patterns/evals/results.json +13 -0
  143. package/skills/data-intensive-patterns/examples/after.md +61 -0
  144. package/skills/data-intensive-patterns/examples/before.md +38 -0
  145. package/skills/data-intensive-patterns/references/api_reference.md +34 -0
  146. package/skills/data-intensive-patterns/references/patterns-catalog.md +551 -0
  147. package/skills/data-intensive-patterns/references/review-checklist.md +193 -0
  148. package/skills/data-intensive-patterns/scripts/adr.py +213 -0
  149. package/skills/data-intensive-patterns/scripts/example.py +1 -0
  150. package/skills/data-pipelines/SKILL.md +259 -0
  151. package/skills/data-pipelines/assets/example_asset.txt +1 -0
  152. package/skills/data-pipelines/evals/evals.json +45 -0
  153. package/skills/data-pipelines/evals/results.json +13 -0
  154. package/skills/data-pipelines/examples/after.md +97 -0
  155. package/skills/data-pipelines/examples/before.md +37 -0
  156. package/skills/data-pipelines/references/api_reference.md +301 -0
  157. package/skills/data-pipelines/references/review-checklist.md +181 -0
  158. package/skills/data-pipelines/scripts/example.py +1 -0
  159. package/skills/data-pipelines/scripts/new_pipeline.py +444 -0
  160. package/skills/design-patterns/SKILL.md +271 -0
  161. package/skills/design-patterns/assets/example_asset.txt +1 -0
  162. package/skills/design-patterns/evals/evals.json +46 -0
  163. package/skills/design-patterns/evals/results.json +13 -0
  164. package/skills/design-patterns/examples/after.md +52 -0
  165. package/skills/design-patterns/examples/before.md +29 -0
  166. package/skills/design-patterns/references/api_reference.md +1 -0
  167. package/skills/design-patterns/references/patterns-catalog.md +726 -0
  168. package/skills/design-patterns/references/review-checklist.md +173 -0
  169. package/skills/design-patterns/scripts/example.py +1 -0
  170. package/skills/design-patterns/scripts/scaffold.py +807 -0
  171. package/skills/domain-driven-design/SKILL.md +142 -0
  172. package/skills/domain-driven-design/assets/example_asset.txt +1 -0
  173. package/skills/domain-driven-design/evals/evals.json +48 -0
  174. package/skills/domain-driven-design/evals/results.json +13 -0
  175. package/skills/domain-driven-design/examples/after.md +80 -0
  176. package/skills/domain-driven-design/examples/before.md +43 -0
  177. package/skills/domain-driven-design/references/api_reference.md +1 -0
  178. package/skills/domain-driven-design/references/patterns-catalog.md +545 -0
  179. package/skills/domain-driven-design/references/review-checklist.md +158 -0
  180. package/skills/domain-driven-design/scripts/example.py +1 -0
  181. package/skills/domain-driven-design/scripts/scaffold.py +421 -0
  182. package/skills/effective-java/SKILL.md +227 -0
  183. package/skills/effective-java/assets/example_asset.txt +1 -0
  184. package/skills/effective-java/evals/evals.json +46 -0
  185. package/skills/effective-java/evals/results.json +13 -0
  186. package/skills/effective-java/examples/after.md +83 -0
  187. package/skills/effective-java/examples/before.md +37 -0
  188. package/skills/effective-java/references/api_reference.md +1 -0
  189. package/skills/effective-java/references/items-catalog.md +955 -0
  190. package/skills/effective-java/references/review-checklist.md +216 -0
  191. package/skills/effective-java/scripts/checkstyle_setup.py +211 -0
  192. package/skills/effective-java/scripts/example.py +1 -0
  193. package/skills/effective-kotlin/SKILL.md +271 -0
  194. package/skills/effective-kotlin/assets/example_asset.txt +1 -0
  195. package/skills/effective-kotlin/audit.json +29 -0
  196. package/skills/effective-kotlin/evals/evals.json +45 -0
  197. package/skills/effective-kotlin/evals/results.json +13 -0
  198. package/skills/effective-kotlin/examples/after.md +36 -0
  199. package/skills/effective-kotlin/examples/before.md +38 -0
  200. package/skills/effective-kotlin/references/api_reference.md +1 -0
  201. package/skills/effective-kotlin/references/practices-catalog.md +1228 -0
  202. package/skills/effective-kotlin/references/review-checklist.md +126 -0
  203. package/skills/effective-kotlin/scripts/example.py +1 -0
  204. package/skills/effective-python/SKILL.md +441 -0
  205. package/skills/effective-python/evals/evals.json +44 -0
  206. package/skills/effective-python/evals/results.json +13 -0
  207. package/skills/effective-python/examples/after.md +56 -0
  208. package/skills/effective-python/examples/before.md +40 -0
  209. package/skills/effective-python/ref-01-pythonic-thinking.md +202 -0
  210. package/skills/effective-python/ref-02-lists-and-dicts.md +146 -0
  211. package/skills/effective-python/ref-03-functions.md +186 -0
  212. package/skills/effective-python/ref-04-comprehensions-generators.md +211 -0
  213. package/skills/effective-python/ref-05-classes-interfaces.md +188 -0
  214. package/skills/effective-python/ref-06-metaclasses-attributes.md +209 -0
  215. package/skills/effective-python/ref-07-concurrency.md +213 -0
  216. package/skills/effective-python/ref-08-robustness-performance.md +248 -0
  217. package/skills/effective-python/ref-09-testing-debugging.md +253 -0
  218. package/skills/effective-python/ref-10-collaboration.md +175 -0
  219. package/skills/effective-python/references/api_reference.md +218 -0
  220. package/skills/effective-python/references/practices-catalog.md +483 -0
  221. package/skills/effective-python/references/review-checklist.md +190 -0
  222. package/skills/effective-python/scripts/lint.py +173 -0
  223. package/skills/effective-typescript/SKILL.md +262 -0
  224. package/skills/effective-typescript/audit.json +29 -0
  225. package/skills/effective-typescript/evals/evals.json +37 -0
  226. package/skills/effective-typescript/evals/results.json +13 -0
  227. package/skills/effective-typescript/examples/after.md +70 -0
  228. package/skills/effective-typescript/examples/before.md +47 -0
  229. package/skills/effective-typescript/references/api_reference.md +118 -0
  230. package/skills/effective-typescript/references/practices-catalog.md +371 -0
  231. package/skills/effective-typescript/scripts/review.py +169 -0
  232. package/skills/kotlin-in-action/SKILL.md +261 -0
  233. package/skills/kotlin-in-action/assets/example_asset.txt +1 -0
  234. package/skills/kotlin-in-action/evals/evals.json +43 -0
  235. package/skills/kotlin-in-action/evals/results.json +13 -0
  236. package/skills/kotlin-in-action/examples/after.md +53 -0
  237. package/skills/kotlin-in-action/examples/before.md +39 -0
  238. package/skills/kotlin-in-action/references/api_reference.md +1 -0
  239. package/skills/kotlin-in-action/references/practices-catalog.md +436 -0
  240. package/skills/kotlin-in-action/references/review-checklist.md +204 -0
  241. package/skills/kotlin-in-action/scripts/example.py +1 -0
  242. package/skills/kotlin-in-action/scripts/setup_detekt.py +224 -0
  243. package/skills/lean-startup/SKILL.md +160 -0
  244. package/skills/lean-startup/assets/example_asset.txt +1 -0
  245. package/skills/lean-startup/evals/evals.json +43 -0
  246. package/skills/lean-startup/evals/results.json +13 -0
  247. package/skills/lean-startup/examples/after.md +80 -0
  248. package/skills/lean-startup/examples/before.md +34 -0
  249. package/skills/lean-startup/references/api_reference.md +319 -0
  250. package/skills/lean-startup/references/review-checklist.md +137 -0
  251. package/skills/lean-startup/scripts/example.py +1 -0
  252. package/skills/lean-startup/scripts/new_experiment.py +286 -0
  253. package/skills/microservices-patterns/SKILL.md +384 -0
  254. package/skills/microservices-patterns/evals/evals.json +45 -0
  255. package/skills/microservices-patterns/evals/results.json +13 -0
  256. package/skills/microservices-patterns/examples/after.md +69 -0
  257. package/skills/microservices-patterns/examples/before.md +40 -0
  258. package/skills/microservices-patterns/references/patterns-catalog.md +391 -0
  259. package/skills/microservices-patterns/references/review-checklist.md +169 -0
  260. package/skills/microservices-patterns/scripts/new_service.py +583 -0
  261. package/skills/programming-with-rust/SKILL.md +209 -0
  262. package/skills/programming-with-rust/evals/evals.json +37 -0
  263. package/skills/programming-with-rust/evals/results.json +13 -0
  264. package/skills/programming-with-rust/examples/after.md +107 -0
  265. package/skills/programming-with-rust/examples/before.md +59 -0
  266. package/skills/programming-with-rust/references/api_reference.md +152 -0
  267. package/skills/programming-with-rust/references/practices-catalog.md +335 -0
  268. package/skills/programming-with-rust/scripts/review.py +142 -0
  269. package/skills/refactoring-ui/SKILL.md +362 -0
  270. package/skills/refactoring-ui/assets/example_asset.txt +1 -0
  271. package/skills/refactoring-ui/evals/evals.json +45 -0
  272. package/skills/refactoring-ui/evals/results.json +13 -0
  273. package/skills/refactoring-ui/examples/after.md +85 -0
  274. package/skills/refactoring-ui/examples/before.md +58 -0
  275. package/skills/refactoring-ui/references/api_reference.md +355 -0
  276. package/skills/refactoring-ui/references/review-checklist.md +114 -0
  277. package/skills/refactoring-ui/scripts/audit_css.py +250 -0
  278. package/skills/refactoring-ui/scripts/example.py +1 -0
  279. package/skills/rust-in-action/SKILL.md +350 -0
  280. package/skills/rust-in-action/evals/evals.json +38 -0
  281. package/skills/rust-in-action/evals/results.json +13 -0
  282. package/skills/rust-in-action/examples/after.md +156 -0
  283. package/skills/rust-in-action/examples/before.md +56 -0
  284. package/skills/rust-in-action/references/practices-catalog.md +346 -0
  285. package/skills/rust-in-action/scripts/review.py +147 -0
  286. package/skills/skill-router/SKILL.md +186 -0
  287. package/skills/skill-router/evals/evals.json +38 -0
  288. package/skills/skill-router/evals/results.json +13 -0
  289. package/skills/skill-router/examples/after.md +63 -0
  290. package/skills/skill-router/examples/before.md +39 -0
  291. package/skills/skill-router/references/api_reference.md +24 -0
  292. package/skills/skill-router/references/routing-heuristics.md +89 -0
  293. package/skills/skill-router/references/skill-catalog.md +174 -0
  294. package/skills/skill-router/scripts/route.py +266 -0
  295. package/skills/spring-boot-in-action/SKILL.md +340 -0
  296. package/skills/spring-boot-in-action/evals/evals.json +39 -0
  297. package/skills/spring-boot-in-action/evals/results.json +13 -0
  298. package/skills/spring-boot-in-action/examples/after.md +185 -0
  299. package/skills/spring-boot-in-action/examples/before.md +84 -0
  300. package/skills/spring-boot-in-action/references/practices-catalog.md +403 -0
  301. package/skills/spring-boot-in-action/scripts/review.py +184 -0
  302. package/skills/storytelling-with-data/SKILL.md +241 -0
  303. package/skills/storytelling-with-data/assets/example_asset.txt +1 -0
  304. package/skills/storytelling-with-data/evals/evals.json +47 -0
  305. package/skills/storytelling-with-data/evals/results.json +13 -0
  306. package/skills/storytelling-with-data/examples/after.md +50 -0
  307. package/skills/storytelling-with-data/examples/before.md +33 -0
  308. package/skills/storytelling-with-data/references/api_reference.md +379 -0
  309. package/skills/storytelling-with-data/references/review-checklist.md +111 -0
  310. package/skills/storytelling-with-data/scripts/chart_review.py +301 -0
  311. package/skills/storytelling-with-data/scripts/example.py +1 -0
  312. package/skills/system-design-interview/SKILL.md +233 -0
  313. package/skills/system-design-interview/assets/example_asset.txt +1 -0
  314. package/skills/system-design-interview/evals/evals.json +46 -0
  315. package/skills/system-design-interview/evals/results.json +13 -0
  316. package/skills/system-design-interview/examples/after.md +94 -0
  317. package/skills/system-design-interview/examples/before.md +27 -0
  318. package/skills/system-design-interview/references/api_reference.md +582 -0
  319. package/skills/system-design-interview/references/review-checklist.md +201 -0
  320. package/skills/system-design-interview/scripts/example.py +1 -0
  321. package/skills/system-design-interview/scripts/new_design.py +421 -0
  322. package/skills/using-asyncio-python/SKILL.md +290 -0
  323. package/skills/using-asyncio-python/assets/example_asset.txt +1 -0
  324. package/skills/using-asyncio-python/evals/evals.json +43 -0
  325. package/skills/using-asyncio-python/evals/results.json +13 -0
  326. package/skills/using-asyncio-python/examples/after.md +68 -0
  327. package/skills/using-asyncio-python/examples/before.md +39 -0
  328. package/skills/using-asyncio-python/references/api_reference.md +267 -0
  329. package/skills/using-asyncio-python/references/review-checklist.md +149 -0
  330. package/skills/using-asyncio-python/scripts/check_blocking.py +270 -0
  331. package/skills/using-asyncio-python/scripts/example.py +1 -0
  332. package/skills/web-scraping-python/SKILL.md +280 -0
  333. package/skills/web-scraping-python/assets/example_asset.txt +1 -0
  334. package/skills/web-scraping-python/evals/evals.json +46 -0
  335. package/skills/web-scraping-python/evals/results.json +13 -0
  336. package/skills/web-scraping-python/examples/after.md +109 -0
  337. package/skills/web-scraping-python/examples/before.md +40 -0
  338. package/skills/web-scraping-python/references/api_reference.md +393 -0
  339. package/skills/web-scraping-python/references/review-checklist.md +163 -0
  340. package/skills/web-scraping-python/scripts/example.py +1 -0
  341. package/skills/web-scraping-python/scripts/new_scraper.py +231 -0
  342. package/skills/writing-plans/audit.json +34 -0
  343. package/tests/agent-detector.test.js +83 -0
  344. package/tests/corrections.test.js +245 -0
  345. package/tests/doctor/hook-installer.test.js +72 -0
  346. package/tests/doctor/usage-tracker.test.js +140 -0
  347. package/tests/engine/benchmark-eval.test.js +31 -0
  348. package/tests/engine/bm25-index.test.js +85 -0
  349. package/tests/engine/capture-command.test.js +35 -0
  350. package/tests/engine/capture.test.js +17 -0
  351. package/tests/engine/graph-augmented-search.test.js +107 -0
  352. package/tests/engine/graph-injector.test.js +44 -0
  353. package/tests/engine/graph.test.js +216 -0
  354. package/tests/engine/hybrid-searcher.test.js +74 -0
  355. package/tests/engine/indexer-bm25.test.js +37 -0
  356. package/tests/engine/mcp-tools.test.js +73 -0
  357. package/tests/engine/project-initializer-mcp.test.js +99 -0
  358. package/tests/engine/query-expander.test.js +36 -0
  359. package/tests/engine/reranker.test.js +51 -0
  360. package/tests/engine/rrf.test.js +49 -0
  361. package/tests/engine/srag-prefix.test.js +47 -0
  362. package/tests/instinct-block.test.js +23 -0
  363. package/tests/mcp-config-writer.test.js +60 -0
  364. package/tests/project-initializer-new-agents.test.js +48 -0
  365. package/tests/rules/rules-manager.test.js +230 -0
  366. package/tests/well-known-builder.test.js +40 -0
  367. package/tests/wizard/integration-detector.test.js +31 -0
  368. package/tests/wizard/project-detector.test.js +51 -0
  369. package/tests/wizard/prompt-session.test.js +61 -0
  370. package/tests/wizard/prompt.test.js +16 -0
  371. package/tests/wizard/registry-embeddings.test.js +35 -0
  372. package/tests/wizard/skill-recommender.test.js +34 -0
  373. package/tests/wizard/slot-count.test.js +25 -0
  374. package/vercel.json +21 -0
@@ -0,0 +1,201 @@
1
+ # System Design Interview — Design Review Checklist
2
+
3
+ Systematic checklist for reviewing system designs against Chapters 1–15
4
+ of *System Design Interview* by Alex Xu.
5
+
6
+ ---
7
+
8
+ ## 1. Scaling Fundamentals (Chapter 1)
9
+
10
+ ### Infrastructure
11
+ - [ ] **Ch 1 — Load balancing** — Is traffic distributed across multiple servers with failover?
12
+ - [ ] **Ch 1 — Database replication** — Are read replicas used for read-heavy workloads?
13
+ - [ ] **Ch 1 — Caching** — Is a cache layer (Redis/Memcached) used for frequently accessed data?
14
+ - [ ] **Ch 1 — CDN** — Are static assets served from a CDN?
15
+ - [ ] **Ch 1 — Stateless web tier** — Is session data stored in shared storage, not on web servers?
16
+ - [ ] **Ch 1 — Message queue** — Are time-consuming tasks decoupled via async message queues?
17
+ - [ ] **Ch 1 — Database sharding** — Is data sharded for write-heavy or large-scale workloads?
18
+ - [ ] **Ch 1 — Data centers** — Is multi-datacenter deployment considered for geo-distribution?
19
+
20
+ ### Data Layer
21
+ - [ ] **Ch 1 — Database choice** — Is the right database type selected (SQL vs. NoSQL) based on access patterns?
22
+ - [ ] **Ch 1 — Shard key** — Is the shard key chosen for even data distribution?
23
+ - [ ] **Ch 1 — Hotspot mitigation** — Are celebrity/hotspot problems addressed?
24
+
25
+ ---
26
+
27
+ ## 2. Capacity Estimation (Chapter 2)
28
+
29
+ ### Back-of-Envelope
30
+ - [ ] **Ch 2 — QPS estimated** — Are queries per second calculated (average and peak)?
31
+ - [ ] **Ch 2 — Storage estimated** — Is storage growth estimated over time (1 year, 5 years)?
32
+ - [ ] **Ch 2 — Bandwidth estimated** — Is network bandwidth estimated for read and write?
33
+ - [ ] **Ch 2 — Memory estimated** — Is cache memory estimated (e.g., 80/20 rule)?
34
+ - [ ] **Ch 2 — Availability target** — Is the availability SLA defined (99.9%, 99.99%)?
35
+ - [ ] **Ch 2 — Latency awareness** — Are latency numbers considered (memory vs. disk vs. network)?
36
+
37
+ ---
38
+
39
+ ## 3. Design Structure (Chapter 3)
40
+
41
+ ### Framework Adherence
42
+ - [ ] **Ch 3 — Requirements defined** — Are functional and non-functional requirements explicit?
43
+ - [ ] **Ch 3 — High-level design** — Is there a clear component diagram with data flow?
44
+ - [ ] **Ch 3 — API design** — Are API endpoints defined?
45
+ - [ ] **Ch 3 — Deep dive** — Are 2–3 critical components designed in detail?
46
+ - [ ] **Ch 3 — Trade-offs stated** — Are design trade-offs explicitly discussed?
47
+ - [ ] **Ch 3 — Error handling** — Are failure modes and error handling addressed?
48
+ - [ ] **Ch 3 — Monitoring** — Are logging, metrics, and alerting included?
49
+
50
+ ---
51
+
52
+ ## 4. Rate Limiting (Chapter 4)
53
+
54
+ ### Rate Limiter Design
55
+ - [ ] **Ch 4 — Algorithm selected** — Is an appropriate rate limiting algorithm chosen for the use case?
56
+ - [ ] **Ch 4 — Distributed concerns** — Are race conditions and multi-server sync addressed?
57
+ - [ ] **Ch 4 — Rate limit response** — Are proper HTTP 429 responses and headers used?
58
+ - [ ] **Ch 4 — Rule configuration** — Are rate limiting rules configurable and cached?
59
+
60
+ ---
61
+
62
+ ## 5. Data Distribution (Chapter 5)
63
+
64
+ ### Consistent Hashing
65
+ - [ ] **Ch 5 — Hash strategy** — Is consistent hashing used for data/request distribution?
66
+ - [ ] **Ch 5 — Virtual nodes** — Are virtual nodes used for even distribution?
67
+ - [ ] **Ch 5 — Rebalancing** — Is key redistribution minimized when servers change?
68
+
69
+ ---
70
+
71
+ ## 6. Distributed Storage (Chapter 6)
72
+
73
+ ### Key-Value Store Design
74
+ - [ ] **Ch 6 — CAP choice** — Is the CP vs. AP trade-off explicitly decided?
75
+ - [ ] **Ch 6 — Replication** — Is data replicated to N nodes with appropriate quorum (N/W/R)?
76
+ - [ ] **Ch 6 — Conflict resolution** — Are concurrent write conflicts handled (vector clocks, last-write-wins)?
77
+ - [ ] **Ch 6 — Failure detection** — Is a gossip protocol or equivalent used for failure detection?
78
+ - [ ] **Ch 6 — Failure recovery** — Are sloppy quorum and hinted handoff used for temporary failures?
79
+ - [ ] **Ch 6 — Anti-entropy** — Are Merkle trees used for replica synchronization?
80
+
81
+ ---
82
+
83
+ ## 7. Unique IDs (Chapter 7)
84
+
85
+ ### ID Generation
86
+ - [ ] **Ch 7 — ID approach** — Is the right ID generation approach used for the requirements?
87
+ - [ ] **Ch 7 — Sortability** — Are IDs sortable by time if needed (snowflake)?
88
+ - [ ] **Ch 7 — Distribution** — Can IDs be generated without central coordination?
89
+ - [ ] **Ch 7 — Size** — Is the ID size appropriate (64-bit vs. 128-bit)?
90
+
91
+ ---
92
+
93
+ ## 8. URL Shortening (Chapter 8)
94
+
95
+ ### URL Shortener Design
96
+ - [ ] **Ch 8 — Redirect type** — Is the correct redirect (301 vs. 302) chosen based on analytics needs?
97
+ - [ ] **Ch 8 — Hash strategy** — Is the hash/encoding approach appropriate (base-62, hash+collision)?
98
+ - [ ] **Ch 8 — Collision handling** — Are hash collisions detected and resolved?
99
+
100
+ ---
101
+
102
+ ## 9. Web Crawling (Chapter 9)
103
+
104
+ ### Crawler Design
105
+ - [ ] **Ch 9 — URL frontier** — Does the frontier handle politeness and priority?
106
+ - [ ] **Ch 9 — Content dedup** — Is content fingerprinting used to avoid redundant crawling?
107
+ - [ ] **Ch 9 — URL dedup** — Is a Bloom filter or similar used to track visited URLs?
108
+ - [ ] **Ch 9 — Robots.txt** — Is robots.txt respected and cached?
109
+ - [ ] **Ch 9 — Spider traps** — Is max URL depth enforced to avoid infinite crawling?
110
+
111
+ ---
112
+
113
+ ## 10. Notifications (Chapter 10)
114
+
115
+ ### Notification System
116
+ - [ ] **Ch 10 — Multi-channel** — Are all required channels supported (push, SMS, email)?
117
+ - [ ] **Ch 10 — Reliability** — Is a notification log maintained for retry on failure?
118
+ - [ ] **Ch 10 — Deduplication** — Are duplicate notifications prevented via event_id checking?
119
+ - [ ] **Ch 10 — Rate limiting** — Are per-user notification limits enforced?
120
+ - [ ] **Ch 10 — Analytics** — Is notification engagement tracked (open rate, click rate)?
121
+ - [ ] **Ch 10 — User preferences** — Can users opt in/out per channel?
122
+
123
+ ---
124
+
125
+ ## 11. News Feed (Chapter 11)
126
+
127
+ ### News Feed System
128
+ - [ ] **Ch 11 — Fanout model** — Is the right fanout model chosen (push, pull, or hybrid)?
129
+ - [ ] **Ch 11 — Celebrity handling** — Is the celebrity/hotkey problem addressed (hybrid approach)?
130
+ - [ ] **Ch 11 — Cache layers** — Are appropriate cache tiers used (feed, content, social graph, actions, counters)?
131
+
132
+ ---
133
+
134
+ ## 12. Chat System (Chapter 12)
135
+
136
+ ### Chat Design
137
+ - [ ] **Ch 12 — Protocol** — Is WebSocket used for real-time messaging?
138
+ - [ ] **Ch 12 — Stateful servers** — Are chat servers stateful with proper service discovery?
139
+ - [ ] **Ch 12 — Storage** — Is a key-value store used for message history (write-heavy)?
140
+ - [ ] **Ch 12 — Message sync** — Is per-device cursor-based sync implemented?
141
+ - [ ] **Ch 12 — Presence** — Is online presence tracked with a heartbeat mechanism?
142
+ - [ ] **Ch 12 — Group scaling** — Are small groups (push) and large groups (pull) handled differently?
143
+
144
+ ---
145
+
146
+ ## 13. Autocomplete (Chapter 13)
147
+
148
+ ### Autocomplete System
149
+ - [ ] **Ch 13 — Trie structure** — Is a trie used for prefix matching?
150
+ - [ ] **Ch 13 — Top-k caching** — Are top-k results cached at each trie node?
151
+ - [ ] **Ch 13 — Data pipeline** — Is there a data gathering → aggregation → trie build pipeline?
152
+ - [ ] **Ch 13 — Browser caching** — Are autocomplete results cached client-side?
153
+ - [ ] **Ch 13 — Content filtering** — Is a filter layer used to remove inappropriate suggestions?
154
+ - [ ] **Ch 13 — Sharding** — Is the trie sharded for scale (by character or frequency)?
155
+
156
+ ---
157
+
158
+ ## 14. Video Platform (Chapter 14)
159
+
160
+ ### Video System
161
+ - [ ] **Ch 14 — Upload flow** — Is parallel chunked upload with pre-signed URLs used?
162
+ - [ ] **Ch 14 — Transcoding** — Is a DAG-based transcoding pipeline designed?
163
+ - [ ] **Ch 14 — Adaptive streaming** — Is adaptive bitrate streaming used (HLS/DASH)?
164
+ - [ ] **Ch 14 — CDN strategy** — Are popular videos served from CDN, long-tail from origin?
165
+ - [ ] **Ch 14 — Error handling** — Are recoverable vs. non-recoverable errors distinguished?
166
+ - [ ] **Ch 14 — Content safety** — Is DRM, encryption, or watermarking considered?
167
+
168
+ ---
169
+
170
+ ## 15. Cloud Storage (Chapter 15)
171
+
172
+ ### File Storage System
173
+ - [ ] **Ch 15 — Block servers** — Are files split into blocks for delta sync?
174
+ - [ ] **Ch 15 — Deduplication** — Are duplicate blocks detected by hash and skipped?
175
+ - [ ] **Ch 15 — Resumable uploads** — Are large file uploads resumable?
176
+ - [ ] **Ch 15 — Notifications** — Is long polling used for real-time file change notifications?
177
+ - [ ] **Ch 15 — Conflict resolution** — Is first-version-wins with conflict copies implemented?
178
+ - [ ] **Ch 15 — Versioning** — Is file version history maintained?
179
+ - [ ] **Ch 15 — Offline support** — Is an offline backup queue used for sync when clients reconnect?
180
+
181
+ ---
182
+
183
+ ## Quick Review Workflow
184
+
185
+ 1. **Scale pass** — Are scaling fundamentals applied (LB, cache, CDN, replication, sharding)?
186
+ 2. **Estimation pass** — Are capacity numbers calculated and reasonable?
187
+ 3. **Structure pass** — Does the design follow the 4-step framework?
188
+ 4. **Component pass** — Are relevant design patterns used for each component?
189
+ 5. **Failure pass** — Are failure modes identified and handled?
190
+ 6. **Trade-off pass** — Are design decisions justified with explicit trade-offs?
191
+ 7. **Operational pass** — Are monitoring, logging, and alerting included?
192
+ 8. **Prioritize findings** — Rank by severity: missing scaling > wrong data store > missing estimation > process gaps
193
+
194
+ ## Severity Levels
195
+
196
+ | Severity | Description | Example |
197
+ |----------|-------------|---------|
198
+ | **Critical** | Missing fundamental scaling or wrong architecture | No load balancing, single DB at scale, no caching for read-heavy system, stateful web servers |
199
+ | **High** | Missing core design patterns | No capacity estimation, wrong CAP choice, no failure handling, no rate limiting |
200
+ | **Medium** | Component design gaps | No CDN, no message queue for async tasks, no content dedup, suboptimal fanout model |
201
+ | **Low** | Optimization improvements | No virtual nodes in consistent hashing, no browser caching for autocomplete, no delta sync |
@@ -0,0 +1,421 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ System Design Interview Doc Generator — Alex Xu 4-step framework.
4
+
5
+ Usage (one-shot): python new_design.py "URL Shortener"
6
+ Usage (interactive): python new_design.py
7
+ """
8
+
9
+ import argparse
10
+ import math
11
+ import sys
12
+ from datetime import date
13
+ from pathlib import Path
14
+
15
+
16
+ # ---------------------------------------------------------------------------
17
+ # Prompting helpers
18
+ # ---------------------------------------------------------------------------
19
+
20
def prompt(label: str, default: str = "") -> str:
    """Ask for a line of text on stdin, re-asking until an answer is available.

    Args:
        label: Text shown before the colon.
        default: Value returned when the answer is blank. When non-empty it
            is shown in square brackets after the label; when empty, a blank
            answer is rejected and the question is asked again.

    Returns:
        The whitespace-stripped answer, or ``default`` for a blank answer.
    """
    hint = ""
    if default:
        hint = f" [{default}]"
    question = f"{label}{hint}: "

    answer = input(question).strip()
    # Keep asking only while there is neither an answer nor a fallback.
    while not (answer or default):
        print(" (required)")
        answer = input(question).strip()
    return answer or default
29
+
30
+
31
def prompt_int(label: str, default: int) -> int:
    """Ask for an integer on stdin, re-asking until the answer parses.

    Args:
        label: Text shown before the bracketed default.
        default: Value returned for a blank answer; displayed with
            thousands separators.

    Returns:
        The parsed integer, or ``default`` when the answer is blank.
        Commas and underscores in the answer are ignored.
    """
    question = f"{label} [{default:,}]: "
    while True:
        text = input(question).strip()
        if text == "":
            return default
        # Drop grouping characters so "1,000" and "1_000" both parse.
        digits = text
        for separator in (",", "_"):
            digits = digits.replace(separator, "")
        try:
            number = int(digits)
        except ValueError:
            print(" Please enter an integer.")
            continue
        return number
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Back-of-envelope calculations
44
+ # ---------------------------------------------------------------------------
45
+
46
def human_size(bytes_: float) -> str:
    """Format a byte count with binary (1024-based) units from B up to PB.

    Args:
        bytes_: Number of bytes.

    Returns:
        The value with one decimal place followed by its unit, e.g.
        ``"1.5 KB"``. PB is the largest unit, so anything of 1024 PB or more
        is still expressed in PB.
    """
    # PB is deliberately left out of the loop: dividing again after the PB
    # step would yield exabytes while still labelling them "PB".
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if bytes_ < 1024:
            return f"{bytes_:.1f} {unit}"
        bytes_ /= 1024
    return f"{bytes_:.1f} PB"
52
+
53
+
54
def calc_estimations(dau: int, read_write_ratio: int, avg_object_size_bytes: int, years: int) -> dict:
    """Derive traffic, storage and bandwidth figures from the four inputs.

    The model assumes one write per active user per day, reads equal to
    writes times ``read_write_ratio``, and a peak of twice the read rate.

    Args:
        dau: Daily active users.
        read_write_ratio: Reads per write.
        avg_object_size_bytes: Size of one stored object in bytes.
        years: Retention period used for the total-storage figure.

    Returns:
        A dict echoing the inputs plus ``write_qps``, ``read_qps``,
        ``peak_qps``, ``storage_per_day``, ``total_storage``,
        ``bandwidth_in`` and ``bandwidth_out`` (bandwidth in bytes/sec).
    """
    seconds_per_day = 86400

    writes_per_second = dau / seconds_per_day
    reads_per_second = writes_per_second * read_write_ratio

    # One write per active user, so daily writes equal DAU.
    bytes_per_day = dau * avg_object_size_bytes

    estimates = {}
    estimates["dau"] = dau
    estimates["write_qps"] = writes_per_second
    estimates["read_qps"] = reads_per_second
    estimates["peak_qps"] = reads_per_second * 2
    estimates["read_write_ratio"] = read_write_ratio
    estimates["storage_per_day"] = bytes_per_day
    estimates["total_storage"] = bytes_per_day * 365 * years
    estimates["bandwidth_in"] = writes_per_second * avg_object_size_bytes
    estimates["bandwidth_out"] = reads_per_second * avg_object_size_bytes
    estimates["years"] = years
    estimates["avg_object_size_bytes"] = avg_object_size_bytes
    return estimates
81
+
82
+
83
+ # ---------------------------------------------------------------------------
84
+ # Document sections
85
+ # ---------------------------------------------------------------------------
86
+
87
def section_requirements(system: str, features: list[str]) -> str:
    """Build the "Step 1: Requirements Clarification" Markdown section.

    Args:
        system: System name; not used by this section.
        features: Functional requirements, rendered one bullet per entry.

    Returns:
        The section text, ending with a newline.
    """
    bullets = "\n".join([f"- {feature}" for feature in features])
    head = "## Step 1: Requirements Clarification\n\n### Functional Requirements\n"
    # Everything after the feature bullets is fixed boilerplate.
    tail = """\

### Non-Functional Requirements
- High availability: 99.99% uptime (< 52 min downtime/year)
- Low latency: p99 read latency < 100 ms
- Durability: no data loss; replicated across at least 3 availability zones
- Eventual consistency is acceptable for non-critical reads
- The system must be horizontally scalable

### Out of Scope (for this interview)
- Admin dashboard / abuse reporting
- A/B testing infrastructure
- Multi-region write consistency
- Billing / rate-limiting per customer tier (mention but don't design)

### Clarifying Questions to Ask the Interviewer
1. What is the expected scale (DAU, peak QPS)?
2. Read-heavy or write-heavy? What is the read:write ratio?
3. Any latency SLA for writes?
4. Do we need strong consistency or is eventual consistency acceptable?
5. What is the retention period for data?
"""
    return f"{head}{bullets}\n{tail}"
115
+
116
+
117
def section_estimation(e: dict) -> str:
    """Build the "Step 2: Back-of-Envelope Estimation" Markdown section.

    Args:
        e: Estimates dict providing ``dau``, ``read_write_ratio``,
            ``avg_object_size_bytes``, ``years``, ``write_qps``, ``read_qps``,
            ``peak_qps``, ``storage_per_day``, ``total_storage``,
            ``bandwidth_in`` and ``bandwidth_out``.

    Returns:
        The section text, ending with a newline.
    """
    dau = e["dau"]
    ratio = e["read_write_ratio"]
    object_bytes = e["avg_object_size_bytes"]
    years = e["years"]
    write_qps = e["write_qps"]
    read_qps = e["read_qps"]
    peak_qps = e["peak_qps"]

    # Pre-format every size once; the same strings recur in several formulas.
    object_size = human_size(object_bytes)
    per_day = human_size(e["storage_per_day"])
    total = human_size(e["total_storage"])
    inbound = human_size(e["bandwidth_in"])
    outbound = human_size(e["bandwidth_out"])
    hot_data = human_size(read_qps * 86400 * 0.20 * object_bytes)

    return f"""\
## Step 2: Back-of-Envelope Estimation

### Assumptions
| Parameter | Value |
|-----------|-------|
| Daily Active Users (DAU) | {dau:,} |
| Read : Write ratio | {ratio} : 1 |
| Average object size | {object_size} |
| Retention period | {years} years |

### Derived Estimates

**Traffic**
```
Write QPS = DAU / 86,400 s
= {dau:,} / 86,400
≈ {write_qps:,.1f} writes/sec

Read QPS = Write QPS × {ratio}
≈ {read_qps:,.0f} reads/sec

Peak QPS ≈ Read QPS × 2 (rule of thumb)
≈ {peak_qps:,.0f} reads/sec
```

**Storage**
```
Storage/day = writes/day × avg object size
= {dau:,} × {object_size}
= {per_day}

Total = {per_day} × 365 × {years} years
≈ {total}
```

**Bandwidth**
```
Inbound ≈ {write_qps:,.1f} req/s × {object_size}
≈ {inbound}/s

Outbound ≈ {read_qps:,.0f} req/s × {object_size}
≈ {outbound}/s
```

**Cache sizing (80/20 rule)**
```
Hot data = 20% of daily reads × avg object size
≈ {hot_data}
```
"""
169
+
170
+
171
def section_high_level(system: str, features: list[str]) -> str:
    """Build the "Step 3: High-Level Design" Markdown section.

    The section is fixed boilerplate: a component diagram, a sample REST API,
    a sample SQL schema and a technology-choice table.

    Args:
        system: System name; not used by this section.
        features: Feature list; not used by this section.

    Returns:
        The section text, ending with a newline.
    """
    # No interpolation is needed, so a plain string literal is sufficient.
    section = """\
## Step 3: High-Level Design

### Component Diagram (describe to interviewer)

```
Clients


[CDN / Edge Cache]
│ (cache hit → return)

[Load Balancer] ←──── health checks

├─► [API Server cluster] (stateless, auto-scaling)
│ │
│ ├─► [Cache layer] (Redis / Memcached)
│ │ │ cache miss
│ │ ▼
│ └─► [Primary DB] ←── [Read Replicas]

└─► [Message Queue] (Kafka / SQS)


[Worker / Consumer]


[Object Storage] (S3-compatible, for blobs)
```

### Core API Endpoints

| Method | Path | Description |
|--------|------|-------------|
| POST | /v1/resource | Create a new resource |
| GET | /v1/resource/:id | Fetch by ID |
| PUT | /v1/resource/:id | Update |
| DELETE | /v1/resource/:id | Soft-delete |
| GET | /v1/healthz | Health check |

### Data Model (core entities)

```sql
-- Primary entity
CREATE TABLE resource (
id CHAR(8) PRIMARY KEY, -- or UUID
owner_id BIGINT NOT NULL,
payload TEXT,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
is_deleted BOOLEAN NOT NULL DEFAULT FALSE
);

CREATE INDEX idx_resource_owner ON resource(owner_id);
```

### Technology Choices
| Layer | Choice | Rationale |
|-------|--------|-----------|
| API | REST / gRPC | REST for external; gRPC for internal services |
| Primary DB | PostgreSQL (or Cassandra if write-heavy) | ACID; mature; read replicas |
| Cache | Redis | Sub-millisecond latency; rich data structures |
| Object store | S3-compatible | Cheap; durable; decoupled from DB |
| Queue | Kafka | High-throughput; replay; partitioned by key |
"""
    return section
237
+
238
+
239
def section_deep_dive(system: str, e: dict) -> str:
    """Build the "Step 4: Deep Dive" Markdown section.

    The recommended primary database depends on the estimated write rate:
    Cassandra when ``e["write_qps"]`` is above the threshold, PostgreSQL
    otherwise. The PostgreSQL rationale quotes that same threshold, so the
    advice in the text always matches the choice the generator makes.

    Args:
        system: System name; not used by this section.
        e: Estimates dict; only ``write_qps`` is read.

    Returns:
        The section text, ending with a newline.
    """
    # Single source of truth for both the database choice and the advice text.
    cassandra_write_qps_threshold = 5000
    use_cassandra = e["write_qps"] > cassandra_write_qps_threshold

    if use_cassandra:
        db_name = "Cassandra"
        db_rationale = (
            "Cassandra: wide-column store optimised for high write throughput "
            "with tunable consistency. Partition key = user_id for even distribution."
        )
    else:
        db_name = "PostgreSQL"
        db_rationale = (
            "PostgreSQL: strong ACID guarantees, mature tooling, easy to add read "
            "replicas. Move to Cassandra if write QPS exceeds "
            f"~{cassandra_write_qps_threshold:,} sustained."
        )

    return f"""\
## Step 4: Deep Dive

### Bottleneck Analysis
- **Write path**: API server → DB primary. Mitigate with write-ahead log tailing,
async replication, and buffered writes via the message queue.
- **Read path**: DB read replicas + Redis cache. Target > 90% cache hit rate.
- **Hot keys**: Apply key-based sharding and local in-process LRU cache for the
top-N items (identified via cache hit analytics).

### Database Deep Dive

**Why {db_name}?**
{db_rationale}

**Sharding strategy**
- Shard by `user_id` hash to distribute load evenly.
- Avoid sharding by time (creates hot partitions for recent data).
- Use consistent hashing to minimise re-sharding cost.

**Replication**
- 1 primary + 2 read replicas per shard (cross-AZ).
- Async replication is acceptable; compensate with cache TTL.

### Caching Strategy
- **Read-through cache**: API checks Redis before DB.
- **Write-invalidation**: On write, delete the cache key (not update).
- **TTL**: Set based on staleness tolerance (e.g., 5 min for non-critical data).
- **Eviction policy**: `allkeys-lru` for general use.

### Consistency Model
- Reads from replicas may be slightly stale (< 1 s typical).
- Critical reads (e.g., immediately after a write) can be routed to primary.
- Use optimistic locking (version column) for concurrent updates.

### Fault Tolerance
- API servers: stateless → replace failed nodes automatically.
- DB primary failure: automated failover to replica (< 30 s with Patroni/RDS).
- Cache failure: graceful degradation — fall through to DB.
- Queue failure: producers buffer locally and retry.

### Scalability Levers (ordered by cost)
1. Increase read replica count.
2. Add Redis cluster nodes.
3. Add API server instances (auto-scaling policy on CPU/QPS).
4. Shard the database.
5. Move to a distributed DB (Cassandra / CockroachDB).

### Areas to Explore If Time Permits
- **CDN**: Cache static and semi-static responses at edge.
- **Rate limiting**: Token bucket per user_id at the load balancer.
- **Search**: Add Elasticsearch for full-text queries.
- **Analytics**: Stream events to a data warehouse (Snowflake / BigQuery) via Kafka.
"""
294
+
295
+
296
def section_interview_questions(system: str) -> str:
    """Build the "Common Follow-Up Interview Questions" Markdown table.

    Args:
        system: System name; not used by this section.

    Returns:
        The fixed section text, ending with a newline.
    """
    # No interpolation is needed, so a plain string literal is sufficient.
    table = """\
## Common Follow-Up Interview Questions

| Question | Key Points to Cover |
|----------|---------------------|
| How do you handle a DB primary failure? | Automated failover, replica promotion, heartbeat checks |
| How do you prevent cache stampede? | Mutex lock on cache miss, probabilistic early refresh |
| How would you design the ID generation? | Snowflake ID, UUID v7, or DB sequence — trade-offs |
| How do you ensure exactly-once processing? | Idempotency keys, deduplication in the consumer |
| How would you add full-text search? | Elasticsearch / OpenSearch, sync via CDC from DB |
| How do you handle schema migrations? | Expand/contract pattern; blue/green deploys; backward-compatible changes first |
| Walk me through a write from client to storage | Client → LB → API → validate → DB write → publish event → async worker |
"""
    return table
310
+
311
+
312
+ # ---------------------------------------------------------------------------
313
+ # Main
314
+ # ---------------------------------------------------------------------------
315
+
316
def gather_interactive() -> dict:
    """Collect all generator inputs by prompting on stdin.

    Returns:
        A dict with keys ``system``, ``features`` (list of non-empty,
        stripped strings), ``dau``, ``rw``, ``obj_size``, ``years`` and
        ``output`` (a ``Path``, or ``None`` when the answer is blank,
        meaning stdout).
    """
    print("\n=== System Design Interview — Document Generator ===\n")
    system = prompt("System name (e.g., 'URL Shortener', 'Twitter Feed')")
    features_raw = prompt(
        "Core features (comma-separated)",
        "Create resource, Retrieve resource, Delete resource",
    )
    features = [f.strip() for f in features_raw.split(",") if f.strip()]
    dau = prompt_int("DAU (Daily Active Users)", 10_000_000)
    rw = prompt_int("Read:Write ratio (e.g., 10 means 10 reads per write)", 10)
    obj_size = prompt_int("Average object size in bytes", 1024)
    years = prompt_int("Retention period (years)", 5)
    # The output path is optional, so read it directly: prompt() rejects a
    # blank answer when the default is empty and would re-ask forever, making
    # "leave blank for stdout" impossible.
    output_raw = input("Output file (leave blank for stdout): ").strip()
    output = Path(output_raw) if output_raw else None
    return dict(system=system, features=features, dau=dau, rw=rw,
                obj_size=obj_size, years=years, output=output)
332
+
333
+
334
def render(data: dict) -> str:
    """Assemble the full Markdown design document.

    Args:
        data: Dict with keys ``system``, ``features``, ``dau``, ``rw``,
            ``obj_size`` and ``years``.

    Returns:
        The document: a title block followed by the requirements,
        estimation, high-level design, deep-dive and interview-question
        sections, each separated by a horizontal rule. Ends with a newline.
    """
    system = data["system"]
    features = data["features"]
    e = calc_estimations(
        dau=data["dau"],
        read_write_ratio=data["rw"],
        avg_object_size_bytes=data["obj_size"],
        years=data["years"],
    )
    parts = [
        f"# System Design: {system}",
        "",
        # Two trailing spaces form a Markdown hard line break; with fewer,
        # the Date and Framework lines are rendered as one paragraph line.
        f"**Date:** {date.today()}  ",
        "**Framework:** Alex Xu — System Design Interview Vol. 1 & 2",
        "",
        "---",
        "",
        section_requirements(system, features),
        "---",
        "",
        section_estimation(e),
        "---",
        "",
        section_high_level(system, features),
        "---",
        "",
        section_deep_dive(system, e),
        "---",
        "",
        section_interview_questions(system),
        "---",
        "",
        "*Generated by `new_design.py` — System Design Interview skill.*",
    ]
    return "\n".join(parts) + "\n"
368
+
369
+
370
def main() -> None:
    """Command-line entry point.

    With a system name on the command line the document is generated
    non-interactively: every option that was supplied is honoured and each
    omitted one falls back to its default (10,000,000 DAU, 10:1 read:write,
    1024-byte objects, 5 years, three generic features). Without a system
    name all inputs are collected interactively.

    The document is written to the chosen output path as UTF-8, or to stdout
    when no path is set. Exits with status 1 if interactive input is aborted
    with Ctrl-C or end-of-file.
    """
    parser = argparse.ArgumentParser(
        description="Generate a system design interview document (Alex Xu framework)."
    )
    parser.add_argument("system", nargs="?", help="System name (skips prompt if provided)")
    parser.add_argument("--dau", type=int, default=None)
    parser.add_argument("--rw", type=int, default=None, help="Read:write ratio")
    parser.add_argument("--obj-size", type=int, default=None, help="Avg object size in bytes")
    parser.add_argument("--years", type=int, default=None, help="Retention years")
    parser.add_argument("--features", help="Comma-separated feature list")
    parser.add_argument("--output", type=Path, default=None)
    args = parser.parse_args()

    if args.system:
        # Compare against None rather than truthiness so an explicit 0 is
        # kept, and fill defaults per option so a partial set of flags is
        # not silently discarded.
        features = [f.strip() for f in (args.features or "").split(",") if f.strip()]
        data = dict(
            system=args.system,
            features=features or ["Create resource", "Read resource", "Delete resource"],
            dau=10_000_000 if args.dau is None else args.dau,
            rw=10 if args.rw is None else args.rw,
            obj_size=1024 if args.obj_size is None else args.obj_size,
            years=5 if args.years is None else args.years,
            output=args.output,
        )
    else:
        try:
            data = gather_interactive()
        except (KeyboardInterrupt, EOFError):
            print("\nAborted.", file=sys.stderr)
            sys.exit(1)

    document = render(data)

    output = data.get("output")
    if output is not None:
        # The document contains non-ASCII characters (≈, ×, →, box drawing),
        # so do not depend on the platform's default encoding.
        output.write_text(document, encoding="utf-8")
        print(f"Design document written to: {output}")
    else:
        sys.stdout.write(document)
418
+
419
+
420
+ # Run the generator only when this file is executed as a script, not when it is imported.
+ if __name__ == "__main__":
421
+ main()