@booklib/core 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cursor/rules/booklib-standards.mdc +40 -0
- package/.gemini/context.md +372 -0
- package/AGENTS.md +166 -0
- package/CHANGELOG.md +226 -0
- package/CLAUDE.md +81 -0
- package/CODE_OF_CONDUCT.md +31 -0
- package/CONTRIBUTING.md +304 -0
- package/LICENSE +21 -0
- package/PLAN.md +28 -0
- package/README.ja.md +198 -0
- package/README.ko.md +198 -0
- package/README.md +503 -0
- package/README.pt-BR.md +198 -0
- package/README.uk.md +241 -0
- package/README.zh-CN.md +198 -0
- package/SECURITY.md +9 -0
- package/agents/architecture-reviewer.md +136 -0
- package/agents/booklib-reviewer.md +90 -0
- package/agents/data-reviewer.md +107 -0
- package/agents/jvm-reviewer.md +146 -0
- package/agents/python-reviewer.md +128 -0
- package/agents/rust-reviewer.md +115 -0
- package/agents/ts-reviewer.md +110 -0
- package/agents/ui-reviewer.md +117 -0
- package/assets/logo.svg +36 -0
- package/bin/booklib-mcp.js +304 -0
- package/bin/booklib.js +1705 -0
- package/bin/skills.cjs +1292 -0
- package/booklib-router.mdc +36 -0
- package/booklib.config.json +19 -0
- package/commands/animation-at-work.md +10 -0
- package/commands/clean-code-reviewer.md +10 -0
- package/commands/data-intensive-patterns.md +10 -0
- package/commands/data-pipelines.md +10 -0
- package/commands/design-patterns.md +10 -0
- package/commands/domain-driven-design.md +10 -0
- package/commands/effective-java.md +10 -0
- package/commands/effective-kotlin.md +10 -0
- package/commands/effective-python.md +10 -0
- package/commands/effective-typescript.md +10 -0
- package/commands/kotlin-in-action.md +10 -0
- package/commands/lean-startup.md +10 -0
- package/commands/microservices-patterns.md +10 -0
- package/commands/programming-with-rust.md +10 -0
- package/commands/refactoring-ui.md +10 -0
- package/commands/rust-in-action.md +10 -0
- package/commands/skill-router.md +10 -0
- package/commands/spring-boot-in-action.md +10 -0
- package/commands/storytelling-with-data.md +10 -0
- package/commands/system-design-interview.md +10 -0
- package/commands/using-asyncio-python.md +10 -0
- package/commands/web-scraping-python.md +10 -0
- package/community/registry.json +1616 -0
- package/hooks/hooks.json +23 -0
- package/hooks/posttooluse-capture.mjs +67 -0
- package/hooks/suggest.js +153 -0
- package/lib/agent-behaviors.js +40 -0
- package/lib/agent-detector.js +96 -0
- package/lib/config-loader.js +39 -0
- package/lib/conflict-resolver.js +148 -0
- package/lib/context-builder.js +574 -0
- package/lib/discovery-engine.js +298 -0
- package/lib/doctor/hook-installer.js +83 -0
- package/lib/doctor/usage-tracker.js +87 -0
- package/lib/engine/ai-features.js +253 -0
- package/lib/engine/auditor.js +103 -0
- package/lib/engine/bm25-index.js +178 -0
- package/lib/engine/capture.js +120 -0
- package/lib/engine/corrections.js +198 -0
- package/lib/engine/doctor.js +195 -0
- package/lib/engine/graph-injector.js +137 -0
- package/lib/engine/graph.js +161 -0
- package/lib/engine/handoff.js +405 -0
- package/lib/engine/indexer.js +242 -0
- package/lib/engine/parser.js +53 -0
- package/lib/engine/query-expander.js +42 -0
- package/lib/engine/reranker.js +40 -0
- package/lib/engine/rrf.js +59 -0
- package/lib/engine/scanner.js +151 -0
- package/lib/engine/searcher.js +139 -0
- package/lib/engine/session-coordinator.js +306 -0
- package/lib/engine/session-manager.js +429 -0
- package/lib/engine/synthesizer.js +70 -0
- package/lib/installer.js +70 -0
- package/lib/instinct-block.js +33 -0
- package/lib/mcp-config-writer.js +88 -0
- package/lib/paths.js +57 -0
- package/lib/profiles/design.md +19 -0
- package/lib/profiles/general.md +16 -0
- package/lib/profiles/research-analysis.md +22 -0
- package/lib/profiles/software-development.md +23 -0
- package/lib/profiles/writing-content.md +19 -0
- package/lib/project-initializer.js +916 -0
- package/lib/registry/skills.js +102 -0
- package/lib/registry-searcher.js +99 -0
- package/lib/rules/rules-manager.js +169 -0
- package/lib/skill-fetcher.js +333 -0
- package/lib/well-known-builder.js +70 -0
- package/lib/wizard/index.js +404 -0
- package/lib/wizard/integration-detector.js +41 -0
- package/lib/wizard/project-detector.js +100 -0
- package/lib/wizard/prompt.js +156 -0
- package/lib/wizard/registry-embeddings.js +107 -0
- package/lib/wizard/skill-recommender.js +69 -0
- package/llms-full.txt +254 -0
- package/llms.txt +70 -0
- package/package.json +45 -0
- package/research-reports/2026-04-01-current-architecture.md +160 -0
- package/research-reports/IDEAS.md +93 -0
- package/rules/common/clean-code.md +42 -0
- package/rules/java/effective-java.md +42 -0
- package/rules/kotlin/effective-kotlin.md +37 -0
- package/rules/python/effective-python.md +38 -0
- package/rules/rust/rust.md +37 -0
- package/rules/typescript/effective-typescript.md +42 -0
- package/scripts/gen-llms-full.mjs +36 -0
- package/scripts/gen-og.mjs +142 -0
- package/scripts/validate-frontmatter.js +25 -0
- package/skills/animation-at-work/SKILL.md +270 -0
- package/skills/animation-at-work/assets/example_asset.txt +1 -0
- package/skills/animation-at-work/evals/evals.json +44 -0
- package/skills/animation-at-work/evals/results.json +13 -0
- package/skills/animation-at-work/examples/after.md +64 -0
- package/skills/animation-at-work/examples/before.md +35 -0
- package/skills/animation-at-work/references/api_reference.md +369 -0
- package/skills/animation-at-work/references/review-checklist.md +79 -0
- package/skills/animation-at-work/scripts/audit_animations.py +295 -0
- package/skills/animation-at-work/scripts/example.py +1 -0
- package/skills/clean-code-reviewer/SKILL.md +444 -0
- package/skills/clean-code-reviewer/audit.json +35 -0
- package/skills/clean-code-reviewer/evals/evals.json +185 -0
- package/skills/clean-code-reviewer/evals/results.json +13 -0
- package/skills/clean-code-reviewer/examples/after.md +48 -0
- package/skills/clean-code-reviewer/examples/before.md +33 -0
- package/skills/clean-code-reviewer/references/api_reference.md +158 -0
- package/skills/clean-code-reviewer/references/practices-catalog.md +282 -0
- package/skills/clean-code-reviewer/references/review-checklist.md +254 -0
- package/skills/clean-code-reviewer/scripts/pre-review.py +206 -0
- package/skills/data-intensive-patterns/SKILL.md +267 -0
- package/skills/data-intensive-patterns/assets/example_asset.txt +1 -0
- package/skills/data-intensive-patterns/evals/evals.json +54 -0
- package/skills/data-intensive-patterns/evals/results.json +13 -0
- package/skills/data-intensive-patterns/examples/after.md +61 -0
- package/skills/data-intensive-patterns/examples/before.md +38 -0
- package/skills/data-intensive-patterns/references/api_reference.md +34 -0
- package/skills/data-intensive-patterns/references/patterns-catalog.md +551 -0
- package/skills/data-intensive-patterns/references/review-checklist.md +193 -0
- package/skills/data-intensive-patterns/scripts/adr.py +213 -0
- package/skills/data-intensive-patterns/scripts/example.py +1 -0
- package/skills/data-pipelines/SKILL.md +259 -0
- package/skills/data-pipelines/assets/example_asset.txt +1 -0
- package/skills/data-pipelines/evals/evals.json +45 -0
- package/skills/data-pipelines/evals/results.json +13 -0
- package/skills/data-pipelines/examples/after.md +97 -0
- package/skills/data-pipelines/examples/before.md +37 -0
- package/skills/data-pipelines/references/api_reference.md +301 -0
- package/skills/data-pipelines/references/review-checklist.md +181 -0
- package/skills/data-pipelines/scripts/example.py +1 -0
- package/skills/data-pipelines/scripts/new_pipeline.py +444 -0
- package/skills/design-patterns/SKILL.md +271 -0
- package/skills/design-patterns/assets/example_asset.txt +1 -0
- package/skills/design-patterns/evals/evals.json +46 -0
- package/skills/design-patterns/evals/results.json +13 -0
- package/skills/design-patterns/examples/after.md +52 -0
- package/skills/design-patterns/examples/before.md +29 -0
- package/skills/design-patterns/references/api_reference.md +1 -0
- package/skills/design-patterns/references/patterns-catalog.md +726 -0
- package/skills/design-patterns/references/review-checklist.md +173 -0
- package/skills/design-patterns/scripts/example.py +1 -0
- package/skills/design-patterns/scripts/scaffold.py +807 -0
- package/skills/domain-driven-design/SKILL.md +142 -0
- package/skills/domain-driven-design/assets/example_asset.txt +1 -0
- package/skills/domain-driven-design/evals/evals.json +48 -0
- package/skills/domain-driven-design/evals/results.json +13 -0
- package/skills/domain-driven-design/examples/after.md +80 -0
- package/skills/domain-driven-design/examples/before.md +43 -0
- package/skills/domain-driven-design/references/api_reference.md +1 -0
- package/skills/domain-driven-design/references/patterns-catalog.md +545 -0
- package/skills/domain-driven-design/references/review-checklist.md +158 -0
- package/skills/domain-driven-design/scripts/example.py +1 -0
- package/skills/domain-driven-design/scripts/scaffold.py +421 -0
- package/skills/effective-java/SKILL.md +227 -0
- package/skills/effective-java/assets/example_asset.txt +1 -0
- package/skills/effective-java/evals/evals.json +46 -0
- package/skills/effective-java/evals/results.json +13 -0
- package/skills/effective-java/examples/after.md +83 -0
- package/skills/effective-java/examples/before.md +37 -0
- package/skills/effective-java/references/api_reference.md +1 -0
- package/skills/effective-java/references/items-catalog.md +955 -0
- package/skills/effective-java/references/review-checklist.md +216 -0
- package/skills/effective-java/scripts/checkstyle_setup.py +211 -0
- package/skills/effective-java/scripts/example.py +1 -0
- package/skills/effective-kotlin/SKILL.md +271 -0
- package/skills/effective-kotlin/assets/example_asset.txt +1 -0
- package/skills/effective-kotlin/audit.json +29 -0
- package/skills/effective-kotlin/evals/evals.json +45 -0
- package/skills/effective-kotlin/evals/results.json +13 -0
- package/skills/effective-kotlin/examples/after.md +36 -0
- package/skills/effective-kotlin/examples/before.md +38 -0
- package/skills/effective-kotlin/references/api_reference.md +1 -0
- package/skills/effective-kotlin/references/practices-catalog.md +1228 -0
- package/skills/effective-kotlin/references/review-checklist.md +126 -0
- package/skills/effective-kotlin/scripts/example.py +1 -0
- package/skills/effective-python/SKILL.md +441 -0
- package/skills/effective-python/evals/evals.json +44 -0
- package/skills/effective-python/evals/results.json +13 -0
- package/skills/effective-python/examples/after.md +56 -0
- package/skills/effective-python/examples/before.md +40 -0
- package/skills/effective-python/ref-01-pythonic-thinking.md +202 -0
- package/skills/effective-python/ref-02-lists-and-dicts.md +146 -0
- package/skills/effective-python/ref-03-functions.md +186 -0
- package/skills/effective-python/ref-04-comprehensions-generators.md +211 -0
- package/skills/effective-python/ref-05-classes-interfaces.md +188 -0
- package/skills/effective-python/ref-06-metaclasses-attributes.md +209 -0
- package/skills/effective-python/ref-07-concurrency.md +213 -0
- package/skills/effective-python/ref-08-robustness-performance.md +248 -0
- package/skills/effective-python/ref-09-testing-debugging.md +253 -0
- package/skills/effective-python/ref-10-collaboration.md +175 -0
- package/skills/effective-python/references/api_reference.md +218 -0
- package/skills/effective-python/references/practices-catalog.md +483 -0
- package/skills/effective-python/references/review-checklist.md +190 -0
- package/skills/effective-python/scripts/lint.py +173 -0
- package/skills/effective-typescript/SKILL.md +262 -0
- package/skills/effective-typescript/audit.json +29 -0
- package/skills/effective-typescript/evals/evals.json +37 -0
- package/skills/effective-typescript/evals/results.json +13 -0
- package/skills/effective-typescript/examples/after.md +70 -0
- package/skills/effective-typescript/examples/before.md +47 -0
- package/skills/effective-typescript/references/api_reference.md +118 -0
- package/skills/effective-typescript/references/practices-catalog.md +371 -0
- package/skills/effective-typescript/scripts/review.py +169 -0
- package/skills/kotlin-in-action/SKILL.md +261 -0
- package/skills/kotlin-in-action/assets/example_asset.txt +1 -0
- package/skills/kotlin-in-action/evals/evals.json +43 -0
- package/skills/kotlin-in-action/evals/results.json +13 -0
- package/skills/kotlin-in-action/examples/after.md +53 -0
- package/skills/kotlin-in-action/examples/before.md +39 -0
- package/skills/kotlin-in-action/references/api_reference.md +1 -0
- package/skills/kotlin-in-action/references/practices-catalog.md +436 -0
- package/skills/kotlin-in-action/references/review-checklist.md +204 -0
- package/skills/kotlin-in-action/scripts/example.py +1 -0
- package/skills/kotlin-in-action/scripts/setup_detekt.py +224 -0
- package/skills/lean-startup/SKILL.md +160 -0
- package/skills/lean-startup/assets/example_asset.txt +1 -0
- package/skills/lean-startup/evals/evals.json +43 -0
- package/skills/lean-startup/evals/results.json +13 -0
- package/skills/lean-startup/examples/after.md +80 -0
- package/skills/lean-startup/examples/before.md +34 -0
- package/skills/lean-startup/references/api_reference.md +319 -0
- package/skills/lean-startup/references/review-checklist.md +137 -0
- package/skills/lean-startup/scripts/example.py +1 -0
- package/skills/lean-startup/scripts/new_experiment.py +286 -0
- package/skills/microservices-patterns/SKILL.md +384 -0
- package/skills/microservices-patterns/evals/evals.json +45 -0
- package/skills/microservices-patterns/evals/results.json +13 -0
- package/skills/microservices-patterns/examples/after.md +69 -0
- package/skills/microservices-patterns/examples/before.md +40 -0
- package/skills/microservices-patterns/references/patterns-catalog.md +391 -0
- package/skills/microservices-patterns/references/review-checklist.md +169 -0
- package/skills/microservices-patterns/scripts/new_service.py +583 -0
- package/skills/programming-with-rust/SKILL.md +209 -0
- package/skills/programming-with-rust/evals/evals.json +37 -0
- package/skills/programming-with-rust/evals/results.json +13 -0
- package/skills/programming-with-rust/examples/after.md +107 -0
- package/skills/programming-with-rust/examples/before.md +59 -0
- package/skills/programming-with-rust/references/api_reference.md +152 -0
- package/skills/programming-with-rust/references/practices-catalog.md +335 -0
- package/skills/programming-with-rust/scripts/review.py +142 -0
- package/skills/refactoring-ui/SKILL.md +362 -0
- package/skills/refactoring-ui/assets/example_asset.txt +1 -0
- package/skills/refactoring-ui/evals/evals.json +45 -0
- package/skills/refactoring-ui/evals/results.json +13 -0
- package/skills/refactoring-ui/examples/after.md +85 -0
- package/skills/refactoring-ui/examples/before.md +58 -0
- package/skills/refactoring-ui/references/api_reference.md +355 -0
- package/skills/refactoring-ui/references/review-checklist.md +114 -0
- package/skills/refactoring-ui/scripts/audit_css.py +250 -0
- package/skills/refactoring-ui/scripts/example.py +1 -0
- package/skills/rust-in-action/SKILL.md +350 -0
- package/skills/rust-in-action/evals/evals.json +38 -0
- package/skills/rust-in-action/evals/results.json +13 -0
- package/skills/rust-in-action/examples/after.md +156 -0
- package/skills/rust-in-action/examples/before.md +56 -0
- package/skills/rust-in-action/references/practices-catalog.md +346 -0
- package/skills/rust-in-action/scripts/review.py +147 -0
- package/skills/skill-router/SKILL.md +186 -0
- package/skills/skill-router/evals/evals.json +38 -0
- package/skills/skill-router/evals/results.json +13 -0
- package/skills/skill-router/examples/after.md +63 -0
- package/skills/skill-router/examples/before.md +39 -0
- package/skills/skill-router/references/api_reference.md +24 -0
- package/skills/skill-router/references/routing-heuristics.md +89 -0
- package/skills/skill-router/references/skill-catalog.md +174 -0
- package/skills/skill-router/scripts/route.py +266 -0
- package/skills/spring-boot-in-action/SKILL.md +340 -0
- package/skills/spring-boot-in-action/evals/evals.json +39 -0
- package/skills/spring-boot-in-action/evals/results.json +13 -0
- package/skills/spring-boot-in-action/examples/after.md +185 -0
- package/skills/spring-boot-in-action/examples/before.md +84 -0
- package/skills/spring-boot-in-action/references/practices-catalog.md +403 -0
- package/skills/spring-boot-in-action/scripts/review.py +184 -0
- package/skills/storytelling-with-data/SKILL.md +241 -0
- package/skills/storytelling-with-data/assets/example_asset.txt +1 -0
- package/skills/storytelling-with-data/evals/evals.json +47 -0
- package/skills/storytelling-with-data/evals/results.json +13 -0
- package/skills/storytelling-with-data/examples/after.md +50 -0
- package/skills/storytelling-with-data/examples/before.md +33 -0
- package/skills/storytelling-with-data/references/api_reference.md +379 -0
- package/skills/storytelling-with-data/references/review-checklist.md +111 -0
- package/skills/storytelling-with-data/scripts/chart_review.py +301 -0
- package/skills/storytelling-with-data/scripts/example.py +1 -0
- package/skills/system-design-interview/SKILL.md +233 -0
- package/skills/system-design-interview/assets/example_asset.txt +1 -0
- package/skills/system-design-interview/evals/evals.json +46 -0
- package/skills/system-design-interview/evals/results.json +13 -0
- package/skills/system-design-interview/examples/after.md +94 -0
- package/skills/system-design-interview/examples/before.md +27 -0
- package/skills/system-design-interview/references/api_reference.md +582 -0
- package/skills/system-design-interview/references/review-checklist.md +201 -0
- package/skills/system-design-interview/scripts/example.py +1 -0
- package/skills/system-design-interview/scripts/new_design.py +421 -0
- package/skills/using-asyncio-python/SKILL.md +290 -0
- package/skills/using-asyncio-python/assets/example_asset.txt +1 -0
- package/skills/using-asyncio-python/evals/evals.json +43 -0
- package/skills/using-asyncio-python/evals/results.json +13 -0
- package/skills/using-asyncio-python/examples/after.md +68 -0
- package/skills/using-asyncio-python/examples/before.md +39 -0
- package/skills/using-asyncio-python/references/api_reference.md +267 -0
- package/skills/using-asyncio-python/references/review-checklist.md +149 -0
- package/skills/using-asyncio-python/scripts/check_blocking.py +270 -0
- package/skills/using-asyncio-python/scripts/example.py +1 -0
- package/skills/web-scraping-python/SKILL.md +280 -0
- package/skills/web-scraping-python/assets/example_asset.txt +1 -0
- package/skills/web-scraping-python/evals/evals.json +46 -0
- package/skills/web-scraping-python/evals/results.json +13 -0
- package/skills/web-scraping-python/examples/after.md +109 -0
- package/skills/web-scraping-python/examples/before.md +40 -0
- package/skills/web-scraping-python/references/api_reference.md +393 -0
- package/skills/web-scraping-python/references/review-checklist.md +163 -0
- package/skills/web-scraping-python/scripts/example.py +1 -0
- package/skills/web-scraping-python/scripts/new_scraper.py +231 -0
- package/skills/writing-plans/audit.json +34 -0
- package/tests/agent-detector.test.js +83 -0
- package/tests/corrections.test.js +245 -0
- package/tests/doctor/hook-installer.test.js +72 -0
- package/tests/doctor/usage-tracker.test.js +140 -0
- package/tests/engine/benchmark-eval.test.js +31 -0
- package/tests/engine/bm25-index.test.js +85 -0
- package/tests/engine/capture-command.test.js +35 -0
- package/tests/engine/capture.test.js +17 -0
- package/tests/engine/graph-augmented-search.test.js +107 -0
- package/tests/engine/graph-injector.test.js +44 -0
- package/tests/engine/graph.test.js +216 -0
- package/tests/engine/hybrid-searcher.test.js +74 -0
- package/tests/engine/indexer-bm25.test.js +37 -0
- package/tests/engine/mcp-tools.test.js +73 -0
- package/tests/engine/project-initializer-mcp.test.js +99 -0
- package/tests/engine/query-expander.test.js +36 -0
- package/tests/engine/reranker.test.js +51 -0
- package/tests/engine/rrf.test.js +49 -0
- package/tests/engine/srag-prefix.test.js +47 -0
- package/tests/instinct-block.test.js +23 -0
- package/tests/mcp-config-writer.test.js +60 -0
- package/tests/project-initializer-new-agents.test.js +48 -0
- package/tests/rules/rules-manager.test.js +230 -0
- package/tests/well-known-builder.test.js +40 -0
- package/tests/wizard/integration-detector.test.js +31 -0
- package/tests/wizard/project-detector.test.js +51 -0
- package/tests/wizard/prompt-session.test.js +61 -0
- package/tests/wizard/prompt.test.js +16 -0
- package/tests/wizard/registry-embeddings.test.js +35 -0
- package/tests/wizard/skill-recommender.test.js +34 -0
- package/tests/wizard/slot-count.test.js +25 -0
- package/vercel.json +21 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# System Design Interview — Design Review Checklist
|
|
2
|
+
|
|
3
|
+
Systematic checklist for reviewing system designs against the 16 chapters
|
|
4
|
+
from *System Design Interview* by Alex Xu.
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## 1. Scaling Fundamentals (Chapter 1)
|
|
9
|
+
|
|
10
|
+
### Infrastructure
|
|
11
|
+
- [ ] **Ch 1 — Load balancing** — Is traffic distributed across multiple servers with failover?
|
|
12
|
+
- [ ] **Ch 1 — Database replication** — Are read replicas used for read-heavy workloads?
|
|
13
|
+
- [ ] **Ch 1 — Caching** — Is a cache layer (Redis/Memcached) used for frequently accessed data?
|
|
14
|
+
- [ ] **Ch 1 — CDN** — Are static assets served from a CDN?
|
|
15
|
+
- [ ] **Ch 1 — Stateless web tier** — Is session data stored in shared storage, not on web servers?
|
|
16
|
+
- [ ] **Ch 1 — Message queue** — Are time-consuming tasks decoupled via async message queues?
|
|
17
|
+
- [ ] **Ch 1 — Database sharding** — Is data sharded for write-heavy or large-scale workloads?
|
|
18
|
+
- [ ] **Ch 1 — Data centers** — Is multi-datacenter deployment considered for geo-distribution?
|
|
19
|
+
|
|
20
|
+
### Data Layer
|
|
21
|
+
- [ ] **Ch 1 — Database choice** — Is the right database type selected (SQL vs. NoSQL) based on access patterns?
|
|
22
|
+
- [ ] **Ch 1 — Shard key** — Is the shard key chosen for even data distribution?
|
|
23
|
+
- [ ] **Ch 1 — Hotspot mitigation** — Are celebrity/hotspot problems addressed?
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## 2. Capacity Estimation (Chapter 2)
|
|
28
|
+
|
|
29
|
+
### Back-of-Envelope
|
|
30
|
+
- [ ] **Ch 2 — QPS estimated** — Are queries per second calculated (average and peak)?
|
|
31
|
+
- [ ] **Ch 2 — Storage estimated** — Is storage growth estimated over time (1 year, 5 years)?
|
|
32
|
+
- [ ] **Ch 2 — Bandwidth estimated** — Is network bandwidth estimated for read and write?
|
|
33
|
+
- [ ] **Ch 2 — Memory estimated** — Is cache memory estimated (e.g., 80/20 rule)?
|
|
34
|
+
- [ ] **Ch 2 — Availability target** — Is the availability SLA defined (99.9%, 99.99%)?
|
|
35
|
+
- [ ] **Ch 2 — Latency awareness** — Are latency numbers considered (memory vs. disk vs. network)?
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## 3. Design Structure (Chapter 3)
|
|
40
|
+
|
|
41
|
+
### Framework Adherence
|
|
42
|
+
- [ ] **Ch 3 — Requirements defined** — Are functional and non-functional requirements explicit?
|
|
43
|
+
- [ ] **Ch 3 — High-level design** — Is there a clear component diagram with data flow?
|
|
44
|
+
- [ ] **Ch 3 — API design** — Are API endpoints defined?
|
|
45
|
+
- [ ] **Ch 3 — Deep dive** — Are 2–3 critical components designed in detail?
|
|
46
|
+
- [ ] **Ch 3 — Trade-offs stated** — Are design trade-offs explicitly discussed?
|
|
47
|
+
- [ ] **Ch 3 — Error handling** — Are failure modes and error handling addressed?
|
|
48
|
+
- [ ] **Ch 3 — Monitoring** — Are logging, metrics, and alerting included?
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## 4. Rate Limiting (Chapter 4)
|
|
53
|
+
|
|
54
|
+
### Rate Limiter Design
|
|
55
|
+
- [ ] **Ch 4 — Algorithm selected** — Is an appropriate rate limiting algorithm chosen for the use case?
|
|
56
|
+
- [ ] **Ch 4 — Distributed concerns** — Are race conditions and multi-server sync addressed?
|
|
57
|
+
- [ ] **Ch 4 — Rate limit response** — Are proper HTTP 429 responses and headers used?
|
|
58
|
+
- [ ] **Ch 4 — Rule configuration** — Are rate limiting rules configurable and cached?
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## 5. Data Distribution (Chapter 5)
|
|
63
|
+
|
|
64
|
+
### Consistent Hashing
|
|
65
|
+
- [ ] **Ch 5 — Hash strategy** — Is consistent hashing used for data/request distribution?
|
|
66
|
+
- [ ] **Ch 5 — Virtual nodes** — Are virtual nodes used for even distribution?
|
|
67
|
+
- [ ] **Ch 5 — Rebalancing** — Is key redistribution minimized when servers change?
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## 6. Distributed Storage (Chapter 6)
|
|
72
|
+
|
|
73
|
+
### Key-Value Store Design
|
|
74
|
+
- [ ] **Ch 6 — CAP choice** — Is the CP vs. AP trade-off explicitly decided?
|
|
75
|
+
- [ ] **Ch 6 — Replication** — Is data replicated to N nodes with appropriate quorum (N/W/R)?
|
|
76
|
+
- [ ] **Ch 6 — Conflict resolution** — Are concurrent write conflicts handled (vector clocks, last-write-wins)?
|
|
77
|
+
- [ ] **Ch 6 — Failure detection** — Is a gossip protocol or equivalent used for failure detection?
|
|
78
|
+
- [ ] **Ch 6 — Failure recovery** — Are sloppy quorum and hinted handoff used for temporary failures?
|
|
79
|
+
- [ ] **Ch 6 — Anti-entropy** — Are Merkle trees used for replica synchronization?
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## 7. Unique IDs (Chapter 7)
|
|
84
|
+
|
|
85
|
+
### ID Generation
|
|
86
|
+
- [ ] **Ch 7 — ID approach** — Is the right ID generation approach used for the requirements?
|
|
87
|
+
- [ ] **Ch 7 — Sortability** — Are IDs sortable by time if needed (snowflake)?
|
|
88
|
+
- [ ] **Ch 7 — Distribution** — Can IDs be generated without central coordination?
|
|
89
|
+
- [ ] **Ch 7 — Size** — Is the ID size appropriate (64-bit vs. 128-bit)?
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## 8. URL Shortening (Chapter 8)
|
|
94
|
+
|
|
95
|
+
### URL Shortener Design
|
|
96
|
+
- [ ] **Ch 8 — Redirect type** — Is the correct redirect (301 vs. 302) chosen based on analytics needs?
|
|
97
|
+
- [ ] **Ch 8 — Hash strategy** — Is the hash/encoding approach appropriate (base-62, hash+collision)?
|
|
98
|
+
- [ ] **Ch 8 — Collision handling** — Are hash collisions detected and resolved?
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## 9. Web Crawling (Chapter 9)
|
|
103
|
+
|
|
104
|
+
### Crawler Design
|
|
105
|
+
- [ ] **Ch 9 — URL frontier** — Does the frontier handle politeness and priority?
|
|
106
|
+
- [ ] **Ch 9 — Content dedup** — Is content fingerprinting used to avoid redundant crawling?
|
|
107
|
+
- [ ] **Ch 9 — URL dedup** — Is a Bloom filter or similar used to track visited URLs?
|
|
108
|
+
- [ ] **Ch 9 — Robots.txt** — Is robots.txt respected and cached?
|
|
109
|
+
- [ ] **Ch 9 — Spider traps** — Is max URL depth enforced to avoid infinite crawling?
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## 10. Notifications (Chapter 10)
|
|
114
|
+
|
|
115
|
+
### Notification System
|
|
116
|
+
- [ ] **Ch 10 — Multi-channel** — Are all required channels supported (push, SMS, email)?
|
|
117
|
+
- [ ] **Ch 10 — Reliability** — Is a notification log maintained for retry on failure?
|
|
118
|
+
- [ ] **Ch 10 — Deduplication** — Are duplicate notifications prevented via event_id checking?
|
|
119
|
+
- [ ] **Ch 10 — Rate limiting** — Are per-user notification limits enforced?
|
|
120
|
+
- [ ] **Ch 10 — Analytics** — Is notification engagement tracked (open rate, click rate)?
|
|
121
|
+
- [ ] **Ch 10 — User preferences** — Can users opt in/out per channel?
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## 11. News Feed (Chapter 11)
|
|
126
|
+
|
|
127
|
+
### News Feed System
|
|
128
|
+
- [ ] **Ch 11 — Fanout model** — Is the right fanout model chosen (push, pull, or hybrid)?
|
|
129
|
+
- [ ] **Ch 11 — Celebrity handling** — Is the celebrity/hotkey problem addressed (hybrid approach)?
|
|
130
|
+
- [ ] **Ch 11 — Cache layers** — Are appropriate cache tiers used (feed, content, social graph, actions, counters)?
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## 12. Chat System (Chapter 12)
|
|
135
|
+
|
|
136
|
+
### Chat Design
|
|
137
|
+
- [ ] **Ch 12 — Protocol** — Is WebSocket used for real-time messaging?
|
|
138
|
+
- [ ] **Ch 12 — Stateful servers** — Are chat servers stateful with proper service discovery?
|
|
139
|
+
- [ ] **Ch 12 — Storage** — Is a key-value store used for message history (write-heavy)?
|
|
140
|
+
- [ ] **Ch 12 — Message sync** — Is per-device cursor-based sync implemented?
|
|
141
|
+
- [ ] **Ch 12 — Presence** — Is online presence tracked with a heartbeat mechanism?
|
|
142
|
+
- [ ] **Ch 12 — Group scaling** — Are small groups (push) and large groups (pull) handled differently?
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## 13. Autocomplete (Chapter 13)
|
|
147
|
+
|
|
148
|
+
### Autocomplete System
|
|
149
|
+
- [ ] **Ch 13 — Trie structure** — Is a trie used for prefix matching?
|
|
150
|
+
- [ ] **Ch 13 — Top-k caching** — Are top-k results cached at each trie node?
|
|
151
|
+
- [ ] **Ch 13 — Data pipeline** — Is there a data gathering → aggregation → trie build pipeline?
|
|
152
|
+
- [ ] **Ch 13 — Browser caching** — Are autocomplete results cached client-side?
|
|
153
|
+
- [ ] **Ch 13 — Content filtering** — Is a filter layer used to remove inappropriate suggestions?
|
|
154
|
+
- [ ] **Ch 13 — Sharding** — Is the trie sharded for scale (by character or frequency)?
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## 14. Video Platform (Chapter 14)
|
|
159
|
+
|
|
160
|
+
### Video System
|
|
161
|
+
- [ ] **Ch 14 — Upload flow** — Is parallel chunked upload with pre-signed URLs used?
|
|
162
|
+
- [ ] **Ch 14 — Transcoding** — Is a DAG-based transcoding pipeline designed?
|
|
163
|
+
- [ ] **Ch 14 — Adaptive streaming** — Is adaptive bitrate streaming used (HLS/DASH)?
|
|
164
|
+
- [ ] **Ch 14 — CDN strategy** — Are popular videos served from CDN, long-tail from origin?
|
|
165
|
+
- [ ] **Ch 14 — Error handling** — Are recoverable vs. non-recoverable errors distinguished?
|
|
166
|
+
- [ ] **Ch 14 — Content safety** — Is DRM, encryption, or watermarking considered?
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## 15. Cloud Storage (Chapter 15)
|
|
171
|
+
|
|
172
|
+
### File Storage System
|
|
173
|
+
- [ ] **Ch 15 — Block servers** — Are files split into blocks for delta sync?
|
|
174
|
+
- [ ] **Ch 15 — Deduplication** — Are duplicate blocks detected by hash and skipped?
|
|
175
|
+
- [ ] **Ch 15 — Resumable uploads** — Are large file uploads resumable?
|
|
176
|
+
- [ ] **Ch 15 — Notifications** — Is long polling used for real-time file change notifications?
|
|
177
|
+
- [ ] **Ch 15 — Conflict resolution** — Is first-version-wins with conflict copies implemented?
|
|
178
|
+
- [ ] **Ch 15 — Versioning** — Is file version history maintained?
|
|
179
|
+
- [ ] **Ch 15 — Offline support** — Is an offline backup queue used for sync when clients reconnect?
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Quick Review Workflow
|
|
184
|
+
|
|
185
|
+
1. **Scale pass** — Are scaling fundamentals applied (LB, cache, CDN, replication, sharding)?
|
|
186
|
+
2. **Estimation pass** — Are capacity numbers calculated and reasonable?
|
|
187
|
+
3. **Structure pass** — Does the design follow the 4-step framework?
|
|
188
|
+
4. **Component pass** — Are relevant design patterns used for each component?
|
|
189
|
+
5. **Failure pass** — Are failure modes identified and handled?
|
|
190
|
+
6. **Trade-off pass** — Are design decisions justified with explicit trade-offs?
|
|
191
|
+
7. **Operational pass** — Are monitoring, logging, and alerting included?
|
|
192
|
+
8. **Prioritize findings** — Rank by severity: missing scaling > wrong data store > missing estimation > process gaps
|
|
193
|
+
|
|
194
|
+
## Severity Levels
|
|
195
|
+
|
|
196
|
+
| Severity | Description | Example |
|
|
197
|
+
|----------|-------------|---------|
|
|
198
|
+
| **Critical** | Missing fundamental scaling or wrong architecture | No load balancing, single DB at scale, no caching for read-heavy system, stateful web servers |
|
|
199
|
+
| **High** | Missing core design patterns | No capacity estimation, wrong CAP choice, no failure handling, no rate limiting |
|
|
200
|
+
| **Medium** | Component design gaps | No CDN, no message queue for async tasks, no content dedup, suboptimal fanout model |
|
|
201
|
+
| **Low** | Optimization improvements | No virtual nodes in consistent hashing, no browser caching for autocomplete, no delta sync |
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
System Design Interview Doc Generator — Alex Xu 4-step framework.
|
|
4
|
+
|
|
5
|
+
Usage (one-shot): python new_design.py "URL Shortener"
|
|
6
|
+
Usage (interactive): python new_design.py
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import math
|
|
11
|
+
import sys
|
|
12
|
+
from datetime import date
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
# Prompting helpers
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
def prompt(label: str, default: str = "") -> str:
    """Ask the user for a non-empty string on stdin.

    The question is shown as ``label [default]: `` (the bracketed hint is
    omitted when *default* is empty). Surrounding whitespace is stripped
    from the answer. A blank answer yields *default* when one is set;
    otherwise the user is told the value is required and asked again.
    """
    hint = ""
    if default:
        hint = f" [{default}]"
    question = f"{label}{hint}: "

    answer = input(question).strip()
    while not answer:
        if default:
            return default
        # No fallback available: keep asking until something is typed.
        print(" (required)")
        answer = input(question).strip()
    return answer
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def prompt_int(label: str, default: int) -> int:
    """Ask the user for an integer on stdin, falling back to *default*.

    The default is displayed with thousands separators. A blank answer
    returns *default*. Commas and underscores in the answer are ignored so
    that inputs such as ``1,000,000`` or ``1_000_000`` parse; anything that
    still is not an integer triggers a hint and another prompt.
    """
    question = f"{label} [{default:,}]: "
    while True:
        text = input(question).strip()
        if text == "":
            return default
        digits = text.replace(",", "").replace("_", "")
        try:
            number = int(digits)
        except ValueError:
            print(" Please enter an integer.")
        else:
            return number
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
# Back-of-envelope calculations
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
def human_size(bytes_: float) -> str:
    """Format a byte count with a binary (1024-based) unit suffix.

    The value is scaled down by 1024 until it is below 1024 or the largest
    supported unit (PB) is reached, then rendered with one decimal place,
    e.g. ``1536`` -> ``"1.5 KB"``.

    Values of 1024 PB or more stay expressed in PB. The value is never
    divided for the final unit, so the number and its label always agree.
    """
    units = ("B", "KB", "MB", "GB", "TB", "PB")
    # Only divide while a larger unit is still available; dividing for the
    # last unit would leave an exabyte-scaled number labelled as "PB".
    for unit in units[:-1]:
        if bytes_ < 1024:
            return f"{bytes_:.1f} {unit}"
        bytes_ /= 1024
    return f"{bytes_:.1f} {units[-1]}"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def calc_estimations(dau: int, read_write_ratio: int, avg_object_size_bytes: int, years: int) -> dict:
    """Return a dict of derived back-of-envelope estimates.

    Assumptions baked into the arithmetic:
    - every daily active user performs exactly one write per day;
    - reads are ``read_write_ratio`` times the writes;
    - peak traffic is twice the average read QPS (rule of thumb);
    - a year has 365 days.

    Args:
        dau: Daily active users.
        read_write_ratio: Number of reads per write.
        avg_object_size_bytes: Average size of one stored object, in bytes.
        years: Retention period in years.

    Returns:
        A dict echoing the four inputs plus ``write_qps``, ``read_qps``,
        ``peak_qps`` (requests/second), ``storage_per_day`` and
        ``total_storage`` (bytes), and ``bandwidth_in`` / ``bandwidth_out``
        (bytes/second).
    """
    seconds_per_day = 86400

    write_qps = dau / seconds_per_day  # 1 write per user per day assumption
    read_qps = write_qps * read_write_ratio
    peak_qps = read_qps * 2  # common rule of thumb

    writes_per_day = dau  # 1 write per active user
    storage_per_day = writes_per_day * avg_object_size_bytes
    total_storage = storage_per_day * 365 * years

    bandwidth_in = write_qps * avg_object_size_bytes  # bytes/sec
    bandwidth_out = read_qps * avg_object_size_bytes

    return {
        "dau": dau,
        "write_qps": write_qps,
        "read_qps": read_qps,
        "peak_qps": peak_qps,
        "read_write_ratio": read_write_ratio,
        "storage_per_day": storage_per_day,
        "total_storage": total_storage,
        "bandwidth_in": bandwidth_in,
        "bandwidth_out": bandwidth_out,
        "years": years,
        "avg_object_size_bytes": avg_object_size_bytes,
    }
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# ---------------------------------------------------------------------------
|
|
84
|
+
# Document sections
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
|
|
87
|
+
def section_requirements(system: str, features: list[str]) -> str:
    """Render the "Step 1: Requirements Clarification" markdown section.

    Each entry of *features* becomes one bullet under "Functional
    Requirements"; the non-functional requirements, out-of-scope list and
    clarifying questions are fixed boilerplate. *system* is accepted for
    signature parity with the other section builders but is not used.
    """
    bullet_lines = [f"- {feature}" for feature in features]
    functional = "\n".join(bullet_lines)
    return f"""\
## Step 1: Requirements Clarification

### Functional Requirements
{functional}

### Non-Functional Requirements
- High availability: 99.99% uptime (< 52 min downtime/year)
- Low latency: p99 read latency < 100 ms
- Durability: no data loss; replicated across at least 3 availability zones
- Eventual consistency is acceptable for non-critical reads
- The system must be horizontally scalable

### Out of Scope (for this interview)
- Admin dashboard / abuse reporting
- A/B testing infrastructure
- Multi-region write consistency
- Billing / rate-limiting per customer tier (mention but don't design)

### Clarifying Questions to Ask the Interviewer
1. What is the expected scale (DAU, peak QPS)?
2. Read-heavy or write-heavy? What is the read:write ratio?
3. Any latency SLA for writes?
4. Do we need strong consistency or is eventual consistency acceptable?
5. What is the retention period for data?
"""
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def section_estimation(e: dict) -> str:
    """Render the "Step 2: Back-of-Envelope Estimation" markdown section.

    *e* is read for the keys ``dau``, ``read_write_ratio``,
    ``avg_object_size_bytes``, ``years``, ``write_qps``, ``read_qps``,
    ``peak_qps``, ``storage_per_day``, ``total_storage``, ``bandwidth_in``
    and ``bandwidth_out``. Byte quantities are formatted with
    ``human_size``; counts use thousands separators.

    The cache-sizing figure is computed inline as 20% of one day's reads
    (``read_qps * 86400``) multiplied by the average object size.
    """
    return f"""\
## Step 2: Back-of-Envelope Estimation

### Assumptions
| Parameter | Value |
|-----------|-------|
| Daily Active Users (DAU) | {e['dau']:,} |
| Read : Write ratio | {e['read_write_ratio']} : 1 |
| Average object size | {human_size(e['avg_object_size_bytes'])} |
| Retention period | {e['years']} years |

### Derived Estimates

**Traffic**
```
Write QPS = DAU / 86,400 s
= {e['dau']:,} / 86,400
≈ {e['write_qps']:,.1f} writes/sec

Read QPS = Write QPS × {e['read_write_ratio']}
≈ {e['read_qps']:,.0f} reads/sec

Peak QPS ≈ Read QPS × 2 (rule of thumb)
≈ {e['peak_qps']:,.0f} reads/sec
```

**Storage**
```
Storage/day = writes/day × avg object size
= {e['dau']:,} × {human_size(e['avg_object_size_bytes'])}
= {human_size(e['storage_per_day'])}

Total = {human_size(e['storage_per_day'])} × 365 × {e['years']} years
≈ {human_size(e['total_storage'])}
```

**Bandwidth**
```
Inbound ≈ {e['write_qps']:,.1f} req/s × {human_size(e['avg_object_size_bytes'])}
≈ {human_size(e['bandwidth_in'])}/s

Outbound ≈ {e['read_qps']:,.0f} req/s × {human_size(e['avg_object_size_bytes'])}
≈ {human_size(e['bandwidth_out'])}/s
```

**Cache sizing (80/20 rule)**
```
Hot data = 20% of daily reads × avg object size
≈ {human_size(e['read_qps'] * 86400 * 0.20 * e['avg_object_size_bytes'])}
```
"""
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def section_high_level(system: str, features: list[str]) -> str:
    """Render the "Step 3: High-Level Design" markdown section.

    The section is fixed boilerplate: a component diagram, a table of core
    API endpoints, a SQL data-model sketch and a technology-choice table.
    Neither *system* nor *features* is interpolated into the text; the
    template contains no placeholders.
    """
    return f"""\
## Step 3: High-Level Design

### Component Diagram (describe to interviewer)

```
Clients
│
▼
[CDN / Edge Cache]
│ (cache hit → return)
▼
[Load Balancer] ←──── health checks
│
├─► [API Server cluster] (stateless, auto-scaling)
│ │
│ ├─► [Cache layer] (Redis / Memcached)
│ │ │ cache miss
│ │ ▼
│ └─► [Primary DB] ←── [Read Replicas]
│
└─► [Message Queue] (Kafka / SQS)
│
▼
[Worker / Consumer]
│
▼
[Object Storage] (S3-compatible, for blobs)
```

### Core API Endpoints

| Method | Path | Description |
|--------|------|-------------|
| POST | /v1/resource | Create a new resource |
| GET | /v1/resource/:id | Fetch by ID |
| PUT | /v1/resource/:id | Update |
| DELETE | /v1/resource/:id | Soft-delete |
| GET | /v1/healthz | Health check |

### Data Model (core entities)

```sql
-- Primary entity
CREATE TABLE resource (
id CHAR(8) PRIMARY KEY, -- or UUID
owner_id BIGINT NOT NULL,
payload TEXT,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
is_deleted BOOLEAN NOT NULL DEFAULT FALSE
);

CREATE INDEX idx_resource_owner ON resource(owner_id);
```

### Technology Choices
| Layer | Choice | Rationale |
|-------|--------|-----------|
| API | REST / gRPC | REST for external; gRPC for internal services |
| Primary DB | PostgreSQL (or Cassandra if write-heavy) | ACID; mature; read replicas |
| Cache | Redis | Sub-millisecond latency; rich data structures |
| Object store | S3-compatible | Cheap; durable; decoupled from DB |
| Queue | Kafka | High-throughput; replay; partitioned by key |
"""
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def section_deep_dive(system: str, e: dict) -> str:
    """Render the "Step 4: Deep Dive" markdown section.

    Only ``e["write_qps"]`` influences the output: above 5000 writes/sec
    the database rationale is written for Cassandra, otherwise for
    PostgreSQL. Everything else is fixed boilerplate. *system* is not used.
    """
    # NOTE(review): the switch happens at 5000 writes/sec while the
    # PostgreSQL text mentions ~10k sustained — confirm which is intended.
    if e["write_qps"] > 5000:
        db_name = "Cassandra"
        db_rationale = "Cassandra: wide-column store optimised for high write throughput with tunable consistency. Partition key = user_id for even distribution."
    else:
        db_name = "PostgreSQL"
        db_rationale = "PostgreSQL: strong ACID guarantees, mature tooling, easy to add read replicas. Move to Cassandra if write QPS exceeds ~10k sustained."

    return f"""\
## Step 4: Deep Dive

### Bottleneck Analysis
- **Write path**: API server → DB primary. Mitigate with write-ahead log tailing,
async replication, and buffered writes via the message queue.
- **Read path**: DB read replicas + Redis cache. Target > 90% cache hit rate.
- **Hot keys**: Apply key-based sharding and local in-process LRU cache for the
top-N items (identified via cache hit analytics).

### Database Deep Dive

**Why {db_name}?**
{db_rationale}

**Sharding strategy**
- Shard by `user_id` hash to distribute load evenly.
- Avoid sharding by time (creates hot partitions for recent data).
- Use consistent hashing to minimise re-sharding cost.

**Replication**
- 1 primary + 2 read replicas per shard (cross-AZ).
- Async replication is acceptable; compensate with cache TTL.

### Caching Strategy
- **Read-through cache**: API checks Redis before DB.
- **Write-invalidation**: On write, delete the cache key (not update).
- **TTL**: Set based on staleness tolerance (e.g., 5 min for non-critical data).
- **Eviction policy**: `allkeys-lru` for general use.

### Consistency Model
- Reads from replicas may be slightly stale (< 1 s typical).
- Critical reads (e.g., immediately after a write) can be routed to primary.
- Use optimistic locking (version column) for concurrent updates.

### Fault Tolerance
- API servers: stateless → replace failed nodes automatically.
- DB primary failure: automated failover to replica (< 30 s with Patroni/RDS).
- Cache failure: graceful degradation — fall through to DB.
- Queue failure: producers buffer locally and retry.

### Scalability Levers (ordered by cost)
1. Increase read replica count.
2. Add Redis cluster nodes.
3. Add API server instances (auto-scaling policy on CPU/QPS).
4. Shard the database.
5. Move to a distributed DB (Cassandra / CockroachDB).

### Areas to Explore If Time Permits
- **CDN**: Cache static and semi-static responses at edge.
- **Rate limiting**: Token bucket per user_id at the load balancer.
- **Search**: Add Elasticsearch for full-text queries.
- **Analytics**: Stream events to a data warehouse (Snowflake / BigQuery) via Kafka.
"""
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def section_interview_questions(system: str) -> str:
    """Render the "Common Follow-Up Interview Questions" markdown table.

    The table is fixed: each row pairs a question with the key points to
    cover. *system* is not used in the output.
    """
    qa_rows = (
        ("How do you handle a DB primary failure?",
         "Automated failover, replica promotion, heartbeat checks"),
        ("How do you prevent cache stampede?",
         "Mutex lock on cache miss, probabilistic early refresh"),
        ("How would you design the ID generation?",
         "Snowflake ID, UUID v7, or DB sequence — trade-offs"),
        ("How do you ensure exactly-once processing?",
         "Idempotency keys, deduplication in the consumer"),
        ("How would you add full-text search?",
         "Elasticsearch / OpenSearch, sync via CDC from DB"),
        ("How do you handle schema migrations?",
         "Expand/contract pattern; blue/green deploys; backward-compatible changes first"),
        ("Walk me through a write from client to storage",
         "Client → LB → API → validate → DB write → publish event → async worker"),
    )
    table = [
        "## Common Follow-Up Interview Questions",
        "",
        "| Question | Key Points to Cover |",
        "|----------|---------------------|",
    ]
    for question, key_points in qa_rows:
        table.append(f"| {question} | {key_points} |")
    return "\n".join(table) + "\n"
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
# ---------------------------------------------------------------------------
|
|
313
|
+
# Main
|
|
314
|
+
# ---------------------------------------------------------------------------
|
|
315
|
+
|
|
316
|
+
def gather_interactive() -> dict:
    """Collect all document parameters from the user via stdin prompts.

    Returns:
        A dict with the keys ``system`` (str), ``features`` (list of
        non-empty, stripped strings), ``dau``, ``rw``, ``obj_size``,
        ``years`` (ints) and ``output`` (a ``Path``, or ``None`` when the
        document should go to stdout).

    ``KeyboardInterrupt`` / ``EOFError`` raised by ``input`` propagate to
    the caller.
    """
    print("\n=== System Design Interview — Document Generator ===\n")
    system = prompt("System name (e.g., 'URL Shortener', 'Twitter Feed')")
    features_raw = prompt(
        "Core features (comma-separated)",
        "Create resource, Retrieve resource, Delete resource"
    )
    features = [f.strip() for f in features_raw.split(",") if f.strip()]
    dau = prompt_int("DAU (Daily Active Users)", 10_000_000)
    rw = prompt_int("Read:Write ratio (e.g., 10 means 10 reads per write)", 10)
    obj_size = prompt_int("Average object size in bytes", 1024)
    years = prompt_int("Retention period (years)", 5)
    # The output file is optional, so it must not go through prompt():
    # prompt() treats an empty default as "value required" and would keep
    # re-asking on a blank answer, making stdout output unreachable.
    output_raw = input("Output file (leave blank for stdout): ").strip()
    output = Path(output_raw) if output_raw else None
    return dict(system=system, features=features, dau=dau, rw=rw,
                obj_size=obj_size, years=years, output=output)
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def render(data: dict) -> str:
    """Assemble the complete markdown design document.

    *data* must provide ``system``, ``features``, ``dau``, ``rw``,
    ``obj_size`` and ``years``. The document consists of a title block,
    the five sections in framework order (each followed by a ``---`` rule
    and a blank line) and a generator footer, ending with a newline.
    """
    system = data["system"]
    estimates = calc_estimations(
        dau=data["dau"],
        read_write_ratio=data["rw"],
        avg_object_size_bytes=data["obj_size"],
        years=data["years"],
    )

    lines = [
        f"# System Design: {system}",
        "",
        f"**Date:** {date.today()} ",
        f"**Framework:** Alex Xu — System Design Interview Vol. 1 & 2",
        "",
        "---",
        "",
    ]
    sections = (
        section_requirements(system, data["features"]),
        section_estimation(estimates),
        section_high_level(system, data["features"]),
        section_deep_dive(system, estimates),
        section_interview_questions(system),
    )
    for body in sections:
        # Every section is closed by a horizontal rule and a blank line.
        lines.extend((body, "---", ""))
    lines.append("*Generated by `new_design.py` — System Design Interview skill.*")
    return "\n".join(lines) + "\n"
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def main() -> None:
    """Command-line entry point: gather parameters, render, and emit.

    Modes:
    - A system name on the command line runs non-interactively. Each of
      ``--dau``, ``--rw``, ``--obj-size``, ``--years`` and ``--features``
      is honoured when given and falls back to its own default otherwise.
    - No system name starts the interactive prompts; Ctrl-C / EOF aborts
      with exit status 1.

    The document is written to ``--output`` (as UTF-8) when set, otherwise
    to stdout.
    """
    parser = argparse.ArgumentParser(
        description="Generate a system design interview document (Alex Xu framework)."
    )
    parser.add_argument("system", nargs="?", help="System name (skips prompt if provided)")
    parser.add_argument("--dau", type=int, default=None)
    parser.add_argument("--rw", type=int, default=None, help="Read:write ratio")
    parser.add_argument("--obj-size", type=int, default=None, help="Avg object size in bytes")
    parser.add_argument("--years", type=int, default=None, help="Retention years")
    parser.add_argument("--features", help="Comma-separated feature list")
    parser.add_argument("--output", type=Path, default=None)
    args = parser.parse_args()

    if args.system:
        # Resolve every numeric option independently so that supplying only
        # some flags no longer discards the ones that were supplied.
        fallbacks = {"dau": 10_000_000, "rw": 10, "obj_size": 1024, "years": 5}
        numbers = {}
        for key, fallback in fallbacks.items():
            given = getattr(args, key)
            if given is None:
                numbers[key] = fallback
                continue
            if given < 1:
                # Zero or negative inputs would produce a meaningless document.
                parser.error(f"--{key.replace('_', '-')} must be a positive integer")
            numbers[key] = given

        # Drop empty items such as the one produced by a trailing comma.
        features = [f.strip() for f in (args.features or "").split(",") if f.strip()]
        if not features:
            features = ["Create resource", "Read resource", "Delete resource"]

        data = dict(
            system=args.system, features=features,
            output=args.output,
            **numbers,
        )
    else:
        try:
            data = gather_interactive()
        except (KeyboardInterrupt, EOFError):
            print("\nAborted.", file=sys.stderr)
            sys.exit(1)

    document = render(data)

    if data.get("output"):
        # The document contains non-ASCII characters (≈, ×, —, box drawing),
        # so do not depend on the platform's default encoding.
        data["output"].write_text(document, encoding="utf-8")
        print(f"Design document written to: {data['output']}")
    else:
        sys.stdout.write(document)
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
# Run the generator only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|