@booklib/skills 1.2.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +122 -0
- package/README.md +20 -2
- package/ROADMAP.md +36 -0
- package/animation-at-work/evals/evals.json +44 -0
- package/animation-at-work/examples/after.md +64 -0
- package/animation-at-work/examples/before.md +35 -0
- package/animation-at-work/scripts/audit_animations.py +295 -0
- package/bin/skills.js +552 -42
- package/clean-code-reviewer/SKILL.md +109 -1
- package/clean-code-reviewer/evals/evals.json +121 -3
- package/clean-code-reviewer/examples/after.md +48 -0
- package/clean-code-reviewer/examples/before.md +33 -0
- package/clean-code-reviewer/references/api_reference.md +158 -0
- package/clean-code-reviewer/references/practices-catalog.md +282 -0
- package/clean-code-reviewer/references/review-checklist.md +254 -0
- package/clean-code-reviewer/scripts/pre-review.py +206 -0
- package/data-intensive-patterns/evals/evals.json +43 -0
- package/data-intensive-patterns/examples/after.md +61 -0
- package/data-intensive-patterns/examples/before.md +38 -0
- package/data-intensive-patterns/scripts/adr.py +213 -0
- package/data-pipelines/evals/evals.json +45 -0
- package/data-pipelines/examples/after.md +97 -0
- package/data-pipelines/examples/before.md +37 -0
- package/data-pipelines/scripts/new_pipeline.py +444 -0
- package/design-patterns/evals/evals.json +46 -0
- package/design-patterns/examples/after.md +52 -0
- package/design-patterns/examples/before.md +29 -0
- package/design-patterns/scripts/scaffold.py +807 -0
- package/domain-driven-design/SKILL.md +120 -0
- package/domain-driven-design/evals/evals.json +48 -0
- package/domain-driven-design/examples/after.md +80 -0
- package/domain-driven-design/examples/before.md +43 -0
- package/domain-driven-design/scripts/scaffold.py +421 -0
- package/effective-java/evals/evals.json +46 -0
- package/effective-java/examples/after.md +83 -0
- package/effective-java/examples/before.md +37 -0
- package/effective-java/scripts/checkstyle_setup.py +211 -0
- package/effective-kotlin/evals/evals.json +45 -0
- package/effective-kotlin/examples/after.md +36 -0
- package/effective-kotlin/examples/before.md +38 -0
- package/effective-python/evals/evals.json +44 -0
- package/effective-python/examples/after.md +56 -0
- package/effective-python/examples/before.md +40 -0
- package/effective-python/references/api_reference.md +218 -0
- package/effective-python/references/practices-catalog.md +483 -0
- package/effective-python/references/review-checklist.md +190 -0
- package/effective-python/scripts/lint.py +173 -0
- package/kotlin-in-action/evals/evals.json +43 -0
- package/kotlin-in-action/examples/after.md +53 -0
- package/kotlin-in-action/examples/before.md +39 -0
- package/kotlin-in-action/scripts/setup_detekt.py +224 -0
- package/lean-startup/evals/evals.json +43 -0
- package/lean-startup/examples/after.md +80 -0
- package/lean-startup/examples/before.md +34 -0
- package/lean-startup/scripts/new_experiment.py +286 -0
- package/microservices-patterns/SKILL.md +140 -0
- package/microservices-patterns/evals/evals.json +45 -0
- package/microservices-patterns/examples/after.md +69 -0
- package/microservices-patterns/examples/before.md +40 -0
- package/microservices-patterns/scripts/new_service.py +583 -0
- package/package.json +2 -8
- package/refactoring-ui/evals/evals.json +45 -0
- package/refactoring-ui/examples/after.md +85 -0
- package/refactoring-ui/examples/before.md +58 -0
- package/refactoring-ui/scripts/audit_css.py +250 -0
- package/skill-router/SKILL.md +142 -0
- package/skill-router/evals/evals.json +38 -0
- package/skill-router/examples/after.md +63 -0
- package/skill-router/examples/before.md +39 -0
- package/skill-router/references/api_reference.md +24 -0
- package/skill-router/references/routing-heuristics.md +89 -0
- package/skill-router/references/skill-catalog.md +156 -0
- package/skill-router/scripts/route.py +266 -0
- package/storytelling-with-data/evals/evals.json +47 -0
- package/storytelling-with-data/examples/after.md +50 -0
- package/storytelling-with-data/examples/before.md +33 -0
- package/storytelling-with-data/scripts/chart_review.py +301 -0
- package/system-design-interview/evals/evals.json +45 -0
- package/system-design-interview/examples/after.md +94 -0
- package/system-design-interview/examples/before.md +27 -0
- package/system-design-interview/scripts/new_design.py +421 -0
- package/using-asyncio-python/evals/evals.json +43 -0
- package/using-asyncio-python/examples/after.md +68 -0
- package/using-asyncio-python/examples/before.md +39 -0
- package/using-asyncio-python/scripts/check_blocking.py +270 -0
- package/web-scraping-python/evals/evals.json +46 -0
- package/web-scraping-python/examples/after.md +109 -0
- package/web-scraping-python/examples/before.md +40 -0
- package/web-scraping-python/scripts/new_scraper.py +231 -0
- /package/{effective-python-skill → effective-python}/SKILL.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-01-pythonic-thinking.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-02-lists-and-dicts.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-03-functions.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-04-comprehensions-generators.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-05-classes-interfaces.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-06-metaclasses-attributes.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-07-concurrency.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-08-robustness-performance.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-09-testing-debugging.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-10-collaboration.md +0 -0
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
System Design Interview Doc Generator — Alex Xu 4-step framework.
|
|
4
|
+
|
|
5
|
+
Usage (one-shot): python new_design.py "URL Shortener"
|
|
6
|
+
Usage (interactive): python new_design.py
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import math
|
|
11
|
+
import sys
|
|
12
|
+
from datetime import date
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
# Prompting helpers
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
def prompt(label: str, default: str = "") -> str:
    """Ask the user for a string value on stdin.

    Re-asks until a non-empty answer is given; an empty answer falls back
    to *default* when one was supplied.
    """
    hint = f" [{default}]" if default else ""
    while True:
        answer = input(f"{label}{hint}: ").strip()
        if answer or default:
            return answer or default
        print(" (required)")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def prompt_int(label: str, default: int) -> int:
    """Ask the user for an integer on stdin.

    Accepts ',' and '_' digit separators; an empty answer yields *default*.
    Re-asks on anything that does not parse as an integer.
    """
    while True:
        raw_text = input(f"{label} [{default:,}]: ").strip()
        if not raw_text:
            return default
        digits = raw_text.replace(",", "").replace("_", "")
        try:
            return int(digits)
        except ValueError:
            print(" Please enter an integer.")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
# Back-of-envelope calculations
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
def human_size(bytes_: float) -> str:
    """Format a byte count as a human-readable string (e.g. 1536 -> '1.5 KB').

    Fix: the original loop iterated over all six units and also divided on
    the final "PB" iteration, so any value >= 1024 PB was divided one extra
    time yet still labelled PB (1 EB rendered as "1.0 PB"). Dividing only
    between units keeps the value and the unit label in sync; anything past
    TB is reported in (possibly > 1024) PB.
    """
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if bytes_ < 1024:
            return f"{bytes_:.1f} {unit}"
        bytes_ /= 1024  # move to the next-larger unit
    return f"{bytes_:.1f} PB"  # >= 1024 TB: report in PB without a spurious extra division
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def calc_estimations(dau: int, read_write_ratio: int, avg_object_size_bytes: int, years: int) -> dict:
    """Return back-of-envelope estimates derived from the input assumptions.

    Model: each active user performs 1 write/day, and ``read_write_ratio``
    reads per write; peak QPS is approximated as 2x the average read QPS.
    (The original computed an unused ``total_requests_per_day`` local,
    removed here.)

    Args:
        dau: Daily active users.
        read_write_ratio: Reads per write (e.g. 10 means a 10:1 ratio).
        avg_object_size_bytes: Average stored object size in bytes.
        years: Retention period used for total-storage sizing.

    Returns:
        Dict echoing the inputs plus derived traffic (QPS), storage (bytes)
        and bandwidth (bytes/sec) figures, as consumed by section_estimation().
    """
    write_qps = dau / 86400  # 1 write per user per day assumption
    read_qps = write_qps * read_write_ratio
    peak_qps = read_qps * 2  # common rule of thumb

    writes_per_day = dau  # 1 write per active user
    storage_per_day = writes_per_day * avg_object_size_bytes
    total_storage = storage_per_day * 365 * years

    bandwidth_in = write_qps * avg_object_size_bytes  # bytes/sec
    bandwidth_out = read_qps * avg_object_size_bytes

    return {
        "dau": dau,
        "write_qps": write_qps,
        "read_qps": read_qps,
        "peak_qps": peak_qps,
        "read_write_ratio": read_write_ratio,
        "storage_per_day": storage_per_day,
        "total_storage": total_storage,
        "bandwidth_in": bandwidth_in,
        "bandwidth_out": bandwidth_out,
        "years": years,
        "avg_object_size_bytes": avg_object_size_bytes,
    }
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# ---------------------------------------------------------------------------
|
|
84
|
+
# Document sections
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
|
|
87
|
+
def section_requirements(system: str, features: list[str]) -> str:
    """Render the Step-1 requirements section as markdown.

    Args:
        system: System name. Currently unused here; kept for signature
            symmetry with the other section_* renderers.
        features: Functional requirements, one markdown bullet each.
    """
    # One "- <feature>" bullet per functional requirement.
    func = "\n".join(f"- {f}" for f in features)
    return f"""\
## Step 1: Requirements Clarification

### Functional Requirements
{func}

### Non-Functional Requirements
- High availability: 99.99% uptime (< 52 min downtime/year)
- Low latency: p99 read latency < 100 ms
- Durability: no data loss; replicated across at least 3 availability zones
- Eventual consistency is acceptable for non-critical reads
- The system must be horizontally scalable

### Out of Scope (for this interview)
- Admin dashboard / abuse reporting
- A/B testing infrastructure
- Multi-region write consistency
- Billing / rate-limiting per customer tier (mention but don't design)

### Clarifying Questions to Ask the Interviewer
1. What is the expected scale (DAU, peak QPS)?
2. Read-heavy or write-heavy? What is the read:write ratio?
3. Any latency SLA for writes?
4. Do we need strong consistency or is eventual consistency acceptable?
5. What is the retention period for data?
"""
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def section_estimation(e: dict) -> str:
    """Render the Step-2 back-of-envelope section as markdown.

    Args:
        e: Estimates dict as produced by calc_estimations(); all keys read
           here (dau, read_write_ratio, write_qps, read_qps, peak_qps,
           storage_per_day, total_storage, bandwidth_in, bandwidth_out,
           avg_object_size_bytes, years) come from that function.
    """
    return f"""\
## Step 2: Back-of-Envelope Estimation

### Assumptions
| Parameter | Value |
|-----------|-------|
| Daily Active Users (DAU) | {e['dau']:,} |
| Read : Write ratio | {e['read_write_ratio']} : 1 |
| Average object size | {human_size(e['avg_object_size_bytes'])} |
| Retention period | {e['years']} years |

### Derived Estimates

**Traffic**
```
Write QPS = DAU / 86,400 s
          = {e['dau']:,} / 86,400
          ≈ {e['write_qps']:,.1f} writes/sec

Read QPS = Write QPS × {e['read_write_ratio']}
         ≈ {e['read_qps']:,.0f} reads/sec

Peak QPS ≈ Read QPS × 2 (rule of thumb)
         ≈ {e['peak_qps']:,.0f} reads/sec
```

**Storage**
```
Storage/day = writes/day × avg object size
            = {e['dau']:,} × {human_size(e['avg_object_size_bytes'])}
            = {human_size(e['storage_per_day'])}

Total = {human_size(e['storage_per_day'])} × 365 × {e['years']} years
      ≈ {human_size(e['total_storage'])}
```

**Bandwidth**
```
Inbound ≈ {e['write_qps']:,.1f} req/s × {human_size(e['avg_object_size_bytes'])}
        ≈ {human_size(e['bandwidth_in'])}/s

Outbound ≈ {e['read_qps']:,.0f} req/s × {human_size(e['avg_object_size_bytes'])}
         ≈ {human_size(e['bandwidth_out'])}/s
```

**Cache sizing (80/20 rule)**
```
Hot data = 20% of daily reads × avg object size
         ≈ {human_size(e['read_qps'] * 86400 * 0.20 * e['avg_object_size_bytes'])}
```
"""
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def section_high_level(system: str, features: list[str]) -> str:
    """Render the Step-3 high-level design section as markdown.

    The section body is a fixed generic architecture (CDN, LB, API cluster,
    cache, DB, queue, object storage) — it does not vary with the inputs.

    Args:
        system: System name. Currently unused in the template; kept for
            signature symmetry with the other section_* renderers.
        features: Feature list. Also currently unused here.
    """
    return f"""\
## Step 3: High-Level Design

### Component Diagram (describe to interviewer)

```
Clients
   │
   ▼
[CDN / Edge Cache]
   │ (cache hit → return)
   ▼
[Load Balancer] ←──── health checks
   │
   ├─► [API Server cluster] (stateless, auto-scaling)
   │         │
   │         ├─► [Cache layer] (Redis / Memcached)
   │         │        │ cache miss
   │         │        ▼
   │         └─► [Primary DB] ←── [Read Replicas]
   │
   └─► [Message Queue] (Kafka / SQS)
              │
              ▼
      [Worker / Consumer]
              │
              ▼
      [Object Storage] (S3-compatible, for blobs)
```

### Core API Endpoints

| Method | Path | Description |
|--------|------|-------------|
| POST | /v1/resource | Create a new resource |
| GET | /v1/resource/:id | Fetch by ID |
| PUT | /v1/resource/:id | Update |
| DELETE | /v1/resource/:id | Soft-delete |
| GET | /v1/healthz | Health check |

### Data Model (core entities)

```sql
-- Primary entity
CREATE TABLE resource (
    id CHAR(8) PRIMARY KEY, -- or UUID
    owner_id BIGINT NOT NULL,
    payload TEXT,
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
    is_deleted BOOLEAN NOT NULL DEFAULT FALSE
);

CREATE INDEX idx_resource_owner ON resource(owner_id);
```

### Technology Choices
| Layer | Choice | Rationale |
|-------|--------|-----------|
| API | REST / gRPC | REST for external; gRPC for internal services |
| Primary DB | PostgreSQL (or Cassandra if write-heavy) | ACID; mature; read replicas |
| Cache | Redis | Sub-millisecond latency; rich data structures |
| Object store | S3-compatible | Cheap; durable; decoupled from DB |
| Queue | Kafka | High-throughput; replay; partitioned by key |
"""
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def section_deep_dive(system: str, e: dict) -> str:
    """Render the Step-4 deep-dive section as markdown.

    The database recommendation is data-driven: Cassandra when
    e["write_qps"] > 5000, PostgreSQL otherwise.

    NOTE(review): the code threshold is 5000 write QPS while the rendered
    PostgreSQL text says "exceeds ~10k sustained" — the two figures look
    inconsistent; confirm which is intended.

    Args:
        system: System name. Currently unused in the template; kept for
            signature symmetry with the other section_* renderers.
        e: Estimates dict from calc_estimations(); only "write_qps" is read.
    """
    return f"""\
## Step 4: Deep Dive

### Bottleneck Analysis
- **Write path**: API server → DB primary. Mitigate with write-ahead log tailing,
  async replication, and buffered writes via the message queue.
- **Read path**: DB read replicas + Redis cache. Target > 90% cache hit rate.
- **Hot keys**: Apply key-based sharding and local in-process LRU cache for the
  top-N items (identified via cache hit analytics).

### Database Deep Dive

**Why {("Cassandra" if e["write_qps"] > 5000 else "PostgreSQL")}?**
{"Cassandra: wide-column store optimised for high write throughput with tunable consistency. Partition key = user_id for even distribution." if e["write_qps"] > 5000 else "PostgreSQL: strong ACID guarantees, mature tooling, easy to add read replicas. Move to Cassandra if write QPS exceeds ~10k sustained."}

**Sharding strategy**
- Shard by `user_id` hash to distribute load evenly.
- Avoid sharding by time (creates hot partitions for recent data).
- Use consistent hashing to minimise re-sharding cost.

**Replication**
- 1 primary + 2 read replicas per shard (cross-AZ).
- Async replication is acceptable; compensate with cache TTL.

### Caching Strategy
- **Read-through cache**: API checks Redis before DB.
- **Write-invalidation**: On write, delete the cache key (not update).
- **TTL**: Set based on staleness tolerance (e.g., 5 min for non-critical data).
- **Eviction policy**: `allkeys-lru` for general use.

### Consistency Model
- Reads from replicas may be slightly stale (< 1 s typical).
- Critical reads (e.g., immediately after a write) can be routed to primary.
- Use optimistic locking (version column) for concurrent updates.

### Fault Tolerance
- API servers: stateless → replace failed nodes automatically.
- DB primary failure: automated failover to replica (< 30 s with Patroni/RDS).
- Cache failure: graceful degradation — fall through to DB.
- Queue failure: producers buffer locally and retry.

### Scalability Levers (ordered by cost)
1. Increase read replica count.
2. Add Redis cluster nodes.
3. Add API server instances (auto-scaling policy on CPU/QPS).
4. Shard the database.
5. Move to a distributed DB (Cassandra / CockroachDB).

### Areas to Explore If Time Permits
- **CDN**: Cache static and semi-static responses at edge.
- **Rate limiting**: Token bucket per user_id at the load balancer.
- **Search**: Add Elasticsearch for full-text queries.
- **Analytics**: Stream events to a data warehouse (Snowflake / BigQuery) via Kafka.
"""
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def section_interview_questions(system: str) -> str:
    """Render the fixed follow-up Q&A table as markdown.

    Args:
        system: System name. Currently unused in the template; kept for
            signature symmetry with the other section_* renderers.
    """
    return f"""\
## Common Follow-Up Interview Questions

| Question | Key Points to Cover |
|----------|---------------------|
| How do you handle a DB primary failure? | Automated failover, replica promotion, heartbeat checks |
| How do you prevent cache stampede? | Mutex lock on cache miss, probabilistic early refresh |
| How would you design the ID generation? | Snowflake ID, UUID v7, or DB sequence — trade-offs |
| How do you ensure exactly-once processing? | Idempotency keys, deduplication in the consumer |
| How would you add full-text search? | Elasticsearch / OpenSearch, sync via CDC from DB |
| How do you handle schema migrations? | Expand/contract pattern; blue/green deploys; backward-compatible changes first |
| Walk me through a write from client to storage | Client → LB → API → validate → DB write → publish event → async worker |
"""
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
# ---------------------------------------------------------------------------
|
|
313
|
+
# Main
|
|
314
|
+
# ---------------------------------------------------------------------------
|
|
315
|
+
|
|
316
|
+
def gather_interactive() -> dict:
    """Collect all design parameters from the user via stdin prompts.

    Prompt order matters (it is the UX): system, features, DAU, ratio,
    object size, retention, output path.
    """
    print("\n=== System Design Interview — Document Generator ===\n")
    system = prompt("System name (e.g., 'URL Shortener', 'Twitter Feed')")
    features_raw = prompt(
        "Core features (comma-separated)",
        "Create resource, Retrieve resource, Delete resource"
    )
    feature_list = [item.strip() for item in features_raw.split(",") if item.strip()]
    dau = prompt_int("DAU (Daily Active Users)", 10_000_000)
    ratio = prompt_int("Read:Write ratio (e.g., 10 means 10 reads per write)", 10)
    size_bytes = prompt_int("Average object size in bytes", 1024)
    retention = prompt_int("Retention period (years)", 5)
    out_raw = prompt("Output file (leave blank for stdout)", "")
    return {
        "system": system,
        "features": feature_list,
        "dau": dau,
        "rw": ratio,
        "obj_size": size_bytes,
        "years": retention,
        "output": Path(out_raw) if out_raw else None,
    }
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def render(data: dict) -> str:
    """Assemble the complete markdown document from the gathered parameters.

    Args:
        data: Dict with keys system, features, dau, rw, obj_size, years
              (as built by gather_interactive() or main()).

    Returns:
        Full document text, ending with a trailing newline.
    """
    system = data["system"]
    est = calc_estimations(
        dau=data["dau"],
        read_write_ratio=data["rw"],
        avg_object_size_bytes=data["obj_size"],
        years=data["years"],
    )
    # Header block first; every subsequent chunk is prefixed by a
    # horizontal rule and a blank line.
    lines = [
        f"# System Design: {system}",
        "",
        f"**Date:** {date.today()} ",
        "**Framework:** Alex Xu — System Design Interview Vol. 1 & 2",
        "",
    ]
    for chunk in (
        section_requirements(system, data["features"]),
        section_estimation(est),
        section_high_level(system, data["features"]),
        section_deep_dive(system, est),
        section_interview_questions(system),
        "*Generated by `new_design.py` — System Design Interview skill.*",
    ):
        lines += ["---", "", chunk]
    return "\n".join(lines) + "\n"
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def main() -> None:
    """CLI entry point.

    Modes:
      * system name given: any of --dau/--rw/--obj-size/--years/--features
        that are provided are honoured, the rest fall back to defaults;
      * nothing given: interactive prompts (Ctrl-C / EOF aborts cleanly).

    Fixes over the original: flags are tested with ``is not None`` instead
    of truthiness (so an explicit 0 is not treated as "missing"), and a
    partial flag set no longer silently falls back to all-defaults — the
    original ignored every flag, including --features, unless dau, rw,
    obj-size AND years were all present.
    """
    parser = argparse.ArgumentParser(
        description="Generate a system design interview document (Alex Xu framework)."
    )
    parser.add_argument("system", nargs="?", help="System name (skips prompt if provided)")
    parser.add_argument("--dau", type=int, default=None)
    parser.add_argument("--rw", type=int, default=None, help="Read:write ratio")
    parser.add_argument("--obj-size", type=int, default=None, help="Avg object size in bytes")
    parser.add_argument("--years", type=int, default=None, help="Retention years")
    parser.add_argument("--features", help="Comma-separated feature list")
    parser.add_argument("--output", type=Path, default=None)
    args = parser.parse_args()

    if args.system:
        features = (
            [f.strip() for f in args.features.split(",") if f.strip()]
            if args.features
            else ["Create resource", "Read resource", "Delete resource"]
        )
        data = dict(
            system=args.system,
            features=features,
            # Per-flag defaults: each missing flag falls back independently.
            dau=args.dau if args.dau is not None else 10_000_000,
            rw=args.rw if args.rw is not None else 10,
            obj_size=args.obj_size if args.obj_size is not None else 1024,
            years=args.years if args.years is not None else 5,
            output=args.output,
        )
    else:
        try:
            data = gather_interactive()
        except (KeyboardInterrupt, EOFError):
            print("\nAborted.", file=sys.stderr)
            sys.exit(1)

    document = render(data)

    if data.get("output"):
        data["output"].write_text(document)
        print(f"Design document written to: {data['output']}")
    else:
        sys.stdout.write(document)
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
if __name__ == "__main__":
    # Script entry point: parse CLI args / prompt interactively, then
    # print or write the generated design document.
    main()
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"evals": [
|
|
3
|
+
{
|
|
4
|
+
"id": "eval-01-blocking-io-inside-async",
|
|
5
|
+
"prompt": "Review this async Python code:\n\n```python\nimport asyncio\nimport requests\nimport time\n\nPRICING_API = 'https://api.pricing.internal/products'\nINVENTORY_API = 'https://api.inventory.internal/stock'\n\nasync def fetch_product_data(product_ids: list[str]) -> list[dict]:\n results = []\n for pid in product_ids:\n price_resp = requests.get(f'{PRICING_API}/{pid}')\n price = price_resp.json()['price']\n\n stock_resp = requests.get(f'{INVENTORY_API}/{pid}')\n stock = stock_resp.json()['quantity']\n\n results.append({'id': pid, 'price': price, 'stock': stock})\n time.sleep(0.1) # be polite to the API\n\n return results\n\nasync def main():\n ids = ['A1', 'B2', 'C3', 'D4', 'E5', 'F6', 'G7', 'H8']\n data = await fetch_product_data(ids)\n print(f'Fetched {len(data)} products')\n\nasyncio.run(main())\n```",
|
|
6
|
+
"expectations": [
|
|
7
|
+
"Flags `requests.get()` inside an `async def` function as a critical blocking call that stalls the entire event loop while waiting for the HTTP response (Ch 2-3: never block the event loop)",
|
|
8
|
+
"Flags `time.sleep(0.1)` inside an async function as a blocking sleep that also freezes the event loop; recommends `await asyncio.sleep(0.1)` (Ch 3: use asyncio.sleep, not time.sleep)",
|
|
9
|
+
"Flags that the two API calls per product (`PRICING_API` and `INVENTORY_API`) are made sequentially inside the loop; recommends using `asyncio.gather` to fetch both concurrently per product (Ch 3: use gather for fan-out concurrency)",
|
|
10
|
+
"Flags that all products are fetched sequentially in a for loop; recommends fetching all products concurrently with `asyncio.gather` or `create_task` (Ch 3: create_task for concurrency)",
|
|
11
|
+
"Recommends replacing `requests` with `aiohttp.ClientSession` for non-blocking HTTP calls (Ch 4: use aiohttp over requests in async code)",
|
|
12
|
+
"Recommends using a `Semaphore` to limit concurrent requests instead of `time.sleep` for rate limiting (Ch 3: Semaphore for concurrency control)",
|
|
13
|
+
"Provides a corrected version using aiohttp, asyncio.gather, asyncio.sleep, and a Semaphore"
|
|
14
|
+
]
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "eval-02-ensure-future-and-fire-and-forget",
|
|
18
|
+
"prompt": "Review this async Python code:\n\n```python\nimport asyncio\n\nasync def save_to_database(record: dict) -> None:\n await asyncio.sleep(0.05) # simulate DB write\n print(f\"Saved {record['id']}\")\n\nasync def send_notification(user_id: str) -> None:\n await asyncio.sleep(0.1) # simulate email send\n print(f\"Notified {user_id}\")\n\nasync def process_event(event: dict) -> None:\n await save_to_database(event)\n\n # Fire and forget the notification\n asyncio.ensure_future(send_notification(event['user_id']))\n\n print(f\"Processed event {event['id']}\")\n\nasync def main():\n loop = asyncio.get_event_loop()\n\n events = [{'id': f'e{i}', 'user_id': f'u{i}'} for i in range(10)]\n for event in events:\n loop.run_until_complete(process_event(event))\n\nasyncio.run(main())\n```",
|
|
19
|
+
"expectations": [
|
|
20
|
+
"Flags `asyncio.ensure_future()` as the deprecated/less preferred API for scheduling coroutines; recommends `asyncio.create_task()` which is more explicit and requires an active event loop (Ch 3: prefer create_task over ensure_future)",
|
|
21
|
+
"Flags fire-and-forget usage of `ensure_future` without storing the task reference: if `send_notification` raises an exception, it is silently discarded; recommends keeping a reference and handling exceptions (Ch 3: keep references to created tasks; unhandled task exceptions are silent)",
|
|
22
|
+
"Flags `loop.run_until_complete(process_event(event))` called inside an already-running async context (`main` is a coroutine): `run_until_complete` cannot be called from within a running loop; this will raise RuntimeError (Ch 3: do not call asyncio.run or run_until_complete from within async code)",
|
|
23
|
+
"Flags `asyncio.get_event_loop()` as deprecated for getting the running loop inside async code; recommends `asyncio.get_running_loop()` or eliminating direct loop access (Ch 3: use asyncio.run as the single entry point, avoid manual loop management)",
|
|
24
|
+
"Flags the sequential `for event in events` loop using run_until_complete; recommends processing all events concurrently with `asyncio.gather` (Ch 3: use gather for concurrency)",
|
|
25
|
+
"Provides a corrected version using create_task with proper task tracking, asyncio.gather for concurrency, and exception handling on fire-and-forget tasks"
|
|
26
|
+
]
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"id": "eval-03-clean-async-gather-task-management",
|
|
30
|
+
"prompt": "Review this async Python code:\n\n```python\nimport asyncio\nimport logging\nfrom contextlib import asynccontextmanager\nfrom typing import AsyncIterator\nimport aiohttp\n\nlogger = logging.getLogger(__name__)\n\nMAX_CONCURRENT = 5\n\n@asynccontextmanager\nasync def http_session() -> AsyncIterator[aiohttp.ClientSession]:\n async with aiohttp.ClientSession(\n timeout=aiohttp.ClientTimeout(total=10)\n ) as session:\n yield session\n\n\nasync def fetch_one(session: aiohttp.ClientSession, url: str, semaphore: asyncio.Semaphore) -> dict:\n async with semaphore:\n try:\n async with session.get(url) as resp:\n resp.raise_for_status()\n return {'url': url, 'data': await resp.json()}\n except aiohttp.ClientError as exc:\n logger.warning('Failed to fetch %s: %s', url, exc)\n return {'url': url, 'data': None, 'error': str(exc)}\n\n\nasync def fetch_all(urls: list[str]) -> list[dict]:\n semaphore = asyncio.Semaphore(MAX_CONCURRENT)\n async with http_session() as session:\n tasks = [\n asyncio.create_task(fetch_one(session, url, semaphore))\n for url in urls\n ]\n results = await asyncio.gather(*tasks, return_exceptions=True)\n return [r for r in results if not isinstance(r, BaseException)]\n\n\nasync def main() -> None:\n urls = [f'https://api.example.com/item/{i}' for i in range(20)]\n items = await fetch_all(urls)\n logger.info('Fetched %d items successfully', len(items))\n\n\nif __name__ == '__main__':\n asyncio.run(main())\n```",
|
|
31
|
+
"expectations": [
|
|
32
|
+
"Recognizes this is well-structured async code and says so explicitly",
|
|
33
|
+
"Praises the async context manager `http_session` using `async with aiohttp.ClientSession` ensuring the session is always closed (Ch 3-4: use async context managers for resource cleanup)",
|
|
34
|
+
"Praises `asyncio.Semaphore(MAX_CONCURRENT)` to cap concurrent requests, preventing thundering-herd against the remote API (Ch 3: use Semaphore to limit concurrency)",
|
|
35
|
+
"Praises `asyncio.create_task()` over `ensure_future()` for scheduling coroutines (Ch 3: prefer create_task)",
|
|
36
|
+
"Praises `asyncio.gather(*tasks, return_exceptions=True)` which prevents one failure from cancelling all other in-flight requests (Ch 3: use return_exceptions=True in gather)",
|
|
37
|
+
"Praises `resp.raise_for_status()` and catching `aiohttp.ClientError` with graceful per-URL error handling that does not crash the whole batch (Ch 3: error handling per task)",
|
|
38
|
+
"Praises `asyncio.run(main())` as the single clean entry point (Ch 3: use asyncio.run, avoid manual loop management)",
|
|
39
|
+
"Does NOT manufacture issues to appear thorough; any suggestions are explicitly framed as minor optional improvements"
|
|
40
|
+
]
|
|
41
|
+
}
|
|
42
|
+
]
|
|
43
|
+
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# After
|
|
2
|
+
|
|
3
|
+
Proper async code using `aiohttp` for non-blocking HTTP, `asyncio.gather` to fetch all three attributes of each product concurrently, and a semaphore to cap simultaneous connections.
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
import asyncio
|
|
7
|
+
import aiohttp
|
|
8
|
+
|
|
9
|
+
PRODUCT_API = "https://api.internal.com/products"
|
|
10
|
+
INVENTORY_API = "https://api.internal.com/inventory"
|
|
11
|
+
PRICING_API = "https://api.internal.com/pricing"
|
|
12
|
+
|
|
13
|
+
MAX_CONCURRENT_REQUESTS = 10 # prevent overwhelming the upstream APIs
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
async def fetch_json(session: aiohttp.ClientSession, url: str) -> dict:
|
|
17
|
+
"""Fetch a single JSON endpoint, raising on non-2xx responses."""
|
|
18
|
+
async with session.get(url) as response:
|
|
19
|
+
response.raise_for_status()
|
|
20
|
+
return await response.json()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async def build_product_entry(
|
|
24
|
+
session: aiohttp.ClientSession,
|
|
25
|
+
semaphore: asyncio.Semaphore,
|
|
26
|
+
product_id: str,
|
|
27
|
+
) -> dict:
|
|
28
|
+
"""Fetch product, inventory, and pricing concurrently for one product ID."""
|
|
29
|
+
async with semaphore:
|
|
30
|
+
product, inventory, pricing = await asyncio.gather(
|
|
31
|
+
fetch_json(session, f"{PRODUCT_API}/{product_id}"),
|
|
32
|
+
fetch_json(session, f"{INVENTORY_API}/{product_id}"),
|
|
33
|
+
fetch_json(session, f"{PRICING_API}/{product_id}"),
|
|
34
|
+
return_exceptions=False,
|
|
35
|
+
)
|
|
36
|
+
return {
|
|
37
|
+
"id": product_id,
|
|
38
|
+
"name": product["name"],
|
|
39
|
+
"stock": inventory["quantity"],
|
|
40
|
+
"price": pricing["amount"],
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
async def build_product_catalog(product_ids: list[str]) -> list[dict]:
|
|
45
|
+
"""Build the full catalog by fetching all products concurrently."""
|
|
46
|
+
semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
|
|
47
|
+
|
|
48
|
+
async with aiohttp.ClientSession(
|
|
49
|
+
timeout=aiohttp.ClientTimeout(total=30)
|
|
50
|
+
) as session:
|
|
51
|
+
tasks = [
|
|
52
|
+
build_product_entry(session, semaphore, pid)
|
|
53
|
+
for pid in product_ids
|
|
54
|
+
]
|
|
55
|
+
return await asyncio.gather(*tasks, return_exceptions=True)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
if __name__ == "__main__":
|
|
59
|
+
catalog = asyncio.run(build_product_catalog(["sku-001", "sku-002", "sku-003"]))
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Key improvements:
|
|
63
|
+
- `aiohttp.ClientSession` replaces `requests.get` — HTTP calls are non-blocking and never stall the event loop (Ch 4: aiohttp; Ch 2-3: Never block the event loop)
|
|
64
|
+
- `asyncio.gather` inside `build_product_entry` fetches product, inventory, and pricing for one SKU concurrently — three sequential blocking calls become one concurrent async fan-out (Ch 3: gather for fan-out)
|
|
65
|
+
- The outer `asyncio.gather(*tasks)` processes all product IDs concurrently instead of sequentially in a for loop (Ch 3: create_task / gather)
|
|
66
|
+
- `asyncio.Semaphore(10)` limits the number of simultaneous in-flight requests, preventing connection pool exhaustion on the upstream APIs (Ch 3: Semaphore for concurrency control)
|
|
67
|
+
- `aiohttp.ClientTimeout(total=30)` ensures no request hangs indefinitely (Ch 3: use timeouts everywhere)
|
|
68
|
+
- A single `aiohttp.ClientSession` is reused across all requests for connection pooling — the `async with` context manager ensures it is closed on exit (Ch 4: Use async with for resources)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Before
|
|
2
|
+
|
|
3
|
+
An `async def` function that calls the blocking `requests.get()` synchronously, stalling the entire event loop for the duration of each HTTP call.
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
import asyncio
|
|
7
|
+
import requests # blocking library — not async-safe
|
|
8
|
+
|
|
9
|
+
PRODUCT_API = "https://api.internal.com/products"
|
|
10
|
+
INVENTORY_API = "https://api.internal.com/inventory"
|
|
11
|
+
PRICING_API = "https://api.internal.com/pricing"
|
|
12
|
+
|
|
13
|
+
async def build_product_catalog(product_ids: list[str]) -> list[dict]:
|
|
14
|
+
catalog = []
|
|
15
|
+
|
|
16
|
+
for product_id in product_ids:
|
|
17
|
+
# Blocks the event loop for every request — defeats asyncio entirely
|
|
18
|
+
product_resp = requests.get(f"{PRODUCT_API}/{product_id}")
|
|
19
|
+
product = product_resp.json()
|
|
20
|
+
|
|
21
|
+
# Called sequentially AND blocking — no concurrency at all
|
|
22
|
+
inv_resp = requests.get(f"{INVENTORY_API}/{product_id}")
|
|
23
|
+
inventory = inv_resp.json()
|
|
24
|
+
|
|
25
|
+
price_resp = requests.get(f"{PRICING_API}/{product_id}")
|
|
26
|
+
pricing = price_resp.json()
|
|
27
|
+
|
|
28
|
+
catalog.append({
|
|
29
|
+
"id": product_id,
|
|
30
|
+
"name": product["name"],
|
|
31
|
+
"stock": inventory["quantity"],
|
|
32
|
+
"price": pricing["amount"],
|
|
33
|
+
})
|
|
34
|
+
|
|
35
|
+
return catalog
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
asyncio.run(build_product_catalog(["sku-001", "sku-002", "sku-003"]))
|
|
39
|
+
```
|