@booklib/skills 1.0.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/CONTRIBUTING.md +122 -0
  2. package/README.md +20 -1
  3. package/ROADMAP.md +36 -0
  4. package/animation-at-work/evals/evals.json +44 -0
  5. package/animation-at-work/examples/after.md +64 -0
  6. package/animation-at-work/examples/before.md +35 -0
  7. package/animation-at-work/scripts/audit_animations.py +295 -0
  8. package/bin/skills.js +552 -42
  9. package/clean-code-reviewer/SKILL.md +109 -1
  10. package/clean-code-reviewer/evals/evals.json +121 -3
  11. package/clean-code-reviewer/examples/after.md +48 -0
  12. package/clean-code-reviewer/examples/before.md +33 -0
  13. package/clean-code-reviewer/references/api_reference.md +158 -0
  14. package/clean-code-reviewer/references/practices-catalog.md +282 -0
  15. package/clean-code-reviewer/references/review-checklist.md +254 -0
  16. package/clean-code-reviewer/scripts/pre-review.py +206 -0
  17. package/data-intensive-patterns/evals/evals.json +43 -0
  18. package/data-intensive-patterns/examples/after.md +61 -0
  19. package/data-intensive-patterns/examples/before.md +38 -0
  20. package/data-intensive-patterns/scripts/adr.py +213 -0
  21. package/data-pipelines/evals/evals.json +45 -0
  22. package/data-pipelines/examples/after.md +97 -0
  23. package/data-pipelines/examples/before.md +37 -0
  24. package/data-pipelines/scripts/new_pipeline.py +444 -0
  25. package/design-patterns/evals/evals.json +46 -0
  26. package/design-patterns/examples/after.md +52 -0
  27. package/design-patterns/examples/before.md +29 -0
  28. package/design-patterns/scripts/scaffold.py +807 -0
  29. package/domain-driven-design/SKILL.md +120 -0
  30. package/domain-driven-design/evals/evals.json +48 -0
  31. package/domain-driven-design/examples/after.md +80 -0
  32. package/domain-driven-design/examples/before.md +43 -0
  33. package/domain-driven-design/scripts/scaffold.py +421 -0
  34. package/effective-java/evals/evals.json +46 -0
  35. package/effective-java/examples/after.md +83 -0
  36. package/effective-java/examples/before.md +37 -0
  37. package/effective-java/scripts/checkstyle_setup.py +211 -0
  38. package/effective-kotlin/evals/evals.json +45 -0
  39. package/effective-kotlin/examples/after.md +36 -0
  40. package/effective-kotlin/examples/before.md +38 -0
  41. package/effective-python/SKILL.md +199 -0
  42. package/effective-python/evals/evals.json +44 -0
  43. package/effective-python/examples/after.md +56 -0
  44. package/effective-python/examples/before.md +40 -0
  45. package/effective-python/ref-01-pythonic-thinking.md +202 -0
  46. package/effective-python/ref-02-lists-and-dicts.md +146 -0
  47. package/effective-python/ref-03-functions.md +186 -0
  48. package/effective-python/ref-04-comprehensions-generators.md +211 -0
  49. package/effective-python/ref-05-classes-interfaces.md +188 -0
  50. package/effective-python/ref-06-metaclasses-attributes.md +209 -0
  51. package/effective-python/ref-07-concurrency.md +213 -0
  52. package/effective-python/ref-08-robustness-performance.md +248 -0
  53. package/effective-python/ref-09-testing-debugging.md +253 -0
  54. package/effective-python/ref-10-collaboration.md +175 -0
  55. package/effective-python/references/api_reference.md +218 -0
  56. package/effective-python/references/practices-catalog.md +483 -0
  57. package/effective-python/references/review-checklist.md +190 -0
  58. package/effective-python/scripts/lint.py +173 -0
  59. package/kotlin-in-action/evals/evals.json +43 -0
  60. package/kotlin-in-action/examples/after.md +53 -0
  61. package/kotlin-in-action/examples/before.md +39 -0
  62. package/kotlin-in-action/scripts/setup_detekt.py +224 -0
  63. package/lean-startup/evals/evals.json +43 -0
  64. package/lean-startup/examples/after.md +80 -0
  65. package/lean-startup/examples/before.md +34 -0
  66. package/lean-startup/scripts/new_experiment.py +286 -0
  67. package/microservices-patterns/SKILL.md +140 -0
  68. package/microservices-patterns/evals/evals.json +45 -0
  69. package/microservices-patterns/examples/after.md +69 -0
  70. package/microservices-patterns/examples/before.md +40 -0
  71. package/microservices-patterns/scripts/new_service.py +583 -0
  72. package/package.json +1 -1
  73. package/refactoring-ui/evals/evals.json +45 -0
  74. package/refactoring-ui/examples/after.md +85 -0
  75. package/refactoring-ui/examples/before.md +58 -0
  76. package/refactoring-ui/scripts/audit_css.py +250 -0
  77. package/skill-router/SKILL.md +142 -0
  78. package/skill-router/evals/evals.json +38 -0
  79. package/skill-router/examples/after.md +63 -0
  80. package/skill-router/examples/before.md +39 -0
  81. package/skill-router/references/api_reference.md +24 -0
  82. package/skill-router/references/routing-heuristics.md +89 -0
  83. package/skill-router/references/skill-catalog.md +156 -0
  84. package/skill-router/scripts/route.py +266 -0
  85. package/storytelling-with-data/evals/evals.json +47 -0
  86. package/storytelling-with-data/examples/after.md +50 -0
  87. package/storytelling-with-data/examples/before.md +33 -0
  88. package/storytelling-with-data/scripts/chart_review.py +301 -0
  89. package/system-design-interview/evals/evals.json +45 -0
  90. package/system-design-interview/examples/after.md +94 -0
  91. package/system-design-interview/examples/before.md +27 -0
  92. package/system-design-interview/scripts/new_design.py +421 -0
  93. package/using-asyncio-python/evals/evals.json +43 -0
  94. package/using-asyncio-python/examples/after.md +68 -0
  95. package/using-asyncio-python/examples/before.md +39 -0
  96. package/using-asyncio-python/scripts/check_blocking.py +270 -0
  97. package/web-scraping-python/evals/evals.json +46 -0
  98. package/web-scraping-python/examples/after.md +109 -0
  99. package/web-scraping-python/examples/before.md +40 -0
  100. package/web-scraping-python/scripts/new_scraper.py +231 -0
@@ -0,0 +1,421 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ System Design Interview Doc Generator — Alex Xu 4-step framework.
4
+
5
+ Usage (one-shot): python new_design.py "URL Shortener"
6
+ Usage (interactive): python new_design.py
7
+ """
8
+
9
+ import argparse
10
+ import math
11
+ import sys
12
+ from datetime import date
13
+ from pathlib import Path
14
+
15
+
16
+ # ---------------------------------------------------------------------------
17
+ # Prompting helpers
18
+ # ---------------------------------------------------------------------------
19
+
20
def prompt(label: str, default: str = "") -> str:
    """Read one line from stdin, re-asking until non-empty.

    A non-empty *default* is shown in brackets and returned when the user
    just presses Enter; with no default, empty input re-prompts.
    """
    hint = f" [{default}]" if default else ""
    while True:
        answer = input(f"{label}{hint}: ").strip()
        if answer:
            return answer
        if default:
            return default
        print(" (required)")
29
+
30
+
31
def prompt_int(label: str, default: int) -> int:
    """Read an integer from stdin; empty input keeps *default*.

    Accepts ',' and '_' digit separators (e.g. 10,000,000 or 10_000_000)
    and re-prompts on anything that is not an integer.
    """
    while True:
        raw = input(f"{label} [{default:,}]: ").strip()
        if not raw:
            return default
        digits = raw.replace(",", "").replace("_", "")
        try:
            return int(digits)
        except ValueError:
            print(" Please enter an integer.")
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Back-of-envelope calculations
44
+ # ---------------------------------------------------------------------------
45
+
46
def human_size(bytes_: float) -> str:
    """Format a byte count as a human-readable binary-unit string.

    Returns one decimal place and the largest unit <= the value
    (e.g. 1536 -> "1.5 KB"). Values of 1024 PB or more stay in PB.
    """
    # Bug fix: the original also divided on the final "PB" iteration, so any
    # value >= 1024 PB fell out of the loop under-reported by 1024x
    # (2 EB rendered as "2.0 PB"). Stop dividing before the PB fallback.
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if bytes_ < 1024:
            return f"{bytes_:.1f} {unit}"
        bytes_ /= 1024
    return f"{bytes_:.1f} PB"
52
+
53
+
54
def calc_estimations(dau: int, read_write_ratio: int, avg_object_size_bytes: int, years: int) -> dict:
    """Return a dict of derived back-of-envelope estimates.

    Assumptions baked in: one write per active user per day, peak QPS is
    2x the average read QPS (common rule of thumb), and storage accrues
    for 365 days/year over *years*. Bandwidth figures are bytes/second.
    (Removed the original's dead `total_requests_per_day` local, which
    was computed and never used.)
    """
    write_qps = dau / 86400  # 1 write per user per day assumption
    read_qps = write_qps * read_write_ratio
    peak_qps = read_qps * 2  # common rule of thumb

    writes_per_day = dau  # 1 write per active user
    storage_per_day = writes_per_day * avg_object_size_bytes
    total_storage = storage_per_day * 365 * years

    bandwidth_in = write_qps * avg_object_size_bytes  # bytes/sec
    bandwidth_out = read_qps * avg_object_size_bytes

    return {
        "dau": dau,
        "write_qps": write_qps,
        "read_qps": read_qps,
        "peak_qps": peak_qps,
        "read_write_ratio": read_write_ratio,
        "storage_per_day": storage_per_day,
        "total_storage": total_storage,
        "bandwidth_in": bandwidth_in,
        "bandwidth_out": bandwidth_out,
        "years": years,
        "avg_object_size_bytes": avg_object_size_bytes,
    }
81
+
82
+
83
+ # ---------------------------------------------------------------------------
84
+ # Document sections
85
+ # ---------------------------------------------------------------------------
86
+
87
def section_requirements(system: str, features: list[str]) -> str:
    """Render Step 1 (requirements clarification) as a markdown section.

    *features* become the functional-requirement bullets; *system* is
    accepted for signature parity with the other section renderers.
    """
    feature_bullets = "\n".join(f"- {item}" for item in features)
    return f"""\
## Step 1: Requirements Clarification

### Functional Requirements
{feature_bullets}

### Non-Functional Requirements
- High availability: 99.99% uptime (< 52 min downtime/year)
- Low latency: p99 read latency < 100 ms
- Durability: no data loss; replicated across at least 3 availability zones
- Eventual consistency is acceptable for non-critical reads
- The system must be horizontally scalable

### Out of Scope (for this interview)
- Admin dashboard / abuse reporting
- A/B testing infrastructure
- Multi-region write consistency
- Billing / rate-limiting per customer tier (mention but don't design)

### Clarifying Questions to Ask the Interviewer
1. What is the expected scale (DAU, peak QPS)?
2. Read-heavy or write-heavy? What is the read:write ratio?
3. Any latency SLA for writes?
4. Do we need strong consistency or is eventual consistency acceptable?
5. What is the retention period for data?
"""
115
+
116
+
117
def section_estimation(e: dict) -> str:
    """Render Step 2 (back-of-envelope estimation) from a calc_estimations dict."""
    # Hoist the cache-sizing arithmetic out of the template for readability:
    # hot set = 20% of one day's reads, at the average object size.
    hot_cache_bytes = e['read_qps'] * 86400 * 0.20 * e['avg_object_size_bytes']
    return f"""\
## Step 2: Back-of-Envelope Estimation

### Assumptions
| Parameter | Value |
|-----------|-------|
| Daily Active Users (DAU) | {e['dau']:,} |
| Read : Write ratio | {e['read_write_ratio']} : 1 |
| Average object size | {human_size(e['avg_object_size_bytes'])} |
| Retention period | {e['years']} years |

### Derived Estimates

**Traffic**
```
Write QPS = DAU / 86,400 s
          = {e['dau']:,} / 86,400
          ≈ {e['write_qps']:,.1f} writes/sec

Read QPS = Write QPS × {e['read_write_ratio']}
         ≈ {e['read_qps']:,.0f} reads/sec

Peak QPS ≈ Read QPS × 2 (rule of thumb)
         ≈ {e['peak_qps']:,.0f} reads/sec
```

**Storage**
```
Storage/day = writes/day × avg object size
            = {e['dau']:,} × {human_size(e['avg_object_size_bytes'])}
            = {human_size(e['storage_per_day'])}

Total = {human_size(e['storage_per_day'])} × 365 × {e['years']} years
      ≈ {human_size(e['total_storage'])}
```

**Bandwidth**
```
Inbound ≈ {e['write_qps']:,.1f} req/s × {human_size(e['avg_object_size_bytes'])}
        ≈ {human_size(e['bandwidth_in'])}/s

Outbound ≈ {e['read_qps']:,.0f} req/s × {human_size(e['avg_object_size_bytes'])}
         ≈ {human_size(e['bandwidth_out'])}/s
```

**Cache sizing (80/20 rule)**
```
Hot data = 20% of daily reads × avg object size
         ≈ {human_size(hot_cache_bytes)}
```
"""
169
+
170
+
171
def section_high_level(system: str, features: list[str]) -> str:
    """Render Step 3 (high-level design) as a markdown section.

    *system* and *features* are accepted for signature parity with the
    other section renderers; the template itself is generic.
    """
    # NOTE(review): several diagram lines (the │ / ▼ connector glyphs) were
    # lost in the published diff; this restores a coherent component diagram.
    # The template has no interpolation fields, so a plain (non-f) string
    # is used deliberately.
    return """\
## Step 3: High-Level Design

### Component Diagram (describe to interviewer)

```
Clients
   │
   ▼
[CDN / Edge Cache]
   │ (cache hit → return)
   ▼
[Load Balancer] ←──── health checks
   │
   ├─► [API Server cluster] (stateless, auto-scaling)
   │        │
   │        ├─► [Cache layer] (Redis / Memcached)
   │        │        │ cache miss
   │        │        ▼
   │        └─► [Primary DB] ←── [Read Replicas]
   │
   └─► [Message Queue] (Kafka / SQS)
            │
            ▼
      [Worker / Consumer]
            │
            ▼
      [Object Storage] (S3-compatible, for blobs)
```

### Core API Endpoints

| Method | Path | Description |
|--------|------|-------------|
| POST | /v1/resource | Create a new resource |
| GET | /v1/resource/:id | Fetch by ID |
| PUT | /v1/resource/:id | Update |
| DELETE | /v1/resource/:id | Soft-delete |
| GET | /v1/healthz | Health check |

### Data Model (core entities)

```sql
-- Primary entity
CREATE TABLE resource (
    id CHAR(8) PRIMARY KEY, -- or UUID
    owner_id BIGINT NOT NULL,
    payload TEXT,
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
    is_deleted BOOLEAN NOT NULL DEFAULT FALSE
);

CREATE INDEX idx_resource_owner ON resource(owner_id);
```

### Technology Choices
| Layer | Choice | Rationale |
|-------|--------|-----------|
| API | REST / gRPC | REST for external; gRPC for internal services |
| Primary DB | PostgreSQL (or Cassandra if write-heavy) | ACID; mature; read replicas |
| Cache | Redis | Sub-millisecond latency; rich data structures |
| Object store | S3-compatible | Cheap; durable; decoupled from DB |
| Queue | Kafka | High-throughput; replay; partitioned by key |
"""
237
+
238
+
239
def section_deep_dive(system: str, e: dict) -> str:
    """Render Step 4 (deep dive) as markdown.

    The database recommendation adapts to the estimated write QPS in *e*:
    above 5,000 writes/sec Cassandra is suggested, otherwise PostgreSQL.
    """
    # Hoist the inline conditionals out of the template so the threshold
    # logic is visible at a glance.
    write_heavy = e["write_qps"] > 5000
    db_choice = "Cassandra" if write_heavy else "PostgreSQL"
    if write_heavy:
        db_rationale = (
            "Cassandra: wide-column store optimised for high write throughput "
            "with tunable consistency. Partition key = user_id for even distribution."
        )
    else:
        db_rationale = (
            "PostgreSQL: strong ACID guarantees, mature tooling, easy to add "
            "read replicas. Move to Cassandra if write QPS exceeds ~10k sustained."
        )
    return f"""\
## Step 4: Deep Dive

### Bottleneck Analysis
- **Write path**: API server → DB primary. Mitigate with write-ahead log tailing,
  async replication, and buffered writes via the message queue.
- **Read path**: DB read replicas + Redis cache. Target > 90% cache hit rate.
- **Hot keys**: Apply key-based sharding and local in-process LRU cache for the
  top-N items (identified via cache hit analytics).

### Database Deep Dive

**Why {db_choice}?**
{db_rationale}

**Sharding strategy**
- Shard by `user_id` hash to distribute load evenly.
- Avoid sharding by time (creates hot partitions for recent data).
- Use consistent hashing to minimise re-sharding cost.

**Replication**
- 1 primary + 2 read replicas per shard (cross-AZ).
- Async replication is acceptable; compensate with cache TTL.

### Caching Strategy
- **Read-through cache**: API checks Redis before DB.
- **Write-invalidation**: On write, delete the cache key (not update).
- **TTL**: Set based on staleness tolerance (e.g., 5 min for non-critical data).
- **Eviction policy**: `allkeys-lru` for general use.

### Consistency Model
- Reads from replicas may be slightly stale (< 1 s typical).
- Critical reads (e.g., immediately after a write) can be routed to primary.
- Use optimistic locking (version column) for concurrent updates.

### Fault Tolerance
- API servers: stateless → replace failed nodes automatically.
- DB primary failure: automated failover to replica (< 30 s with Patroni/RDS).
- Cache failure: graceful degradation — fall through to DB.
- Queue failure: producers buffer locally and retry.

### Scalability Levers (ordered by cost)
1. Increase read replica count.
2. Add Redis cluster nodes.
3. Add API server instances (auto-scaling policy on CPU/QPS).
4. Shard the database.
5. Move to a distributed DB (Cassandra / CockroachDB).

### Areas to Explore If Time Permits
- **CDN**: Cache static and semi-static responses at edge.
- **Rate limiting**: Token bucket per user_id at the load balancer.
- **Search**: Add Elasticsearch for full-text queries.
- **Analytics**: Stream events to a data warehouse (Snowflake / BigQuery) via Kafka.
"""
294
+
295
+
296
def section_interview_questions(system: str) -> str:
    """Render the follow-up questions table as a markdown section.

    *system* is accepted for signature parity with the other renderers.
    The template has no interpolation fields, so a plain string suffices.
    """
    return """\
## Common Follow-Up Interview Questions

| Question | Key Points to Cover |
|----------|---------------------|
| How do you handle a DB primary failure? | Automated failover, replica promotion, heartbeat checks |
| How do you prevent cache stampede? | Mutex lock on cache miss, probabilistic early refresh |
| How would you design the ID generation? | Snowflake ID, UUID v7, or DB sequence — trade-offs |
| How do you ensure exactly-once processing? | Idempotency keys, deduplication in the consumer |
| How would you add full-text search? | Elasticsearch / OpenSearch, sync via CDC from DB |
| How do you handle schema migrations? | Expand/contract pattern; blue/green deploys; backward-compatible changes first |
| Walk me through a write from client to storage | Client → LB → API → validate → DB write → publish event → async worker |
"""
310
+
311
+
312
+ # ---------------------------------------------------------------------------
313
+ # Main
314
+ # ---------------------------------------------------------------------------
315
+
316
def gather_interactive() -> dict:
    """Interactively collect every parameter needed to render a design doc.

    Prompt order is part of the UX and must stay: name, features, DAU,
    ratio, object size, years, output path. Returns the same dict shape
    that main() builds from CLI flags.
    """
    print("\n=== System Design Interview — Document Generator ===\n")
    system = prompt("System name (e.g., 'URL Shortener', 'Twitter Feed')")
    raw_features = prompt(
        "Core features (comma-separated)",
        "Create resource, Retrieve resource, Delete resource"
    )
    features = [part.strip() for part in raw_features.split(",") if part.strip()]
    dau = prompt_int("DAU (Daily Active Users)", 10_000_000)
    ratio = prompt_int("Read:Write ratio (e.g., 10 means 10 reads per write)", 10)
    size_bytes = prompt_int("Average object size in bytes", 1024)
    years = prompt_int("Retention period (years)", 5)
    raw_output = prompt("Output file (leave blank for stdout)", "")
    return {
        "system": system,
        "features": features,
        "dau": dau,
        "rw": ratio,
        "obj_size": size_bytes,
        "years": years,
        "output": Path(raw_output) if raw_output else None,
    }
332
+
333
+
334
def render(data: dict) -> str:
    """Assemble the complete markdown document from the collected parameters.

    *data* carries: system, features, dau, rw, obj_size, years (plus an
    optional output path that this function ignores).
    """
    system = data["system"]
    estimates = calc_estimations(
        dau=data["dau"],
        read_write_ratio=data["rw"],
        avg_object_size_bytes=data["obj_size"],
        years=data["years"],
    )
    # Every section is followed by the same horizontal-rule separator.
    divider = ["---", ""]
    parts = [
        f"# System Design: {system}",
        "",
        f"**Date:** {date.today()} ",
        "**Framework:** Alex Xu — System Design Interview Vol. 1 & 2",
        "",
        *divider,
        section_requirements(system, data["features"]),
        *divider,
        section_estimation(estimates),
        *divider,
        section_high_level(system, data["features"]),
        *divider,
        section_deep_dive(system, estimates),
        *divider,
        section_interview_questions(system),
        *divider,
        "*Generated by `new_design.py` — System Design Interview skill.*",
    ]
    return "\n".join(parts) + "\n"
368
+
369
+
370
def main() -> None:
    """CLI entry point.

    With a positional system name the document renders immediately, using
    any CLI flags that were given and sensible defaults for the rest.
    Without a system name, all parameters are gathered interactively.
    """
    parser = argparse.ArgumentParser(
        description="Generate a system design interview document (Alex Xu framework)."
    )
    parser.add_argument("system", nargs="?", help="System name (skips prompt if provided)")
    parser.add_argument("--dau", type=int, default=None)
    parser.add_argument("--rw", type=int, default=None, help="Read:write ratio")
    parser.add_argument("--obj-size", type=int, default=None, help="Avg object size in bytes")
    parser.add_argument("--years", type=int, default=None, help="Retention years")
    parser.add_argument("--features", help="Comma-separated feature list")
    parser.add_argument("--output", type=Path, default=None)
    args = parser.parse_args()

    if args.system:
        # Bug fix: the original only honoured --dau/--rw/--obj-size/--years
        # when ALL of them were present (and used truthiness, so 0 counted
        # as absent). A partially-flagged call such as
        #   new_design.py "Feed" --dau 5000000
        # silently discarded the provided flag and fell back to all
        # defaults. Merge each flag individually instead.
        features = (
            [f.strip() for f in args.features.split(",") if f.strip()]
            if args.features
            else ["Create resource", "Read resource", "Delete resource"]
        )
        data = dict(
            system=args.system,
            features=features,
            dau=args.dau if args.dau is not None else 10_000_000,
            rw=args.rw if args.rw is not None else 10,
            obj_size=args.obj_size if args.obj_size is not None else 1024,
            years=args.years if args.years is not None else 5,
            output=args.output,
        )
    else:
        try:
            data = gather_interactive()
        except (KeyboardInterrupt, EOFError):
            print("\nAborted.", file=sys.stderr)
            sys.exit(1)

    document = render(data)

    if data.get("output"):
        data["output"].write_text(document)
        print(f"Design document written to: {data['output']}")
    else:
        sys.stdout.write(document)


if __name__ == "__main__":
    main()
@@ -0,0 +1,43 @@
1
+ {
2
+ "evals": [
3
+ {
4
+ "id": "eval-01-blocking-io-inside-async",
5
+ "prompt": "Review this async Python code:\n\n```python\nimport asyncio\nimport requests\nimport time\n\nPRICING_API = 'https://api.pricing.internal/products'\nINVENTORY_API = 'https://api.inventory.internal/stock'\n\nasync def fetch_product_data(product_ids: list[str]) -> list[dict]:\n results = []\n for pid in product_ids:\n price_resp = requests.get(f'{PRICING_API}/{pid}')\n price = price_resp.json()['price']\n\n stock_resp = requests.get(f'{INVENTORY_API}/{pid}')\n stock = stock_resp.json()['quantity']\n\n results.append({'id': pid, 'price': price, 'stock': stock})\n time.sleep(0.1) # be polite to the API\n\n return results\n\nasync def main():\n ids = ['A1', 'B2', 'C3', 'D4', 'E5', 'F6', 'G7', 'H8']\n data = await fetch_product_data(ids)\n print(f'Fetched {len(data)} products')\n\nasyncio.run(main())\n```",
6
+ "expectations": [
7
+ "Flags `requests.get()` inside an `async def` function as a critical blocking call that stalls the entire event loop while waiting for the HTTP response (Ch 2-3: never block the event loop)",
8
+ "Flags `time.sleep(0.1)` inside an async function as a blocking sleep that also freezes the event loop; recommends `await asyncio.sleep(0.1)` (Ch 3: use asyncio.sleep, not time.sleep)",
9
+ "Flags that the two API calls per product (`PRICING_API` and `INVENTORY_API`) are made sequentially inside the loop; recommends using `asyncio.gather` to fetch both concurrently per product (Ch 3: use gather for fan-out concurrency)",
10
+ "Flags that all products are fetched sequentially in a for loop; recommends fetching all products concurrently with `asyncio.gather` or `create_task` (Ch 3: create_task for concurrency)",
11
+ "Recommends replacing `requests` with `aiohttp.ClientSession` for non-blocking HTTP calls (Ch 4: use aiohttp over requests in async code)",
12
+ "Recommends using a `Semaphore` to limit concurrent requests instead of `time.sleep` for rate limiting (Ch 3: Semaphore for concurrency control)",
13
+ "Provides a corrected version using aiohttp, asyncio.gather, asyncio.sleep, and a Semaphore"
14
+ ]
15
+ },
16
+ {
17
+ "id": "eval-02-ensure-future-and-fire-and-forget",
18
+ "prompt": "Review this async Python code:\n\n```python\nimport asyncio\n\nasync def save_to_database(record: dict) -> None:\n await asyncio.sleep(0.05) # simulate DB write\n print(f\"Saved {record['id']}\")\n\nasync def send_notification(user_id: str) -> None:\n await asyncio.sleep(0.1) # simulate email send\n print(f\"Notified {user_id}\")\n\nasync def process_event(event: dict) -> None:\n await save_to_database(event)\n\n # Fire and forget the notification\n asyncio.ensure_future(send_notification(event['user_id']))\n\n print(f\"Processed event {event['id']}\")\n\nasync def main():\n loop = asyncio.get_event_loop()\n\n events = [{'id': f'e{i}', 'user_id': f'u{i}'} for i in range(10)]\n for event in events:\n loop.run_until_complete(process_event(event))\n\nasyncio.run(main())\n```",
19
+ "expectations": [
20
+ "Flags `asyncio.ensure_future()` as the deprecated/less preferred API for scheduling coroutines; recommends `asyncio.create_task()` which is more explicit and requires an active event loop (Ch 3: prefer create_task over ensure_future)",
21
+ "Flags fire-and-forget usage of `ensure_future` without storing the task reference: if `send_notification` raises an exception, it is silently discarded; recommends keeping a reference and handling exceptions (Ch 3: keep references to created tasks; unhandled task exceptions are silent)",
22
+ "Flags `loop.run_until_complete(process_event(event))` called inside an already-running async context (`main` is a coroutine): `run_until_complete` cannot be called from within a running loop; this will raise RuntimeError (Ch 3: do not call asyncio.run or run_until_complete from within async code)",
23
+ "Flags `asyncio.get_event_loop()` as deprecated for getting the running loop inside async code; recommends `asyncio.get_running_loop()` or eliminating direct loop access (Ch 3: use asyncio.run as the single entry point, avoid manual loop management)",
24
+ "Flags the sequential `for event in events` loop using run_until_complete; recommends processing all events concurrently with `asyncio.gather` (Ch 3: use gather for concurrency)",
25
+ "Provides a corrected version using create_task with proper task tracking, asyncio.gather for concurrency, and exception handling on fire-and-forget tasks"
26
+ ]
27
+ },
28
+ {
29
+ "id": "eval-03-clean-async-gather-task-management",
30
+ "prompt": "Review this async Python code:\n\n```python\nimport asyncio\nimport logging\nfrom contextlib import asynccontextmanager\nfrom typing import AsyncIterator\nimport aiohttp\n\nlogger = logging.getLogger(__name__)\n\nMAX_CONCURRENT = 5\n\n@asynccontextmanager\nasync def http_session() -> AsyncIterator[aiohttp.ClientSession]:\n async with aiohttp.ClientSession(\n timeout=aiohttp.ClientTimeout(total=10)\n ) as session:\n yield session\n\n\nasync def fetch_one(session: aiohttp.ClientSession, url: str, semaphore: asyncio.Semaphore) -> dict:\n async with semaphore:\n try:\n async with session.get(url) as resp:\n resp.raise_for_status()\n return {'url': url, 'data': await resp.json()}\n except aiohttp.ClientError as exc:\n logger.warning('Failed to fetch %s: %s', url, exc)\n return {'url': url, 'data': None, 'error': str(exc)}\n\n\nasync def fetch_all(urls: list[str]) -> list[dict]:\n semaphore = asyncio.Semaphore(MAX_CONCURRENT)\n async with http_session() as session:\n tasks = [\n asyncio.create_task(fetch_one(session, url, semaphore))\n for url in urls\n ]\n results = await asyncio.gather(*tasks, return_exceptions=True)\n return [r for r in results if not isinstance(r, BaseException)]\n\n\nasync def main() -> None:\n urls = [f'https://api.example.com/item/{i}' for i in range(20)]\n items = await fetch_all(urls)\n logger.info('Fetched %d items successfully', len(items))\n\n\nif __name__ == '__main__':\n asyncio.run(main())\n```",
31
+ "expectations": [
32
+ "Recognizes this is well-structured async code and says so explicitly",
33
+ "Praises the async context manager `http_session` using `async with aiohttp.ClientSession` ensuring the session is always closed (Ch 3-4: use async context managers for resource cleanup)",
34
+ "Praises `asyncio.Semaphore(MAX_CONCURRENT)` to cap concurrent requests, preventing thundering-herd against the remote API (Ch 3: use Semaphore to limit concurrency)",
35
+ "Praises `asyncio.create_task()` over `ensure_future()` for scheduling coroutines (Ch 3: prefer create_task)",
36
+ "Praises `asyncio.gather(*tasks, return_exceptions=True)` which prevents one failure from cancelling all other in-flight requests (Ch 3: use return_exceptions=True in gather)",
37
+ "Praises `resp.raise_for_status()` and catching `aiohttp.ClientError` with graceful per-URL error handling that does not crash the whole batch (Ch 3: error handling per task)",
38
+ "Praises `asyncio.run(main())` as the single clean entry point (Ch 3: use asyncio.run, avoid manual loop management)",
39
+ "Does NOT manufacture issues to appear thorough; any suggestions are explicitly framed as minor optional improvements"
40
+ ]
41
+ }
42
+ ]
43
+ }
@@ -0,0 +1,68 @@
1
+ # After
2
+
3
+ Proper async code using `aiohttp` for non-blocking HTTP, `asyncio.gather` to fetch all three attributes of each product concurrently, and a semaphore to cap simultaneous connections.
4
+
5
+ ```python
6
+ import asyncio
7
+ import aiohttp
8
+
9
+ PRODUCT_API = "https://api.internal.com/products"
10
+ INVENTORY_API = "https://api.internal.com/inventory"
11
+ PRICING_API = "https://api.internal.com/pricing"
12
+
13
+ MAX_CONCURRENT_REQUESTS = 10 # prevent overwhelming the upstream APIs
14
+
15
+
16
+ async def fetch_json(session: aiohttp.ClientSession, url: str) -> dict:
17
+ """Fetch a single JSON endpoint, raising on non-2xx responses."""
18
+ async with session.get(url) as response:
19
+ response.raise_for_status()
20
+ return await response.json()
21
+
22
+
23
+ async def build_product_entry(
24
+ session: aiohttp.ClientSession,
25
+ semaphore: asyncio.Semaphore,
26
+ product_id: str,
27
+ ) -> dict:
28
+ """Fetch product, inventory, and pricing concurrently for one product ID."""
29
+ async with semaphore:
30
+ product, inventory, pricing = await asyncio.gather(
31
+ fetch_json(session, f"{PRODUCT_API}/{product_id}"),
32
+ fetch_json(session, f"{INVENTORY_API}/{product_id}"),
33
+ fetch_json(session, f"{PRICING_API}/{product_id}"),
34
+ return_exceptions=False,
35
+ )
36
+ return {
37
+ "id": product_id,
38
+ "name": product["name"],
39
+ "stock": inventory["quantity"],
40
+ "price": pricing["amount"],
41
+ }
42
+
43
+
44
+ async def build_product_catalog(product_ids: list[str]) -> list[dict]:
45
+ """Build the full catalog by fetching all products concurrently."""
46
+ semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
47
+
48
+ async with aiohttp.ClientSession(
49
+ timeout=aiohttp.ClientTimeout(total=30)
50
+ ) as session:
51
+ tasks = [
52
+ build_product_entry(session, semaphore, pid)
53
+ for pid in product_ids
54
+ ]
55
+ return await asyncio.gather(*tasks, return_exceptions=True)
56
+
57
+
58
+ if __name__ == "__main__":
59
+ catalog = asyncio.run(build_product_catalog(["sku-001", "sku-002", "sku-003"]))
60
+ ```
61
+
62
+ Key improvements:
63
+ - `aiohttp.ClientSession` replaces `requests.get` — HTTP calls are non-blocking and never stall the event loop (Ch 4: aiohttp; Ch 2-3: Never block the event loop)
64
+ - `asyncio.gather` inside `build_product_entry` fetches product, inventory, and pricing for one SKU concurrently — three sequential blocking calls become one concurrent async fan-out (Ch 3: gather for fan-out)
65
+ - The outer `asyncio.gather(*tasks)` processes all product IDs concurrently instead of sequentially in a for loop (Ch 3: create_task / gather)
66
+ - `asyncio.Semaphore(10)` limits the number of simultaneous in-flight requests, preventing connection pool exhaustion on the upstream APIs (Ch 3: Semaphore for concurrency control)
67
+ - `aiohttp.ClientTimeout(total=30)` ensures no request hangs indefinitely (Ch 3: use timeouts everywhere)
68
+ - A single `aiohttp.ClientSession` is reused across all requests for connection pooling — the `async with` context manager ensures it is closed on exit (Ch 4: Use async with for resources)
@@ -0,0 +1,39 @@
1
+ # Before
2
+
3
+ An `async def` function that calls the blocking `requests.get()` synchronously, stalling the entire event loop for the duration of each HTTP call.
4
+
5
+ ```python
6
+ import asyncio
7
+ import requests # blocking library — not async-safe
8
+
9
+ PRODUCT_API = "https://api.internal.com/products"
10
+ INVENTORY_API = "https://api.internal.com/inventory"
11
+ PRICING_API = "https://api.internal.com/pricing"
12
+
13
+ async def build_product_catalog(product_ids: list[str]) -> list[dict]:
14
+ catalog = []
15
+
16
+ for product_id in product_ids:
17
+ # Blocks the event loop for every request — defeats asyncio entirely
18
+ product_resp = requests.get(f"{PRODUCT_API}/{product_id}")
19
+ product = product_resp.json()
20
+
21
+ # Called sequentially AND blocking — no concurrency at all
22
+ inv_resp = requests.get(f"{INVENTORY_API}/{product_id}")
23
+ inventory = inv_resp.json()
24
+
25
+ price_resp = requests.get(f"{PRICING_API}/{product_id}")
26
+ pricing = price_resp.json()
27
+
28
+ catalog.append({
29
+ "id": product_id,
30
+ "name": product["name"],
31
+ "stock": inventory["quantity"],
32
+ "price": pricing["amount"],
33
+ })
34
+
35
+ return catalog
36
+
37
+
38
+ asyncio.run(build_product_catalog(["sku-001", "sku-002", "sku-003"]))
39
+ ```