@booklib/skills 1.0.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +122 -0
- package/README.md +20 -1
- package/ROADMAP.md +36 -0
- package/animation-at-work/evals/evals.json +44 -0
- package/animation-at-work/examples/after.md +64 -0
- package/animation-at-work/examples/before.md +35 -0
- package/animation-at-work/scripts/audit_animations.py +295 -0
- package/bin/skills.js +552 -42
- package/clean-code-reviewer/SKILL.md +109 -1
- package/clean-code-reviewer/evals/evals.json +121 -3
- package/clean-code-reviewer/examples/after.md +48 -0
- package/clean-code-reviewer/examples/before.md +33 -0
- package/clean-code-reviewer/references/api_reference.md +158 -0
- package/clean-code-reviewer/references/practices-catalog.md +282 -0
- package/clean-code-reviewer/references/review-checklist.md +254 -0
- package/clean-code-reviewer/scripts/pre-review.py +206 -0
- package/data-intensive-patterns/evals/evals.json +43 -0
- package/data-intensive-patterns/examples/after.md +61 -0
- package/data-intensive-patterns/examples/before.md +38 -0
- package/data-intensive-patterns/scripts/adr.py +213 -0
- package/data-pipelines/evals/evals.json +45 -0
- package/data-pipelines/examples/after.md +97 -0
- package/data-pipelines/examples/before.md +37 -0
- package/data-pipelines/scripts/new_pipeline.py +444 -0
- package/design-patterns/evals/evals.json +46 -0
- package/design-patterns/examples/after.md +52 -0
- package/design-patterns/examples/before.md +29 -0
- package/design-patterns/scripts/scaffold.py +807 -0
- package/domain-driven-design/SKILL.md +120 -0
- package/domain-driven-design/evals/evals.json +48 -0
- package/domain-driven-design/examples/after.md +80 -0
- package/domain-driven-design/examples/before.md +43 -0
- package/domain-driven-design/scripts/scaffold.py +421 -0
- package/effective-java/evals/evals.json +46 -0
- package/effective-java/examples/after.md +83 -0
- package/effective-java/examples/before.md +37 -0
- package/effective-java/scripts/checkstyle_setup.py +211 -0
- package/effective-kotlin/evals/evals.json +45 -0
- package/effective-kotlin/examples/after.md +36 -0
- package/effective-kotlin/examples/before.md +38 -0
- package/effective-python/SKILL.md +199 -0
- package/effective-python/evals/evals.json +44 -0
- package/effective-python/examples/after.md +56 -0
- package/effective-python/examples/before.md +40 -0
- package/effective-python/ref-01-pythonic-thinking.md +202 -0
- package/effective-python/ref-02-lists-and-dicts.md +146 -0
- package/effective-python/ref-03-functions.md +186 -0
- package/effective-python/ref-04-comprehensions-generators.md +211 -0
- package/effective-python/ref-05-classes-interfaces.md +188 -0
- package/effective-python/ref-06-metaclasses-attributes.md +209 -0
- package/effective-python/ref-07-concurrency.md +213 -0
- package/effective-python/ref-08-robustness-performance.md +248 -0
- package/effective-python/ref-09-testing-debugging.md +253 -0
- package/effective-python/ref-10-collaboration.md +175 -0
- package/effective-python/references/api_reference.md +218 -0
- package/effective-python/references/practices-catalog.md +483 -0
- package/effective-python/references/review-checklist.md +190 -0
- package/effective-python/scripts/lint.py +173 -0
- package/kotlin-in-action/evals/evals.json +43 -0
- package/kotlin-in-action/examples/after.md +53 -0
- package/kotlin-in-action/examples/before.md +39 -0
- package/kotlin-in-action/scripts/setup_detekt.py +224 -0
- package/lean-startup/evals/evals.json +43 -0
- package/lean-startup/examples/after.md +80 -0
- package/lean-startup/examples/before.md +34 -0
- package/lean-startup/scripts/new_experiment.py +286 -0
- package/microservices-patterns/SKILL.md +140 -0
- package/microservices-patterns/evals/evals.json +45 -0
- package/microservices-patterns/examples/after.md +69 -0
- package/microservices-patterns/examples/before.md +40 -0
- package/microservices-patterns/scripts/new_service.py +583 -0
- package/package.json +1 -1
- package/refactoring-ui/evals/evals.json +45 -0
- package/refactoring-ui/examples/after.md +85 -0
- package/refactoring-ui/examples/before.md +58 -0
- package/refactoring-ui/scripts/audit_css.py +250 -0
- package/skill-router/SKILL.md +142 -0
- package/skill-router/evals/evals.json +38 -0
- package/skill-router/examples/after.md +63 -0
- package/skill-router/examples/before.md +39 -0
- package/skill-router/references/api_reference.md +24 -0
- package/skill-router/references/routing-heuristics.md +89 -0
- package/skill-router/references/skill-catalog.md +156 -0
- package/skill-router/scripts/route.py +266 -0
- package/storytelling-with-data/evals/evals.json +47 -0
- package/storytelling-with-data/examples/after.md +50 -0
- package/storytelling-with-data/examples/before.md +33 -0
- package/storytelling-with-data/scripts/chart_review.py +301 -0
- package/system-design-interview/evals/evals.json +45 -0
- package/system-design-interview/examples/after.md +94 -0
- package/system-design-interview/examples/before.md +27 -0
- package/system-design-interview/scripts/new_design.py +421 -0
- package/using-asyncio-python/evals/evals.json +43 -0
- package/using-asyncio-python/examples/after.md +68 -0
- package/using-asyncio-python/examples/before.md +39 -0
- package/using-asyncio-python/scripts/check_blocking.py +270 -0
- package/web-scraping-python/evals/evals.json +46 -0
- package/web-scraping-python/examples/after.md +109 -0
- package/web-scraping-python/examples/before.md +40 -0
- package/web-scraping-python/scripts/new_scraper.py +231 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# Chapter 7: Concurrency and Parallelism (Items 52-64)
|
|
2
|
+
|
|
3
|
+
## Item 52: Use subprocess to Manage Child Processes
|
|
4
|
+
```python
|
|
5
|
+
import subprocess
|
|
6
|
+
|
|
7
|
+
# Run a command and capture output
|
|
8
|
+
result = subprocess.run(
|
|
9
|
+
['echo', 'Hello from subprocess'],
|
|
10
|
+
capture_output=True,
|
|
11
|
+
text=True
|
|
12
|
+
)
|
|
13
|
+
print(result.stdout)
|
|
14
|
+
|
|
15
|
+
# Set timeout
|
|
16
|
+
result = subprocess.run(
|
|
17
|
+
['sleep', '10'],
|
|
18
|
+
timeout=5 # raises TimeoutExpired after 5 seconds
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Pipe data to child process
|
|
22
|
+
result = subprocess.run(
|
|
23
|
+
['openssl', 'enc', '-aes-256-cbc', '-pass', 'pass:key'],
|
|
24
|
+
input=b'data to encrypt',
|
|
25
|
+
capture_output=True
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Run parallel child processes
|
|
29
|
+
procs = [subprocess.Popen(['cmd', arg]) for arg in args]
|
|
30
|
+
for proc in procs:
|
|
31
|
+
proc.communicate() # wait for each
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
- Use `subprocess.run` for simple command execution
|
|
35
|
+
- Use `subprocess.Popen` for parallel or streaming processes
|
|
36
|
+
- Always set timeouts to prevent hanging
|
|
37
|
+
|
|
38
|
+
## Item 53: Use Threads for Blocking I/O, Avoid for Parallelism
|
|
39
|
+
```python
|
|
40
|
+
import threading
|
|
41
|
+
|
|
42
|
+
# Threads for I/O parallelism — GOOD
|
|
43
|
+
def download(url):
|
|
44
|
+
resp = urllib.request.urlopen(url)
|
|
45
|
+
return resp.read()
|
|
46
|
+
|
|
47
|
+
threads = [threading.Thread(target=download, args=(url,)) for url in urls]
|
|
48
|
+
for t in threads:
|
|
49
|
+
t.start()
|
|
50
|
+
for t in threads:
|
|
51
|
+
t.join()
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
- The GIL prevents true CPU parallelism with threads
|
|
55
|
+
- Threads ARE useful for blocking I/O (network, file system, etc.)
|
|
56
|
+
- For CPU-bound work, use `multiprocessing` or `concurrent.futures.ProcessPoolExecutor`
|
|
57
|
+
- Never use threads for CPU-intensive computation
|
|
58
|
+
|
|
59
|
+
## Item 54: Use Lock to Prevent Data Races in Threads
|
|
60
|
+
```python
|
|
61
|
+
from threading import Lock
|
|
62
|
+
|
|
63
|
+
class Counter:
|
|
64
|
+
def __init__(self):
|
|
65
|
+
self.count = 0
|
|
66
|
+
self.lock = Lock()
|
|
67
|
+
|
|
68
|
+
def increment(self):
|
|
69
|
+
with self.lock: # context manager is cleanest
|
|
70
|
+
self.count += 1
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
- The GIL does NOT prevent data races on Python objects
|
|
74
|
+
- Operations like `+=` are not atomic — they involve read + modify + write
|
|
75
|
+
- Always use `Lock` when multiple threads modify shared state
|
|
76
|
+
- Use `with lock:` context manager for clean acquire/release
|
|
77
|
+
|
|
78
|
+
## Item 55: Use Queue to Coordinate Work Between Threads
|
|
79
|
+
```python
|
|
80
|
+
from queue import Queue
|
|
81
|
+
from threading import Thread
|
|
82
|
+
|
|
83
|
+
def producer(queue):
|
|
84
|
+
for item in generate_items():
|
|
85
|
+
queue.put(item)
|
|
86
|
+
queue.put(None) # sentinel to signal done
|
|
87
|
+
|
|
88
|
+
def consumer(queue):
|
|
89
|
+
while True:
|
|
90
|
+
item = queue.get()
|
|
91
|
+
if item is None:
|
|
92
|
+
queue.task_done()  # account for the sentinel so queue.join() can finish
            break
|
|
93
|
+
process(item)
|
|
94
|
+
queue.task_done()
|
|
95
|
+
|
|
96
|
+
queue = Queue(maxsize=10) # bounded for backpressure
|
|
97
|
+
Thread(target=producer, args=(queue,)).start()
|
|
98
|
+
Thread(target=consumer, args=(queue,)).start()
|
|
99
|
+
queue.join() # wait for all items to be processed
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
- `Queue` provides thread-safe FIFO communication
|
|
103
|
+
- Use `maxsize` for backpressure (producer blocks when full)
|
|
104
|
+
- Use `task_done()` + `join()` for completion tracking
|
|
105
|
+
- Use sentinel values (None) to signal shutdown
|
|
106
|
+
|
|
107
|
+
## Item 56: Know How to Recognize When Concurrency Is Necessary
|
|
108
|
+
- Concurrency is needed when you have fan-out (one task spawning many) and fan-in (collecting results)
|
|
109
|
+
- Signs you need concurrency: I/O-bound waits, independent tasks, pipeline processing
|
|
110
|
+
- Start simple (sequential), then add concurrency only when needed
|
|
111
|
+
|
|
112
|
+
## Item 57: Avoid Creating New Thread Instances for On-demand Fan-out
|
|
113
|
+
- Creating a thread per task doesn't scale (thread creation overhead, memory)
|
|
114
|
+
- Use thread pools instead (Item 58/59)
|
|
115
|
+
|
|
116
|
+
## Item 58: Understand How Using Queue for Concurrency Requires Refactoring
|
|
117
|
+
- Queue-based pipelines require significant refactoring
|
|
118
|
+
- Consider `concurrent.futures` for simpler patterns
|
|
119
|
+
|
|
120
|
+
## Item 59: Consider ThreadPoolExecutor When Threads Are Necessary for Concurrency
|
|
121
|
+
```python
|
|
122
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
123
|
+
|
|
124
|
+
def fetch_url(url):
|
|
125
|
+
return urllib.request.urlopen(url).read()
|
|
126
|
+
|
|
127
|
+
with ThreadPoolExecutor(max_workers=5) as executor:
|
|
128
|
+
# Submit individual tasks
|
|
129
|
+
future = executor.submit(fetch_url, 'https://example.com')
|
|
130
|
+
result = future.result()
|
|
131
|
+
|
|
132
|
+
# Map over multiple inputs
|
|
133
|
+
results = list(executor.map(fetch_url, urls))
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
- Simpler than manual thread + Queue management
|
|
137
|
+
- Automatically manages thread lifecycle
|
|
138
|
+
- `max_workers` controls parallelism
|
|
139
|
+
- Use `ProcessPoolExecutor` for CPU-bound tasks
|
|
140
|
+
|
|
141
|
+
## Item 60: Achieve Highly Concurrent I/O with Coroutines
|
|
142
|
+
```python
|
|
143
|
+
import asyncio
|
|
144
|
+
|
|
145
|
+
async def fetch_data(url):
|
|
146
|
+
# async I/O operation
|
|
147
|
+
reader, writer = await asyncio.open_connection(host, port)
|
|
148
|
+
writer.write(request)
|
|
149
|
+
data = await reader.read()
|
|
150
|
+
return data
|
|
151
|
+
|
|
152
|
+
async def main():
|
|
153
|
+
# Run multiple coroutines concurrently
|
|
154
|
+
results = await asyncio.gather(
|
|
155
|
+
fetch_data('url1'),
|
|
156
|
+
fetch_data('url2'),
|
|
157
|
+
fetch_data('url3'),
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
asyncio.run(main())
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
- Coroutines enable thousands of concurrent I/O operations
|
|
164
|
+
- Use `async def` and `await` keywords
|
|
165
|
+
- `asyncio.gather` runs multiple coroutines concurrently
|
|
166
|
+
- Far more efficient than threads for I/O-heavy workloads
|
|
167
|
+
|
|
168
|
+
## Item 61: Know How to Port Threaded I/O to asyncio
|
|
169
|
+
- Replace `threading.Thread` with `async def` coroutines
|
|
170
|
+
- Replace blocking I/O calls with `await async_version`
|
|
171
|
+
- Replace `Lock` with `asyncio.Lock`
|
|
172
|
+
- Replace `Queue` with `asyncio.Queue`
|
|
173
|
+
- Use `asyncio.run()` as the entry point
|
|
174
|
+
|
|
175
|
+
## Item 62: Mix Threads and Coroutines to Ease the Transition to asyncio
|
|
176
|
+
```python
|
|
177
|
+
# Run blocking code in a thread from async context
|
|
178
|
+
import asyncio
|
|
179
|
+
|
|
180
|
+
async def main():
|
|
181
|
+
loop = asyncio.get_running_loop()  # get_event_loop() is deprecated inside coroutines (3.10+)
|
|
182
|
+
result = await loop.run_in_executor(None, blocking_function, arg)
|
|
183
|
+
|
|
184
|
+
# Run async code from synchronous context
|
|
185
|
+
def sync_function():
|
|
186
|
+
loop = asyncio.new_event_loop()
|
|
187
|
+
result = loop.run_until_complete(async_function())
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
- Use `run_in_executor` to call blocking code from async code
|
|
191
|
+
- Allows gradual migration from threads to asyncio
|
|
192
|
+
- Never call blocking functions directly in async code (it blocks the event loop)
|
|
193
|
+
|
|
194
|
+
## Item 63: Avoid Blocking the asyncio Event Loop to Maximize Responsiveness
|
|
195
|
+
- Never use `time.sleep()` in async code — use `await asyncio.sleep()`
|
|
196
|
+
- Never do CPU-heavy work in coroutines — use `run_in_executor`
|
|
197
|
+
- Never use blocking I/O calls — use async equivalents (aiohttp, aiofiles, etc.)
|
|
198
|
+
- Find slow callbacks by enabling debug mode (`asyncio.run(main(), debug=True)`) and tuning `loop.slow_callback_duration`
|
|
199
|
+
|
|
200
|
+
## Item 64: Consider concurrent.futures for True Parallelism
|
|
201
|
+
```python
|
|
202
|
+
from concurrent.futures import ProcessPoolExecutor
|
|
203
|
+
|
|
204
|
+
def cpu_heavy(data):
|
|
205
|
+
return complex_computation(data)
|
|
206
|
+
|
|
207
|
+
with ProcessPoolExecutor() as executor:
|
|
208
|
+
results = list(executor.map(cpu_heavy, data_chunks))
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
- `ProcessPoolExecutor` bypasses the GIL for true CPU parallelism
|
|
212
|
+
- Data is serialized between processes (use for independent tasks)
|
|
213
|
+
- Same API as `ThreadPoolExecutor` — easy to switch
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
# Chapter 8: Robustness and Performance (Items 65-76)
|
|
2
|
+
|
|
3
|
+
## Item 65: Take Advantage of Each Block in try/except/else/finally
|
|
4
|
+
```python
|
|
5
|
+
# Full structure
|
|
6
|
+
try:
|
|
7
|
+
# Code that might raise
|
|
8
|
+
result = dangerous_operation()
|
|
9
|
+
except SomeError as e:
|
|
10
|
+
# Handle specific error
|
|
11
|
+
log_error(e)
|
|
12
|
+
except (TypeError, ValueError):
|
|
13
|
+
# Handle multiple error types
|
|
14
|
+
handle_bad_input()
|
|
15
|
+
else:
|
|
16
|
+
# Runs ONLY if no exception was raised
|
|
17
|
+
# Use for code that depends on try succeeding
|
|
18
|
+
process(result)
|
|
19
|
+
finally:
|
|
20
|
+
# ALWAYS runs, even if exception was raised
|
|
21
|
+
# Use for cleanup (closing files, releasing locks)
|
|
22
|
+
cleanup()
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
- `else` block: reduces code in `try`, makes it clear what you're protecting
|
|
26
|
+
- `finally` block: guaranteed cleanup
|
|
27
|
+
- Don't put too much in `try` — only the code that can raise the expected exception
|
|
28
|
+
|
|
29
|
+
## Item 66: Consider contextlib and with Statements for Reusable try/finally Behavior
|
|
30
|
+
```python
|
|
31
|
+
from contextlib import contextmanager
|
|
32
|
+
|
|
33
|
+
@contextmanager
|
|
34
|
+
def log_level(level, name):
|
|
35
|
+
logger = logging.getLogger(name)
|
|
36
|
+
old_level = logger.level
|
|
37
|
+
logger.setLevel(level)
|
|
38
|
+
try:
|
|
39
|
+
yield logger
|
|
40
|
+
finally:
|
|
41
|
+
logger.setLevel(old_level)
|
|
42
|
+
|
|
43
|
+
with log_level(logging.DEBUG, 'my-log') as logger:
|
|
44
|
+
logger.debug('Debug message')
|
|
45
|
+
# Level is automatically restored after the block
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
- Use `contextlib.contextmanager` for simple context managers
|
|
49
|
+
- Use `with` statements instead of manual try/finally
|
|
50
|
+
- The `yield` in a context manager is where the `with` block executes
|
|
51
|
+
|
|
52
|
+
## Item 67: Use datetime Instead of time for Local Clocks
|
|
53
|
+
```python
|
|
54
|
+
from datetime import datetime, timezone
|
|
55
|
+
import pytz # or zoneinfo (Python 3.9+)
|
|
56
|
+
|
|
57
|
+
# BAD — time module is unreliable for timezones
|
|
58
|
+
import time
|
|
59
|
+
time.localtime() # platform-dependent behavior
|
|
60
|
+
|
|
61
|
+
# GOOD — datetime with explicit timezone
|
|
62
|
+
now = datetime.now(tz=timezone.utc)
|
|
63
|
+
|
|
64
|
+
# Convert between timezones
|
|
65
|
+
eastern = pytz.timezone('US/Eastern')
|
|
66
|
+
local_time = now.astimezone(eastern)
|
|
67
|
+
|
|
68
|
+
# Python 3.9+ — use zoneinfo
|
|
69
|
+
from zoneinfo import ZoneInfo
|
|
70
|
+
eastern = ZoneInfo('America/New_York')
|
|
71
|
+
local_time = now.astimezone(eastern)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
- Always store/transmit times in UTC
|
|
75
|
+
- Convert to local time only for display
|
|
76
|
+
- Use `pytz` or `zoneinfo` for timezone handling
|
|
77
|
+
- Never use the `time` module for timezone conversions
|
|
78
|
+
|
|
79
|
+
## Item 68: Make pickle Reliable with copyreg
|
|
80
|
+
```python
|
|
81
|
+
import copyreg
|
|
82
|
+
import pickle
|
|
83
|
+
|
|
84
|
+
class GameState:
|
|
85
|
+
def __init__(self, level=0, lives=4, points=0):
|
|
86
|
+
self.level = level
|
|
87
|
+
self.lives = lives
|
|
88
|
+
self.points = points
|
|
89
|
+
|
|
90
|
+
def pickle_game_state(game_state):
|
|
91
|
+
kwargs = game_state.__dict__
|
|
92
|
+
return unpickle_game_state, (kwargs,)
|
|
93
|
+
|
|
94
|
+
def unpickle_game_state(kwargs):
|
|
95
|
+
return GameState(**kwargs)
|
|
96
|
+
|
|
97
|
+
copyreg.pickle(GameState, pickle_game_state)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
- `copyreg` makes pickle forward-compatible when classes change
|
|
101
|
+
- Register custom serialization functions for your classes
|
|
102
|
+
- Always provide default values for new attributes
|
|
103
|
+
|
|
104
|
+
## Item 69: Use decimal When Precision Matters
|
|
105
|
+
```python
|
|
106
|
+
from decimal import Decimal, ROUND_UP
|
|
107
|
+
|
|
108
|
+
# BAD — float precision issues
|
|
109
|
+
rate = 1.45
|
|
110
|
+
seconds = 222
|
|
111
|
+
cost = rate * seconds / 60 # 5.364999999999999
|
|
112
|
+
|
|
113
|
+
# GOOD — Decimal for exact arithmetic
|
|
114
|
+
rate = Decimal('1.45')
|
|
115
|
+
seconds = Decimal('222')
|
|
116
|
+
cost = rate * seconds / Decimal('60')
|
|
117
|
+
rounded = cost.quantize(Decimal('0.01'), rounding=ROUND_UP)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
- Use `Decimal` for financial calculations, exact fractions
|
|
121
|
+
- Always construct from strings (`Decimal('1.45')`) not floats (`Decimal(1.45)`)
|
|
122
|
+
- Use `quantize` for rounding control
|
|
123
|
+
|
|
124
|
+
## Item 70: Profile Before Optimizing
|
|
125
|
+
```python
|
|
126
|
+
from cProfile import Profile
|
|
127
|
+
from pstats import Stats
|
|
128
|
+
|
|
129
|
+
profiler = Profile()
|
|
130
|
+
profiler.runcall(my_function, arg1, arg2)
|
|
131
|
+
|
|
132
|
+
stats = Stats(profiler)
|
|
133
|
+
stats.strip_dirs()
|
|
134
|
+
stats.sort_stats('cumulative')
|
|
135
|
+
stats.print_stats()
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
- Never guess where bottlenecks are — profile first
|
|
139
|
+
- Prefer `cProfile` (implemented in C, low overhead) over the pure-Python `profile` module, which distorts results
|
|
140
|
+
- `cumulative` time shows total time including sub-calls
|
|
141
|
+
- `tottime` shows time in the function itself (excluding sub-calls)
|
|
142
|
+
- Focus optimization on the top functions by cumulative time
|
|
143
|
+
|
|
144
|
+
## Item 71: Prefer deque for Producer-Consumer Queues
|
|
145
|
+
```python
|
|
146
|
+
from collections import deque
|
|
147
|
+
|
|
148
|
+
# FIFO queue operations
|
|
149
|
+
queue = deque()
|
|
150
|
+
queue.append('item') # O(1) add to right
|
|
151
|
+
item = queue.popleft() # O(1) remove from left
|
|
152
|
+
|
|
153
|
+
# BAD — list as queue
|
|
154
|
+
queue = []
|
|
155
|
+
queue.append('item') # O(1)
|
|
156
|
+
item = queue.pop(0) # O(n)! shifts all elements
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
- `list.pop(0)` is O(n); `deque.popleft()` is O(1)
|
|
160
|
+
- `deque` also supports `maxlen` for bounded buffers
|
|
161
|
+
- Use `deque` for any FIFO pattern
|
|
162
|
+
|
|
163
|
+
## Item 72: Consider Searching Sorted Sequences with bisect
|
|
164
|
+
```python
|
|
165
|
+
import bisect
|
|
166
|
+
|
|
167
|
+
sorted_list = [2, 5, 8, 12, 16, 23, 38, 56, 72, 91]
|
|
168
|
+
|
|
169
|
+
# Find insertion point
|
|
170
|
+
index = bisect.bisect_left(sorted_list, 12) # 3
|
|
171
|
+
index = bisect.bisect_right(sorted_list, 12) # 4
|
|
172
|
+
|
|
173
|
+
# Insert while maintaining sort order
|
|
174
|
+
bisect.insort(sorted_list, 15) # inserts 15 in correct position
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
- Binary search is O(log n) vs O(n) for linear search
|
|
178
|
+
- Use `bisect_left` for leftmost position, `bisect_right` for rightmost
|
|
179
|
+
- `insort` keeps list sorted after insertion
|
|
180
|
+
- Requires the sequence to already be sorted
|
|
181
|
+
|
|
182
|
+
## Item 73: Know How to Use heapq for Priority Queues
|
|
183
|
+
```python
|
|
184
|
+
import heapq
|
|
185
|
+
|
|
186
|
+
# Create a min-heap
|
|
187
|
+
heap = []
|
|
188
|
+
heapq.heappush(heap, 5)
|
|
189
|
+
heapq.heappush(heap, 1)
|
|
190
|
+
heapq.heappush(heap, 3)
|
|
191
|
+
|
|
192
|
+
# Pop smallest
|
|
193
|
+
smallest = heapq.heappop(heap) # 1
|
|
194
|
+
|
|
195
|
+
# Get n smallest/largest
|
|
196
|
+
heapq.nsmallest(3, data)
|
|
197
|
+
heapq.nlargest(3, data)
|
|
198
|
+
|
|
199
|
+
# Priority queue with tuples
|
|
200
|
+
heapq.heappush(heap, (priority, item))
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
- heapq provides O(log n) push and pop operations
|
|
204
|
+
- Always a min-heap (smallest first)
|
|
205
|
+
- For max-heap, negate the values
|
|
206
|
+
- Use for priority queues, top-K problems, merge sorted streams
|
|
207
|
+
|
|
208
|
+
## Item 74: Consider memoryview and bytearray for Zero-Copy Interactions with bytes
|
|
209
|
+
```python
|
|
210
|
+
# BAD — copying bytes on every slice
|
|
211
|
+
data = b'large data...'
|
|
212
|
+
chunk = data[10:20] # creates a new bytes object
|
|
213
|
+
|
|
214
|
+
# GOOD — zero-copy with memoryview
|
|
215
|
+
data = bytearray(b'large data...')
|
|
216
|
+
view = memoryview(data)
|
|
217
|
+
chunk = view[10:20] # no copy, just a view
|
|
218
|
+
chunk[:5] = b'hello' # writes directly to original data
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
- `memoryview` provides zero-copy slicing of bytes-like objects
|
|
222
|
+
- Essential for high-performance I/O and data processing
|
|
223
|
+
- Works with `bytearray`, `array.array`, NumPy arrays
|
|
224
|
+
- Use for socket I/O, file I/O, binary protocol parsing
|
|
225
|
+
|
|
226
|
+
## Item 75: Use repr Strings for Debugging Output
|
|
227
|
+
```python
|
|
228
|
+
class MyClass:
|
|
229
|
+
def __init__(self, value):
|
|
230
|
+
self.value = value
|
|
231
|
+
|
|
232
|
+
def __repr__(self):
|
|
233
|
+
return f'{self.__class__.__name__}({self.value!r})'
|
|
234
|
+
|
|
235
|
+
def __str__(self):
|
|
236
|
+
return f'MyClass with value {self.value}'
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
- `repr()` gives an unambiguous string for debugging
|
|
240
|
+
- `str()` gives a human-readable string
|
|
241
|
+
- Always implement `__repr__` on your classes
|
|
242
|
+
- Use `!r` in f-strings for repr formatting: `f'{obj!r}'`
|
|
243
|
+
|
|
244
|
+
## Item 76: Verify Related Behaviors in TestCase Subclasses
|
|
245
|
+
(Cross-reference with Chapter 9 Testing)
|
|
246
|
+
- Group related tests in TestCase subclasses
|
|
247
|
+
- Use descriptive test method names
|
|
248
|
+
- Test both success and failure cases
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
# Chapter 9: Testing and Debugging (Items 77-85)
|
|
2
|
+
|
|
3
|
+
## Item 77: Isolate Tests from Each Other via setUp, tearDown, setUpModule, etc.
|
|
4
|
+
```python
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
class DatabaseTestCase(unittest.TestCase):
|
|
8
|
+
@classmethod
|
|
9
|
+
def setUpClass(cls):
|
|
10
|
+
"""Run once before all tests in this class."""
|
|
11
|
+
cls.db = create_test_database()
|
|
12
|
+
|
|
13
|
+
@classmethod
|
|
14
|
+
def tearDownClass(cls):
|
|
15
|
+
"""Run once after all tests in this class."""
|
|
16
|
+
cls.db.close()
|
|
17
|
+
|
|
18
|
+
def setUp(self):
|
|
19
|
+
"""Run before each test method."""
|
|
20
|
+
self.connection = self.db.connect()
|
|
21
|
+
self.transaction = self.connection.begin()
|
|
22
|
+
|
|
23
|
+
def tearDown(self):
|
|
24
|
+
"""Run after each test method."""
|
|
25
|
+
self.transaction.rollback()
|
|
26
|
+
self.connection.close()
|
|
27
|
+
|
|
28
|
+
def test_query(self):
|
|
29
|
+
result = self.connection.execute('SELECT 1')
|
|
30
|
+
self.assertEqual(result, 1)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
- `setUp`/`tearDown` run for every test method (isolation)
|
|
34
|
+
- `setUpClass`/`tearDownClass` run once per class (expensive setup)
|
|
35
|
+
- `setUpModule`/`tearDownModule` run once per module
|
|
36
|
+
- Always clean up in tearDown (even if test fails)
|
|
37
|
+
|
|
38
|
+
## Item 78: Use Mocks to Test Code with Complex Dependencies
|
|
39
|
+
```python
|
|
40
|
+
from unittest.mock import patch, MagicMock, call
|
|
41
|
+
|
|
42
|
+
# Mock a function
|
|
43
|
+
@patch('mymodule.external_api_call')
|
|
44
|
+
def test_process(mock_api):
|
|
45
|
+
mock_api.return_value = {'status': 'ok'}
|
|
46
|
+
result = process_data()
|
|
47
|
+
mock_api.assert_called_once_with(expected_arg)
|
|
48
|
+
assert result == expected_result
|
|
49
|
+
|
|
50
|
+
# Mock an object's method
|
|
51
|
+
def test_with_mock():
|
|
52
|
+
mock_db = MagicMock()
|
|
53
|
+
mock_db.query.return_value = [{'id': 1}]
|
|
54
|
+
service = MyService(db=mock_db)
|
|
55
|
+
result = service.get_items()
|
|
56
|
+
mock_db.query.assert_called_once()
|
|
57
|
+
|
|
58
|
+
# Verify call order
|
|
59
|
+
mock_db.query.assert_has_calls([
|
|
60
|
+
call('SELECT * FROM users'),
|
|
61
|
+
call('SELECT * FROM orders'),
|
|
62
|
+
])
|
|
63
|
+
|
|
64
|
+
# Use spec for type checking
|
|
65
|
+
mock = MagicMock(spec=RealClass)
|
|
66
|
+
mock.nonexistent_method() # raises AttributeError
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
- Mock external dependencies (APIs, databases, file systems)
|
|
70
|
+
- Use `@patch` to replace modules/objects during tests
|
|
71
|
+
- Use `spec=RealClass` to catch API mismatches
|
|
72
|
+
- Verify both return values and call patterns
|
|
73
|
+
- Use `side_effect` for exceptions or multiple return values:
|
|
74
|
+
```python
|
|
75
|
+
mock.side_effect = ValueError('error')
|
|
76
|
+
mock.side_effect = [1, 2, 3] # returns different values each call
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Item 79: Encapsulate Dependencies to Facilitate Mocking and Testing
|
|
80
|
+
```python
|
|
81
|
+
# BAD — hard-coded dependency
|
|
82
|
+
class DataProcessor:
|
|
83
|
+
def process(self):
|
|
84
|
+
data = requests.get('https://api.example.com/data').json()
|
|
85
|
+
return transform(data)
|
|
86
|
+
|
|
87
|
+
# GOOD — inject dependency
|
|
88
|
+
class DataProcessor:
|
|
89
|
+
def __init__(self, data_fetcher):
|
|
90
|
+
self._fetcher = data_fetcher
|
|
91
|
+
|
|
92
|
+
def process(self):
|
|
93
|
+
data = self._fetcher.get_data()
|
|
94
|
+
return transform(data)
|
|
95
|
+
|
|
96
|
+
# Easy to test
|
|
97
|
+
class FakeFetcher:
|
|
98
|
+
def get_data(self):
|
|
99
|
+
return {'test': 'data'}
|
|
100
|
+
|
|
101
|
+
processor = DataProcessor(FakeFetcher())
|
|
102
|
+
result = processor.process()
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
- Dependency injection makes code testable
|
|
106
|
+
- Accept dependencies as constructor or method parameters
|
|
107
|
+
- Use abstract base classes or protocols to define interfaces
|
|
108
|
+
- Fakes/stubs are often clearer than mocks for complex dependencies
|
|
109
|
+
|
|
110
|
+
## Item 80: Consider Interactive Debugging with pdb
|
|
111
|
+
```python
|
|
112
|
+
# Drop into debugger at a specific point
|
|
113
|
+
def complex_function(data):
|
|
114
|
+
result = step_one(data)
|
|
115
|
+
breakpoint() # Python 3.7+ (same as pdb.set_trace())
|
|
116
|
+
final = step_two(result)
|
|
117
|
+
return final
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
**Key pdb commands:**
|
|
121
|
+
- `n` (next) — execute next line
|
|
122
|
+
- `s` (step) — step into function call
|
|
123
|
+
- `c` (continue) — continue execution until next breakpoint
|
|
124
|
+
- `p expr` — print expression
|
|
125
|
+
- `pp expr` — pretty-print expression
|
|
126
|
+
- `l` (list) — show current code context
|
|
127
|
+
- `w` (where) — show call stack
|
|
128
|
+
- `b line` — set breakpoint at line
|
|
129
|
+
- `r` (return) — run until current function returns
|
|
130
|
+
- `q` (quit) — quit debugger
|
|
131
|
+
|
|
132
|
+
- Use `breakpoint()` (Python 3.7+) instead of `import pdb; pdb.set_trace()`
|
|
133
|
+
- Use `PYTHONBREAKPOINT=0` environment variable to disable all breakpoints
|
|
134
|
+
- Use `post_mortem()` to debug after an exception
|
|
135
|
+
|
|
136
|
+
## Item 81: Use tracemalloc to Understand Memory Usage and Leaks
|
|
137
|
+
```python
|
|
138
|
+
import tracemalloc
|
|
139
|
+
|
|
140
|
+
tracemalloc.start()
|
|
141
|
+
|
|
142
|
+
# ... run code that uses memory ...
|
|
143
|
+
|
|
144
|
+
snapshot = tracemalloc.take_snapshot()
|
|
145
|
+
top_stats = snapshot.statistics('lineno')
|
|
146
|
+
|
|
147
|
+
print('Top 10 memory allocations:')
|
|
148
|
+
for stat in top_stats[:10]:
|
|
149
|
+
print(stat)
|
|
150
|
+
|
|
151
|
+
# Compare snapshots to find leaks
|
|
152
|
+
snapshot1 = tracemalloc.take_snapshot()
|
|
153
|
+
# ... more code ...
|
|
154
|
+
snapshot2 = tracemalloc.take_snapshot()
|
|
155
|
+
top_stats = snapshot2.compare_to(snapshot1, 'lineno')
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
- `tracemalloc` tracks where memory was allocated
|
|
159
|
+
- Use snapshot comparison to find memory leaks
|
|
160
|
+
- Shows file and line number of allocations
|
|
161
|
+
- Much more useful than `gc` module for debugging memory issues
|
|
162
|
+
|
|
163
|
+
## Item 82: Know Where to Find Community-Built Modules
|
|
164
|
+
- PyPI (Python Package Index) is the main repository
|
|
165
|
+
- Use `pip install` to install packages
|
|
166
|
+
- Check package health: last update, stars, downloads, issues
|
|
167
|
+
- Popular packages: requests, flask, django, pandas, numpy, pytest
|
|
168
|
+
|
|
169
|
+
## Item 83: Use Virtual Environments for Isolated and Reproducible Dependencies
|
|
170
|
+
```bash
|
|
171
|
+
# Create virtual environment
|
|
172
|
+
python3 -m venv myenv
|
|
173
|
+
|
|
174
|
+
# Activate
|
|
175
|
+
source myenv/bin/activate
|
|
176
|
+
|
|
177
|
+
# Install packages
|
|
178
|
+
pip install flask==2.0.1
|
|
179
|
+
|
|
180
|
+
# Freeze dependencies
|
|
181
|
+
pip freeze > requirements.txt
|
|
182
|
+
|
|
183
|
+
# Recreate environment
|
|
184
|
+
pip install -r requirements.txt
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
- Always use virtual environments for projects
|
|
188
|
+
- Never install packages globally with `pip`
|
|
189
|
+
- Use `requirements.txt` for reproducible environments
|
|
190
|
+
- Consider `pyproject.toml` and modern tools (poetry, pipenv)
|
|
191
|
+
|
|
192
|
+
## Item 84: Write Docstrings for Every Module, Class, and Function
|
|
193
|
+
```python
|
|
194
|
+
"""Module docstring: brief description of the module's purpose."""
|
|
195
|
+
|
|
196
|
+
class MyClass:
|
|
197
|
+
"""One-line summary of the class.
|
|
198
|
+
|
|
199
|
+
Extended description of the class if needed.
|
|
200
|
+
|
|
201
|
+
Attributes:
|
|
202
|
+
attr1: Description of attr1.
|
|
203
|
+
attr2: Description of attr2.
|
|
204
|
+
"""
|
|
205
|
+
|
|
206
|
+
def method(self, arg1: str, arg2: int = 0) -> bool:
|
|
207
|
+
"""One-line summary of method.
|
|
208
|
+
|
|
209
|
+
Extended description if needed.
|
|
210
|
+
|
|
211
|
+
Args:
|
|
212
|
+
arg1: Description of arg1.
|
|
213
|
+
arg2: Description of arg2. Defaults to 0.
|
|
214
|
+
|
|
215
|
+
Returns:
|
|
216
|
+
Description of return value.
|
|
217
|
+
|
|
218
|
+
Raises:
|
|
219
|
+
ValueError: When arg1 is empty.
|
|
220
|
+
"""
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
- First line: one-line summary ending with period
|
|
224
|
+
- Blank line, then extended description if needed
|
|
225
|
+
- Document Args, Returns, Raises sections
|
|
226
|
+
- Use Google style or NumPy style consistently
|
|
227
|
+
- Type hints complement but don't replace docstrings
|
|
228
|
+
|
|
229
|
+
## Item 85: Use Packages to Organize Modules and Provide Stable APIs
|
|
230
|
+
```python
|
|
231
|
+
# mypackage/__init__.py
|
|
232
|
+
from mypackage.core import (
|
|
233
|
+
PublicClass,
|
|
234
|
+
public_function,
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
__all__ = ['PublicClass', 'public_function']
|
|
238
|
+
|
|
239
|
+
# mypackage/core.py
|
|
240
|
+
class PublicClass:
|
|
241
|
+
...
|
|
242
|
+
|
|
243
|
+
def public_function():
|
|
244
|
+
...
|
|
245
|
+
|
|
246
|
+
def _private_helper(): # not exported
|
|
247
|
+
...
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
- Use `__init__.py` to define public API
|
|
251
|
+
- Use `__all__` to control `from package import *` behavior
|
|
252
|
+
- Keep internal modules private with `_` prefix
|
|
253
|
+
- Stable API = external code won't break when internals change
|