@booklib/skills 1.0.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/CONTRIBUTING.md +122 -0
  2. package/README.md +20 -1
  3. package/ROADMAP.md +36 -0
  4. package/animation-at-work/evals/evals.json +44 -0
  5. package/animation-at-work/examples/after.md +64 -0
  6. package/animation-at-work/examples/before.md +35 -0
  7. package/animation-at-work/scripts/audit_animations.py +295 -0
  8. package/bin/skills.js +552 -42
  9. package/clean-code-reviewer/SKILL.md +109 -1
  10. package/clean-code-reviewer/evals/evals.json +121 -3
  11. package/clean-code-reviewer/examples/after.md +48 -0
  12. package/clean-code-reviewer/examples/before.md +33 -0
  13. package/clean-code-reviewer/references/api_reference.md +158 -0
  14. package/clean-code-reviewer/references/practices-catalog.md +282 -0
  15. package/clean-code-reviewer/references/review-checklist.md +254 -0
  16. package/clean-code-reviewer/scripts/pre-review.py +206 -0
  17. package/data-intensive-patterns/evals/evals.json +43 -0
  18. package/data-intensive-patterns/examples/after.md +61 -0
  19. package/data-intensive-patterns/examples/before.md +38 -0
  20. package/data-intensive-patterns/scripts/adr.py +213 -0
  21. package/data-pipelines/evals/evals.json +45 -0
  22. package/data-pipelines/examples/after.md +97 -0
  23. package/data-pipelines/examples/before.md +37 -0
  24. package/data-pipelines/scripts/new_pipeline.py +444 -0
  25. package/design-patterns/evals/evals.json +46 -0
  26. package/design-patterns/examples/after.md +52 -0
  27. package/design-patterns/examples/before.md +29 -0
  28. package/design-patterns/scripts/scaffold.py +807 -0
  29. package/domain-driven-design/SKILL.md +120 -0
  30. package/domain-driven-design/evals/evals.json +48 -0
  31. package/domain-driven-design/examples/after.md +80 -0
  32. package/domain-driven-design/examples/before.md +43 -0
  33. package/domain-driven-design/scripts/scaffold.py +421 -0
  34. package/effective-java/evals/evals.json +46 -0
  35. package/effective-java/examples/after.md +83 -0
  36. package/effective-java/examples/before.md +37 -0
  37. package/effective-java/scripts/checkstyle_setup.py +211 -0
  38. package/effective-kotlin/evals/evals.json +45 -0
  39. package/effective-kotlin/examples/after.md +36 -0
  40. package/effective-kotlin/examples/before.md +38 -0
  41. package/effective-python/SKILL.md +199 -0
  42. package/effective-python/evals/evals.json +44 -0
  43. package/effective-python/examples/after.md +56 -0
  44. package/effective-python/examples/before.md +40 -0
  45. package/effective-python/ref-01-pythonic-thinking.md +202 -0
  46. package/effective-python/ref-02-lists-and-dicts.md +146 -0
  47. package/effective-python/ref-03-functions.md +186 -0
  48. package/effective-python/ref-04-comprehensions-generators.md +211 -0
  49. package/effective-python/ref-05-classes-interfaces.md +188 -0
  50. package/effective-python/ref-06-metaclasses-attributes.md +209 -0
  51. package/effective-python/ref-07-concurrency.md +213 -0
  52. package/effective-python/ref-08-robustness-performance.md +248 -0
  53. package/effective-python/ref-09-testing-debugging.md +253 -0
  54. package/effective-python/ref-10-collaboration.md +175 -0
  55. package/effective-python/references/api_reference.md +218 -0
  56. package/effective-python/references/practices-catalog.md +483 -0
  57. package/effective-python/references/review-checklist.md +190 -0
  58. package/effective-python/scripts/lint.py +173 -0
  59. package/kotlin-in-action/evals/evals.json +43 -0
  60. package/kotlin-in-action/examples/after.md +53 -0
  61. package/kotlin-in-action/examples/before.md +39 -0
  62. package/kotlin-in-action/scripts/setup_detekt.py +224 -0
  63. package/lean-startup/evals/evals.json +43 -0
  64. package/lean-startup/examples/after.md +80 -0
  65. package/lean-startup/examples/before.md +34 -0
  66. package/lean-startup/scripts/new_experiment.py +286 -0
  67. package/microservices-patterns/SKILL.md +140 -0
  68. package/microservices-patterns/evals/evals.json +45 -0
  69. package/microservices-patterns/examples/after.md +69 -0
  70. package/microservices-patterns/examples/before.md +40 -0
  71. package/microservices-patterns/scripts/new_service.py +583 -0
  72. package/package.json +1 -1
  73. package/refactoring-ui/evals/evals.json +45 -0
  74. package/refactoring-ui/examples/after.md +85 -0
  75. package/refactoring-ui/examples/before.md +58 -0
  76. package/refactoring-ui/scripts/audit_css.py +250 -0
  77. package/skill-router/SKILL.md +142 -0
  78. package/skill-router/evals/evals.json +38 -0
  79. package/skill-router/examples/after.md +63 -0
  80. package/skill-router/examples/before.md +39 -0
  81. package/skill-router/references/api_reference.md +24 -0
  82. package/skill-router/references/routing-heuristics.md +89 -0
  83. package/skill-router/references/skill-catalog.md +156 -0
  84. package/skill-router/scripts/route.py +266 -0
  85. package/storytelling-with-data/evals/evals.json +47 -0
  86. package/storytelling-with-data/examples/after.md +50 -0
  87. package/storytelling-with-data/examples/before.md +33 -0
  88. package/storytelling-with-data/scripts/chart_review.py +301 -0
  89. package/system-design-interview/evals/evals.json +45 -0
  90. package/system-design-interview/examples/after.md +94 -0
  91. package/system-design-interview/examples/before.md +27 -0
  92. package/system-design-interview/scripts/new_design.py +421 -0
  93. package/using-asyncio-python/evals/evals.json +43 -0
  94. package/using-asyncio-python/examples/after.md +68 -0
  95. package/using-asyncio-python/examples/before.md +39 -0
  96. package/using-asyncio-python/scripts/check_blocking.py +270 -0
  97. package/web-scraping-python/evals/evals.json +46 -0
  98. package/web-scraping-python/examples/after.md +109 -0
  99. package/web-scraping-python/examples/before.md +40 -0
  100. package/web-scraping-python/scripts/new_scraper.py +231 -0
@@ -0,0 +1,97 @@
1
+ # After
2
+
3
+ A clean pipeline with separated extract/transform/load functions, idempotent upserts, retry logic, and proper error handling.
4
+
5
+ ```python
6
+ import logging
7
+ import time
8
+ from dataclasses import dataclass
9
+ from datetime import datetime
10
+ from functools import wraps
11
+
12
+ import psycopg2
13
+ import requests
14
+ from requests.exceptions import RequestException
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ @dataclass
20
+ class SaleRecord:
21
+ id: str
22
+ sale_date: datetime
23
+ revenue: float
24
+ region: str
25
+
26
+
27
+ def with_retry(max_attempts: int = 3, backoff_seconds: float = 2.0):
28
+ """Decorator: retry a function on transient failures with exponential backoff."""
29
+ def decorator(fn):
30
+ @wraps(fn)
31
+ def wrapper(*args, **kwargs):
32
+ for attempt in range(1, max_attempts + 1):
33
+ try:
34
+ return fn(*args, **kwargs)
35
+ except (RequestException, psycopg2.OperationalError) as exc:
36
+ if attempt == max_attempts:
37
+ raise
38
+ wait = backoff_seconds ** attempt
39
+ logger.warning("Attempt %d/%d failed: %s — retrying in %.1fs",
40
+ attempt, max_attempts, exc, wait)
41
+ time.sleep(wait)
42
+ return wrapper
43
+ return decorator
44
+
45
+
46
+ @with_retry(max_attempts=3)
47
+ def extract(api_url: str) -> list[dict]:
48
+ """Fetch raw sales records from the partner API."""
49
+ response = requests.get(api_url, timeout=30)
50
+ response.raise_for_status()
51
+ return response.json()["sales"]
52
+
53
+
54
+ def transform(raw_records: list[dict]) -> list[SaleRecord]:
55
+ """Parse and normalise raw API records into typed SaleRecord objects."""
56
+ return [
57
+ SaleRecord(
58
+ id=rec["id"],
59
+ sale_date=datetime.fromisoformat(rec["date"]),
60
+ revenue=float(rec["amount_usd"]),
61
+ region=rec["region"].strip().upper(),
62
+ )
63
+ for rec in raw_records
64
+ ]
65
+
66
+
67
+ def load(records: list[SaleRecord], dsn: str) -> int:
68
+ """Upsert records into fact_sales. Idempotent: re-running is safe."""
69
+ upsert_sql = """
70
+ INSERT INTO fact_sales (sale_id, sale_date, revenue, region, loaded_at)
71
+ VALUES (%(id)s, %(sale_date)s, %(revenue)s, %(region)s, NOW())
72
+ ON CONFLICT (sale_id) DO UPDATE
73
+ SET revenue = EXCLUDED.revenue,
74
+ loaded_at = EXCLUDED.loaded_at
75
+ """
76
+ with psycopg2.connect(dsn) as conn, conn.cursor() as cur:
77
+ cur.executemany(upsert_sql, [vars(r) for r in records])
78
+ loaded = cur.rowcount
79
+ logger.info("Upserted %d records into fact_sales", loaded)
80
+ return loaded
81
+
82
+
83
+ def run_pipeline(api_url: str, warehouse_dsn: str) -> None:
84
+ logger.info("Starting sales pipeline")
85
+ raw = extract(api_url)
86
+ records = transform(raw)
87
+ loaded = load(records, warehouse_dsn)
88
+ logger.info("Pipeline complete: %d records loaded", loaded)
89
+ ```
90
+
91
+ Key improvements:
92
+ - Extract, transform, and load are separate functions with single responsibilities — each is independently testable and replaceable (Ch 13: Best Practices — separation of concerns)
93
+ - `ON CONFLICT (sale_id) DO UPDATE` makes the load idempotent — re-running the pipeline never creates duplicate rows (Ch 13: Idempotency)
94
+ - `@with_retry` decorator handles transient API and database failures with exponential backoff (Ch 6: API Ingestion — retry logic)
95
+ - `SaleRecord` dataclass replaces a raw dict, providing type safety and named field access in the transform step
96
+ - `psycopg2.connect` used as a context manager ensures the connection and transaction are always closed and committed correctly (Ch 4: Database Ingestion)
97
+ - Structured logging with `logger.info/warning` replaces bare `print` — output is filterable and includes context (Ch 12: Monitoring)
@@ -0,0 +1,37 @@
1
+ # Before
2
+
3
+ A Python ETL script that mixes extraction, transformation, and loading in one function with no error handling, no idempotency, and no retry logic.
4
+
5
+ ```python
6
+ import psycopg2
7
+ import requests
8
+ from datetime import datetime
9
+
10
+ def run_pipeline():
11
+ # Extract: fetch from API
12
+ resp = requests.get("https://api.partner.com/sales/export")
13
+ data = resp.json()
14
+
15
+ # Connect to warehouse
16
+ conn = psycopg2.connect("host=dw user=etl dbname=warehouse")
17
+ cur = conn.cursor()
18
+
19
+ # Transform + Load: all in one loop, no error handling
20
+ for record in data["sales"]:
21
+ sale_date = datetime.strptime(record["date"], "%Y-%m-%dT%H:%M:%S")
22
+ revenue = float(record["amount_usd"])
23
+ region = record["region"].strip().upper()
24
+
25
+ # No upsert — re-running inserts duplicates
26
+ cur.execute("""
27
+ INSERT INTO fact_sales (sale_id, sale_date, revenue, region, loaded_at)
28
+ VALUES (%s, %s, %s, %s, NOW())
29
+ """, (record["id"], sale_date, revenue, region))
30
+
31
+ conn.commit()
32
+ cur.close()
33
+ conn.close()
34
+ print("done")
35
+
36
+ run_pipeline()
37
+ ```
@@ -0,0 +1,444 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ new_pipeline.py — Scaffold a new data pipeline with extract/transform/load structure.
4
+ Usage: python new_pipeline.py <pipeline-name> [--source csv|api|db] [--target db|file|api]
5
+ """
6
+
7
+ import argparse
8
+ import os
9
+ import sys
10
+ from pathlib import Path
11
+ from string import Template
12
+
13
+ # ---------------------------------------------------------------------------
14
+ # File templates
15
+ # ---------------------------------------------------------------------------
16
+
17
+ EXTRACT_CSV = '''\
18
+ """extract.py — Extract data from a CSV source."""
19
+
20
+ import csv
21
+ import logging
22
+ import time
23
+ from pathlib import Path
24
+ from functools import wraps
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ def retry(max_attempts=3, delay=2.0, exceptions=(Exception,)):
30
+ """Retry decorator with exponential backoff."""
31
+ def decorator(fn):
32
+ @wraps(fn)
33
+ def wrapper(*args, **kwargs):
34
+ for attempt in range(1, max_attempts + 1):
35
+ try:
36
+ return fn(*args, **kwargs)
37
+ except exceptions as exc:
38
+ if attempt == max_attempts:
39
+ raise
40
+ wait = delay * (2 ** (attempt - 1))
41
+ logger.warning("Attempt %d failed: %s. Retrying in %.1fs...", attempt, exc, wait)
42
+ time.sleep(wait)
43
+ return wrapper
44
+ return decorator
45
+
46
+
47
+ @retry(max_attempts=3, exceptions=(OSError,))
48
+ def extract(source_path: str) -> list[dict]:
49
+ """Read rows from a CSV file. Returns a list of dicts."""
50
+ path = Path(source_path)
51
+ if not path.exists():
52
+ raise FileNotFoundError(f"Source file not found: {path}")
53
+ logger.info("Extracting from %s", path)
54
+ with path.open(newline="", encoding="utf-8") as fh:
55
+ reader = csv.DictReader(fh)
56
+ rows = list(reader)
57
+ logger.info("Extracted %d rows", len(rows))
58
+ return rows
59
+ '''
60
+
61
+ EXTRACT_API = '''\
62
+ """extract.py — Extract data from an HTTP API source."""
63
+
64
+ import json
65
+ import logging
66
+ import time
67
+ import urllib.error
68
+ import urllib.request
69
+ from functools import wraps
70
+
71
+ logger = logging.getLogger(__name__)
72
+
73
+ BASE_URL = "https://api.example.com/data"
74
+ API_KEY = "" # Set via environment variable in production
75
+
76
+
77
+ def retry(max_attempts=3, delay=2.0, exceptions=(Exception,)):
78
+ """Retry decorator with exponential backoff."""
79
+ def decorator(fn):
80
+ @wraps(fn)
81
+ def wrapper(*args, **kwargs):
82
+ for attempt in range(1, max_attempts + 1):
83
+ try:
84
+ return fn(*args, **kwargs)
85
+ except exceptions as exc:
86
+ if attempt == max_attempts:
87
+ raise
88
+ wait = delay * (2 ** (attempt - 1))
89
+ logger.warning("Attempt %d failed: %s. Retrying in %.1fs...", attempt, exc, wait)
90
+ time.sleep(wait)
91
+ return wrapper
92
+ return decorator
93
+
94
+
95
+ @retry(max_attempts=3, exceptions=(urllib.error.URLError, OSError))
96
+ def extract(endpoint: str = BASE_URL) -> list[dict]:
97
+ """Fetch JSON records from an API endpoint. Returns a list of dicts."""
98
+ logger.info("Extracting from %s", endpoint)
99
+ req = urllib.request.Request(endpoint, headers={"Accept": "application/json"})
100
+ with urllib.request.urlopen(req, timeout=30) as response:
101
+ data = json.loads(response.read())
102
+ records = data if isinstance(data, list) else data.get("results", data.get("items", []))
103
+ logger.info("Extracted %d records", len(records))
104
+ return records
105
+ '''
106
+
107
+ EXTRACT_DB = '''\
108
+ """extract.py — Extract data from a database source."""
109
+
110
+ import logging
111
+ import sqlite3
112
+ import time
113
+ from functools import wraps
114
+
115
+ logger = logging.getLogger(__name__)
116
+
117
+ DB_PATH = "source.db"
118
+ QUERY = "SELECT * FROM source_table"
119
+
120
+
121
+ def retry(max_attempts=3, delay=2.0, exceptions=(Exception,)):
122
+ """Retry decorator with exponential backoff."""
123
+ def decorator(fn):
124
+ @wraps(fn)
125
+ def wrapper(*args, **kwargs):
126
+ for attempt in range(1, max_attempts + 1):
127
+ try:
128
+ return fn(*args, **kwargs)
129
+ except exceptions as exc:
130
+ if attempt == max_attempts:
131
+ raise
132
+ wait = delay * (2 ** (attempt - 1))
133
+ logger.warning("Attempt %d failed: %s. Retrying in %.1fs...", attempt, exc, wait)
134
+ time.sleep(wait)
135
+ return wrapper
136
+ return decorator
137
+
138
+
139
+ @retry(max_attempts=3, exceptions=(sqlite3.OperationalError,))
140
+ def extract(db_path: str = DB_PATH, query: str = QUERY) -> list[dict]:
141
+ """Query records from a SQLite database. Returns a list of dicts."""
142
+ logger.info("Connecting to %s", db_path)
143
+ conn = sqlite3.connect(db_path)
144
+ conn.row_factory = sqlite3.Row
145
+ try:
146
+ cursor = conn.execute(query)
147
+ rows = [dict(row) for row in cursor.fetchall()]
148
+ finally:
149
+ conn.close()
150
+ logger.info("Extracted %d rows", len(rows))
151
+ return rows
152
+ '''
153
+
154
+ TRANSFORM_TEMPLATE = '''\
155
+ """transform.py — Transform extracted records."""
156
+
157
+ import logging
158
+ from typing import Any
159
+
160
+ logger = logging.getLogger(__name__)
161
+
162
+
163
+ def _clean_record(record: dict[str, Any]) -> dict[str, Any]:
164
+ """Strip whitespace from string values and drop empty fields."""
165
+ cleaned = {}
166
+ for key, value in record.items():
167
+ if isinstance(value, str):
168
+ value = value.strip()
169
+ if value not in (None, "", []):
170
+ cleaned[key] = value
171
+ return cleaned
172
+
173
+
174
+ def _validate_record(record: dict[str, Any]) -> bool:
175
+ """Return True if the record is valid. Customize required fields here."""
176
+ # TODO: add field-specific validation
177
+ return bool(record)
178
+
179
+
180
+ def transform(records: list[dict]) -> list[dict]:
181
+ """Clean, validate, and reshape records for loading."""
182
+ logger.info("Transforming %d records", len(records))
183
+ output = []
184
+ skipped = 0
185
+ for record in records:
186
+ cleaned = _clean_record(record)
187
+ if not _validate_record(cleaned):
188
+ skipped += 1
189
+ continue
190
+ # TODO: add field mappings / enrichment here
191
+ output.append(cleaned)
192
+ if skipped:
193
+ logger.warning("Skipped %d invalid records", skipped)
194
+ logger.info("Transformed %d records", len(output))
195
+ return output
196
+ '''
197
+
198
+ LOAD_DB = '''\
199
+ """load.py — Idempotent load into a SQLite database using upsert."""
200
+
201
+ import logging
202
+ import sqlite3
203
+ from typing import Any
204
+
205
+ logger = logging.getLogger(__name__)
206
+
207
+ DB_PATH = "output.db"
208
+ TABLE = "$pipeline_name"
209
+ # Define a unique key column used for upsert conflict detection
210
+ UNIQUE_KEY = "id"
211
+
212
+
213
+ def _ensure_table(conn: sqlite3.Connection, sample: dict[str, Any]) -> None:
214
+ columns = ", ".join(
215
+ f"{col} TEXT" if col != UNIQUE_KEY else f"{col} TEXT PRIMARY KEY"
216
+ for col in sample
217
+ )
218
+ conn.execute(f"CREATE TABLE IF NOT EXISTS {TABLE} ({columns})")
219
+ conn.commit()
220
+
221
+
222
+ def load(records: list[dict]) -> int:
223
+ """Upsert records into SQLite. Returns number of rows written."""
224
+ if not records:
225
+ logger.info("No records to load.")
226
+ return 0
227
+ logger.info("Loading %d records into %s:%s", len(records), DB_PATH, TABLE)
228
+ conn = sqlite3.connect(DB_PATH)
229
+ try:
230
+ _ensure_table(conn, records[0])
231
+ cols = ", ".join(records[0].keys())
232
+ placeholders = ", ".join("?" for _ in records[0])
233
+ sql = (
234
+ f"INSERT OR REPLACE INTO {TABLE} ({cols}) VALUES ({placeholders})"
235
+ )
236
+ conn.executemany(sql, [list(r.values()) for r in records])
237
+ conn.commit()
238
+ finally:
239
+ conn.close()
240
+ logger.info("Loaded %d records", len(records))
241
+ return len(records)
242
+ '''
243
+
244
+ LOAD_FILE = '''\
245
+ """load.py — Write records to a CSV or JSON file (idempotent by overwrite)."""
246
+
247
+ import csv
248
+ import json
249
+ import logging
250
+ from pathlib import Path
251
+
252
+ logger = logging.getLogger(__name__)
253
+
254
+ OUTPUT_PATH = "$pipeline_name_output.csv"
255
+
256
+
257
+ def load(records: list[dict], output_path: str = OUTPUT_PATH) -> int:
258
+ """Write records to a file. Overwrites to ensure idempotency."""
259
+ if not records:
260
+ logger.info("No records to load.")
261
+ return 0
262
+ path = Path(output_path)
263
+ path.parent.mkdir(parents=True, exist_ok=True)
264
+ if path.suffix == ".json":
265
+ path.write_text(json.dumps(records, indent=2, default=str), encoding="utf-8")
266
+ else:
267
+ with path.open("w", newline="", encoding="utf-8") as fh:
268
+ writer = csv.DictWriter(fh, fieldnames=records[0].keys())
269
+ writer.writeheader()
270
+ writer.writerows(records)
271
+ logger.info("Wrote %d records to %s", len(records), path)
272
+ return len(records)
273
+ '''
274
+
275
+ LOAD_API = '''\
276
+ """load.py — POST records to an API endpoint (idempotent with dedup key)."""
277
+
278
+ import json
279
+ import logging
280
+ import urllib.error
281
+ import urllib.request
282
+
283
+ logger = logging.getLogger(__name__)
284
+
285
+ TARGET_URL = "https://api.example.com/ingest"
286
+ BATCH_SIZE = 100
287
+
288
+
289
+ def _post_batch(batch: list[dict]) -> None:
290
+ payload = json.dumps(batch).encode("utf-8")
291
+ req = urllib.request.Request(
292
+ TARGET_URL,
293
+ data=payload,
294
+ method="POST",
295
+ headers={"Content-Type": "application/json"},
296
+ )
297
+ try:
298
+ with urllib.request.urlopen(req, timeout=30) as resp:
299
+ status = resp.status
300
+ logger.info("Batch of %d posted — HTTP %d", len(batch), status)
301
+ except urllib.error.HTTPError as exc:
302
+ logger.error("HTTP error %d posting batch: %s", exc.code, exc.reason)
303
+ raise
304
+
305
+
306
+ def load(records: list[dict]) -> int:
307
+ """POST records in batches. Returns total records sent."""
308
+ if not records:
309
+ logger.info("No records to load.")
310
+ return 0
311
+ total = 0
312
+ for i in range(0, len(records), BATCH_SIZE):
313
+ batch = records[i:i + BATCH_SIZE]
314
+ _post_batch(batch)
315
+ total += len(batch)
316
+ logger.info("Loaded %d records via API", total)
317
+ return total
318
+ '''
319
+
320
+ PIPELINE_TEMPLATE = '''\
321
+ """pipeline.py — Orchestrator: extract → transform → load."""
322
+
323
+ import logging
324
+ import sys
325
+ import time
326
+
327
+ from extract import extract
328
+ from transform import transform
329
+ from load import load
330
+
331
+ logging.basicConfig(
332
+ level=logging.INFO,
333
+ format="%(asctime)s %(levelname)-8s %(name)s %(message)s",
334
+ datefmt="%Y-%m-%dT%H:%M:%S",
335
+ )
336
+ logger = logging.getLogger("$pipeline_name")
337
+
338
+
339
+ def run() -> int:
340
+ """Run the full pipeline. Returns exit code (0=success, 1=failure)."""
341
+ start = time.monotonic()
342
+ logger.info("Pipeline '$pipeline_name' starting")
343
+ try:
344
+ raw = extract()
345
+ records = transform(raw)
346
+ count = load(records)
347
+ elapsed = time.monotonic() - start
348
+ logger.info(
349
+ "Pipeline complete — %d records loaded in %.2fs", count, elapsed
350
+ )
351
+ return 0
352
+ except Exception as exc:
353
+ logger.exception("Pipeline failed: %s", exc)
354
+ return 1
355
+
356
+
357
+ if __name__ == "__main__":
358
+ sys.exit(run())
359
+ '''
360
+
361
+ REQUIREMENTS_TEMPLATE = '''\
362
+ # Runtime dependencies for $pipeline_name pipeline
363
+ # Add your project-specific packages below.
364
+
365
+ # Uncomment as needed:
366
+ # requests>=2.31 # for API sources/targets
367
+ # psycopg2-binary>=2.9 # for PostgreSQL
368
+ # pymysql>=1.1 # for MySQL
369
+ # pandas>=2.0 # for complex transformations
370
+ # pydantic>=2.0 # for record validation
371
+ '''
372
+
373
+ # ---------------------------------------------------------------------------
374
+ # Source/target template selection
375
+ # ---------------------------------------------------------------------------
376
+
377
+ EXTRACT_TEMPLATES = {"csv": EXTRACT_CSV, "api": EXTRACT_API, "db": EXTRACT_DB}
378
+ LOAD_TEMPLATES = {"db": LOAD_DB, "file": LOAD_FILE, "api": LOAD_API}
379
+
380
+
381
+ def render(template_str: str, pipeline_name: str) -> str:
382
+ safe_name = pipeline_name.replace("-", "_")
383
+ return Template(template_str).safe_substitute(pipeline_name=safe_name)
384
+
385
+
386
+ def create_pipeline(name: str, source: str, target: str) -> None:
387
+ base = Path(name)
388
+ if base.exists():
389
+ print(f"Error: directory '{base}' already exists. Choose a different name.")
390
+ sys.exit(1)
391
+ base.mkdir(parents=True)
392
+
393
+ files = {
394
+ "extract.py": render(EXTRACT_TEMPLATES[source], name),
395
+ "transform.py": render(TRANSFORM_TEMPLATE, name),
396
+ "load.py": render(LOAD_TEMPLATES[target], name),
397
+ "pipeline.py": render(PIPELINE_TEMPLATE, name),
398
+ "requirements.txt": render(REQUIREMENTS_TEMPLATE, name),
399
+ }
400
+
401
+ created = []
402
+ for filename, content in files.items():
403
+ path = base / filename
404
+ path.write_text(content, encoding="utf-8")
405
+ created.append(str(path))
406
+
407
+ print(f"\nPipeline '{name}' created successfully!\n")
408
+ print(f" Source : {source}")
409
+ print(f" Target : {target}")
410
+ print(f"\nFiles created:")
411
+ for f in created:
412
+ print(f" {f}")
413
+ print(f"\nNext steps:")
414
+ print(f" 1. cd {name}")
415
+ print(f" 2. Review extract.py and update source configuration")
416
+ print(f" 3. Customize transform.py with your business logic")
417
+ print(f" 4. Review load.py and configure target destination")
418
+ print(f" 5. pip install -r requirements.txt # add packages as needed")
419
+ print(f" 6. python pipeline.py")
420
+
421
+
422
+ def main():
423
+ parser = argparse.ArgumentParser(
424
+ description="Scaffold a new data pipeline (extract → transform → load)"
425
+ )
426
+ parser.add_argument("name", help="Pipeline name (used as directory name)")
427
+ parser.add_argument(
428
+ "--source",
429
+ choices=["csv", "api", "db"],
430
+ default="csv",
431
+ help="Data source type (default: csv)",
432
+ )
433
+ parser.add_argument(
434
+ "--target",
435
+ choices=["db", "file", "api"],
436
+ default="db",
437
+ help="Data target type (default: db)",
438
+ )
439
+ args = parser.parse_args()
440
+ create_pipeline(args.name, args.source, args.target)
441
+
442
+
443
+ if __name__ == "__main__":
444
+ main()
@@ -0,0 +1,46 @@
1
+ {
2
+ "evals": [
3
+ {
4
+ "id": "eval-01-strategy-pattern-if-else",
5
+ "prompt": "Review this Java code:\n\n```java\npublic class PaymentProcessor {\n public void processPayment(String paymentType, double amount) {\n if (paymentType.equals(\"CREDIT_CARD\")) {\n System.out.println(\"Validating credit card...\");\n System.out.println(\"Charging $\" + amount + \" to credit card\");\n System.out.println(\"Sending credit card receipt\");\n } else if (paymentType.equals(\"PAYPAL\")) {\n System.out.println(\"Redirecting to PayPal...\");\n System.out.println(\"Confirming PayPal payment of $\" + amount);\n System.out.println(\"Sending PayPal confirmation email\");\n } else if (paymentType.equals(\"CRYPTO\")) {\n System.out.println(\"Opening crypto wallet...\");\n System.out.println(\"Broadcasting transaction of $\" + amount);\n System.out.println(\"Waiting for blockchain confirmation\");\n } else if (paymentType.equals(\"BANK_TRANSFER\")) {\n System.out.println(\"Initiating bank transfer...\");\n System.out.println(\"Transferring $\" + amount + \" via ACH\");\n System.out.println(\"Sending bank transfer confirmation\");\n } else {\n throw new IllegalArgumentException(\"Unknown payment type: \" + paymentType);\n }\n }\n \n public double calculateFee(String paymentType, double amount) {\n if (paymentType.equals(\"CREDIT_CARD\")) {\n return amount * 0.029 + 0.30;\n } else if (paymentType.equals(\"PAYPAL\")) {\n return amount * 0.034 + 0.30;\n } else if (paymentType.equals(\"CRYPTO\")) {\n return amount * 0.01;\n } else if (paymentType.equals(\"BANK_TRANSFER\")) {\n return 0.25;\n } else {\n throw new IllegalArgumentException(\"Unknown payment type: \" + paymentType);\n }\n }\n}\n```",
6
+ "expectations": [
7
+ "Identifies this as a prime Strategy pattern candidate — the payment algorithm varies by type",
8
+ "Calls out the Open-Closed Principle violation: adding a new payment method requires modifying this class",
9
+ "Notes that the parallel if/else chains in processPayment and calculateFee are a code smell (duplicated conditional logic)",
10
+ "Recommends extracting a PaymentStrategy interface with processPayment(double amount) and calculateFee(double amount) methods",
11
+ "Suggests concrete strategy classes: CreditCardStrategy, PayPalStrategy, CryptoStrategy, BankTransferStrategy",
12
+ "Describes the Context class (PaymentProcessor) holding a reference to the strategy interface",
13
+ "Notes that using a String 'paymentType' is fragile — a Map<String, PaymentStrategy> or enum-based lookup is safer",
14
+ "Provides a sketch of the refactored interface and at least one concrete implementation",
15
+ "References the principle: 'Encapsulate what varies' — payment behavior is what varies here"
16
+ ]
17
+ },
18
+ {
19
+ "id": "eval-02-unnecessary-singleton",
20
+ "prompt": "Review this Java code:\n\n```java\npublic class TaxCalculator {\n private static TaxCalculator instance;\n private double vatRate = 0.20;\n private double salesTaxRate = 0.08;\n \n private TaxCalculator() {}\n \n public static TaxCalculator getInstance() {\n if (instance == null) {\n instance = new TaxCalculator();\n }\n return instance;\n }\n \n public double calculateVAT(double price) {\n return price * vatRate;\n }\n \n public double calculateSalesTax(double price) {\n return price * salesTaxRate;\n }\n \n public void setVatRate(double vatRate) {\n this.vatRate = vatRate;\n }\n \n public void setSalesTaxRate(double salesTaxRate) {\n this.salesTaxRate = salesTaxRate;\n }\n}\n\n// Usage in application code:\nTaxCalculator calc = TaxCalculator.getInstance();\ncalc.setVatRate(0.23); // Portuguese VAT\nOrderTotal total = calc.calculateVAT(orderAmount);\n```",
21
+ "expectations": [
22
+ "Identifies the Singleton pattern and flags it as misapplied here",
23
+ "Explains why global mutable state is dangerous: setVatRate() and setSalesTaxRate() make this Singleton a shared mutable object — one caller's rate change affects all other callers",
24
+ "Notes the race condition: getInstance() is not thread-safe (no synchronization, no double-checked locking, no holder idiom)",
25
+ "Points out that a stateless or value-parameterized object has no reason to be a Singleton — TaxCalculator is just computation",
26
+ "Recommends removing the Singleton and using simple instantiation or dependency injection",
27
+ "Suggests making TaxCalculator immutable: accept rates in the constructor, remove setters",
28
+ "May suggest using the Strategy pattern if tax rules vary by jurisdiction",
29
+ "References the anti-pattern: 'Singleton abuse — using Singleton as a global variable container rather than for genuine single-instance needs'"
30
+ ]
31
+ },
32
+ {
33
+ "id": "eval-03-observer-pattern-correct",
34
+ "prompt": "Review this Java code:\n\n```java\npublic interface StockObserver {\n void update(String ticker, double price, double changePercent);\n}\n\npublic class StockMarket {\n private final Map<String, List<StockObserver>> observers = new HashMap<>();\n private final Map<String, Double> prices = new HashMap<>();\n \n public void registerObserver(String ticker, StockObserver observer) {\n observers.computeIfAbsent(ticker, k -> new ArrayList<>()).add(observer);\n }\n \n public void removeObserver(String ticker, StockObserver observer) {\n List<StockObserver> tickerObservers = observers.get(ticker);\n if (tickerObservers != null) {\n tickerObservers.remove(observer);\n }\n }\n \n public void updatePrice(String ticker, double newPrice) {\n double oldPrice = prices.getOrDefault(ticker, newPrice);\n prices.put(ticker, newPrice);\n double changePercent = oldPrice != 0 ? ((newPrice - oldPrice) / oldPrice) * 100 : 0;\n notifyObservers(ticker, newPrice, changePercent);\n }\n \n private void notifyObservers(String ticker, double price, double changePercent) {\n List<StockObserver> tickerObservers = observers.getOrDefault(ticker, Collections.emptyList());\n for (StockObserver observer : new ArrayList<>(tickerObservers)) {\n observer.update(ticker, price, changePercent);\n }\n }\n}\n\npublic class PriceAlertService implements StockObserver {\n private final double threshold;\n \n public PriceAlertService(double threshold) {\n this.threshold = threshold;\n }\n \n @Override\n public void update(String ticker, double price, double changePercent) {\n if (Math.abs(changePercent) >= threshold) {\n System.out.println(\"ALERT: \" + ticker + \" moved \" + changePercent + \"% to $\" + price);\n }\n }\n}\n```",
35
+ "expectations": [
36
+ "Recognizes this as a correctly implemented Observer pattern and says so explicitly",
37
+ "Praises the use of a StockObserver interface (programming to an interface, not an implementation)",
38
+ "Praises the removeObserver method — the SKILL.md specifically flags 'Observer memory leaks — registered observers never unregistered' as an anti-pattern to catch",
39
+ "Praises the defensive copy 'new ArrayList<>(tickerObservers)' in notifyObservers, which prevents ConcurrentModificationException if an observer deregisters during notification",
40
+ "Praises per-ticker observer registration — observers only receive events for the tickers they care about",
41
+ "Does NOT manufacture fake issues just to have something to say",
42
+ "May offer optional improvements (e.g., thread safety with CopyOnWriteArrayList, using java.util.EventListener, or noting that prices map isn't thread-safe) but clearly frames these as non-critical suggestions"
43
+ ]
44
+ }
45
+ ]
46
+ }
@@ -0,0 +1,52 @@
1
+ # After
2
+
3
+ Each payment method is extracted into its own `PaymentStrategy` implementation behind a common interface, making it trivial to add new methods without touching existing code.
4
+
5
+ ```kotlin
6
+ // Strategy interface — the contract every payment method must fulfill
7
+ interface PaymentStrategy {
8
+ fun process(order: Order): PaymentResult
9
+ }
10
+
11
+ // One class per payment method — focused, testable, replaceable
12
+ class CreditCardPaymentStrategy : PaymentStrategy {
13
+ override fun process(order: Order): PaymentResult {
14
+ val token = CreditCardGateway.tokenize(order.cardNumber)
15
+ val charge = CreditCardGateway.charge(token, order.totalAmount)
16
+ return PaymentResult(success = charge.success, transactionId = charge.id,
17
+ errorMessage = charge.error.takeIf { !charge.success })
18
+ }
19
+ }
20
+
21
+ class PayPalPaymentStrategy : PaymentStrategy {
22
+ override fun process(order: Order): PaymentResult {
23
+ val session = PayPalClient.createSession(order.paypalEmail)
24
+ val payment = PayPalClient.executePayment(session, order.totalAmount)
25
+ return PaymentResult(success = payment.approved, transactionId = payment.token)
26
+ }
27
+ }
28
+
29
+ class BankTransferPaymentStrategy : PaymentStrategy {
30
+ override fun process(order: Order): PaymentResult {
31
+ val ref = BankTransferService.initiate(order.iban, order.totalAmount)
32
+ return PaymentResult(success = ref != null, transactionId = ref,
33
+ errorMessage = "Bank transfer initiation failed".takeIf { ref == null })
34
+ }
35
+ }
36
+
37
+ // Context: delegates entirely to the injected strategy
38
+ class PaymentProcessor(private val strategy: PaymentStrategy) {
39
+ fun process(order: Order): PaymentResult = strategy.process(order)
40
+ }
41
+
42
+ // Usage — caller selects strategy; PaymentProcessor is unaware of the type
43
+ val processor = PaymentProcessor(CreditCardPaymentStrategy())
44
+ val result = processor.process(order)
45
+ ```
46
+
47
+ Key improvements:
48
+ - If/else chain replaced with Strategy pattern — adding a new payment method requires a new class only, no changes to `PaymentProcessor` (Open-Closed Principle)
49
+ - Each strategy is independently testable with a mock `Order`
50
+ - `PaymentProcessor` depends on the `PaymentStrategy` abstraction, not concrete gateway classes (Dependency Inversion Principle)
51
+ - Responsibility for "how to pay" is encapsulated inside each strategy class (Encapsulate What Varies)
52
+ - Caller selects strategy through constructor injection, enabling runtime switching and easy testing