@booklib/skills 1.0.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +122 -0
- package/README.md +20 -1
- package/ROADMAP.md +36 -0
- package/animation-at-work/evals/evals.json +44 -0
- package/animation-at-work/examples/after.md +64 -0
- package/animation-at-work/examples/before.md +35 -0
- package/animation-at-work/scripts/audit_animations.py +295 -0
- package/bin/skills.js +552 -42
- package/clean-code-reviewer/SKILL.md +109 -1
- package/clean-code-reviewer/evals/evals.json +121 -3
- package/clean-code-reviewer/examples/after.md +48 -0
- package/clean-code-reviewer/examples/before.md +33 -0
- package/clean-code-reviewer/references/api_reference.md +158 -0
- package/clean-code-reviewer/references/practices-catalog.md +282 -0
- package/clean-code-reviewer/references/review-checklist.md +254 -0
- package/clean-code-reviewer/scripts/pre-review.py +206 -0
- package/data-intensive-patterns/evals/evals.json +43 -0
- package/data-intensive-patterns/examples/after.md +61 -0
- package/data-intensive-patterns/examples/before.md +38 -0
- package/data-intensive-patterns/scripts/adr.py +213 -0
- package/data-pipelines/evals/evals.json +45 -0
- package/data-pipelines/examples/after.md +97 -0
- package/data-pipelines/examples/before.md +37 -0
- package/data-pipelines/scripts/new_pipeline.py +444 -0
- package/design-patterns/evals/evals.json +46 -0
- package/design-patterns/examples/after.md +52 -0
- package/design-patterns/examples/before.md +29 -0
- package/design-patterns/scripts/scaffold.py +807 -0
- package/domain-driven-design/SKILL.md +120 -0
- package/domain-driven-design/evals/evals.json +48 -0
- package/domain-driven-design/examples/after.md +80 -0
- package/domain-driven-design/examples/before.md +43 -0
- package/domain-driven-design/scripts/scaffold.py +421 -0
- package/effective-java/evals/evals.json +46 -0
- package/effective-java/examples/after.md +83 -0
- package/effective-java/examples/before.md +37 -0
- package/effective-java/scripts/checkstyle_setup.py +211 -0
- package/effective-kotlin/evals/evals.json +45 -0
- package/effective-kotlin/examples/after.md +36 -0
- package/effective-kotlin/examples/before.md +38 -0
- package/effective-python/SKILL.md +199 -0
- package/effective-python/evals/evals.json +44 -0
- package/effective-python/examples/after.md +56 -0
- package/effective-python/examples/before.md +40 -0
- package/effective-python/ref-01-pythonic-thinking.md +202 -0
- package/effective-python/ref-02-lists-and-dicts.md +146 -0
- package/effective-python/ref-03-functions.md +186 -0
- package/effective-python/ref-04-comprehensions-generators.md +211 -0
- package/effective-python/ref-05-classes-interfaces.md +188 -0
- package/effective-python/ref-06-metaclasses-attributes.md +209 -0
- package/effective-python/ref-07-concurrency.md +213 -0
- package/effective-python/ref-08-robustness-performance.md +248 -0
- package/effective-python/ref-09-testing-debugging.md +253 -0
- package/effective-python/ref-10-collaboration.md +175 -0
- package/effective-python/references/api_reference.md +218 -0
- package/effective-python/references/practices-catalog.md +483 -0
- package/effective-python/references/review-checklist.md +190 -0
- package/effective-python/scripts/lint.py +173 -0
- package/kotlin-in-action/evals/evals.json +43 -0
- package/kotlin-in-action/examples/after.md +53 -0
- package/kotlin-in-action/examples/before.md +39 -0
- package/kotlin-in-action/scripts/setup_detekt.py +224 -0
- package/lean-startup/evals/evals.json +43 -0
- package/lean-startup/examples/after.md +80 -0
- package/lean-startup/examples/before.md +34 -0
- package/lean-startup/scripts/new_experiment.py +286 -0
- package/microservices-patterns/SKILL.md +140 -0
- package/microservices-patterns/evals/evals.json +45 -0
- package/microservices-patterns/examples/after.md +69 -0
- package/microservices-patterns/examples/before.md +40 -0
- package/microservices-patterns/scripts/new_service.py +583 -0
- package/package.json +1 -1
- package/refactoring-ui/evals/evals.json +45 -0
- package/refactoring-ui/examples/after.md +85 -0
- package/refactoring-ui/examples/before.md +58 -0
- package/refactoring-ui/scripts/audit_css.py +250 -0
- package/skill-router/SKILL.md +142 -0
- package/skill-router/evals/evals.json +38 -0
- package/skill-router/examples/after.md +63 -0
- package/skill-router/examples/before.md +39 -0
- package/skill-router/references/api_reference.md +24 -0
- package/skill-router/references/routing-heuristics.md +89 -0
- package/skill-router/references/skill-catalog.md +156 -0
- package/skill-router/scripts/route.py +266 -0
- package/storytelling-with-data/evals/evals.json +47 -0
- package/storytelling-with-data/examples/after.md +50 -0
- package/storytelling-with-data/examples/before.md +33 -0
- package/storytelling-with-data/scripts/chart_review.py +301 -0
- package/system-design-interview/evals/evals.json +45 -0
- package/system-design-interview/examples/after.md +94 -0
- package/system-design-interview/examples/before.md +27 -0
- package/system-design-interview/scripts/new_design.py +421 -0
- package/using-asyncio-python/evals/evals.json +43 -0
- package/using-asyncio-python/examples/after.md +68 -0
- package/using-asyncio-python/examples/before.md +39 -0
- package/using-asyncio-python/scripts/check_blocking.py +270 -0
- package/web-scraping-python/evals/evals.json +46 -0
- package/web-scraping-python/examples/after.md +109 -0
- package/web-scraping-python/examples/before.md +40 -0
- package/web-scraping-python/scripts/new_scraper.py +231 -0
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# After
|
|
2
|
+
|
|
3
|
+
A clean pipeline with separated extract/transform/load functions, idempotent upserts, retry logic, and proper error handling.
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
import logging
|
|
7
|
+
import time
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from functools import wraps
|
|
11
|
+
|
|
12
|
+
import psycopg2
|
|
13
|
+
import requests
|
|
14
|
+
from requests.exceptions import RequestException
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
|
|
20
|
+
class SaleRecord:
|
|
21
|
+
id: str
|
|
22
|
+
sale_date: datetime
|
|
23
|
+
revenue: float
|
|
24
|
+
region: str
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def with_retry(max_attempts: int = 3, backoff_seconds: float = 2.0):
    """Decorator factory: retry a function on transient failures with exponential backoff.

    Args:
        max_attempts: Total number of calls to make before giving up (must be >= 1).
        backoff_seconds: Delay before the first retry; the delay doubles after
            every further failed attempt (2s, 4s, 8s, ... with the default).

    Returns:
        A decorator. The wrapped function is re-invoked when it raises
        ``RequestException`` or ``psycopg2.OperationalError``; the exception
        from the final attempt is re-raised unchanged.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
    """
    if max_attempts < 1:
        # With zero attempts the loop body would never run and the wrapper
        # would silently return None without ever calling the function.
        raise ValueError("max_attempts must be at least 1")

    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_attempts + 1):
                try:
                    return fn(*args, **kwargs)
                except (RequestException, psycopg2.OperationalError) as exc:
                    if attempt == max_attempts:
                        raise
                    # Double the base delay per attempt. `backoff_seconds ** attempt`
                    # gives the same values for the default of 2.0 but never grows
                    # for a base of 1.0 and shrinks for bases below 1.
                    wait = backoff_seconds * 2 ** (attempt - 1)
                    logger.warning("Attempt %d/%d failed: %s — retrying in %.1fs",
                                   attempt, max_attempts, exc, wait)
                    time.sleep(wait)
        return wrapper
    return decorator
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@with_retry(max_attempts=3)
|
|
47
|
+
def extract(api_url: str) -> list[dict]:
|
|
48
|
+
"""Fetch raw sales records from the partner API."""
|
|
49
|
+
response = requests.get(api_url, timeout=30)
|
|
50
|
+
response.raise_for_status()
|
|
51
|
+
return response.json()["sales"]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def transform(raw_records: list[dict]) -> list[SaleRecord]:
|
|
55
|
+
"""Parse and normalise raw API records into typed SaleRecord objects."""
|
|
56
|
+
return [
|
|
57
|
+
SaleRecord(
|
|
58
|
+
id=rec["id"],
|
|
59
|
+
sale_date=datetime.fromisoformat(rec["date"]),
|
|
60
|
+
revenue=float(rec["amount_usd"]),
|
|
61
|
+
region=rec["region"].strip().upper(),
|
|
62
|
+
)
|
|
63
|
+
for rec in raw_records
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def load(records: list[SaleRecord], dsn: str) -> int:
    """Upsert records into fact_sales. Idempotent: re-running is safe.

    Args:
        records: Typed sale records; each one is passed to the statement as
            named parameters via ``vars(record)``.
        dsn: psycopg2 connection string for the warehouse.

    Returns:
        The cursor's ``rowcount`` after the batch upsert.
    """
    upsert_sql = """
        INSERT INTO fact_sales (sale_id, sale_date, revenue, region, loaded_at)
        VALUES (%(id)s, %(sale_date)s, %(revenue)s, %(region)s, NOW())
        ON CONFLICT (sale_id) DO UPDATE
        SET revenue = EXCLUDED.revenue,
            loaded_at = EXCLUDED.loaded_at
    """
    conn = psycopg2.connect(dsn)
    try:
        # `with conn` commits on success and rolls back on error, but it does
        # NOT close the connection — that is what the `finally` below is for.
        with conn, conn.cursor() as cur:
            cur.executemany(upsert_sql, [vars(r) for r in records])
            loaded = cur.rowcount
    finally:
        conn.close()
    logger.info("Upserted %d records into fact_sales", loaded)
    return loaded
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def run_pipeline(api_url: str, warehouse_dsn: str) -> None:
|
|
84
|
+
logger.info("Starting sales pipeline")
|
|
85
|
+
raw = extract(api_url)
|
|
86
|
+
records = transform(raw)
|
|
87
|
+
loaded = load(records, warehouse_dsn)
|
|
88
|
+
logger.info("Pipeline complete: %d records loaded", loaded)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Key improvements:
|
|
92
|
+
- Extract, transform, and load are separate functions with single responsibilities — each is independently testable and replaceable (Ch 13: Best Practices — separation of concerns)
|
|
93
|
+
- `ON CONFLICT (sale_id) DO UPDATE` makes the load idempotent — re-running the pipeline never creates duplicate rows (Ch 13: Idempotency)
|
|
94
|
+
- `@with_retry` decorator handles transient API and database failures with exponential backoff (Ch 6: API Ingestion — retry logic)
|
|
95
|
+
- `SaleRecord` dataclass replaces a raw dict, providing type safety and named field access in the transform step
|
|
96
|
+
- `psycopg2.connect` used as a context manager commits the transaction on success and rolls it back on error; it does not close the connection, which still has to be closed explicitly (Ch 4: Database Ingestion)
|
|
97
|
+
- Structured logging with `logger.info/warning` replaces bare `print` — output is filterable and includes context (Ch 12: Monitoring)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Before
|
|
2
|
+
|
|
3
|
+
A Python ETL script that mixes extraction, transformation, and loading in one function with no error handling, no idempotency, and no retry logic.
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
import psycopg2
|
|
7
|
+
import requests
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
|
|
10
|
+
def run_pipeline():
|
|
11
|
+
# Extract: fetch from API
|
|
12
|
+
resp = requests.get("https://api.partner.com/sales/export")
|
|
13
|
+
data = resp.json()
|
|
14
|
+
|
|
15
|
+
# Connect to warehouse
|
|
16
|
+
conn = psycopg2.connect("host=dw user=etl dbname=warehouse")
|
|
17
|
+
cur = conn.cursor()
|
|
18
|
+
|
|
19
|
+
# Transform + Load: all in one loop, no error handling
|
|
20
|
+
for record in data["sales"]:
|
|
21
|
+
sale_date = datetime.strptime(record["date"], "%Y-%m-%dT%H:%M:%S")
|
|
22
|
+
revenue = float(record["amount_usd"])
|
|
23
|
+
region = record["region"].strip().upper()
|
|
24
|
+
|
|
25
|
+
# No upsert — re-running inserts duplicates
|
|
26
|
+
cur.execute("""
|
|
27
|
+
INSERT INTO fact_sales (sale_id, sale_date, revenue, region, loaded_at)
|
|
28
|
+
VALUES (%s, %s, %s, %s, NOW())
|
|
29
|
+
""", (record["id"], sale_date, revenue, region))
|
|
30
|
+
|
|
31
|
+
conn.commit()
|
|
32
|
+
cur.close()
|
|
33
|
+
conn.close()
|
|
34
|
+
print("done")
|
|
35
|
+
|
|
36
|
+
run_pipeline()
|
|
37
|
+
```
|
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
new_pipeline.py — Scaffold a new data pipeline with extract/transform/load structure.
|
|
4
|
+
Usage: python new_pipeline.py <pipeline-name> [--source csv|api|db] [--target db|file|api]
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import argparse
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from string import Template
|
|
12
|
+
|
|
13
|
+
# ---------------------------------------------------------------------------
|
|
14
|
+
# File templates
|
|
15
|
+
# ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
# Template for the generated extract.py when the source is a CSV file.
# extract() takes a default path so the generated pipeline.py, which calls
# extract() with no arguments, works out of the box (same convention as the
# API and DB extract templates, which default to module-level constants).
EXTRACT_CSV = '''\
"""extract.py — Extract data from a CSV source."""

import csv
import logging
import time
from pathlib import Path
from functools import wraps

logger = logging.getLogger(__name__)

SOURCE_PATH = "source.csv"


def retry(max_attempts=3, delay=2.0, exceptions=(Exception,)):
    """Retry decorator with exponential backoff."""
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_attempts + 1):
                try:
                    return fn(*args, **kwargs)
                except exceptions as exc:
                    if attempt == max_attempts:
                        raise
                    wait = delay * (2 ** (attempt - 1))
                    logger.warning("Attempt %d failed: %s. Retrying in %.1fs...", attempt, exc, wait)
                    time.sleep(wait)
        return wrapper
    return decorator


@retry(max_attempts=3, exceptions=(OSError,))
def _read_rows(path: Path) -> list[dict]:
    """Read every row of the CSV file; retried on I/O errors."""
    with path.open(newline="", encoding="utf-8") as fh:
        return list(csv.DictReader(fh))


def extract(source_path: str = SOURCE_PATH) -> list[dict]:
    """Read rows from a CSV file. Returns a list of dicts."""
    path = Path(source_path)
    if not path.exists():
        # A missing file is not a transient failure: fail fast, do not retry.
        raise FileNotFoundError(f"Source file not found: {path}")
    logger.info("Extracting from %s", path)
    rows = _read_rows(path)
    logger.info("Extracted %d rows", len(rows))
    return rows
'''
|
|
60
|
+
|
|
61
|
+
EXTRACT_API = '''\
|
|
62
|
+
"""extract.py — Extract data from an HTTP API source."""
|
|
63
|
+
|
|
64
|
+
import json
|
|
65
|
+
import logging
|
|
66
|
+
import time
|
|
67
|
+
import urllib.error
|
|
68
|
+
import urllib.request
|
|
69
|
+
from functools import wraps
|
|
70
|
+
|
|
71
|
+
logger = logging.getLogger(__name__)
|
|
72
|
+
|
|
73
|
+
BASE_URL = "https://api.example.com/data"
|
|
74
|
+
API_KEY = "" # Set via environment variable in production
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def retry(max_attempts=3, delay=2.0, exceptions=(Exception,)):
|
|
78
|
+
"""Retry decorator with exponential backoff."""
|
|
79
|
+
def decorator(fn):
|
|
80
|
+
@wraps(fn)
|
|
81
|
+
def wrapper(*args, **kwargs):
|
|
82
|
+
for attempt in range(1, max_attempts + 1):
|
|
83
|
+
try:
|
|
84
|
+
return fn(*args, **kwargs)
|
|
85
|
+
except exceptions as exc:
|
|
86
|
+
if attempt == max_attempts:
|
|
87
|
+
raise
|
|
88
|
+
wait = delay * (2 ** (attempt - 1))
|
|
89
|
+
logger.warning("Attempt %d failed: %s. Retrying in %.1fs...", attempt, exc, wait)
|
|
90
|
+
time.sleep(wait)
|
|
91
|
+
return wrapper
|
|
92
|
+
return decorator
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@retry(max_attempts=3, exceptions=(urllib.error.URLError, OSError))
|
|
96
|
+
def extract(endpoint: str = BASE_URL) -> list[dict]:
|
|
97
|
+
"""Fetch JSON records from an API endpoint. Returns a list of dicts."""
|
|
98
|
+
logger.info("Extracting from %s", endpoint)
|
|
99
|
+
req = urllib.request.Request(endpoint, headers={"Accept": "application/json"})
|
|
100
|
+
with urllib.request.urlopen(req, timeout=30) as response:
|
|
101
|
+
data = json.loads(response.read())
|
|
102
|
+
records = data if isinstance(data, list) else data.get("results", data.get("items", []))
|
|
103
|
+
logger.info("Extracted %d records", len(records))
|
|
104
|
+
return records
|
|
105
|
+
'''
|
|
106
|
+
|
|
107
|
+
EXTRACT_DB = '''\
|
|
108
|
+
"""extract.py — Extract data from a database source."""
|
|
109
|
+
|
|
110
|
+
import logging
|
|
111
|
+
import sqlite3
|
|
112
|
+
import time
|
|
113
|
+
from functools import wraps
|
|
114
|
+
|
|
115
|
+
logger = logging.getLogger(__name__)
|
|
116
|
+
|
|
117
|
+
DB_PATH = "source.db"
|
|
118
|
+
QUERY = "SELECT * FROM source_table"
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def retry(max_attempts=3, delay=2.0, exceptions=(Exception,)):
|
|
122
|
+
"""Retry decorator with exponential backoff."""
|
|
123
|
+
def decorator(fn):
|
|
124
|
+
@wraps(fn)
|
|
125
|
+
def wrapper(*args, **kwargs):
|
|
126
|
+
for attempt in range(1, max_attempts + 1):
|
|
127
|
+
try:
|
|
128
|
+
return fn(*args, **kwargs)
|
|
129
|
+
except exceptions as exc:
|
|
130
|
+
if attempt == max_attempts:
|
|
131
|
+
raise
|
|
132
|
+
wait = delay * (2 ** (attempt - 1))
|
|
133
|
+
logger.warning("Attempt %d failed: %s. Retrying in %.1fs...", attempt, exc, wait)
|
|
134
|
+
time.sleep(wait)
|
|
135
|
+
return wrapper
|
|
136
|
+
return decorator
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@retry(max_attempts=3, exceptions=(sqlite3.OperationalError,))
|
|
140
|
+
def extract(db_path: str = DB_PATH, query: str = QUERY) -> list[dict]:
|
|
141
|
+
"""Query records from a SQLite database. Returns a list of dicts."""
|
|
142
|
+
logger.info("Connecting to %s", db_path)
|
|
143
|
+
conn = sqlite3.connect(db_path)
|
|
144
|
+
conn.row_factory = sqlite3.Row
|
|
145
|
+
try:
|
|
146
|
+
cursor = conn.execute(query)
|
|
147
|
+
rows = [dict(row) for row in cursor.fetchall()]
|
|
148
|
+
finally:
|
|
149
|
+
conn.close()
|
|
150
|
+
logger.info("Extracted %d rows", len(rows))
|
|
151
|
+
return rows
|
|
152
|
+
'''
|
|
153
|
+
|
|
154
|
+
TRANSFORM_TEMPLATE = '''\
|
|
155
|
+
"""transform.py — Transform extracted records."""
|
|
156
|
+
|
|
157
|
+
import logging
|
|
158
|
+
from typing import Any
|
|
159
|
+
|
|
160
|
+
logger = logging.getLogger(__name__)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _clean_record(record: dict[str, Any]) -> dict[str, Any]:
|
|
164
|
+
"""Strip whitespace from string values and drop empty fields."""
|
|
165
|
+
cleaned = {}
|
|
166
|
+
for key, value in record.items():
|
|
167
|
+
if isinstance(value, str):
|
|
168
|
+
value = value.strip()
|
|
169
|
+
if value not in (None, "", []):
|
|
170
|
+
cleaned[key] = value
|
|
171
|
+
return cleaned
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _validate_record(record: dict[str, Any]) -> bool:
|
|
175
|
+
"""Return True if the record is valid. Customize required fields here."""
|
|
176
|
+
# TODO: add field-specific validation
|
|
177
|
+
return bool(record)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def transform(records: list[dict]) -> list[dict]:
|
|
181
|
+
"""Clean, validate, and reshape records for loading."""
|
|
182
|
+
logger.info("Transforming %d records", len(records))
|
|
183
|
+
output = []
|
|
184
|
+
skipped = 0
|
|
185
|
+
for record in records:
|
|
186
|
+
cleaned = _clean_record(record)
|
|
187
|
+
if not _validate_record(cleaned):
|
|
188
|
+
skipped += 1
|
|
189
|
+
continue
|
|
190
|
+
# TODO: add field mappings / enrichment here
|
|
191
|
+
output.append(cleaned)
|
|
192
|
+
if skipped:
|
|
193
|
+
logger.warning("Skipped %d invalid records", skipped)
|
|
194
|
+
logger.info("Transformed %d records", len(output))
|
|
195
|
+
return output
|
|
196
|
+
'''
|
|
197
|
+
|
|
198
|
+
# Template for the generated load.py when the target is a SQLite database.
# The generated transform step drops empty fields, so records in one batch can
# have different keys; the loader therefore binds values by column name over
# the union of keys instead of relying on each dict's own value order.
LOAD_DB = '''\
"""load.py — Idempotent load into a SQLite database using upsert."""

import logging
import sqlite3
from typing import Any

logger = logging.getLogger(__name__)

DB_PATH = "output.db"
TABLE = "$pipeline_name"
# Define a unique key column used for upsert conflict detection
UNIQUE_KEY = "id"


def _quote(identifier: str) -> str:
    """Quote a table or column name so arbitrary record keys are valid SQL."""
    return '"' + identifier.replace('"', '""') + '"'


def _columns(records: list[dict[str, Any]]) -> list[str]:
    """Return every key seen in the batch, in first-seen order."""
    return list(dict.fromkeys(key for record in records for key in record))


def _ensure_table(conn: sqlite3.Connection, sample: dict[str, Any]) -> None:
    """Create the target table (all TEXT columns) if it does not exist yet."""
    columns = ", ".join(
        f"{_quote(col)} TEXT PRIMARY KEY" if col == UNIQUE_KEY else f"{_quote(col)} TEXT"
        for col in sample
    )
    conn.execute(f"CREATE TABLE IF NOT EXISTS {_quote(TABLE)} ({columns})")
    conn.commit()


def load(records: list[dict]) -> int:
    """Upsert records into SQLite. Returns number of rows written."""
    if not records:
        logger.info("No records to load.")
        return 0
    logger.info("Loading %d records into %s:%s", len(records), DB_PATH, TABLE)
    columns = _columns(records)
    conn = sqlite3.connect(DB_PATH)
    try:
        _ensure_table(conn, dict.fromkeys(columns))
        cols = ", ".join(_quote(col) for col in columns)
        placeholders = ", ".join("?" for _ in columns)
        sql = (
            f"INSERT OR REPLACE INTO {_quote(TABLE)} ({cols}) VALUES ({placeholders})"
        )
        # Bind by column name; a key missing from a record is stored as NULL.
        conn.executemany(sql, [[r.get(col) for col in columns] for r in records])
        conn.commit()
    finally:
        conn.close()
    logger.info("Loaded %d records", len(records))
    return len(records)
'''
|
|
243
|
+
|
|
244
|
+
# Template for the generated load.py when the target is a CSV or JSON file.
# The pipeline-name placeholder is written in braced form because an unbraced
# placeholder directly followed by "_output" would be read by string.Template
# as one longer, unknown identifier and left unsubstituted.
LOAD_FILE = '''\
"""load.py — Write records to a CSV or JSON file (idempotent by overwrite)."""

import csv
import json
import logging
from pathlib import Path

logger = logging.getLogger(__name__)

OUTPUT_PATH = "${pipeline_name}_output.csv"


def load(records: list[dict], output_path: str = OUTPUT_PATH) -> int:
    """Write records to a file. Overwrites to ensure idempotency."""
    if not records:
        logger.info("No records to load.")
        return 0
    path = Path(output_path)
    path.parent.mkdir(parents=True, exist_ok=True)
    if path.suffix == ".json":
        path.write_text(json.dumps(records, indent=2, default=str), encoding="utf-8")
    else:
        # Use every key seen in the batch: DictWriter raises ValueError for a
        # key that is not in fieldnames, and records may not share all keys.
        fieldnames = list(dict.fromkeys(key for record in records for key in record))
        with path.open("w", newline="", encoding="utf-8") as fh:
            writer = csv.DictWriter(fh, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(records)
    logger.info("Wrote %d records to %s", len(records), path)
    return len(records)
'''
|
|
274
|
+
|
|
275
|
+
LOAD_API = '''\
|
|
276
|
+
"""load.py — POST records to an API endpoint (idempotent with dedup key)."""
|
|
277
|
+
|
|
278
|
+
import json
|
|
279
|
+
import logging
|
|
280
|
+
import urllib.error
|
|
281
|
+
import urllib.request
|
|
282
|
+
|
|
283
|
+
logger = logging.getLogger(__name__)
|
|
284
|
+
|
|
285
|
+
TARGET_URL = "https://api.example.com/ingest"
|
|
286
|
+
BATCH_SIZE = 100
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _post_batch(batch: list[dict]) -> None:
|
|
290
|
+
payload = json.dumps(batch).encode("utf-8")
|
|
291
|
+
req = urllib.request.Request(
|
|
292
|
+
TARGET_URL,
|
|
293
|
+
data=payload,
|
|
294
|
+
method="POST",
|
|
295
|
+
headers={"Content-Type": "application/json"},
|
|
296
|
+
)
|
|
297
|
+
try:
|
|
298
|
+
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
299
|
+
status = resp.status
|
|
300
|
+
logger.info("Batch of %d posted — HTTP %d", len(batch), status)
|
|
301
|
+
except urllib.error.HTTPError as exc:
|
|
302
|
+
logger.error("HTTP error %d posting batch: %s", exc.code, exc.reason)
|
|
303
|
+
raise
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def load(records: list[dict]) -> int:
|
|
307
|
+
"""POST records in batches. Returns total records sent."""
|
|
308
|
+
if not records:
|
|
309
|
+
logger.info("No records to load.")
|
|
310
|
+
return 0
|
|
311
|
+
total = 0
|
|
312
|
+
for i in range(0, len(records), BATCH_SIZE):
|
|
313
|
+
batch = records[i:i + BATCH_SIZE]
|
|
314
|
+
_post_batch(batch)
|
|
315
|
+
total += len(batch)
|
|
316
|
+
logger.info("Loaded %d records via API", total)
|
|
317
|
+
return total
|
|
318
|
+
'''
|
|
319
|
+
|
|
320
|
+
PIPELINE_TEMPLATE = '''\
|
|
321
|
+
"""pipeline.py — Orchestrator: extract → transform → load."""
|
|
322
|
+
|
|
323
|
+
import logging
|
|
324
|
+
import sys
|
|
325
|
+
import time
|
|
326
|
+
|
|
327
|
+
from extract import extract
|
|
328
|
+
from transform import transform
|
|
329
|
+
from load import load
|
|
330
|
+
|
|
331
|
+
logging.basicConfig(
|
|
332
|
+
level=logging.INFO,
|
|
333
|
+
format="%(asctime)s %(levelname)-8s %(name)s %(message)s",
|
|
334
|
+
datefmt="%Y-%m-%dT%H:%M:%S",
|
|
335
|
+
)
|
|
336
|
+
logger = logging.getLogger("$pipeline_name")
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def run() -> int:
|
|
340
|
+
"""Run the full pipeline. Returns exit code (0=success, 1=failure)."""
|
|
341
|
+
start = time.monotonic()
|
|
342
|
+
logger.info("Pipeline '$pipeline_name' starting")
|
|
343
|
+
try:
|
|
344
|
+
raw = extract()
|
|
345
|
+
records = transform(raw)
|
|
346
|
+
count = load(records)
|
|
347
|
+
elapsed = time.monotonic() - start
|
|
348
|
+
logger.info(
|
|
349
|
+
"Pipeline complete — %d records loaded in %.2fs", count, elapsed
|
|
350
|
+
)
|
|
351
|
+
return 0
|
|
352
|
+
except Exception as exc:
|
|
353
|
+
logger.exception("Pipeline failed: %s", exc)
|
|
354
|
+
return 1
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
if __name__ == "__main__":
|
|
358
|
+
sys.exit(run())
|
|
359
|
+
'''
|
|
360
|
+
|
|
361
|
+
REQUIREMENTS_TEMPLATE = '''\
|
|
362
|
+
# Runtime dependencies for $pipeline_name pipeline
|
|
363
|
+
# Add your project-specific packages below.
|
|
364
|
+
|
|
365
|
+
# Uncomment as needed:
|
|
366
|
+
# requests>=2.31 # for API sources/targets
|
|
367
|
+
# psycopg2-binary>=2.9 # for PostgreSQL
|
|
368
|
+
# pymysql>=1.1 # for MySQL
|
|
369
|
+
# pandas>=2.0 # for complex transformations
|
|
370
|
+
# pydantic>=2.0 # for record validation
|
|
371
|
+
'''
|
|
372
|
+
|
|
373
|
+
# ---------------------------------------------------------------------------
|
|
374
|
+
# Source/target template selection
|
|
375
|
+
# ---------------------------------------------------------------------------
|
|
376
|
+
|
|
377
|
+
EXTRACT_TEMPLATES = {"csv": EXTRACT_CSV, "api": EXTRACT_API, "db": EXTRACT_DB}
|
|
378
|
+
LOAD_TEMPLATES = {"db": LOAD_DB, "file": LOAD_FILE, "api": LOAD_API}
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def render(template_str: str, pipeline_name: str) -> str:
    """Fill the ``pipeline_name`` placeholder of a file template.

    The name is first turned into an identifier-safe form, because the
    templates use it as a SQL table name, a logger name and a file-name stem:
    every character that is not alphanumeric or ``_`` becomes ``_`` (so
    ``my-pipe`` -> ``my_pipe``), and a leading ``_`` is added when the result
    is empty or starts with a digit.

    Args:
        template_str: Template text in ``string.Template`` syntax.
        pipeline_name: Name given on the command line.

    Returns:
        The rendered text. Placeholders other than ``pipeline_name`` are left
        untouched (``safe_substitute``).
    """
    safe_name = "".join(
        ch if ch.isalnum() or ch == "_" else "_" for ch in pipeline_name
    )
    if not safe_name or safe_name[0].isdigit():
        safe_name = f"_{safe_name}"
    return Template(template_str).safe_substitute(pipeline_name=safe_name)
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def create_pipeline(name: str, source: str, target: str) -> None:
    """Create directory ``name`` and write the scaffolded pipeline files into it.

    Args:
        name: Pipeline name; used as the directory path and rendered into the
            file templates.
        source: Key into ``EXTRACT_TEMPLATES`` ("csv", "api" or "db").
        target: Key into ``LOAD_TEMPLATES`` ("db", "file" or "api").

    Exits the process with status 1 if the directory already exists;
    otherwise prints a summary of the created files and the next steps.
    """
    base = Path(name)
    if base.exists():
        # Diagnostics go to stderr so stdout stays clean for piping.
        print(
            f"Error: directory '{base}' already exists. Choose a different name.",
            file=sys.stderr,
        )
        sys.exit(1)
    base.mkdir(parents=True)

    files = {
        "extract.py": render(EXTRACT_TEMPLATES[source], name),
        "transform.py": render(TRANSFORM_TEMPLATE, name),
        "load.py": render(LOAD_TEMPLATES[target], name),
        "pipeline.py": render(PIPELINE_TEMPLATE, name),
        "requirements.txt": render(REQUIREMENTS_TEMPLATE, name),
    }

    created = []
    for filename, content in files.items():
        path = base / filename
        path.write_text(content, encoding="utf-8")
        created.append(str(path))

    print(f"\nPipeline '{name}' created successfully!\n")
    print(f" Source : {source}")
    print(f" Target : {target}")
    print("\nFiles created:")
    for f in created:
        print(f" {f}")
    print("\nNext steps:")
    print(f" 1. cd {name}")
    print(" 2. Review extract.py and update source configuration")
    print(" 3. Customize transform.py with your business logic")
    print(" 4. Review load.py and configure target destination")
    print(" 5. pip install -r requirements.txt # add packages as needed")
    print(" 6. python pipeline.py")
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def main():
    """Command-line entry point: parse the arguments and scaffold the pipeline."""
    cli = argparse.ArgumentParser(
        description="Scaffold a new data pipeline (extract → transform → load)"
    )
    cli.add_argument("name", help="Pipeline name (used as directory name)")

    # The two optional flags share one shape: flag, allowed values, default.
    option_specs = (
        ("--source", ["csv", "api", "db"], "csv", "source"),
        ("--target", ["db", "file", "api"], "db", "target"),
    )
    for flag, allowed, fallback, label in option_specs:
        cli.add_argument(
            flag,
            choices=allowed,
            default=fallback,
            help=f"Data {label} type (default: {fallback})",
        )

    opts = cli.parse_args()
    create_pipeline(opts.name, opts.source, opts.target)
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
if __name__ == "__main__":
|
|
444
|
+
main()
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"evals": [
|
|
3
|
+
{
|
|
4
|
+
"id": "eval-01-strategy-pattern-if-else",
|
|
5
|
+
"prompt": "Review this Java code:\n\n```java\npublic class PaymentProcessor {\n public void processPayment(String paymentType, double amount) {\n if (paymentType.equals(\"CREDIT_CARD\")) {\n System.out.println(\"Validating credit card...\");\n System.out.println(\"Charging $\" + amount + \" to credit card\");\n System.out.println(\"Sending credit card receipt\");\n } else if (paymentType.equals(\"PAYPAL\")) {\n System.out.println(\"Redirecting to PayPal...\");\n System.out.println(\"Confirming PayPal payment of $\" + amount);\n System.out.println(\"Sending PayPal confirmation email\");\n } else if (paymentType.equals(\"CRYPTO\")) {\n System.out.println(\"Opening crypto wallet...\");\n System.out.println(\"Broadcasting transaction of $\" + amount);\n System.out.println(\"Waiting for blockchain confirmation\");\n } else if (paymentType.equals(\"BANK_TRANSFER\")) {\n System.out.println(\"Initiating bank transfer...\");\n System.out.println(\"Transferring $\" + amount + \" via ACH\");\n System.out.println(\"Sending bank transfer confirmation\");\n } else {\n throw new IllegalArgumentException(\"Unknown payment type: \" + paymentType);\n }\n }\n \n public double calculateFee(String paymentType, double amount) {\n if (paymentType.equals(\"CREDIT_CARD\")) {\n return amount * 0.029 + 0.30;\n } else if (paymentType.equals(\"PAYPAL\")) {\n return amount * 0.034 + 0.30;\n } else if (paymentType.equals(\"CRYPTO\")) {\n return amount * 0.01;\n } else if (paymentType.equals(\"BANK_TRANSFER\")) {\n return 0.25;\n } else {\n throw new IllegalArgumentException(\"Unknown payment type: \" + paymentType);\n }\n }\n}\n```",
|
|
6
|
+
"expectations": [
|
|
7
|
+
"Identifies this as a prime Strategy pattern candidate — the payment algorithm varies by type",
|
|
8
|
+
"Calls out the Open-Closed Principle violation: adding a new payment method requires modifying this class",
|
|
9
|
+
"Notes that the parallel if/else chains in processPayment and calculateFee are a code smell (duplicated conditional logic)",
|
|
10
|
+
"Recommends extracting a PaymentStrategy interface with processPayment(double amount) and calculateFee(double amount) methods",
|
|
11
|
+
"Suggests concrete strategy classes: CreditCardStrategy, PayPalStrategy, CryptoStrategy, BankTransferStrategy",
|
|
12
|
+
"Describes the Context class (PaymentProcessor) holding a reference to the strategy interface",
|
|
13
|
+
"Notes that using a String 'paymentType' is fragile — a Map<String, PaymentStrategy> or enum-based lookup is safer",
|
|
14
|
+
"Provides a sketch of the refactored interface and at least one concrete implementation",
|
|
15
|
+
"References the principle: 'Encapsulate what varies' — payment behavior is what varies here"
|
|
16
|
+
]
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"id": "eval-02-unnecessary-singleton",
|
|
20
|
+
"prompt": "Review this Java code:\n\n```java\npublic class TaxCalculator {\n private static TaxCalculator instance;\n private double vatRate = 0.20;\n private double salesTaxRate = 0.08;\n \n private TaxCalculator() {}\n \n public static TaxCalculator getInstance() {\n if (instance == null) {\n instance = new TaxCalculator();\n }\n return instance;\n }\n \n public double calculateVAT(double price) {\n return price * vatRate;\n }\n \n public double calculateSalesTax(double price) {\n return price * salesTaxRate;\n }\n \n public void setVatRate(double vatRate) {\n this.vatRate = vatRate;\n }\n \n public void setSalesTaxRate(double salesTaxRate) {\n this.salesTaxRate = salesTaxRate;\n }\n}\n\n// Usage in application code:\nTaxCalculator calc = TaxCalculator.getInstance();\ncalc.setVatRate(0.23); // Portuguese VAT\ndouble total = calc.calculateVAT(orderAmount);\n```",
|
|
21
|
+
"expectations": [
|
|
22
|
+
"Identifies the Singleton pattern and flags it as misapplied here",
|
|
23
|
+
"Explains why global mutable state is dangerous: setVatRate() and setSalesTaxRate() make this Singleton a shared mutable object — one caller's rate change affects all other callers",
|
|
24
|
+
"Notes the race condition: getInstance() is not thread-safe (no synchronization, no double-checked locking, no holder idiom)",
|
|
25
|
+
"Points out that a stateless or value-parameterized object has no reason to be a Singleton — TaxCalculator is just computation",
|
|
26
|
+
"Recommends removing the Singleton and using simple instantiation or dependency injection",
|
|
27
|
+
"Suggests making TaxCalculator immutable: accept rates in the constructor, remove setters",
|
|
28
|
+
"May suggest using the Strategy pattern if tax rules vary by jurisdiction",
|
|
29
|
+
"References the anti-pattern: 'Singleton abuse — using Singleton as a global variable container rather than for genuine single-instance needs'"
|
|
30
|
+
]
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"id": "eval-03-observer-pattern-correct",
|
|
34
|
+
"prompt": "Review this Java code:\n\n```java\npublic interface StockObserver {\n void update(String ticker, double price, double changePercent);\n}\n\npublic class StockMarket {\n private final Map<String, List<StockObserver>> observers = new HashMap<>();\n private final Map<String, Double> prices = new HashMap<>();\n \n public void registerObserver(String ticker, StockObserver observer) {\n observers.computeIfAbsent(ticker, k -> new ArrayList<>()).add(observer);\n }\n \n public void removeObserver(String ticker, StockObserver observer) {\n List<StockObserver> tickerObservers = observers.get(ticker);\n if (tickerObservers != null) {\n tickerObservers.remove(observer);\n }\n }\n \n public void updatePrice(String ticker, double newPrice) {\n double oldPrice = prices.getOrDefault(ticker, newPrice);\n prices.put(ticker, newPrice);\n double changePercent = oldPrice != 0 ? ((newPrice - oldPrice) / oldPrice) * 100 : 0;\n notifyObservers(ticker, newPrice, changePercent);\n }\n \n private void notifyObservers(String ticker, double price, double changePercent) {\n List<StockObserver> tickerObservers = observers.getOrDefault(ticker, Collections.emptyList());\n for (StockObserver observer : new ArrayList<>(tickerObservers)) {\n observer.update(ticker, price, changePercent);\n }\n }\n}\n\npublic class PriceAlertService implements StockObserver {\n private final double threshold;\n \n public PriceAlertService(double threshold) {\n this.threshold = threshold;\n }\n \n @Override\n public void update(String ticker, double price, double changePercent) {\n if (Math.abs(changePercent) >= threshold) {\n System.out.println(\"ALERT: \" + ticker + \" moved \" + changePercent + \"% to $\" + price);\n }\n }\n}\n```",
|
|
35
|
+
"expectations": [
|
|
36
|
+
"Recognizes this as a correctly implemented Observer pattern and says so explicitly",
|
|
37
|
+
"Praises the use of a StockObserver interface (programming to an interface, not an implementation)",
|
|
38
|
+
"Praises the removeObserver method — the SKILL.md specifically flags 'Observer memory leaks — registered observers never unregistered' as an anti-pattern to catch",
|
|
39
|
+
"Praises the defensive copy 'new ArrayList<>(tickerObservers)' in notifyObservers, which prevents ConcurrentModificationException if an observer deregisters during notification",
|
|
40
|
+
"Praises per-ticker observer registration — observers only receive events for the tickers they care about",
|
|
41
|
+
"Does NOT manufacture fake issues just to have something to say",
|
|
42
|
+
"May offer optional improvements (e.g., thread safety with CopyOnWriteArrayList, using java.util.EventListener, or noting that prices map isn't thread-safe) but clearly frames these as non-critical suggestions"
|
|
43
|
+
]
|
|
44
|
+
}
|
|
45
|
+
]
|
|
46
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# After
|
|
2
|
+
|
|
3
|
+
Each payment method is extracted into its own `PaymentStrategy` implementation behind a common interface, making it trivial to add new methods without touching existing code.
|
|
4
|
+
|
|
5
|
+
```kotlin
|
|
6
|
+
// Strategy interface — the contract every payment method must fulfill
|
|
7
|
+
/**
 * Common contract implemented by every payment method.
 *
 * Implementations take an [Order] and report the outcome as a [PaymentResult].
 */
interface PaymentStrategy {
    /** Processes payment for [order] and returns the resulting [PaymentResult]. */
    fun process(order: Order): PaymentResult
}
|
|
10
|
+
|
|
11
|
+
// One class per payment method — focused, testable, replaceable
|
|
12
|
+
/**
 * [PaymentStrategy] that pays by credit card.
 *
 * Tokenizes the order's card number through `CreditCardGateway`, charges the
 * order total against that token, and maps the charge outcome to a
 * [PaymentResult].
 */
class CreditCardPaymentStrategy : PaymentStrategy {
    override fun process(order: Order): PaymentResult {
        val cardToken = CreditCardGateway.tokenize(order.cardNumber)
        val outcome = CreditCardGateway.charge(cardToken, order.totalAmount)
        return PaymentResult(
            success = outcome.success,
            transactionId = outcome.id,
            // Only surface the gateway's error when the charge did not succeed.
            errorMessage = outcome.error.takeUnless { outcome.success }
        )
    }
}
|
|
20
|
+
|
|
21
|
+
/**
 * [PaymentStrategy] that pays through PayPal.
 *
 * Creates a session for the order's PayPal e-mail via `PayPalClient`, executes
 * a payment for the order total in that session, and reports the payment's
 * `approved` flag and `token` in the [PaymentResult].
 */
class PayPalPaymentStrategy : PaymentStrategy {
    override fun process(order: Order): PaymentResult {
        // Pipe the freshly created session straight into the payment call.
        val outcome = PayPalClient
            .createSession(order.paypalEmail)
            .let { session -> PayPalClient.executePayment(session, order.totalAmount) }
        return PaymentResult(success = outcome.approved, transactionId = outcome.token)
    }
}
|
|
28
|
+
|
|
29
|
+
/**
 * [PaymentStrategy] that pays by bank transfer.
 *
 * Initiates a transfer for the order's IBAN and total via
 * `BankTransferService`. A non-null reference counts as success and is used as
 * the transaction id; a null reference yields a failed [PaymentResult] with a
 * fixed error message.
 */
class BankTransferPaymentStrategy : PaymentStrategy {
    override fun process(order: Order): PaymentResult {
        val reference = BankTransferService.initiate(order.iban, order.totalAmount)
        val initiated = reference != null
        return PaymentResult(
            success = initiated,
            transactionId = reference,
            errorMessage = if (initiated) null else "Bank transfer initiation failed"
        )
    }
}
|
|
36
|
+
|
|
37
|
+
// Context: delegates entirely to the injected strategy
|
|
38
|
+
/**
 * Strategy context: holds the injected [PaymentStrategy] and forwards every
 * payment request to it without inspecting which payment method it is.
 */
class PaymentProcessor(private val strategy: PaymentStrategy) {
    /** Processes [order] by delegating to the injected strategy. */
    fun process(order: Order): PaymentResult {
        return strategy.process(order)
    }
}
|
|
41
|
+
|
|
42
|
+
// Usage — caller selects strategy; PaymentProcessor is unaware of the type
|
|
43
|
+
// The concrete strategy is picked here, at construction time.
val processor: PaymentProcessor = PaymentProcessor(strategy = CreditCardPaymentStrategy())
val result: PaymentResult = processor.process(order = order)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Key improvements:
|
|
48
|
+
- If/else chain replaced with Strategy pattern — adding a new payment method requires a new class only, no changes to `PaymentProcessor` (Open-Closed Principle)
|
|
49
|
+
- Each strategy is independently testable with a mock `Order`
|
|
50
|
+
- `PaymentProcessor` depends on the `PaymentStrategy` abstraction, not concrete gateway classes (Dependency Inversion Principle)
|
|
51
|
+
- Responsibility for "how to pay" is encapsulated inside each strategy class (Encapsulate What Varies)
|
|
52
|
+
- Caller selects the strategy through constructor injection, so the payment method can be chosen at runtime when the processor is created and replaced with a test double in tests
|