@booklib/skills 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +122 -0
- package/README.md +20 -2
- package/ROADMAP.md +36 -0
- package/animation-at-work/evals/evals.json +44 -0
- package/animation-at-work/examples/after.md +64 -0
- package/animation-at-work/examples/before.md +35 -0
- package/animation-at-work/scripts/audit_animations.py +295 -0
- package/bin/skills.js +552 -42
- package/clean-code-reviewer/SKILL.md +109 -1
- package/clean-code-reviewer/evals/evals.json +121 -3
- package/clean-code-reviewer/examples/after.md +48 -0
- package/clean-code-reviewer/examples/before.md +33 -0
- package/clean-code-reviewer/references/api_reference.md +158 -0
- package/clean-code-reviewer/references/practices-catalog.md +282 -0
- package/clean-code-reviewer/references/review-checklist.md +254 -0
- package/clean-code-reviewer/scripts/pre-review.py +206 -0
- package/data-intensive-patterns/evals/evals.json +43 -0
- package/data-intensive-patterns/examples/after.md +61 -0
- package/data-intensive-patterns/examples/before.md +38 -0
- package/data-intensive-patterns/scripts/adr.py +213 -0
- package/data-pipelines/evals/evals.json +45 -0
- package/data-pipelines/examples/after.md +97 -0
- package/data-pipelines/examples/before.md +37 -0
- package/data-pipelines/scripts/new_pipeline.py +444 -0
- package/design-patterns/evals/evals.json +46 -0
- package/design-patterns/examples/after.md +52 -0
- package/design-patterns/examples/before.md +29 -0
- package/design-patterns/scripts/scaffold.py +807 -0
- package/domain-driven-design/SKILL.md +120 -0
- package/domain-driven-design/evals/evals.json +48 -0
- package/domain-driven-design/examples/after.md +80 -0
- package/domain-driven-design/examples/before.md +43 -0
- package/domain-driven-design/scripts/scaffold.py +421 -0
- package/effective-java/evals/evals.json +46 -0
- package/effective-java/examples/after.md +83 -0
- package/effective-java/examples/before.md +37 -0
- package/effective-java/scripts/checkstyle_setup.py +211 -0
- package/effective-kotlin/evals/evals.json +45 -0
- package/effective-kotlin/examples/after.md +36 -0
- package/effective-kotlin/examples/before.md +38 -0
- package/effective-python/evals/evals.json +44 -0
- package/effective-python/examples/after.md +56 -0
- package/effective-python/examples/before.md +40 -0
- package/effective-python/references/api_reference.md +218 -0
- package/effective-python/references/practices-catalog.md +483 -0
- package/effective-python/references/review-checklist.md +190 -0
- package/effective-python/scripts/lint.py +173 -0
- package/kotlin-in-action/evals/evals.json +43 -0
- package/kotlin-in-action/examples/after.md +53 -0
- package/kotlin-in-action/examples/before.md +39 -0
- package/kotlin-in-action/scripts/setup_detekt.py +224 -0
- package/lean-startup/evals/evals.json +43 -0
- package/lean-startup/examples/after.md +80 -0
- package/lean-startup/examples/before.md +34 -0
- package/lean-startup/scripts/new_experiment.py +286 -0
- package/microservices-patterns/SKILL.md +140 -0
- package/microservices-patterns/evals/evals.json +45 -0
- package/microservices-patterns/examples/after.md +69 -0
- package/microservices-patterns/examples/before.md +40 -0
- package/microservices-patterns/scripts/new_service.py +583 -0
- package/package.json +2 -8
- package/refactoring-ui/evals/evals.json +45 -0
- package/refactoring-ui/examples/after.md +85 -0
- package/refactoring-ui/examples/before.md +58 -0
- package/refactoring-ui/scripts/audit_css.py +250 -0
- package/skill-router/SKILL.md +142 -0
- package/skill-router/evals/evals.json +38 -0
- package/skill-router/examples/after.md +63 -0
- package/skill-router/examples/before.md +39 -0
- package/skill-router/references/api_reference.md +24 -0
- package/skill-router/references/routing-heuristics.md +89 -0
- package/skill-router/references/skill-catalog.md +156 -0
- package/skill-router/scripts/route.py +266 -0
- package/storytelling-with-data/evals/evals.json +47 -0
- package/storytelling-with-data/examples/after.md +50 -0
- package/storytelling-with-data/examples/before.md +33 -0
- package/storytelling-with-data/scripts/chart_review.py +301 -0
- package/system-design-interview/evals/evals.json +45 -0
- package/system-design-interview/examples/after.md +94 -0
- package/system-design-interview/examples/before.md +27 -0
- package/system-design-interview/scripts/new_design.py +421 -0
- package/using-asyncio-python/evals/evals.json +43 -0
- package/using-asyncio-python/examples/after.md +68 -0
- package/using-asyncio-python/examples/before.md +39 -0
- package/using-asyncio-python/scripts/check_blocking.py +270 -0
- package/web-scraping-python/evals/evals.json +46 -0
- package/web-scraping-python/examples/after.md +109 -0
- package/web-scraping-python/examples/before.md +40 -0
- package/web-scraping-python/scripts/new_scraper.py +231 -0
- /package/{effective-python-skill → effective-python}/SKILL.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-01-pythonic-thinking.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-02-lists-and-dicts.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-03-functions.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-04-comprehensions-generators.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-05-classes-interfaces.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-06-metaclasses-attributes.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-07-concurrency.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-08-robustness-performance.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-09-testing-debugging.md +0 -0
- /package/{effective-python-skill → effective-python}/ref-10-collaboration.md +0 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
new_scraper.py — Scaffold a best-practice web scraper.
|
|
4
|
+
Usage: python new_scraper.py <scraper-name> <target-url>
|
|
5
|
+
|
|
6
|
+
Generates <scraper-name>.py — a real, runnable scraper with retry, rate limiting,
|
|
7
|
+
robots.txt checking, BeautifulSoup parsing, and CSV output.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import sys
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from string import Template
|
|
13
|
+
|
|
14
|
+
# NOTE: string.Template treats "$scraper_name_output" as a SINGLE identifier,
# so multi-part placeholders must be brace-delimited: ${scraper_name}_output.
# Without the braces, safe_substitute() left the token unreplaced and every
# generated scraper shipped with a literal "$scraper_name_output.csv".
SCRAPER_TEMPLATE = '''\
#!/usr/bin/env python3
"""
$scraper_name — scraper for $target_url
Generated by new_scraper.py. Edit the parse() function for your target site.
"""

import csv
import logging
import time
import urllib.parse
import urllib.robotparser
from datetime import datetime, timezone
from pathlib import Path

try:
    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry
    from bs4 import BeautifulSoup
except ImportError as exc:
    raise SystemExit(
        f"Missing dependency: {exc}\\n"
        "Install with: pip install requests beautifulsoup4"
    ) from exc

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S",
)
logger = logging.getLogger(__name__)

TARGET_URL = "$target_url"
OUTPUT_CSV = "${scraper_name}_output.csv"
REQUEST_DELAY = 1.5  # seconds between requests — be polite
USER_AGENT = "research-bot/1.0 (+https://example.com/bot)"


# ---------------------------------------------------------------------------
# Session with retry
# ---------------------------------------------------------------------------

def make_session() -> requests.Session:
    """Build a requests Session with automatic retries on transient errors."""
    session = requests.Session()
    retry_strategy = Retry(
        total=3,
        backoff_factor=1.5,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["GET", "HEAD"],
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)
    session.mount("https://", adapter)
    session.mount("http://", adapter)
    session.headers.update({"User-Agent": USER_AGENT})
    return session


# ---------------------------------------------------------------------------
# Robots.txt
# ---------------------------------------------------------------------------

def check_robots(url: str, user_agent: str = USER_AGENT) -> bool:
    """Return True if scraping the URL is permitted by robots.txt."""
    parsed = urllib.parse.urlparse(url)
    robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt"
    rp = urllib.robotparser.RobotFileParser()
    rp.set_url(robots_url)
    try:
        rp.read()
        allowed = rp.can_fetch(user_agent, url)
        if not allowed:
            logger.warning("robots.txt disallows scraping: %s", url)
        return allowed
    except Exception as exc:
        logger.warning("Could not read robots.txt (%s) — proceeding cautiously.", exc)
        return True  # assume allowed if robots.txt is unreachable


# ---------------------------------------------------------------------------
# Parse — EDIT THIS FUNCTION for your target site
# ---------------------------------------------------------------------------

def parse(html: str, source_url: str) -> list[dict]:
    """
    Extract structured data from a page. Returns a list of dicts.
    Edit the selectors below for your actual target.
    """
    soup = BeautifulSoup(html, "html.parser")
    records = []

    # Example: scrape all hyperlinks with their text
    # Replace this block with selectors for your target site.
    for link in soup.find_all("a", href=True):
        href = link["href"]
        text = link.get_text(strip=True)
        if not text:
            continue
        # Resolve relative URLs
        full_url = urllib.parse.urljoin(source_url, href)
        records.append({
            "text": text,
            "url": full_url,
            "source_page": source_url,
            # timezone-aware UTC stamp; datetime.utcnow() is deprecated
            "scraped_at": datetime.now(timezone.utc).isoformat(),
        })

    return records


# ---------------------------------------------------------------------------
# Core fetch + crawl logic
# ---------------------------------------------------------------------------

def fetch_page(session: requests.Session, url: str) -> str | None:
    """Fetch a single page and return HTML. Returns None on failure."""
    try:
        response = session.get(url, timeout=20)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as exc:
        logger.error("Failed to fetch %s: %s", url, exc)
        return None


def scrape(urls: list[str] | None = None) -> list[dict]:
    """
    Main scrape loop. Pass a list of URLs or leave None to scrape TARGET_URL.
    Respects robots.txt and rate-limits requests.
    """
    urls = urls or [TARGET_URL]
    session = make_session()
    all_records: list[dict] = []

    for i, url in enumerate(urls):
        if not check_robots(url):
            logger.info("Skipping disallowed URL: %s", url)
            continue

        logger.info("Fetching (%d/%d): %s", i + 1, len(urls), url)
        html = fetch_page(session, url)
        if html is None:
            continue

        records = parse(html, url)
        logger.info(" -> %d records found", len(records))
        all_records.extend(records)

        if i < len(urls) - 1:
            time.sleep(REQUEST_DELAY)  # rate limit between pages

    return all_records


# ---------------------------------------------------------------------------
# CSV output
# ---------------------------------------------------------------------------

def save_csv(records: list[dict], path: str = OUTPUT_CSV) -> None:
    """Write records to a CSV file."""
    if not records:
        logger.warning("No records to save.")
        return
    out = Path(path)
    with out.open("w", newline="", encoding="utf-8") as fh:
        writer = csv.DictWriter(fh, fieldnames=records[0].keys())
        writer.writeheader()
        writer.writerows(records)
    logger.info("Saved %d records to %s", len(records), out)


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    records = scrape()
    save_csv(records)
'''
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def main() -> None:
    """Scaffold a new scraper script from CLI args.

    Usage: python new_scraper.py <scraper-name> <target-url>

    Writes ./<scraper-name>.py rendered from SCRAPER_TEMPLATE and makes it
    executable. Exits with status 1 on missing args or if the file exists.
    """
    if len(sys.argv) < 3:
        print("Usage: python new_scraper.py <scraper-name> <target-url>")
        sys.exit(1)

    scraper_name = sys.argv[1]
    target_url = sys.argv[2]

    # Basic URL sanity check — warn but continue; the user can edit later.
    if not target_url.startswith(("http://", "https://")):
        print(f"Warning: target URL '{target_url}' doesn't look like a full URL.")

    output_path = Path(f"{scraper_name}.py")
    if output_path.exists():
        # Refuse to clobber an existing (possibly hand-edited) scraper.
        print(f"Error: '{output_path}' already exists. Choose a different name.")
        sys.exit(1)

    # Hyphens are fine in the file name but not in the identifiers the
    # template builds from the name, so normalise once here.
    safe_name = scraper_name.replace("-", "_")
    # safe_substitute leaves unknown $tokens untouched instead of raising.
    content = Template(SCRAPER_TEMPLATE).safe_substitute(
        scraper_name=safe_name,
        target_url=target_url,
    )
    output_path.write_text(content, encoding="utf-8")
    output_path.chmod(0o755)  # generated script is directly executable

    print(f"\nScraper '{scraper_name}' created: {output_path}\n")
    print(f" Target URL : {target_url}")
    print(f" Output CSV : {safe_name}_output.csv")
    # Placeholder-free messages need no f-prefix (ruff F541).
    print("\nNext steps:")
    print(" 1. pip install requests beautifulsoup4")
    print(f" 2. Edit the parse() function in {output_path} for your target site")
    print(f" 3. python {output_path}")


if __name__ == "__main__":
    main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|