@booklib/skills 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/CONTRIBUTING.md +122 -0
  2. package/README.md +20 -2
  3. package/ROADMAP.md +36 -0
  4. package/animation-at-work/evals/evals.json +44 -0
  5. package/animation-at-work/examples/after.md +64 -0
  6. package/animation-at-work/examples/before.md +35 -0
  7. package/animation-at-work/scripts/audit_animations.py +295 -0
  8. package/bin/skills.js +552 -42
  9. package/clean-code-reviewer/SKILL.md +109 -1
  10. package/clean-code-reviewer/evals/evals.json +121 -3
  11. package/clean-code-reviewer/examples/after.md +48 -0
  12. package/clean-code-reviewer/examples/before.md +33 -0
  13. package/clean-code-reviewer/references/api_reference.md +158 -0
  14. package/clean-code-reviewer/references/practices-catalog.md +282 -0
  15. package/clean-code-reviewer/references/review-checklist.md +254 -0
  16. package/clean-code-reviewer/scripts/pre-review.py +206 -0
  17. package/data-intensive-patterns/evals/evals.json +43 -0
  18. package/data-intensive-patterns/examples/after.md +61 -0
  19. package/data-intensive-patterns/examples/before.md +38 -0
  20. package/data-intensive-patterns/scripts/adr.py +213 -0
  21. package/data-pipelines/evals/evals.json +45 -0
  22. package/data-pipelines/examples/after.md +97 -0
  23. package/data-pipelines/examples/before.md +37 -0
  24. package/data-pipelines/scripts/new_pipeline.py +444 -0
  25. package/design-patterns/evals/evals.json +46 -0
  26. package/design-patterns/examples/after.md +52 -0
  27. package/design-patterns/examples/before.md +29 -0
  28. package/design-patterns/scripts/scaffold.py +807 -0
  29. package/domain-driven-design/SKILL.md +120 -0
  30. package/domain-driven-design/evals/evals.json +48 -0
  31. package/domain-driven-design/examples/after.md +80 -0
  32. package/domain-driven-design/examples/before.md +43 -0
  33. package/domain-driven-design/scripts/scaffold.py +421 -0
  34. package/effective-java/evals/evals.json +46 -0
  35. package/effective-java/examples/after.md +83 -0
  36. package/effective-java/examples/before.md +37 -0
  37. package/effective-java/scripts/checkstyle_setup.py +211 -0
  38. package/effective-kotlin/evals/evals.json +45 -0
  39. package/effective-kotlin/examples/after.md +36 -0
  40. package/effective-kotlin/examples/before.md +38 -0
  41. package/effective-python/evals/evals.json +44 -0
  42. package/effective-python/examples/after.md +56 -0
  43. package/effective-python/examples/before.md +40 -0
  44. package/effective-python/references/api_reference.md +218 -0
  45. package/effective-python/references/practices-catalog.md +483 -0
  46. package/effective-python/references/review-checklist.md +190 -0
  47. package/effective-python/scripts/lint.py +173 -0
  48. package/kotlin-in-action/evals/evals.json +43 -0
  49. package/kotlin-in-action/examples/after.md +53 -0
  50. package/kotlin-in-action/examples/before.md +39 -0
  51. package/kotlin-in-action/scripts/setup_detekt.py +224 -0
  52. package/lean-startup/evals/evals.json +43 -0
  53. package/lean-startup/examples/after.md +80 -0
  54. package/lean-startup/examples/before.md +34 -0
  55. package/lean-startup/scripts/new_experiment.py +286 -0
  56. package/microservices-patterns/SKILL.md +140 -0
  57. package/microservices-patterns/evals/evals.json +45 -0
  58. package/microservices-patterns/examples/after.md +69 -0
  59. package/microservices-patterns/examples/before.md +40 -0
  60. package/microservices-patterns/scripts/new_service.py +583 -0
  61. package/package.json +2 -8
  62. package/refactoring-ui/evals/evals.json +45 -0
  63. package/refactoring-ui/examples/after.md +85 -0
  64. package/refactoring-ui/examples/before.md +58 -0
  65. package/refactoring-ui/scripts/audit_css.py +250 -0
  66. package/skill-router/SKILL.md +142 -0
  67. package/skill-router/evals/evals.json +38 -0
  68. package/skill-router/examples/after.md +63 -0
  69. package/skill-router/examples/before.md +39 -0
  70. package/skill-router/references/api_reference.md +24 -0
  71. package/skill-router/references/routing-heuristics.md +89 -0
  72. package/skill-router/references/skill-catalog.md +156 -0
  73. package/skill-router/scripts/route.py +266 -0
  74. package/storytelling-with-data/evals/evals.json +47 -0
  75. package/storytelling-with-data/examples/after.md +50 -0
  76. package/storytelling-with-data/examples/before.md +33 -0
  77. package/storytelling-with-data/scripts/chart_review.py +301 -0
  78. package/system-design-interview/evals/evals.json +45 -0
  79. package/system-design-interview/examples/after.md +94 -0
  80. package/system-design-interview/examples/before.md +27 -0
  81. package/system-design-interview/scripts/new_design.py +421 -0
  82. package/using-asyncio-python/evals/evals.json +43 -0
  83. package/using-asyncio-python/examples/after.md +68 -0
  84. package/using-asyncio-python/examples/before.md +39 -0
  85. package/using-asyncio-python/scripts/check_blocking.py +270 -0
  86. package/web-scraping-python/evals/evals.json +46 -0
  87. package/web-scraping-python/examples/after.md +109 -0
  88. package/web-scraping-python/examples/before.md +40 -0
  89. package/web-scraping-python/scripts/new_scraper.py +231 -0
  90. /package/{effective-python-skill → effective-python}/SKILL.md +0 -0
  91. /package/{effective-python-skill → effective-python}/ref-01-pythonic-thinking.md +0 -0
  92. /package/{effective-python-skill → effective-python}/ref-02-lists-and-dicts.md +0 -0
  93. /package/{effective-python-skill → effective-python}/ref-03-functions.md +0 -0
  94. /package/{effective-python-skill → effective-python}/ref-04-comprehensions-generators.md +0 -0
  95. /package/{effective-python-skill → effective-python}/ref-05-classes-interfaces.md +0 -0
  96. /package/{effective-python-skill → effective-python}/ref-06-metaclasses-attributes.md +0 -0
  97. /package/{effective-python-skill → effective-python}/ref-07-concurrency.md +0 -0
  98. /package/{effective-python-skill → effective-python}/ref-08-robustness-performance.md +0 -0
  99. /package/{effective-python-skill → effective-python}/ref-09-testing-debugging.md +0 -0
  100. /package/{effective-python-skill → effective-python}/ref-10-collaboration.md +0 -0
@@ -0,0 +1,231 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ new_scraper.py — Scaffold a best-practice web scraper.
4
+ Usage: python new_scraper.py <scraper-name> <target-url>
5
+
6
+ Generates <scraper-name>.py — a real, runnable scraper with retry, rate limiting,
7
+ robots.txt checking, BeautifulSoup parsing, and CSV output.
8
+ """
9
+
10
+ import sys
11
+ from pathlib import Path
12
+ from string import Template
13
+
14
+ SCRAPER_TEMPLATE = '''\
15
+ #!/usr/bin/env python3
16
+ """
17
+ $scraper_name — scraper for $target_url
18
+ Generated by new_scraper.py. Edit the parse() function for your target site.
19
+ """
20
+
21
+ import csv
22
+ import logging
23
+ import time
24
+ import urllib.parse
25
+ import urllib.robotparser
26
+ from datetime import datetime
27
+ from pathlib import Path
28
+
29
+ try:
30
+ import requests
31
+ from requests.adapters import HTTPAdapter
32
+ from urllib3.util.retry import Retry
33
+ from bs4 import BeautifulSoup
34
+ except ImportError as exc:
35
+ raise SystemExit(
36
+ f"Missing dependency: {exc}\\n"
37
+ "Install with: pip install requests beautifulsoup4"
38
+ ) from exc
39
+
40
+ logging.basicConfig(
41
+ level=logging.INFO,
42
+ format="%(asctime)s %(levelname)-8s %(message)s",
43
+ datefmt="%Y-%m-%dT%H:%M:%S",
44
+ )
45
+ logger = logging.getLogger(__name__)
46
+
47
+ TARGET_URL = "$target_url"
48
+ OUTPUT_CSV = "$scraper_name_output.csv"
49
+ REQUEST_DELAY = 1.5 # seconds between requests — be polite
50
+ USER_AGENT = "research-bot/1.0 (+https://example.com/bot)"
51
+
52
+
53
+ # ---------------------------------------------------------------------------
54
+ # Session with retry
55
+ # ---------------------------------------------------------------------------
56
+
57
+ def make_session() -> requests.Session:
58
+ """Build a requests Session with automatic retries on transient errors."""
59
+ session = requests.Session()
60
+ retry_strategy = Retry(
61
+ total=3,
62
+ backoff_factor=1.5,
63
+ status_forcelist=[429, 500, 502, 503, 504],
64
+ allowed_methods=["GET", "HEAD"],
65
+ )
66
+ adapter = HTTPAdapter(max_retries=retry_strategy)
67
+ session.mount("https://", adapter)
68
+ session.mount("http://", adapter)
69
+ session.headers.update({"User-Agent": USER_AGENT})
70
+ return session
71
+
72
+
73
+ # ---------------------------------------------------------------------------
74
+ # Robots.txt
75
+ # ---------------------------------------------------------------------------
76
+
77
+ def check_robots(url: str, user_agent: str = USER_AGENT) -> bool:
78
+ """Return True if scraping the URL is permitted by robots.txt."""
79
+ parsed = urllib.parse.urlparse(url)
80
+ robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt"
81
+ rp = urllib.robotparser.RobotFileParser()
82
+ rp.set_url(robots_url)
83
+ try:
84
+ rp.read()
85
+ allowed = rp.can_fetch(user_agent, url)
86
+ if not allowed:
87
+ logger.warning("robots.txt disallows scraping: %s", url)
88
+ return allowed
89
+ except Exception as exc:
90
+ logger.warning("Could not read robots.txt (%s) — proceeding cautiously.", exc)
91
+ return True # assume allowed if robots.txt is unreachable
92
+
93
+
94
+ # ---------------------------------------------------------------------------
95
+ # Parse — EDIT THIS FUNCTION for your target site
96
+ # ---------------------------------------------------------------------------
97
+
98
+ def parse(html: str, source_url: str) -> list[dict]:
99
+ """
100
+ Extract structured data from a page. Returns a list of dicts.
101
+ Edit the selectors below for your actual target.
102
+ """
103
+ soup = BeautifulSoup(html, "html.parser")
104
+ records = []
105
+
106
+ # Example: scrape all hyperlinks with their text
107
+ # Replace this block with selectors for your target site.
108
+ for link in soup.find_all("a", href=True):
109
+ href = link["href"]
110
+ text = link.get_text(strip=True)
111
+ if not text:
112
+ continue
113
+ # Resolve relative URLs
114
+ full_url = urllib.parse.urljoin(source_url, href)
115
+ records.append({
116
+ "text": text,
117
+ "url": full_url,
118
+ "source_page": source_url,
119
+ "scraped_at": datetime.utcnow().isoformat(),
120
+ })
121
+
122
+ return records
123
+
124
+
125
+ # ---------------------------------------------------------------------------
126
+ # Core fetch + crawl logic
127
+ # ---------------------------------------------------------------------------
128
+
129
+ def fetch_page(session: requests.Session, url: str) -> str | None:
130
+ """Fetch a single page and return HTML. Returns None on failure."""
131
+ try:
132
+ response = session.get(url, timeout=20)
133
+ response.raise_for_status()
134
+ return response.text
135
+ except requests.exceptions.RequestException as exc:
136
+ logger.error("Failed to fetch %s: %s", url, exc)
137
+ return None
138
+
139
+
140
+ def scrape(urls: list[str] | None = None) -> list[dict]:
141
+ """
142
+ Main scrape loop. Pass a list of URLs or leave None to scrape TARGET_URL.
143
+ Respects robots.txt and rate-limits requests.
144
+ """
145
+ urls = urls or [TARGET_URL]
146
+ session = make_session()
147
+ all_records: list[dict] = []
148
+
149
+ for i, url in enumerate(urls):
150
+ if not check_robots(url):
151
+ logger.info("Skipping disallowed URL: %s", url)
152
+ continue
153
+
154
+ logger.info("Fetching (%d/%d): %s", i + 1, len(urls), url)
155
+ html = fetch_page(session, url)
156
+ if html is None:
157
+ continue
158
+
159
+ records = parse(html, url)
160
+ logger.info(" -> %d records found", len(records))
161
+ all_records.extend(records)
162
+
163
+ if i < len(urls) - 1:
164
+ time.sleep(REQUEST_DELAY) # rate limit between pages
165
+
166
+ return all_records
167
+
168
+
169
+ # ---------------------------------------------------------------------------
170
+ # CSV output
171
+ # ---------------------------------------------------------------------------
172
+
173
+ def save_csv(records: list[dict], path: str = OUTPUT_CSV) -> None:
174
+ """Write records to a CSV file."""
175
+ if not records:
176
+ logger.warning("No records to save.")
177
+ return
178
+ out = Path(path)
179
+ with out.open("w", newline="", encoding="utf-8") as fh:
180
+ writer = csv.DictWriter(fh, fieldnames=records[0].keys())
181
+ writer.writeheader()
182
+ writer.writerows(records)
183
+ logger.info("Saved %d records to %s", len(records), out)
184
+
185
+
186
+ # ---------------------------------------------------------------------------
187
+ # Entry point
188
+ # ---------------------------------------------------------------------------
189
+
190
+ if __name__ == "__main__":
191
+ records = scrape()
192
+ save_csv(records)
193
+ '''
194
+
195
+
196
+ def main():
197
+ if len(sys.argv) < 3:
198
+ print("Usage: python new_scraper.py <scraper-name> <target-url>")
199
+ sys.exit(1)
200
+
201
+ scraper_name = sys.argv[1]
202
+ target_url = sys.argv[2]
203
+
204
+ # Basic URL sanity check
205
+ if not target_url.startswith(("http://", "https://")):
206
+ print(f"Warning: target URL '{target_url}' doesn't look like a full URL.")
207
+
208
+ output_path = Path(f"{scraper_name}.py")
209
+ if output_path.exists():
210
+ print(f"Error: '{output_path}' already exists. Choose a different name.")
211
+ sys.exit(1)
212
+
213
+ safe_name = scraper_name.replace("-", "_")
214
+ content = Template(SCRAPER_TEMPLATE).safe_substitute(
215
+ scraper_name=safe_name,
216
+ target_url=target_url,
217
+ )
218
+ output_path.write_text(content, encoding="utf-8")
219
+ output_path.chmod(0o755)
220
+
221
+ print(f"\nScraper '{scraper_name}' created: {output_path}\n")
222
+ print(f" Target URL : {target_url}")
223
+ print(f" Output CSV : {safe_name}_output.csv")
224
+ print(f"\nNext steps:")
225
+ print(f" 1. pip install requests beautifulsoup4")
226
+ print(f" 2. Edit the parse() function in {output_path} for your target site")
227
+ print(f" 3. python {output_path}")
228
+
229
+
230
+ if __name__ == "__main__":
231
+ main()