@opendirectory.dev/skills 0.1.34 → 0.1.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,372 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ npm-downloads-to-leads: fetch.py
4
+
5
+ Fetches 12 weeks of npm download data, computes velocity scores, and enriches
6
+ maintainer profiles via the npm registry and GitHub API.
7
+
8
+ Usage:
9
+ python3 scripts/fetch.py esbuild zod @hono/hono
10
+ python3 scripts/fetch.py --file packages.txt
11
+ python3 scripts/fetch.py esbuild zod --output results.json
12
+ python3 scripts/fetch.py esbuild zod --context "We build a TypeScript DX platform"
13
+
14
+ Output: JSON written to --output file (default: npm-leads-YYYY-MM-DD.json)
15
+ or printed to stdout with --stdout
16
+
17
+ Environment:
18
+ GITHUB_TOKEN optional -- raises GitHub rate limit from 60/hr to 5000/hr
19
+ """
20
+
21
+ import argparse
22
+ import json
23
+ import os
24
+ import re
25
+ import ssl
26
+ import sys
27
+ import time
28
+ import urllib.error
29
+ import urllib.request
30
+ from collections import defaultdict
31
+ from datetime import datetime, timedelta, timezone
32
+
33
# Some environments (e.g. macOS without updated certs) fail SSL verification.
# Fall back to unverified context so the script still runs.
# SECURITY NOTE(review): this disables TLS certificate verification for EVERY
# request the script makes, so responses could be tampered with in transit.
# It also relies on a private ssl API (_create_unverified_context) that may
# change between Python versions. Consider trying ssl.create_default_context()
# first and only falling back to an unverified context when verification fails.
_ssl_ctx = ssl._create_unverified_context()
36
+
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Helpers
40
+ # ---------------------------------------------------------------------------
41
+
42
def encode_package(pkg: str) -> str:
    """Percent-encode an npm package name for use in a URL path segment.

    Scoped names need escaping ("@org/pkg" -> "%40org%2Fpkg"); unscoped
    names pass through unchanged.
    """
    escaped = pkg.replace("/", "%2F")
    return escaped.replace("@", "%40")
45
+
46
+
47
def fetch_json(url: str, headers: dict, timeout: int = 20) -> tuple[dict | None, object]:
    """GET *url* and parse the response body as JSON.

    Returns a ``(data, headers)`` pair (the original annotation claimed a
    bare ``dict | None``, but the function has always returned a 2-tuple):

      * success: the decoded JSON object and the response headers (an
        http.client.HTTPMessage, which supports case-insensitive ``.get``);
      * HTTP error: ``(None, {"status": <http status code>})``;
      * any other failure (timeout, DNS, SSL, invalid JSON, ...):
        ``(None, {"error": <message>})``.

    Never raises; callers distinguish outcomes via the first element.
    Uses the module-level ``_ssl_ctx`` TLS context.
    """
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=timeout, context=_ssl_ctx) as resp:
            return json.loads(resp.read()), resp.headers
    except urllib.error.HTTPError as e:
        return None, {"status": e.code}
    except Exception as e:
        # Deliberate best-effort: turn every other failure into data so the
        # calling loop can continue with the remaining packages.
        return None, {"error": str(e)}
56
+
57
+
58
def compute_velocity(weeks: list[int]) -> dict:
    """Score download momentum from an ordered list of weekly totals.

    Compares the average of the last 4 weeks against the prior 4 weeks
    (growth) and the last 2 weeks against the 2 before them (acceleration),
    damps very small or very large packages, and assigns a tier.

    Args:
        weeks: weekly download counts, oldest first.

    Returns:
        dict with velocity_score, growth_pct, recent_4_avg, prior_4_avg,
        and tier ("breakout" / "watching" / "too_early" / "established" /
        "steady", or "insufficient_data" when fewer than 4 weeks are given).
    """
    if len(weeks) < 4:
        # Not enough history to compare a recent window against a prior one.
        return {"velocity_score": 0, "growth_pct": 0, "recent_4_avg": 0, "prior_4_avg": 0,
                "tier": "insufficient_data"}

    recent_4 = sum(weeks[-4:]) / 4
    # With fewer than 8 weeks of history, average whatever precedes the
    # recent window (at minimum the first week) as the "prior" baseline.
    prior_4 = sum(weeks[-8:-4]) / 4 if len(weeks) >= 8 else sum(weeks[:max(len(weeks) - 4, 1)]) / max(len(weeks) - 4, 1)
    recent_2 = sum(weeks[-2:]) / 2
    # Fixed: the original guarded this with `if len(weeks) >= 4 else recent_2`,
    # but the early return above already guarantees len(weeks) >= 4, so that
    # branch was dead code.
    mid_2 = sum(weeks[-4:-2]) / 2

    # max(..., 1) guards against division by zero for packages with no
    # downloads in the comparison window.
    growth_ratio = recent_4 / max(prior_4, 1)
    acceleration = recent_2 / max(mid_2, 1)
    growth_pct = round((growth_ratio - 1) * 100, 1)

    # Damp the score outside the 500..500k weekly band: tiny packages are
    # statistically noisy, huge ones cannot sustain breakout-style growth.
    if recent_4 < 500:
        noise_factor = max(recent_4 / 500, 0.1)
    elif recent_4 > 500_000:
        noise_factor = max(500_000 / recent_4, 0.1)
    else:
        noise_factor = 1.0

    velocity_score = round(growth_ratio * acceleration * noise_factor * 100, 1)

    if velocity_score > 80 and 500 < recent_4 < 500_000 and growth_ratio >= 1.5:
        tier = "breakout"
    elif velocity_score > 40 and recent_4 >= 500 and growth_ratio >= 1.2:
        tier = "watching"
    elif recent_4 < 500:
        tier = "too_early"
    elif recent_4 >= 500_000:
        tier = "established"
    else:
        tier = "steady"

    return {
        "velocity_score": velocity_score,
        "growth_pct": growth_pct,
        "recent_4_avg": round(recent_4),
        "prior_4_avg": round(prior_4),
        "tier": tier,
    }
99
+
100
+
101
+ # ---------------------------------------------------------------------------
102
+ # Step 1: Fetch download data from npm API
103
+ # ---------------------------------------------------------------------------
104
+
105
def fetch_downloads(packages: list[str], verbose: bool = True) -> list[dict]:
    """Pull ~13 weeks of daily npm download counts per package and score them.

    Returns one dict per package (status "ok" or "not_found") with the
    velocity fields from compute_velocity merged in for found packages,
    sorted by descending velocity_score. Prints progress when *verbose*.
    """
    now = datetime.now(tz=timezone.utc)
    window_start = now - timedelta(weeks=13)
    start_str = window_start.strftime("%Y-%m-%d")
    end_str = now.strftime("%Y-%m-%d")

    headers = {"User-Agent": "npm-downloads-to-leads/1.0"}
    scored = []

    if verbose:
        print(f"Fetching download data for {len(packages)} packages ({start_str} to {end_str})...")

    for pkg in packages:
        range_url = f"https://api.npmjs.org/downloads/range/{start_str}:{end_str}/{encode_package(pkg)}"
        payload, _ = fetch_json(range_url, headers)

        if payload is None or "downloads" not in payload:
            if verbose:
                print(f" {pkg}: NOT FOUND or error")
            scored.append({"package": pkg, "weeks": [], "total_weeks": 0,
                           "current_weekly": 0, "status": "not_found"})
            time.sleep(0.1)
            continue

        # Bucket the daily counts into ISO (year, week-number) pairs.
        per_week = defaultdict(int)
        for entry in payload.get("downloads", []):
            day = datetime.strptime(entry["day"], "%Y-%m-%d")
            per_week[day.isocalendar()[:2]] += entry["downloads"]

        # Chronological order, keeping at most the 12 most recent weeks.
        weeks = [count for _, count in sorted(per_week.items())][-12:]

        row = {
            "package": pkg,
            "weeks": weeks,
            "total_weeks": len(weeks),
            "current_weekly": weeks[-1] if weeks else 0,
            "status": "ok",
        }
        row.update(compute_velocity(weeks))

        if verbose:
            print(f" {pkg}: {len(weeks)} weeks | {row['recent_4_avg']:,}/wk avg | "
                  f"growth {row['growth_pct']:+.0f}% | {row['tier'].upper()}")

        scored.append(row)
        time.sleep(0.2)

    scored.sort(key=lambda r: r.get("velocity_score", 0), reverse=True)
    return scored
157
+
158
+
159
+ # ---------------------------------------------------------------------------
160
+ # Step 2: Fetch maintainer profiles (npm registry + GitHub)
161
+ # ---------------------------------------------------------------------------
162
+
163
def fetch_profiles(packages: list[dict], github_token: str = "", verbose: bool = True) -> list[dict]:
    """Enrich breakout/watching packages with npm registry + GitHub data.

    Only entries whose tier is "breakout" or "watching" are enriched; each
    returned dict is the original entry plus a "profile" key. Entries in
    other tiers are omitted from the return value entirely.

    Args:
        packages: scored package dicts from fetch_downloads.
        github_token: optional GitHub token; raises the API limit from
            60/hr to 5000/hr.
        verbose: print per-package progress.
    """
    target = [p for p in packages if p.get("tier") in ("breakout", "watching")]

    if not target:
        return []

    if verbose:
        print(f"\nFetching profiles for {len(target)} packages (breakout + watching)...")

    npm_headers = {"User-Agent": "npm-downloads-to-leads/1.0"}
    gh_headers = {"Accept": "application/vnd.github+json", "User-Agent": "npm-downloads-to-leads/1.0"}
    if github_token:
        gh_headers["Authorization"] = f"Bearer {github_token}"

    # Optimistic default; overwritten with the real X-RateLimit-Remaining
    # value after the first GitHub response.
    gh_rate_remaining = 999
    enriched = []

    for item in target:
        pkg = item["package"]
        profile = {
            "package": pkg,
            "description": "",
            "keywords": [],
            "npm_homepage": "",
            "npm_maintainers": [],
            "github_owner": None,
            "github_repo": None,
            "github_users": [],
        }

        # --- npm registry metadata ---
        encoded = encode_package(pkg)
        reg_data, _ = fetch_json(f"https://registry.npmjs.org/{encoded}", npm_headers)

        if reg_data:
            profile["description"] = reg_data.get("description", "")
            profile["keywords"] = (reg_data.get("keywords") or [])[:6]
            profile["npm_homepage"] = reg_data.get("homepage", "")
            profile["npm_maintainers"] = [
                m.get("name", "") for m in reg_data.get("maintainers", []) if m.get("name")
            ]

            # "repository" may be a dict ({"url": ...}) or a bare string.
            repo_field = reg_data.get("repository") or {}
            repo_url = repo_field.get("url", "") if isinstance(repo_field, dict) else str(repo_field)
            gh_match = re.search(r"github\.com[/:]([^/]+)/([^/.]+)", repo_url)
            if gh_match:
                profile["github_owner"] = gh_match.group(1)
                # BUG FIX: the original used .rstrip(".git"), which strips the
                # CHARACTER SET {'.', 'g', 'i', 't'} from the right — a repo
                # named "tig" became "". removesuffix removes only a literal
                # ".git" suffix.
                profile["github_repo"] = gh_match.group(2).removesuffix(".git")

            if verbose:
                print(f" {pkg}: registry OK | owner={profile['github_owner']} | "
                      f"maintainers={profile['npm_maintainers'][:2]}")

        time.sleep(0.1)

        # --- GitHub user profiles ---
        # Candidates: repo owner first, then up to two npm maintainer names.
        candidates = []
        if profile["github_owner"]:
            candidates.append(profile["github_owner"])
        for m in profile["npm_maintainers"][:2]:
            if m and m not in candidates:
                candidates.append(m)

        for username in candidates[:3]:
            if gh_rate_remaining <= 5:
                if verbose:
                    print(f" GitHub rate limit low ({gh_rate_remaining}) -- skipping {username}")
                break

            gh_data, gh_resp_headers = fetch_json(
                f"https://api.github.com/users/{username}", gh_headers
            )

            # fetch_json returns a plain dict on failure and an HTTPMessage
            # (case-insensitive .get) on success, so only the dict case needs
            # the explicit lowercase fallback.
            if isinstance(gh_resp_headers, dict):
                remaining_raw = gh_resp_headers.get("X-RateLimit-Remaining") or gh_resp_headers.get("x-ratelimit-remaining")
            else:
                remaining_raw = gh_resp_headers.get("X-RateLimit-Remaining")

            if remaining_raw:
                gh_rate_remaining = int(remaining_raw)

            if gh_data and gh_data.get("login"):
                profile["github_users"].append({
                    "username": gh_data.get("login", username),
                    "name": gh_data.get("name") or username,
                    "twitter_username": gh_data.get("twitter_username") or "not found on GitHub",
                    "bio": gh_data.get("bio") or "",
                    "blog": gh_data.get("blog") or "",
                    "company": gh_data.get("company") or "",
                    "followers": gh_data.get("followers", 0),
                    "public_repos": gh_data.get("public_repos", 0),
                    "github_url": gh_data.get("html_url", f"https://github.com/{username}"),
                })
                if verbose:
                    twitter = gh_data.get("twitter_username") or "none"
                    print(f" @{username}: {gh_data.get('followers', 0):,} followers | "
                          f"twitter={twitter} | rate_remaining={gh_rate_remaining}")
            else:
                if verbose:
                    print(f" @{username}: not found on GitHub")

            time.sleep(0.2)

        enriched.append({**item, "profile": profile})

    return enriched
269
+
270
+
271
+ # ---------------------------------------------------------------------------
272
+ # CLI
273
+ # ---------------------------------------------------------------------------
274
+
275
def main():
    """CLI entry point: gather package names, run the pipeline, emit JSON.

    Writes npm-leads-YYYY-MM-DD.json (or --output / --stdout) containing the
    scored results, enriched profiles, and a breakout/watching summary.
    """
    parser = argparse.ArgumentParser(
        description="Fetch npm download trends and maintainer profiles for a list of packages."
    )
    parser.add_argument("packages", nargs="*", help="npm package names (e.g. esbuild @hono/hono zod)")
    parser.add_argument("--file", "-f", help="text file with one package name per line")
    parser.add_argument("--output", "-o", help="output JSON file path (default: npm-leads-YYYY-MM-DD.json)")
    parser.add_argument("--stdout", action="store_true", help="print JSON to stdout instead of writing a file")
    parser.add_argument("--context", "-c", default="", help="short description of your product (used in lead brief context)")
    parser.add_argument("--quiet", "-q", action="store_true", help="suppress progress output")
    args = parser.parse_args()

    # Collect package names from positional args and, optionally, --file.
    packages = list(args.packages)
    if args.file:
        try:
            # FIXES: (1) use a context manager so the file handle is closed
            # (the original leaked it); (2) strip BEFORE the "#" check —
            # the original tested the raw line, so an indented "  # comment"
            # line was treated as a package name.
            with open(args.file) as fh:
                for raw_line in fh:
                    name = raw_line.strip()
                    if name and not name.startswith("#"):
                        packages.append(name)
        except FileNotFoundError:
            print(f"Error: file '{args.file}' not found.", file=sys.stderr)
            sys.exit(1)

    packages = list(dict.fromkeys(packages))  # deduplicate, preserve order

    if not packages:
        parser.print_help()
        print("\nError: no packages provided. Pass package names as arguments or use --file.", file=sys.stderr)
        sys.exit(1)

    verbose = not args.quiet
    github_token = os.environ.get("GITHUB_TOKEN", "")

    if verbose and not github_token:
        print("Note: GITHUB_TOKEN not set. GitHub enrichment limited to ~10 packages (60 req/hr).")
        print(" Add token at github.com/settings/tokens (no scopes needed).\n")

    # Run pipeline: score all packages, then enrich the promising tiers.
    scored = fetch_downloads(packages, verbose=verbose)
    enriched = fetch_profiles(scored, github_token=github_token, verbose=verbose)

    # Merge: prefer the enriched entry when one exists for a package.
    enriched_map = {e["package"]: e for e in enriched}
    final = [enriched_map.get(p["package"], p) for p in scored]

    breakout = [p for p in scored if p.get("tier") == "breakout"]
    watching = [p for p in scored if p.get("tier") == "watching"]

    date_str = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d")

    def _summary_row(p: dict) -> dict:
        # Compact form of a scored entry for the "summary" section.
        return {"package": p["package"], "velocity_score": p["velocity_score"],
                "growth_pct": p["growth_pct"], "recent_4_avg": p["recent_4_avg"]}

    output_data = {
        "date": date_str,
        "product_context": args.context,
        "packages_analyzed": len(packages),
        "breakout_count": len(breakout),
        "watching_count": len(watching),
        "results": final,
        "summary": {
            "breakout": [_summary_row(p) for p in breakout],
            "watching": [_summary_row(p) for p in watching],
        },
    }

    if args.stdout:
        print(json.dumps(output_data, indent=2))
        return

    output_path = args.output or f"npm-leads-{date_str}.json"
    with open(output_path, "w") as f:
        json.dump(output_data, f, indent=2)

    if verbose:
        print(f"\n{'='*50}")
        print(f"Results: {len(packages)} packages analyzed")
        print(f" BREAKOUT: {len(breakout)}")
        print(f" WATCHING: {len(watching)}")
        print("\nVelocity Leaderboard:")
        for i, p in enumerate(scored[:10], 1):
            tier_label = p.get("tier", "?").upper()
            print(f" {i:2}. {p['package']:30} score={p.get('velocity_score', 0):6.1f} "
                  f"{p.get('recent_4_avg', 0):>8,}/wk {p.get('growth_pct', 0):+.0f}% [{tier_label}]")

        print(f"\nSaved to: {output_path}")
        print(f"\nNext step: open {output_path} with Claude and ask it to generate lead briefs.")
369
+
370
+
371
# Script entry point: only run the pipeline when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()