@opendirectory.dev/skills 0.1.36 → 0.1.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,462 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ sdk-adoption-tracker: fetch.py
4
+
5
+ Searches GitHub code for public repos that import a given SDK, scores each
6
+ repo by adoption signal strength, and enriches high-signal repos with owner
7
+ and contributor data.
8
+
9
+ Usage:
10
+ python3 scripts/fetch.py stripe
11
+ python3 scripts/fetch.py @clerk/nextjs --ecosystem npm
12
+ python3 scripts/fetch.py requests --ecosystem python --output results.json
13
+ python3 scripts/fetch.py stripe --exclude stripe --context "We build billing tooling"
14
+
15
+ Output: JSON written to --output file (default: sdk-adopters-YYYY-MM-DD.json)
16
+ or printed to stdout with --stdout
17
+
18
+ Environment:
19
+ GITHUB_TOKEN required -- code search is rate-limited to 3 req/min without it
20
+ """
21
+
22
+ import argparse
23
+ import json
24
+ import os
25
+ import re
26
+ import ssl
27
+ import sys
28
+ import time
29
+ import urllib.error
30
+ import urllib.parse
31
+ import urllib.request
32
+ from datetime import datetime, timezone
33
+
34
# Shared TLS context for all GitHub API requests. create_default_context()
# loads system CA certificates and verifies server certificates; the
# private ssl._create_unverified_context() it replaces disabled
# certificate checks entirely, exposing every request (including the
# bearer token) to man-in-the-middle interception.
_ssl_ctx = ssl.create_default_context()
35
+
36
# Tokens that mark a repo as a learning/demo project rather than real
# production adoption. Matched against whitespace-split tokens of the
# repo name (with -/_ normalized) and description in is_tutorial().
TUTORIAL_WORDS = {
    "example",
    "tutorial",
    "demo",
    "learn",
    "sample",
    "starter",
    "boilerplate",
    "template",
    "playground",
    "test",
    "course",
    "workshop",
}
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Helpers
44
+ # ---------------------------------------------------------------------------
45
+
46
def gh_get(path: str, headers: dict, timeout: int = 15):
    """GET a GitHub REST API path; return ``(parsed_json, rate_remaining)``.

    Never raises: on any failure the first element is ``None`` so callers
    can skip the record and keep going. ``rate_remaining`` falls back to
    999 when the ``X-RateLimit-Remaining`` header is absent, meaning
    "budget unknown -- assume plenty".
    """
    url = f"https://api.github.com{path}"
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=timeout, context=_ssl_ctx) as resp:
            remaining = resp.headers.get("X-RateLimit-Remaining")
            data = json.loads(resp.read())
            return data, int(remaining) if remaining else 999
    except urllib.error.HTTPError as e:
        # GitHub includes rate-limit headers on error responses (403/429)
        # too; propagate them instead of a hard-coded 999 so the caller's
        # budget tracking does not reset after a rate-limited request.
        remaining = e.headers.get("X-RateLimit-Remaining") if e.headers else None
        return None, int(remaining) if remaining else 999
    except Exception:
        # Network errors, timeouts, malformed JSON: treat as a miss.
        return None, 999
58
+
59
+
60
def detect_ecosystem(sdk_name: str) -> str:
    """Best-effort guess of the package ecosystem from the SDK name.

    The Go check must run first: Go module paths frequently contain
    hyphens (e.g. ``github.com/stripe/stripe-go``), so testing the npm
    hyphen heuristic earlier would misclassify them as npm and make the
    Go branch unreachable for those names.
    """
    if "github.com/" in sdk_name:
        return "go"
    # Scoped packages (@org/pkg) and hyphenated names are npm conventions.
    if sdk_name.startswith("@") or "-" in sdk_name:
        return "npm"
    # Bare lowercase identifier (letters/digits/underscores) reads as PyPI.
    if re.match(r'^[a-z][a-z0-9_]*$', sdk_name):
        return "python"
    return "generic"
68
+
69
+
70
def build_queries(sdk_name: str, ecosystem: str) -> list[str]:
    """Return the GitHub code-search query strings for one SDK/ecosystem pair."""
    if ecosystem == "npm":
        # Cover both CommonJS require() and ES-module `from` imports,
        # in either quote style.
        double = f'"{sdk_name}"'
        single = f"'{sdk_name}'"
        return [
            f"require({double})",
            f"require({single})",
            f"from {double}",
            f"from {single}",
        ]
    if ecosystem == "python":
        return [f"import {sdk_name}", f"from {sdk_name} import"]
    if ecosystem == "go":
        # Go imports quote the full module path.
        return [f'"{sdk_name}"']
    # Generic fallback: search for the bare name.
    return [sdk_name]
87
+
88
+
89
def is_tutorial(repo_name: str, description: str) -> bool:
    """Return True when the repo name or description contains a tutorial word.

    The name is tokenized with ``-``/``_`` treated as spaces; the
    description is split on whitespace only.
    """
    normalized_name = repo_name.lower().replace("-", " ").replace("_", " ")
    tokens = set(normalized_name.split())
    tokens.update((description or "").lower().split())
    return not tokens.isdisjoint(TUTORIAL_WORDS)
93
+
94
+
95
def compute_score(owner_type: str, company: str, stars: int, days_since_push: int,
                  is_fork: bool, is_archived: bool, tutorial: bool) -> float:
    """Return the adoption-signal score for one repo (higher = stronger signal).

    Additive heuristic: org ownership and a listed company dominate;
    stars contribute up to 50 points (capped at 500 stars); recent pushes,
    original (non-fork), active (non-archived), and non-tutorial repos
    each add fixed bonuses.
    """
    # Star contribution: 0.1 point per star, capped at 500 stars.
    total = min(stars, 500) / 10
    if owner_type == "Organization":
        total += 50
    if company and company.strip():
        total += 20
    # Recency bonuses stack: a push within 7 days earns both.
    if days_since_push < 30:
        total += 30
    if days_since_push < 7:
        total += 20
    if not is_fork:
        total += 10
    if not is_archived:
        total += 10
    if not tutorial:
        total += 20
    return round(total, 1)
107
+
108
+
109
+ # ---------------------------------------------------------------------------
110
+ # Step 1: Search GitHub code
111
+ # ---------------------------------------------------------------------------
112
+
113
def search_code(sdk_name: str, ecosystem: str, exclude_owner: str,
                headers: dict, verbose: bool) -> list[dict]:
    """Search GitHub code for public files that import *sdk_name*.

    Runs one code-search request per import pattern from build_queries(),
    de-duplicates hits by repository full name, and returns one
    lightweight record per unique repo. Respects the ~10 req/min
    code-search rate limit by sleeping 6s between queries and stopping
    early when the reported budget runs low. Repos owned by
    *exclude_owner* (case-insensitive) are skipped.
    """
    queries = build_queries(sdk_name, ecosystem)
    seen = {}  # full_name -> repo record; also de-duplicates across queries
    search_remaining = 30  # optimistic default until a response reports the real budget

    if verbose:
        print(f"Searching GitHub code for {sdk_name} ({ecosystem})...")

    for i, query in enumerate(queries):
        if search_remaining <= 2:
            if verbose:
                print(f" Search rate limit low ({search_remaining}) -- stopping early")
            break

        encoded = urllib.parse.quote(query)
        url = f"https://api.github.com/search/code?q={encoded}&per_page=100"
        req = urllib.request.Request(url, headers=headers)

        try:
            with urllib.request.urlopen(req, timeout=20, context=_ssl_ctx) as resp:
                remaining = resp.headers.get("X-RateLimit-Remaining")
                if remaining:
                    search_remaining = int(remaining)
                raw = json.loads(resp.read())
        except urllib.error.HTTPError as e:
            if e.code == 403:
                # 403 on this endpoint almost always means rate-limited;
                # further queries would fail too, so end the search phase.
                if verbose:
                    print(f" Rate limit hit on '{query}'")
                break
            if verbose:
                print(f" HTTP {e.code} on '{query}'")
            if i < len(queries) - 1:
                time.sleep(6)
            continue
        except Exception as e:
            if verbose:
                print(f" Error on '{query}': {e}")
            if i < len(queries) - 1:
                time.sleep(6)
            continue

        items = raw.get("items", [])
        total = raw.get("total_count", 0)
        if verbose:
            print(f" '{query}': {total} total, {len(items)} fetched | rate={search_remaining}")

        for item in items:
            repo = item.get("repository", {})
            full_name = repo.get("full_name", "")
            owner_login = repo.get("owner", {}).get("login", "").lower()

            if not full_name:
                continue
            if full_name in seen:
                continue
            if exclude_owner and owner_login == exclude_owner.lower():
                continue

            seen[full_name] = {
                "full_name": full_name,
                "name": repo.get("name", ""),
                "owner_login": repo.get("owner", {}).get("login", ""),
                "owner_type": repo.get("owner", {}).get("type", "User"),
                "file_path": item.get("path", ""),
                "matched_pattern": query,
                "html_url": repo.get("html_url", ""),
                "description": repo.get("description") or "",
            }

        if i < len(queries) - 1:
            time.sleep(6)  # respect 10 req/min code search limit

    results = list(seen.values())
    if verbose:
        print(f" Total unique repos: {len(results)}")
    return results
191
+
192
+
193
+ # ---------------------------------------------------------------------------
194
+ # Step 2: Score repos (pure Python)
195
+ # ---------------------------------------------------------------------------
196
+
197
def score_repos(raw_results: list[dict], sdk_name: str, exclude_owner: str) -> list[dict]:
    """Bucket each raw search hit into an adoption tier (no network calls).

    Tier precedence: company_org > affiliated_dev > solo_dev > tutorial_noise.
    Actual scoring happens later in enrich_repos(); every record leaves here
    with adoption_score 0.0 and enriched False, sorted by tier precedence.
    """
    # Bare SDK name: last path segment, "@" stripped (handles npm scopes).
    sdk_bare = sdk_name.lower().split("/")[-1].replace("@", "")
    excluded = exclude_owner.lower() if exclude_owner else None
    scored = []

    for entry in raw_results:
        if excluded and entry.get("owner_login", "").lower() == excluded:
            continue

        name_lower = entry.get("name", "").lower()
        looks_tutorial = is_tutorial(name_lower, entry.get("description") or "")
        # A repo named after the SDK itself (or "<sdk>-something") is
        # almost always a mirror or example -- force it into the noise tier.
        if name_lower == sdk_bare or name_lower.startswith(sdk_bare + "-"):
            looks_tutorial = True

        if looks_tutorial:
            bucket = "tutorial_noise"
        elif entry.get("owner_type", "User") == "Organization":
            bucket = "company_org"
        else:
            bucket = "solo_dev"

        record = dict(entry)
        record.update(tier=bucket, is_tutorial=looks_tutorial,
                      adoption_score=0.0, enriched=False)
        scored.append(record)

    precedence = {"company_org": 0, "affiliated_dev": 1, "solo_dev": 2, "tutorial_noise": 3}
    scored.sort(key=lambda rec: precedence.get(rec["tier"], 9))
    return scored
229
+
230
+
231
+ # ---------------------------------------------------------------------------
232
+ # Step 3: Enrich high-signal repos
233
+ # ---------------------------------------------------------------------------
234
+
235
def enrich_repos(scored: list[dict], headers: dict, verbose: bool) -> list[dict]:
    """Fetch repo/owner/contributor details for every non-noise repo.

    For each repo not in the tutorial_noise tier, pulls the repo record,
    the owner's org or user profile, and (budget permitting) the top 3
    contributors, then computes the adoption score. Returns the enriched
    records sorted by descending adoption_score. Tracks the core API
    budget via the rate-remaining value gh_get() returns; repos reached
    after the budget drops to <=10 are appended un-enriched, and repos
    whose repo fetch fails are dropped entirely.
    """
    target = [r for r in scored if r["tier"] != "tutorial_noise"]
    if verbose:
        print(f"\nEnriching {len(target)} repos (skipping tutorial_noise)...")

    # Authenticated core-API default budget; each gh_get() call below
    # replaces this with the server-reported X-RateLimit-Remaining.
    core_remaining = 5000
    enriched = []
    now = datetime.now(tz=timezone.utc)

    for item in target:
        full_name = item["full_name"]
        owner_login = item["owner_login"]
        owner_type = item["owner_type"]

        # Budget exhausted: keep the repo in the output, just unenriched.
        if core_remaining <= 10:
            enriched.append({**item, "enriched": False})
            continue

        repo_data, core_remaining = gh_get(f"/repos/{full_name}", headers)
        if not repo_data:
            # Repo fetch failed (deleted, private, or API error) -- drop it.
            continue

        stars = repo_data.get("stargazers_count", 0)
        is_fork = repo_data.get("fork", False)
        is_archived = repo_data.get("archived", False)
        language = repo_data.get("language") or ""
        # Prefer the fresh API description; fall back to the search-phase one.
        description = repo_data.get("description") or item.get("description", "")
        pushed_at = repo_data.get("pushed_at") or ""
        created_at = repo_data.get("created_at") or ""

        # 999 = "unknown/never"; also what compute_score treats as stale.
        days_since_push = 999
        if pushed_at:
            # GitHub returns ISO-8601 with a trailing "Z"; make it parseable.
            pushed_dt = datetime.fromisoformat(pushed_at.replace("Z", "+00:00"))
            days_since_push = (now - pushed_dt).days

        days_since_created = 999
        if created_at:
            created_dt = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
            days_since_created = (now - created_dt).days

        company = ""
        owner_profile = {}
        org_website = ""

        # Owner enrichment: orgs and users expose different profile fields.
        if owner_type == "Organization":
            org_data, core_remaining = gh_get(f"/orgs/{owner_login}", headers)
            if org_data:
                company = org_data.get("name") or owner_login
                org_website = org_data.get("blog") or ""
                owner_profile = {
                    "type": "org", "name": company,
                    "description": org_data.get("description") or "",
                    "website": org_website, "email": org_data.get("email") or "",
                    "public_repos": org_data.get("public_repos", 0),
                    "followers": org_data.get("followers", 0),
                }
        else:
            user_data, core_remaining = gh_get(f"/users/{owner_login}", headers)
            if user_data:
                company = user_data.get("company") or ""
                owner_profile = {
                    "type": "user", "name": user_data.get("name") or owner_login,
                    "company": company, "bio": user_data.get("bio") or "",
                    "blog": user_data.get("blog") or "",
                    "followers": user_data.get("followers", 0),
                    "twitter_username": user_data.get("twitter_username") or "not listed",
                }
                # A solo dev with a listed company is promoted in place so
                # the tier survives into the output record below.
                if company and item["tier"] == "solo_dev":
                    item["tier"] = "affiliated_dev"

        # Contributors cost an extra call; only spend it with budget to spare.
        top_contributors = []
        if core_remaining > 20:
            contrib_data, core_remaining = gh_get(
                f"/repos/{full_name}/contributors?per_page=3", headers
            )
            if contrib_data:
                top_contributors = [
                    {"login": c.get("login", ""), "contributions": c.get("contributions", 0)}
                    for c in contrib_data[:3]
                ]

        score = compute_score(
            owner_type, company, stars, days_since_push,
            is_fork, is_archived, item.get("is_tutorial", False)
        )

        tier = item["tier"]
        if is_archived:
            # NOTE(review): target excludes tutorial_noise and score_repos
            # sets is_tutorial True only for that tier, so this demotion
            # looks unreachable in practice -- confirm before relying on it.
            tier = "tutorial_noise" if item.get("is_tutorial") else tier

        enriched.append({
            **item,
            "description": description,
            "stars": stars,
            "language": language,
            "is_fork": is_fork,
            "is_archived": is_archived,
            "days_since_push": days_since_push,
            "days_since_created": days_since_created,
            "pushed_at": pushed_at,
            "created_at": created_at,
            "repo_url": repo_data.get("html_url", f"https://github.com/{full_name}"),
            "tier": tier,
            "adoption_score": score,
            "company": company or "not listed",
            "org_website": org_website,
            "owner_profile": owner_profile,
            "top_contributors": top_contributors,
            "enriched": True,
        })

        if verbose:
            print(f" {full_name} | {tier} | score={score} | stars={stars} | "
                  f"pushed={days_since_push}d | company={company or 'not listed'} | "
                  f"rate={core_remaining}")
        # Small pause to stay polite to the API between repos.
        time.sleep(0.1)

    enriched.sort(key=lambda x: -x["adoption_score"])
    return enriched
354
+
355
+
356
+ # ---------------------------------------------------------------------------
357
+ # CLI
358
+ # ---------------------------------------------------------------------------
359
+
360
def main():
    """CLI entry point: parse args, search, score, enrich, then write or print JSON.

    Exits 1 when GITHUB_TOKEN is missing, 0 when the search finds nothing.
    """
    parser = argparse.ArgumentParser(
        description="Search GitHub for repos that import a given SDK and score adoption."
    )
    parser.add_argument("sdk", help="SDK name (e.g. stripe, @clerk/nextjs, requests)")
    parser.add_argument("--ecosystem", "-e", default="",
                        help="Ecosystem: npm, python, go, generic (auto-detected if omitted)")
    parser.add_argument("--exclude", "-x", default="",
                        help="Owner login to exclude (usually the SDK publisher)")
    parser.add_argument("--context", "-c", default="",
                        help="Short product description for outreach message context")
    parser.add_argument("--output", "-o", default="",
                        help="Output JSON file path (default: sdk-adopters-YYYY-MM-DD.json)")
    parser.add_argument("--stdout", action="store_true", help="Print JSON to stdout")
    parser.add_argument("--quiet", "-q", action="store_true", help="Suppress progress output")
    args = parser.parse_args()

    token = os.environ.get("GITHUB_TOKEN", "")
    if not token:
        print("ERROR: GITHUB_TOKEN is required for code search.", file=sys.stderr)
        print("Add a token at github.com/settings/tokens (no scopes needed).", file=sys.stderr)
        sys.exit(1)

    verbose = not args.quiet
    ecosystem = args.ecosystem or detect_ecosystem(args.sdk)

    if verbose:
        print(f"SDK: {args.sdk} | Ecosystem: {ecosystem}")
        if not args.exclude:
            print("Tip: use --exclude <owner> to filter out the SDK publisher's own repos.")

    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
        "User-Agent": "sdk-adoption-tracker/1.0",
    }

    # Pipeline: search -> score (pure) -> enrich (network).
    raw_results = search_code(args.sdk, ecosystem, args.exclude, headers, verbose)

    if not raw_results:
        print("No repos found. GitHub code search indexing takes 1-4 weeks for new packages.")
        sys.exit(0)

    scored = score_repos(raw_results, args.sdk, args.exclude)
    enriched = enrich_repos(scored, headers, verbose)

    # Signal buckets for the summary section.
    high = [r for r in enriched if r["adoption_score"] >= 80]
    medium = [r for r in enriched if 40 <= r["adoption_score"] < 80]
    tier_counts = {}
    for r in enriched:
        tier_counts[r["tier"]] = tier_counts.get(r["tier"], 0) + 1

    date_str = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d")

    output_data = {
        "date": date_str,
        "sdk_name": args.sdk,
        "ecosystem": ecosystem,
        "product_context": args.context,
        "repos_found": len(enriched),
        "company_repos": tier_counts.get("company_org", 0),
        "tier_counts": tier_counts,
        "raw_results": raw_results,
        "scored": scored,
        "enriched": enriched,
        "summary": {
            "high_signal": [
                {"repo": r["full_name"], "score": r["adoption_score"],
                 "company": r.get("company", ""), "tier": r["tier"]}
                for r in high
            ],
            "medium_signal": [
                {"repo": r["full_name"], "score": r["adoption_score"], "tier": r["tier"]}
                for r in medium
            ],
        }
    }

    if args.stdout:
        print(json.dumps(output_data, indent=2))
        return

    output_path = args.output or f"sdk-adopters-{date_str}.json"
    # Explicit UTF-8 so the output is identical on every platform instead
    # of depending on the locale's default encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output_data, f, indent=2)

    if verbose:
        print(f"\n{'='*50}")
        print(f"Results: {len(enriched)} repos analyzed")
        print(f" Company org repos: {tier_counts.get('company_org', 0)}")
        print(f" Affiliated devs: {tier_counts.get('affiliated_dev', 0)}")
        print(f" Solo devs: {tier_counts.get('solo_dev', 0)}")
        print(f" Tutorial noise: {tier_counts.get('tutorial_noise', 0)}")
        print(f"\nTop Adopters (by score):")
        for i, r in enumerate((high + medium)[:10], 1):
            print(f" {i:2}. {r['full_name']:40} score={r['adoption_score']:5.1f} "
                  f"stars={r.get('stars',0):>5} {r['tier']}")
        print(f"\nSaved to: {output_path}")
        print(f"\nNext step: open {output_path} with Claude and ask it to generate adoption briefs.")
459
+
460
+
461
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()