@opendirectory.dev/skills 0.1.39 → 0.1.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,364 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ product-update-logger gather script
4
+ Collects shipped items from git commits, GitHub PRs, and free text.
5
+ No required API keys. GITHUB_TOKEN optional (enables GitHub PR fetching).
6
+
7
+ Usage:
8
+ python3 scripts/gather.py --since 7 --output /tmp/pul-raw.json
9
+ python3 scripts/gather.py --items "Add dark mode|Fix CSV bug" --output /tmp/pul-raw.json
10
+ GITHUB_TOKEN=your_token python3 scripts/gather.py --repo owner/repo --since 14
11
+ python3 scripts/gather.py --stdout | jq '.items'
12
+ """
13
+
14
+ import argparse
15
+ import json
16
+ import os
17
+ import re
18
+ import ssl
19
+ import subprocess
20
+ import sys
21
+ from datetime import datetime, timedelta, timezone
22
+
23
# TLS context for all HTTPS requests. Use a verified default context:
# the previous ssl._create_unverified_context() disabled certificate
# validation (MITM risk) and relied on a private CPython API.
_ssl_ctx = ssl.create_default_context()

# Timestamp of this run; timezone-aware UTC so date math is unambiguous.
TODAY = datetime.now(timezone.utc)
# Optional token enabling GitHub PR fetching; empty string when unset.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")

# Global progress-output switch, set from --quiet in main().
quiet = False
29
+
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # HTTP helpers (reused from map-your-market pattern)
33
+ # ---------------------------------------------------------------------------
34
+
35
def fetch_json(url, headers=None, timeout=20):
    """Fetch *url* and decode the response body as JSON.

    Returns the parsed object, or None on any failure (HTTP error,
    network problem, bad JSON). Failures are logged to stderr unless
    the module-level `quiet` flag is set.
    """
    import urllib.request, urllib.error
    request = urllib.request.Request(url, headers=headers or {})
    request.add_header("User-Agent", "product-update-logger/1.0")
    try:
        with urllib.request.urlopen(request, context=_ssl_ctx, timeout=timeout) as resp:
            return json.loads(resp.read().decode("utf-8"))
    except urllib.error.HTTPError as err:
        if not quiet:
            print(f" HTTP {err.code}: {url[:80]}", file=sys.stderr)
        return None
    except Exception as err:
        if not quiet:
            print(f" Error: {err} -- {url[:80]}", file=sys.stderr)
        return None
50
+
51
+
52
def gh_get(path):
    """GET a GitHub REST API *path* (e.g. "/repos/o/r/pulls?...").

    Adds the JSON media type header and, when GITHUB_TOKEN is set, a
    Bearer auth header. Returns parsed JSON or None (via fetch_json).
    """
    # The previous body did `import urllib.request` here but never used
    # it -- fetch_json performs the actual request -- so it is removed.
    headers = {"Accept": "application/vnd.github+json"}
    if GITHUB_TOKEN:
        headers["Authorization"] = f"Bearer {GITHUB_TOKEN}"
    return fetch_json(f"https://api.github.com{path}", headers=headers)
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Noise filter patterns
62
+ # ---------------------------------------------------------------------------
63
+
64
# Subject-line regexes that mark a commit as housekeeping noise
# (merges, version bumps, chores, CI, typos, WIP, tests, docs, ...).
_NOISE_SUBJECT_RES = [
    r"^Merge (pull request|branch)\b",
    r"^(bump|update) version\b",
    r"^chore[\s:(]",
    r"^ci[\s:(]",
    r"^build[\s:(]",
    r"^fix typo\b",
    r"^\s*typo\b",
    r"^wip\b",
    r"^test[\s:(]",
    r"^docs[\s:(]",
    r"^style[\s:(]",
    r"^revert\b",
    r"^Initial commit$",
    r"^init$",
]
NOISE_PATTERNS = [re.compile(expr, re.IGNORECASE) for expr in _NOISE_SUBJECT_RES]

# PR labels that indicate non-product changes worth skipping.
NOISE_PR_LABELS = {"documentation", "chore", "dependencies", "ci", "test", "tests"}


def filter_noise(commits: list) -> list:
    """Return only the commits whose subjects look like shipped work.

    Drops empty subjects, very short one-word subjects, and anything
    matching a NOISE_PATTERNS regex.
    """
    kept = []
    for entry in commits:
        title = entry.get("subject", "").strip()
        if not title:
            continue
        # A lone short word ("fix", "wip", "v2") carries no signal.
        if len(title.split()) == 1 and len(title) < 8:
            continue
        if any(pat.match(title) for pat in NOISE_PATTERNS):
            continue
        kept.append(entry)
    return kept
101
+
102
+
103
+ # ---------------------------------------------------------------------------
104
+ # Git commits
105
+ # ---------------------------------------------------------------------------
106
+
107
def get_git_commits(since_days: int, repo_path: str = ".") -> list:
    """Collect recent commits from the local git repository.

    Returns a list of dicts with subject, body, source, short hash, and
    date. Returns [] when git is missing, the command fails, or errors.

    Fix: the previous "%H|%s|%b|%ad" one-line-per-commit format broke on
    multi-line commit bodies (body lines were parsed as bogus extra
    commits) and on "|" characters in subjects. ASCII unit (\\x1f) and
    record (\\x1e) separators make parsing unambiguous.
    """
    try:
        result = subprocess.run(
            [
                "git", "-C", repo_path, "log",
                f"--since={since_days} days ago",
                # %x1f separates fields, %x1e terminates each record.
                "--pretty=format:%H%x1f%s%x1f%b%x1f%ad%x1e",
                "--date=short",
            ],
            capture_output=True, text=True, timeout=30
        )
        if result.returncode != 0:
            return []
        commits = []
        for record in result.stdout.split("\x1e"):
            record = record.strip()
            if not record:
                continue
            parts = record.split("\x1f", 3)
            commit_hash = parts[0].strip() if len(parts) > 0 else ""
            subject = parts[1].strip() if len(parts) > 1 else ""
            body = parts[2].strip() if len(parts) > 2 else ""
            date = parts[3].strip() if len(parts) > 3 else ""
            if subject:
                commits.append({
                    "subject": subject,
                    "body": body,
                    "source": "git_commit",
                    "hash": commit_hash[:7],
                    "date": date,
                })
        return commits
    except FileNotFoundError:
        # git binary not on PATH -- degrade gracefully.
        if not quiet:
            print(" git not installed -- skipping git commits", file=sys.stderr)
        return []
    except Exception as e:
        if not quiet:
            print(f" git error: {e}", file=sys.stderr)
        return []
146
+
147
+
148
+ # ---------------------------------------------------------------------------
149
+ # GitHub PRs
150
+ # ---------------------------------------------------------------------------
151
+
152
def get_github_prs(repo: str, since_date: str) -> list:
    """Fetch merged PRs for *repo* ("owner/repo") merged on/after *since_date*.

    Requires GITHUB_TOKEN; returns [] when it (or *repo*) is missing, when
    the API call fails, or when no PRs qualify. PRs carrying a noise label
    are skipped.
    """
    if not GITHUB_TOKEN:
        if not quiet:
            print(" GITHUB_TOKEN not set -- skipping GitHub PRs", file=sys.stderr)
        return []
    if not repo:
        return []

    payload = gh_get(f"/repos/{repo}/pulls?state=closed&sort=updated&direction=desc&per_page=50")
    if not payload:
        return []

    matched = []
    for entry in payload:
        merged_at = entry.get("merged_at")
        if not merged_at:
            continue  # closed but never merged
        if merged_at[:10] < since_date:
            continue  # merged before the window
        tags = [lbl["name"].lower() for lbl in (entry.get("labels") or [])]
        if any(tag in NOISE_PR_LABELS for tag in tags):
            continue  # housekeeping PR
        matched.append({
            "subject": entry.get("title", "").strip(),
            "body": (entry.get("body") or "").strip()[:500],
            "source": "github_pr",
            "pr_number": entry.get("number"),
            "merged_at": merged_at[:10],
            "labels": tags,
        })
    return matched
186
+
187
+
188
+ # ---------------------------------------------------------------------------
189
+ # Free text input
190
+ # ---------------------------------------------------------------------------
191
+
192
def parse_free_text(items_str: str) -> list:
    """Split pipe- or newline-separated free text into item dicts.

    Blank segments are dropped; each remaining segment becomes
    {"subject": ..., "source": "free_text"}.
    """
    if not items_str or not items_str.strip():
        return []
    pieces = (segment.strip() for segment in re.split(r"[|\n]", items_str))
    return [{"subject": piece, "source": "free_text"} for piece in pieces if piece]
201
+
202
+
203
+ # ---------------------------------------------------------------------------
204
+ # Deduplication
205
+ # ---------------------------------------------------------------------------
206
+
207
def deduplicate(commits: list, prs: list) -> list:
    """Merge PRs and commits, dropping commits that duplicate a PR.

    A commit whose subject is a substring of a PR title (or vice versa,
    case-insensitively) is treated as the same shipped item; PRs win.
    """
    merged = list(prs)
    titles = [entry["subject"].lower() for entry in prs]

    for commit in commits:
        needle = commit["subject"].lower()
        if not any(needle in title or title in needle for title in titles):
            merged.append(commit)

    return merged
223
+
224
+
225
+ # ---------------------------------------------------------------------------
226
+ # Existing changelog detection
227
+ # ---------------------------------------------------------------------------
228
+
229
def detect_existing_changelog(path: str = "docs/changelog.md") -> dict:
    """Inspect an existing changelog file for its latest entry label.

    Scans up to the first 100 lines for a "## ..." heading and reports
    whether the file exists, the last label found, and whether labels
    look semver-style ("v1.2.3" / "1.2.3") or date-based.
    """
    if not os.path.exists(path):
        return {"exists": False, "last_label": "", "format": "date-based"}

    try:
        head = []
        with open(path) as fh:
            for _ in range(100):
                raw = fh.readline()
                if raw.strip():
                    head.append(raw.rstrip())
    except Exception:
        # Unreadable file: report existence but nothing else.
        return {"exists": True, "last_label": "", "format": "date-based"}

    label = ""
    fmt = "date-based"
    for text in head:
        # The first "## ..." heading is the most recent entry label.
        heading = re.match(r"^## (.+)$", text)
        if heading:
            label = heading.group(1).strip()
            if re.match(r"^v?\d+\.\d+\.\d+", label):
                fmt = "semver"
            break

    return {"exists": True, "last_label": label, "format": fmt}
254
+
255
+
256
+ # ---------------------------------------------------------------------------
257
+ # Main
258
+ # ---------------------------------------------------------------------------
259
+
260
def main():
    """CLI entry point: gather shipped items and emit a JSON summary.

    Pipeline: parse args -> resolve the time window -> collect free-text
    items, git commits (noise-filtered), and GitHub PRs -> dedupe ->
    detect an existing changelog -> write JSON to --output or stdout.

    Fix: an invalid --since value (non-numeric, or a regex-matched but
    impossible date like 2024-13-99) previously crashed with a raw
    ValueError; negative day counts silently produced a future window.
    Both now fail fast with a clear argparse error.
    """
    global quiet

    parser = argparse.ArgumentParser(description="Gather shipped items for product-update-logger")
    parser.add_argument("--since", default="7",
                        help="Days back to look, or YYYY-MM-DD date (default: 7)")
    parser.add_argument("--repo", default="",
                        help="GitHub repo as owner/repo for PR fetching")
    parser.add_argument("--items", default="",
                        help="Pipe-separated free text items")
    parser.add_argument("--output", default="/tmp/pul-raw.json",
                        help="Output file path (default: /tmp/pul-raw.json)")
    parser.add_argument("--stdout", action="store_true",
                        help="Print JSON to stdout instead of file")
    parser.add_argument("--quiet", action="store_true",
                        help="Suppress progress output")
    args = parser.parse_args()

    quiet = args.quiet
    use_stdout = args.stdout

    def log(msg):
        # Progress goes to stderr when JSON is on stdout, so piped
        # output (e.g. into jq) stays parseable.
        if not quiet:
            print(msg, file=sys.stderr if use_stdout else sys.stdout)

    # Resolve since_days and since_date from a day count or a date.
    since_str = args.since.strip()
    if re.match(r"^\d{4}-\d{2}-\d{2}$", since_str):
        try:
            since_dt = datetime.strptime(since_str, "%Y-%m-%d").date()
        except ValueError:
            parser.error(f"--since is not a valid calendar date: {since_str!r}")
        since_date = since_str
        delta = TODAY.date() - since_dt
        since_days = max(delta.days, 1)
    else:
        try:
            since_days = int(since_str)
        except ValueError:
            parser.error(f"--since must be a day count or YYYY-MM-DD date, got: {since_str!r}")
        if since_days < 1:
            parser.error("--since day count must be >= 1")
        since_date = (TODAY - timedelta(days=since_days)).strftime("%Y-%m-%d")

    today_str = TODAY.strftime("%Y-%m-%d")

    log(f"Gathering items since {since_date} ({since_days} days)...")

    # 1. Free text items
    free_text_items = parse_free_text(args.items)
    if free_text_items:
        log(f" Free text: {len(free_text_items)} items")

    # 2. Git commits (only if we are inside a work tree)
    git_available = False
    raw_commits = []
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--is-inside-work-tree"],
            capture_output=True, text=True, timeout=5
        )
        git_available = result.returncode == 0
    except Exception:
        git_available = False

    noise_filtered = 0
    if git_available:
        raw_commits = get_git_commits(since_days)
        before = len(raw_commits)
        raw_commits = filter_noise(raw_commits)
        noise_filtered = before - len(raw_commits)
        log(f" Git: {len(raw_commits)} commits ({noise_filtered} noise filtered)")

    # 3. GitHub PRs (needs both a token and a repo)
    github_available = bool(GITHUB_TOKEN and args.repo)
    prs = []
    if github_available:
        prs = get_github_prs(args.repo, since_date)
        log(f" GitHub PRs: {len(prs)} merged PRs")

    # 4. Deduplicate commits against PRs (PRs take precedence)
    combined = deduplicate(raw_commits, prs)

    # 5. Merge with free text (free text first, then git/PR items)
    all_items = free_text_items + combined
    free_text_provided = bool(free_text_items)

    # 6. Detect existing changelog so downstream can match its format
    existing_changelog = detect_existing_changelog()

    output = {
        "date": today_str,
        "since_date": since_date,
        "since_days": since_days,
        "git_available": git_available,
        "github_available": github_available,
        "free_text_provided": free_text_provided,
        "items": all_items,
        "total_items": len(all_items),
        "noise_filtered": noise_filtered,
        "existing_changelog": existing_changelog,
    }

    if use_stdout:
        print(json.dumps(output, indent=2))
    else:
        with open(args.output, "w") as f:
            json.dump(output, f, indent=2)
        log(f"\nOutput: {args.output}")
        log(f"Total items: {len(all_items)}")
361
+
362
+
363
# Script entry point: run the gatherer only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
@@ -0,0 +1,42 @@
1
+ # vc-curated-match
2
+
3
+ > Identify targeted VC funds based on a product's description and URL. This skill matches project inputs to a curated dataset of top global venture capital firms based on industry tags, stage, and geography.
4
+
5
+ [![opendirectory](https://img.shields.io/badge/opendirectory-skill-blue)](https://opendirectory.dev)
6
+
7
+ ## Overview
8
+
9
+ `vc-curated-match` is an OpenDirectory skill that connects founders and open-source creators with highly relevant Venture Capital firms. It relies on a static, curated list of real VC funds to prevent LLM hallucinations, ensuring all recommendations and rationales are grounded in actual fund thesis data.
10
+
11
+ **Positioning Note**: This skill is intentionally different from live-research investor discovery workflows. It provides deterministic, curated VC matching from a verified static dataset. It is best for fast, low-cost, repeatable first-pass investor targeting.
12
+
13
+ ## Prerequisites
14
+ - Python 3.10+ (Standard Library only)
15
+
16
+ ## Implementation Specs
17
+ - Pulls from a static `data/vc_funds.json` dataset to guarantee data validity.
18
+ - Ranks funds using tag-matching algorithms across industry focus, stage, and geography fit.
19
+ - Outputs confidence tiers (High, Medium, Low) to transparently surface the precision of the fit.
20
+
21
+ ## Usage
22
+
23
+ ```bash
24
+ python scripts/run.py \
25
+ --description "A Next.js template for enterprise B2B SaaS" \
26
+ --url "https://trymylandingpage.com" \
27
+ --stage "Pre-Seed"
28
+ ```
29
+
30
+ ## Methodology
31
+
32
+ - **Tag Matching**: Deterministic keyword matching using whole-word regex boundaries to ensure precision.
33
+ - **Geography Inference**: Inferred primarily from the URL Top-Level Domain (TLD). For example, `.in` triggers India and `.eu` triggers Europe.
34
+ - **Default Baseline**: Domains like `.com`, `.io`, and `.ai` default to a `Global` geography hint unless a specific `--geography` flag is provided.
35
+
36
+ ## Limitations
37
+
38
+ - **No Live Research**: This skill does not perform live web discovery and may miss niche or newly launched funds not present in the dataset.
39
+ - **Static Dataset**: The VC fund list is a curated static dataset. It reflects fund theses as of the last update and may not capture real-time changes in fund availability or personnel.
40
+ - **Taxonomy Constraints**: The scoring engine relies on a fixed taxonomy. Extremely niche or highly unusual product descriptions may not trigger specific industry tags and will default to a "Generalist" view.
41
+ - **Human Review Required**: These outputs are best-effort algorithmic matches. They serve as a research starting point and **must be reviewed by a human** before starting outreach.
42
+ - **No Financial Advice**: This tool does not provide financial or investment advice.
@@ -0,0 +1,59 @@
1
+ ---
2
+ name: vc-curated-match
3
+ description: Accepts a product description and URL to algorithmically identify relevant Venture Capital investors targeting exactly that stage, industry, and niche based on a curated static dataset.
4
+ author: OpenDirectory
5
+ version: 1.0.0
6
+ ---
7
+
8
+ # VC Curated Match Skill
9
+
10
+ Identify targeted VC funds based on a product's description and URL.
11
+
12
+ ---
13
+
14
+ ## When to Trigger This Skill
15
+
16
+ Use this skill when the user asks to:
17
+ - Find investors for their startup or open-source project.
18
+ - Get a list of relevant VC funds by stage, industry, or space.
19
+ - Match their product built with specific technologies to investor theses.
20
+
21
+ ---
22
+
23
+ ## Step 1: Confirm Input
24
+
25
+ Ask the user for the product description and the URL.
26
+ Optional parameters:
27
+ - Stage: `Pre-Seed`, `Seed`, `Series A` (`--stage`)
28
+ - Output: e.g., `vc-matches.md` (`--output`)
29
+
30
+ ---
31
+
32
+ ## Step 2: Fetch Matches & Generate Report
33
+
34
+ Run the orchestrator script to handle context fetching, VC matching, and Markdown generation in one command.
35
+
36
+ ```bash
37
+ python scripts/run.py --description "A fast rust-based web framework" --url "https://example.com"
38
+ ```
39
+
40
+ For custom requests, append overrides:
41
+ ```bash
42
+ python scripts/run.py \
43
+ --description "..." \
44
+ --url "..." \
45
+ [--stage Seed] \
46
+ [--output matched-investors.md]
47
+ ```
48
+
49
+ ---
50
+
51
+ ## Step 3: Present Results
52
+
53
+ Summarize the closest matches returned. Mention high-confidence matches explicitly. Provide the output path.
54
+
55
+ ---
56
+
57
+ ## Dependencies
58
+
59
+ Standard Python 3.10+ library (no external packages required).