@opendirectory.dev/skills 0.1.40 → 0.1.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/registry.json +8 -0
- package/skills/vc-curated-match/README.md +42 -0
- package/skills/vc-curated-match/SKILL.md +59 -0
- package/skills/vc-curated-match/data/vc_funds.json +277 -0
- package/skills/vc-curated-match/evals/ai-b2b-saas-seed.md +141 -0
- package/skills/vc-curated-match/evals/devtool-oss-seed.md +141 -0
- package/skills/vc-curated-match/evals/evals.json +43 -0
- package/skills/vc-curated-match/evals/fintech-india-preseed.md +142 -0
- package/skills/vc-curated-match/evals/varnan-seed.md +141 -0
- package/skills/vc-curated-match/scripts/fetch_product_context.py +80 -0
- package/skills/vc-curated-match/scripts/generate_report.py +111 -0
- package/skills/vc-curated-match/scripts/match_vcs.py +127 -0
- package/skills/vc-curated-match/scripts/run.py +82 -0
- package/skills/vc-finder/.env.example +1 -5
- package/skills/vc-finder/README.md +16 -9
- package/skills/vc-finder/SKILL.md +446 -193
- package/skills/vc-finder/data/vc_funds.json +277 -0
- package/skills/vc-finder/evals/evals.json +43 -25
- package/skills/vc-finder/scripts/match_funds.py +144 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from typing import List, Dict, Tuple
|
|
4
|
+
|
|
5
|
+
def load_funds(data_path: str) -> List[Dict]:
    """Read the curated VC fund dataset (data/vc_funds.json).

    Raises FileNotFoundError with the dataset path when the file is missing.
    """
    try:
        with open(data_path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except FileNotFoundError:
        # Re-raise with a message that names the expected location.
        raise FileNotFoundError(f"Dataset not found at {data_path}") from None
|
|
11
|
+
|
|
12
|
+
def score_fund(fund: Dict, product_context: Dict) -> Tuple[int, List[str]]:
    """Score one fund against the extracted product context.

    Components:
      * tag overlap     — 20 pts per specific tag, 5 for "Generalist", capped at 60
      * stage alignment — 20 exact / 10 adjacent (or 10 when no hint given)
      * geography match — 20 exact / 15 global fund / 10 when no hint
    followed by two penalty heuristics (India-only fund in a US search,
    primary-focus mismatch). Returns (score, matched_tags).
    """
    stage_rank = {"Pre-seed": 0, "Seed": 1, "Series A": 2, "Growth": 3}

    # 1. Tag overlap (max 60 points)
    fund_tags = fund.get("industry_tags", [])
    wanted = product_context.get("extracted_tags", []) or ["Generalist"]

    matched = [t for t in wanted if t in fund_tags]
    tag_points = min(sum(5 if t == "Generalist" else 20 for t in matched), 60)
    total = tag_points

    # 2. Stage alignment (max 20 points)
    stage_hint = product_context.get("stage_hint")
    fund_stages = fund.get("stage_focus", [])
    if not stage_hint:
        total += 10  # neutral credit when the caller gave no stage
    elif fund_stages:
        if stage_hint in fund_stages:
            total += 20
        elif stage_hint in stage_rank:
            hint_pos = stage_rank[stage_hint]
            # Half credit when the fund invests one stage earlier/later.
            if any(s in stage_rank and abs(stage_rank[s] - hint_pos) == 1
                   for s in fund_stages):
                total += 10
    # else: fund lists no stages -> no stage contribution

    # 3. Geography match (max 20 points)
    geo_hint = product_context.get("geography_hint")
    fund_geo = fund.get("geography_focus", []) or ["Global"]
    if not geo_hint or geo_hint == "Global":
        total += 10
    elif fund_geo == ["India"] and geo_hint == "US":
        pass  # hard exclusion; the penalty below reinforces it
    elif geo_hint in fund_geo:
        total += 20
    elif "Global" in fund_geo:
        total += 15

    # 4. Penalties
    # Heuristic 1: India-only funds surfaced in US searches
    if (geo_hint == "US" and "India" in fund_geo
            and "US" not in fund_geo and "Global" not in fund_geo):
        total = max(0, total - 30)

    # Heuristic 2: the fund's primary tag is off-topic and overlap was weak
    if fund_tags and fund_tags[0] not in wanted and tag_points <= 20:
        total = max(0, total - 15)

    return total, matched
|
|
92
|
+
|
|
93
|
+
def get_confidence_tier(score: int) -> str:
    """Map a numeric match score to "High" (>=70), "Medium" (>=40), or "Low"."""
    for label, floor in (("High", 70), ("Medium", 40)):
        if score >= floor:
            return label
    return "Low"
|
|
100
|
+
|
|
101
|
+
def match_vcs(product_context: Dict, data_path: str = "data/vc_funds.json") -> List[Dict]:
    """Score every fund in the dataset and return the prioritized matches.

    Results are sorted by score (descending), tie-broken by fund name.
    If no fund scores above zero, up to five generalist funds are
    returned instead, each flagged with a warning.
    """
    funds = load_funds(data_path)

    ranked = []
    for fund in funds:
        pts, hits = score_fund(fund, product_context)
        ranked.append({
            "fund": fund,
            "score": pts,
            "confidence": get_confidence_tier(pts),
            "matched_tags": hits,
        })

    ranked.sort(key=lambda entry: (-entry["score"], entry["fund"].get("fund_name", "")))

    # Scores are non-negative, so "no nonzero score" == "all scores are zero".
    if not any(entry["score"] for entry in ranked):
        fallback = [f for f in funds if "Generalist" in f.get("industry_tags", [])]
        return [
            {
                "fund": g,
                "score": 0,
                "confidence": "Low",
                "matched_tags": ["Generalist"],
                "warning": "No strong matches found. Showing generalist funds only.",
            }
            for g in fallback[:5]
        ]

    return ranked[:10]
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
# Import the core pipeline components
|
|
6
|
+
from fetch_product_context import get_product_context
|
|
7
|
+
from match_vcs import match_vcs
|
|
8
|
+
from generate_report import generate_report
|
|
9
|
+
|
|
10
|
+
def _normalize_stage(raw):
    """Canonicalize a user-supplied stage hint to the dataset's spelling.

    The scorer's stage table uses "Pre-seed", "Seed", "Series A" and
    "Growth". Plain str.capitalize() lowercases everything after the
    first character, turning "series a" into "Series a" — which never
    matches — so known stages go through an explicit lookup first.
    Unknown stages keep the old capitalize() fallback; blank/None input
    yields None.
    """
    if not raw or not raw.strip():
        return None
    cleaned = raw.strip()
    canonical = {
        "pre-seed": "Pre-seed",
        "seed": "Seed",
        "series a": "Series A",
        "growth": "Growth",
    }
    return canonical.get(cleaned.lower(), cleaned.capitalize())


def main():
    """CLI entry point: parse args, run the match pipeline, write the report."""
    parser = argparse.ArgumentParser(description="vc-curated-match: Algorithmically identify relevant VCs based on product context.")

    parser.add_argument("--description", required=True, help="Product description string.")
    parser.add_argument("--url", required=True, help="Product homepage or GitHub URL.")
    parser.add_argument("--stage", default=None, help="Optional startup stage hint.")
    parser.add_argument("--geography", default=None, help="Optional target geography (Defaults to Global inference).")
    parser.add_argument("--output", default="vc-matches.md", help="Output file path (Defaults to vc-matches.md).")

    args = parser.parse_args()

    # Fail fast on blank required inputs (argparse only checks presence).
    if not args.description or not args.description.strip():
        print("Error: --description must not be empty or whitespace only.", file=sys.stderr)
        sys.exit(1)

    if not args.url or not args.url.strip():
        print("Error: --url must not be empty.", file=sys.stderr)
        sys.exit(1)

    # The curated dataset must ship alongside the skill.
    data_path = os.path.join(os.path.dirname(__file__), "..", "data", "vc_funds.json")
    if not os.path.exists(data_path):
        print("Error: data/vc_funds.json not found. Make sure you are running from the skill root directory.", file=sys.stderr)
        sys.exit(1)

    try:
        stage_input = _normalize_stage(args.stage)

        # 1. Fetch product context
        product_context = get_product_context(
            description=args.description.strip(),
            url=args.url.strip(),
            stage=stage_input,
            geography=args.geography.strip() if args.geography else None
        )

        # 2. Match VCs against the curated dataset
        matches = match_vcs(product_context, data_path=data_path)

        # 3. Render the markdown report
        report_str = generate_report(matches, product_context)

        # Ensure output directory exists and write
        output_dir = os.path.dirname(os.path.abspath(args.output))
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        try:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(report_str)
        except IOError:
            print(f"Error: Could not write to {args.output}.", file=sys.stderr)
            sys.exit(1)

        # 4. Console summary (note: sys.exit raises SystemExit, which the
        # broad Exception handler below deliberately does not catch).
        high = sum(1 for m in matches if m.get("confidence") == "High")
        medium = sum(1 for m in matches if m.get("confidence") == "Medium")
        low = sum(1 for m in matches if m.get("confidence") == "Low")

        print(f"Done. Report saved to {args.output}")
        print(f"Found {len(matches)} matches: {high} High, {medium} Medium, {low} Low confidence")

    except Exception as e:
        # Top-level boundary: surface any pipeline failure as a clean exit.
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
|
@@ -1,10 +1,6 @@
|
|
|
1
1
|
# vc-finder: Environment Variables
|
|
2
2
|
# ===================================
|
|
3
|
-
#
|
|
4
|
-
|
|
5
|
-
# Required: Google Gemini API key for product analysis and VC synthesis
|
|
6
|
-
# Get it: aistudio.google.com > Get API key
|
|
7
|
-
GEMINI_API_KEY=your_gemini_api_key_here
|
|
3
|
+
# Tavily is required. Firecrawl is recommended.
|
|
8
4
|
|
|
9
5
|
# Required: Tavily API key for VC investment research (Track A and Track B searches)
|
|
10
6
|
# Get it: app.tavily.com > API Keys
|
|
@@ -30,29 +30,32 @@ https://github.com/user-attachments/assets/ee98a1b5-ebc4-452f-bbfb-c434f2935067
|
|
|
30
30
|
|
|
31
31
|
- Fetches the product URL via Firecrawl (handles JS-rendered SPAs) or Tavily extract as fallback
|
|
32
32
|
- Detects funding stage from CTA signals on the page (waitlist, free trial, pricing, sales CTAs)
|
|
33
|
-
-
|
|
33
|
+
- Maps a 3-level industry taxonomy (L1 > L2 > L3) from the product page
|
|
34
|
+
- **Curated pre-match (Step 5b):** Scores product against a verified dataset of 25 VC funds (sourced from fund websites) -- instant zero-hallucination matches with no Tavily credits consumed
|
|
35
|
+
- **Discovers comparable companies:** Curated portfolio companies from matched funds + Tavily live search for L3-niche specifics
|
|
34
36
|
- Track A: 5 Tavily searches to find who invested in each comparable company
|
|
35
37
|
- Track B: 3 Tavily searches to find VCs who publish investment theses about this specific niche
|
|
36
|
-
-
|
|
38
|
+
- Synthesizes and ranks all found VCs -- curated matches labeled "verified", Tavily matches labeled by track
|
|
37
39
|
- Produces top 5 deep-dives with fund overview, portfolio evidence, how-to-approach, and outreach hook
|
|
38
40
|
- Generates 3 product-specific outreach hooks (not generic advice)
|
|
39
41
|
- Saves output to `docs/vc-intel/[product]-[date].md`
|
|
40
42
|
|
|
43
|
+
**Zero-hallucination guarantee:** Every VC name, fund detail, check size, portfolio company, and thesis source in the output must trace to either (a) the curated `data/vc_funds.json` dataset (sourced from fund websites) or (b) a specific Tavily search result. The AI does not draw from training knowledge for any factual claim.
|
|
44
|
+
|
|
41
45
|
## Requirements
|
|
42
46
|
|
|
43
47
|
| Requirement | Purpose | How to Set Up |
|
|
44
48
|
|---|---|---|
|
|
45
|
-
| Gemini API key | Product analysis and VC synthesis | aistudio.google.com, Get API key |
|
|
46
49
|
| Tavily API key | VC investment research (Track A and Track B) | app.tavily.com, free tier: 1000 credits/month |
|
|
47
|
-
| Firecrawl API key | Fetching JS-rendered product pages | firecrawl.dev, free tier: 500 credits/month |
|
|
50
|
+
| Firecrawl API key | Fetching JS-rendered product pages (optional) | firecrawl.dev, free tier: 500 credits/month |
|
|
48
51
|
|
|
49
|
-
|
|
52
|
+
Tavily is required. Firecrawl is recommended -- without it, Tavily extract is used as fallback (may miss JS-rendered content).
|
|
50
53
|
|
|
51
54
|
## Setup
|
|
52
55
|
|
|
53
56
|
```bash
|
|
54
57
|
cp .env.example .env
|
|
55
|
-
# Add
|
|
58
|
+
# Add TAVILY_API_KEY (required)
|
|
56
59
|
# Add FIRECRAWL_API_KEY (recommended)
|
|
57
60
|
```
|
|
58
61
|
|
|
@@ -90,9 +93,9 @@ Each run produces:
|
|
|
90
93
|
## Cost per Run
|
|
91
94
|
|
|
92
95
|
- Firecrawl: ~$0.001 per fetch
|
|
93
|
-
- Tavily:
|
|
94
|
-
-
|
|
95
|
-
- Total: ~$0.
|
|
96
|
+
- Tavily: 10 searches at ~$0.01 each = ~$0.10 (2 comparable discovery + 5 Track A + 3 Track B)
|
|
97
|
+
- Curated pre-match (Step 5b): $0.00 -- local scoring against `data/vc_funds.json`, no API calls
|
|
98
|
+
- Total: ~$0.10 per run
|
|
96
99
|
|
|
97
100
|
## Project Structure
|
|
98
101
|
|
|
@@ -101,6 +104,10 @@ vc-finder/
|
|
|
101
104
|
├── SKILL.md
|
|
102
105
|
├── README.md
|
|
103
106
|
├── .env.example
|
|
107
|
+
├── data/
|
|
108
|
+
│ └── vc_funds.json (25 verified funds, sourced from fund websites)
|
|
109
|
+
├── scripts/
|
|
110
|
+
│ └── match_funds.py (standalone scoring script for testing)
|
|
104
111
|
├── evals/
|
|
105
112
|
│ └── evals.json
|
|
106
113
|
└── references/
|