@opendirectory.dev/skills 0.1.39 → 0.1.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,142 @@
1
+ # VC Curated Match Report
2
+
3
+ **Product:** A payment infrastructure API for e-commerce businesses in India
4
+ **URL:** https://example-fintech.in
5
+ **Extracted Tags:** Data, E-commerce, FinTech, Infrastructure
6
+ **Stage:** Pre-seed
7
+ **Geography:** India
8
+ **Generated:** 2026-04-23
9
+
10
+ ---
11
+
12
+ ## Top VC Matches
13
+
14
+ ---
15
+ ## 1. Accel India — High Confidence
16
+
17
+ **Thesis:** We partner with exceptional founders from inception through all phases of private company growth in the Indian ecosystem.
18
+ **Check Size:** $1M - $15M
19
+ **Stage Focus:** Seed, Series A, Growth
20
+ **Geography:** India
21
+ **Notable Portfolio:** Flipkart, Swiggy, Freshworks
22
+ **Website:** https://www.accel.com/india
23
+ **Match Score:** 70/100
24
+ **Why this match:** This fund focuses on E-commerce, FinTech which aligns with your product's identified sector(s). They are active at your target stage (Pre-seed) and correspond to your geography focus (India).
25
+
26
+ ---
27
+ ## 2. Bain Capital Ventures — Medium Confidence
28
+
29
+ **Thesis:** From seed to growth, we back founders building legendary infrastructure, fintech, application, and commerce companies.
30
+ **Check Size:** $1M - $50M+
31
+ **Stage Focus:** Seed, Series A, Growth
32
+ **Geography:** US, Global
33
+ **Notable Portfolio:** DocuSign, SendGrid, Redis
34
+ **Website:** https://www.baincapitalventures.com
35
+ **Match Score:** 65/100
36
+ **Why this match:** This fund focuses on FinTech, Infrastructure which aligns with your product's identified sector(s). They are active at your target stage (Pre-seed) and correspond to your geography focus (India).
37
+
38
+ ---
39
+ ## 3. Amplify Partners — Medium Confidence
40
+
41
+ **Thesis:** We invest in technical founders building the next generation of IT infrastructure, developer tools, and data platforms.
42
+ **Check Size:** $2M - $8M
43
+ **Stage Focus:** Seed, Series A
44
+ **Geography:** US
45
+ **Notable Portfolio:** Datadog, OCTO, dbt Labs
46
+ **Website:** https://www.amplifypartners.com
47
+ **Match Score:** 50/100
48
+ **Why this match:** This fund focuses on Data, Infrastructure which aligns with your product's identified sector(s). They are active at your target stage (Pre-seed) and correspond to your geography focus (India).
49
+
50
+ ---
51
+ ## 4. boldstart ventures — Medium Confidence
52
+
53
+ **Thesis:** Day one partner for developer first, crypto, and SaaS founders. We love deeply technical founders solving hard infrastructure problems.
54
+ **Check Size:** $1M - $3M
55
+ **Stage Focus:** Pre-seed, Seed
56
+ **Geography:** Global, US
57
+ **Notable Portfolio:** Snyk, Blockdaemon, Superhuman
58
+ **Website:** https://boldstart.vc
59
+ **Match Score:** 40/100
60
+ **Why this match:** This fund focuses on Infrastructure which aligns with your product's identified sector(s). They are active at your target stage (Pre-seed) and correspond to your geography focus (India).
61
+
62
+ ---
63
+ ## 5. Elevation Capital — Low Confidence
64
+
65
+ **Thesis:** We partner with visionary founders in India across early stages to help them build category-defining businesses.
66
+ **Check Size:** $1M - $10M
67
+ **Stage Focus:** Seed, Series A
68
+ **Geography:** India
69
+ **Notable Portfolio:** Paytm, Swiggy, Meesho
70
+ **Website:** https://elevationcapital.com
71
+ **Match Score:** 35/100
72
+ **Why this match:** This fund focuses on FinTech which aligns with your product's identified sector(s). They are active at your target stage (Pre-seed) and correspond to your geography focus (India).
73
+
74
+ ---
75
+ ## 6. Peak XV Partners — Low Confidence
76
+
77
+ **Thesis:** Formerly Sequoia India & SEA, we partner with founders across early, growth, and public stages to build enduring companies.
78
+ **Check Size:** $1M - $20M+
79
+ **Stage Focus:** Seed, Series A, Growth
80
+ **Geography:** India, South Asia
81
+ **Notable Portfolio:** Zomato, Pine Labs, Cred
82
+ **Website:** https://www.peakxv.com
83
+ **Match Score:** 35/100
84
+ **Why this match:** This fund focuses on FinTech which aligns with your product's identified sector(s). They are active at your target stage (Pre-seed) and correspond to your geography focus (India).
85
+
86
+ ---
87
+ ## 7. Bessemer Venture Partners — Low Confidence
88
+
89
+ **Thesis:** BVP helps entrepreneurs lay strong foundations to build and forge long-standing companies.
90
+ **Check Size:** $1M - $20M+
91
+ **Stage Focus:** Seed, Series A, Growth
92
+ **Geography:** Global
93
+ **Notable Portfolio:** LinkedIn, Twilio, Shopify
94
+ **Website:** https://www.bvp.com
95
+ **Match Score:** 30/100
96
+ **Why this match:** This fund focuses on FinTech which aligns with your product's identified sector(s). They are active at your target stage (Pre-seed) and correspond to your geography focus (India).
97
+
98
+ ---
99
+ ## 8. Heavybit — Low Confidence
100
+
101
+ **Thesis:** The leading investor in developer-first startups. We help technical founders launch, gain traction, and build enterprise-ready companies.
102
+ **Check Size:** $1M - $5M
103
+ **Stage Focus:** Seed, Series A
104
+ **Geography:** Global, US
105
+ **Notable Portfolio:** PagerDuty, Sanity, Netlify
106
+ **Website:** https://www.heavybit.com
107
+ **Match Score:** 30/100
108
+ **Why this match:** This fund focuses on Infrastructure which aligns with your product's identified sector(s). They are active at your target stage (Pre-seed) and correspond to your geography focus (India).
109
+
110
+ ---
111
+ ## 9. Index Ventures — Low Confidence
112
+
113
+ **Thesis:** We back the best and most ambitious entrepreneurs across all stages to build category-defining businesses.
114
+ **Check Size:** $1M - $20M+
115
+ **Stage Focus:** Seed, Series A, Growth
116
+ **Geography:** Europe, US, Global
117
+ **Notable Portfolio:** Dropbox, Slack, Figma
118
+ **Website:** https://www.indexventures.com
119
+ **Match Score:** 30/100
120
+ **Why this match:** This fund focuses on FinTech which aligns with your product's identified sector(s). They are active at your target stage (Pre-seed) and correspond to your geography focus (India).
121
+
122
+ ---
123
+ ## 10. Lightspeed Venture Partners — Low Confidence
124
+
125
+ **Thesis:** We invest globally in enterprise, consumer, and health founders who are shaping the future.
126
+ **Check Size:** $1M - $25M+
127
+ **Stage Focus:** Seed, Series A, Growth
128
+ **Geography:** Global
129
+ **Notable Portfolio:** Snap, Rippling, MuleSoft
130
+ **Website:** https://lsvp.com
131
+ **Match Score:** 30/100
132
+ **Why this match:** This fund focuses on FinTech which aligns with your product's identified sector(s). They are active at your target stage (Pre-seed) and correspond to your geography focus (India).
133
+
134
+ ---
135
+
136
+ ## Assumptions & Limitations
137
+
138
+ - Dataset contains 25 funds verified as of 2026-04-23
139
+ - Fund theses and portfolios change over time. Verify directly at each fund's website before outreach.
140
+ - This tool matches based on public thesis data only. It does not reflect current deployment status or fund availability.
141
+ - Matches are a starting point for research, not financial advice.
142
+ - Low-confidence matches are included because no stronger sector-specific matches were found.
@@ -0,0 +1,141 @@
1
+ # VC Curated Match Report
2
+
3
+ **Product:** Varnan is a GTM distribution platform for AI startups. It helps founders automate outreach across LinkedIn, email, and s...
4
+ **URL:** https://varnan.tech
5
+ **Extracted Tags:** AI, B2B SaaS, Consumer
6
+ **Stage:** Seed
7
+ **Geography:** Global
8
+ **Generated:** 2026-04-23
9
+
10
+ ---
11
+
12
+ ## Top VC Matches
13
+
14
+ ---
15
+ ## 1. Peak XV Partners — High Confidence
16
+
17
+ **Thesis:** Formerly Sequoia India & SEA, we partner with founders across early, growth, and public stages to build enduring companies.
18
+ **Check Size:** $1M - $20M+
19
+ **Stage Focus:** Seed, Series A, Growth
20
+ **Geography:** India, South Asia
21
+ **Notable Portfolio:** Zomato, Pine Labs, Cred
22
+ **Website:** https://www.peakxv.com
23
+ **Match Score:** 90/100
24
+ **Why this match:** This fund focuses on AI, B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
25
+
26
+ ---
27
+ ## 2. Accel — High Confidence
28
+
29
+ **Thesis:** We partner with exceptional founders from inception through all phases of private company growth.
30
+ **Check Size:** $1M - $20M+
31
+ **Stage Focus:** Seed, Series A, Growth
32
+ **Geography:** Global
33
+ **Notable Portfolio:** Facebook, Atlassian, Spotify
34
+ **Website:** https://www.accel.com
35
+ **Match Score:** 70/100
36
+ **Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
37
+
38
+ ---
39
+ ## 3. Accel India — High Confidence
40
+
41
+ **Thesis:** We partner with exceptional founders from inception through all phases of private company growth in the Indian ecosystem.
42
+ **Check Size:** $1M - $15M
43
+ **Stage Focus:** Seed, Series A, Growth
44
+ **Geography:** India
45
+ **Notable Portfolio:** Flipkart, Swiggy, Freshworks
46
+ **Website:** https://www.accel.com/india
47
+ **Match Score:** 70/100
48
+ **Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
49
+
50
+ ---
51
+ ## 4. Andreessen Horowitz (a16z) — High Confidence
52
+
53
+ **Thesis:** We invest in software eating the world. We back bold entrepreneurs building the future through technology.
54
+ **Check Size:** $1M - $50M+
55
+ **Stage Focus:** Seed, Series A, Growth
56
+ **Geography:** Global, US
57
+ **Notable Portfolio:** Facebook, Coinbase, Figma
58
+ **Website:** https://a16z.com
59
+ **Match Score:** 70/100
60
+ **Why this match:** This fund focuses on AI, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
61
+
62
+ ---
63
+ ## 5. Blume Ventures — High Confidence
64
+
65
+ **Thesis:** We are a seed and pre-seed venture fund that backs startups with both funding and active mentoring.
66
+ **Check Size:** $500k - $3M
67
+ **Stage Focus:** Pre-seed, Seed
68
+ **Geography:** India
69
+ **Notable Portfolio:** Unacademy, Purplle, GreyOrange
70
+ **Website:** https://blume.vc
71
+ **Match Score:** 70/100
72
+ **Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
73
+
74
+ ---
75
+ ## 6. Cherry Ventures — High Confidence
76
+
77
+ **Thesis:** We champion founders in Europe from their earliest days. We are generalist seed investors.
78
+ **Check Size:** $1M - $4M
79
+ **Stage Focus:** Pre-seed, Seed
80
+ **Geography:** Europe
81
+ **Notable Portfolio:** FlixBus, Auto1 Group, Forto
82
+ **Website:** https://www.cherry.vc
83
+ **Match Score:** 70/100
84
+ **Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
85
+
86
+ ---
87
+ ## 7. Elevation Capital — High Confidence
88
+
89
+ **Thesis:** We partner with visionary founders in India across early stages to help them build category-defining businesses.
90
+ **Check Size:** $1M - $10M
91
+ **Stage Focus:** Seed, Series A
92
+ **Geography:** India
93
+ **Notable Portfolio:** Paytm, Swiggy, Meesho
94
+ **Website:** https://elevationcapital.com
95
+ **Match Score:** 70/100
96
+ **Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
97
+
98
+ ---
99
+ ## 8. First Round Capital — High Confidence
100
+
101
+ **Thesis:** We are the seed-stage firm that builds the most supportive community for founders.
102
+ **Check Size:** $1M - $4M
103
+ **Stage Focus:** Pre-seed, Seed
104
+ **Geography:** US
105
+ **Notable Portfolio:** Uber, Notion, Roblox
106
+ **Website:** https://firstround.com
107
+ **Match Score:** 70/100
108
+ **Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
109
+
110
+ ---
111
+ ## 9. Founder Collective — High Confidence
112
+
113
+ **Thesis:** We are a seed-stage venture capital fund, built by founders, for founders. We back weird, wonderful, and wild startups.
114
+ **Check Size:** $500k - $2M
115
+ **Stage Focus:** Seed
116
+ **Geography:** US, Global
117
+ **Notable Portfolio:** Uber, Airtable, BuzzFeed
118
+ **Website:** https://www.foundercollective.com
119
+ **Match Score:** 70/100
120
+ **Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
121
+
122
+ ---
123
+ ## 10. Greylock Partners — High Confidence
124
+
125
+ **Thesis:** We partner with early-stage founders to build enterprise and consumer software companies that define new categories.
126
+ **Check Size:** $1M - $10M
127
+ **Stage Focus:** Seed, Series A
128
+ **Geography:** US
129
+ **Notable Portfolio:** Workday, Palo Alto Networks, LinkedIn
130
+ **Website:** https://greylock.com
131
+ **Match Score:** 70/100
132
+ **Why this match:** This fund focuses on AI, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
133
+
134
+ ---
135
+
136
+ ## Assumptions & Limitations
137
+
138
+ - Dataset contains 25 funds verified as of 2026-04-23
139
+ - Fund theses and portfolios change over time. Verify directly at each fund's website before outreach.
140
+ - This tool matches based on public thesis data only. It does not reflect current deployment status or fund availability.
141
+ - Matches are a starting point for research, not financial advice.
@@ -0,0 +1,80 @@
1
+ import re
2
+ from typing import Dict, List, Optional
3
+
4
# Keyword taxonomy: each sector tag maps to the lowercase keywords/phrases that
# trigger it. Keywords are matched as whole words/phrases by extract_tags().
TAXONOMY = {
    "AI": [
        "ai", "artificial intelligence", "machine learning", "ml", "llm",
        "large language model", "generative ai", "gen ai", "ai-powered",
        "ai powered", "neural network", "nlp", "natural language processing",
        "deep learning",
    ],
    "DevTools": [
        "developer tools", "devtools", "coding", "programming",
        "software development", "sdk", "developer", "developers",
        "engineering team", "software engineer", "for engineers", "cli",
        "terminal", "command line", "command-line", "command line tool",
    ],
    "B2B SaaS": [
        "saas", "b2b", "enterprise software", "business software",
        "subscription software", "automation", "workflow", "productivity",
        "b2b software", "automate",
    ],
    "Open Source": [
        "open source", "open-source", "coss", "github", "public repo",
    ],
    "FinTech": [
        "fintech", "financial", "payment", "banking",
    ],
    "Crypto": [
        "crypto", "blockchain", "bitcoin", "ethereum", "web3", "wallet",
    ],
    "HealthTech": [
        "healthtech", "healthcare", "medical", "biotech", "wellness",
    ],
    "Consumer": [
        "consumer", "b2c", "app", "social media", "social platform", "gaming",
        "lifestyle",
    ],
    "DeepTech": [
        "deeptech", "robotics", "quantum", "hard tech", "science",
    ],
    "Infrastructure": [
        "infrastructure", "infra", "cloud", "backend", "hosting", "database",
    ],
    "Cybersecurity": [
        "security", "cybersecurity", "privacy", "protection", "firewall",
    ],
    "Marketplaces": [
        "marketplace", "multi-sided", "brokerage", "two-sided market",
        "buyer and seller",
    ],
    "E-commerce": [
        "e-commerce", "ecommerce", "shopping", "retail", "online store",
    ],
    "Enterprise": [
        "enterprise", "corporate", "large scale",
    ],
    "Data": [
        "data", "api", "rest", "graphql", "interface",
    ],
}
21
+
22
+ from urllib.parse import urlparse
23
+
24
def get_geography_from_url(url: str) -> str:
    """Infer a coarse geography from the URL's top-level domain.

    Args:
        url: Product URL, with or without an ``http(s)://`` scheme.

    Returns:
        ``"India"`` for ``.in`` hosts, ``"Europe"`` for ``.uk``, ``.eu``,
        ``.de``, ``.fr`` and ``.nl`` hosts, otherwise ``"Global"``. Empty,
        non-string or unparseable input also yields ``"Global"``.
    """
    if not url or not isinstance(url, str):
        return "Global"

    candidate = url.strip()
    # Scheme comparison must be case-insensitive: "HTTPS://x.in" would
    # otherwise get a second scheme prepended and parse to the wrong host.
    if not candidate.lower().startswith(("http://", "https://")):
        candidate = "https://" + candidate

    try:
        hostname = urlparse(candidate).hostname or ""
    except ValueError:
        # urlparse rejects malformed bracketed (IPv6-style) hosts.
        return "Global"

    # A fully-qualified host may carry a trailing root dot ("example.in.").
    hostname = hostname.rstrip(".")

    if hostname.endswith(".in"):
        return "India"
    # ".uk" already covers ".co.uk".
    if hostname.endswith((".uk", ".eu", ".de", ".fr", ".nl")):
        return "Europe"
    return "Global"
47
+
48
def extract_tags(description: str, url: str) -> List[str]:
    """Return the sorted taxonomy tags whose keywords occur in the description or URL.

    Keywords are matched case-insensitively as whole words/phrases. When no
    keyword of any tag matches, ``["Generalist"]`` is returned.
    """
    haystack = f"{description} {url}".lower()

    def mentions(keyword: str) -> bool:
        # Word boundaries keep short keywords from matching inside longer words.
        return re.search(r'\b' + re.escape(keyword) + r'\b', haystack) is not None

    hits = {
        label
        for label, keywords in TAXONOMY.items()
        if any(mentions(keyword) for keyword in keywords)
    }

    if not hits:
        return ["Generalist"]
    return sorted(hits)
63
+
64
def get_product_context(description: str, url: str, stage: Optional[str] = None, geography: Optional[str] = None) -> Dict:
    """Build the product-context dict consumed by the matcher and report generator.

    The geography hint is the caller-supplied value when truthy, otherwise it
    is inferred from the URL.
    """
    context: Dict = {"description": description, "url": url}
    context["extracted_tags"] = extract_tags(description, url)
    context["stage_hint"] = stage
    context["geography_hint"] = geography or get_geography_from_url(url)
    return context
73
+
74
if __name__ == "__main__":
    # Manual smoke test: python <this file> "<description>" [url]
    import sys

    cli_args = sys.argv[1:]
    if cli_args:
        product_url = cli_args[1] if len(cli_args) > 1 else ""
        print(get_product_context(cli_args[0], product_url))
@@ -0,0 +1,111 @@
1
+ import datetime
2
+ import json
3
+ import os
4
+ from typing import List, Dict
5
+
6
def _generate_rationale(matched_tags: List[str], product_context: Dict, fund=None) -> str:
    """Generate a deterministic rationale based only on data that actually matched.

    Args:
        matched_tags: Sector tags shared by the product and the fund.
        product_context: Product context dict (reads ``stage_hint`` and
            ``geography_hint``).
        fund: Optional fund record. When given, stage and geography statements
            are checked against the fund's ``stage_focus`` and
            ``geography_focus`` instead of being asserted unconditionally.
            When omitted, the legacy unverified wording is returned so that
            two-argument callers see no change.

    Returns:
        One or more sentences explaining the match.
    """
    if not matched_tags or matched_tags == ["Generalist"]:
        return "No direct tag overlap. Included as a generalist fund active at your target stage."

    tags_str = ", ".join(matched_tags)
    sector_note = f"This fund focuses on {tags_str} which aligns with your product's identified sector(s)."

    if fund is None:
        stage = product_context.get("stage_hint") or "applicable"
        geo = product_context.get("geography_hint") or "Global"
        return f"{sector_note} They are active at your target stage ({stage}) and correspond to your geography focus ({geo})."

    notes = [sector_note]

    # Only state a stage fit (or gap) when both sides are known.
    stage_hint = product_context.get("stage_hint")
    fund_stages = fund.get("stage_focus") or []
    if stage_hint and fund_stages:
        if stage_hint in fund_stages:
            notes.append(f"They are active at your target stage ({stage_hint}).")
        else:
            notes.append(
                f"Your target stage ({stage_hint}) is not in their listed stage focus ({', '.join(fund_stages)})."
            )

    # A "Global" product geography carries no regional constraint to report on.
    geo_hint = product_context.get("geography_hint")
    fund_geo = fund.get("geography_focus") or []
    if geo_hint and geo_hint != "Global" and fund_geo:
        if geo_hint in fund_geo:
            notes.append(f"Their geography focus includes {geo_hint}.")
        elif "Global" in fund_geo:
            notes.append(f"They invest globally, which covers your geography ({geo_hint}).")
        else:
            notes.append(
                f"Their listed geography focus ({', '.join(fund_geo)}) does not include {geo_hint}."
            )

    return " ".join(notes)


def _count_dataset_funds() -> int:
    """Return the number of records in data/vc_funds.json, or 25 if it cannot be read."""
    try:
        data_path = os.path.join(os.path.dirname(__file__), "..", "data", "vc_funds.json")
        with open(data_path, "r", encoding="utf-8") as f:
            return len(json.load(f))
    except (OSError, ValueError, TypeError, NameError):
        return 25  # Fallback if file isn't accessible or isn't a JSON collection


def _escape_brackets(text: str) -> str:
    """Backslash-escape square brackets so user text cannot form Markdown links."""
    return text.replace("[", "\\[").replace("]", "\\]")


def _limitations_lines(total_funds: int, today: str, include_low_confidence_note: bool = False) -> List[str]:
    """Return the lines of the 'Assumptions & Limitations' section."""
    lines = [
        "## Assumptions & Limitations",
        "",
        f"- Dataset contains {total_funds} funds verified as of {today}",
        "- Fund theses and portfolios change over time. Verify directly at each fund's website before outreach.",
        "- This tool matches based on public thesis data only. It does not reflect current deployment status or fund availability.",
        "- Matches are a starting point for research, not financial advice.",
    ]
    if include_low_confidence_note:
        lines.append("- Low-confidence matches are included because no stronger sector-specific matches were found.")
    return lines


def generate_report(matches: List[Dict], product_context: Dict) -> str:
    """Convert matched VC data into a formatted Markdown report.

    Args:
        matches: Match records, each with ``fund``, ``score``, ``confidence``
            and ``matched_tags`` keys (missing keys fall back to defaults).
        product_context: Product context dict (``description``, ``url``,
            ``extracted_tags``, ``stage_hint``, ``geography_hint``).

    Returns:
        The Markdown report. With no matches, a short "No matches found"
        report that still carries the limitations section.
    """
    today = datetime.date.today().strftime("%Y-%m-%d")
    total_funds = _count_dataset_funds()

    # Handle empty edge case
    if not matches:
        lines = [
            "# VC Curated Match Report",
            "",
            "No matches found. Try broadening your description.",
            "",
            "---",
            "",
        ]
        lines.extend(_limitations_lines(total_funds, today))
        return "\n".join(lines)

    # Header block. Truncate BEFORE escaping so the cut can never land between
    # a backslash and the bracket it escapes, and so escape characters do not
    # count toward the 120-character limit.
    desc = product_context.get("description") or ""
    if len(desc) > 120:
        desc = desc[:120] + "..."
    desc = _escape_brackets(desc)

    url = _escape_brackets(product_context.get("url") or "")
    tags = ", ".join(product_context.get("extracted_tags") or [])
    stage = product_context.get("stage_hint") or "Not specified"
    geo = product_context.get("geography_hint") or "Global"

    lines = [
        "# VC Curated Match Report",
        "",
        f"**Product:** {desc}",
        f"**URL:** {url}",
        f"**Extracted Tags:** {tags}",
        f"**Stage:** {stage}",
        f"**Geography:** {geo}",
        f"**Generated:** {today}",
        "",
        "---",
        "",
        "## Top VC Matches",
    ]

    has_low_confidence = False

    # Matches block
    for rank, match in enumerate(matches, 1):
        fund = match.get("fund") or {}
        conf = match.get("confidence", "Low")
        score = match.get("score", 0)
        matched_tags = match.get("matched_tags") or []

        if conf == "Low":
            has_low_confidence = True

        lines.append("")
        lines.append("---")
        lines.append(f"## {rank}. {fund.get('fund_name', 'Unknown Fund')} — {conf} Confidence")
        lines.append("")
        lines.append(f"**Thesis:** {fund.get('thesis', '')}")
        lines.append(f"**Check Size:** {fund.get('check_size', '')}")
        lines.append(f"**Stage Focus:** {', '.join(fund.get('stage_focus') or [])}")
        lines.append(f"**Geography:** {', '.join(fund.get('geography_focus') or [])}")
        lines.append(f"**Notable Portfolio:** {', '.join(fund.get('notable_portfolio') or [])}")

        if fund.get("website"):
            lines.append(f"**Website:** {fund['website']}")

        lines.append(f"**Match Score:** {score}/100")
        # Pass the fund so stage/geography claims are verified, not assumed.
        lines.append(f"**Why this match:** {_generate_rationale(matched_tags, product_context, fund)}")

    # Footer block
    lines.append("")
    lines.append("---")
    lines.append("")
    lines.extend(_limitations_lines(total_funds, today, has_low_confidence))

    return "\n".join(lines)
@@ -0,0 +1,127 @@
1
+ import json
2
+ import os
3
+ from typing import List, Dict, Tuple
4
+
5
def load_funds(data_path: str) -> List[Dict]:
    """Load the fund dataset (data/vc_funds.json) from ``data_path``.

    Args:
        data_path: Path to the JSON dataset file.

    Returns:
        The parsed JSON content.

    Raises:
        FileNotFoundError: If no file exists at ``data_path``.
    """
    # Open directly instead of checking os.path.exists first: the file could
    # disappear between the check and the open.
    try:
        with open(data_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"Dataset not found at {data_path}") from err
11
+
12
# Funding stages in chronological order; neighbours count as "adjacent".
_STAGE_ORDER = {"Pre-seed": 0, "Seed": 1, "Series A": 2, "Growth": 3}


def _score_tags(fund_tags: List[str], extracted_tags: List[str]) -> Tuple[int, List[str]]:
    """Return (points capped at 60, matched tags): 20 per shared tag, 5 for Generalist."""
    points = 0
    matched = []
    for tag in extracted_tags:
        if tag in fund_tags:
            points += 5 if tag == "Generalist" else 20
            matched.append(tag)
    return min(points, 60), matched


def _score_stage(stage_hint, fund_stages: List[str]) -> int:
    """Return 20 for an exact stage match, 10 for an adjacent stage or no hint, else 0."""
    if not stage_hint:
        return 10
    if not fund_stages:
        return 0  # fund lists no stages: skip stage scoring
    if stage_hint in fund_stages:
        return 20
    if stage_hint not in _STAGE_ORDER:
        return 0
    hint_idx = _STAGE_ORDER[stage_hint]
    is_adjacent = any(
        f_stage in _STAGE_ORDER and abs(_STAGE_ORDER[f_stage] - hint_idx) == 1
        for f_stage in fund_stages
    )
    return 10 if is_adjacent else 0


def _score_geography(geo_hint, fund_geo: List[str]) -> int:
    """Return 10 with no/Global hint, 20 for a direct match, 15 for a Global fund, else 0."""
    if not geo_hint or geo_hint == "Global":
        return 10
    if geo_hint in fund_geo:
        return 20
    if "Global" in fund_geo:
        return 15
    return 0


def score_fund(fund: Dict, product_context: Dict) -> Tuple[int, List[str]]:
    """Score a single fund based on overlap with product context.

    The score is tag overlap (max 60) + stage match (max 20) + geography
    match (max 20), followed by two penalties floored at 0.

    Args:
        fund: Fund record (reads ``industry_tags``, ``stage_focus``,
            ``geography_focus``; missing or null fields are treated as empty).
        product_context: Product context (reads ``extracted_tags``,
            ``stage_hint``, ``geography_hint``).

    Returns:
        ``(score, matched_tags)``.
    """
    # `or` also covers explicit nulls in the dataset, not just missing keys.
    fund_tags = fund.get("industry_tags") or []
    fund_stages = fund.get("stage_focus") or []
    fund_geo = fund.get("geography_focus") or ["Global"]
    extracted_tags = product_context.get("extracted_tags") or ["Generalist"]
    stage_hint = product_context.get("stage_hint")
    geo_hint = product_context.get("geography_hint")

    tag_points, matched_tags = _score_tags(fund_tags, extracted_tags)
    score = tag_points
    score += _score_stage(stage_hint, fund_stages)
    score += _score_geography(geo_hint, fund_geo)

    # Penalty 1: India-only funds in US searches.
    if geo_hint == "US" and "India" in fund_geo and "US" not in fund_geo and "Global" not in fund_geo:
        score = max(0, score - 30)

    # Penalty 2: the fund's primary (first-listed) tag is not among the
    # product's tags and the overall tag overlap is weak.
    if fund_tags and fund_tags[0] not in extracted_tags and tag_points <= 20:
        score = max(0, score - 15)

    return score, matched_tags
92
+
93
def get_confidence_tier(score: int) -> str:
    """Map a match score to its confidence label: High (>= 70), Medium (>= 40), else Low."""
    for floor, label in ((70, "High"), (40, "Medium")):
        if score >= floor:
            return label
    return "Low"
100
+
101
def match_vcs(product_context: Dict, data_path: str = "data/vc_funds.json") -> List[Dict]:
    """Score every fund in the dataset and return the best matches.

    Returns the ten highest-scoring funds (ties broken by fund name). If every
    fund scores zero, returns up to five funds tagged "Generalist" instead,
    each carrying a warning.
    """
    funds = load_funds(data_path)

    def evaluate(fund: Dict) -> Dict:
        points, overlap = score_fund(fund, product_context)
        return {
            "fund": fund,
            "score": points,
            "confidence": get_confidence_tier(points),
            "matched_tags": overlap
        }

    ranked = sorted(
        (evaluate(fund) for fund in funds),
        key=lambda entry: (-entry["score"], entry["fund"].get("fund_name", "")),
    )

    if any(entry["score"] != 0 for entry in ranked):
        return ranked[:10]

    # Nothing scored above zero: fall back to generalist funds in dataset order.
    fallback = []
    for fund in funds:
        if "Generalist" in fund.get("industry_tags", []):
            fallback.append({
                "fund": fund,
                "score": 0,
                "confidence": "Low",
                "matched_tags": ["Generalist"],
                "warning": "No strong matches found. Showing generalist funds only."
            })
    return fallback[:5]