@opendirectory.dev/skills 0.1.39 → 0.1.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/registry.json +16 -0
- package/skills/product-update-logger/.env.example +4 -0
- package/skills/product-update-logger/README.md +197 -0
- package/skills/product-update-logger/SKILL.md +462 -0
- package/skills/product-update-logger/evals/evals.json +119 -0
- package/skills/product-update-logger/references/changelog-format.md +96 -0
- package/skills/product-update-logger/references/content-rules.md +154 -0
- package/skills/product-update-logger/references/noise-filter.md +86 -0
- package/skills/product-update-logger/scripts/gather.py +364 -0
- package/skills/vc-curated-match/README.md +42 -0
- package/skills/vc-curated-match/SKILL.md +59 -0
- package/skills/vc-curated-match/data/vc_funds.json +277 -0
- package/skills/vc-curated-match/evals/ai-b2b-saas-seed.md +141 -0
- package/skills/vc-curated-match/evals/devtool-oss-seed.md +141 -0
- package/skills/vc-curated-match/evals/evals.json +43 -0
- package/skills/vc-curated-match/evals/fintech-india-preseed.md +142 -0
- package/skills/vc-curated-match/evals/varnan-seed.md +141 -0
- package/skills/vc-curated-match/scripts/fetch_product_context.py +80 -0
- package/skills/vc-curated-match/scripts/generate_report.py +111 -0
- package/skills/vc-curated-match/scripts/match_vcs.py +127 -0
- package/skills/vc-curated-match/scripts/run.py +82 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# VC Curated Match Report
|
|
2
|
+
|
|
3
|
+
**Product:** A payment infrastructure API for e-commerce businesses in India
|
|
4
|
+
**URL:** https://example-fintech.in
|
|
5
|
+
**Extracted Tags:** Data, E-commerce, FinTech, Infrastructure
|
|
6
|
+
**Stage:** Pre-seed
|
|
7
|
+
**Geography:** India
|
|
8
|
+
**Generated:** 2026-04-23
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Top VC Matches
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
## 1. Accel India — High Confidence
|
|
16
|
+
|
|
17
|
+
**Thesis:** We partner with exceptional founders from inception through all phases of private company growth in the Indian ecosystem.
|
|
18
|
+
**Check Size:** $1M - $15M
|
|
19
|
+
**Stage Focus:** Seed, Series A, Growth
|
|
20
|
+
**Geography:** India
|
|
21
|
+
**Notable Portfolio:** Flipkart, Swiggy, Freshworks
|
|
22
|
+
**Website:** https://www.accel.com/india
|
|
23
|
+
**Match Score:** 70/100
|
|
24
|
+
**Why this match:** This fund focuses on E-commerce, FinTech which aligns with your product's identified sector(s). Their stage focus (Seed, Series A, Growth) does not include your target stage (Pre-seed), but they correspond to your geography focus (India).
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
## 2. Bain Capital Ventures — Medium Confidence
|
|
28
|
+
|
|
29
|
+
**Thesis:** From seed to growth, we back founders building legendary infrastructure, fintech, application, and commerce companies.
|
|
30
|
+
**Check Size:** $1M - $50M+
|
|
31
|
+
**Stage Focus:** Seed, Series A, Growth
|
|
32
|
+
**Geography:** US, Global
|
|
33
|
+
**Notable Portfolio:** DocuSign, SendGrid, Redis
|
|
34
|
+
**Website:** https://www.baincapitalventures.com
|
|
35
|
+
**Match Score:** 65/100
|
|
36
|
+
**Why this match:** This fund focuses on FinTech, Infrastructure which aligns with your product's identified sector(s). Their stage focus (Seed, Series A, Growth) does not include your target stage (Pre-seed), but they correspond to your geography focus (India).
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
## 3. Amplify Partners — Medium Confidence
|
|
40
|
+
|
|
41
|
+
**Thesis:** We invest in technical founders building the next generation of IT infrastructure, developer tools, and data platforms.
|
|
42
|
+
**Check Size:** $2M - $8M
|
|
43
|
+
**Stage Focus:** Seed, Series A
|
|
44
|
+
**Geography:** US
|
|
45
|
+
**Notable Portfolio:** Datadog, OCTO, dbt Labs
|
|
46
|
+
**Website:** https://www.amplifypartners.com
|
|
47
|
+
**Match Score:** 50/100
|
|
48
|
+
**Why this match:** This fund focuses on Data, Infrastructure which aligns with your product's identified sector(s). Note that their stage focus (Seed, Series A) does not include your target stage (Pre-seed) and their geography focus (US) does not include yours (India).
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
## 4. boldstart ventures — Medium Confidence
|
|
52
|
+
|
|
53
|
+
**Thesis:** Day one partner for developer first, crypto, and SaaS founders. We love deeply technical founders solving hard infrastructure problems.
|
|
54
|
+
**Check Size:** $1M - $3M
|
|
55
|
+
**Stage Focus:** Pre-seed, Seed
|
|
56
|
+
**Geography:** Global, US
|
|
57
|
+
**Notable Portfolio:** Snyk, Blockdaemon, Superhuman
|
|
58
|
+
**Website:** https://boldstart.vc
|
|
59
|
+
**Match Score:** 40/100
|
|
60
|
+
**Why this match:** This fund focuses on Infrastructure which aligns with your product's identified sector(s). They are active at your target stage (Pre-seed) and correspond to your geography focus (India).
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
## 5. Elevation Capital — Low Confidence
|
|
64
|
+
|
|
65
|
+
**Thesis:** We partner with visionary founders in India across early stages to help them build category-defining businesses.
|
|
66
|
+
**Check Size:** $1M - $10M
|
|
67
|
+
**Stage Focus:** Seed, Series A
|
|
68
|
+
**Geography:** India
|
|
69
|
+
**Notable Portfolio:** Paytm, Swiggy, Meesho
|
|
70
|
+
**Website:** https://elevationcapital.com
|
|
71
|
+
**Match Score:** 35/100
|
|
72
|
+
**Why this match:** This fund focuses on FinTech which aligns with your product's identified sector(s). Their stage focus (Seed, Series A) does not include your target stage (Pre-seed), but they correspond to your geography focus (India).
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
## 6. Peak XV Partners — Low Confidence
|
|
76
|
+
|
|
77
|
+
**Thesis:** Formerly Sequoia India & SEA, we partner with founders across early, growth, and public stages to build enduring companies.
|
|
78
|
+
**Check Size:** $1M - $20M+
|
|
79
|
+
**Stage Focus:** Seed, Series A, Growth
|
|
80
|
+
**Geography:** India, South Asia
|
|
81
|
+
**Notable Portfolio:** Zomato, Pine Labs, Cred
|
|
82
|
+
**Website:** https://www.peakxv.com
|
|
83
|
+
**Match Score:** 35/100
|
|
84
|
+
**Why this match:** This fund focuses on FinTech which aligns with your product's identified sector(s). Their stage focus (Seed, Series A, Growth) does not include your target stage (Pre-seed), but they correspond to your geography focus (India).
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
## 7. Bessemer Venture Partners — Low Confidence
|
|
88
|
+
|
|
89
|
+
**Thesis:** BVP helps entrepreneurs lay strong foundations to build and forge long-standing companies.
|
|
90
|
+
**Check Size:** $1M - $20M+
|
|
91
|
+
**Stage Focus:** Seed, Series A, Growth
|
|
92
|
+
**Geography:** Global
|
|
93
|
+
**Notable Portfolio:** LinkedIn, Twilio, Shopify
|
|
94
|
+
**Website:** https://www.bvp.com
|
|
95
|
+
**Match Score:** 30/100
|
|
96
|
+
**Why this match:** This fund focuses on FinTech which aligns with your product's identified sector(s). Their stage focus (Seed, Series A, Growth) does not include your target stage (Pre-seed), but they correspond to your geography focus (India).
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
## 8. Heavybit — Low Confidence
|
|
100
|
+
|
|
101
|
+
**Thesis:** The leading investor in developer-first startups. We help technical founders launch, gain traction, and build enterprise-ready companies.
|
|
102
|
+
**Check Size:** $1M - $5M
|
|
103
|
+
**Stage Focus:** Seed, Series A
|
|
104
|
+
**Geography:** Global, US
|
|
105
|
+
**Notable Portfolio:** PagerDuty, Sanity, Netlify
|
|
106
|
+
**Website:** https://www.heavybit.com
|
|
107
|
+
**Match Score:** 30/100
|
|
108
|
+
**Why this match:** This fund focuses on Infrastructure which aligns with your product's identified sector(s). Their stage focus (Seed, Series A) does not include your target stage (Pre-seed), but they correspond to your geography focus (India).
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
## 9. Index Ventures — Low Confidence
|
|
112
|
+
|
|
113
|
+
**Thesis:** We back the best and most ambitious entrepreneurs across all stages to build category-defining businesses.
|
|
114
|
+
**Check Size:** $1M - $20M+
|
|
115
|
+
**Stage Focus:** Seed, Series A, Growth
|
|
116
|
+
**Geography:** Europe, US, Global
|
|
117
|
+
**Notable Portfolio:** Dropbox, Slack, Figma
|
|
118
|
+
**Website:** https://www.indexventures.com
|
|
119
|
+
**Match Score:** 30/100
|
|
120
|
+
**Why this match:** This fund focuses on FinTech which aligns with your product's identified sector(s). Their stage focus (Seed, Series A, Growth) does not include your target stage (Pre-seed), but they correspond to your geography focus (India).
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
## 10. Lightspeed Venture Partners — Low Confidence
|
|
124
|
+
|
|
125
|
+
**Thesis:** We invest globally in enterprise, consumer, and health founders who are shaping the future.
|
|
126
|
+
**Check Size:** $1M - $25M+
|
|
127
|
+
**Stage Focus:** Seed, Series A, Growth
|
|
128
|
+
**Geography:** Global
|
|
129
|
+
**Notable Portfolio:** Snap, Rippling, MuleSoft
|
|
130
|
+
**Website:** https://lsvp.com
|
|
131
|
+
**Match Score:** 30/100
|
|
132
|
+
**Why this match:** This fund focuses on FinTech which aligns with your product's identified sector(s). Their stage focus (Seed, Series A, Growth) does not include your target stage (Pre-seed), but they correspond to your geography focus (India).
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Assumptions & Limitations
|
|
137
|
+
|
|
138
|
+
- Dataset contains 25 funds verified as of 2026-04-23
|
|
139
|
+
- Fund theses and portfolios change over time. Verify directly at each fund's website before outreach.
|
|
140
|
+
- This tool matches based on public thesis data only. It does not reflect current deployment status or fund availability.
|
|
141
|
+
- Matches are a starting point for research, not financial advice.
|
|
142
|
+
- Low-confidence matches are included because no stronger sector-specific matches were found.
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# VC Curated Match Report
|
|
2
|
+
|
|
3
|
+
**Product:** Varnan is a GTM distribution platform for AI startups. It helps founders automate outreach across LinkedIn, email, and s...
|
|
4
|
+
**URL:** https://varnan.tech
|
|
5
|
+
**Extracted Tags:** AI, B2B SaaS, Consumer
|
|
6
|
+
**Stage:** Seed
|
|
7
|
+
**Geography:** Global
|
|
8
|
+
**Generated:** 2026-04-23
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Top VC Matches
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
## 1. Peak XV Partners — High Confidence
|
|
16
|
+
|
|
17
|
+
**Thesis:** Formerly Sequoia India & SEA, we partner with founders across early, growth, and public stages to build enduring companies.
|
|
18
|
+
**Check Size:** $1M - $20M+
|
|
19
|
+
**Stage Focus:** Seed, Series A, Growth
|
|
20
|
+
**Geography:** India, South Asia
|
|
21
|
+
**Notable Portfolio:** Zomato, Pine Labs, Cred
|
|
22
|
+
**Website:** https://www.peakxv.com
|
|
23
|
+
**Match Score:** 90/100
|
|
24
|
+
**Why this match:** This fund focuses on AI, B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
## 2. Accel — High Confidence
|
|
28
|
+
|
|
29
|
+
**Thesis:** We partner with exceptional founders from inception through all phases of private company growth.
|
|
30
|
+
**Check Size:** $1M - $20M+
|
|
31
|
+
**Stage Focus:** Seed, Series A, Growth
|
|
32
|
+
**Geography:** Global
|
|
33
|
+
**Notable Portfolio:** Facebook, Atlassian, Spotify
|
|
34
|
+
**Website:** https://www.accel.com
|
|
35
|
+
**Match Score:** 70/100
|
|
36
|
+
**Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
## 3. Accel India — High Confidence
|
|
40
|
+
|
|
41
|
+
**Thesis:** We partner with exceptional founders from inception through all phases of private company growth in the Indian ecosystem.
|
|
42
|
+
**Check Size:** $1M - $15M
|
|
43
|
+
**Stage Focus:** Seed, Series A, Growth
|
|
44
|
+
**Geography:** India
|
|
45
|
+
**Notable Portfolio:** Flipkart, Swiggy, Freshworks
|
|
46
|
+
**Website:** https://www.accel.com/india
|
|
47
|
+
**Match Score:** 70/100
|
|
48
|
+
**Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
## 4. Andreessen Horowitz (a16z) — High Confidence
|
|
52
|
+
|
|
53
|
+
**Thesis:** We invest in software eating the world. We back bold entrepreneurs building the future through technology.
|
|
54
|
+
**Check Size:** $1M - $50M+
|
|
55
|
+
**Stage Focus:** Seed, Series A, Growth
|
|
56
|
+
**Geography:** Global, US
|
|
57
|
+
**Notable Portfolio:** Facebook, Coinbase, Figma
|
|
58
|
+
**Website:** https://a16z.com
|
|
59
|
+
**Match Score:** 70/100
|
|
60
|
+
**Why this match:** This fund focuses on AI, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
## 5. Blume Ventures — High Confidence
|
|
64
|
+
|
|
65
|
+
**Thesis:** We are a seed and pre-seed venture fund that backs startups with both funding and active mentoring.
|
|
66
|
+
**Check Size:** $500k - $3M
|
|
67
|
+
**Stage Focus:** Pre-seed, Seed
|
|
68
|
+
**Geography:** India
|
|
69
|
+
**Notable Portfolio:** Unacademy, Purplle, GreyOrange
|
|
70
|
+
**Website:** https://blume.vc
|
|
71
|
+
**Match Score:** 70/100
|
|
72
|
+
**Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
## 6. Cherry Ventures — High Confidence
|
|
76
|
+
|
|
77
|
+
**Thesis:** We champion founders in Europe from their earliest days. We are generalist seed investors.
|
|
78
|
+
**Check Size:** $1M - $4M
|
|
79
|
+
**Stage Focus:** Pre-seed, Seed
|
|
80
|
+
**Geography:** Europe
|
|
81
|
+
**Notable Portfolio:** FlixBus, Auto1 Group, Forto
|
|
82
|
+
**Website:** https://www.cherry.vc
|
|
83
|
+
**Match Score:** 70/100
|
|
84
|
+
**Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
## 7. Elevation Capital — High Confidence
|
|
88
|
+
|
|
89
|
+
**Thesis:** We partner with visionary founders in India across early stages to help them build category-defining businesses.
|
|
90
|
+
**Check Size:** $1M - $10M
|
|
91
|
+
**Stage Focus:** Seed, Series A
|
|
92
|
+
**Geography:** India
|
|
93
|
+
**Notable Portfolio:** Paytm, Swiggy, Meesho
|
|
94
|
+
**Website:** https://elevationcapital.com
|
|
95
|
+
**Match Score:** 70/100
|
|
96
|
+
**Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
## 8. First Round Capital — High Confidence
|
|
100
|
+
|
|
101
|
+
**Thesis:** We are the seed-stage firm that builds the most supportive community for founders.
|
|
102
|
+
**Check Size:** $1M - $4M
|
|
103
|
+
**Stage Focus:** Pre-seed, Seed
|
|
104
|
+
**Geography:** US
|
|
105
|
+
**Notable Portfolio:** Uber, Notion, Roblox
|
|
106
|
+
**Website:** https://firstround.com
|
|
107
|
+
**Match Score:** 70/100
|
|
108
|
+
**Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
## 9. Founder Collective — High Confidence
|
|
112
|
+
|
|
113
|
+
**Thesis:** We are a seed-stage venture capital fund, built by founders, for founders. We back weird, wonderful, and wild startups.
|
|
114
|
+
**Check Size:** $500k - $2M
|
|
115
|
+
**Stage Focus:** Seed
|
|
116
|
+
**Geography:** US, Global
|
|
117
|
+
**Notable Portfolio:** Uber, Airtable, BuzzFeed
|
|
118
|
+
**Website:** https://www.foundercollective.com
|
|
119
|
+
**Match Score:** 70/100
|
|
120
|
+
**Why this match:** This fund focuses on B2B SaaS, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
## 10. Greylock Partners — High Confidence
|
|
124
|
+
|
|
125
|
+
**Thesis:** We partner with early-stage founders to build enterprise and consumer software companies that define new categories.
|
|
126
|
+
**Check Size:** $1M - $10M
|
|
127
|
+
**Stage Focus:** Seed, Series A
|
|
128
|
+
**Geography:** US
|
|
129
|
+
**Notable Portfolio:** Workday, Palo Alto Networks, LinkedIn
|
|
130
|
+
**Website:** https://greylock.com
|
|
131
|
+
**Match Score:** 70/100
|
|
132
|
+
**Why this match:** This fund focuses on AI, Consumer which aligns with your product's identified sector(s). They are active at your target stage (Seed) and correspond to your geography focus (Global).
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Assumptions & Limitations
|
|
137
|
+
|
|
138
|
+
- Dataset contains 25 funds verified as of 2026-04-23
|
|
139
|
+
- Fund theses and portfolios change over time. Verify directly at each fund's website before outreach.
|
|
140
|
+
- This tool matches based on public thesis data only. It does not reflect current deployment status or fund availability.
|
|
141
|
+
- Matches are a starting point for research, not financial advice.
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Dict, List, Optional
|
|
3
|
+
|
|
4
|
+
# Sector taxonomy consumed by extract_tags(): each tag maps to the lowercase
# keywords/phrases that, when found as whole words in the product text,
# assign that tag to the product.
TAXONOMY = {
    "AI": [
        "ai",
        "artificial intelligence",
        "machine learning",
        "ml",
        "llm",
        "large language model",
        "generative ai",
        "gen ai",
        "ai-powered",
        "ai powered",
        "neural network",
        "nlp",
        "natural language processing",
        "deep learning",
    ],
    "DevTools": [
        "developer tools",
        "devtools",
        "coding",
        "programming",
        "software development",
        "sdk",
        "developer",
        "developers",
        "engineering team",
        "software engineer",
        "for engineers",
        "cli",
        "terminal",
        "command line",
        "command-line",
        "command line tool",
    ],
    "B2B SaaS": [
        "saas",
        "b2b",
        "enterprise software",
        "business software",
        "subscription software",
        "automation",
        "workflow",
        "productivity",
        "b2b software",
        "automate",
    ],
    "Open Source": ["open source", "open-source", "coss", "github", "public repo"],
    "FinTech": ["fintech", "financial", "payment", "banking"],
    "Crypto": ["crypto", "blockchain", "bitcoin", "ethereum", "web3", "wallet"],
    "HealthTech": ["healthtech", "healthcare", "medical", "biotech", "wellness"],
    "Consumer": [
        "consumer",
        "b2c",
        "app",
        "social media",
        "social platform",
        "gaming",
        "lifestyle",
    ],
    "DeepTech": ["deeptech", "robotics", "quantum", "hard tech", "science"],
    "Infrastructure": [
        "infrastructure",
        "infra",
        "cloud",
        "backend",
        "hosting",
        "database",
    ],
    "Cybersecurity": ["security", "cybersecurity", "privacy", "protection", "firewall"],
    "Marketplaces": [
        "marketplace",
        "multi-sided",
        "brokerage",
        "two-sided market",
        "buyer and seller",
    ],
    "E-commerce": ["e-commerce", "ecommerce", "shopping", "retail", "online store"],
    "Enterprise": ["enterprise", "corporate", "large scale"],
    "Data": ["data", "api", "rest", "graphql", "interface"],
}
|
|
21
|
+
|
|
22
|
+
from urllib.parse import urlparse
|
|
23
|
+
|
|
24
|
+
def get_geography_from_url(url: str) -> str:
    """Infer a coarse geography label from the URL's top-level domain.

    Args:
        url: Product URL, with or without an ``http://``/``https://`` scheme.
            Surrounding whitespace is ignored and the scheme is matched
            case-insensitively.

    Returns:
        ``"India"`` for ``.in`` hosts, ``"Europe"`` for ``.uk``, ``.eu``,
        ``.de``, ``.fr`` and ``.nl`` hosts, and ``"Global"`` otherwise,
        including for empty, non-string, or unparseable input.
    """
    # Best-effort helper: anything that is not a usable string means "Global".
    if not url or not isinstance(url, str):
        return "Global"

    candidate = url.strip()
    if not candidate:
        return "Global"

    # urlparse only fills in the hostname when a scheme is present. Compare
    # case-insensitively so "HTTP://site.in" does not get a second scheme
    # prepended (which would make the hostname parse as "http").
    if not candidate.lower().startswith(("http://", "https://")):
        candidate = "https://" + candidate

    try:
        hostname = urlparse(candidate).hostname or ""
    except ValueError:
        # Raised for malformed netlocs such as an unterminated IPv6 literal.
        return "Global"

    # Drop the trailing root dot of a fully qualified name ("example.in.").
    hostname = hostname.rstrip(".")

    if hostname.endswith(".in"):
        return "India"
    # ".co.uk" is already covered by ".uk".
    if hostname.endswith((".uk", ".eu", ".de", ".fr", ".nl")):
        return "Europe"
    return "Global"
|
|
47
|
+
|
|
48
|
+
def extract_tags(description: str, url: str) -> List[str]:
    """Return the taxonomy tags whose keywords occur in the description or URL.

    The description and URL are joined and lowercased, then every TAXONOMY
    keyword is searched for as a whole word. A tag is assigned as soon as any
    one of its keywords matches.

    Returns:
        The matching tags in alphabetical order, or ``["Generalist"]`` when no
        keyword matches at all.
    """
    haystack = f"{description} {url}".lower()

    def _mentions(keyword: str) -> bool:
        # Word boundaries stop short keywords such as "ai" or "ml" from
        # matching inside longer words.
        return re.search(r'\b' + re.escape(keyword) + r'\b', haystack) is not None

    found = {
        tag
        for tag, keywords in TAXONOMY.items()
        if any(_mentions(keyword) for keyword in keywords)
    }
    return sorted(found) if found else ["Generalist"]
|
|
63
|
+
|
|
64
|
+
def get_product_context(description: str, url: str, stage: Optional[str] = None, geography: Optional[str] = None) -> Dict:
    """Assemble the product context dictionary from the raw inputs.

    Args:
        description: Free-text product description.
        url: Product URL.
        stage: Optional funding stage, passed through as ``stage_hint``.
        geography: Optional geography; when falsy it is inferred from the
            URL via ``get_geography_from_url``.

    Returns:
        A dict with the keys ``description``, ``url``, ``extracted_tags``,
        ``stage_hint`` and ``geography_hint``.
    """
    tags = extract_tags(description, url)
    # An explicit geography always wins over the TLD-based guess.
    resolved_geography = geography or get_geography_from_url(url)
    return dict(
        description=description,
        url=url,
        extracted_tags=tags,
        stage_hint=stage,
        geography_hint=resolved_geography,
    )
|
|
73
|
+
|
|
74
|
+
if __name__ == "__main__":
    # Manual smoke test: first argument is the description, the optional
    # second argument is the URL. With no arguments nothing is printed.
    import sys

    cli_args = sys.argv[1:]
    if cli_args:
        cli_description = cli_args[0]
        cli_url = cli_args[1] if len(cli_args) > 1 else ""
        print(get_product_context(cli_description, cli_url))
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
from typing import List, Dict
|
|
5
|
+
|
|
6
|
+
def _generate_rationale(matched_tags: List[str], product_context: Dict, fund: "Dict | None" = None) -> str:
    """Build the deterministic "Why this match" text for one fund.

    Args:
        matched_tags: Sector tags shared by the product and the fund.
        product_context: Product context; ``stage_hint`` and
            ``geography_hint`` are read from it.
        fund: Optional fund record. When given, its ``stage_focus`` and
            ``geography_focus`` lists are consulted so the text never claims
            a stage or geography fit that the fund data contradicts. When
            omitted, the original unconditional wording is returned.

    Returns:
        One or two sentences explaining the match.
    """
    if not matched_tags or matched_tags == ["Generalist"]:
        return "No direct tag overlap. Included as a generalist fund active at your target stage."

    tags_str = ", ".join(matched_tags)
    stage_hint = product_context.get("stage_hint")
    stage = stage_hint or "applicable"
    geo = product_context.get("geography_hint") or "Global"
    sector_sentence = f"This fund focuses on {tags_str} which aligns with your product's identified sector(s)."

    fund_stages = (fund or {}).get("stage_focus") or []
    fund_geo = (fund or {}).get("geography_focus") or []

    # A claim is withheld only when the fund data positively contradicts it;
    # missing data (no hint, no fund, empty focus list) keeps the original text.
    stage_ok = not stage_hint or not fund_stages or stage_hint in fund_stages
    geo_ok = geo == "Global" or not fund_geo or geo in fund_geo or "Global" in fund_geo

    if stage_ok and geo_ok:
        fit_sentence = f"They are active at your target stage ({stage}) and correspond to your geography focus ({geo})."
    elif stage_ok:
        fit_sentence = (
            f"They are active at your target stage ({stage}), but their geography focus "
            f"({', '.join(fund_geo)}) does not include yours ({geo})."
        )
    elif geo_ok:
        fit_sentence = (
            f"Their stage focus ({', '.join(fund_stages)}) does not include your target stage "
            f"({stage}), but they correspond to your geography focus ({geo})."
        )
    else:
        fit_sentence = (
            f"Note that their stage focus ({', '.join(fund_stages)}) does not include your target stage "
            f"({stage}) and their geography focus ({', '.join(fund_geo)}) does not include yours ({geo})."
        )

    return f"{sector_sentence} {fit_sentence}"


def _count_dataset_funds() -> int:
    """Return the number of entries in ``../data/vc_funds.json``, or 25 if it cannot be read."""
    try:
        data_path = os.path.join(os.path.dirname(__file__), "..", "data", "vc_funds.json")
        with open(data_path, "r", encoding="utf-8") as f:
            return len(json.load(f))
    except (OSError, ValueError, TypeError, NameError):
        # Missing/unreadable file, invalid JSON, a JSON value without a
        # length, or no __file__ (e.g. exec'd source): use the fallback count.
        return 25


def _escape_brackets(text: str) -> str:
    """Backslash-escape square brackets so the text cannot form a Markdown link."""
    return text.replace("[", "\\[").replace("]", "\\]")


def generate_report(matches: List[Dict], product_context: Dict) -> str:
    """Convert matched VC data into a formatted Markdown report.

    Args:
        matches: Ranked match records. Each may carry ``fund`` (dict),
            ``score``, ``confidence`` and ``matched_tags``; missing keys fall
            back to an empty fund, 0, ``"Low"`` and ``[]`` respectively.
        product_context: Product context; ``description``, ``url``,
            ``extracted_tags``, ``stage_hint`` and ``geography_hint`` are
            read, and any of them may be missing or ``None``.

    Returns:
        The report as a single Markdown string. With no matches, a short
        "No matches found" report is returned instead.
    """
    today = datetime.date.today().strftime("%Y-%m-%d")
    total_funds = _count_dataset_funds()

    limitations = [
        f"- Dataset contains {total_funds} funds verified as of {today}",
        "- Fund theses and portfolios change over time. Verify directly at each fund's website before outreach.",
        "- This tool matches based on public thesis data only. It does not reflect current deployment status or fund availability.",
        "- Matches are a starting point for research, not financial advice.",
    ]

    # Handle empty edge case
    if not matches:
        return "\n".join([
            "# VC Curated Match Report",
            "",
            "No matches found. Try broadening your description.",
            "",
            "---",
            "",
            "## Assumptions & Limitations",
            "",
            *limitations,
        ])

    # Header block. `or ""` tolerates keys that are present but None.
    desc = product_context.get("description") or ""
    # Truncate before escaping so the 120-character limit applies to the
    # visible text and an escape sequence is never cut in half.
    if len(desc) > 120:
        desc = desc[:120] + "..."
    desc = _escape_brackets(desc)

    url = _escape_brackets(product_context.get("url") or "")
    tags = ", ".join(product_context.get("extracted_tags") or [])
    stage = product_context.get("stage_hint") or "Not specified"
    geo = product_context.get("geography_hint") or "Global"

    lines = [
        "# VC Curated Match Report",
        "",
        f"**Product:** {desc}",
        f"**URL:** {url}",
        f"**Extracted Tags:** {tags}",
        f"**Stage:** {stage}",
        f"**Geography:** {geo}",
        f"**Generated:** {today}",
        "",
        "---",
        "",
        "## Top VC Matches",
    ]

    has_low_confidence = False

    # Matches block
    for rank, match in enumerate(matches, 1):
        fund = match.get("fund") or {}
        conf = match.get("confidence", "Low")
        score = match.get("score", 0)
        matched_tags = match.get("matched_tags", [])

        if conf == "Low":
            has_low_confidence = True

        lines.append("")
        lines.append("---")
        lines.append(f"## {rank}. {fund.get('fund_name', 'Unknown Fund')} — {conf} Confidence")
        lines.append("")
        lines.append(f"**Thesis:** {fund.get('thesis', '')}")
        lines.append(f"**Check Size:** {fund.get('check_size', '')}")
        lines.append(f"**Stage Focus:** {', '.join(fund.get('stage_focus', []))}")
        lines.append(f"**Geography:** {', '.join(fund.get('geography_focus', []))}")
        lines.append(f"**Notable Portfolio:** {', '.join(fund.get('notable_portfolio', []))}")

        if fund.get("website"):
            lines.append(f"**Website:** {fund['website']}")

        lines.append(f"**Match Score:** {score}/100")
        # Pass the fund so the rationale only claims a stage/geography fit
        # that the fund's own data supports.
        lines.append(f"**Why this match:** {_generate_rationale(matched_tags, product_context, fund)}")

    # Footer block
    lines.extend(["", "---", "", "## Assumptions & Limitations", ""])
    lines.extend(limitations)

    if has_low_confidence:
        lines.append("- Low-confidence matches are included because no stronger sector-specific matches were found.")

    return "\n".join(lines)
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from typing import List, Dict, Tuple
|
|
4
|
+
|
|
5
|
+
def load_funds(data_path: str) -> List[Dict]:
    """Load the VC fund dataset from a JSON file.

    Args:
        data_path: Path to the dataset (e.g. ``data/vc_funds.json``).

    Returns:
        The parsed JSON content.

    Raises:
        FileNotFoundError: If no file exists at ``data_path``; the message
            names the path that was tried.
    """
    # Open first and translate the failure (EAFP) rather than checking
    # os.path.exists beforehand: same exception type and message, without the
    # window between the check and the open.
    try:
        with open(data_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"Dataset not found at {data_path}") from err
|
|
11
|
+
|
|
12
|
+
def score_fund(fund: Dict, product_context: Dict) -> Tuple[int, List[str]]:
    """Score one fund against the product context.

    The score is the sum of three components, followed by two penalties:

    * sector overlap, capped at 60 (20 per shared tag, 5 for "Generalist");
    * stage fit, up to 20 (exact match 20, adjacent stage 10, no hint 10);
    * geography fit, up to 20 (exact 20, "Global" fund 15, no/Global hint 10).

    Args:
        fund: Fund record; ``industry_tags``, ``stage_focus`` and
            ``geography_focus`` are read.
        product_context: Product context; ``extracted_tags``, ``stage_hint``
            and ``geography_hint`` are read.

    Returns:
        ``(score, matched_tags)`` where ``matched_tags`` lists the product
        tags also present on the fund, in product-tag order.
    """
    stage_rank = {"Pre-seed": 0, "Seed": 1, "Series A": 2, "Growth": 3}

    # --- Sector overlap (max 60) ---
    fund_sectors = fund.get("industry_tags", [])
    product_tags = product_context.get("extracted_tags", []) or ["Generalist"]
    matched_tags = [tag for tag in product_tags if tag in fund_sectors]
    tag_points = min(
        sum(5 if tag == "Generalist" else 20 for tag in matched_tags),
        60,
    )

    # --- Stage fit (max 20) ---
    target_stage = product_context.get("stage_hint")
    fund_stages = fund.get("stage_focus", [])
    if not target_stage:
        stage_points = 10
    elif not fund_stages:
        stage_points = 0  # fund lists no stages: stage scoring is skipped
    elif target_stage in fund_stages:
        stage_points = 20
    elif target_stage in stage_rank and any(
        candidate in stage_rank
        and abs(stage_rank[candidate] - stage_rank[target_stage]) == 1
        for candidate in fund_stages
    ):
        stage_points = 10  # fund invests one stage earlier or later
    else:
        stage_points = 0

    # --- Geography fit (max 20) ---
    target_geo = product_context.get("geography_hint")
    fund_geo = fund.get("geography_focus", []) or ["Global"]
    if not target_geo or target_geo == "Global":
        geo_points = 10
    elif fund_geo == ["India"] and target_geo == "US":
        geo_points = 0
    elif target_geo in fund_geo:
        geo_points = 20
    elif "Global" in fund_geo:
        geo_points = 15
    else:
        geo_points = 0

    total = tag_points + stage_points + geo_points

    # --- Penalties ---
    # India-focused fund (no US/Global coverage) surfaced for a US search.
    india_only_for_us = (
        target_geo == "US"
        and "India" in fund_geo
        and "US" not in fund_geo
        and "Global" not in fund_geo
    )
    if india_only_for_us:
        total = max(0, total - 30)

    # The fund's first-listed sector is not one of the product's tags and the
    # sector overlap is weak (at most one shared tag).
    primary_mismatch = (
        bool(fund_sectors)
        and bool(product_tags)
        and fund_sectors[0] not in product_tags
        and tag_points <= 20
    )
    if primary_mismatch:
        total = max(0, total - 15)

    return total, matched_tags
|
|
92
|
+
|
|
93
|
+
def get_confidence_tier(score: int) -> str:
    """Map a match score to its confidence label.

    Returns:
        ``"High"`` for scores of 70 and above, ``"Medium"`` for 40 to 69,
        and ``"Low"`` for anything below 40.
    """
    # Thresholds are checked from highest to lowest; the first one met wins.
    for threshold, tier in ((70, "High"), (40, "Medium")):
        if score >= threshold:
            return tier
    return "Low"
|
|
100
|
+
|
|
101
|
+
def match_vcs(product_context: Dict, data_path: str = "data/vc_funds.json") -> List[Dict]:
    """Score every fund in the dataset and return the best matches.

    Args:
        product_context: Product context passed to ``score_fund``.
        data_path: Location of the fund dataset, loaded via ``load_funds``.

    Returns:
        Up to ten match records (``fund``, ``score``, ``confidence``,
        ``matched_tags``) ordered by descending score, ties broken by fund
        name. If every fund scores 0, up to five funds tagged "Generalist"
        are returned instead, each with Low confidence and a ``warning``.
    """
    funds = load_funds(data_path)

    def _as_match(fund: Dict) -> Dict:
        points, overlap = score_fund(fund, product_context)
        return {
            "fund": fund,
            "score": points,
            "confidence": get_confidence_tier(points),
            "matched_tags": overlap,
        }

    ranked = sorted(
        (_as_match(fund) for fund in funds),
        key=lambda entry: (-entry["score"], entry["fund"].get("fund_name", "")),
    )

    # Nothing scored above zero: fall back to generalist funds in dataset order.
    if not any(entry["score"] != 0 for entry in ranked):
        fallback = [
            fund for fund in funds if "Generalist" in fund.get("industry_tags", [])
        ][:5]
        return [
            {
                "fund": fund,
                "score": 0,
                "confidence": "Low",
                "matched_tags": ["Generalist"],
                "warning": "No strong matches found. Showing generalist funds only.",
            }
            for fund in fallback
        ]

    return ranked[:10]
|