@opendirectory.dev/skills 0.1.41 → 0.1.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/registry.json +8 -0
- package/skills/competitor-pr-finder/.env.example +13 -0
- package/skills/competitor-pr-finder/README.md +127 -0
- package/skills/competitor-pr-finder/SKILL.md +767 -0
- package/skills/competitor-pr-finder/evals/evals.json +120 -0
- package/skills/competitor-pr-finder/references/pitch-guide.md +119 -0
- package/skills/competitor-pr-finder/references/pr-channel-types.md +81 -0
- package/skills/competitor-pr-finder/references/tier-scoring.md +91 -0
- package/skills/competitor-pr-finder/scripts/research.py +292 -0
- package/skills/vc-finder/.env.example +1 -5
- package/skills/vc-finder/README.md +16 -9
- package/skills/vc-finder/SKILL.md +446 -193
- package/skills/vc-finder/data/vc_funds.json +277 -0
- package/skills/vc-finder/evals/evals.json +43 -25
- package/skills/vc-finder/scripts/match_funds.py +144 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
"""
|
|
2
|
+
research.py -- two-phase Tavily data collector for competitor-pr-finder.
|
|
3
|
+
|
|
4
|
+
Phase 1 (discover): finds competitor candidates from product analysis context.
|
|
5
|
+
Phase 2 (pr-research): runs three-track PR search per confirmed competitor.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
# Phase 1
|
|
9
|
+
python3 scripts/research.py \
|
|
10
|
+
--phase discover \
|
|
11
|
+
--product-analysis /tmp/cprf-product-analysis.json \
|
|
12
|
+
--tavily-key "$TAVILY_API_KEY" \
|
|
13
|
+
--output /tmp/cprf-competitors-raw.json
|
|
14
|
+
|
|
15
|
+
# Phase 2
|
|
16
|
+
python3 scripts/research.py \
|
|
17
|
+
--phase pr-research \
|
|
18
|
+
--competitors /tmp/cprf-competitors-confirmed.json \
|
|
19
|
+
--product-analysis /tmp/cprf-product-analysis.json \
|
|
20
|
+
--tavily-key "$TAVILY_API_KEY" \
|
|
21
|
+
--output /tmp/cprf-pr-raw.json
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import argparse
|
|
25
|
+
import json
|
|
26
|
+
import os
|
|
27
|
+
import ssl
|
|
28
|
+
import sys
|
|
29
|
+
import urllib.request
|
|
30
|
+
from datetime import date
|
|
31
|
+
|
|
32
|
+
_ssl_ctx = ssl._create_unverified_context()
|
|
33
|
+
|
|
34
|
+
quiet = False
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def log(msg):
    """Emit one progress line on stderr; suppressed when the module-level
    `quiet` flag is set (via --quiet)."""
    if quiet:
        return
    print(msg, file=sys.stderr)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def fetch_json(url, payload, timeout=25):
    """POST `payload` as JSON to `url` and return the decoded JSON response.

    Uses the module-level TLS context. Network and HTTP errors propagate to
    the caller (tavily_search handles them).
    """
    body = json.dumps(payload).encode()
    headers = {
        "Content-Type": "application/json",
        "User-Agent": "competitor-pr-finder/1.0",
    }
    request = urllib.request.Request(url, data=body, headers=headers, method="POST")
    with urllib.request.urlopen(request, timeout=timeout, context=_ssl_ctx) as response:
        return json.loads(response.read())
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def tavily_search(query, key, depth="advanced", max_results=7):
    """Run a single Tavily search; return {"answer": str, "results": list}.

    Failures are deliberately swallowed (best-effort research): on any
    exception the error string is attached under "error" and empty data is
    returned, so one failed query never aborts a whole research run.
    """
    log(f"  Tavily [{depth}] {query[:80]}")
    payload = {
        "api_key": key,
        "query": query,
        "search_depth": depth,
        "max_results": max_results,
    }
    try:
        response = fetch_json("https://api.tavily.com/search", payload)
    except Exception as exc:
        log(f"    ERROR: {exc}")
        return {"answer": "", "results": [], "error": str(exc)}
    return {
        "answer": response.get("answer", ""),
        "results": response.get("results", []),
    }
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
# Phase 1: competitor discovery
|
|
80
|
+
# ---------------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
def run_discover(product_analysis, tavily_key):
    """Phase 1: collect raw competitor-candidate search results.

    Builds two Tavily queries from the product name and the L2/L3 industry
    taxonomy in `product_analysis`, runs them, and returns the trimmed raw
    results (title/url/first-500-chars of content) for downstream analysis.
    """
    taxonomy = product_analysis.get("industry_taxonomy", {})
    name = product_analysis.get("product_name", "")
    l2 = taxonomy.get("l2", "")
    l3 = taxonomy.get("l3", "")

    log(f"\nPhase 1: competitor discovery for '{name}'")
    log(f"  taxonomy: {l2} > {l3}")

    queries = [
        f'"{name}" competitors alternatives {l3}',
        f"{l2} {l3} startups companies funded 2022 2023 2024",
    ]

    competitor_searches = []
    for query in queries:
        found = tavily_search(query, tavily_key, depth="advanced", max_results=8)
        hits = found.get("results", [])
        competitor_searches.append(
            {
                "query": query,
                "answer": found.get("answer", ""),
                # Keep only the fields the caller needs; cap content length
                # to bound the size of the output file.
                "results": [
                    {
                        "title": hit.get("title", ""),
                        "url": hit.get("url", ""),
                        "content": hit.get("content", "")[:500],
                    }
                    for hit in hits
                ],
            }
        )
        log(f"    {len(hits)} results")

    log("Phase 1 complete.")
    return {
        "date": str(date.today()),
        "product_name": name,
        "competitor_searches": competitor_searches,
    }
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# ---------------------------------------------------------------------------
|
|
123
|
+
# Phase 2: three-track PR research
|
|
124
|
+
# ---------------------------------------------------------------------------
|
|
125
|
+
|
|
126
|
+
# Per-track Tavily query templates for Phase 2. The literal "{competitor}"
# placeholder is substituted (via str.replace, not str.format) with the
# competitor name before each search.
TRACK_QUERIES = {
    # Mainstream editorial / press coverage.
    "editorial": '"{competitor}" featured press coverage TechCrunch Forbes Wired article interview',
    # Founder / CEO podcast appearances.
    "podcast": '"{competitor}" founder CEO podcast interview appeared on episode',
    # Community discussion (Reddit, Hacker News, Product Hunt).
    "community": '"{competitor}" site:reddit.com OR site:news.ycombinator.com OR site:producthunt.com',
}

# Broad catch-all query, used only when all three tracks return zero results.
FALLBACK_QUERY = '"{competitor}" review coverage press news'
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def run_pr_research(confirmed_competitors, product_analysis, tavily_key):
    """Phase 2: run the three-track PR search per confirmed competitor.

    For each competitor, queries the editorial, podcast, and community
    tracks (TRACK_QUERIES). If all three come back empty, one broad
    fallback search (FALLBACK_QUERY) is run and the competitor record is
    flagged via "data_quality_flag" so callers know results are degraded.

    Args:
        confirmed_competitors: list of {"name": ..., "url": ...} dicts.
        product_analysis: parsed cprf-product-analysis.json. Currently
            unused here; kept for interface symmetry with run_discover.
        tavily_key: Tavily API key.

    Returns:
        {"date": ..., "competitors_researched": N, "results": [...]}.
    """

    def _trim(raw_results):
        # Keep only the fields downstream synthesis needs; cap content at
        # 500 chars to bound output size. (Previously this literal was
        # duplicated in the track loop and the fallback path.)
        return [
            {
                "title": r.get("title", ""),
                "url": r.get("url", ""),
                "content": r.get("content", "")[:500],
            }
            for r in raw_results
        ]

    log(f"\nPhase 2: PR research for {len(confirmed_competitors)} competitors")

    results = []
    for comp in confirmed_competitors:
        name = comp.get("name", "")
        url = comp.get("url", "")
        log(f"\n  [{name}]")

        comp_result = {
            "competitor": name,
            "url": url,
            "tracks": {},
        }

        total_results = 0
        for track, query_template in TRACK_QUERIES.items():
            query = query_template.replace("{competitor}", name)
            data = tavily_search(query, tavily_key, depth="advanced", max_results=7)
            track_results = _trim(data.get("results", []))
            comp_result["tracks"][track] = {
                "query": query,
                "answer": data.get("answer", ""),
                "results": track_results,
            }
            count = len(track_results)
            total_results += count
            log(f"    {track}: {count} results")

        # Fallback if all 3 tracks returned nothing.
        if total_results == 0:
            # Plain string (was an f-string with no placeholders).
            log("    WARNING: 0 results across all tracks. Running fallback search.")
            fallback_query = FALLBACK_QUERY.replace("{competitor}", name)
            fallback_data = tavily_search(
                fallback_query, tavily_key, depth="advanced", max_results=7
            )
            comp_result["tracks"]["fallback"] = {
                "query": fallback_query,
                "answer": fallback_data.get("answer", ""),
                "results": _trim(fallback_data.get("results", [])),
            }
            comp_result["data_quality_flag"] = "All 3 tracks returned 0 results. Fallback search used."
            log(f"    fallback: {len(fallback_data.get('results', []))} results")

        results.append(comp_result)

    log("\nPhase 2 complete.")
    return {
        "date": str(date.today()),
        "competitors_researched": len(confirmed_competitors),
        "results": results,
    }
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# ---------------------------------------------------------------------------
|
|
204
|
+
# CLI
|
|
205
|
+
# ---------------------------------------------------------------------------
|
|
206
|
+
|
|
207
|
+
def main():
|
|
208
|
+
global quiet
|
|
209
|
+
|
|
210
|
+
parser = argparse.ArgumentParser(description="competitor-pr-finder research script")
|
|
211
|
+
parser.add_argument(
|
|
212
|
+
"--phase",
|
|
213
|
+
required=True,
|
|
214
|
+
choices=["discover", "pr-research"],
|
|
215
|
+
help="Which phase to run",
|
|
216
|
+
)
|
|
217
|
+
parser.add_argument(
|
|
218
|
+
"--product-analysis",
|
|
219
|
+
required=True,
|
|
220
|
+
help="Path to cprf-product-analysis.json",
|
|
221
|
+
)
|
|
222
|
+
parser.add_argument(
|
|
223
|
+
"--competitors",
|
|
224
|
+
default="",
|
|
225
|
+
help="Path to cprf-competitors-confirmed.json (Phase 2 only)",
|
|
226
|
+
)
|
|
227
|
+
parser.add_argument(
|
|
228
|
+
"--tavily-key",
|
|
229
|
+
default=os.environ.get("TAVILY_API_KEY", ""),
|
|
230
|
+
help="Tavily API key (or set TAVILY_API_KEY env var)",
|
|
231
|
+
)
|
|
232
|
+
parser.add_argument(
|
|
233
|
+
"--output",
|
|
234
|
+
required=True,
|
|
235
|
+
help="Path to write JSON output",
|
|
236
|
+
)
|
|
237
|
+
parser.add_argument(
|
|
238
|
+
"--quiet",
|
|
239
|
+
action="store_true",
|
|
240
|
+
help="Suppress progress output to stderr",
|
|
241
|
+
)
|
|
242
|
+
args = parser.parse_args()
|
|
243
|
+
|
|
244
|
+
quiet = args.quiet
|
|
245
|
+
|
|
246
|
+
if not args.tavily_key:
|
|
247
|
+
print("ERROR: Tavily API key required. Pass --tavily-key or set TAVILY_API_KEY.", file=sys.stderr)
|
|
248
|
+
sys.exit(1)
|
|
249
|
+
|
|
250
|
+
if not os.path.exists(args.product_analysis):
|
|
251
|
+
print(f"ERROR: {args.product_analysis} not found", file=sys.stderr)
|
|
252
|
+
sys.exit(1)
|
|
253
|
+
|
|
254
|
+
with open(args.product_analysis) as f:
|
|
255
|
+
product_analysis = json.load(f)
|
|
256
|
+
|
|
257
|
+
if args.phase == "discover":
|
|
258
|
+
output = run_discover(product_analysis, args.tavily_key)
|
|
259
|
+
|
|
260
|
+
elif args.phase == "pr-research":
|
|
261
|
+
if not args.competitors:
|
|
262
|
+
print("ERROR: --competitors required for pr-research phase", file=sys.stderr)
|
|
263
|
+
sys.exit(1)
|
|
264
|
+
if not os.path.exists(args.competitors):
|
|
265
|
+
print(f"ERROR: {args.competitors} not found", file=sys.stderr)
|
|
266
|
+
sys.exit(1)
|
|
267
|
+
with open(args.competitors) as f:
|
|
268
|
+
competitors_data = json.load(f)
|
|
269
|
+
confirmed = competitors_data.get("confirmed_competitors", [])
|
|
270
|
+
if not confirmed:
|
|
271
|
+
print("ERROR: no confirmed_competitors in input file", file=sys.stderr)
|
|
272
|
+
sys.exit(1)
|
|
273
|
+
output = run_pr_research(confirmed, product_analysis, args.tavily_key)
|
|
274
|
+
|
|
275
|
+
with open(args.output, "w") as f:
|
|
276
|
+
json.dump(output, f, indent=2)
|
|
277
|
+
|
|
278
|
+
log(f"\nOutput written to {args.output}")
|
|
279
|
+
|
|
280
|
+
# Print summary for SKILL.md to parse
|
|
281
|
+
if args.phase == "discover":
|
|
282
|
+
total = sum(len(s["results"]) for s in output["competitor_searches"])
|
|
283
|
+
print(f"Discover complete: {len(output['competitor_searches'])} queries, {total} total results")
|
|
284
|
+
else:
|
|
285
|
+
print(f"PR research complete: {output['competitors_researched']} competitors researched")
|
|
286
|
+
for r in output.get("results", []):
|
|
287
|
+
track_counts = {t: len(v["results"]) for t, v in r["tracks"].items()}
|
|
288
|
+
print(f" {r['competitor']}: {track_counts}")
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
if __name__ == "__main__":
|
|
292
|
+
main()
|
|
@@ -1,10 +1,6 @@
|
|
|
1
1
|
# vc-finder: Environment Variables
|
|
2
2
|
# ===================================
|
|
3
|
-
#
|
|
4
|
-
|
|
5
|
-
# Required: Google Gemini API key for product analysis and VC synthesis
|
|
6
|
-
# Get it: aistudio.google.com > Get API key
|
|
7
|
-
GEMINI_API_KEY=your_gemini_api_key_here
|
|
3
|
+
# Tavily is required. Firecrawl is recommended.
|
|
8
4
|
|
|
9
5
|
# Required: Tavily API key for VC investment research (Track A and Track B searches)
|
|
10
6
|
# Get it: app.tavily.com > API Keys
|
|
@@ -30,29 +30,32 @@ https://github.com/user-attachments/assets/ee98a1b5-ebc4-452f-bbfb-c434f2935067
|
|
|
30
30
|
|
|
31
31
|
- Fetches the product URL via Firecrawl (handles JS-rendered SPAs) or Tavily extract as fallback
|
|
32
32
|
- Detects funding stage from CTA signals on the page (waitlist, free trial, pricing, sales CTAs)
|
|
33
|
-
-
|
|
33
|
+
- Maps a 3-level industry taxonomy (L1 > L2 > L3) from the product page
|
|
34
|
+
- **Curated pre-match (Step 5b):** Scores product against a verified dataset of 25 VC funds (sourced from fund websites) -- instant zero-hallucination matches with no Tavily credits consumed
|
|
35
|
+
- **Discovers comparable companies:** Curated portfolio companies from matched funds + Tavily live search for L3-niche specifics
|
|
34
36
|
- Track A: 5 Tavily searches to find who invested in each comparable company
|
|
35
37
|
- Track B: 3 Tavily searches to find VCs who publish investment theses about this specific niche
|
|
36
|
-
-
|
|
38
|
+
- Synthesizes and ranks all found VCs -- curated matches labeled "verified", Tavily matches labeled by track
|
|
37
39
|
- Produces top 5 deep-dives with fund overview, portfolio evidence, how-to-approach, and outreach hook
|
|
38
40
|
- Generates 3 product-specific outreach hooks (not generic advice)
|
|
39
41
|
- Saves output to `docs/vc-intel/[product]-[date].md`
|
|
40
42
|
|
|
43
|
+
**Zero-hallucination guarantee:** Every VC name, fund detail, check size, portfolio company, and thesis source in the output must trace to either (a) the curated `data/vc_funds.json` dataset (sourced from fund websites) or (b) a specific Tavily search result. The AI does not draw from training knowledge for any factual claim.
|
|
44
|
+
|
|
41
45
|
## Requirements
|
|
42
46
|
|
|
43
47
|
| Requirement | Purpose | How to Set Up |
|
|
44
48
|
|---|---|---|
|
|
45
|
-
| Gemini API key | Product analysis and VC synthesis | aistudio.google.com, Get API key |
|
|
46
49
|
| Tavily API key | VC investment research (Track A and Track B) | app.tavily.com, free tier: 1000 credits/month |
|
|
47
|
-
| Firecrawl API key | Fetching JS-rendered product pages | firecrawl.dev, free tier: 500 credits/month |
|
|
50
|
+
| Firecrawl API key | Fetching JS-rendered product pages (optional) | firecrawl.dev, free tier: 500 credits/month |
|
|
48
51
|
|
|
49
|
-
|
|
52
|
+
Tavily is required. Firecrawl is recommended -- without it, Tavily extract is used as fallback (may miss JS-rendered content).
|
|
50
53
|
|
|
51
54
|
## Setup
|
|
52
55
|
|
|
53
56
|
```bash
|
|
54
57
|
cp .env.example .env
|
|
55
|
-
# Add
|
|
58
|
+
# Add TAVILY_API_KEY (required)
|
|
56
59
|
# Add FIRECRAWL_API_KEY (recommended)
|
|
57
60
|
```
|
|
58
61
|
|
|
@@ -90,9 +93,9 @@ Each run produces:
|
|
|
90
93
|
## Cost per Run
|
|
91
94
|
|
|
92
95
|
- Firecrawl: ~$0.001 per fetch
|
|
93
|
-
- Tavily:
|
|
94
|
-
-
|
|
95
|
-
- Total: ~$0.
|
|
96
|
+
- Tavily: 10 searches at ~$0.01 each = ~$0.10 (2 comparable discovery + 5 Track A + 3 Track B)
|
|
97
|
+
- Curated pre-match (Step 5b): $0.00 -- local scoring against `data/vc_funds.json`, no API calls
|
|
98
|
+
- Total: ~$0.10 per run
|
|
96
99
|
|
|
97
100
|
## Project Structure
|
|
98
101
|
|
|
@@ -101,6 +104,10 @@ vc-finder/
|
|
|
101
104
|
├── SKILL.md
|
|
102
105
|
├── README.md
|
|
103
106
|
├── .env.example
|
|
107
|
+
├── data/
|
|
108
|
+
│ └── vc_funds.json (25 verified funds, sourced from fund websites)
|
|
109
|
+
├── scripts/
|
|
110
|
+
│ └── match_funds.py (standalone scoring script for testing)
|
|
104
111
|
├── evals/
|
|
105
112
|
│ └── evals.json
|
|
106
113
|
└── references/
|