@veyralabs/skills 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,288 @@
1
+ """
2
+ Level 1 data sources for venture-analyst.
3
+ Zero API keys required. Works immediately after install.
4
+ """
5
+ import time
6
+ import requests
7
+ from typing import Optional
8
+
9
+ REDDIT_UA = "venture-analyst/1.0 (open-source research tool; github.com/veyralabsgroup/veyraskills)"  # User-Agent header value that search_reddit sends with every Reddit request.
10
+
11
+
12
+ # ── HN Algolia (no auth, 10k req/hour) ────────────────────────────────────────
13
+
14
def search_hn(query: str, limit: int = 20) -> list[dict]:
    """Search Hacker News stories via the Algolia API (no API key needed).

    Args:
        query: Free-text search string.
        limit: Number of hits requested (sent as ``hitsPerPage``).

    Returns:
        One dict per hit with the keys ``source``, ``title``, ``url``,
        ``points``, ``comments``, ``text``, ``author`` and ``date`` (the
        first 10 characters of ``created_at``). Returns ``[]`` when the
        request fails or the response cannot be processed.
    """
    url = "https://hn.algolia.com/api/v1/search"
    params = {
        "query": query,
        "hitsPerPage": limit,
        "tags": "(story,ask_hn,show_hn)",
    }
    try:
        r = requests.get(url, params=params, timeout=10)
        r.raise_for_status()
        hits = r.json().get("hits", [])
        stories = []
        for h in hits:
            # dict.get(key, default) only covers a *missing* key; a key that
            # is present with a null value still yields None. Using `or`
            # keeps one sparse hit from raising on the slices below, which
            # would otherwise discard the whole result set via the except.
            story_text = h.get("story_text") or ""
            stories.append({
                "source": "hackernews",
                "title": h.get("title") or (story_text[:80] + "..."),
                # Fall back to the HN item page when the hit has no URL.
                "url": h.get("url") or f"https://news.ycombinator.com/item?id={h.get('objectID')}",
                "points": h.get("points") or 0,
                "comments": h.get("num_comments") or 0,
                "text": story_text[:400],
                "author": h.get("author") or "",
                "date": (h.get("created_at") or "")[:10],
            })
        return stories
    except Exception:
        # Deliberate best-effort: any network, HTTP or parsing failure
        # degrades to "no results" instead of aborting the caller.
        return []
41
+
42
+
43
def search_hn_comments(query: str, min_points: int = 5, limit: int = 30) -> list[dict]:
    """Search Hacker News comments via the Algolia API.

    Args:
        query: Free-text search string.
        min_points: Exclusive lower bound, sent as ``numericFilters=points>N``.
        limit: Number of hits requested (sent as ``hitsPerPage``).

    Returns:
        One dict per hit that has a non-empty ``comment_text``, with the
        keys ``source``, ``text`` (first 500 characters), ``url``,
        ``points`` and ``author``. Returns ``[]`` on any failure.
    """
    # NOTE(review): this relies on Algolia exposing `points` for comment
    # hits; if it does not, the filter may exclude everything — confirm.
    search_params = {
        "query": query,
        "hitsPerPage": limit,
        "tags": "comment",
        "numericFilters": f"points>{min_points}",
    }
    try:
        response = requests.get(
            "https://hn.algolia.com/api/v1/search",
            params=search_params,
            timeout=10,
        )
        response.raise_for_status()

        comments = []
        for hit in response.json().get("hits", []):
            body = hit.get("comment_text")
            if not body:
                # Skip hits with missing or empty comment text.
                continue
            comments.append({
                "source": "hackernews_comment",
                "text": body[:500],
                "url": f"https://news.ycombinator.com/item?id={hit.get('objectID')}",
                "points": hit.get("points", 0),
                "author": hit.get("author", ""),
            })
        return comments
    except Exception:
        # Best-effort source: failures yield an empty result.
        return []
68
+
69
+
70
+ # ── Reddit (no auth, custom UA required) ──────────────────────────────────────
71
+
72
def search_reddit(
    query: str,
    subreddits: Optional[list[str]] = None,
    limit: int = 25,
    timeframe: str = "year",
) -> list[dict]:
    """Search Reddit through its public ``.json`` endpoints.

    Every request carries the module's custom User-Agent (``REDDIT_UA``).

    Args:
        query: Free-text search string.
        subreddits: Optional subreddit names. Only the first four are
            queried, each restricted to that subreddit and sorted by top.
            When empty or None, a single site-wide relevance search runs.
        limit: Result budget. Site-wide it is passed through as-is. Per
            subreddit it is split evenly with a floor of 5 each, so the
            combined total can exceed ``limit`` when ``limit`` is small.
        timeframe: Value for Reddit's ``t`` parameter (e.g. ``"year"``).

    Returns:
        Post dicts accumulated by ``_reddit_fetch``; failed requests
        simply contribute nothing.
    """
    results: list[dict] = []
    headers = {"User-Agent": REDDIT_UA}

    if not subreddits:
        url = "https://www.reddit.com/search.json"
        params = {"q": query, "sort": "relevance", "limit": limit, "t": timeframe}
        _reddit_fetch(url, params, headers, results)
        return results

    targets = subreddits[:4]
    per_sub = max(5, limit // len(targets))
    for index, sub in enumerate(targets):
        if index:
            # Throttle only *between* consecutive requests; sleeping after
            # the final one would just delay the return for no benefit.
            time.sleep(1.2)
        url = f"https://www.reddit.com/r/{sub}/search.json"
        params = {"q": query, "sort": "top", "limit": per_sub, "t": timeframe, "restrict_sr": 1}
        _reddit_fetch(url, params, headers, results)

    return results
95
+
96
+
97
def _reddit_fetch(url: str, params: dict, headers: dict, results: list) -> None:
    """Fetch one Reddit listing and append its posts to ``results`` in place.

    Args:
        url: Reddit ``.json`` endpoint to request.
        params: Query-string parameters for the request.
        headers: HTTP headers for the request.
        results: List that receives one dict per post, with the keys
            ``source``, ``title``, ``url``, ``upvotes``, ``comments``,
            ``text`` (first 500 characters) and ``subreddit``.

    Non-200 responses and any exception are ignored; posts appended before
    an exception occurred stay in ``results``.
    """
    try:
        response = requests.get(url, params=params, headers=headers, timeout=12)
        if response.status_code != 200:
            return
        children = response.json().get("data", {}).get("children", [])
        for child in children:
            post = child.get("data", {})
            record = {
                "source": "reddit",
                "title": post.get("title", ""),
                "url": f"https://reddit.com{post.get('permalink', '')}",
                "upvotes": post.get("score", 0),
                "comments": post.get("num_comments", 0),
                "text": (post.get("selftext") or "")[:500],
                "subreddit": post.get("subreddit", ""),
            }
            results.append(record)
    except Exception:
        # Best-effort: a failing request must not abort the overall search.
        pass
114
+
115
+
116
+ # ── GitHub Issues (no auth; search API ~10 req/min unauthenticated) ───────────
117
+
118
def search_github_issues(
    query: str,
    limit: int = 20,
    token: Optional[str] = None,
) -> list[dict]:
    """Search GitHub issues, most-reacted first, for pain points and requests.

    Args:
        query: Search terms; ``type:issue`` is appended to the query.
        limit: Desired number of results; at most 30 are requested.
        token: Optional GitHub token, sent as an ``Authorization`` header.
            It must be passed in explicitly; this function does not read
            any environment variable.

    Returns:
        One dict per issue with the keys ``source``, ``title``, ``url``,
        ``reactions``, ``comments``, ``text`` (first 400 characters of the
        body), ``repo`` (last path segment of ``repository_url``) and
        ``state``. Returns ``[]`` on a non-200 response or any exception.

    NOTE(review): earlier documentation quoted 60 req/hour unauthenticated
    and 5,000 req/hour with a token. GitHub documents a separate per-minute
    limit for search endpoints — verify the figures against the REST API
    rate-limit documentation.
    """
    request_headers = {"Accept": "application/vnd.github.v3+json"}
    if token:
        request_headers["Authorization"] = f"token {token}"

    search_params = {
        "q": f"{query} type:issue",
        "sort": "reactions",
        "order": "desc",
        "per_page": min(limit, 30),
    }

    try:
        response = requests.get(
            "https://api.github.com/search/issues",
            params=search_params,
            headers=request_headers,
            timeout=12,
        )
        if response.status_code != 200:
            return []

        issues = []
        for item in response.json().get("items", []):
            issues.append({
                "source": "github",
                "title": item.get("title", ""),
                "url": item.get("html_url", ""),
                "reactions": item.get("reactions", {}).get("total_count", 0),
                "comments": item.get("comments", 0),
                "text": (item.get("body") or "")[:400],
                "repo": item.get("repository_url", "").split("/")[-1],
                "state": item.get("state", ""),
            })
        return issues
    except Exception:
        # Best-effort source: failures yield an empty result.
        return []
158
+
159
+
160
def search_github_repos(
    query: str,
    limit: int = 10,
    token: Optional[str] = None,
) -> list[dict]:
    """Find existing repositories/tools in the space, most-starred first.

    Args:
        query: Repository search terms.
        limit: Desired number of results; at most 20 are requested.
        token: Optional GitHub token, sent as an ``Authorization`` header.

    Returns:
        One dict per repository with the keys ``source``, ``name``,
        ``url``, ``stars``, ``description``, ``language`` and ``updated``
        (first 10 characters of ``updated_at``). String fields are always
        ``str`` and ``stars`` is always a number, even when the API
        reports null. Returns ``[]`` on a non-200 response or any
        exception.
    """
    url = "https://api.github.com/search/repositories"
    params = {
        "q": query,
        "sort": "stars",
        "order": "desc",
        "per_page": min(limit, 20),
    }
    headers = {"Accept": "application/vnd.github.v3+json"}
    if token:
        headers["Authorization"] = f"token {token}"

    try:
        r = requests.get(url, params=params, headers=headers, timeout=12)
        if r.status_code != 200:
            return []
        return [
            {
                "source": "github_repo",
                # dict.get(key, default) does not replace an explicit null
                # (e.g. a repo without a description), so `or` is used to
                # keep the promised types and to stop a null `updated_at`
                # from raising and discarding every result.
                "name": repo.get("full_name") or "",
                "url": repo.get("html_url") or "",
                "stars": repo.get("stargazers_count") or 0,
                "description": repo.get("description") or "",
                "language": repo.get("language") or "",
                "updated": (repo.get("updated_at") or "")[:10],
            }
            for repo in r.json().get("items", [])
        ]
    except Exception:
        # Best-effort source: failures yield an empty result.
        return []
195
+
196
+
197
+ # ── Web search (ddgs, no key) ──────────────────────────────────────────────────
198
+
199
def search_web(query: str, limit: int = 10) -> list[dict]:
    """Run a keyless web search through the ``ddgs`` package.

    Args:
        query: Search string.
        limit: Passed to ``ddgs`` as ``max_results``.

    Returns:
        One dict per hit with the keys ``source``, ``title``, ``url``
        (taken from the hit's ``href``) and ``text`` (from ``body``).
        Returns ``[]`` if ``ddgs`` cannot be imported or the search raises.
    """
    try:
        # Imported lazily so a missing package only disables this source.
        from ddgs import DDGS

        with DDGS() as client:
            return [
                {
                    "source": "web",
                    "title": hit.get("title", ""),
                    "url": hit.get("href", ""),
                    "text": hit.get("body", ""),
                }
                for hit in client.text(query, max_results=limit)
            ]
    except Exception:
        return []
215
+
216
+
217
+ # ── Google Trends (no key) ─────────────────────────────────────────────────────
218
+
219
def get_trends(keyword: str) -> dict:
    """Summarise 12 months of Google Trends interest for ``keyword``.

    The last 8 data points are compared against the whole-period mean:
    above 125% is ``"rising"``, below 75% is ``"declining"``, otherwise
    ``"stable"``.

    Returns:
        ``{"trend", "avg_interest", "recent_interest", "related_queries"}``
        on success, where ``related_queries`` maps up to five top related
        queries to their values (empty if that lookup fails);
        ``{"trend": "no_data", "avg_interest": 0}`` when the interest
        frame is empty; ``{"trend": "unavailable", "avg_interest": 0}``
        on any other failure, including a failed import.

    NOTE(review): confirm that ``trendspyg`` actually exposes a
    ``TrendReq`` class with this interface; if it does not, the import
    fails and this function silently reports "unavailable" every time.
    """
    try:
        from trendspyg import TrendReq

        client = TrendReq(hl="en-US", tz=360)
        client.build_payload([keyword], timeframe="today 12-m")
        frame = client.interest_over_time()
        if frame.empty:
            return {"trend": "no_data", "avg_interest": 0}

        overall = float(frame[keyword].mean())
        latest = float(frame[keyword].iloc[-8:].mean())

        direction = "stable"
        if latest > overall * 1.25:
            direction = "rising"
        elif latest < overall * 0.75:
            direction = "declining"

        top_related = {}
        try:
            top_frame = client.related_queries().get(keyword, {}).get("top")
            if not (top_frame is None or top_frame.empty):
                top_related = {
                    row["query"]: row["value"]
                    for _, row in top_frame.head(5).iterrows()
                }
        except Exception:
            # Related queries are optional; keep the main trend result.
            pass

        return {
            "trend": direction,
            "avg_interest": round(overall, 1),
            "recent_interest": round(latest, 1),
            "related_queries": top_related,
        }
    except Exception:
        return {"trend": "unavailable", "avg_interest": 0}
255
+
256
+
257
+ # ── Evidence Score ─────────────────────────────────────────────────────────────
258
+
259
def calculate_evidence_score(results: dict) -> dict:
    """Score the evidence collected across all sources on a 0-100 scale.

    Weights (each capped): Reddit 1.5 per item up to 25, Hacker News
    stories and comments combined 2.5 per item up to 25, GitHub issues 2
    per item up to 20, competitors 3 per item up to 20, plus 10 points
    when usable trend data is present.

    Args:
        results: Mapping with optional keys ``reddit``, ``hackernews``,
            ``hackernews_comment``, ``github``, ``competitors`` (sequences)
            and ``trends`` (a dict with a ``trend`` key). Missing or None
            entries count as empty.

    Returns:
        ``{"evidence_score": int, "breakdown": {...}}`` where the breakdown
        holds the per-source counts and the trend state.
    """
    def _items(key: str) -> list:
        # Treat a missing key and an explicit None the same: no evidence.
        return list(results.get(key) or [])

    reddit = _items("reddit")
    hn = _items("hackernews") + _items("hackernews_comment")
    github = _items("github")
    competitors = _items("competitors")

    # Resolve the trend state once so the points and the reported breakdown
    # cannot disagree. Absent trend data is "unavailable" and must not earn
    # the trend bonus.
    trend_state = (results.get("trends") or {}).get("trend") or "unavailable"

    # Weighted scoring
    reddit_pts = min(len(reddit) * 1.5, 25)
    hn_pts = min(len(hn) * 2.5, 25)
    github_pts = min(len(github) * 2, 20)
    comp_pts = min(len(competitors) * 3, 20)
    trend_pts = 0 if trend_state in ("unavailable", "no_data") else 10

    score = int(reddit_pts + hn_pts + github_pts + comp_pts + trend_pts)

    return {
        "evidence_score": min(score, 100),
        "breakdown": {
            "reddit_mentions": len(reddit),
            "hn_discussions": len(hn),
            "github_issues": len(github),
            "competitors_found": len(competitors),
            "trend_data": trend_state,
        },
    }
@@ -0,0 +1,119 @@
1
+ # Experiment Spec
2
+
3
+ Use this template when designing a specific validation experiment. Fill every section — vague experiments produce vague results.
4
+
5
+ ---
6
+
7
+ ## Experiment: [name]
8
+
9
+ **Idea being validated:** [one sentence]
10
+ **Hypothesis:** If [target customer] experiences [problem], then [% of them] will [take specific action] when shown [this offering].
11
+ **Type:** discovery / demand_signal / value_validation / willingness_to_pay
12
+
13
+ ---
14
+
15
+ ### Setup
16
+
17
+ **Duration:** [X days/weeks]
18
+ **Budget:** [€0 / €X]
19
+ **Effort:** [hours estimated]
20
+
21
+ **Who:** [exact target customer - be specific. "founders" is not specific. "B2B SaaS founders with 1-10 person team, pre-Series A" is specific.]
22
+
23
+ **Channel:** [where you'll find them]
24
+ - Primary: [Reddit / LinkedIn / Cold email / specific community]
25
+ - Backup: [if primary fails]
26
+
27
+ **What you're showing them:**
28
+ [Landing page URL / message template / demo link / mockup]
29
+
30
+ ---
31
+
32
+ ### Metrics
33
+
34
+ **Primary metric:** [one number]
35
+ **Target:** [specific threshold that determines pass/fail]
36
+
37
+ | Metric | How to measure | Pass threshold | Fail threshold |
38
+ |--------|---------------|----------------|----------------|
39
+ | [primary] | [tool/method] | [number] | [number] |
40
+ | [secondary] | [tool/method] | [number] | [number] |
41
+
42
+ **Data collection method:** [Google Analytics / Tally form / manual tracking / Stripe]
43
+
44
+ ---
45
+
46
+ ### Scripts and materials
47
+
48
+ **Outreach message (cold):**
49
+ ```
50
+ Subject: [subject line - no spam words]
51
+
52
+ [message body - short, no pitch, curiosity-based]
53
+ ```
54
+
55
+ **Interview opener:**
56
+ "I'm researching how [people like you] handle [problem area]. Not selling anything — want 15 minutes to understand your current process."
57
+
58
+ **Landing page headline:** [outcome they want] without [current pain]
59
+ **CTA text:** [Join waitlist / Get early access / Book a call]
60
+
61
+ ---
62
+
63
+ ### Mom Test checklist
64
+
65
+ Before running interviews, verify all questions pass the Mom Test:
66
+
67
+ - [ ] Questions ask about past behavior, not future intentions
68
+ - [ ] No hypotheticals ("would you use X?" is banned)
69
+ - [ ] No leading questions ("don't you find X frustrating?")
70
+ - [ ] No pitching during problem interviews
71
+ - [ ] Success criteria defined before starting (not after seeing results)
72
+
73
+ ---
74
+
75
+ ### Week-by-week plan
76
+
77
+ **Week 1:**
78
+ - [ ] [specific task]
79
+ - [ ] [specific task]
80
+
81
+ **Week 2:**
82
+ - [ ] [specific task]
83
+ - [ ] Review data against thresholds
84
+
85
+ ---
86
+
87
+ ### Results tracking
88
+
89
+ | Date | Channel | Contacts/Views | Actions | Conversion |
90
+ |------|---------|----------------|---------|------------|
91
+ | | | | | |
92
+
93
+ **Running total:**
94
+ - Primary metric: [X / target]
95
+ - Secondary: [X / target]
96
+
97
+ ---
98
+
99
+ ### Decision rules
100
+
101
+ **If primary metric >= target by [date]:** [proceed to next experiment / begin building]
102
+
103
+ **If primary metric < 50% of target by [date]:** [pivot message / pivot target customer / kill experiment]
104
+
105
+ **If qualitative signals contradict quantitative:** [investigate further — don't average them out]
106
+
107
+ ---
108
+
109
+ ### Post-experiment notes
110
+
111
+ **What worked:**
112
+
113
+ **What didn't:**
114
+
115
+ **Biggest surprise:**
116
+
117
+ **Quote from a customer that changed how I think:**
118
+
119
+ **Next experiment:** [or "begin building" if validated]
@@ -0,0 +1,160 @@
1
+ # Verdict Template
2
+
3
+ The final output of venture-analyst. Complete all sections. No section is optional.
4
+
5
+ ---
6
+
7
+ ## [Idea Name] — Venture Verdict
8
+
9
+ **Date:** [YYYY-MM-DD]
10
+ **Evidence Score:** [0-100] (from calculate_evidence_score())
11
+ **Recommendation:** BUILD / VALIDATE FIRST / AVOID
12
+
13
+ ---
14
+
15
+ ## Evidence Summary
16
+
17
+ ### Problem signals found
18
+
19
+ | Source | Count | Strongest signal |
20
+ |--------|-------|-----------------|
21
+ | HN discussions | [n] | "[quote]" |
22
+ | Reddit mentions | [n] | "[quote]" |
23
+ | GitHub issues | [n] | "[repo/issue]" |
24
+ | Market trend | [rising/stable/declining] | [data point] |
25
+
26
+ **Evidence quality:** [Strong / Moderate / Thin / Weak]
27
+
28
+ ### Competitor landscape
29
+
30
+ | Name | Pricing | Users/Stars | Main weakness |
31
+ |------|---------|-------------|---------------|
32
+ | [A] | [price] | [n] | [gap] |
33
+ | [B] | [price] | [n] | [gap] |
34
+
35
+ **Market gap identified:** [Yes/No — describe if yes]
36
+ **Dominant player:** [Yes/No — name if yes]
37
+
38
+ ---
39
+
40
+ ## Bull Case
41
+
42
+ *Arguments for building this.*
43
+
44
+ **Strongest evidence:**
45
+ [2-3 specific data points — quotes, numbers, sources. Not opinions.]
46
+
47
+ **Market timing:**
48
+ [Why now? What changed recently that makes this viable?]
49
+
50
+ **Competitive angle:**
51
+ [What can this do that incumbents can't or won't?]
52
+
53
+ **Best-case scenario:**
54
+ In 18 months, [specific outcome] if [specific condition] holds. Revenue path: [rough numbers].
55
+
56
+ ---
57
+
58
+ ## Bear Case
59
+
60
+ *Steel man the opposition. Give it everything.*
61
+
62
+ **Strongest evidence against:**
63
+ [2-3 specific counterarguments — facts, not opinions.]
64
+
65
+ **Why existing solutions might be good enough:**
66
+ [What do incumbents have that would be hard to beat?]
67
+
68
+ **Risk factors:**
69
+ - Market risk: [specific]
70
+ - Timing risk: [specific]
71
+ - Execution risk: [specific]
72
+
73
+ **Worst-case scenario:**
74
+ [What happens if the problem isn't as painful as signals suggest? Or if incumbents copy the positioning?]
75
+
76
+ ---
77
+
78
+ ## Judge Verdict
79
+
80
+ *Read both cases above before scoring.*
81
+
82
+ ### Score
83
+
84
+ | Signal | Present? | Points |
85
+ |--------|----------|--------|
86
+ | Evidence score > 60 | [Y/N] | +2 / 0 |
87
+ | Trend = rising | [Y/N] | +1 / 0 |
88
+ | Competitor has clear weakness | [Y/N] | +1 / 0 |
89
+ | No dominant player >50% share | [Y/N] | +1 / 0 |
90
+ | B2B with willingness-to-pay signals | [Y/N] | +1 / 0 |
91
+ | Price ceiling gap exists | [Y/N] | +1 / 0 |
92
+ | Evidence score < 30 | [Y/N] | -3 / 0 |
93
+ | Trend = declining | [Y/N] | -2 / 0 |
94
+ | Competitor with >100k users + free tier | [Y/N] | -2 / 0 |
95
+ | Niche < 10k potential users | [Y/N] | -1 / 0 |
96
+ | **Total** | | **[sum]** |
97
+
98
+ ### Recommendation
99
+
100
+ **[BUILD / VALIDATE FIRST / AVOID]**
101
+
102
+ **Confidence:** High / Medium / Low
103
+
104
+ *Reasoning (2-3 sentences max. Direct. No hedging):*
105
+ [Judge's reasoning here. Reference specific evidence from both cases. State what tipped the scale.]
106
+
107
+ ---
108
+
109
+ ## Next Steps
110
+
111
+ ### If BUILD
112
+
113
+ **Start here:**
114
+ [One specific, concrete first action. Not "do customer research" but, for example: "Post in r/[subreddit] asking about [specific problem] — aim for 10 direct messages this week."]
115
+
116
+ **Recommended MVP type:** [Concierge / Wizard of Oz / Fake door / Single feature]
117
+ **First version scope:** [What does v0.1 do? What does it explicitly NOT do?]
118
+ **Target for first 10 customers:** [Where specifically. Who exactly.]
119
+
120
+ ---
121
+
122
+ ### If VALIDATE FIRST
123
+
124
+ **Critical unknown:**
125
+ [The one thing that must be true for this to work, that is not yet proven.]
126
+
127
+ **Experiment to run:**
128
+ [Specific experiment from experiments.py — name, duration, success metric]
129
+
130
+ **Decision point:**
131
+ Run [experiment] for [X weeks]. If [metric] >= [threshold], proceed to build. If not, [pivot or kill].
132
+
133
+ ---
134
+
135
+ ### If AVOID
136
+
137
+ **Core problem:**
138
+ [Why specifically this fails. One clear reason, not a list.]
139
+
140
+ **What would have to change:**
141
+ [What signal would need to appear for this to be worth revisiting? If nothing could change it, say so.]
142
+
143
+ **Adjacent opportunity (if any):**
144
+ [Sometimes the research reveals a related problem that IS worth pursuing. If found, name it.]
145
+
146
+ ---
147
+
148
+ ## Appendix — Raw Data
149
+
150
+ ### Top HN threads
151
+ [list with URLs and point counts]
152
+
153
+ ### Top Reddit posts
154
+ [list with URLs and upvote counts]
155
+
156
+ ### Competitor data
157
+ [scraped pricing, feature lists, tech stack if available]
158
+
159
+ ### Trend data
160
+ [keyword, avg interest, trend direction, related queries]