@opendirectory.dev/skills 0.1.42 → 0.1.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,292 @@
1
+ """
2
+ research.py -- two-phase Tavily data collector for competitor-pr-finder.
3
+
4
+ Phase 1 (discover): finds competitor candidates from product analysis context.
5
+ Phase 2 (pr-research): runs three-track PR search per confirmed competitor.
6
+
7
+ Usage:
8
+ # Phase 1
9
+ python3 scripts/research.py \
10
+ --phase discover \
11
+ --product-analysis /tmp/cprf-product-analysis.json \
12
+ --tavily-key "$TAVILY_API_KEY" \
13
+ --output /tmp/cprf-competitors-raw.json
14
+
15
+ # Phase 2
16
+ python3 scripts/research.py \
17
+ --phase pr-research \
18
+ --competitors /tmp/cprf-competitors-confirmed.json \
19
+ --product-analysis /tmp/cprf-product-analysis.json \
20
+ --tavily-key "$TAVILY_API_KEY" \
21
+ --output /tmp/cprf-pr-raw.json
22
+ """
23
+
24
+ import argparse
25
+ import json
26
+ import os
27
+ import ssl
28
+ import sys
29
+ import urllib.request
30
+ from datetime import date
31
+
32
# TLS context used for every Tavily API call.
# SECURITY FIX: the original used ssl._create_unverified_context(), which
# disables certificate verification — a man-in-the-middle could read the
# Tavily API key sent in each request body. Use the platform's default
# verified context instead (same SSLContext interface for urlopen()).
_ssl_ctx = ssl.create_default_context()

# Module-level verbosity flag; flipped by --quiet in main() and read by log().
quiet = False
35
+
36
+
37
def log(msg):
    """Write a progress message to stderr unless --quiet was given."""
    if quiet:
        return
    print(msg, file=sys.stderr)
40
+
41
+
42
def fetch_json(url, payload, timeout=25):
    """POST *payload* as JSON to *url* and return the decoded JSON response.

    Network and HTTP errors propagate to the caller (tavily_search wraps
    this in a try/except and degrades gracefully).
    """
    body = json.dumps(payload).encode()
    headers = {
        "Content-Type": "application/json",
        "User-Agent": "competitor-pr-finder/1.0",
    }
    request = urllib.request.Request(
        url, data=body, headers=headers, method="POST"
    )
    with urllib.request.urlopen(request, timeout=timeout, context=_ssl_ctx) as resp:
        raw = resp.read()
    return json.loads(raw)
55
+
56
+
57
def tavily_search(query, key, depth="advanced", max_results=7):
    """Run one Tavily search and normalize the response.

    Returns a dict with "answer" and "results" keys; on any failure the
    fields are empty and an "error" message is included instead of raising,
    so one failed query never aborts a whole research run.
    """
    log(f"  Tavily [{depth}] {query[:80]}")
    payload = {
        "api_key": key,
        "query": query,
        "search_depth": depth,
        "max_results": max_results,
    }
    try:
        raw = fetch_json("https://api.tavily.com/search", payload)
    except Exception as exc:  # deliberate best-effort: report, don't crash
        log(f"    ERROR: {exc}")
        return {"answer": "", "results": [], "error": str(exc)}
    return {
        "answer": raw.get("answer", ""),
        "results": raw.get("results", []),
    }
76
+
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # Phase 1: competitor discovery
80
+ # ---------------------------------------------------------------------------
81
+
82
def run_discover(product_analysis, tavily_key):
    """Phase 1: collect raw competitor-candidate search results.

    Builds two Tavily queries from the product name and the L2/L3 industry
    taxonomy in *product_analysis*, then returns the trimmed results plus
    run metadata (date, product name).
    """
    taxonomy = product_analysis.get("industry_taxonomy", {})
    name = product_analysis.get("product_name", "")
    l2 = taxonomy.get("l2", "")
    l3 = taxonomy.get("l3", "")

    log(f"\nPhase 1: competitor discovery for '{name}'")
    log(f"  taxonomy: {l2} > {l3}")

    queries = [
        f'"{name}" competitors alternatives {l3}',
        f"{l2} {l3} startups companies funded 2022 2023 2024",
    ]

    searches = []
    for query in queries:
        found = tavily_search(query, tavily_key, depth="advanced", max_results=8)
        hits = found.get("results", [])
        # Keep only title/url and the first 500 chars of content to bound
        # the size of the JSON handed back to the skill.
        trimmed = [
            {
                "title": hit.get("title", ""),
                "url": hit.get("url", ""),
                "content": hit.get("content", "")[:500],
            }
            for hit in hits
        ]
        searches.append(
            {
                "query": query,
                "answer": found.get("answer", ""),
                "results": trimmed,
            }
        )
        log(f"    {len(hits)} results")

    log("Phase 1 complete.")
    return {
        "date": str(date.today()),
        "product_name": name,
        "competitor_searches": searches,
    }
120
+
121
+
122
+ # ---------------------------------------------------------------------------
123
+ # Phase 2: three-track PR research
124
+ # ---------------------------------------------------------------------------
125
+
126
# Per-track Tavily query templates for Phase 2. The "{competitor}" token is
# substituted with str.replace in run_pr_research (not str.format, so the
# literal quotes/colons in the templates pass through untouched).
TRACK_QUERIES = {
    "editorial": '"{competitor}" featured press coverage TechCrunch Forbes Wired article interview',
    "podcast": '"{competitor}" founder CEO podcast interview appeared on episode',
    "community": '"{competitor}" site:reddit.com OR site:news.ycombinator.com OR site:producthunt.com',
}

# Broad catch-all query used only when all three tracks return zero results.
FALLBACK_QUERY = '"{competitor}" review coverage press news'
133
+
134
+
135
def _trim_hits(raw_results):
    """Reduce raw Tavily result dicts to title/url plus content truncated to 500 chars."""
    return [
        {
            "title": r.get("title", ""),
            "url": r.get("url", ""),
            "content": r.get("content", "")[:500],
        }
        for r in raw_results
    ]


def run_pr_research(confirmed_competitors, product_analysis, tavily_key):
    """Phase 2: three-track PR research per confirmed competitor.

    For each competitor, runs one Tavily search per track in TRACK_QUERIES
    (editorial, podcast, community). If all three tracks return zero
    results, runs FALLBACK_QUERY once and sets a "data_quality_flag" on
    that competitor's entry.

    Args:
        confirmed_competitors: list of dicts with at least "name" and "url".
        product_analysis: parsed product-analysis JSON. Currently unused
            here; kept for interface parity with run_discover and the CLI.
            NOTE(review): taxonomy terms could be folded into the queries.
        tavily_key: Tavily API key.

    Returns:
        dict with "date", "competitors_researched", and per-competitor
        "results" (tracks keyed by track name, plus optional "fallback").
    """
    log(f"\nPhase 2: PR research for {len(confirmed_competitors)} competitors")

    results = []
    for comp in confirmed_competitors:
        name = comp.get("name", "")
        url = comp.get("url", "")
        log(f"\n  [{name}]")

        comp_result = {
            "competitor": name,
            "url": url,
            "tracks": {},
        }

        total_results = 0
        for track, query_template in TRACK_QUERIES.items():
            # str.replace, not str.format: templates contain literal quotes
            # and "site:" colons that must pass through untouched.
            query = query_template.replace("{competitor}", name)
            data = tavily_search(query, tavily_key, depth="advanced", max_results=7)
            track_results = _trim_hits(data.get("results", []))
            comp_result["tracks"][track] = {
                "query": query,
                "answer": data.get("answer", ""),
                "results": track_results,
            }
            total_results += len(track_results)
            log(f"    {track}: {len(track_results)} results")

        # Fallback if all 3 tracks returned nothing
        if total_results == 0:
            log("    WARNING: 0 results across all tracks. Running fallback search.")
            fallback_query = FALLBACK_QUERY.replace("{competitor}", name)
            fallback_data = tavily_search(
                fallback_query, tavily_key, depth="advanced", max_results=7
            )
            comp_result["tracks"]["fallback"] = {
                "query": fallback_query,
                "answer": fallback_data.get("answer", ""),
                "results": _trim_hits(fallback_data.get("results", [])),
            }
            comp_result["data_quality_flag"] = "All 3 tracks returned 0 results. Fallback search used."
            log(f"    fallback: {len(fallback_data.get('results', []))} results")

        results.append(comp_result)

    log("\nPhase 2 complete.")
    return {
        "date": str(date.today()),
        "competitors_researched": len(confirmed_competitors),
        "results": results,
    }
201
+
202
+
203
+ # ---------------------------------------------------------------------------
204
+ # CLI
205
+ # ---------------------------------------------------------------------------
206
+
207
def main():
    """CLI entry point: parse args, run the selected phase, write JSON output.

    Progress goes to stderr via log(); the machine-parseable summary is the
    only thing printed to stdout. Exits with status 1 on any validation error.
    """
    global quiet

    parser = argparse.ArgumentParser(description="competitor-pr-finder research script")
    parser.add_argument(
        "--phase",
        required=True,
        choices=["discover", "pr-research"],
        help="Which phase to run",
    )
    parser.add_argument(
        "--product-analysis",
        required=True,
        help="Path to cprf-product-analysis.json",
    )
    parser.add_argument(
        "--competitors",
        default="",
        help="Path to cprf-competitors-confirmed.json (Phase 2 only)",
    )
    parser.add_argument(
        "--tavily-key",
        default=os.environ.get("TAVILY_API_KEY", ""),
        help="Tavily API key (or set TAVILY_API_KEY env var)",
    )
    parser.add_argument(
        "--output",
        required=True,
        help="Path to write JSON output",
    )
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Suppress progress output to stderr",
    )
    args = parser.parse_args()

    # Propagate --quiet to the module-level flag read by log().
    quiet = args.quiet

    # Hard requirements for both phases: an API key and the analysis file.
    if not args.tavily_key:
        print("ERROR: Tavily API key required. Pass --tavily-key or set TAVILY_API_KEY.", file=sys.stderr)
        sys.exit(1)

    if not os.path.exists(args.product_analysis):
        print(f"ERROR: {args.product_analysis} not found", file=sys.stderr)
        sys.exit(1)

    with open(args.product_analysis) as f:
        product_analysis = json.load(f)

    if args.phase == "discover":
        output = run_discover(product_analysis, args.tavily_key)

    elif args.phase == "pr-research":
        # Phase 2 additionally requires a non-empty confirmed-competitors file.
        if not args.competitors:
            print("ERROR: --competitors required for pr-research phase", file=sys.stderr)
            sys.exit(1)
        if not os.path.exists(args.competitors):
            print(f"ERROR: {args.competitors} not found", file=sys.stderr)
            sys.exit(1)
        with open(args.competitors) as f:
            competitors_data = json.load(f)
        confirmed = competitors_data.get("confirmed_competitors", [])
        if not confirmed:
            print("ERROR: no confirmed_competitors in input file", file=sys.stderr)
            sys.exit(1)
        output = run_pr_research(confirmed, product_analysis, args.tavily_key)

    with open(args.output, "w") as f:
        json.dump(output, f, indent=2)

    log(f"\nOutput written to {args.output}")

    # Print summary for SKILL.md to parse
    if args.phase == "discover":
        total = sum(len(s["results"]) for s in output["competitor_searches"])
        print(f"Discover complete: {len(output['competitor_searches'])} queries, {total} total results")
    else:
        print(f"PR research complete: {output['competitors_researched']} competitors researched")
        for r in output.get("results", []):
            # Per-competitor result counts per track (includes "fallback" if used).
            track_counts = {t: len(v["results"]) for t, v in r["tracks"].items()}
            print(f"  {r['competitor']}: {track_counts}")
289
+
290
+
291
# Script entry point guard — allows importing this module without side effects.
if __name__ == "__main__":
    main()