@opendirectory.dev/skills 0.1.40 → 0.1.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,127 @@
1
+ import json
2
+ import os
3
+ from typing import List, Dict, Tuple
4
+
5
def load_funds(data_path: str) -> List[Dict]:
    """Read the VC fund dataset (a JSON array of fund records) from disk.

    Args:
        data_path: Path to the JSON dataset file.

    Returns:
        The parsed list of fund dictionaries.

    Raises:
        FileNotFoundError: If *data_path* does not exist on disk.
    """
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"Dataset not found at {data_path}")
    with open(data_path, encoding="utf-8") as dataset:
        return json.load(dataset)
11
+
12
def score_fund(fund: Dict, product_context: Dict) -> Tuple[int, List[str]]:
    """Compute a relevance score (0-100) for one fund against the product.

    Scoring components:
      * industry-tag overlap (up to 60 points; 20 per specific tag,
        5 for a "Generalist" match)
      * funding-stage fit    (up to 20 points)
      * geography fit        (up to 20 points)
    Two penalties may then reduce the total; the score never drops below 0.

    Args:
        fund: A fund record with optional "industry_tags", "stage_focus"
            and "geography_focus" lists.
        product_context: Extracted product info with optional
            "extracted_tags", "stage_hint" and "geography_hint".

    Returns:
        (score, matched_tags) where matched_tags lists the product tags
        found among the fund's industry tags, in product-tag order.
    """
    stage_rank = {"Pre-seed": 0, "Seed": 1, "Series A": 2, "Growth": 3}

    fund_tags = fund.get("industry_tags", [])
    # An empty tag extraction falls back to the generic bucket.
    product_tags = product_context.get("extracted_tags", []) or ["Generalist"]

    # --- 1. Tag overlap (max 60) ----------------------------------------
    overlap = [t for t in product_tags if t in fund_tags]
    raw_tag_score = sum(5 if t == "Generalist" else 20 for t in overlap)
    tag_score = min(raw_tag_score, 60)  # cap so many tags can't dominate

    # --- 2. Stage fit (max 20) ------------------------------------------
    stage_hint = product_context.get("stage_hint")
    fund_stages = fund.get("stage_focus", [])

    if not stage_hint:
        stage_score = 10  # unknown stage: neutral partial credit
    elif not fund_stages:
        stage_score = 0  # fund declares no stage focus: skip component
    elif stage_hint in fund_stages:
        stage_score = 20  # exact stage match
    elif stage_hint in stage_rank and any(
        s in stage_rank and abs(stage_rank[s] - stage_rank[stage_hint]) == 1
        for s in fund_stages
    ):
        stage_score = 10  # adjacent stage (e.g. Seed vs. Series A)
    else:
        stage_score = 0

    # --- 3. Geography fit (max 20) --------------------------------------
    geo_hint = product_context.get("geography_hint")
    # Funds without an explicit focus are treated as global investors.
    fund_geo = fund.get("geography_focus", []) or ["Global"]

    if not geo_hint or geo_hint == "Global":
        geo_score = 10
    elif fund_geo == ["India"] and geo_hint == "US":
        geo_score = 0
    elif geo_hint in fund_geo:
        geo_score = 20
    elif "Global" in fund_geo:
        geo_score = 15
    else:
        geo_score = 0

    total = tag_score + stage_score + geo_score

    # --- 4. Penalties ---------------------------------------------------
    # India-only funds are a poor fit for US-targeted searches.
    india_only = (
        "India" in fund_geo
        and "US" not in fund_geo
        and "Global" not in fund_geo
    )
    if geo_hint == "US" and india_only:
        total = max(0, total - 30)

    # The fund's primary (first-listed) tag is unrelated and the tag
    # overlap was weak: likely an incidental match.
    if fund_tags and fund_tags[0] not in product_tags and tag_score <= 20:
        total = max(0, total - 15)

    return total, overlap
92
+
93
def get_confidence_tier(score: int) -> str:
    """Map a numeric match score to a confidence label.

    70 and above -> "High", 40-69 -> "Medium", below 40 -> "Low".
    """
    for threshold, tier in ((70, "High"), (40, "Medium")):
        if score >= threshold:
            return tier
    return "Low"
100
+
101
def match_vcs(product_context: Dict, data_path: str = "data/vc_funds.json") -> List[Dict]:
    """Score every fund in the dataset and return the prioritized matches.

    Results are sorted by score (descending), ties broken alphabetically
    by fund name, and truncated to the top 10. If no fund scores above
    zero, up to five generalist funds are returned instead, each flagged
    with a "warning" key.

    Args:
        product_context: Extracted product info passed to `score_fund`.
        data_path: Location of the JSON fund dataset.

    Returns:
        A list of match dicts with "fund", "score", "confidence" and
        "matched_tags" keys.
    """
    funds = load_funds(data_path)

    ranked = []
    for candidate in funds:
        points, hit_tags = score_fund(candidate, product_context)
        ranked.append(
            {
                "fund": candidate,
                "score": points,
                "confidence": get_confidence_tier(points),
                "matched_tags": hit_tags,
            }
        )

    ranked.sort(key=lambda entry: (-entry["score"], entry["fund"].get("fund_name", "")))

    if all(entry["score"] == 0 for entry in ranked):
        # No meaningful signal anywhere: fall back to generalist funds.
        fallback = [f for f in funds if "Generalist" in f.get("industry_tags", [])]
        return [
            {
                "fund": generalist,
                "score": 0,
                "confidence": "Low",
                "matched_tags": ["Generalist"],
                "warning": "No strong matches found. Showing generalist funds only.",
            }
            for generalist in fallback[:5]
        ]

    return ranked[:10]
@@ -0,0 +1,82 @@
1
+ import argparse
2
+ import sys
3
+ import os
4
+
5
+ # Import the core pipeline components
6
+ from fetch_product_context import get_product_context
7
+ from match_vcs import match_vcs
8
+ from generate_report import generate_report
9
+
10
def main():
    """CLI entry point: parse arguments, run the match pipeline, write report.

    Pipeline: fetch product context -> score funds -> render markdown report.
    Exits with status 1 on any validation or runtime failure.
    """
    parser = argparse.ArgumentParser(description="vc-curated-match: Algorithmically identify relevant VCs based on product context.")

    parser.add_argument("--description", required=True, help="Product description string.")
    parser.add_argument("--url", required=True, help="Product homepage or GitHub URL.")
    parser.add_argument("--stage", default=None, help="Optional startup stage hint.")
    parser.add_argument("--geography", default=None, help="Optional target geography (Defaults to Global inference).")
    parser.add_argument("--output", default="vc-matches.md", help="Output file path (Defaults to vc-matches.md).")

    args = parser.parse_args()

    # Validate required string inputs beyond argparse's presence check:
    # argparse accepts "" and whitespace-only values.
    if not args.description or not args.description.strip():
        print("Error: --description must not be empty or whitespace only.", file=sys.stderr)
        sys.exit(1)

    if not args.url or not args.url.strip():
        print("Error: --url must not be empty.", file=sys.stderr)
        sys.exit(1)

    # The dataset ships alongside the skill; fail fast if it is missing.
    data_path = os.path.join(os.path.dirname(__file__), "..", "data", "vc_funds.json")
    if not os.path.exists(data_path):
        print("Error: data/vc_funds.json not found. Make sure you are running from the skill root directory.", file=sys.stderr)
        sys.exit(1)

    try:
        # Normalize the stage hint to the canonical casing the matcher
        # expects ("Pre-seed", "Seed", "Series A", "Growth").
        stage_input = args.stage.strip() if args.stage else None
        if stage_input and stage_input.lower() == "pre-seed":
            stage_input = "Pre-seed"
        elif stage_input:
            # BUG FIX: str.capitalize() lowercases everything after the
            # first character, mangling "Series A" / "series a" into
            # "Series a", which matches no known stage. str.title()
            # capitalizes each word instead (pre-seed is special-cased
            # above, so its hyphenated casing is unaffected).
            stage_input = stage_input.title()

        # 1. Fetch product context from the description/URL.
        product_context = get_product_context(
            description=args.description.strip(),
            url=args.url.strip(),
            stage=stage_input,
            geography=args.geography.strip() if args.geography else None
        )

        # 2. Score and rank the VC funds.
        matches = match_vcs(product_context, data_path=data_path)

        # 3. Render the markdown report.
        report_str = generate_report(matches, product_context)

        # Ensure the output directory exists, then write the report.
        output_dir = os.path.dirname(os.path.abspath(args.output))
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        try:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(report_str)
        except IOError:
            print(f"Error: Could not write to {args.output}.", file=sys.stderr)
            sys.exit(1)

        # 4. Print a one-line summary of the confidence distribution.
        high = sum(1 for m in matches if m.get("confidence") == "High")
        medium = sum(1 for m in matches if m.get("confidence") == "Medium")
        low = sum(1 for m in matches if m.get("confidence") == "Low")

        print(f"Done. Report saved to {args.output}")
        print(f"Found {len(matches)} matches: {high} High, {medium} Medium, {low} Low confidence")

    except Exception as e:
        # Top-level boundary: report and exit non-zero. SystemExit from
        # the sys.exit(1) calls above is not an Exception subclass, so it
        # propagates untouched.
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)

if __name__ == "__main__":
    main()