github-talent-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- github_talent_mcp/__init__.py +0 -0
- github_talent_mcp/__main__.py +3 -0
- github_talent_mcp/github_client.py +191 -0
- github_talent_mcp/models.py +96 -0
- github_talent_mcp/scoring.py +169 -0
- github_talent_mcp/server.py +127 -0
- github_talent_mcp/tools/__init__.py +0 -0
- github_talent_mcp/tools/contributors.py +61 -0
- github_talent_mcp/tools/profile.py +219 -0
- github_talent_mcp/tools/rank.py +91 -0
- github_talent_mcp/tools/search.py +43 -0
- github_talent_mcp-0.1.0.dist-info/METADATA +218 -0
- github_talent_mcp-0.1.0.dist-info/RECORD +17 -0
- github_talent_mcp-0.1.0.dist-info/WHEEL +5 -0
- github_talent_mcp-0.1.0.dist-info/entry_points.txt +2 -0
- github_talent_mcp-0.1.0.dist-info/licenses/LICENSE +21 -0
- github_talent_mcp-0.1.0.dist-info/top_level.txt +1 -0
|
File without changes
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import time
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
# Base URL that relative request paths are resolved against.
GITHUB_API = "https://api.github.com"

# Lower-case SPDX identifiers that this package counts as permissive licenses.
PERMISSIVE_LICENSES = frozenset(
    (
        "mit",
        "apache-2.0",
        "bsd-2-clause",
        "bsd-3-clause",
        "isc",
        "unlicense",
    )
)

# Package logger, looked up by a fixed name rather than by module path.
log = logging.getLogger("github-talent-mcp")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GitHubClient:
    """Async GitHub REST client with a per-instance, time-limited response cache.

    All requests go through one ``httpx.AsyncClient`` bound to ``GITHUB_API``.
    Cached lookups are kept for ``_cache_ttl`` seconds under a string key that
    is built from the request arguments.
    """

    def __init__(self, token: str | None = None):
        """Create the HTTP client.

        Args:
            token: GitHub access token. When omitted or empty, the
                ``GITHUB_TOKEN`` environment variable is used instead; when
                that is empty too, no ``Authorization`` header is sent.
        """
        self._token = token or os.environ.get("GITHUB_TOKEN", "")
        # key -> (time.monotonic() at insertion, cached payload)
        self._cache: dict[str, tuple[float, Any]] = {}
        self._cache_ttl = 300  # 5 minutes
        headers: dict[str, str] = {
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        }
        if self._token:
            headers["Authorization"] = f"Bearer {self._token}"
        self._client = httpx.AsyncClient(
            base_url=GITHUB_API,
            headers=headers,
            timeout=30.0,
        )

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()

    # -- Cache helpers --

    def _cache_lookup(self, key: str) -> tuple[bool, Any]:
        """Return ``(hit, value)`` for *key*, evicting the entry if it expired.

        Unlike ``_cache_get`` this tells "nothing cached" apart from "``None``
        was cached", which is what makes negative caching work.
        """
        entry = self._cache.get(key)
        if entry is None:
            return False, None
        stored_at, data = entry
        if time.monotonic() - stored_at < self._cache_ttl:
            return True, data
        del self._cache[key]
        return False, None

    def _cache_get(self, key: str) -> Any | None:
        """Return the fresh cached value for *key*, or ``None`` when absent or expired."""
        return self._cache_lookup(key)[1]

    def _cache_set(self, key: str, data: Any) -> None:
        """Store *data* under *key*, stamped with the current monotonic time."""
        self._cache[key] = (time.monotonic(), data)

    def _check_rate_limit(self, resp: httpx.Response) -> None:
        """Log a warning when the response reports fewer than 100 requests left."""
        remaining = resp.headers.get("X-RateLimit-Remaining")
        if not remaining:
            return
        try:
            left = int(remaining)
        except ValueError:
            # A malformed header must not turn a successful request into a failure.
            return
        if left < 100:
            reset = resp.headers.get("X-RateLimit-Reset", "unknown")
            log.warning("Rate limit low: %s remaining, resets at %s", remaining, reset)

    async def _fetch_json(
        self,
        path: str,
        *,
        params: dict[str, Any] | None = None,
        cache_key: str | None = None,
    ) -> Any:
        """GET *path* and return the decoded JSON body.

        When *cache_key* is given, a fresh cached value is returned without a
        request, and a newly fetched body is stored under that key.

        Raises:
            httpx.HTTPStatusError: for 4xx/5xx responses (``raise_for_status``).
        """
        if cache_key is not None:
            hit, cached = self._cache_lookup(cache_key)
            if hit:
                return cached
        resp = await self._client.get(path, params=params)
        self._check_rate_limit(resp)
        resp.raise_for_status()
        data = resp.json()
        if cache_key is not None:
            self._cache_set(cache_key, data)
        return data

    @staticmethod
    def _qualifier(name: str, value: str) -> str:
        """Format one ``name:value`` search qualifier.

        Values containing whitespace are wrapped in double quotes; otherwise
        only the first word would be bound to the qualifier and the rest
        would be sent as free-text search terms.
        """
        text = str(value).strip()
        already_quoted = len(text) >= 2 and text[0] == '"' and text[-1] == '"'
        if not already_quoted and any(ch.isspace() for ch in text):
            text = f'"{text}"'
        return f"{name}:{text}"

    # -- API methods --

    async def search_users(
        self,
        *,
        languages: list[str] | None = None,
        location: str | None = None,
        min_followers: int | None = None,
        min_repos: int | None = None,
        per_page: int = 30,
        page: int = 1,
    ) -> dict:
        """Query ``/search/users`` for user accounts, sorted by followers (descending).

        Args:
            languages: One ``language:`` qualifier is added per entry.
            location: Value for the ``location:`` qualifier.
            min_followers: Lower bound for the ``followers:`` qualifier.
            min_repos: Lower bound for the ``repos:`` qualifier.
            per_page: Page size forwarded to the API.
            page: 1-based page number forwarded to the API.

        Returns:
            The decoded JSON search response. Results are not cached.
        """
        parts: list[str] = ["type:user"]
        for lang in languages or ():
            parts.append(self._qualifier("language", lang))
        if location:
            parts.append(self._qualifier("location", location))
        if min_followers is not None:
            parts.append(f"followers:>={min_followers}")
        if min_repos is not None:
            parts.append(f"repos:>={min_repos}")
        # Note: pushed:> is NOT a valid qualifier for /search/users — it silently
        # returns 0 results. Use created:> for account age filtering instead.
        # Recent activity should be verified via get_developer_profile.

        return await self._fetch_json(
            "/search/users",
            params={
                "q": " ".join(parts),
                "per_page": per_page,
                "page": page,
                "sort": "followers",
                "order": "desc",
            },
        )

    async def get_repo_info(self, owner: str, repo: str) -> dict:
        """Return the repository object for ``owner/repo`` (cached)."""
        return await self._fetch_json(
            f"/repos/{owner}/{repo}",
            cache_key=f"repo:{owner}/{repo}",
        )

    async def get_user(self, username: str) -> dict:
        """Return the user object for *username* (cached)."""
        return await self._fetch_json(
            f"/users/{username}",
            cache_key=f"user:{username}",
        )

    async def get_user_repos(self, username: str, per_page: int = 100) -> list[dict]:
        """Return one page of repos owned by *username*, most recently pushed first.

        The cache key includes *per_page* so that a call asking for a
        different page size is not answered from a list fetched at another size.
        """
        return await self._fetch_json(
            f"/users/{username}/repos",
            params={"per_page": per_page, "sort": "pushed", "direction": "desc", "type": "owner"},
            cache_key=f"repos:{username}:{per_page}",
        )

    async def get_repo_languages(self, owner: str, repo: str) -> dict[str, int]:
        """Return the ``/languages`` mapping for ``owner/repo`` (cached)."""
        return await self._fetch_json(
            f"/repos/{owner}/{repo}/languages",
            cache_key=f"langs:{owner}/{repo}",
        )

    async def get_user_events(self, username: str, max_pages: int = 3) -> list[dict]:
        """Return up to *max_pages* pages (100 per page) of public events for *username*.

        Paging stops at the first empty page. The combined list is cached
        under a key that includes *max_pages*.
        """
        cache_key = f"events:{username}:{max_pages}"
        hit, cached = self._cache_lookup(cache_key)
        if hit:
            return cached
        all_events: list[dict] = []
        for page in range(1, max_pages + 1):
            events = await self._fetch_json(
                f"/users/{username}/events/public",
                params={"per_page": 100, "page": page},
            )
            if not events:
                break
            all_events.extend(events)
        self._cache_set(cache_key, all_events)
        return all_events

    async def get_profile_readme(self, username: str) -> str | None:
        """Return the first 3000 characters of the ``username/username`` README.

        Returns ``None`` when the README cannot be retrieved. A 404 is cached
        as a definitive "no README"; transport errors and other statuses are
        not cached, so a transient failure is retried on the next call.
        """
        cache_key = f"readme:{username}"
        hit, cached = self._cache_lookup(cache_key)
        if hit:
            return cached
        try:
            resp = await self._client.get(
                f"/repos/{username}/{username}/readme",
                headers={"Accept": "application/vnd.github.raw+json"},
            )
        except httpx.HTTPError as exc:
            log.debug("Profile README request failed for %s: %s", username, exc)
            return None
        if resp.status_code == 200:
            content = resp.text[:3000]
            self._cache_set(cache_key, content)
            return content
        if resp.status_code == 404:
            self._cache_set(cache_key, None)
        return None

    async def get_repo_contributors(
        self, owner: str, repo: str, per_page: int = 30,
    ) -> list[dict]:
        """Return the first page of contributors for ``owner/repo`` (not cached)."""
        return await self._fetch_json(
            f"/repos/{owner}/{repo}/contributors",
            params={"per_page": per_page},
        )
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DeveloperSearchResult(BaseModel):
    """Minimal account record; the name suggests one hit of a developer search."""

    # Identity
    login: str
    name: str | None = None

    # Links (both required)
    avatar_url: str
    html_url: str

    # Account kind; defaults to "User" when not supplied.
    type: str = "User"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class NotableRepo(BaseModel):
    """Condensed view of one repository shown on a developer profile."""

    # Identity and blurb
    name: str
    description: str | None = None

    # Popularity counters
    stars: int = 0
    forks: int = 0

    # Classification
    language: str | None = None
    license: str | None = None  # license identifier string, None when unknown
    topics: list[str] = Field(default_factory=list)

    # Timestamp string, None when unknown
    last_updated: str | None = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DeveloperProfile(BaseModel):
    """Enriched developer record: account basics plus derived metrics and a score.

    Only ``login`` is required; every other field has a neutral default.
    """

    # --- Account basics ---
    login: str
    name: str | None = None
    bio: str | None = None
    location: str | None = None
    email: str | None = None
    blog: str | None = None
    company: str | None = None
    twitter_username: str | None = None
    hireable: bool | None = None
    followers: int = 0
    following: int = 0
    public_repos: int = 0
    account_age_days: int = 0
    avatar_url: str = ""
    html_url: str = ""

    # --- Profile README ---
    has_profile_readme: bool = False
    profile_readme_length: int = 0
    profile_readme_summary: str | None = None

    # --- Language analysis ---
    top_languages: list[str] = Field(default_factory=list)
    language_breakdown: dict[str, float] = Field(default_factory=dict)

    # --- Repo metrics ---
    total_stars_received: int = 0
    total_forks_received: int = 0
    notable_repos: list[NotableRepo] = Field(default_factory=list)
    repos_with_description_ratio: float = 0.0

    # --- License analysis ---
    open_source_license_ratio: float = 0.0
    license_breakdown: dict[str, int] = Field(default_factory=dict)
    has_permissive_license_repos: bool = False

    # --- Activity ---
    commits_last_30_days: int = 0
    commits_last_90_days: int = 0
    prs_opened_last_30_days: int = 0
    prs_opened_last_90_days: int = 0
    contributed_repo_stars: int = 0
    last_active: str | None = None

    # --- OSS contributions ---
    contributes_to_major_oss: bool = False
    major_oss_contributions: list[str] = Field(default_factory=list)

    # --- Linked profiles (label -> handle/URL, None when absent) ---
    linked_profiles: dict[str, str | None] = Field(default_factory=dict)

    # --- Scoring ---
    activity_score: int = 0
    activity_score_breakdown: dict[str, int] = Field(default_factory=dict)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class RankedCandidate(BaseModel):
    """One entry of a ranked candidate list."""

    # Position and identity
    rank: int
    username: str

    # Score and the free-text justification for it
    score: float
    reasoning: str

    # Human-readable bullet points; both default to empty lists
    strengths: list[str] = Field(default_factory=list)
    gaps: list[str] = Field(default_factory=list)

    profile_url: str
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class RepoContributor(BaseModel):
    """A repository contributor together with a contribution count."""

    login: str
    contributions: int

    # Links; the avatar is optional and defaults to an empty string
    html_url: str
    avatar_url: str = ""
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
# Lower-case SPDX identifiers that count as permissive licenses.
PERMISSIVE_LICENSES = frozenset(
    (
        "mit",
        "apache-2.0",
        "bsd-2-clause",
        "bsd-3-clause",
        "isc",
        "unlicense",
    )
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _compute_reputation_floor(followers: int, stars: int, account_age_days: int) -> int:
|
|
12
|
+
"""Compute a minimum score floor based on cumulative reputation.
|
|
13
|
+
|
|
14
|
+
Prevents well-known developers from scoring low just because their
|
|
15
|
+
recent GitHub activity doesn't match behavioral scoring expectations
|
|
16
|
+
(e.g., Torvalds works via mailing lists, not GitHub PRs).
|
|
17
|
+
"""
|
|
18
|
+
if followers >= 10_000 or stars >= 50_000:
|
|
19
|
+
return 150
|
|
20
|
+
if followers >= 1_000 or stars >= 5_000:
|
|
21
|
+
return 120
|
|
22
|
+
if followers >= 500 or stars >= 1_000:
|
|
23
|
+
return 100
|
|
24
|
+
if followers >= 100 or stars >= 200:
|
|
25
|
+
return 80
|
|
26
|
+
return 0
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def compute_activity_score(profile: dict[str, Any]) -> tuple[int, dict[str, int]]:
    """Score a profile and report how each dimension contributed.

    Behavioral signals (recent commits, PRs, OSS contributions) are summed
    and then compared with a reputation floor (followers, stars), so that
    well-known developers aren't penalized for workflows that don't produce
    GitHub events.

    Returns (total_score, breakdown_dict). ``breakdown_dict`` gains a
    ``"reputation_floor"`` entry only when the floor exceeded the
    behavioral sum.
    """
    stars = profile.get("total_stars_received", 0)
    followers = profile.get("followers", 0)
    oss_projects = profile.get("major_oss_contributions", [])

    # Each dimension is capped so that no single signal dominates the total.
    breakdown: dict[str, int] = {
        "commits_last_90_days": min(profile.get("commits_last_90_days", 0) * 3, 60),
        "has_profile_readme": 20 if profile.get("has_profile_readme") else 0,
        "stars_on_own_repos": min(stars * 2, 40),
        "followers": min(followers, 20),
        "repos_with_description": int(profile.get("repos_with_description_ratio", 0.0) * 20),
        "permissive_license_repos": 15 if profile.get("has_permissive_license_repos") else 0,
        "major_oss_contributions": min(len(oss_projects) * 10, 30),
    }
    behavioral_score = sum(breakdown.values())

    # Apply reputation floor — cumulative impact shouldn't be erased by a quiet quarter
    floor = _compute_reputation_floor(followers, stars, profile.get("account_age_days", 0))
    if floor > behavioral_score:
        breakdown["reputation_floor"] = floor
        return floor, breakdown

    return behavioral_score, breakdown
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def extract_keywords(job_description: str) -> list[str]:
    """Extract meaningful keywords from a job description.

    The text is lower-cased and split into runs of letters plus ``#``, ``+``
    and ``.`` (so ``c++``, ``c#`` and ``node.js`` survive as single tokens).
    Trailing periods are stripped, so a word at the end of a sentence is the
    same keyword as the word elsewhere. Tokens shorter than two characters,
    tokens without any letter, and common filler words are dropped.

    Returns:
        Unique keywords in order of first appearance.
    """
    noise = {
        "the", "a", "an", "and", "or", "is", "are", "was", "were", "be", "been",
        "with", "for", "to", "of", "in", "on", "at", "by", "from", "as", "we",
        "you", "our", "your", "this", "that", "will", "can", "should", "must",
        "have", "has", "had", "do", "does", "did", "not", "but", "if", "about",
        "experience", "team", "work", "working", "looking", "join", "role",
        "ability", "strong", "plus", "years", "knowledge", "skills", "required",
        "preferred", "etc", "including", "such", "also", "may", "would", "could",
    }
    seen: set[str] = set()
    keywords: list[str] = []
    # The input is lower-cased first, so only a-z needs to be matched.
    for raw in re.findall(r"[a-z#+.]+", job_description.lower()):
        # "python." at the end of a sentence must equal "python".
        word = raw.rstrip(".")
        if len(word) < 2 or word in noise or word in seen:
            continue
        if not any(ch.isalpha() for ch in word):
            # Pure punctuation such as "++" or "##" carries no meaning.
            continue
        seen.add(word)
        keywords.append(word)
    return keywords
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def compute_relevance_score(profile: dict[str, Any], job_keywords: list[str]) -> int:
    """Score 0-100 based on keyword overlap between profile and job description.

    The searchable text is built from the profile's bio, top languages, major
    OSS contributions, README summary and company, plus each notable repo's
    description, topics and language. A keyword counts only when it appears as
    a whole term: a keyword edge that is a letter must not be adjacent to
    another letter, so ``go`` does not match ``django`` and ``java`` does not
    match ``javascript``, while ``python`` still matches ``python3``.

    Returns:
        50 when ``job_keywords`` is empty (no signal either way), otherwise
        the percentage of keywords found, truncated to an int.
    """
    if not job_keywords:
        return 50

    searchable_parts = [
        profile.get("bio") or "",
        " ".join(profile.get("top_languages", [])),
        " ".join(profile.get("major_oss_contributions", [])),
        profile.get("profile_readme_summary") or "",
        profile.get("company") or "",
    ]
    for repo in profile.get("notable_repos", []):
        if isinstance(repo, dict):
            searchable_parts.append(repo.get("description") or "")
            searchable_parts.extend(repo.get("topics") or [])
            searchable_parts.append(repo.get("language") or "")

    searchable = " ".join(searchable_parts).lower()

    def _mentions(keyword: str) -> bool:
        """Return True when *keyword* occurs in the text as a standalone term."""
        if not keyword:
            return True  # an empty keyword is trivially contained
        pattern = re.escape(keyword)
        # Guard only the edges that are letters, so ".net" still matches
        # "asp.net" and "c++" still matches "c++11".
        if keyword[0].isalpha():
            pattern = r"(?<![a-z])" + pattern
        if keyword[-1].isalpha():
            pattern = pattern + r"(?![a-z])"
        return re.search(pattern, searchable) is not None

    matches = sum(1 for kw in job_keywords if _mentions(kw))
    return min(int((matches / len(job_keywords)) * 100), 100)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def generate_strengths_gaps(profile: dict[str, Any]) -> tuple[list[str], list[str]]:
    """Turn profile metrics into two lists of human-readable bullet points.

    Returns ``(strengths, gaps)``. Missing profile keys are treated as zero,
    false or empty.
    """
    strengths: list[str] = []
    gaps: list[str] = []

    # Recent activity: commits and opened PRs over the last 90 days
    commits = profile.get("commits_last_90_days", 0)
    pulls = profile.get("prs_opened_last_90_days", 0)
    if commits > 20 or pulls > 10:
        activity = [
            label
            for count, label in (
                (commits, f"{commits} commits"),
                (pulls, f"{pulls} PRs opened"),
            )
            if count > 0
        ]
        strengths.append(f"Active contributor: {', '.join(activity)} in last 90 days")
    elif commits == 0 and pulls == 0:
        gaps.append("No recent public commit or PR activity")

    # Stars on repositories the developer contributed to
    contributed_stars = profile.get("contributed_repo_stars", 0)
    if contributed_stars > 1000:
        strengths.append(f"Contributes to repos with {contributed_stars:,} combined stars")

    if profile.get("has_profile_readme"):
        strengths.append("Maintains a profile README")

    # Stars received on the developer's own repositories
    own_stars = profile.get("total_stars_received", 0)
    if own_stars > 50:
        strengths.append(f"Popular open source work: {own_stars} total stars received")
    elif own_stars == 0:
        gaps.append("No starred repositories")

    external = profile.get("major_oss_contributions", [])
    if external:
        strengths.append(
            f"Contributes to {len(external)} external OSS project(s): {', '.join(external[:3])}"
        )

    if not profile.get("has_permissive_license_repos"):
        gaps.append("No repos with permissive open-source licenses")

    # Community presence: pick the adjective by follower tier
    followers = profile.get("followers", 0)
    if followers > 100:
        if followers >= 10_000:
            adjective = "Exceptional"
        elif followers >= 1_000:
            adjective = "Strong"
        else:
            adjective = "Notable"
        strengths.append(f"{adjective} community presence: {followers:,} followers")

    if profile.get("hireable"):
        strengths.append("Marked as hireable on GitHub")

    languages = profile.get("top_languages", [])
    if languages:
        strengths.append(f"Primary languages: {', '.join(languages[:5])}")

    return strengths, gaps
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
from dotenv import load_dotenv
|
|
8
|
+
from mcp.server.fastmcp import FastMCP
|
|
9
|
+
|
|
10
|
+
from github_talent_mcp.github_client import GitHubClient
|
|
11
|
+
from github_talent_mcp.tools.search import search_developers as _search
|
|
12
|
+
from github_talent_mcp.tools.profile import get_developer_profile as _profile
|
|
13
|
+
from github_talent_mcp.tools.rank import rank_candidates as _rank
|
|
14
|
+
from github_talent_mcp.tools.contributors import get_repo_contributors as _contributors
|
|
15
|
+
|
|
16
|
+
# Load settings from a .env file, if one is present, before anything reads
# the environment.
load_dotenv()

# Log to stderr — presumably to keep stdout free for the stdio transport
# that main() starts (TODO confirm).
logging.basicConfig(level=logging.INFO, stream=sys.stderr)

# MCP server instance that the @mcp.tool() functions below register with.
mcp = FastMCP("github-talent")

# Shared GitHub client; created on first use by _get_client().
_client: GitHubClient | None = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _get_client() -> GitHubClient:
    """Return the module-wide ``GitHubClient``, building it on the first call.

    The token is read from ``GITHUB_TOKEN``; when it is missing a warning is
    logged and the client is created without a token.
    """
    global _client
    if _client is not None:
        return _client

    token = os.environ.get("GITHUB_TOKEN", "")
    if not token:
        logging.warning("GITHUB_TOKEN not set — API requests limited to 60/hr")
    _client = GitHubClient(token=token)
    return _client
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@mcp.tool()
async def search_developers(
    languages: list[str] | None = None,
    location: str | None = None,
    min_followers: int | None = None,
    min_repos: int | None = None,
    limit: int = 20,
) -> str:
    """Search GitHub developers by technical and geographic filters.

    Returns a list of matching usernames sorted by followers. Use
    get_developer_profile on interesting candidates for full enrichment
    and to verify recent activity.

    For topic-based sourcing (e.g. "LLM", "inference"), use get_repo_contributors
    on relevant repos instead — GitHub user search doesn't support topic/bio search.

    Args:
        languages: Filter by programming languages, e.g. ["python", "rust"]
        location: Filter by location, e.g. "San Francisco" or "Germany"
        min_followers: Minimum follower count
        min_repos: Minimum public repo count
        limit: Max results to return (default 20, max 100)
    """
    # Thin wrapper: forward every filter unchanged to the search tool,
    # together with the shared client.
    filters = {
        "languages": languages,
        "location": location,
        "min_followers": min_followers,
        "min_repos": min_repos,
        "limit": limit,
    }
    return await _search(_get_client(), **filters)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@mcp.tool()
async def get_developer_profile(username: str) -> str:
    """Get enriched GitHub developer profile with activity scoring.

    Returns languages, stars, commit activity, OSS contributions, profile README,
    license breakdown, and a 0-205 activity score with per-dimension breakdown.

    Args:
        username: GitHub username to analyze
    """
    # Thin wrapper: delegate to the profile tool using the shared client.
    client = _get_client()
    return await _profile(client, username)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@mcp.tool()
async def rank_candidates(
    usernames: list[str],
    job_description: str,
    top_n: int = 10,
) -> str:
    """Rank GitHub users against a job description.

    Enriches each profile, scores activity + relevance, and returns candidates
    sorted by combined score with strengths, gaps, and reasoning.

    Args:
        usernames: GitHub usernames to evaluate
        job_description: The role description to rank candidates against
        top_n: Number of top candidates to return (default 10)
    """
    # Thin wrapper: delegate to the ranking tool using the shared client.
    client = _get_client()
    return await _rank(
        client,
        usernames=usernames,
        job_description=job_description,
        top_n=top_n,
    )
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@mcp.tool()
async def get_repo_contributors(
    repo: str,
    limit: int = 25,
) -> str:
    """Get top contributors for a GitHub repository as candidate leads.

    Accepts 'owner/repo' format or full GitHub URL.

    Args:
        repo: Repository in 'owner/repo' format or GitHub URL
        limit: Max contributors to return (default 25)
    """
    # Thin wrapper: delegate to the contributors tool using the shared client.
    client = _get_client()
    return await _contributors(client, repo=repo, limit=limit)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def main():
    """Run the MCP server over the stdio transport."""
    mcp.run(transport="stdio")


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
|
File without changes
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from urllib.parse import urlparse
|
|
5
|
+
|
|
6
|
+
from github_talent_mcp.github_client import GitHubClient
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def parse_repo_string(repo: str) -> tuple[str, str]:
    """Parse 'owner/repo', 'https://github.com/owner/repo', or 'github.com/owner/repo' into (owner, repo).

    Input is treated as a URL only when its HOST is ``github.com`` (or
    ``www.github.com``). A plain ``owner/name.github.com`` string, the
    old-style GitHub Pages repository name, is therefore parsed as
    owner/repo instead of being rejected. For URLs, extra path segments after
    the repository are ignored and a trailing ``.git`` clone suffix is removed.

    Raises:
        ValueError: if no owner/repo pair can be extracted.
    """
    repo = repo.strip().rstrip("/")

    # Handle URLs. The substring test is only a cheap pre-filter; the decision
    # is made on the parsed host so that repo names containing "github.com"
    # fall through to the owner/repo branch.
    if "github.com" in repo:
        parsed = urlparse(repo if "://" in repo else f"https://{repo}")
        host = (parsed.hostname or "").lower()
        if host in ("github.com", "www.github.com"):
            parts = [p for p in parsed.path.strip("/").split("/") if p]
            if len(parts) >= 2:
                name = parts[1]
                # Clone URLs end in ".git", which is not part of the repo name.
                if name.endswith(".git") and len(name) > len(".git"):
                    name = name[: -len(".git")]
                return parts[0], name
            raise ValueError(f"Could not parse owner/repo from URL: {repo}")

    # Handle owner/repo
    if "/" in repo:
        parts = repo.split("/")
        if len(parts) == 2 and parts[0] and parts[1]:
            return parts[0], parts[1]

    raise ValueError(f"Expected 'owner/repo' or GitHub URL, got: {repo}")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
async def get_repo_contributors(
    client: GitHubClient,
    *,
    repo: str,
    limit: int = 25,
) -> str:
    """Return a repository's top contributors as a JSON string.

    Args:
        client: GitHub client used for the API call.
        repo: Repository as 'owner/repo' or a GitHub URL.
        limit: Maximum number of contributor entries to inspect; at most 100
            are requested from the API.

    Returns:
        JSON with ``repo``, ``total_returned`` and ``contributors``, or
        ``{"error": ...}`` when the repo string cannot be parsed or the API
        call fails. Entries whose ``type`` is not ``"User"`` are left out.
    """
    try:
        owner, repo_name = parse_repo_string(repo)
    except ValueError as parse_err:
        return json.dumps({"error": str(parse_err)})

    try:
        raw = await client.get_repo_contributors(owner, repo_name, per_page=min(limit, 100))
    except Exception as api_err:
        # Tool boundary: report the failure to the caller instead of raising.
        return json.dumps({"error": f"GitHub API error: {api_err}"})

    contributors = [
        {
            "login": entry["login"],
            "contributions": entry["contributions"],
            "html_url": f"https://github.com/{entry['login']}",
            "avatar_url": entry.get("avatar_url", ""),
        }
        for entry in raw[:limit]
        if entry.get("type") == "User"
    ]

    payload = {
        "repo": f"{owner}/{repo_name}",
        "total_returned": len(contributors),
        "contributors": contributors,
    }
    return json.dumps(payload, indent=2)
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
|
|
7
|
+
from github_talent_mcp.github_client import GitHubClient, PERMISSIVE_LICENSES
|
|
8
|
+
from github_talent_mcp.models import DeveloperProfile, NotableRepo
|
|
9
|
+
from github_talent_mcp.scoring import compute_activity_score
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
async def get_developer_profile(client: GitHubClient, username: str) -> str:
    """Build the profile for *username*, returning it as a string.

    Any exception raised while building is converted into a JSON
    ``{"error": ...}`` string instead of propagating.
    """
    try:
        profile_json = await _build_profile(client, username)
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return json.dumps({"error": f"Failed to build profile for {username}: {e}"})
    return profile_json
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def _build_profile(client: GitHubClient, username: str) -> str:
|
|
20
|
+
# 1. Base user data
|
|
21
|
+
user = await client.get_user(username)
|
|
22
|
+
now = datetime.now(timezone.utc)
|
|
23
|
+
|
|
24
|
+
created_at = user.get("created_at", "")
|
|
25
|
+
account_age_days = 0
|
|
26
|
+
if created_at:
|
|
27
|
+
created = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
|
|
28
|
+
account_age_days = (now - created).days
|
|
29
|
+
|
|
30
|
+
# 2. Repos (owner's non-fork repos for analysis)
|
|
31
|
+
all_repos = await client.get_user_repos(username)
|
|
32
|
+
owned_repos = [r for r in all_repos if not r.get("fork")]
|
|
33
|
+
|
|
34
|
+
# 3. Language breakdown from top 10 repos by stars
|
|
35
|
+
sorted_by_stars = sorted(owned_repos, key=lambda r: r.get("stargazers_count", 0), reverse=True)
|
|
36
|
+
lang_totals: dict[str, int] = {}
|
|
37
|
+
for repo in sorted_by_stars[:10]:
|
|
38
|
+
langs = await client.get_repo_languages(username, repo["name"])
|
|
39
|
+
for lang, byte_count in langs.items():
|
|
40
|
+
lang_totals[lang] = lang_totals.get(lang, 0) + byte_count
|
|
41
|
+
|
|
42
|
+
total_bytes = sum(lang_totals.values()) or 1
|
|
43
|
+
language_breakdown = {
|
|
44
|
+
lang: round(bytes_ / total_bytes, 3)
|
|
45
|
+
for lang, bytes_ in sorted(lang_totals.items(), key=lambda x: x[1], reverse=True)
|
|
46
|
+
}
|
|
47
|
+
top_languages = list(language_breakdown.keys())[:10]
|
|
48
|
+
|
|
49
|
+
# 4. Stars and forks
|
|
50
|
+
total_stars = sum(r.get("stargazers_count", 0) for r in owned_repos)
|
|
51
|
+
total_forks = sum(r.get("forks_count", 0) for r in owned_repos)
|
|
52
|
+
|
|
53
|
+
# 5. Description ratio
|
|
54
|
+
if owned_repos:
|
|
55
|
+
with_desc = sum(1 for r in owned_repos if r.get("description"))
|
|
56
|
+
desc_ratio = round(with_desc / len(owned_repos), 2)
|
|
57
|
+
else:
|
|
58
|
+
desc_ratio = 0.0
|
|
59
|
+
|
|
60
|
+
# 6. License analysis
|
|
61
|
+
license_counts: dict[str, int] = {}
|
|
62
|
+
for repo in owned_repos:
|
|
63
|
+
lic = repo.get("license")
|
|
64
|
+
spdx = lic.get("spdx_id", "NOASSERTION") if lic else "none"
|
|
65
|
+
license_counts[spdx] = license_counts.get(spdx, 0) + 1
|
|
66
|
+
|
|
67
|
+
has_permissive = any(
|
|
68
|
+
spdx.lower() in PERMISSIVE_LICENSES for spdx in license_counts
|
|
69
|
+
)
|
|
70
|
+
licensed_repos = sum(v for k, v in license_counts.items() if k not in ("none", "NOASSERTION"))
|
|
71
|
+
license_ratio = round(licensed_repos / len(owned_repos), 2) if owned_repos else 0.0
|
|
72
|
+
|
|
73
|
+
# 7. Events: commits, PRs, and OSS contributions
|
|
74
|
+
events = await client.get_user_events(username)
|
|
75
|
+
commits_30d = 0
|
|
76
|
+
commits_90d = 0
|
|
77
|
+
prs_opened_30d = 0
|
|
78
|
+
prs_opened_90d = 0
|
|
79
|
+
last_active: str | None = None
|
|
80
|
+
oss_contributions: set[str] = set()
|
|
81
|
+
|
|
82
|
+
for event in events:
|
|
83
|
+
created_str = event.get("created_at", "")
|
|
84
|
+
if not created_str:
|
|
85
|
+
continue
|
|
86
|
+
created = datetime.fromisoformat(created_str.replace("Z", "+00:00"))
|
|
87
|
+
age_days = (now - created).days
|
|
88
|
+
|
|
89
|
+
if last_active is None:
|
|
90
|
+
last_active = created_str
|
|
91
|
+
|
|
92
|
+
if event["type"] == "PushEvent":
|
|
93
|
+
num_commits = len(event.get("payload", {}).get("commits", []))
|
|
94
|
+
if age_days <= 30:
|
|
95
|
+
commits_30d += num_commits
|
|
96
|
+
if age_days <= 90:
|
|
97
|
+
commits_90d += num_commits
|
|
98
|
+
|
|
99
|
+
# Count PR opens as activity (catches PR-based workflows that PushEvents miss)
|
|
100
|
+
if event["type"] == "PullRequestEvent":
|
|
101
|
+
action = event.get("payload", {}).get("action", "")
|
|
102
|
+
if action == "opened":
|
|
103
|
+
if age_days <= 30:
|
|
104
|
+
prs_opened_30d += 1
|
|
105
|
+
if age_days <= 90:
|
|
106
|
+
prs_opened_90d += 1
|
|
107
|
+
|
|
108
|
+
if event["type"] in ("PullRequestEvent", "PushEvent", "IssuesEvent", "IssueCommentEvent"):
|
|
109
|
+
repo_name = event.get("repo", {}).get("name", "")
|
|
110
|
+
if repo_name and not repo_name.lower().startswith(f"{username.lower()}/"):
|
|
111
|
+
oss_contributions.add(repo_name)
|
|
112
|
+
|
|
113
|
+
# 7b. Fetch star counts for contributed repos (captures org repo impact)
|
|
114
|
+
contributed_stars = 0
|
|
115
|
+
for contrib_repo in sorted(oss_contributions)[:5]: # cap at 5 to limit API calls
|
|
116
|
+
parts = contrib_repo.split("/")
|
|
117
|
+
if len(parts) == 2:
|
|
118
|
+
try:
|
|
119
|
+
repo_info = await client.get_repo_info(parts[0], parts[1])
|
|
120
|
+
contributed_stars += repo_info.get("stargazers_count", 0)
|
|
121
|
+
except Exception as e:
|
|
122
|
+
logging.getLogger("github-talent-mcp").debug(
|
|
123
|
+
"Failed to fetch repo info for %s: %s", contrib_repo, e,
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
# 8. Profile README
|
|
127
|
+
readme_content = await client.get_profile_readme(username)
|
|
128
|
+
has_readme = readme_content is not None
|
|
129
|
+
readme_length = len(readme_content) if readme_content else 0
|
|
130
|
+
readme_summary = None
|
|
131
|
+
if readme_content:
|
|
132
|
+
# Take first ~500 chars as summary (the calling LLM can summarize further)
|
|
133
|
+
readme_summary = readme_content[:500].strip()
|
|
134
|
+
if len(readme_content) > 500:
|
|
135
|
+
readme_summary += "..."
|
|
136
|
+
|
|
137
|
+
# 9. Notable repos
|
|
138
|
+
notable_repos = []
|
|
139
|
+
for repo in sorted_by_stars[:5]:
|
|
140
|
+
lic = repo.get("license")
|
|
141
|
+
notable_repos.append(NotableRepo(
|
|
142
|
+
name=repo["name"],
|
|
143
|
+
description=repo.get("description"),
|
|
144
|
+
stars=repo.get("stargazers_count", 0),
|
|
145
|
+
forks=repo.get("forks_count", 0),
|
|
146
|
+
language=repo.get("language"),
|
|
147
|
+
license=lic.get("spdx_id") if lic else None,
|
|
148
|
+
topics=repo.get("topics", []),
|
|
149
|
+
last_updated=repo.get("pushed_at"),
|
|
150
|
+
))
|
|
151
|
+
|
|
152
|
+
# 10. Linked profiles
|
|
153
|
+
linked_profiles = {
|
|
154
|
+
"twitter": user.get("twitter_username"),
|
|
155
|
+
"personal_site": user.get("blog") or None,
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
# 11. Activity score
|
|
159
|
+
# Combine push commits + PR opens for total activity signal
|
|
160
|
+
# PRs weighted x3 (each PR represents more effort than a single commit)
|
|
161
|
+
total_activity_90d = commits_90d + (prs_opened_90d * 3)
|
|
162
|
+
# Combine personal repo stars + contributed repo stars
|
|
163
|
+
combined_stars = total_stars + contributed_stars
|
|
164
|
+
|
|
165
|
+
score_input = {
|
|
166
|
+
"commits_last_90_days": total_activity_90d,
|
|
167
|
+
"has_profile_readme": has_readme,
|
|
168
|
+
"total_stars_received": combined_stars,
|
|
169
|
+
"followers": user.get("followers", 0),
|
|
170
|
+
"repos_with_description_ratio": desc_ratio,
|
|
171
|
+
"has_permissive_license_repos": has_permissive,
|
|
172
|
+
"major_oss_contributions": sorted(oss_contributions),
|
|
173
|
+
"account_age_days": account_age_days,
|
|
174
|
+
}
|
|
175
|
+
activity_score, score_breakdown = compute_activity_score(score_input)
|
|
176
|
+
|
|
177
|
+
# 12. Build the full profile
|
|
178
|
+
profile = DeveloperProfile(
|
|
179
|
+
login=user["login"],
|
|
180
|
+
name=user.get("name"),
|
|
181
|
+
bio=user.get("bio"),
|
|
182
|
+
location=user.get("location"),
|
|
183
|
+
email=user.get("email"),
|
|
184
|
+
blog=user.get("blog"),
|
|
185
|
+
company=user.get("company"),
|
|
186
|
+
twitter_username=user.get("twitter_username"),
|
|
187
|
+
hireable=user.get("hireable"),
|
|
188
|
+
followers=user.get("followers", 0),
|
|
189
|
+
following=user.get("following", 0),
|
|
190
|
+
public_repos=user.get("public_repos", 0),
|
|
191
|
+
account_age_days=account_age_days,
|
|
192
|
+
avatar_url=user.get("avatar_url", ""),
|
|
193
|
+
html_url=user.get("html_url", f"https://github.com/{username}"),
|
|
194
|
+
has_profile_readme=has_readme,
|
|
195
|
+
profile_readme_length=readme_length,
|
|
196
|
+
profile_readme_summary=readme_summary,
|
|
197
|
+
top_languages=top_languages,
|
|
198
|
+
language_breakdown=language_breakdown,
|
|
199
|
+
total_stars_received=total_stars,
|
|
200
|
+
total_forks_received=total_forks,
|
|
201
|
+
notable_repos=notable_repos,
|
|
202
|
+
repos_with_description_ratio=desc_ratio,
|
|
203
|
+
open_source_license_ratio=license_ratio,
|
|
204
|
+
license_breakdown=license_counts,
|
|
205
|
+
has_permissive_license_repos=has_permissive,
|
|
206
|
+
commits_last_30_days=commits_30d,
|
|
207
|
+
commits_last_90_days=commits_90d,
|
|
208
|
+
prs_opened_last_30_days=prs_opened_30d,
|
|
209
|
+
prs_opened_last_90_days=prs_opened_90d,
|
|
210
|
+
contributed_repo_stars=contributed_stars,
|
|
211
|
+
last_active=last_active,
|
|
212
|
+
contributes_to_major_oss=bool(oss_contributions),
|
|
213
|
+
major_oss_contributions=sorted(oss_contributions),
|
|
214
|
+
linked_profiles=linked_profiles,
|
|
215
|
+
activity_score=activity_score,
|
|
216
|
+
activity_score_breakdown=score_breakdown,
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
return json.dumps(profile.model_dump(), indent=2)
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
from github_talent_mcp.github_client import GitHubClient
|
|
6
|
+
from github_talent_mcp.scoring import (
|
|
7
|
+
compute_relevance_score,
|
|
8
|
+
extract_keywords,
|
|
9
|
+
generate_strengths_gaps,
|
|
10
|
+
)
|
|
11
|
+
from github_talent_mcp.tools.profile import get_developer_profile
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
async def rank_candidates(
    client: GitHubClient,
    *,
    usernames: list[str],
    job_description: str,
    top_n: int = 10,
) -> str:
    """Rank GitHub users against a job description and return a JSON report.

    Each username is enriched with ``get_developer_profile``; its activity
    score is then blended with a keyword relevance score computed against
    the keywords extracted from ``job_description``.

    Args:
        client: GitHub API client passed through to ``get_developer_profile``.
        usernames: GitHub logins to evaluate.
        job_description: Free text from which matching keywords are extracted.
        top_n: Maximum number of candidates to return. Zero or a negative
            value returns no candidates.

    Returns:
        A JSON string with three keys: ``job_keywords_extracted`` (at most the
        first 20 keywords), ``total_evaluated`` (every username processed,
        including ones whose profile could not be fetched) and ``candidates``
        (the best ``top_n`` entries, sorted by score descending). Each entry
        has ``rank``, ``username``, ``score``, ``reasoning``, ``strengths``,
        ``gaps`` and ``profile_url``.
    """
    keywords = extract_keywords(job_description)
    candidates = []

    for username in usernames:
        profile_json = await get_developer_profile(client, username)
        profile = json.loads(profile_json)

        if "error" in profile:
            # Keep the failed lookup in the output (score 0) so the caller can
            # see which usernames could not be evaluated, and why.
            candidates.append({
                "rank": 0,
                "username": username,
                "score": 0,
                "reasoning": f"Could not fetch profile: {profile['error']}",
                "strengths": [],
                "gaps": ["Profile unavailable"],
                "profile_url": f"https://github.com/{username}",
            })
            continue

        activity = profile.get("activity_score", 0)
        relevance = compute_relevance_score(profile, keywords)

        # Weighted combination: relevance matters more than raw activity
        combined = activity * 0.4 + relevance * 0.6

        strengths, gaps = generate_strengths_gaps(profile)

        # Build the reasoning sentence from one activity phrase, one relevance
        # phrase and (when known) the candidate's top three languages.
        parts = []
        if activity >= 120:
            parts.append("exceptional GitHub activity")
        elif activity >= 80:
            parts.append("strong GitHub activity")
        elif activity >= 40:
            parts.append("moderate GitHub activity")
        else:
            parts.append("limited public activity")

        if relevance >= 70:
            parts.append("high keyword match with job description")
        elif relevance >= 40:
            parts.append("partial keyword match")
        else:
            parts.append("low keyword overlap with job description")

        top_langs = profile.get("top_languages", [])[:3]
        if top_langs:
            parts.append(f"primary languages: {', '.join(top_langs)}")

        # Upper-case only the first character of each phrase. str.capitalize()
        # would also lower-case the rest, mangling "GitHub" and language names
        # such as "JavaScript".
        reasoning = ". ".join(p[:1].upper() + p[1:] for p in parts) + "."

        candidates.append({
            "rank": 0,
            "username": username,
            "score": round(combined, 1),
            "reasoning": reasoning,
            "strengths": strengths,
            "gaps": gaps,
            "profile_url": profile.get("html_url", f"https://github.com/{username}"),
        })

    # Sort by score descending and assign ranks. The sort is stable, so
    # candidates with equal scores keep their input order.
    candidates.sort(key=lambda c: c["score"], reverse=True)
    # Clamp so a negative top_n yields an empty list instead of slicing
    # entries off the end of the ranking.
    selected = candidates[:max(top_n, 0)]
    for position, candidate in enumerate(selected, 1):
        candidate["rank"] = position

    return json.dumps({
        "job_keywords_extracted": keywords[:20],
        "total_evaluated": len(candidates),
        "candidates": selected,
    }, indent=2)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
from github_talent_mcp.github_client import GitHubClient
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
async def search_developers(
    client: GitHubClient,
    *,
    languages: list[str] | None = None,
    location: str | None = None,
    min_followers: int | None = None,
    min_repos: int | None = None,
    limit: int = 20,
) -> str:
    """Search GitHub users and return a compact JSON listing.

    Args:
        client: GitHub API client; its ``search_users`` coroutine is awaited
            with the filters below.
        languages: Optional language filter, forwarded unchanged.
        location: Optional location filter, forwarded unchanged.
        min_followers: Optional minimum follower count, forwarded unchanged.
        min_repos: Optional minimum repository count, forwarded unchanged.
        limit: Maximum number of developers to return. Zero or a negative
            value returns no developers.

    Returns:
        A JSON string. On success it has ``total_count``, ``returned``,
        ``note`` and ``developers`` (each with ``login``, ``avatar_url`` and
        ``html_url``). If the client call raises, the result is
        ``{"error": "GitHub API error: ..."}`` instead.
    """
    # Treat a negative limit as "nothing requested"; left unclamped, the
    # slice below would drop items from the END of the result list.
    limit = max(limit, 0)
    # Keep the page size inside 1..100 so a zero limit does not send an
    # out-of-range per_page value to the API.
    per_page = min(max(limit, 1), 100)

    try:
        data = await client.search_users(
            languages=languages,
            location=location,
            min_followers=min_followers,
            min_repos=min_repos,
            per_page=per_page,
        )
    except Exception as e:
        # Tool boundary: report the failure as JSON rather than raising.
        return json.dumps({"error": f"GitHub API error: {e}"})

    results = [
        {
            "login": item["login"],
            "avatar_url": item.get("avatar_url", ""),
            "html_url": item.get("html_url", f"https://github.com/{item['login']}"),
        }
        for item in data.get("items", [])[:limit]
    ]

    return json.dumps({
        "total_count": data.get("total_count", 0),
        "returned": len(results),
        "note": "Use get_developer_profile on candidates for full enrichment with activity scoring.",
        "developers": results,
    }, indent=2)
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: github-talent-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server for searching, scoring, and ranking GitHub developers for technical recruiting
|
|
5
|
+
Author: Carolina Cherry
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/carolinacherry/github-talent-mcp
|
|
8
|
+
Project-URL: Repository, https://github.com/carolinacherry/github-talent-mcp
|
|
9
|
+
Project-URL: Issues, https://github.com/carolinacherry/github-talent-mcp/issues
|
|
10
|
+
Keywords: mcp,github,recruiting,developer-tools,claude
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: mcp>=1.0.0
|
|
25
|
+
Requires-Dist: httpx>=0.27.0
|
|
26
|
+
Requires-Dist: pydantic>=2.0.0
|
|
27
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# github-talent-mcp
|
|
31
|
+
|
|
32
|
+
[License: MIT](LICENSE)
|
|
33
|
+
[Python 3.10+](https://www.python.org)
|
|
34
|
+
[Model Context Protocol](https://modelcontextprotocol.io)
|
|
35
|
+
[Claude](https://claude.ai)
|
|
36
|
+
[GitHub REST API](https://docs.github.com/en/rest)
|
|
37
|
+
|
|
38
|
+
MCP server that searches, scores, and ranks GitHub developers for technical recruiting.
|
|
39
|
+
|
|
40
|
+
## Demo
|
|
41
|
+
|
|
42
|
+
https://github.com/user-attachments/assets/2dfd82b4-3eb5-4f2b-bc0a-2580b95043e4
|
|
43
|
+
|
|
44
|
+
### Profile deep dive
|
|
45
|
+
|
|
46
|
+
> Get the full developer profile and activity score for torvalds on GitHub
|
|
47
|
+
|
|
48
|
+
Claude calls `get_developer_profile("torvalds")` and returns:
|
|
49
|
+
|
|
50
|
+
| Field | Value |
|
|
51
|
+
|---|---|
|
|
52
|
+
| **Activity Score** | **150** (reputation floor applied) |
|
|
53
|
+
| Location | Portland, OR |
|
|
54
|
+
| Followers | 293,321 |
|
|
55
|
+
| Stars Received | 235,068 |
|
|
56
|
+
| Primary Language | C (98.1%) |
|
|
57
|
+
| Commits (90d) | 0 |
|
|
58
|
+
| PRs (90d) | 0 |
|
|
59
|
+
| Notable Repos | linux (183K stars), libdc-for-dirk, subsurface-for-dirk, uemacs, pesern-resolve |
|
|
60
|
+
| Profile README | No |
|
|
61
|
+
| Hireable | No |
|
|
62
|
+
|
|
63
|
+
Torvalds has zero recent GitHub activity because kernel development flows through mailing lists, not GitHub PRs. The **reputation floor** (293K followers) overrides the behavioral score and sets it to 150.
|
|
64
|
+
|
|
65
|
+
### Repo contributor ranking
|
|
66
|
+
|
|
67
|
+
> Get the top contributors to huggingface/transformers and rank them for a founding ML engineer role at an AI startup
|
|
68
|
+
|
|
69
|
+
Claude calls `get_repo_contributors("huggingface/transformers")` → `rank_candidates` on the top 24 contributors:
|
|
70
|
+
|
|
71
|
+
| Rank | Developer | Combined Score | Activity | Relevance | Strengths |
|
|
72
|
+
|---|---|---|---|---|---|
|
|
73
|
+
| 1 | stas00 | 83.4 | 150 | 72 | 4,553 stars, contributes to major OSS, MIT-licensed repos |
|
|
74
|
+
| 2 | cyyever | 80.8 | 120 | 64 | 1,217 followers, active contributor, profile README |
|
|
75
|
+
| 3 | Cyrilvallez | 77.2 | 120 | 56 | Active: 13 commits + 57 PRs in 90 days, strong OSS presence |
|
|
76
|
+
| 4 | ArthurZucker | 74.4 | 120 | 48 | 37 PRs in 90 days, contributes to huggingface/transformers |
|
|
77
|
+
| 5 | ydshieh | 72.0 | 120 | 40 | Active: 9 commits + 40 PRs in 90 days |
|
|
78
|
+
|
|
79
|
+
Combined score = activity × 0.4 + relevance × 0.6. Relevance is keyword overlap with the job description (ML, AI, startup, engineer, etc.).
|
|
80
|
+
|
|
81
|
+
## Installation
|
|
82
|
+
|
|
83
|
+
### 1. Clone and install
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
git clone https://github.com/carolinacherry/github-talent-mcp.git
|
|
87
|
+
cd github-talent-mcp
|
|
88
|
+
python3 -m venv .venv && source .venv/bin/activate
|
|
89
|
+
pip install -e .
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### 2. Create a GitHub personal access token
|
|
93
|
+
|
|
94
|
+
Go to [github.com/settings/tokens](https://github.com/settings/tokens) and create a **fine-grained** or **classic** token with these scopes:
|
|
95
|
+
|
|
96
|
+
| Scope | Why |
|
|
97
|
+
|---|---|
|
|
98
|
+
| `read:user` | Read user profiles and search users |
|
|
99
|
+
| `public_repo` | Read public repo data, languages, contributors |
|
|
100
|
+
|
|
101
|
+
Create a `.env` file in the project root:
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
GITHUB_TOKEN=ghp_xxxxxxxxxxxx
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### 3. Connect to Claude
|
|
108
|
+
|
|
109
|
+
#### Claude Code (CLI)
|
|
110
|
+
|
|
111
|
+
One command:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
claude mcp add github-talent -- /path/to/github-talent-mcp/.venv/bin/python3 -m github_talent_mcp
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Then set the token as an environment variable. Either:
|
|
118
|
+
- Export it in your shell: `export GITHUB_TOKEN=ghp_xxxxxxxxxxxx`
|
|
119
|
+
- Or keep it in the `.env` file — the server reads it via `python-dotenv` on startup
|
|
120
|
+
|
|
121
|
+
Restart Claude Code to pick up the new server. Verify with `/mcp` — you should see 4 tools under `github-talent`.
|
|
122
|
+
|
|
123
|
+
#### Claude Desktop
|
|
124
|
+
|
|
125
|
+
Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (the macOS location of the Claude Desktop config file):
|
|
126
|
+
|
|
127
|
+
```json
|
|
128
|
+
{
|
|
129
|
+
"mcpServers": {
|
|
130
|
+
"github-talent": {
|
|
131
|
+
"command": "/path/to/github-talent-mcp/.venv/bin/python3",
|
|
132
|
+
"args": ["-m", "github_talent_mcp"],
|
|
133
|
+
"cwd": "/path/to/github-talent-mcp",
|
|
134
|
+
"env": {
|
|
135
|
+
"GITHUB_TOKEN": "ghp_xxxxxxxxxxxx"
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Restart Claude Desktop. The tools will appear in the toolbox icon.
|
|
143
|
+
|
|
144
|
+
## Try It
|
|
145
|
+
|
|
146
|
+
Once installed, paste these prompts to verify everything works:
|
|
147
|
+
|
|
148
|
+
**Basic search:**
|
|
149
|
+
> Find Python developers in Raleigh active in the last 60 days
|
|
150
|
+
|
|
151
|
+
**Profile deep dive:**
|
|
152
|
+
> Get the full developer profile and activity score for torvalds on GitHub
|
|
153
|
+
|
|
154
|
+
**Full workflow:**
|
|
155
|
+
> Find 10 ML engineers in San Francisco active in the last 30 days, then rank them for a senior LLM inference engineer role
|
|
156
|
+
|
|
157
|
+
**Repo contributors:**
|
|
158
|
+
> Get the top contributors to huggingface/transformers and rank them for a founding ML engineer role at an AI startup
|
|
159
|
+
|
|
160
|
+
## Tools
|
|
161
|
+
|
|
162
|
+
| Tool | Description |
|
|
163
|
+
|---|---|
|
|
164
|
+
| `search_developers` | Search GitHub users by language, location, activity, followers. For topic-based sourcing, use `get_repo_contributors` on relevant repos instead. |
|
|
165
|
+
| `get_developer_profile` | Deep profile enrichment: languages, stars, commits + PRs, OSS contributions, license breakdown, profile README, and activity score with breakdown. |
|
|
166
|
+
| `rank_candidates` | Rank usernames against a job description. Returns sorted candidates with combined score, strengths, gaps, and reasoning. |
|
|
167
|
+
| `get_repo_contributors` | Top contributors for any repo. Accepts `owner/repo` or full URL. The fastest way to source for a specific domain. |
|
|
168
|
+
|
|
169
|
+
## Scoring
|
|
170
|
+
|
|
171
|
+
The activity score combines two layers: **behavioral signals** (what you did recently) and a **reputation floor** (what you've built over time).
|
|
172
|
+
|
|
173
|
+
### Behavioral Score (0-205)
|
|
174
|
+
|
|
175
|
+
| Signal | Max Points | How |
|
|
176
|
+
|---|---|---|
|
|
177
|
+
| Commits + PRs (last 90 days) | 60 | Push commits + PR opens (PRs weighted x3). Captures both push-based and PR-based workflows. |
|
|
178
|
+
| Stars on repos | 40 | Personal repo stars + stars on repos you contribute to. Org repo maintainers get credit. |
|
|
179
|
+
| Profile README | 20 | Presence of a profile README (github.com/username/username). |
|
|
180
|
+
| Followers | 20 | Capped at 20. |
|
|
181
|
+
| Repos with descriptions | 20 | Ratio of repos that have descriptions. Signal of care and polish. |
|
|
182
|
+
| Permissive license repos | 15 | Has at least one repo with MIT, Apache-2.0, BSD, ISC, or Unlicense. |
|
|
183
|
+
| Major OSS contributions | 30 | PRs, pushes, issues, or issue comments on repos you don't own. Capped at 3 repos (10 pts each). |
|
|
184
|
+
|
|
185
|
+
### Reputation Floor
|
|
186
|
+
|
|
187
|
+
The behavioral score alone penalizes developers whose work doesn't produce GitHub events — Torvalds works through mailing lists, senior maintainers merge via org bots, and many engineers work in private repos.
|
|
188
|
+
|
|
189
|
+
The reputation floor ensures cumulative impact isn't erased by a quiet quarter:
|
|
190
|
+
|
|
191
|
+
| Threshold | Floor |
|
|
192
|
+
|---|---|
|
|
193
|
+
| 10K+ followers **or** 50K+ stars | 150 |
|
|
194
|
+
| 1K+ followers **or** 5K+ stars | 120 |
|
|
195
|
+
| 500+ followers **or** 1K+ stars | 100 |
|
|
196
|
+
| 100+ followers **or** 200+ stars | 80 |
|
|
197
|
+
|
|
198
|
+
The final score is `max(behavioral_score, reputation_floor)`. If the floor is applied, the breakdown includes a `reputation_floor` field so you know.
|
|
199
|
+
|
|
200
|
+
### Score Tiers
|
|
201
|
+
|
|
202
|
+
- **150+** — exceptional (top OSS maintainers, well-known engineers)
|
|
203
|
+
- **120-149** — strong signal, worth reaching out
|
|
204
|
+
- **80-119** — solid developer with meaningful public work
|
|
205
|
+
- **40-79** — active but limited public signal
|
|
206
|
+
- **<40** — low signal (likely private work or junior)
|
|
207
|
+
|
|
208
|
+
### Ranking
|
|
209
|
+
|
|
210
|
+
`rank_candidates` combines the activity score with a **relevance score** (0-100) based on keyword overlap between the job description and the candidate's profile (bio, languages, repo topics, README). The combined score weights relevance at 60% and activity at 40% — a high-activity developer with no overlap to the job shouldn't outrank a relevant one.
|
|
211
|
+
|
|
212
|
+
## Rate Limits
|
|
213
|
+
|
|
214
|
+
GitHub REST API: 5,000 requests/hour with token. A typical workflow (search + enrich 5 candidates + rank) uses ~60-100 API calls. Profile results are cached within a session to avoid redundant calls during ranking.
|
|
215
|
+
|
|
216
|
+
## License
|
|
217
|
+
|
|
218
|
+
MIT
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
github_talent_mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
github_talent_mcp/__main__.py,sha256=xboib-GsIoLb6gAKr_J8Ez58m2bVEtcIylTIEX6hMmM,50
|
|
3
|
+
github_talent_mcp/github_client.py,sha256=At7Q1KWXkDwp75rfN8KRc-jT7fQlNlBt03HRWOPgHw8,6614
|
|
4
|
+
github_talent_mcp/models.py,sha256=fc1erOnkw-E5qRgHCNcbctf9wJQTpWphf0RmEtrjsqc,2545
|
|
5
|
+
github_talent_mcp/scoring.py,sha256=ruM2cYGl_ZXJcsTWaWQI3STVDyd9ifdQatN4z0esxSo,6592
|
|
6
|
+
github_talent_mcp/server.py,sha256=rf9pq1wtxV3_uZGHCOpj-DE6uy_mNGpcb3n7j5XXAnU,3691
|
|
7
|
+
github_talent_mcp/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
github_talent_mcp/tools/contributors.py,sha256=Oy4lKEg31dczj0wE3crILYmscBDamUN6DZrEwirlbjQ,1859
|
|
9
|
+
github_talent_mcp/tools/profile.py,sha256=u7UHgj6j7NQXutgBwJPp-RXkd_xms6PnLdXtzdqO-58,8683
|
|
10
|
+
github_talent_mcp/tools/rank.py,sha256=_Msx_Req2YqPq__UHj9UtJLRomwS11c6_23HMx3RO9c,2897
|
|
11
|
+
github_talent_mcp/tools/search.py,sha256=HLJ2BkleuQPGNnGqAW9xn5aYl6AJHpia0f6yf2aEAfY,1234
|
|
12
|
+
github_talent_mcp-0.1.0.dist-info/licenses/LICENSE,sha256=fvdAvsHztUyLfy24J6KGQxObupndrkyC7loArb9nSa4,1072
|
|
13
|
+
github_talent_mcp-0.1.0.dist-info/METADATA,sha256=3qZuPS5nGplWCbc3Qs1hVXeyAP9YUQJXEGTIrfjt5ro,8943
|
|
14
|
+
github_talent_mcp-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
15
|
+
github_talent_mcp-0.1.0.dist-info/entry_points.txt,sha256=mbH4DQjPoKjMoCuaeYMKULS1thXzBHCk0bTwVrY9gJc,68
|
|
16
|
+
github_talent_mcp-0.1.0.dist-info/top_level.txt,sha256=s6U6EATNZhgMhH4K9fGZFBq63L9nOeHtTBFUbfchKjY,18
|
|
17
|
+
github_talent_mcp-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Carolina Cherry
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
github_talent_mcp
|