github-talent-mcp 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {github_talent_mcp-0.1.0/src/github_talent_mcp.egg-info → github_talent_mcp-0.2.0}/PKG-INFO +58 -6
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/README.md +53 -1
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/pyproject.toml +5 -5
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/github_client.py +43 -1
- github_talent_mcp-0.2.0/src/github_talent_mcp/scoring.py +342 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/server.py +112 -0
- github_talent_mcp-0.2.0/src/github_talent_mcp/tools/bulk.py +136 -0
- github_talent_mcp-0.2.0/src/github_talent_mcp/tools/compare.py +99 -0
- github_talent_mcp-0.2.0/src/github_talent_mcp/tools/outreach.py +137 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/tools/profile.py +16 -2
- github_talent_mcp-0.2.0/src/github_talent_mcp/tools/score_jd.py +64 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0/src/github_talent_mcp.egg-info}/PKG-INFO +58 -6
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp.egg-info/SOURCES.txt +8 -0
- github_talent_mcp-0.2.0/src/github_talent_mcp.egg-info/requires.txt +4 -0
- github_talent_mcp-0.2.0/tests/test_bulk.py +122 -0
- github_talent_mcp-0.2.0/tests/test_compare.py +94 -0
- github_talent_mcp-0.2.0/tests/test_outreach.py +104 -0
- github_talent_mcp-0.2.0/tests/test_score_jd.py +88 -0
- github_talent_mcp-0.1.0/src/github_talent_mcp/scoring.py +0 -169
- github_talent_mcp-0.1.0/src/github_talent_mcp.egg-info/requires.txt +0 -4
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/LICENSE +0 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/setup.cfg +0 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/__init__.py +0 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/__main__.py +0 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/models.py +0 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/tools/__init__.py +0 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/tools/contributors.py +0 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/tools/rank.py +0 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/tools/search.py +0 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp.egg-info/dependency_links.txt +0 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp.egg-info/entry_points.txt +0 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp.egg-info/top_level.txt +0 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/tests/test_scoring.py +0 -0
- {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/tests/test_search.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: github-talent-mcp
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: MCP server for searching, scoring, and ranking GitHub developers for technical recruiting
|
|
5
5
|
Author: Carolina Cherry
|
|
6
6
|
License: MIT
|
|
@@ -21,10 +21,10 @@ Classifier: Topic :: Software Development :: Libraries
|
|
|
21
21
|
Requires-Python: >=3.10
|
|
22
22
|
Description-Content-Type: text/markdown
|
|
23
23
|
License-File: LICENSE
|
|
24
|
-
Requires-Dist: mcp
|
|
25
|
-
Requires-Dist: httpx
|
|
26
|
-
Requires-Dist: pydantic
|
|
27
|
-
Requires-Dist: python-dotenv
|
|
24
|
+
Requires-Dist: mcp<2,>=1.0.0
|
|
25
|
+
Requires-Dist: httpx<1,>=0.27.0
|
|
26
|
+
Requires-Dist: pydantic<3,>=2.0.0
|
|
27
|
+
Requires-Dist: python-dotenv<2,>=1.0.0
|
|
28
28
|
Dynamic: license-file
|
|
29
29
|
|
|
30
30
|
# github-talent-mcp
|
|
@@ -39,6 +39,10 @@ MCP server that searches, scores, and ranks GitHub developers for technical recr
|
|
|
39
39
|
|
|
40
40
|
## Demo
|
|
41
41
|
|
|
42
|
+
https://github.com/user-attachments/assets/b2dbe9e0-26ee-4849-861a-4b5cb268facc
|
|
43
|
+
|
|
44
|
+
Sourcing candidates for a real Anthropic JD, live in Claude Cowork.
|
|
45
|
+
|
|
42
46
|
https://github.com/user-attachments/assets/2dfd82b4-3eb5-4f2b-bc0a-2580b95043e4
|
|
43
47
|
|
|
44
48
|
### Profile deep dive
|
|
@@ -118,7 +122,7 @@ Then set the token as an environment variable. Either:
|
|
|
118
122
|
- Export it in your shell: `export GITHUB_TOKEN=ghp_xxxxxxxxxxxx`
|
|
119
123
|
- Or keep it in the `.env` file — the server reads it via `python-dotenv` on startup
|
|
120
124
|
|
|
121
|
-
Restart Claude Code to pick up the new server. Verify with `/mcp` — you should see 4 tools under `github-talent`.
|
|
125
|
+
Restart Claude Code to pick up the new server. Verify with `/mcp` — you should see 8 tools under `github-talent`.
|
|
122
126
|
|
|
123
127
|
#### Claude Desktop
|
|
124
128
|
|
|
@@ -141,6 +145,38 @@ Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
|
|
|
141
145
|
|
|
142
146
|
Restart Claude Desktop. The tools will appear in the toolbox icon.
|
|
143
147
|
|
|
148
|
+
#### GitHub Copilot (CLI & desktop app)
|
|
149
|
+
|
|
150
|
+
The [GitHub Copilot CLI](https://docs.github.com/en/copilot/how-tos/copilot-cli) and the [GitHub Copilot app](https://github.com/features/ai/github-app) share one MCP config, so a single setup covers both.
|
|
151
|
+
|
|
152
|
+
One command (Copilot CLI):
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
copilot mcp add github-talent -e GITHUB_TOKEN=$GITHUB_TOKEN -- uvx github-talent-mcp
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Or add it manually to `~/.copilot/mcp-config.json`:
|
|
159
|
+
|
|
160
|
+
```json
|
|
161
|
+
{
|
|
162
|
+
"mcpServers": {
|
|
163
|
+
"github-talent": {
|
|
164
|
+
"type": "local",
|
|
165
|
+
"command": "uvx",
|
|
166
|
+
"args": ["github-talent-mcp"],
|
|
167
|
+
"env": {
|
|
168
|
+
"GITHUB_TOKEN": "${GITHUB_TOKEN}"
|
|
169
|
+
},
|
|
170
|
+
"tools": ["*"]
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Export your token first (`export GITHUB_TOKEN=ghp_xxxxxxxxxxxx`) — Copilot only inherits `PATH`, so the `${GITHUB_TOKEN}` reference reads it from your shell. If `uvx` isn't on your `PATH`, use its absolute path as `command`.
|
|
177
|
+
|
|
178
|
+
The **Copilot app** picks up this same config; it also syncs a repo's `.copilot/mcp-config.json` automatically and lets you add servers under Settings → MCP. Verify with `copilot mcp list` (terminal) or `/mcp show` (in a Copilot session) — you should see 8 tools under `github-talent`.
|
|
179
|
+
|
|
144
180
|
## Try It
|
|
145
181
|
|
|
146
182
|
Once installed, paste these prompts to verify everything works:
|
|
@@ -157,6 +193,18 @@ Once installed, paste these prompts to verify everything works:
|
|
|
157
193
|
**Repo contributors:**
|
|
158
194
|
> Get the top contributors to huggingface/transformers and rank them for a founding ML engineer role at an AI startup
|
|
159
195
|
|
|
196
|
+
**JD scoring:**
|
|
197
|
+
> Score these candidates against this job description: [paste JD]. Candidates: tiangolo, karpathy, hwchase17
|
|
198
|
+
|
|
199
|
+
**Compare candidates:**
|
|
200
|
+
> Compare tiangolo and hwchase17 for a Senior Python AI Engineer role
|
|
201
|
+
|
|
202
|
+
**Bulk scoring:**
|
|
203
|
+
> Score these 10 GitHub usernames and give me a ranked table: [paste list]
|
|
204
|
+
|
|
205
|
+
**Outreach:**
|
|
206
|
+
> Generate a casual recruiter message for tiangolo about a Senior Python role at Acme. My name is Daniel.
|
|
207
|
+
|
|
160
208
|
## Tools
|
|
161
209
|
|
|
162
210
|
| Tool | Description |
|
|
@@ -164,6 +212,10 @@ Once installed, paste these prompts to verify everything works:
|
|
|
164
212
|
| `search_developers` | Search GitHub users by language, location, activity, followers. For topic-based sourcing, use `get_repo_contributors` on relevant repos instead. |
|
|
165
213
|
| `get_developer_profile` | Deep profile enrichment: languages, stars, commits + PRs, OSS contributions, license breakdown, profile README, and activity score with breakdown. |
|
|
166
214
|
| `rank_candidates` | Rank usernames against a job description. Returns sorted candidates with combined score, strengths, gaps, and reasoning. |
|
|
215
|
+
| `score_against_jd` | Score candidates against a JD with per-dimension breakdown (tech stack, experience level, OSS signal, leadership). Returns gaps and personalized interview questions. |
|
|
216
|
+
| `compare_candidates` | Side-by-side comparison of 2-5 candidates. Shows dimension winners and a recommendation. Optionally scored against a JD. |
|
|
217
|
+
| `bulk_score` | Score up to 100 GitHub usernames in one call. Returns a ranked markdown table or CSV. Supports optional JD matching. |
|
|
218
|
+
| `generate_outreach` | Generate personalized recruiter messages (short/medium/detailed) that reference the candidate's actual repos and contributions. Requires your company name and sender name. Casual or formal tone. |
|
|
167
219
|
| `get_repo_contributors` | Top contributors for any repo. Accepts `owner/repo` or full URL. The fastest way to source for a specific domain. |
|
|
168
220
|
|
|
169
221
|
## Scoring
|
|
@@ -10,6 +10,10 @@ MCP server that searches, scores, and ranks GitHub developers for technical recr
|
|
|
10
10
|
|
|
11
11
|
## Demo
|
|
12
12
|
|
|
13
|
+
https://github.com/user-attachments/assets/b2dbe9e0-26ee-4849-861a-4b5cb268facc
|
|
14
|
+
|
|
15
|
+
Sourcing candidates for a real Anthropic JD, live in Claude Cowork.
|
|
16
|
+
|
|
13
17
|
https://github.com/user-attachments/assets/2dfd82b4-3eb5-4f2b-bc0a-2580b95043e4
|
|
14
18
|
|
|
15
19
|
### Profile deep dive
|
|
@@ -89,7 +93,7 @@ Then set the token as an environment variable. Either:
|
|
|
89
93
|
- Export it in your shell: `export GITHUB_TOKEN=ghp_xxxxxxxxxxxx`
|
|
90
94
|
- Or keep it in the `.env` file — the server reads it via `python-dotenv` on startup
|
|
91
95
|
|
|
92
|
-
Restart Claude Code to pick up the new server. Verify with `/mcp` — you should see 4 tools under `github-talent`.
|
|
96
|
+
Restart Claude Code to pick up the new server. Verify with `/mcp` — you should see 8 tools under `github-talent`.
|
|
93
97
|
|
|
94
98
|
#### Claude Desktop
|
|
95
99
|
|
|
@@ -112,6 +116,38 @@ Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
|
|
|
112
116
|
|
|
113
117
|
Restart Claude Desktop. The tools will appear in the toolbox icon.
|
|
114
118
|
|
|
119
|
+
#### GitHub Copilot (CLI & desktop app)
|
|
120
|
+
|
|
121
|
+
The [GitHub Copilot CLI](https://docs.github.com/en/copilot/how-tos/copilot-cli) and the [GitHub Copilot app](https://github.com/features/ai/github-app) share one MCP config, so a single setup covers both.
|
|
122
|
+
|
|
123
|
+
One command (Copilot CLI):
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
copilot mcp add github-talent -e GITHUB_TOKEN=$GITHUB_TOKEN -- uvx github-talent-mcp
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Or add it manually to `~/.copilot/mcp-config.json`:
|
|
130
|
+
|
|
131
|
+
```json
|
|
132
|
+
{
|
|
133
|
+
"mcpServers": {
|
|
134
|
+
"github-talent": {
|
|
135
|
+
"type": "local",
|
|
136
|
+
"command": "uvx",
|
|
137
|
+
"args": ["github-talent-mcp"],
|
|
138
|
+
"env": {
|
|
139
|
+
"GITHUB_TOKEN": "${GITHUB_TOKEN}"
|
|
140
|
+
},
|
|
141
|
+
"tools": ["*"]
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Export your token first (`export GITHUB_TOKEN=ghp_xxxxxxxxxxxx`) — Copilot only inherits `PATH`, so the `${GITHUB_TOKEN}` reference reads it from your shell. If `uvx` isn't on your `PATH`, use its absolute path as `command`.
|
|
148
|
+
|
|
149
|
+
The **Copilot app** picks up this same config; it also syncs a repo's `.copilot/mcp-config.json` automatically and lets you add servers under Settings → MCP. Verify with `copilot mcp list` (terminal) or `/mcp show` (in a Copilot session) — you should see 8 tools under `github-talent`.
|
|
150
|
+
|
|
115
151
|
## Try It
|
|
116
152
|
|
|
117
153
|
Once installed, paste these prompts to verify everything works:
|
|
@@ -128,6 +164,18 @@ Once installed, paste these prompts to verify everything works:
|
|
|
128
164
|
**Repo contributors:**
|
|
129
165
|
> Get the top contributors to huggingface/transformers and rank them for a founding ML engineer role at an AI startup
|
|
130
166
|
|
|
167
|
+
**JD scoring:**
|
|
168
|
+
> Score these candidates against this job description: [paste JD]. Candidates: tiangolo, karpathy, hwchase17
|
|
169
|
+
|
|
170
|
+
**Compare candidates:**
|
|
171
|
+
> Compare tiangolo and hwchase17 for a Senior Python AI Engineer role
|
|
172
|
+
|
|
173
|
+
**Bulk scoring:**
|
|
174
|
+
> Score these 10 GitHub usernames and give me a ranked table: [paste list]
|
|
175
|
+
|
|
176
|
+
**Outreach:**
|
|
177
|
+
> Generate a casual recruiter message for tiangolo about a Senior Python role at Acme. My name is Daniel.
|
|
178
|
+
|
|
131
179
|
## Tools
|
|
132
180
|
|
|
133
181
|
| Tool | Description |
|
|
@@ -135,6 +183,10 @@ Once installed, paste these prompts to verify everything works:
|
|
|
135
183
|
| `search_developers` | Search GitHub users by language, location, activity, followers. For topic-based sourcing, use `get_repo_contributors` on relevant repos instead. |
|
|
136
184
|
| `get_developer_profile` | Deep profile enrichment: languages, stars, commits + PRs, OSS contributions, license breakdown, profile README, and activity score with breakdown. |
|
|
137
185
|
| `rank_candidates` | Rank usernames against a job description. Returns sorted candidates with combined score, strengths, gaps, and reasoning. |
|
|
186
|
+
| `score_against_jd` | Score candidates against a JD with per-dimension breakdown (tech stack, experience level, OSS signal, leadership). Returns gaps and personalized interview questions. |
|
|
187
|
+
| `compare_candidates` | Side-by-side comparison of 2-5 candidates. Shows dimension winners and a recommendation. Optionally scored against a JD. |
|
|
188
|
+
| `bulk_score` | Score up to 100 GitHub usernames in one call. Returns a ranked markdown table or CSV. Supports optional JD matching. |
|
|
189
|
+
| `generate_outreach` | Generate personalized recruiter messages (short/medium/detailed) that reference the candidate's actual repos and contributions. Requires your company name and sender name. Casual or formal tone. |
|
|
138
190
|
| `get_repo_contributors` | Top contributors for any repo. Accepts `owner/repo` or full URL. The fastest way to source for a specific domain. |
|
|
139
191
|
|
|
140
192
|
## Scoring
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "github-talent-mcp"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "MCP server for searching, scoring, and ranking GitHub developers for technical recruiting"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
@@ -26,10 +26,10 @@ classifiers = [
|
|
|
26
26
|
"Topic :: Software Development :: Libraries",
|
|
27
27
|
]
|
|
28
28
|
dependencies = [
|
|
29
|
-
"mcp>=1.0.0",
|
|
30
|
-
"httpx>=0.27.0",
|
|
31
|
-
"pydantic>=2.0.0",
|
|
32
|
-
"python-dotenv>=1.0.0",
|
|
29
|
+
"mcp>=1.0.0,<2",
|
|
30
|
+
"httpx>=0.27.0,<1",
|
|
31
|
+
"pydantic>=2.0.0,<3",
|
|
32
|
+
"python-dotenv>=1.0.0,<2",
|
|
33
33
|
]
|
|
34
34
|
|
|
35
35
|
[project.urls]
|
|
@@ -71,7 +71,7 @@ class GitHubClient:
|
|
|
71
71
|
for lang in languages:
|
|
72
72
|
parts.append(f"language:{lang}")
|
|
73
73
|
if location:
|
|
74
|
-
parts.append(f"location:{location}")
|
|
74
|
+
parts.append(f'location:"{location}"')
|
|
75
75
|
if min_followers is not None:
|
|
76
76
|
parts.append(f"followers:>={min_followers}")
|
|
77
77
|
if min_repos is not None:
|
|
@@ -160,6 +160,48 @@ class GitHubClient:
|
|
|
160
160
|
self._cache_set(cache_key, all_events)
|
|
161
161
|
return all_events
|
|
162
162
|
|
|
163
|
+
async def search_commit_count(self, username: str, since_date: str) -> int:
|
|
164
|
+
"""Count commits by username since a date using the Search API.
|
|
165
|
+
|
|
166
|
+
More accurate than Events API for users whose commits don't
|
|
167
|
+
surface as PushEvents (e.g. Torvalds' kernel merges).
|
|
168
|
+
"""
|
|
169
|
+
cache_key = f"commit_count:{username}:{since_date}"
|
|
170
|
+
cached = self._cache_get(cache_key)
|
|
171
|
+
if cached is not None:
|
|
172
|
+
return cached
|
|
173
|
+
resp = await self._client.get(
|
|
174
|
+
"/search/commits",
|
|
175
|
+
params={
|
|
176
|
+
"q": f"author:{username} user:{username} committer-date:>={since_date}",
|
|
177
|
+
"per_page": 1,
|
|
178
|
+
},
|
|
179
|
+
)
|
|
180
|
+
self._check_rate_limit(resp)
|
|
181
|
+
resp.raise_for_status()
|
|
182
|
+
count = resp.json().get("total_count", 0)
|
|
183
|
+
self._cache_set(cache_key, count)
|
|
184
|
+
return count
|
|
185
|
+
|
|
186
|
+
async def search_pr_count(self, username: str, since_date: str) -> int:
|
|
187
|
+
"""Count PRs opened by username since a date using the Search API."""
|
|
188
|
+
cache_key = f"pr_count:{username}:{since_date}"
|
|
189
|
+
cached = self._cache_get(cache_key)
|
|
190
|
+
if cached is not None:
|
|
191
|
+
return cached
|
|
192
|
+
resp = await self._client.get(
|
|
193
|
+
"/search/issues",
|
|
194
|
+
params={
|
|
195
|
+
"q": f"author:{username} type:pr created:>={since_date}",
|
|
196
|
+
"per_page": 1,
|
|
197
|
+
},
|
|
198
|
+
)
|
|
199
|
+
self._check_rate_limit(resp)
|
|
200
|
+
resp.raise_for_status()
|
|
201
|
+
count = resp.json().get("total_count", 0)
|
|
202
|
+
self._cache_set(cache_key, count)
|
|
203
|
+
return count
|
|
204
|
+
|
|
163
205
|
async def get_profile_readme(self, username: str) -> str | None:
|
|
164
206
|
cache_key = f"readme:{username}"
|
|
165
207
|
cached = self._cache_get(cache_key)
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
PERMISSIVE_LICENSES = frozenset({
|
|
7
|
+
"mit", "apache-2.0", "bsd-2-clause", "bsd-3-clause", "isc", "unlicense",
|
|
8
|
+
})
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _compute_reputation_floor(followers: int, stars: int, account_age_days: int) -> int:
|
|
12
|
+
"""Compute a minimum score floor based on cumulative reputation.
|
|
13
|
+
|
|
14
|
+
Prevents well-known developers from scoring low just because their
|
|
15
|
+
recent GitHub activity doesn't match behavioral scoring expectations
|
|
16
|
+
(e.g., Torvalds works via mailing lists, not GitHub PRs).
|
|
17
|
+
"""
|
|
18
|
+
if followers >= 10_000 or stars >= 50_000:
|
|
19
|
+
return 150
|
|
20
|
+
if followers >= 1_000 or stars >= 5_000:
|
|
21
|
+
return 120
|
|
22
|
+
if followers >= 500 or stars >= 1_000:
|
|
23
|
+
return 100
|
|
24
|
+
if followers >= 100 or stars >= 200:
|
|
25
|
+
return 80
|
|
26
|
+
return 0
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def compute_activity_score(profile: dict[str, Any]) -> tuple[int, dict[str, int]]:
|
|
30
|
+
"""Compute activity score with per-dimension breakdown.
|
|
31
|
+
|
|
32
|
+
The score combines behavioral signals (recent commits, PRs, OSS contributions)
|
|
33
|
+
with a reputation floor (followers, stars) so that well-known developers
|
|
34
|
+
aren't penalized for workflows that don't produce GitHub events.
|
|
35
|
+
|
|
36
|
+
Returns (total_score, breakdown_dict).
|
|
37
|
+
"""
|
|
38
|
+
breakdown: dict[str, int] = {}
|
|
39
|
+
|
|
40
|
+
commits_90d = profile.get("commits_last_90_days", 0)
|
|
41
|
+
breakdown["commits_last_90_days"] = min(commits_90d * 3, 60)
|
|
42
|
+
|
|
43
|
+
breakdown["has_profile_readme"] = 20 if profile.get("has_profile_readme") else 0
|
|
44
|
+
|
|
45
|
+
stars = profile.get("total_stars_received", 0)
|
|
46
|
+
breakdown["stars_on_own_repos"] = min(stars * 2, 40)
|
|
47
|
+
|
|
48
|
+
followers = profile.get("followers", 0)
|
|
49
|
+
breakdown["followers"] = min(followers, 20)
|
|
50
|
+
|
|
51
|
+
desc_ratio = profile.get("repos_with_description_ratio", 0.0)
|
|
52
|
+
breakdown["repos_with_description"] = int(desc_ratio * 20)
|
|
53
|
+
|
|
54
|
+
breakdown["permissive_license_repos"] = 15 if profile.get("has_permissive_license_repos") else 0
|
|
55
|
+
|
|
56
|
+
oss = profile.get("major_oss_contributions", [])
|
|
57
|
+
breakdown["major_oss_contributions"] = min(len(oss) * 10, 30)
|
|
58
|
+
|
|
59
|
+
behavioral_score = sum(breakdown.values())
|
|
60
|
+
|
|
61
|
+
# Apply reputation floor — cumulative impact shouldn't be erased by a quiet quarter
|
|
62
|
+
account_age_days = profile.get("account_age_days", 0)
|
|
63
|
+
reputation_floor = _compute_reputation_floor(followers, stars, account_age_days)
|
|
64
|
+
total = max(behavioral_score, reputation_floor)
|
|
65
|
+
|
|
66
|
+
if reputation_floor > behavioral_score:
|
|
67
|
+
breakdown["reputation_floor"] = reputation_floor
|
|
68
|
+
|
|
69
|
+
return total, breakdown
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def extract_keywords(job_description: str) -> list[str]:
|
|
73
|
+
"""Extract meaningful keywords from a job description."""
|
|
74
|
+
noise = {
|
|
75
|
+
"the", "a", "an", "and", "or", "is", "are", "was", "were", "be", "been",
|
|
76
|
+
"with", "for", "to", "of", "in", "on", "at", "by", "from", "as", "we",
|
|
77
|
+
"you", "our", "your", "this", "that", "will", "can", "should", "must",
|
|
78
|
+
"have", "has", "had", "do", "does", "did", "not", "but", "if", "about",
|
|
79
|
+
"experience", "team", "work", "working", "looking", "join", "role",
|
|
80
|
+
"ability", "strong", "plus", "years", "knowledge", "skills", "required",
|
|
81
|
+
"preferred", "etc", "including", "such", "also", "may", "would", "could",
|
|
82
|
+
}
|
|
83
|
+
words = re.findall(r"[a-zA-Z#+.]+", job_description.lower())
|
|
84
|
+
seen: set[str] = set()
|
|
85
|
+
keywords: list[str] = []
|
|
86
|
+
for w in words:
|
|
87
|
+
if len(w) >= 2 and w not in noise and w not in seen:
|
|
88
|
+
seen.add(w)
|
|
89
|
+
keywords.append(w)
|
|
90
|
+
return keywords
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def compute_relevance_score(profile: dict[str, Any], job_keywords: list[str]) -> int:
|
|
94
|
+
"""Score 0-100 based on keyword overlap between profile and job description."""
|
|
95
|
+
if not job_keywords:
|
|
96
|
+
return 50
|
|
97
|
+
|
|
98
|
+
searchable_parts = [
|
|
99
|
+
profile.get("bio") or "",
|
|
100
|
+
" ".join(profile.get("top_languages", [])),
|
|
101
|
+
" ".join(profile.get("major_oss_contributions", [])),
|
|
102
|
+
profile.get("profile_readme_summary") or "",
|
|
103
|
+
profile.get("company") or "",
|
|
104
|
+
]
|
|
105
|
+
for repo in profile.get("notable_repos", []):
|
|
106
|
+
if isinstance(repo, dict):
|
|
107
|
+
searchable_parts.append(repo.get("description") or "")
|
|
108
|
+
searchable_parts.extend(repo.get("topics") or [])
|
|
109
|
+
searchable_parts.append(repo.get("language") or "")
|
|
110
|
+
|
|
111
|
+
searchable = " ".join(searchable_parts).lower()
|
|
112
|
+
|
|
113
|
+
matches = sum(1 for kw in job_keywords if kw in searchable)
|
|
114
|
+
return min(int((matches / len(job_keywords)) * 100), 100)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def score_jd_dimensions(profile: dict[str, Any], job_description: str) -> dict[str, Any]:
|
|
118
|
+
"""Score a profile against a job description across structured dimensions.
|
|
119
|
+
|
|
120
|
+
Returns a dict with per-dimension scores (0-100), reasoning, gaps,
|
|
121
|
+
and suggested interview questions.
|
|
122
|
+
"""
|
|
123
|
+
jd_lower = job_description.lower()
|
|
124
|
+
keywords = extract_keywords(job_description)
|
|
125
|
+
|
|
126
|
+
# --- Tech stack match (0-100) ---
|
|
127
|
+
# Match all JD keywords (not just a hardcoded set) against the full profile
|
|
128
|
+
profile_tech = " ".join([
|
|
129
|
+
" ".join(profile.get("top_languages", [])),
|
|
130
|
+
" ".join(r.get("language", "") or "" for r in profile.get("notable_repos", []) if isinstance(r, dict)),
|
|
131
|
+
" ".join(r.get("name", "") or "" for r in profile.get("notable_repos", []) if isinstance(r, dict)),
|
|
132
|
+
" ".join(r.get("description", "") or "" for r in profile.get("notable_repos", []) if isinstance(r, dict)),
|
|
133
|
+
" ".join(t for r in profile.get("notable_repos", []) if isinstance(r, dict) for t in (r.get("topics") or [])),
|
|
134
|
+
profile.get("profile_readme_summary") or "",
|
|
135
|
+
profile.get("bio") or "",
|
|
136
|
+
" ".join(profile.get("major_oss_contributions", [])),
|
|
137
|
+
]).lower()
|
|
138
|
+
|
|
139
|
+
# Filter keywords to tech-relevant ones (skip soft skills, process words)
|
|
140
|
+
soft_terms = {
|
|
141
|
+
# Role/process words
|
|
142
|
+
"mentoring", "leadership", "communication", "collaboration",
|
|
143
|
+
"agile", "scrum", "remote", "hybrid", "onsite", "track", "record",
|
|
144
|
+
"open", "source", "contributions", "engineer", "engineering",
|
|
145
|
+
"developer", "build", "design", "implement", "maintain",
|
|
146
|
+
"senior", "staff", "principal", "lead", "manager",
|
|
147
|
+
"junior", "mid", "level", "familiar", "familiarity", "bonus", "nice",
|
|
148
|
+
"looking", "seeking", "ideal", "candidate", "responsible", "own",
|
|
149
|
+
"across", "help", "growth", "impact", "high",
|
|
150
|
+
# JD boilerplate
|
|
151
|
+
"requirements", "minimum", "qualifications", "preferred", "required",
|
|
152
|
+
"expect", "type", "full", "time", "part", "contract", "salary",
|
|
153
|
+
"benefits", "company", "team", "position", "opportunity",
|
|
154
|
+
"based", "features", "projects", "production", "software",
|
|
155
|
+
"applications", "understanding", "shipping", "building", "deep",
|
|
156
|
+
"join", "power", "have", "knowledge", "years", "experience",
|
|
157
|
+
"strong", "expertise", "multi", "resilient", "context", "aware",
|
|
158
|
+
}
|
|
159
|
+
tech_keywords = [kw for kw in keywords if kw not in soft_terms and not kw.endswith(".")]
|
|
160
|
+
|
|
161
|
+
if tech_keywords:
|
|
162
|
+
tech_matches = sum(1 for t in tech_keywords if t in profile_tech)
|
|
163
|
+
tech_score = min(int((tech_matches / len(tech_keywords)) * 100), 100)
|
|
164
|
+
else:
|
|
165
|
+
tech_score = 50 # no tech requirements specified
|
|
166
|
+
|
|
167
|
+
# --- Experience level match (0-100) ---
|
|
168
|
+
# Infer required seniority from JD
|
|
169
|
+
senior_terms = {"senior", "staff", "principal", "lead", "architect", "head", "director"}
|
|
170
|
+
mid_terms = {"mid", "intermediate", "ii", "iii"}
|
|
171
|
+
junior_terms = {"junior", "entry", "associate", "intern", "graduate"}
|
|
172
|
+
|
|
173
|
+
jd_words = set(jd_lower.split())
|
|
174
|
+
if jd_words & senior_terms:
|
|
175
|
+
required_level = "senior"
|
|
176
|
+
elif jd_words & junior_terms:
|
|
177
|
+
required_level = "junior"
|
|
178
|
+
elif jd_words & mid_terms:
|
|
179
|
+
required_level = "mid"
|
|
180
|
+
else:
|
|
181
|
+
required_level = "unknown"
|
|
182
|
+
|
|
183
|
+
# Estimate candidate level from signals
|
|
184
|
+
stars = profile.get("total_stars_received", 0)
|
|
185
|
+
followers = profile.get("followers", 0)
|
|
186
|
+
account_age = profile.get("account_age_days", 0)
|
|
187
|
+
oss_count = len(profile.get("major_oss_contributions", []))
|
|
188
|
+
|
|
189
|
+
if followers >= 500 or stars >= 1000 or (account_age > 2500 and oss_count >= 2):
|
|
190
|
+
candidate_level = "senior"
|
|
191
|
+
elif account_age > 1000 or stars >= 50:
|
|
192
|
+
candidate_level = "mid"
|
|
193
|
+
else:
|
|
194
|
+
candidate_level = "junior"
|
|
195
|
+
|
|
196
|
+
if required_level == "unknown":
|
|
197
|
+
exp_score = 60
|
|
198
|
+
elif required_level == candidate_level:
|
|
199
|
+
exp_score = 90
|
|
200
|
+
elif (required_level == "senior" and candidate_level == "mid"):
|
|
201
|
+
exp_score = 50
|
|
202
|
+
elif (required_level == "mid" and candidate_level == "senior"):
|
|
203
|
+
exp_score = 80 # overqualified but fine
|
|
204
|
+
elif (required_level == "junior" and candidate_level != "junior"):
|
|
205
|
+
exp_score = 70 # overqualified
|
|
206
|
+
else:
|
|
207
|
+
exp_score = 30
|
|
208
|
+
|
|
209
|
+
# --- OSS signal (0-100) ---
|
|
210
|
+
activity = profile.get("activity_score", 0)
|
|
211
|
+
oss_score = min(int((activity / 150) * 100), 100)
|
|
212
|
+
|
|
213
|
+
# --- Leadership signals (0-100) ---
|
|
214
|
+
leadership_score = 0
|
|
215
|
+
if profile.get("has_profile_readme"):
|
|
216
|
+
leadership_score += 20
|
|
217
|
+
if stars >= 100:
|
|
218
|
+
leadership_score += 20
|
|
219
|
+
if oss_count >= 1:
|
|
220
|
+
leadership_score += 25
|
|
221
|
+
if followers >= 100:
|
|
222
|
+
leadership_score += 15
|
|
223
|
+
desc_ratio = profile.get("repos_with_description_ratio", 0)
|
|
224
|
+
if desc_ratio >= 0.7:
|
|
225
|
+
leadership_score += 10
|
|
226
|
+
if profile.get("has_permissive_license_repos"):
|
|
227
|
+
leadership_score += 10
|
|
228
|
+
leadership_score = min(leadership_score, 100)
|
|
229
|
+
|
|
230
|
+
# --- Weighted overall ---
|
|
231
|
+
overall = int(tech_score * 0.35 + exp_score * 0.25 + oss_score * 0.25 + leadership_score * 0.15)
|
|
232
|
+
|
|
233
|
+
# --- Gaps ---
|
|
234
|
+
gaps = []
|
|
235
|
+
if tech_score < 50 and tech_keywords:
|
|
236
|
+
missing = [t for t in tech_keywords if t not in profile_tech]
|
|
237
|
+
if missing:
|
|
238
|
+
gaps.append(f"Missing from profile: {', '.join(missing[:5])}")
|
|
239
|
+
if exp_score < 50:
|
|
240
|
+
gaps.append(f"Experience level mismatch: role needs {required_level}, candidate appears {candidate_level}")
|
|
241
|
+
if oss_score < 30:
|
|
242
|
+
gaps.append("Low public GitHub activity signal")
|
|
243
|
+
if leadership_score < 30:
|
|
244
|
+
gaps.append("Few visible leadership/community signals")
|
|
245
|
+
|
|
246
|
+
# --- Interview questions ---
|
|
247
|
+
questions = []
|
|
248
|
+
top_langs = profile.get("top_languages", [])[:2]
|
|
249
|
+
repos = profile.get("notable_repos", [])
|
|
250
|
+
# Pick most relevant repo (highest star count that matches a JD keyword)
|
|
251
|
+
best_repo_name = None
|
|
252
|
+
if repos:
|
|
253
|
+
for repo in repos:
|
|
254
|
+
if not isinstance(repo, dict):
|
|
255
|
+
continue
|
|
256
|
+
repo_text = f"{repo.get('name', '')} {repo.get('description', '')}".lower()
|
|
257
|
+
if any(kw in repo_text for kw in tech_keywords):
|
|
258
|
+
best_repo_name = repo.get("name")
|
|
259
|
+
break
|
|
260
|
+
if not best_repo_name and isinstance(repos[0], dict):
|
|
261
|
+
best_repo_name = repos[0].get("name", "your top project")
|
|
262
|
+
if best_repo_name:
|
|
263
|
+
questions.append(f"Walk me through the architecture of {best_repo_name}.")
|
|
264
|
+
if gaps:
|
|
265
|
+
# Extract the actual missing items, not the label
|
|
266
|
+
first_gap = gaps[0]
|
|
267
|
+
if ":" in first_gap:
|
|
268
|
+
missing_items = first_gap.split(":", 1)[1].strip()
|
|
269
|
+
questions.append(f"What experience do you have with {missing_items}?")
|
|
270
|
+
else:
|
|
271
|
+
questions.append(f"Tell me about: {first_gap}")
|
|
272
|
+
if top_langs:
|
|
273
|
+
questions.append(f"What trade-offs have you hit working with {top_langs[0]} at scale?")
|
|
274
|
+
|
|
275
|
+
return {
|
|
276
|
+
"dimensions": {
|
|
277
|
+
"tech_stack_match": tech_score,
|
|
278
|
+
"experience_level": exp_score,
|
|
279
|
+
"oss_signal": oss_score,
|
|
280
|
+
"leadership_signals": leadership_score,
|
|
281
|
+
},
|
|
282
|
+
"overall_fit": overall,
|
|
283
|
+
"required_level": required_level,
|
|
284
|
+
"estimated_candidate_level": candidate_level,
|
|
285
|
+
"gaps": gaps,
|
|
286
|
+
"interview_questions": questions[:3],
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def generate_strengths_gaps(profile: dict[str, Any]) -> tuple[list[str], list[str]]:
|
|
291
|
+
"""Generate human-readable strengths and gaps from profile data."""
|
|
292
|
+
strengths: list[str] = []
|
|
293
|
+
gaps: list[str] = []
|
|
294
|
+
|
|
295
|
+
commits_90d = profile.get("commits_last_90_days", 0)
|
|
296
|
+
prs_90d = profile.get("prs_opened_last_90_days", 0)
|
|
297
|
+
if commits_90d > 20 or prs_90d > 10:
|
|
298
|
+
parts = []
|
|
299
|
+
if commits_90d > 0:
|
|
300
|
+
parts.append(f"{commits_90d} commits")
|
|
301
|
+
if prs_90d > 0:
|
|
302
|
+
parts.append(f"{prs_90d} PRs opened")
|
|
303
|
+
strengths.append(f"Active contributor: {', '.join(parts)} in last 90 days")
|
|
304
|
+
elif commits_90d == 0 and prs_90d == 0:
|
|
305
|
+
gaps.append("No recent public commit or PR activity")
|
|
306
|
+
|
|
307
|
+
contributed_stars = profile.get("contributed_repo_stars", 0)
|
|
308
|
+
if contributed_stars > 1000:
|
|
309
|
+
strengths.append(f"Contributes to repos with {contributed_stars:,} combined stars")
|
|
310
|
+
|
|
311
|
+
if profile.get("has_profile_readme"):
|
|
312
|
+
strengths.append("Maintains a profile README")
|
|
313
|
+
|
|
314
|
+
stars = profile.get("total_stars_received", 0)
|
|
315
|
+
if stars > 50:
|
|
316
|
+
strengths.append(f"Popular open source work: {stars} total stars received")
|
|
317
|
+
elif stars == 0:
|
|
318
|
+
gaps.append("No starred repositories")
|
|
319
|
+
|
|
320
|
+
oss = profile.get("major_oss_contributions", [])
|
|
321
|
+
if oss:
|
|
322
|
+
strengths.append(f"Contributes to {len(oss)} external OSS project(s): {', '.join(oss[:3])}")
|
|
323
|
+
|
|
324
|
+
if not profile.get("has_permissive_license_repos"):
|
|
325
|
+
gaps.append("No repos with permissive open-source licenses")
|
|
326
|
+
|
|
327
|
+
followers = profile.get("followers", 0)
|
|
328
|
+
if followers >= 10_000:
|
|
329
|
+
strengths.append(f"Exceptional community presence: {followers:,} followers")
|
|
330
|
+
elif followers >= 1_000:
|
|
331
|
+
strengths.append(f"Strong community presence: {followers:,} followers")
|
|
332
|
+
elif followers > 100:
|
|
333
|
+
strengths.append(f"Notable community presence: {followers:,} followers")
|
|
334
|
+
|
|
335
|
+
if profile.get("hireable"):
|
|
336
|
+
strengths.append("Marked as hireable on GitHub")
|
|
337
|
+
|
|
338
|
+
langs = profile.get("top_languages", [])
|
|
339
|
+
if langs:
|
|
340
|
+
strengths.append(f"Primary languages: {', '.join(langs[:5])}")
|
|
341
|
+
|
|
342
|
+
return strengths, gaps
|