github-talent-mcp 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {github_talent_mcp-0.1.0/src/github_talent_mcp.egg-info → github_talent_mcp-0.2.0}/PKG-INFO +58 -6
  2. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/README.md +53 -1
  3. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/pyproject.toml +5 -5
  4. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/github_client.py +43 -1
  5. github_talent_mcp-0.2.0/src/github_talent_mcp/scoring.py +342 -0
  6. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/server.py +112 -0
  7. github_talent_mcp-0.2.0/src/github_talent_mcp/tools/bulk.py +136 -0
  8. github_talent_mcp-0.2.0/src/github_talent_mcp/tools/compare.py +99 -0
  9. github_talent_mcp-0.2.0/src/github_talent_mcp/tools/outreach.py +137 -0
  10. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/tools/profile.py +16 -2
  11. github_talent_mcp-0.2.0/src/github_talent_mcp/tools/score_jd.py +64 -0
  12. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0/src/github_talent_mcp.egg-info}/PKG-INFO +58 -6
  13. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp.egg-info/SOURCES.txt +8 -0
  14. github_talent_mcp-0.2.0/src/github_talent_mcp.egg-info/requires.txt +4 -0
  15. github_talent_mcp-0.2.0/tests/test_bulk.py +122 -0
  16. github_talent_mcp-0.2.0/tests/test_compare.py +94 -0
  17. github_talent_mcp-0.2.0/tests/test_outreach.py +104 -0
  18. github_talent_mcp-0.2.0/tests/test_score_jd.py +88 -0
  19. github_talent_mcp-0.1.0/src/github_talent_mcp/scoring.py +0 -169
  20. github_talent_mcp-0.1.0/src/github_talent_mcp.egg-info/requires.txt +0 -4
  21. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/LICENSE +0 -0
  22. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/setup.cfg +0 -0
  23. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/__init__.py +0 -0
  24. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/__main__.py +0 -0
  25. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/models.py +0 -0
  26. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/tools/__init__.py +0 -0
  27. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/tools/contributors.py +0 -0
  28. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/tools/rank.py +0 -0
  29. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp/tools/search.py +0 -0
  30. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp.egg-info/dependency_links.txt +0 -0
  31. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp.egg-info/entry_points.txt +0 -0
  32. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/src/github_talent_mcp.egg-info/top_level.txt +0 -0
  33. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/tests/test_scoring.py +0 -0
  34. {github_talent_mcp-0.1.0 → github_talent_mcp-0.2.0}/tests/test_search.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: github-talent-mcp
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: MCP server for searching, scoring, and ranking GitHub developers for technical recruiting
5
5
  Author: Carolina Cherry
6
6
  License: MIT
@@ -21,10 +21,10 @@ Classifier: Topic :: Software Development :: Libraries
21
21
  Requires-Python: >=3.10
22
22
  Description-Content-Type: text/markdown
23
23
  License-File: LICENSE
24
- Requires-Dist: mcp>=1.0.0
25
- Requires-Dist: httpx>=0.27.0
26
- Requires-Dist: pydantic>=2.0.0
27
- Requires-Dist: python-dotenv>=1.0.0
24
+ Requires-Dist: mcp<2,>=1.0.0
25
+ Requires-Dist: httpx<1,>=0.27.0
26
+ Requires-Dist: pydantic<3,>=2.0.0
27
+ Requires-Dist: python-dotenv<2,>=1.0.0
28
28
  Dynamic: license-file
29
29
 
30
30
  # github-talent-mcp
@@ -39,6 +39,10 @@ MCP server that searches, scores, and ranks GitHub developers for technical recr
39
39
 
40
40
  ## Demo
41
41
 
42
+ https://github.com/user-attachments/assets/b2dbe9e0-26ee-4849-861a-4b5cb268facc
43
+
44
+ Sourcing candidates for a real Anthropic JD, live in Claude Cowork.
45
+
42
46
  https://github.com/user-attachments/assets/2dfd82b4-3eb5-4f2b-bc0a-2580b95043e4
43
47
 
44
48
  ### Profile deep dive
@@ -118,7 +122,7 @@ Then set the token as an environment variable. Either:
118
122
  - Export it in your shell: `export GITHUB_TOKEN=ghp_xxxxxxxxxxxx`
119
123
  - Or keep it in the `.env` file — the server reads it via `python-dotenv` on startup
120
124
 
121
- Restart Claude Code to pick up the new server. Verify with `/mcp` — you should see 4 tools under `github-talent`.
125
+ Restart Claude Code to pick up the new server. Verify with `/mcp` — you should see 8 tools under `github-talent`.
122
126
 
123
127
  #### Claude Desktop
124
128
 
@@ -141,6 +145,38 @@ Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
141
145
 
142
146
  Restart Claude Desktop. The tools will appear in the toolbox icon.
143
147
 
148
+ #### GitHub Copilot (CLI & desktop app)
149
+
150
+ The [GitHub Copilot CLI](https://docs.github.com/en/copilot/how-tos/copilot-cli) and the [GitHub Copilot app](https://github.com/features/ai/github-app) share one MCP config, so a single setup covers both.
151
+
152
+ One command (Copilot CLI):
153
+
154
+ ```bash
155
+ copilot mcp add github-talent -e GITHUB_TOKEN=$GITHUB_TOKEN -- uvx github-talent-mcp
156
+ ```
157
+
158
+ Or add it manually to `~/.copilot/mcp-config.json`:
159
+
160
+ ```json
161
+ {
162
+ "mcpServers": {
163
+ "github-talent": {
164
+ "type": "local",
165
+ "command": "uvx",
166
+ "args": ["github-talent-mcp"],
167
+ "env": {
168
+ "GITHUB_TOKEN": "${GITHUB_TOKEN}"
169
+ },
170
+ "tools": ["*"]
171
+ }
172
+ }
173
+ }
174
+ ```
175
+
176
+ Export your token first (`export GITHUB_TOKEN=ghp_xxxxxxxxxxxx`) — Copilot only inherits `PATH`, so the `${GITHUB_TOKEN}` reference reads it from your shell. If `uvx` isn't on your `PATH`, use its absolute path as `command`.
177
+
178
+ The **Copilot app** picks up this same config; it also syncs a repo's `.copilot/mcp-config.json` automatically and lets you add servers under Settings → MCP. Verify with `copilot mcp list` (terminal) or `/mcp show` (in a Copilot session) — you should see 8 tools under `github-talent`.
179
+
144
180
  ## Try It
145
181
 
146
182
  Once installed, paste these prompts to verify everything works:
@@ -157,6 +193,18 @@ Once installed, paste these prompts to verify everything works:
157
193
  **Repo contributors:**
158
194
  > Get the top contributors to huggingface/transformers and rank them for a founding ML engineer role at an AI startup
159
195
 
196
+ **JD scoring:**
197
+ > Score these candidates against this job description: [paste JD]. Candidates: tiangolo, karpathy, hwchase17
198
+
199
+ **Compare candidates:**
200
+ > Compare tiangolo and hwchase17 for a Senior Python AI Engineer role
201
+
202
+ **Bulk scoring:**
203
+ > Score these 10 GitHub usernames and give me a ranked table: [paste list]
204
+
205
+ **Outreach:**
206
+ > Generate a casual recruiter message for tiangolo about a Senior Python role at Acme. My name is Daniel.
207
+
160
208
  ## Tools
161
209
 
162
210
  | Tool | Description |
@@ -164,6 +212,10 @@ Once installed, paste these prompts to verify everything works:
164
212
  | `search_developers` | Search GitHub users by language, location, activity, followers. For topic-based sourcing, use `get_repo_contributors` on relevant repos instead. |
165
213
  | `get_developer_profile` | Deep profile enrichment: languages, stars, commits + PRs, OSS contributions, license breakdown, profile README, and activity score with breakdown. |
166
214
  | `rank_candidates` | Rank usernames against a job description. Returns sorted candidates with combined score, strengths, gaps, and reasoning. |
215
+ | `score_against_jd` | Score candidates against a JD with per-dimension breakdown (tech stack, experience level, OSS signal, leadership). Returns gaps and personalized interview questions. |
216
+ | `compare_candidates` | Side-by-side comparison of 2-5 candidates. Shows dimension winners and a recommendation. Optionally scored against a JD. |
217
+ | `bulk_score` | Score up to 100 GitHub usernames in one call. Returns a ranked markdown table or CSV. Supports optional JD matching. |
218
+ | `generate_outreach` | Generate personalized recruiter messages (short/medium/detailed) that reference the candidate's actual repos and contributions. Requires your company name and sender name. Casual or formal tone. |
167
219
  | `get_repo_contributors` | Top contributors for any repo. Accepts `owner/repo` or full URL. The fastest way to source for a specific domain. |
168
220
 
169
221
  ## Scoring
@@ -10,6 +10,10 @@ MCP server that searches, scores, and ranks GitHub developers for technical recr
10
10
 
11
11
  ## Demo
12
12
 
13
+ https://github.com/user-attachments/assets/b2dbe9e0-26ee-4849-861a-4b5cb268facc
14
+
15
+ Sourcing candidates for a real Anthropic JD, live in Claude Cowork.
16
+
13
17
  https://github.com/user-attachments/assets/2dfd82b4-3eb5-4f2b-bc0a-2580b95043e4
14
18
 
15
19
  ### Profile deep dive
@@ -89,7 +93,7 @@ Then set the token as an environment variable. Either:
89
93
  - Export it in your shell: `export GITHUB_TOKEN=ghp_xxxxxxxxxxxx`
90
94
  - Or keep it in the `.env` file — the server reads it via `python-dotenv` on startup
91
95
 
92
- Restart Claude Code to pick up the new server. Verify with `/mcp` — you should see 4 tools under `github-talent`.
96
+ Restart Claude Code to pick up the new server. Verify with `/mcp` — you should see 8 tools under `github-talent`.
93
97
 
94
98
  #### Claude Desktop
95
99
 
@@ -112,6 +116,38 @@ Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
112
116
 
113
117
  Restart Claude Desktop. The tools will appear in the toolbox icon.
114
118
 
119
+ #### GitHub Copilot (CLI & desktop app)
120
+
121
+ The [GitHub Copilot CLI](https://docs.github.com/en/copilot/how-tos/copilot-cli) and the [GitHub Copilot app](https://github.com/features/ai/github-app) share one MCP config, so a single setup covers both.
122
+
123
+ One command (Copilot CLI):
124
+
125
+ ```bash
126
+ copilot mcp add github-talent -e GITHUB_TOKEN=$GITHUB_TOKEN -- uvx github-talent-mcp
127
+ ```
128
+
129
+ Or add it manually to `~/.copilot/mcp-config.json`:
130
+
131
+ ```json
132
+ {
133
+ "mcpServers": {
134
+ "github-talent": {
135
+ "type": "local",
136
+ "command": "uvx",
137
+ "args": ["github-talent-mcp"],
138
+ "env": {
139
+ "GITHUB_TOKEN": "${GITHUB_TOKEN}"
140
+ },
141
+ "tools": ["*"]
142
+ }
143
+ }
144
+ }
145
+ ```
146
+
147
+ Export your token first (`export GITHUB_TOKEN=ghp_xxxxxxxxxxxx`) — Copilot only inherits `PATH`, so the `${GITHUB_TOKEN}` reference reads it from your shell. If `uvx` isn't on your `PATH`, use its absolute path as `command`.
148
+
149
+ The **Copilot app** picks up this same config; it also syncs a repo's `.copilot/mcp-config.json` automatically and lets you add servers under Settings → MCP. Verify with `copilot mcp list` (terminal) or `/mcp show` (in a Copilot session) — you should see 8 tools under `github-talent`.
150
+
115
151
  ## Try It
116
152
 
117
153
  Once installed, paste these prompts to verify everything works:
@@ -128,6 +164,18 @@ Once installed, paste these prompts to verify everything works:
128
164
  **Repo contributors:**
129
165
  > Get the top contributors to huggingface/transformers and rank them for a founding ML engineer role at an AI startup
130
166
 
167
+ **JD scoring:**
168
+ > Score these candidates against this job description: [paste JD]. Candidates: tiangolo, karpathy, hwchase17
169
+
170
+ **Compare candidates:**
171
+ > Compare tiangolo and hwchase17 for a Senior Python AI Engineer role
172
+
173
+ **Bulk scoring:**
174
+ > Score these 10 GitHub usernames and give me a ranked table: [paste list]
175
+
176
+ **Outreach:**
177
+ > Generate a casual recruiter message for tiangolo about a Senior Python role at Acme. My name is Daniel.
178
+
131
179
  ## Tools
132
180
 
133
181
  | Tool | Description |
@@ -135,6 +183,10 @@ Once installed, paste these prompts to verify everything works:
135
183
  | `search_developers` | Search GitHub users by language, location, activity, followers. For topic-based sourcing, use `get_repo_contributors` on relevant repos instead. |
136
184
  | `get_developer_profile` | Deep profile enrichment: languages, stars, commits + PRs, OSS contributions, license breakdown, profile README, and activity score with breakdown. |
137
185
  | `rank_candidates` | Rank usernames against a job description. Returns sorted candidates with combined score, strengths, gaps, and reasoning. |
186
+ | `score_against_jd` | Score candidates against a JD with per-dimension breakdown (tech stack, experience level, OSS signal, leadership). Returns gaps and personalized interview questions. |
187
+ | `compare_candidates` | Side-by-side comparison of 2-5 candidates. Shows dimension winners and a recommendation. Optionally scored against a JD. |
188
+ | `bulk_score` | Score up to 100 GitHub usernames in one call. Returns a ranked markdown table or CSV. Supports optional JD matching. |
189
+ | `generate_outreach` | Generate personalized recruiter messages (short/medium/detailed) that reference the candidate's actual repos and contributions. Requires your company name and sender name. Casual or formal tone. |
138
190
  | `get_repo_contributors` | Top contributors for any repo. Accepts `owner/repo` or full URL. The fastest way to source for a specific domain. |
139
191
 
140
192
  ## Scoring
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "github-talent-mcp"
7
- version = "0.1.0"
7
+ version = "0.2.0"
8
8
  description = "MCP server for searching, scoring, and ranking GitHub developers for technical recruiting"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -26,10 +26,10 @@ classifiers = [
26
26
  "Topic :: Software Development :: Libraries",
27
27
  ]
28
28
  dependencies = [
29
- "mcp>=1.0.0",
30
- "httpx>=0.27.0",
31
- "pydantic>=2.0.0",
32
- "python-dotenv>=1.0.0",
29
+ "mcp>=1.0.0,<2",
30
+ "httpx>=0.27.0,<1",
31
+ "pydantic>=2.0.0,<3",
32
+ "python-dotenv>=1.0.0,<2",
33
33
  ]
34
34
 
35
35
  [project.urls]
@@ -71,7 +71,7 @@ class GitHubClient:
71
71
  for lang in languages:
72
72
  parts.append(f"language:{lang}")
73
73
  if location:
74
- parts.append(f"location:{location}")
74
+ parts.append(f'location:"{location}"')
75
75
  if min_followers is not None:
76
76
  parts.append(f"followers:>={min_followers}")
77
77
  if min_repos is not None:
@@ -160,6 +160,48 @@ class GitHubClient:
160
160
  self._cache_set(cache_key, all_events)
161
161
  return all_events
162
162
 
163
async def search_commit_count(self, username: str, since_date: str) -> int:
    """Return the commit total the Search API reports for ``username``.

    Sends one ``/search/commits`` request whose query combines the
    ``author:``, ``user:`` and ``committer-date:>=`` qualifiers, and reads
    only ``total_count`` from the response (hence ``per_page=1``). The
    stated motivation is accuracy for users whose commits don't surface as
    PushEvents in the Events API (e.g. Torvalds' kernel merges).

    Args:
        username: GitHub login used for both the ``author:`` and ``user:``
            qualifiers.
        since_date: Lower bound for ``committer-date`` (presumably an ISO
            ``YYYY-MM-DD`` string; confirm against callers).

    Returns:
        The ``total_count`` field of the search response, or ``0`` when the
        field is absent. Values are cached per (username, since_date).

    Raises:
        Whatever ``raise_for_status()`` raises for a non-success response.
    """
    memo_key = f"commit_count:{username}:{since_date}"
    total = self._cache_get(memo_key)
    # Only None means "not cached"; a cached count of 0 is a valid hit.
    if total is None:
        query = f"author:{username} user:{username} committer-date:>={since_date}"
        response = await self._client.get(
            "/search/commits",
            params={"q": query, "per_page": 1},
        )
        self._check_rate_limit(response)
        response.raise_for_status()
        total = response.json().get("total_count", 0)
        self._cache_set(memo_key, total)
    return total
185
+
186
async def search_pr_count(self, username: str, since_date: str) -> int:
    """Return the pull-request total the Search API reports for ``username``.

    Sends one ``/search/issues`` request with the ``author:``, ``type:pr``
    and ``created:>=`` qualifiers and reads only ``total_count`` from the
    response (hence ``per_page=1``).

    Args:
        username: GitHub login used for the ``author:`` qualifier.
        since_date: Lower bound for ``created`` (presumably an ISO
            ``YYYY-MM-DD`` string; confirm against callers).

    Returns:
        The ``total_count`` field of the search response, or ``0`` when the
        field is absent. Values are cached per (username, since_date).

    Raises:
        Whatever ``raise_for_status()`` raises for a non-success response.
    """
    memo_key = f"pr_count:{username}:{since_date}"
    total = self._cache_get(memo_key)
    # Only None means "not cached"; a cached count of 0 is a valid hit.
    if total is None:
        query = f"author:{username} type:pr created:>={since_date}"
        response = await self._client.get(
            "/search/issues",
            params={"q": query, "per_page": 1},
        )
        self._check_rate_limit(response)
        response.raise_for_status()
        total = response.json().get("total_count", 0)
        self._cache_set(memo_key, total)
    return total
204
+
163
205
  async def get_profile_readme(self, username: str) -> str | None:
164
206
  cache_key = f"readme:{username}"
165
207
  cached = self._cache_get(cache_key)
@@ -0,0 +1,342 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from typing import Any
5
+
6
# Lower-case license identifiers that this module treats as permissive.
PERMISSIVE_LICENSES = frozenset(
    (
        "apache-2.0",
        "bsd-2-clause",
        "bsd-3-clause",
        "isc",
        "mit",
        "unlicense",
    )
)
9
+
10
+
11
+ def _compute_reputation_floor(followers: int, stars: int, account_age_days: int) -> int:
12
+ """Compute a minimum score floor based on cumulative reputation.
13
+
14
+ Prevents well-known developers from scoring low just because their
15
+ recent GitHub activity doesn't match behavioral scoring expectations
16
+ (e.g., Torvalds works via mailing lists, not GitHub PRs).
17
+ """
18
+ if followers >= 10_000 or stars >= 50_000:
19
+ return 150
20
+ if followers >= 1_000 or stars >= 5_000:
21
+ return 120
22
+ if followers >= 500 or stars >= 1_000:
23
+ return 100
24
+ if followers >= 100 or stars >= 200:
25
+ return 80
26
+ return 0
27
+
28
+
29
def compute_activity_score(profile: dict[str, Any]) -> tuple[int, dict[str, int]]:
    """Score a profile's GitHub activity and explain where the points came from.

    Behavioral points (recent commits, profile README, stars, followers,
    described repos, permissive licensing, external OSS work) are each
    capped and summed. The sum is then compared with a reputation floor
    derived from followers and stars, so that established developers are
    not penalized for workflows that produce few GitHub events.

    Args:
        profile: Profile dict; missing keys fall back to zero/empty values.

    Returns:
        ``(total, breakdown)`` where ``total`` is the larger of the
        behavioral sum and the reputation floor, and ``breakdown`` maps each
        signal to its points. A ``"reputation_floor"`` entry is added only
        when the floor exceeds the behavioral sum.
    """
    follower_count = profile.get("followers", 0)
    star_count = profile.get("total_stars_received", 0)
    external_projects = profile.get("major_oss_contributions", [])

    breakdown: dict[str, int] = {
        # 3 points per commit, capped at 60.
        "commits_last_90_days": min(profile.get("commits_last_90_days", 0) * 3, 60),
        "has_profile_readme": 20 if profile.get("has_profile_readme") else 0,
        # 2 points per star, capped at 40.
        "stars_on_own_repos": min(star_count * 2, 40),
        "followers": min(follower_count, 20),
        "repos_with_description": int(profile.get("repos_with_description_ratio", 0.0) * 20),
        "permissive_license_repos": 15 if profile.get("has_permissive_license_repos") else 0,
        # 10 points per external project, capped at 30.
        "major_oss_contributions": min(len(external_projects) * 10, 30),
    }
    earned = sum(breakdown.values())

    # Cumulative impact shouldn't be erased by a quiet quarter.
    floor = _compute_reputation_floor(
        follower_count, star_count, profile.get("account_age_days", 0)
    )
    if floor > earned:
        breakdown["reputation_floor"] = floor
        return floor, breakdown
    return earned, breakdown
70
+
71
+
72
# Filler words that carry no signal about what a job actually requires.
_JD_NOISE_WORDS = frozenset({
    "the", "a", "an", "and", "or", "is", "are", "was", "were", "be", "been",
    "with", "for", "to", "of", "in", "on", "at", "by", "from", "as", "we",
    "you", "our", "your", "this", "that", "will", "can", "should", "must",
    "have", "has", "had", "do", "does", "did", "not", "but", "if", "about",
    "experience", "team", "work", "working", "looking", "join", "role",
    "ability", "strong", "plus", "years", "knowledge", "skills", "required",
    "preferred", "etc", "including", "such", "also", "may", "would", "could",
})

# Letters plus "#", "+" and "." so that "c#", "c++", "node.js" and ".net"
# survive as single tokens.
_JD_TOKEN_RE = re.compile(r"[a-zA-Z#+.]+")


def extract_keywords(job_description: str) -> list[str]:
    """Extract meaningful keywords from a job description.

    The text is lower-cased and split into tokens made of letters, ``#``,
    ``+`` and ``.``. Trailing periods are stripped from each token so that a
    word ending a sentence ("... with Python.") yields ``"python"`` rather
    than ``"python."``; interior and leading dots ("node.js", ".net") are
    kept. Tokens shorter than two characters and common filler words are
    dropped.

    Args:
        job_description: Free-form job description text.

    Returns:
        Unique keywords in order of first appearance.
    """
    # dict keys give insertion-ordered de-duplication.
    ordered: dict[str, None] = {}
    for token in _JD_TOKEN_RE.findall(job_description.lower()):
        # Sentence punctuation is not part of the word; strip it before the
        # length, noise and duplicate checks so "python." == "python".
        word = token.rstrip(".")
        if len(word) >= 2 and word not in _JD_NOISE_WORDS:
            ordered.setdefault(word)
    return list(ordered)
91
+
92
+
93
def compute_relevance_score(profile: dict[str, Any], job_keywords: list[str]) -> int:
    """Return a 0-100 keyword-overlap score between a profile and a job.

    The profile's bio, top languages, external OSS contributions, README
    summary, company, and each notable repo's description, topics and
    language are merged into one lower-cased text blob. The score is the
    percentage of ``job_keywords`` found in that blob.

    Matching is plain substring containment, so a keyword such as "java"
    also counts when the profile only mentions "javascript".

    Args:
        profile: Profile dict; missing or ``None`` text fields count as empty.
        job_keywords: Keywords to look for (expected to be lower-case, since
            the profile text is lower-cased before matching).

    Returns:
        50 when no keywords are given (neutral), otherwise the truncated
        match percentage.
    """
    if not job_keywords:
        return 50

    fragments: list[str] = [
        profile.get("bio") or "",
        " ".join(profile.get("top_languages", [])),
        " ".join(profile.get("major_oss_contributions", [])),
        profile.get("profile_readme_summary") or "",
        profile.get("company") or "",
    ]
    for entry in profile.get("notable_repos", []):
        # Repo entries that are not dicts carry no searchable fields.
        if not isinstance(entry, dict):
            continue
        fragments += [
            entry.get("description") or "",
            *(entry.get("topics") or []),
            entry.get("language") or "",
        ]

    haystack = " ".join(fragments).lower()
    hit_count = len([kw for kw in job_keywords if kw in haystack])
    percentage = int(hit_count / len(job_keywords) * 100)
    return percentage if percentage < 100 else 100
115
+
116
+
117
# Seniority vocab, checked in this order: senior wins over junior, junior
# over mid, mirroring the precedence of the scoring rules below.
_JD_SENIORITY_TERMS = (
    ("senior", frozenset({"senior", "staff", "principal", "lead", "architect", "head", "director"})),
    ("junior", frozenset({"junior", "entry", "associate", "intern", "graduate"})),
    ("mid", frozenset({"mid", "intermediate", "ii", "iii"})),
)


def _infer_required_level(job_description: str) -> str:
    """Return "senior", "junior", "mid" or "unknown" from seniority words in a JD."""
    # Tokenize on letter runs rather than whitespace so that punctuation
    # next to a term ("Senior,", "(Staff)", "mid-level") does not hide it.
    words = set(re.findall(r"[a-z]+", job_description.lower()))
    for level, terms in _JD_SENIORITY_TERMS:
        if words & terms:
            return level
    return "unknown"


def score_jd_dimensions(profile: dict[str, Any], job_description: str) -> dict[str, Any]:
    """Score a profile against a job description across structured dimensions.

    Four 0-100 dimensions are computed and combined into ``overall_fit``
    with weights 35% tech stack, 25% experience level, 25% OSS signal and
    15% leadership signals.

    Args:
        profile: Profile dict; missing keys fall back to zero/empty values.
        job_description: Free-form job description text.

    Returns:
        A dict with ``dimensions`` (the four scores), ``overall_fit``,
        ``required_level``, ``estimated_candidate_level``, ``gaps`` (human
        readable) and up to three ``interview_questions``.
    """
    keywords = extract_keywords(job_description)

    # --- Tech stack match (0-100) ---
    # Match all JD keywords (not just a hardcoded set) against the full profile
    dict_repos = [r for r in profile.get("notable_repos", []) if isinstance(r, dict)]
    profile_tech = " ".join([
        " ".join(profile.get("top_languages", [])),
        " ".join(r.get("language", "") or "" for r in dict_repos),
        " ".join(r.get("name", "") or "" for r in dict_repos),
        " ".join(r.get("description", "") or "" for r in dict_repos),
        " ".join(t for r in dict_repos for t in (r.get("topics") or [])),
        profile.get("profile_readme_summary") or "",
        profile.get("bio") or "",
        " ".join(profile.get("major_oss_contributions", [])),
    ]).lower()

    # Filter keywords to tech-relevant ones (skip soft skills, process words)
    soft_terms = {
        # Role/process words
        "mentoring", "leadership", "communication", "collaboration",
        "agile", "scrum", "remote", "hybrid", "onsite", "track", "record",
        "open", "source", "contributions", "engineer", "engineering",
        "developer", "build", "design", "implement", "maintain",
        "senior", "staff", "principal", "lead", "manager",
        "junior", "mid", "level", "familiar", "familiarity", "bonus", "nice",
        "looking", "seeking", "ideal", "candidate", "responsible", "own",
        "across", "help", "growth", "impact", "high",
        # JD boilerplate
        "requirements", "minimum", "qualifications", "preferred", "required",
        "expect", "type", "full", "time", "part", "contract", "salary",
        "benefits", "company", "team", "position", "opportunity",
        "based", "features", "projects", "production", "software",
        "applications", "understanding", "shipping", "building", "deep",
        "join", "power", "have", "knowledge", "years", "experience",
        "strong", "expertise", "multi", "resilient", "context", "aware",
    }
    tech_keywords = [kw for kw in keywords if kw not in soft_terms and not kw.endswith(".")]

    if tech_keywords:
        tech_matches = sum(1 for t in tech_keywords if t in profile_tech)
        tech_score = min(int((tech_matches / len(tech_keywords)) * 100), 100)
    else:
        tech_score = 50  # no tech requirements specified

    # --- Experience level match (0-100) ---
    required_level = _infer_required_level(job_description)

    # Estimate candidate level from signals
    stars = profile.get("total_stars_received", 0)
    followers = profile.get("followers", 0)
    account_age = profile.get("account_age_days", 0)
    oss_count = len(profile.get("major_oss_contributions", []))

    if followers >= 500 or stars >= 1000 or (account_age > 2500 and oss_count >= 2):
        candidate_level = "senior"
    elif account_age > 1000 or stars >= 50:
        candidate_level = "mid"
    else:
        candidate_level = "junior"

    if required_level == "unknown":
        exp_score = 60
    elif required_level == candidate_level:
        exp_score = 90
    elif required_level == "senior" and candidate_level == "mid":
        exp_score = 50
    elif required_level == "mid" and candidate_level == "senior":
        exp_score = 80  # overqualified but fine
    elif required_level == "junior" and candidate_level != "junior":
        exp_score = 70  # overqualified
    else:
        exp_score = 30  # candidate appears junior for a mid/senior role

    # --- OSS signal (0-100) ---
    # Rescales the activity score so that 150 or more maps to 100.
    activity = profile.get("activity_score", 0)
    oss_score = min(int((activity / 150) * 100), 100)

    # --- Leadership signals (0-100) ---
    leadership_score = 0
    if profile.get("has_profile_readme"):
        leadership_score += 20
    if stars >= 100:
        leadership_score += 20
    if oss_count >= 1:
        leadership_score += 25
    if followers >= 100:
        leadership_score += 15
    desc_ratio = profile.get("repos_with_description_ratio", 0)
    if desc_ratio >= 0.7:
        leadership_score += 10
    if profile.get("has_permissive_license_repos"):
        leadership_score += 10
    leadership_score = min(leadership_score, 100)

    # --- Weighted overall ---
    overall = int(tech_score * 0.35 + exp_score * 0.25 + oss_score * 0.25 + leadership_score * 0.15)

    # --- Gaps ---
    gaps = []
    # Kept separately from the gap text so the interview question below can
    # name the actual missing technologies.
    missing_tech = [t for t in tech_keywords if t not in profile_tech] if tech_score < 50 else []
    experience_gap = exp_score < 50
    if missing_tech:
        gaps.append(f"Missing from profile: {', '.join(missing_tech[:5])}")
    if experience_gap:
        gaps.append(f"Experience level mismatch: role needs {required_level}, candidate appears {candidate_level}")
    if oss_score < 30:
        gaps.append("Low public GitHub activity signal")
    if leadership_score < 30:
        gaps.append("Few visible leadership/community signals")

    # --- Interview questions ---
    questions = []
    top_langs = profile.get("top_languages", [])[:2]
    repos = profile.get("notable_repos", [])
    # Pick the first repo whose name/description mentions a JD tech keyword,
    # falling back to the first listed repo.
    best_repo_name = None
    if repos:
        for repo in repos:
            if not isinstance(repo, dict):
                continue
            # "or ''" keeps a None name/description from becoming the text "None".
            repo_text = f"{repo.get('name') or ''} {repo.get('description') or ''}".lower()
            if any(kw in repo_text for kw in tech_keywords):
                best_repo_name = repo.get("name")
                break
        if not best_repo_name and isinstance(repos[0], dict):
            best_repo_name = repos[0].get("name", "your top project")
    if best_repo_name:
        questions.append(f"Walk me through the architecture of {best_repo_name}.")
    if missing_tech:
        # Ask about the missing technologies themselves, not the gap label.
        questions.append(f"What experience do you have with {', '.join(missing_tech[:5])}?")
    elif experience_gap:
        # The mismatch text is not a technology list, so it needs its own question.
        questions.append(
            f"This role is scoped at the {required_level} level. "
            "What is the most complex project you have owned end to end?"
        )
    elif gaps:
        questions.append(f"Tell me about: {gaps[0]}")
    if top_langs:
        questions.append(f"What trade-offs have you hit working with {top_langs[0]} at scale?")

    return {
        "dimensions": {
            "tech_stack_match": tech_score,
            "experience_level": exp_score,
            "oss_signal": oss_score,
            "leadership_signals": leadership_score,
        },
        "overall_fit": overall,
        "required_level": required_level,
        "estimated_candidate_level": candidate_level,
        "gaps": gaps,
        "interview_questions": questions[:3],
    }
288
+
289
+
290
def generate_strengths_gaps(profile: dict[str, Any]) -> tuple[list[str], list[str]]:
    """Turn profile signals into human-readable strengths and gaps.

    Args:
        profile: Profile dict; missing keys fall back to zero/empty values.

    Returns:
        ``(strengths, gaps)``: two lists of sentences, each in a fixed order
        (activity, contributed-repo stars, README, own stars, external OSS,
        licensing, followers, hireable flag, languages).
    """
    positives: list[str] = []
    concerns: list[str] = []

    # Recent activity: strong if either counter clears its bar, a gap only
    # when both are exactly zero; anything in between is left unmentioned.
    recent_commits = profile.get("commits_last_90_days", 0)
    recent_prs = profile.get("prs_opened_last_90_days", 0)
    if recent_commits > 20 or recent_prs > 10:
        activity_bits = [
            text
            for amount, text in (
                (recent_commits, f"{recent_commits} commits"),
                (recent_prs, f"{recent_prs} PRs opened"),
            )
            if amount > 0
        ]
        positives.append(f"Active contributor: {', '.join(activity_bits)} in last 90 days")
    elif recent_commits == 0 and recent_prs == 0:
        concerns.append("No recent public commit or PR activity")

    upstream_stars = profile.get("contributed_repo_stars", 0)
    if upstream_stars > 1000:
        positives.append(f"Contributes to repos with {upstream_stars:,} combined stars")

    if profile.get("has_profile_readme"):
        positives.append("Maintains a profile README")

    own_stars = profile.get("total_stars_received", 0)
    if own_stars > 50:
        positives.append(f"Popular open source work: {own_stars} total stars received")
    elif own_stars == 0:
        concerns.append("No starred repositories")

    external_projects = profile.get("major_oss_contributions", [])
    if external_projects:
        listed = ", ".join(external_projects[:3])
        positives.append(f"Contributes to {len(external_projects)} external OSS project(s): {listed}")

    if not profile.get("has_permissive_license_repos"):
        concerns.append("No repos with permissive open-source licenses")

    # Follower tiers, highest first; only the best matching tier is reported.
    follower_count = profile.get("followers", 0)
    follower_tiers = (
        (follower_count >= 10_000, "Exceptional"),
        (follower_count >= 1_000, "Strong"),
        (follower_count > 100, "Notable"),
    )
    for qualifies, adjective in follower_tiers:
        if qualifies:
            positives.append(f"{adjective} community presence: {follower_count:,} followers")
            break

    if profile.get("hireable"):
        positives.append("Marked as hireable on GitHub")

    languages = profile.get("top_languages", [])
    if languages:
        positives.append(f"Primary languages: {', '.join(languages[:5])}")

    return positives, concerns