online-audit 1.0.0
This diff shows the contents of the publicly released package version as it appears in its public registry. It is provided for informational purposes only; because 1.0.0 is the initial release, every file below is newly added.
- package/README.md +42 -0
- package/package.json +33 -0
- package/src/config.ts +13 -0
- package/src/server.ts +73 -0
- package/src/tools/github-profile.ts +54 -0
- package/src/tools/reddit-user.ts +69 -0
- package/src/tools/scrape-page.ts +43 -0
- package/src/tools/search-google.ts +24 -0
package/README.md
ADDED
@@ -0,0 +1,42 @@

````markdown
# Online Presence Audit — MCP Server

An MCP server that provides tools to collect a person's public online presence data. The LLM client (Claude, etc.) orchestrates the tools and synthesizes the audit report itself.

## Tools

| Tool | Description |
|------|-------------|
| `search_google` | Google search via Serper API. Use `site:linkedin.com/in/` for LinkedIn data |
| `get_github_profile` | GitHub public profile, bio, top repos, stars, languages |
| `scrape_page` | Scrape any public web page via ScrapeDo (blogs, portfolios, etc.) |
| `get_reddit_user` | Reddit profile, karma, active subreddits, recent activity |

## Setup

```json
{
  "mcpServers": {
    "online-audit": {
      "command": "npx",
      "args": ["online-audit"],
      "env": {
        "SERPER_API_KEY": "your-key",
        "SCRAPEDO_API_KEY": "your-key",
        "REDDIT_CLIENT_ID": "your-id",
        "REDDIT_CLIENT_SECRET": "your-secret"
      }
    }
  }
}
```

### API Keys

- **Serper** — [serper.dev](https://serper.dev) (Google search)
- **ScrapeDo** — [scrape.do](https://scrape.do) (web scraping)
- **Reddit** — [reddit.com/prefs/apps](https://www.reddit.com/prefs/apps) (Reddit API)

## Usage

Ask your LLM:
> "Audit the online presence of Jane Doe. Her GitHub is janedoe and search LinkedIn for her."
````
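The README's Setup block targets Claude Desktop-style clients, but the server can also be driven programmatically for a quick end-to-end check. The sketch below is not part of the package: it assumes the stdio client that ships with `@modelcontextprotocol/sdk` (already a dependency here), and the file name and client name are placeholders.

```typescript
// client-check.ts (hypothetical): drives the published server over stdio.
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

// Spawn the server the same way the Claude Desktop config does.
const transport = new StdioClientTransport({
  command: 'npx',
  args: ['online-audit'],
  env: {
    SERPER_API_KEY: process.env.SERPER_API_KEY!,
    SCRAPEDO_API_KEY: process.env.SCRAPEDO_API_KEY!,
    REDDIT_CLIENT_ID: process.env.REDDIT_CLIENT_ID!,
    REDDIT_CLIENT_SECRET: process.env.REDDIT_CLIENT_SECRET!,
  },
});

const client = new Client({ name: 'online-audit-check', version: '0.0.1' });
await client.connect(transport);

// The server registers four tools; list them, then call one.
const { tools } = await client.listTools();
console.log(tools.map((t) => t.name));

const result = await client.callTool({
  name: 'search_google',
  arguments: { query: 'Jane Doe site:linkedin.com/in/', num: 5 },
});
console.log(result.content);

await client.close();
```

If any of the four environment variables is missing, the spawned server exits during startup (src/config.ts throws on import), so a failed connection here usually points at the `env` block.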
package/package.json
ADDED
@@ -0,0 +1,33 @@

```json
{
  "name": "online-audit",
  "version": "1.0.0",
  "description": "MCP server for auditing a person's public online presence — Google search, GitHub, Reddit, web scraping",
  "type": "module",
  "bin": {
    "online-audit": "src/server.ts"
  },
  "scripts": {
    "start": "tsx src/server.ts"
  },
  "keywords": ["mcp", "online-presence", "audit", "github", "reddit", "scraping"],
  "author": "muratcakmak",
  "license": "MIT",
  "repository": {
    "type": "git",
    "url": "https://github.com/muratcakmak/online-audit.git"
  },
  "files": [
    "src/**/*.ts",
    "package.json",
    "README.md"
  ],
  "dependencies": {
    "@modelcontextprotocol/sdk": "^1.12.1",
    "dotenv": "^16.4.7",
    "tsx": "^4.19.0"
  },
  "devDependencies": {
    "typescript": "^5.7.0",
    "@types/node": "^22.10.0"
  }
}
```
package/src/config.ts
ADDED
@@ -0,0 +1,13 @@

```typescript
import 'dotenv/config';

function requireEnv(key: string): string {
  const v = process.env[key];
  if (!v) throw new Error(`Missing env var: ${key}`);
  return v;
}

export const config = {
  serper: { apiKey: requireEnv('SERPER_API_KEY') },
  scrapedo: { apiKey: requireEnv('SCRAPEDO_API_KEY') },
  reddit: { clientId: requireEnv('REDDIT_CLIENT_ID'), clientSecret: requireEnv('REDDIT_CLIENT_SECRET') },
} as const;
```
package/src/server.ts
ADDED
@@ -0,0 +1,73 @@

```typescript
#!/usr/bin/env npx tsx
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { z } from 'zod';

import { searchGoogle } from './tools/search-google.js';
import { getGithubProfile } from './tools/github-profile.js';
import { scrapePage } from './tools/scrape-page.js';
import { getRedditUser } from './tools/reddit-user.js';

const server = new McpServer({
  name: 'online-audit',
  version: '1.0.0',
});

server.tool(
  'search_google',
  'Search Google for a person or query. Returns top results with titles, URLs, and snippets. Use with site:linkedin.com/in/ to get LinkedIn data.',
  { query: z.string().describe('Search query'), num: z.number().optional().describe('Number of results (default 10)') },
  async ({ query, num }) => {
    try {
      const result = await searchGoogle(query, num);
      return { content: [{ type: 'text', text: result.formatted }] };
    } catch (e: any) {
      return { content: [{ type: 'text', text: `Error: ${e.message}` }], isError: true };
    }
  },
);

server.tool(
  'get_github_profile',
  'Get a GitHub user\'s public profile, bio, and top repositories with stars and languages.',
  { username: z.string().describe('GitHub username') },
  async ({ username }) => {
    try {
      const result = await getGithubProfile(username);
      return { content: [{ type: 'text', text: result.formatted }] };
    } catch (e: any) {
      return { content: [{ type: 'text', text: `Error: ${e.message}` }], isError: true };
    }
  },
);

server.tool(
  'scrape_page',
  'Scrape a public web page and extract text content and social links. Works for personal websites, blogs, portfolios. May not work for heavily protected sites like LinkedIn.',
  { url: z.string().url().describe('URL to scrape') },
  async ({ url }) => {
    try {
      const result = await scrapePage(url);
      return { content: [{ type: 'text', text: result.formatted }] };
    } catch (e: any) {
      return { content: [{ type: 'text', text: `Error: ${e.message}` }], isError: true };
    }
  },
);

server.tool(
  'get_reddit_user',
  'Get a Reddit user\'s profile, karma, active subreddits, and recent posts/comments.',
  { username: z.string().describe('Reddit username (without u/)') },
  async ({ username }) => {
    try {
      const result = await getRedditUser(username);
      return { content: [{ type: 'text', text: result.formatted }] };
    } catch (e: any) {
      return { content: [{ type: 'text', text: `Error: ${e.message}` }], isError: true };
    }
  },
);

const transport = new StdioServerTransport();
await server.connect(transport);
```
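All four `server.tool` registrations above follow the same shape: call a helper that returns a `formatted` string, wrap it as text content, and convert a thrown error into an `isError` result. The published source repeats that wrapper verbatim; the sketch below is a hypothetical refactor, not part of the package, showing how additional tools could reuse the pattern.

```typescript
// register-helpers.ts (hypothetical, not in the published package)
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import type { ZodRawShape } from 'zod';

// Wraps a data-fetching helper in the try/catch + text-content shape
// that server.ts uses for every tool.
export function registerFormattedTool(
  server: McpServer,
  name: string,
  description: string,
  schema: ZodRawShape,
  run: (args: Record<string, unknown>) => Promise<{ formatted: string }>,
) {
  server.tool(name, description, schema, async (args) => {
    try {
      const { formatted } = await run(args);
      return { content: [{ type: 'text' as const, text: formatted }] };
    } catch (e: any) {
      return { content: [{ type: 'text' as const, text: `Error: ${e.message}` }], isError: true };
    }
  });
}
```

With a helper like this, each registration in server.ts would reduce to one call, e.g. `registerFormattedTool(server, 'get_github_profile', '...', { username: z.string() }, ({ username }) => getGithubProfile(String(username)))`.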

package/src/tools/github-profile.ts
ADDED
@@ -0,0 +1,54 @@

```typescript
export async function getGithubProfile(username: string) {
  const [profileRes, reposRes] = await Promise.all([
    fetch(`https://api.github.com/users/${encodeURIComponent(username)}`, {
      headers: { 'User-Agent': 'OnlineAudit/1.0', Accept: 'application/vnd.github+json' },
    }),
    fetch(`https://api.github.com/users/${encodeURIComponent(username)}/repos?sort=updated&per_page=10`, {
      headers: { 'User-Agent': 'OnlineAudit/1.0', Accept: 'application/vnd.github+json' },
    }),
  ]);

  if (!profileRes.ok) throw new Error(`GitHub API error: ${profileRes.status}`);
  const profile = await profileRes.json() as any;
  const repos = reposRes.ok ? (await reposRes.json() as any[]) : [];

  const topRepos = repos.slice(0, 10).map((r: any) => ({
    name: r.name,
    description: r.description,
    language: r.language,
    stars: r.stargazers_count,
    forks: r.forks_count,
    url: r.html_url,
  }));

  const formatted = [
    `# GitHub: ${profile.name ?? username}`,
    `Bio: ${profile.bio ?? 'None'}`,
    `Location: ${profile.location ?? 'Not specified'}`,
    `Company: ${profile.company ?? 'Not specified'}`,
    `Blog: ${profile.blog || 'None'}`,
    `Public repos: ${profile.public_repos} | Followers: ${profile.followers} | Following: ${profile.following}`,
    `Account created: ${profile.created_at}`,
    '',
    '## Top Repositories',
    ...topRepos.map((r: any) => `- **${r.name}** (${r.language ?? '?'}, ${r.stars}★) — ${r.description ?? 'No description'}`),
  ].join('\n');

  return {
    profile: {
      name: profile.name,
      bio: profile.bio,
      location: profile.location,
      company: profile.company,
      blog: profile.blog,
      publicRepos: profile.public_repos,
      followers: profile.followers,
      following: profile.following,
      createdAt: profile.created_at,
      avatarUrl: profile.avatar_url,
      url: profile.html_url,
    },
    topRepos,
    formatted,
  };
}
```

package/src/tools/reddit-user.ts
ADDED
@@ -0,0 +1,69 @@

```typescript
import { config } from '../config.js';

let cachedToken: { token: string; expiresAt: number } | null = null;

async function getRedditToken(): Promise<string> {
  if (cachedToken && Date.now() < cachedToken.expiresAt) return cachedToken.token;

  const res = await fetch('https://www.reddit.com/api/v1/access_token', {
    method: 'POST',
    headers: {
      Authorization: `Basic ${btoa(config.reddit.clientId + ':' + config.reddit.clientSecret)}`,
      'Content-Type': 'application/x-www-form-urlencoded',
    },
    body: 'grant_type=client_credentials',
  });
  if (!res.ok) throw new Error(`Reddit auth error: ${res.status}`);
  const data = await res.json() as any;
  cachedToken = { token: data.access_token, expiresAt: Date.now() + (data.expires_in - 60) * 1000 };
  return data.access_token;
}

export async function getRedditUser(username: string) {
  const token = await getRedditToken();
  const headers = { Authorization: `Bearer ${token}`, 'User-Agent': 'OnlineAudit/1.0' };

  const [aboutRes, overviewRes] = await Promise.all([
    fetch(`https://oauth.reddit.com/user/${encodeURIComponent(username)}/about`, { headers }),
    fetch(`https://oauth.reddit.com/user/${encodeURIComponent(username)}/overview?limit=25`, { headers }),
  ]);

  if (!aboutRes.ok) throw new Error(`Reddit user not found: ${aboutRes.status}`);
  const about = (await aboutRes.json() as any).data;
  const overview = overviewRes.ok ? (await overviewRes.json() as any) : { data: { children: [] } };

  const posts = (overview.data?.children ?? []).map((p: any) => {
    const d = p.data;
    return {
      type: p.kind === 't3' ? 'post' : 'comment',
      subreddit: d.subreddit,
      title: d.title ?? null,
      body: (d.selftext ?? d.body ?? '').slice(0, 200),
      score: d.score,
      url: `https://reddit.com${d.permalink}`,
    };
  });

  const subreddits = [...new Set(posts.map((p: any) => p.subreddit))];

  const formatted = [
    `# Reddit: u/${username}`,
    `Karma: ${about.link_karma + about.comment_karma} (${about.link_karma} post / ${about.comment_karma} comment)`,
    `Account age: created ${new Date(about.created_utc * 1000).toISOString().split('T')[0]}`,
    `Active subreddits: ${subreddits.join(', ') || 'None found'}`,
    '',
    '## Recent Activity',
    ...posts.slice(0, 15).map((p: any) =>
      `- [${p.type}] r/${p.subreddit}: ${p.title ?? p.body.slice(0, 80)} (score: ${p.score})`
    ),
  ].join('\n');

  return {
    username,
    karma: { post: about.link_karma, comment: about.comment_karma },
    createdAt: new Date(about.created_utc * 1000).toISOString(),
    subreddits,
    recentPosts: posts,
    formatted,
  };
}
```

package/src/tools/scrape-page.ts
ADDED
@@ -0,0 +1,43 @@

```typescript
import { config } from '../config.js';

export async function scrapePage(url: string) {
  const targetUrl = encodeURIComponent(url);
  const res = await fetch(
    `https://api.scrape.do/?token=${config.scrapedo.apiKey}&url=${targetUrl}`,
    { signal: AbortSignal.timeout(30_000) },
  );
  if (!res.ok) {
    const body = await res.text().catch(() => '');
    throw new Error(`ScrapeDo error ${res.status}: ${body.slice(0, 200)}`);
  }

  const html = await res.text();

  // Strip tags, collapse whitespace, truncate
  const text = html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .trim()
    .slice(0, 8000);

  // Extract social links
  const linkRegex = /href="(https?:\/\/[^"]+)"/g;
  const socialDomains = ['github.com', 'linkedin.com', 'twitter.com', 'x.com', 'medium.com', 'dev.to', 'youtube.com'];
  const socialLinks: string[] = [];
  let match;
  while ((match = linkRegex.exec(html)) !== null) {
    if (socialDomains.some(d => match![1].includes(d))) {
      socialLinks.push(match[1]);
    }
  }

  return {
    url,
    textLength: text.length,
    text,
    socialLinks: [...new Set(socialLinks)],
    formatted: `# Scraped: ${url}\n\n${text}\n\n## Social links found:\n${socialLinks.length ? socialLinks.map(l => `- ${l}`).join('\n') : 'None'}`,
  };
}
```

package/src/tools/search-google.ts
ADDED
@@ -0,0 +1,24 @@

```typescript
import { config } from '../config.js';

export async function searchGoogle(query: string, num = 10) {
  const res = await fetch('https://google.serper.dev/search', {
    method: 'POST',
    headers: { 'X-API-KEY': config.serper.apiKey, 'Content-Type': 'application/json' },
    body: JSON.stringify({ q: query, num }),
  });
  if (!res.ok) throw new Error(`Serper API error: ${res.status} ${await res.text()}`);
  const data = await res.json() as any;

  const results = (data.organic ?? []).map((r: any) => ({
    title: r.title,
    url: r.link,
    snippet: r.snippet,
  }));

  return {
    query,
    knowledgeGraph: data.knowledgeGraph ?? null,
    results,
    formatted: results.map((r: any, i: number) => `${i + 1}. ${r.title}\n ${r.url}\n ${r.snippet}`).join('\n\n'),
  };
}
```
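Since each tool module exports a plain async function, the helpers can also be exercised directly, without an MCP client in the loop. The sketch below is a hypothetical smoke test, not part of the package: it runs from the package root with `npx tsx`, and all four env vars must be set because src/config.ts validates them eagerly on import.

```typescript
// smoke-test.ts (hypothetical): npx tsx smoke-test.ts
// Requires SERPER_API_KEY, SCRAPEDO_API_KEY, REDDIT_CLIENT_ID and
// REDDIT_CLIENT_SECRET in the environment (or a .env file).
import { searchGoogle } from './src/tools/search-google.js';
import { getGithubProfile } from './src/tools/github-profile.js';
import { getRedditUser } from './src/tools/reddit-user.js';

// Same example subject as the README's Usage section; swap in real handles.
const search = await searchGoogle('Jane Doe site:linkedin.com/in/', 5);
console.log(search.formatted);

const github = await getGithubProfile('janedoe');
console.log(github.formatted);

const reddit = await getRedditUser('janedoe');
console.log(reddit.formatted);
```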