ghspy 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ghspy-0.1.0/LICENSE +21 -0
- ghspy-0.1.0/PKG-INFO +146 -0
- ghspy-0.1.0/README.md +115 -0
- ghspy-0.1.0/ghspy/__init__.py +3 -0
- ghspy-0.1.0/ghspy/__main__.py +6 -0
- ghspy-0.1.0/ghspy/analyzer.py +305 -0
- ghspy-0.1.0/ghspy/cli.py +176 -0
- ghspy-0.1.0/ghspy/display.py +227 -0
- ghspy-0.1.0/ghspy/github_api.py +85 -0
- ghspy-0.1.0/ghspy.egg-info/PKG-INFO +146 -0
- ghspy-0.1.0/ghspy.egg-info/SOURCES.txt +15 -0
- ghspy-0.1.0/ghspy.egg-info/dependency_links.txt +1 -0
- ghspy-0.1.0/ghspy.egg-info/entry_points.txt +2 -0
- ghspy-0.1.0/ghspy.egg-info/requires.txt +3 -0
- ghspy-0.1.0/ghspy.egg-info/top_level.txt +1 -0
- ghspy-0.1.0/pyproject.toml +46 -0
- ghspy-0.1.0/setup.cfg +4 -0
ghspy-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 shazeus
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
ghspy-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ghspy
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: GitHub OSINT tool — extract intelligence from any GitHub user profile
|
|
5
|
+
Author: shazeus
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/shazeus/ghspy
|
|
8
|
+
Project-URL: Repository, https://github.com/shazeus/ghspy
|
|
9
|
+
Project-URL: Issues, https://github.com/shazeus/ghspy/issues
|
|
10
|
+
Keywords: github,osint,reconnaissance,cli,security,intelligence
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Security
|
|
23
|
+
Classifier: Topic :: Utilities
|
|
24
|
+
Requires-Python: >=3.8
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: click>=8.0
|
|
28
|
+
Requires-Dist: requests>=2.28
|
|
29
|
+
Requires-Dist: rich>=13.0
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
<p align="center">
|
|
33
|
+
<h1 align="center">GhSpy</h1>
|
|
34
|
+
<p align="center">GitHub OSINT tool — extract intelligence from any GitHub user profile.</p>
|
|
35
|
+
<p align="center">
|
|
36
|
+
<a href="https://pypi.org/project/ghspy/"><img src="https://img.shields.io/pypi/v/ghspy?color=blue&label=PyPI" alt="PyPI"></a>
|
|
37
|
+
<a href="https://pypi.org/project/ghspy/"><img src="https://img.shields.io/pypi/pyversions/ghspy" alt="Python"></a>
|
|
38
|
+
<a href="https://github.com/shazeus/ghspy/blob/main/LICENSE"><img src="https://img.shields.io/github/license/shazeus/ghspy" alt="License"></a>
|
|
39
|
+
<a href="https://github.com/shazeus/ghspy/stargazers"><img src="https://img.shields.io/github/stars/shazeus/ghspy?style=social" alt="Stars"></a>
|
|
40
|
+
</p>
|
|
41
|
+
</p>
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
Discover emails, estimate timezones, map tech stacks, find collaborators, and analyze activity patterns — all from your terminal. GhSpy uses the public GitHub API to gather open-source intelligence on any GitHub user.
|
|
46
|
+
|
|
47
|
+
- **Email Discovery** — extract real email addresses from commit history
|
|
48
|
+
- **Timezone Estimation** — estimate location from commit hour patterns
|
|
49
|
+
- **Activity Analysis** — peak hours, active days, contribution streaks
|
|
50
|
+
- **Tech Stack Mapping** — languages, topics, original vs forked repos
|
|
51
|
+
- **Collaborator Detection** — frequent interactions, orgs, following
|
|
52
|
+
- **Identity Mapping** — cross-reference commit emails and author names
|
|
53
|
+
|
|
54
|
+
## Installation
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install ghspy
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Requires Python 3.8+. Works on Linux, macOS, and Windows.
|
|
61
|
+
|
|
62
|
+
## Usage
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
# Full OSINT scan
|
|
66
|
+
ghspy scan torvalds
|
|
67
|
+
|
|
68
|
+
# Extract emails from commit history
|
|
69
|
+
ghspy emails dhh
|
|
70
|
+
|
|
71
|
+
# Estimate timezone
|
|
72
|
+
ghspy timezone antirez
|
|
73
|
+
|
|
74
|
+
# Activity patterns
|
|
75
|
+
ghspy activity shazeus
|
|
76
|
+
|
|
77
|
+
# Tech stack
|
|
78
|
+
ghspy techstack gvanrossum
|
|
79
|
+
|
|
80
|
+
# Collaborators & organizations
|
|
81
|
+
ghspy collabs octocat
|
|
82
|
+
|
|
83
|
+
# Export to JSON
|
|
84
|
+
ghspy export torvalds --format json -o report.json
|
|
85
|
+
|
|
86
|
+
# Export to CSV
|
|
87
|
+
ghspy export torvalds --format csv -o report.csv
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Commands
|
|
91
|
+
|
|
92
|
+
| Command | Description |
|
|
93
|
+
|---------|-------------|
|
|
94
|
+
| `ghspy scan <user>` | Full OSINT scan with all modules |
|
|
95
|
+
| `ghspy emails <user>` | Extract emails from commit history |
|
|
96
|
+
| `ghspy timezone <user>` | Estimate timezone from commit patterns |
|
|
97
|
+
| `ghspy activity <user>` | Activity breakdown by hour, day, and event type |
|
|
98
|
+
| `ghspy techstack <user>` | Languages, topics, and repo statistics |
|
|
99
|
+
| `ghspy collabs <user>` | Collaborators, organizations, and following |
|
|
100
|
+
| `ghspy export <user>` | Export findings to JSON or CSV |
|
|
101
|
+
| `ghspy rate-limit` | Check GitHub API rate limit |
|
|
102
|
+
|
|
103
|
+
## Configuration
|
|
104
|
+
|
|
105
|
+
### GitHub Token
|
|
106
|
+
|
|
107
|
+
Without a token you get **60 requests/hour**. With a token you get **5,000 requests/hour**. A full scan uses 20–50 requests depending on the user's repo count.
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# Set as environment variable (recommended)
|
|
111
|
+
export GITHUB_TOKEN=ghp_your_token_here
|
|
112
|
+
|
|
113
|
+
# Or pass directly
|
|
114
|
+
ghspy --token ghp_xxxx scan torvalds
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Generate a token at [github.com/settings/tokens](https://github.com/settings/tokens) — no special scopes needed for public data.
|
|
118
|
+
|
|
119
|
+
### JSON Output
|
|
120
|
+
|
|
121
|
+
The `scan` command supports `--json-output` for piping to other tools:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
ghspy scan user --json-output | jq '.emails'
|
|
125
|
+
ghspy scan user --json-output | jq '.timezone.estimated_timezone'
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## How It Works
|
|
129
|
+
|
|
130
|
+
GhSpy queries the public GitHub REST API to collect:
|
|
131
|
+
|
|
132
|
+
1. **User profile** — public info, bio, location, social links
|
|
133
|
+
2. **Repositories** — languages, topics, fork status, activity dates
|
|
134
|
+
3. **Commit history** — author emails, timestamps, committer info
|
|
135
|
+
4. **Public events** — pushes, PRs, issues, comments
|
|
136
|
+
5. **Social graph** — followers, following, organizations
|
|
137
|
+
|
|
138
|
+
All data is publicly available through GitHub's API. No authentication bypass or scraping is involved.
|
|
139
|
+
|
|
140
|
+
## Disclaimer
|
|
141
|
+
|
|
142
|
+
This tool only accesses **publicly available data** through the official GitHub API. It does not bypass access controls, scrape private information, or violate GitHub's Terms of Service. Use responsibly.
|
|
143
|
+
|
|
144
|
+
## License
|
|
145
|
+
|
|
146
|
+
[MIT](LICENSE)
|
ghspy-0.1.0/README.md
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<h1 align="center">GhSpy</h1>
|
|
3
|
+
<p align="center">GitHub OSINT tool — extract intelligence from any GitHub user profile.</p>
|
|
4
|
+
<p align="center">
|
|
5
|
+
<a href="https://pypi.org/project/ghspy/"><img src="https://img.shields.io/pypi/v/ghspy?color=blue&label=PyPI" alt="PyPI"></a>
|
|
6
|
+
<a href="https://pypi.org/project/ghspy/"><img src="https://img.shields.io/pypi/pyversions/ghspy" alt="Python"></a>
|
|
7
|
+
<a href="https://github.com/shazeus/ghspy/blob/main/LICENSE"><img src="https://img.shields.io/github/license/shazeus/ghspy" alt="License"></a>
|
|
8
|
+
<a href="https://github.com/shazeus/ghspy/stargazers"><img src="https://img.shields.io/github/stars/shazeus/ghspy?style=social" alt="Stars"></a>
|
|
9
|
+
</p>
|
|
10
|
+
</p>
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
Discover emails, estimate timezones, map tech stacks, find collaborators, and analyze activity patterns — all from your terminal. GhSpy uses the public GitHub API to gather open-source intelligence on any GitHub user.
|
|
15
|
+
|
|
16
|
+
- **Email Discovery** — extract real email addresses from commit history
|
|
17
|
+
- **Timezone Estimation** — estimate location from commit hour patterns
|
|
18
|
+
- **Activity Analysis** — peak hours, active days, contribution streaks
|
|
19
|
+
- **Tech Stack Mapping** — languages, topics, original vs forked repos
|
|
20
|
+
- **Collaborator Detection** — frequent interactions, orgs, following
|
|
21
|
+
- **Identity Mapping** — cross-reference commit emails and author names
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install ghspy
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Requires Python 3.8+. Works on Linux, macOS, and Windows.
|
|
30
|
+
|
|
31
|
+
## Usage
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
# Full OSINT scan
|
|
35
|
+
ghspy scan torvalds
|
|
36
|
+
|
|
37
|
+
# Extract emails from commit history
|
|
38
|
+
ghspy emails dhh
|
|
39
|
+
|
|
40
|
+
# Estimate timezone
|
|
41
|
+
ghspy timezone antirez
|
|
42
|
+
|
|
43
|
+
# Activity patterns
|
|
44
|
+
ghspy activity shazeus
|
|
45
|
+
|
|
46
|
+
# Tech stack
|
|
47
|
+
ghspy techstack gvanrossum
|
|
48
|
+
|
|
49
|
+
# Collaborators & organizations
|
|
50
|
+
ghspy collabs octocat
|
|
51
|
+
|
|
52
|
+
# Export to JSON
|
|
53
|
+
ghspy export torvalds --format json -o report.json
|
|
54
|
+
|
|
55
|
+
# Export to CSV
|
|
56
|
+
ghspy export torvalds --format csv -o report.csv
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Commands
|
|
60
|
+
|
|
61
|
+
| Command | Description |
|
|
62
|
+
|---------|-------------|
|
|
63
|
+
| `ghspy scan <user>` | Full OSINT scan with all modules |
|
|
64
|
+
| `ghspy emails <user>` | Extract emails from commit history |
|
|
65
|
+
| `ghspy timezone <user>` | Estimate timezone from commit patterns |
|
|
66
|
+
| `ghspy activity <user>` | Activity breakdown by hour, day, and event type |
|
|
67
|
+
| `ghspy techstack <user>` | Languages, topics, and repo statistics |
|
|
68
|
+
| `ghspy collabs <user>` | Collaborators, organizations, and following |
|
|
69
|
+
| `ghspy export <user>` | Export findings to JSON or CSV |
|
|
70
|
+
| `ghspy rate-limit` | Check GitHub API rate limit |
|
|
71
|
+
|
|
72
|
+
## Configuration
|
|
73
|
+
|
|
74
|
+
### GitHub Token
|
|
75
|
+
|
|
76
|
+
Without a token you get **60 requests/hour**. With a token you get **5,000 requests/hour**. A full scan uses 20–50 requests depending on the user's repo count.
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# Set as environment variable (recommended)
|
|
80
|
+
export GITHUB_TOKEN=ghp_your_token_here
|
|
81
|
+
|
|
82
|
+
# Or pass directly
|
|
83
|
+
ghspy --token ghp_xxxx scan torvalds
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Generate a token at [github.com/settings/tokens](https://github.com/settings/tokens) — no special scopes needed for public data.
|
|
87
|
+
|
|
88
|
+
### JSON Output
|
|
89
|
+
|
|
90
|
+
The `scan` command supports `--json-output` for piping to other tools:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
ghspy scan user --json-output | jq '.emails'
|
|
94
|
+
ghspy scan user --json-output | jq '.timezone.estimated_timezone'
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## How It Works
|
|
98
|
+
|
|
99
|
+
GhSpy queries the public GitHub REST API to collect:
|
|
100
|
+
|
|
101
|
+
1. **User profile** — public info, bio, location, social links
|
|
102
|
+
2. **Repositories** — languages, topics, fork status, activity dates
|
|
103
|
+
3. **Commit history** — author emails, timestamps, committer info
|
|
104
|
+
4. **Public events** — pushes, PRs, issues, comments
|
|
105
|
+
5. **Social graph** — followers, following, organizations
|
|
106
|
+
|
|
107
|
+
All data is publicly available through GitHub's API. No authentication bypass or scraping is involved.
|
|
108
|
+
|
|
109
|
+
## Disclaimer
|
|
110
|
+
|
|
111
|
+
This tool only accesses **publicly available data** through the official GitHub API. It does not bypass access controls, scrape private information, or violate GitHub's Terms of Service. Use responsibly.
|
|
112
|
+
|
|
113
|
+
## License
|
|
114
|
+
|
|
115
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
"""OSINT analysis logic for GhSpy."""
|
|
2
|
+
|
|
3
|
+
from collections import Counter, defaultdict
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class UserAnalyzer:
    """Build OSINT-style summaries for one GitHub user from API responses.

    ``client`` must provide ``get_user``, ``get_repos``, ``get_events``,
    ``get_repo_commits``, ``get_orgs`` and ``get_following``. The profile,
    repository list and event list are fetched on first access and cached
    on the instance.
    """

    def __init__(self, client, username):
        """Store the API client and the login to analyze; nothing is fetched yet."""
        self.client = client
        self.username = username
        self._user = None
        self._repos = None
        self._events = None

    @property
    def user(self):
        """Profile payload from ``client.get_user``, fetched once and cached."""
        if self._user is None:
            self._user = self.client.get_user(self.username)
        return self._user

    @property
    def repos(self):
        """Repository list from ``client.get_repos``, fetched once and cached."""
        if self._repos is None:
            self._repos = self.client.get_repos(self.username)
        return self._repos

    @property
    def events(self):
        """Event list from ``client.get_events``, fetched once and cached."""
        if self._events is None:
            self._events = self.client.get_events(self.username)
        return self._events

    @staticmethod
    def _parse_timestamp(value):
        """Parse an ISO-8601 timestamp string, accepting a trailing ``Z``."""
        # datetime.fromisoformat() only accepts the "Z" suffix from Python 3.11.
        return datetime.fromisoformat(value.replace("Z", "+00:00"))

    def _is_self(self, login):
        """Return True when ``login`` names the analyzed user (case-insensitive)."""
        return login.lower() == self.username.lower()

    def _visit_commits(self, repo_limit, commit_limit, visit):
        """Call ``visit(commit_data)`` for commits authored by the user.

        Only the first ``repo_limit`` repositories are considered and forks
        among them are skipped; for each remaining repository one page of
        commits is requested and at most ``commit_limit`` entries are visited.
        ``commit_data`` is the ``"commit"`` object of each API entry.
        """
        for repo in self.repos[:repo_limit]:
            if repo.get("fork"):
                continue
            try:
                commits = self.client.get_repo_commits(
                    repo["owner"]["login"], repo["name"],
                    author=self.username, max_pages=1,
                )
                for entry in commits[:commit_limit]:
                    visit(entry.get("commit", {}))
            except Exception:
                # Deliberately best-effort: a repository that cannot be read
                # or parsed is skipped (keeping what was collected so far)
                # instead of aborting the whole scan.
                continue

    def profile_info(self):
        """Return a flat dict of public profile fields.

        Optional text fields that are missing or empty are reported as
        ``"N/A"``; ``hireable`` is passed through unchanged.
        """
        u = self.user
        return {
            "login": u["login"],
            "name": u.get("name") or "N/A",
            "bio": u.get("bio") or "N/A",
            "company": u.get("company") or "N/A",
            "location": u.get("location") or "N/A",
            "email": u.get("email") or "N/A",
            "blog": u.get("blog") or "N/A",
            "twitter": u.get("twitter_username") or "N/A",
            "public_repos": u["public_repos"],
            "public_gists": u["public_gists"],
            "followers": u["followers"],
            "following": u["following"],
            "created_at": u["created_at"],
            "updated_at": u["updated_at"],
            "avatar": u["avatar_url"],
            "profile": u["html_url"],
            "hireable": u.get("hireable"),
        }

    def extract_emails(self):
        """Return a sorted list of e-mail addresses linked to the user.

        Combines the profile e-mail (if set) with author and committer
        addresses from up to 20 commits in each of the first 10 repositories.
        Commit addresses matching the no-reply filter are left out.
        """
        emails = set()
        profile_email = self.user.get("email")
        if profile_email:
            emails.add(profile_email)

        def collect(commit):
            for role in ("author", "committer"):
                address = commit.get(role, {}).get("email")
                if address and not self._is_noreply(address):
                    emails.add(address)

        self._visit_commits(10, 20, collect)
        return sorted(emails)

    def _is_noreply(self, email):
        """Return True when ``email`` contains ``noreply`` (case-insensitive)."""
        # "users.noreply.github.com" also contains "noreply", so one
        # substring test covers both patterns the original list named.
        return "noreply" in email.lower()

    def estimate_timezone(self):
        """Guess a UTC offset from the hours at which the user commits.

        Looks at up to 30 commits from each of the first 8 repositories,
        averages the five most frequent commit hours and maps that average
        to an offset and a coarse region label. Confidence is ``"high"``
        above 50 commits, ``"medium"`` above 20, otherwise ``"low"``.

        When no commit timestamps are available only
        ``estimated_utc_offset`` (``None``), ``confidence`` (``"none"``) and
        ``peak_hours_utc`` (``[]``) are returned.
        """
        hours = []

        def collect(commit):
            stamp = commit.get("author", {}).get("date", "")
            if stamp:
                hours.append(self._parse_timestamp(stamp).hour)

        self._visit_commits(8, 30, collect)

        if not hours:
            return {"estimated_utc_offset": None, "confidence": "none", "peak_hours_utc": []}

        hour_counts = Counter(hours)
        peak_hours = [h for h, _ in hour_counts.most_common(5)]
        avg_peak = sum(peak_hours) / len(peak_hours)

        # The heuristic anchors on hour 9 (presumably the assumed local time
        # of peak activity): offset = 9 - average peak hour.
        if 6 <= avg_peak <= 10:
            offset = 0
            region = "UK/Portugal/West Africa"
        elif 10 < avg_peak <= 14:
            offset = -(avg_peak - 9)
            region = "US East / Americas"
        elif 14 < avg_peak <= 18:
            offset = -(avg_peak - 9)
            region = "US West / Pacific"
        elif avg_peak > 18 or avg_peak < 3:
            offset = 24 - avg_peak + 9 if avg_peak > 18 else 9 - avg_peak
            region = "Asia / Eastern"
        else:
            offset = 9 - avg_peak
            region = "Europe"

        # Round once and reuse the value so the label and the numeric field
        # always agree; ":+d" supplies the sign, so no literal "+" is added
        # (the Europe label used to render as "UTC++5").
        utc_offset = round(offset)
        tz_guess = f"UTC{utc_offset:+d} ({region})"

        total_commits = len(hours)
        if total_commits > 50:
            confidence = "high"
        elif total_commits > 20:
            confidence = "medium"
        else:
            confidence = "low"

        return {
            "estimated_timezone": tz_guess,
            "estimated_utc_offset": utc_offset,
            "confidence": confidence,
            "total_commits_analyzed": total_commits,
            "peak_hours_utc": sorted(peak_hours),
            "hour_distribution": dict(sorted(hour_counts.items())),
        }

    def activity_patterns(self):
        """Summarize when the user is active.

        Timestamps come from every cached event plus up to 20 commits from
        each of the first 5 repositories. ``streak`` counts consecutive
        calendar days ending at the most recent active day.
        """
        moments = []

        for event in self.events:
            stamp = event.get("created_at", "")
            if stamp:
                moments.append(self._parse_timestamp(stamp))

        def collect(commit):
            stamp = commit.get("author", {}).get("date", "")
            if stamp:
                moments.append(self._parse_timestamp(stamp))

        self._visit_commits(5, 20, collect)

        hour_counts = Counter(m.hour for m in moments)
        day_counts = Counter(m.strftime("%A") for m in moments)
        dates = [m.date() for m in moments]

        day_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
        sorted_days = {d: day_counts.get(d, 0) for d in day_order}

        streak = 0
        if dates:
            unique_days = sorted(set(dates), reverse=True)
            streak = 1
            # Walk back from the newest day until a gap larger than one day.
            for newer, older in zip(unique_days, unique_days[1:]):
                if (newer - older).days != 1:
                    break
                streak += 1

        event_types = Counter(e.get("type", "Unknown") for e in self.events)

        return {
            "total_events": len(self.events),
            "by_hour": dict(sorted(hour_counts.items())),
            "by_day": sorted_days,
            "streak": streak,
            "event_types": dict(event_types.most_common(10)),
            "first_activity": min(dates).isoformat() if dates else None,
            "last_activity": max(dates).isoformat() if dates else None,
        }

    def tech_stack(self):
        """Count primary languages and topics across the user's non-fork repos."""
        languages = Counter()
        topics = Counter()

        for repo in self.repos:
            if repo.get("fork"):
                continue
            lang = repo.get("language")
            if lang:
                languages[lang] += 1
            for topic in repo.get("topics", []):
                topics[topic] += 1

        return {
            "languages": dict(languages.most_common(15)),
            "topics": dict(topics.most_common(20)),
            "total_repos": len(self.repos),
            "original_repos": sum(1 for r in self.repos if not r.get("fork")),
            "forked_repos": sum(1 for r in self.repos if r.get("fork")),
        }

    def find_collaborators(self):
        """Return people and organizations the user interacts with.

        Collaborators are counted from pull-request events (PR author and
        merger) and issue-comment events (issue author), excluding the
        analyzed user. Also returns organization logins, the first 20
        followed logins and the total number of followed accounts returned
        by the client.
        """
        collaborators = Counter()

        def credit(account):
            # ``account`` may be missing or null in the payload.
            login = (account or {}).get("login")
            if login and not self._is_self(login):
                collaborators[login] += 1

        for event in self.events:
            payload = event.get("payload") or {}
            kind = event.get("type")
            if kind == "PullRequestEvent":
                pr = payload.get("pull_request") or {}
                credit(pr.get("user"))
                credit(pr.get("merged_by"))
            elif kind == "IssueCommentEvent":
                issue = payload.get("issue") or {}
                credit(issue.get("user"))

        orgs = self.client.get_orgs(self.username)
        org_names = [o["login"] for o in orgs]

        following = self.client.get_following(self.username)
        following_names = [f["login"] for f in following[:20]]

        return {
            "frequent_collaborators": dict(collaborators.most_common(15)),
            "organizations": org_names,
            "following": following_names,
            "following_count": len(following),
        }

    def detect_alt_emails(self):
        """Map commit author e-mails to the author names used with them.

        Inspects up to 20 commits from each of the first 10 repositories and
        returns one identity record per e-mail plus counts of distinct
        e-mails and distinct names.
        """
        email_to_names = defaultdict(set)
        name_to_emails = defaultdict(set)

        def collect(commit):
            author = commit.get("author", {})
            name = author.get("name", "")
            email = author.get("email", "")
            if name and email:
                email_to_names[email].add(name)
                name_to_emails[name].add(email)

        self._visit_commits(10, 20, collect)

        identities = [
            {
                "email": email,
                "names": sorted(names),
                "is_noreply": self._is_noreply(email),
            }
            for email, names in email_to_names.items()
        ]

        return {
            "identities": identities,
            "unique_emails": len(email_to_names),
            "unique_names": len(name_to_emails),
        }

    def full_scan(self):
        """Run every analysis and return the results keyed by section name."""
        return {
            "profile": self.profile_info(),
            "emails": self.extract_emails(),
            "timezone": self.estimate_timezone(),
            "activity": self.activity_patterns(),
            "tech_stack": self.tech_stack(),
            "collaborators": self.find_collaborators(),
            "identities": self.detect_alt_emails(),
        }
|
ghspy-0.1.0/ghspy/cli.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""CLI interface for GhSpy."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
|
|
8
|
+
from ghspy import __version__
|
|
9
|
+
from ghspy.github_api import GitHubClient
|
|
10
|
+
from ghspy.analyzer import UserAnalyzer
|
|
11
|
+
from ghspy import display
|
|
12
|
+
|
|
13
|
+
console = Console()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@click.group()
@click.version_option(version=__version__, prog_name="ghspy")
@click.option("--token", envvar="GITHUB_TOKEN", help="GitHub personal access token")
@click.pass_context
def cli(ctx, token):
    """GhSpy - GitHub OSINT tool for user reconnaissance."""
    # The docstring above is shown by click as the group's help text, so it
    # uses the published project name (GhSpy).
    # Subcommands read the shared API client from ctx.obj["client"].
    ctx.ensure_object(dict)
    ctx.obj["client"] = GitHubClient(token)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@cli.command()
@click.argument("username")
@click.option("--json-output", "as_json", is_flag=True, help="Output as JSON")
@click.pass_context
def scan(ctx, username, as_json):
    """Full OSINT scan of a GitHub user."""
    target = UserAnalyzer(ctx.obj["client"], username)

    if not as_json:
        # Human-readable mode: banner first, then a spinner while collecting.
        display.print_header(username)
        with console.status(f"[cyan]Scanning @{username}...[/]"):
            findings = target.full_scan()
        display.print_full_scan(findings)
        return

    # JSON mode: no banner or spinner, only the serialized report.
    findings = target.full_scan()
    click.echo(json.dumps(findings, indent=2, default=str))
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@cli.command()
@click.argument("username")
@click.pass_context
def emails(ctx, username):
    """Extract email addresses from commit history."""
    display.print_header(username)
    target = UserAnalyzer(ctx.obj["client"], username)
    # Show a spinner while the commit history is queried.
    with console.status("[cyan]Extracting emails from commits...[/]"):
        addresses = target.extract_emails()
    display.print_emails(addresses)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@cli.command()
@click.argument("username")
@click.pass_context
def timezone(ctx, username):
    """Estimate timezone from commit timestamps."""
    display.print_header(username)
    target = UserAnalyzer(ctx.obj["client"], username)
    # Show a spinner while commit timestamps are collected and analyzed.
    with console.status("[cyan]Analyzing commit timestamps...[/]"):
        estimate = target.estimate_timezone()
    display.print_timezone(estimate)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@cli.command()
@click.argument("username")
@click.pass_context
def activity(ctx, username):
    """Show activity patterns and heatmap."""
    display.print_header(username)
    target = UserAnalyzer(ctx.obj["client"], username)
    # Show a spinner while events and commits are collected.
    with console.status("[cyan]Analyzing activity...[/]"):
        patterns = target.activity_patterns()
    display.print_activity(patterns)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@cli.command()
@click.argument("username")
@click.pass_context
def collabs(ctx, username):
    """List frequent collaborators and organizations."""
    display.print_header(username)
    target = UserAnalyzer(ctx.obj["client"], username)
    # Show a spinner while events, organizations and followed users load.
    with console.status("[cyan]Finding collaborators...[/]"):
        network = target.find_collaborators()
    display.print_collaborators(network)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@cli.command()
@click.argument("username")
@click.pass_context
def techstack(ctx, username):
    """Show technology stack from repositories."""
    display.print_header(username)
    # No spinner is shown for this command; the summary is computed and
    # rendered directly.
    summary = UserAnalyzer(ctx.obj["client"], username).tech_stack()
    display.print_tech_stack(summary)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@cli.command()
@click.argument("username")
@click.option("--format", "fmt", type=click.Choice(["json", "csv"]), default="json")
@click.option("--output", "-o", help="Output file path")
@click.pass_context
def export(ctx, username, fmt, output):
    """Export all findings to JSON or CSV."""
    analyzer = UserAnalyzer(ctx.obj["client"], username)

    with console.status(f"[cyan]Running full scan on @{username}...[/]"):
        report = analyzer.full_scan()

    if fmt == "json":
        content = json.dumps(report, indent=2, default=str)
    else:
        # click.Choice restricts fmt to "json" or "csv", so this is "csv".
        import csv
        import io

        out = io.StringIO()
        writer = csv.writer(out)
        writer.writerow(["Category", "Key", "Value"])

        for k, v in report["profile"].items():
            writer.writerow(["profile", k, v])

        for email in report["emails"]:
            writer.writerow(["email", "address", email])

        for k, v in report["timezone"].items():
            # The per-hour histogram is a nested dict and is left out of
            # the flat three-column layout.
            if k != "hour_distribution":
                writer.writerow(["timezone", k, v])

        for lang, count in report["tech_stack"]["languages"].items():
            writer.writerow(["language", lang, count])

        content = out.getvalue()

    if not output:
        click.echo(content)
        return

    # csv.writer already emits "\r\n" row terminators, so the file must be
    # opened with newline="" or platforms that translate "\n" would write
    # "\r\r\n". UTF-8 is set explicitly so non-ASCII profile text does not
    # depend on the locale's default encoding.
    newline = "" if fmt == "csv" else None
    with open(output, "w", encoding="utf-8", newline=newline) as f:
        f.write(content)
    console.print(f"[green]Exported to {output}[/]")
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@cli.command(name="rate-limit")
@click.pass_context
def rate_limit(ctx):
    """Check GitHub API rate limit status."""
    # Imported locally: this module also defines a command function named
    # ``timezone``, so the datetime name is kept out of module scope.
    from datetime import datetime, timezone

    api = ctx.obj["client"]
    core_quota = api.get_rate_limit()["resources"]["core"]

    console.print("\n[bold]GitHub API Rate Limit[/]")
    left = core_quota["remaining"]
    cap = core_quota["limit"]
    # Highlight in red once 100 or fewer requests remain.
    if left > 100:
        shade = "green"
    else:
        shade = "red"
    console.print(f" Remaining: [{shade}]{left}[/] / {cap}")

    # "reset" is converted from an epoch timestamp to an aware UTC datetime.
    resets_at = datetime.fromtimestamp(core_quota["reset"], tz=timezone.utc)
    console.print(f" Resets at: {resets_at.strftime('%H:%M:%S UTC')}")
    if not api.token:
        console.print(" [yellow]Tip: Set GITHUB_TOKEN for 5000 req/hr instead of 60[/]")
    console.print()
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"""Rich terminal display for GitSpy."""
|
|
2
|
+
|
|
3
|
+
from rich.console import Console
|
|
4
|
+
from rich.panel import Panel
|
|
5
|
+
from rich.table import Table
|
|
6
|
+
from rich import box
|
|
7
|
+
|
|
8
|
+
console = Console()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def print_header(username):
    """Print the banner panel announcing which account is being scanned.

    Args:
        username: GitHub login shown after the ``@`` in the banner.
    """
    console.print()
    console.print(
        Panel(
            # The tool's name is GhSpy (package ``ghspy``); the banner
            # previously showed "GitSpy".
            f"[bold cyan]GhSpy[/] — scanning [bold yellow]@{username}[/]",
            border_style="cyan",
            padding=(1, 2),
        )
    )
    console.print()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def print_profile(data):
    """Render the profile dictionary as a two-column "Profile" table.

    Args:
        data: Mapping with the keys read below (``login``, ``name``, ``bio``,
            ``company``, ``location``, ``email``, ``blog``, ``twitter``,
            ``public_repos``, ``public_gists``, ``followers``, ``following``,
            ``created_at``, ``hireable`` and ``profile``).
    """
    # "N/A" is the placeholder value for a missing Twitter handle; only real
    # handles get the "@" prefix.
    if data["twitter"] != "N/A":
        twitter_cell = f"@{data['twitter']}"
    else:
        twitter_cell = "N/A"

    # ``hireable`` may be None, which is shown as "N/A" rather than "None".
    if data["hireable"] is not None:
        hireable_cell = str(data["hireable"])
    else:
        hireable_cell = "N/A"

    rows = [
        ("Username", data["login"]),
        ("Name", data["name"]),
        ("Bio", data["bio"]),
        ("Company", data["company"]),
        ("Location", data["location"]),
        ("Email", data["email"]),
        ("Blog", data["blog"]),
        ("Twitter", twitter_cell),
        ("Repos", str(data["public_repos"])),
        ("Gists", str(data["public_gists"])),
        ("Followers", str(data["followers"])),
        ("Following", str(data["following"])),
        # Only the first ten characters of the creation timestamp are shown.
        ("Created", data["created_at"][:10]),
        ("Hireable", hireable_cell),
        ("Profile", data["profile"]),
    ]

    profile_table = Table(title="Profile", box=box.ROUNDED, border_style="blue")
    profile_table.add_column("Field", style="bold")
    profile_table.add_column("Value")
    for label, value in rows:
        profile_table.add_row(label, value)

    console.print(profile_table)
    console.print()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def print_emails(emails):
    """Print a numbered table of discovered email addresses.

    Args:
        emails: Iterable of address strings. When it is empty a short
            "nothing found" notice is printed instead of a table.
    """
    # Function-scope import keeps this block self-contained; rich is already
    # a dependency of this module.
    from rich.markup import escape

    if not emails:
        console.print("[dim]No emails found in commit history.[/]")
        console.print()
        return

    table = Table(title="Discovered Emails", box=box.ROUNDED, border_style="green")
    table.add_column("#", justify="right", style="dim")
    table.add_column("Email", style="bold green")

    for i, email in enumerate(emails, 1):
        # Addresses are external data and may contain square brackets (for
        # example "...[bot]@users.noreply.github.com"). Escape them so Rich
        # prints them literally instead of parsing them as markup tags.
        table.add_row(str(i), escape(email))

    console.print(table)
    console.print()
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def print_timezone(data):
    """Show the timezone estimate panel and, if present, the hourly histogram.

    Args:
        data: Mapping with ``confidence``, ``estimated_timezone``,
            ``total_commits_analyzed`` and ``peak_hours_utc``; an optional
            ``hour_distribution`` mapping (hour -> commit count) adds the
            per-hour table.
    """
    # A confidence of "none" means there is nothing to report.
    if data.get("confidence") == "none":
        console.print("[dim]Not enough commit data to estimate timezone.[/]")
        console.print()
        return

    confidence = data["confidence"]
    # high -> green, medium -> yellow, anything else -> red.
    color = "green" if confidence == "high" else ("yellow" if confidence == "medium" else "red")

    zone = data["estimated_timezone"]
    analyzed = data["total_commits_analyzed"]
    peaks = ", ".join(f"{h:02d}:00" for h in data["peak_hours_utc"])
    summary = (
        f"[bold]{zone}[/]\n"
        f"Confidence: [{color}]{confidence}[/] ({analyzed} commits analyzed)\n"
        f"Peak hours (UTC): {peaks}"
    )
    console.print(
        Panel(
            summary,
            title="[bold]Timezone Estimate[/]",
            border_style="magenta",
            padding=(1, 2),
        )
    )
    console.print()

    distribution = data.get("hour_distribution")
    if not distribution:
        return

    histogram = Table(title="Commit Hour Distribution (UTC)", box=box.SIMPLE)
    for heading, extra in (("Hour", {"justify": "right"}), ("Commits", {"justify": "right"}), ("Graph", {})):
        histogram.add_column(heading, **extra)

    # Bars are scaled so the busiest hour is 25 cells wide.
    busiest = max(distribution.values())
    for hour in range(24):
        commits = distribution.get(hour, 0)
        if commits <= 0:
            # Hours without commits are left out of the table.
            continue
        cells = "█" * int(commits / busiest * 25)
        histogram.add_row(f"{hour:02d}:00", str(commits), f"[cyan]{cells}[/]")

    console.print(histogram)
    console.print()
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def print_activity(data):
    """Print the activity summary line plus per-day and per-event-type tables.

    Args:
        data: Mapping with ``total_events``, ``by_day`` (day name -> count)
            and ``event_types`` (event type -> count); ``last_activity`` and
            ``streak`` are optional and printed only when truthy.
    """
    console.print(f"[bold]Activity Patterns[/] — {data['total_events']} events analyzed")

    last_seen = data.get("last_activity")
    if last_seen:
        console.print(f" Last activity: [green]{last_seen}[/]")

    streak = data.get("streak")
    if streak:
        console.print(f" Current streak: [yellow]{streak} day(s)[/]")
    console.print()

    per_day = data["by_day"]
    if per_day:
        days = Table(title="Activity by Day", box=box.SIMPLE)
        days.add_column("Day", style="bold")
        days.add_column("Count", justify="right")
        days.add_column("Graph")

        # When every count is zero the scale falls back to 1 to avoid
        # dividing by zero; otherwise the busiest day is 20 cells wide.
        busiest = max(per_day.values()) if any(per_day.values()) else 1
        for day_name, total in per_day.items():
            if busiest > 0:
                width = int(total / busiest * 20)
            else:
                width = 0
            # Day names are abbreviated to their first three characters.
            days.add_row(day_name[:3], str(total), f"[cyan]{'█' * width}[/]")

        console.print(days)
        console.print()

    per_type = data["event_types"]
    if per_type:
        kinds = Table(title="Event Types", box=box.SIMPLE)
        kinds.add_column("Type", style="bold")
        kinds.add_column("Count", justify="right")

        for event_type, total in per_type.items():
            # Drop the "Event" part of names such as "PushEvent".
            kinds.add_row(event_type.replace("Event", ""), str(total))

        console.print(kinds)
        console.print()
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def print_tech_stack(data):
    """Print repo counts, a language table with bars, and the topic list.

    Args:
        data: Mapping with ``original_repos``, ``forked_repos``,
            ``languages`` (language -> repo count) and ``topics``.
    """
    originals = data["original_repos"]
    forks = data["forked_repos"]
    console.print(f"[bold]Tech Stack[/] — {originals} original repos, {forks} forks")
    console.print()

    by_language = data["languages"]
    if by_language:
        lang_table = Table(title="Languages", box=box.ROUNDED, border_style="magenta")
        lang_table.add_column("Language", style="bold")
        lang_table.add_column("Repos", justify="right")
        lang_table.add_column("Graph")

        # The language with the most repos gets a 20-cell bar; bar colours
        # cycle through the palette in row order.
        top = max(by_language.values())
        palette = ["cyan", "green", "yellow", "red", "blue", "magenta"]
        for position, (language, repos) in enumerate(by_language.items()):
            shade = palette[position % len(palette)]
            cells = "█" * int(repos / top * 20)
            lang_table.add_row(language, str(repos), f"[{shade}]{cells}[/]")

        console.print(lang_table)
        console.print()

    topics = data["topics"]
    if topics:
        dimmed = [f"[dim]{topic}[/]" for topic in topics]
        console.print(f" Topics: {', '.join(dimmed)}")
        console.print()
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def print_collaborators(data):
    """Print organizations, frequent collaborators and followed accounts.

    Args:
        data: Mapping with ``organizations`` (list of names),
            ``frequent_collaborators`` (login -> interaction count),
            ``following`` (list of logins) and ``following_count``.
            Each section is skipped when its value is empty.
    """
    # Function-scope import keeps this block self-contained; rich is already
    # a dependency of this module.
    from rich.markup import escape

    # Logins are external data. Names such as "dependabot[bot]" contain square
    # brackets, which Rich would otherwise parse as a markup tag and drop, so
    # every name is escaped before it is interpolated into markup.
    if data["organizations"]:
        orgs = ", ".join(escape(org) for org in data["organizations"])
        console.print(f"[bold]Organizations:[/] {orgs}")
        console.print()

    if data["frequent_collaborators"]:
        table = Table(title="Frequent Collaborators", box=box.ROUNDED, border_style="yellow")
        table.add_column("User", style="bold")
        table.add_column("Interactions", justify="right")

        for user, count in data["frequent_collaborators"].items():
            table.add_row(f"@{escape(user)}", str(count))

        console.print(table)
        console.print()

    if data["following"]:
        # Only the first 15 followed accounts are listed; the rest are
        # summarised as a count.
        shown = ", ".join(escape(login) for login in data["following"][:15])
        console.print(f"[bold]Following ({data['following_count']}):[/] {shown}")
        if data["following_count"] > 15:
            console.print(f" [dim]... and {data['following_count'] - 15} more[/]")
        console.print()
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def print_identities(data):
    """Print the table of commit identities (email, names used, noreply flag).

    Args:
        data: Mapping with ``identities`` (list of dicts holding ``email``,
            ``names`` and ``is_noreply``), ``unique_emails`` and
            ``unique_names``. Nothing is printed when ``identities`` is empty.
    """
    # Function-scope import keeps this block self-contained; rich is already
    # a dependency of this module.
    from rich.markup import escape

    if not data["identities"]:
        return

    table = Table(title="Commit Identities", box=box.ROUNDED, border_style="red")
    table.add_column("Email", style="bold")
    table.add_column("Names Used")
    table.add_column("Noreply?")

    for identity in data["identities"]:
        # Commit author names and emails are free-form external data; values
        # such as "github-actions[bot]" would be parsed as Rich markup and
        # lose the bracketed part, so they are escaped first.
        names = ", ".join(escape(name) for name in identity["names"])
        noreply = "[dim]yes[/]" if identity["is_noreply"] else "[yellow]no[/]"
        table.add_row(escape(identity["email"]), names, noreply)

    console.print(table)
    console.print(
        f" [dim]{data['unique_emails']} unique email(s), "
        f"{data['unique_names']} unique name(s)[/]"
    )
    console.print()
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def print_full_scan(report):
    """Render every section of a full scan report, in a fixed order.

    Args:
        report: Mapping with the keys ``profile``, ``emails``, ``timezone``,
            ``activity``, ``tech_stack``, ``collaborators`` and
            ``identities``; each value is handed to its section printer.
    """
    # (printer, report key) pairs, listed in display order.
    sections = (
        (print_profile, "profile"),
        (print_emails, "emails"),
        (print_timezone, "timezone"),
        (print_activity, "activity"),
        (print_tech_stack, "tech_stack"),
        (print_collaborators, "collaborators"),
        (print_identities, "identities"),
    )
    for render, key in sections:
        render(report[key])
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""GitHub API client for GhSpy."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GitHubClient:
    """Small GitHub REST API client built on one shared ``requests`` session.

    A token, when available, is sent as an ``Authorization`` header. Rate-limit
    (403) and not-found (404) responses end the process with exit status 1
    after a message on stderr; other HTTP errors are raised by
    ``raise_for_status``.
    """

    BASE_URL = "https://api.github.com"
    # Seconds allowed per request. requests applies no timeout by default, so
    # without this a stalled connection would block the CLI indefinitely.
    TIMEOUT = 30

    def __init__(self, token=None):
        """Create the session.

        Args:
            token: Personal access token. Falls back to the ``GITHUB_TOKEN``
                environment variable; requests are unauthenticated when
                neither is set.
        """
        self.token = token or os.environ.get("GITHUB_TOKEN")
        self.session = requests.Session()
        self.session.headers.update({
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "GhSpy",
        })
        if self.token:
            self.session.headers["Authorization"] = f"token {self.token}"

    def _get(self, endpoint, params=None):
        """GET ``BASE_URL + endpoint`` and return the decoded JSON body.

        Args:
            endpoint: Path starting with ``/``, appended to ``BASE_URL``.
            params: Optional query-string parameters.

        Raises:
            SystemExit: With code 1 on a rate-limit 403 or on a 404.
            requests.HTTPError: For any other error status.
        """
        url = f"{self.BASE_URL}{endpoint}"
        resp = self.session.get(url, params=params, timeout=self.TIMEOUT)
        # Diagnostics go to stderr so they never mix with JSON/CSV that the
        # CLI writes to stdout for piping.
        if resp.status_code == 403 and "rate limit" in resp.text.lower():
            print(
                "GitHub API rate limit exceeded. Set GITHUB_TOKEN for higher limits.",
                file=sys.stderr,
            )
            sys.exit(1)
        if resp.status_code == 404:
            print(f"Not found: {endpoint}", file=sys.stderr)
            sys.exit(1)
        resp.raise_for_status()
        return resp.json()

    def _get_paginated(self, endpoint, params=None, max_pages=5):
        """Fetch up to ``max_pages`` pages and return the concatenated items.

        Args:
            endpoint: Path passed to ``_get``.
            params: Optional query parameters; ``per_page`` defaults to 100.
                The caller's dictionary is not modified.
            max_pages: Upper bound on the number of pages requested.

        Returns:
            A list of all items collected; fetching stops early at the first
            empty page.
        """
        # Work on a copy: the page counter and per_page default must not leak
        # into the dictionary the caller passed in.
        query = dict(params or {})
        query.setdefault("per_page", 100)
        results = []
        for page in range(1, max_pages + 1):
            query["page"] = page
            data = self._get(endpoint, query)
            if not data:
                break
            results.extend(data)
        return results

    def get_user(self, username):
        """Return the profile document for ``username``."""
        return self._get(f"/users/{username}")

    def get_repos(self, username, max_pages=3):
        """Return the user's repositories, requested with ``sort=updated``."""
        return self._get_paginated(
            f"/users/{username}/repos",
            params={"sort": "updated"},
            max_pages=max_pages,
        )

    def get_events(self, username, max_pages=3):
        """Return the user's public events."""
        return self._get_paginated(
            f"/users/{username}/events/public",
            max_pages=max_pages,
        )

    def get_repo_commits(self, owner, repo, author=None, max_pages=2):
        """Return commits of ``owner/repo``, optionally filtered by ``author``."""
        params = {}
        if author:
            params["author"] = author
        return self._get_paginated(
            f"/repos/{owner}/{repo}/commits",
            params=params,
            max_pages=max_pages,
        )

    def get_orgs(self, username):
        """Return the organizations listed for ``username`` (single request)."""
        return self._get(f"/users/{username}/orgs")

    def get_followers(self, username):
        """Return up to two pages of the user's followers."""
        return self._get_paginated(f"/users/{username}/followers", max_pages=2)

    def get_following(self, username):
        """Return up to two pages of accounts the user follows."""
        return self._get_paginated(f"/users/{username}/following", max_pages=2)

    def get_gists(self, username):
        """Return up to two pages of the user's gists."""
        return self._get_paginated(f"/users/{username}/gists", max_pages=2)

    def get_rate_limit(self):
        """Return the ``/rate_limit`` document."""
        return self._get("/rate_limit")
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ghspy
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: GitHub OSINT tool — extract intelligence from any GitHub user profile
|
|
5
|
+
Author: shazeus
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/shazeus/ghspy
|
|
8
|
+
Project-URL: Repository, https://github.com/shazeus/ghspy
|
|
9
|
+
Project-URL: Issues, https://github.com/shazeus/ghspy/issues
|
|
10
|
+
Keywords: github,osint,reconnaissance,cli,security,intelligence
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Security
|
|
23
|
+
Classifier: Topic :: Utilities
|
|
24
|
+
Requires-Python: >=3.8
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: click>=8.0
|
|
28
|
+
Requires-Dist: requests>=2.28
|
|
29
|
+
Requires-Dist: rich>=13.0
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
<p align="center">
|
|
33
|
+
<h1 align="center">GhSpy</h1>
|
|
34
|
+
<p align="center">GitHub OSINT tool — extract intelligence from any GitHub user profile.</p>
|
|
35
|
+
<p align="center">
|
|
36
|
+
<a href="https://pypi.org/project/ghspy/"><img src="https://img.shields.io/pypi/v/ghspy?color=blue&label=PyPI" alt="PyPI"></a>
|
|
37
|
+
<a href="https://pypi.org/project/ghspy/"><img src="https://img.shields.io/pypi/pyversions/ghspy" alt="Python"></a>
|
|
38
|
+
<a href="https://github.com/shazeus/ghspy/blob/main/LICENSE"><img src="https://img.shields.io/github/license/shazeus/ghspy" alt="License"></a>
|
|
39
|
+
<a href="https://github.com/shazeus/ghspy/stargazers"><img src="https://img.shields.io/github/stars/shazeus/ghspy?style=social" alt="Stars"></a>
|
|
40
|
+
</p>
|
|
41
|
+
</p>
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
Discover emails, estimate timezones, map tech stacks, find collaborators, and analyze activity patterns — all from your terminal. GhSpy uses the public GitHub API to gather open-source intelligence on any GitHub user.
|
|
46
|
+
|
|
47
|
+
- **Email Discovery** — extract real email addresses from commit history
|
|
48
|
+
- **Timezone Estimation** — estimate location from commit hour patterns
|
|
49
|
+
- **Activity Analysis** — peak hours, active days, contribution streaks
|
|
50
|
+
- **Tech Stack Mapping** — languages, topics, original vs forked repos
|
|
51
|
+
- **Collaborator Detection** — frequent interactions, orgs, following
|
|
52
|
+
- **Identity Mapping** — cross-reference commit emails and author names
|
|
53
|
+
|
|
54
|
+
## Installation
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install ghspy
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Requires Python 3.8+. Works on Linux, macOS, and Windows.
|
|
61
|
+
|
|
62
|
+
## Usage
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
# Full OSINT scan
|
|
66
|
+
ghspy scan torvalds
|
|
67
|
+
|
|
68
|
+
# Extract emails from commit history
|
|
69
|
+
ghspy emails dhh
|
|
70
|
+
|
|
71
|
+
# Estimate timezone
|
|
72
|
+
ghspy timezone antirez
|
|
73
|
+
|
|
74
|
+
# Activity patterns
|
|
75
|
+
ghspy activity shazeus
|
|
76
|
+
|
|
77
|
+
# Tech stack
|
|
78
|
+
ghspy techstack gvanrossum
|
|
79
|
+
|
|
80
|
+
# Collaborators & organizations
|
|
81
|
+
ghspy collabs octocat
|
|
82
|
+
|
|
83
|
+
# Export to JSON
|
|
84
|
+
ghspy export torvalds --format json -o report.json
|
|
85
|
+
|
|
86
|
+
# Export to CSV
|
|
87
|
+
ghspy export torvalds --format csv -o report.csv
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Commands
|
|
91
|
+
|
|
92
|
+
| Command | Description |
|
|
93
|
+
|---------|-------------|
|
|
94
|
+
| `ghspy scan <user>` | Full OSINT scan with all modules |
|
|
95
|
+
| `ghspy emails <user>` | Extract emails from commit history |
|
|
96
|
+
| `ghspy timezone <user>` | Estimate timezone from commit patterns |
|
|
97
|
+
| `ghspy activity <user>` | Activity breakdown by hour, day, and event type |
|
|
98
|
+
| `ghspy techstack <user>` | Languages, topics, and repo statistics |
|
|
99
|
+
| `ghspy collabs <user>` | Collaborators, organizations, and following |
|
|
100
|
+
| `ghspy export <user>` | Export findings to JSON or CSV |
|
|
101
|
+
| `ghspy rate-limit` | Check GitHub API rate limit |
|
|
102
|
+
|
|
103
|
+
## Configuration
|
|
104
|
+
|
|
105
|
+
### GitHub Token
|
|
106
|
+
|
|
107
|
+
Without a token you get **60 requests/hour**. With a token you get **5,000 requests/hour**. A full scan uses 20–50 requests depending on the user's repo count.
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# Set as environment variable (recommended)
|
|
111
|
+
export GITHUB_TOKEN=ghp_your_token_here
|
|
112
|
+
|
|
113
|
+
# Or pass directly
|
|
114
|
+
ghspy --token ghp_xxxx scan torvalds
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Generate a token at [github.com/settings/tokens](https://github.com/settings/tokens) — no special scopes needed for public data.
|
|
118
|
+
|
|
119
|
+
### JSON Output
|
|
120
|
+
|
|
121
|
+
Every command supports `--json-output` for piping to other tools:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
ghspy scan user --json-output | jq '.emails'
|
|
125
|
+
ghspy scan user --json-output | jq '.timezone.estimated_timezone'
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## How It Works
|
|
129
|
+
|
|
130
|
+
GhSpy queries the public GitHub REST API to collect:
|
|
131
|
+
|
|
132
|
+
1. **User profile** — public info, bio, location, social links
|
|
133
|
+
2. **Repositories** — languages, topics, fork status, activity dates
|
|
134
|
+
3. **Commit history** — author emails, timestamps, committer info
|
|
135
|
+
4. **Public events** — pushes, PRs, issues, comments
|
|
136
|
+
5. **Social graph** — followers, following, organizations
|
|
137
|
+
|
|
138
|
+
All data is publicly available through GitHub's API. No authentication bypass or scraping is involved.
|
|
139
|
+
|
|
140
|
+
## Disclaimer
|
|
141
|
+
|
|
142
|
+
This tool only accesses **publicly available data** through the official GitHub API. It does not bypass access controls, scrape private information, or violate GitHub's Terms of Service. Use responsibly.
|
|
143
|
+
|
|
144
|
+
## License
|
|
145
|
+
|
|
146
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
ghspy/__init__.py
|
|
5
|
+
ghspy/__main__.py
|
|
6
|
+
ghspy/analyzer.py
|
|
7
|
+
ghspy/cli.py
|
|
8
|
+
ghspy/display.py
|
|
9
|
+
ghspy/github_api.py
|
|
10
|
+
ghspy.egg-info/PKG-INFO
|
|
11
|
+
ghspy.egg-info/SOURCES.txt
|
|
12
|
+
ghspy.egg-info/dependency_links.txt
|
|
13
|
+
ghspy.egg-info/entry_points.txt
|
|
14
|
+
ghspy.egg-info/requires.txt
|
|
15
|
+
ghspy.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ghspy
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
[build-system]
# `project.license` below uses the SPDX string form (PEP 639), which
# setuptools only accepts from 77.0.3 onward; older releases reject it.
# NOTE(review): setuptools releases this new appear to have dropped
# Python 3.8, so source builds on 3.8 may not resolve (confirm).
requires = ["setuptools>=77.0.3", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "ghspy"
version = "0.1.0"
description = "GitHub OSINT tool — extract intelligence from any GitHub user profile"
readme = "README.md"
license = "MIT"
requires-python = ">=3.8"
authors = [
    {name = "shazeus"},
]
keywords = ["github", "osint", "reconnaissance", "cli", "security", "intelligence"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Environment :: Console",
    "Intended Audience :: Developers",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Security",
    "Topic :: Utilities",
]
dependencies = [
    "click>=8.0",
    "requests>=2.28",
    "rich>=13.0",
]

[project.urls]
Homepage = "https://github.com/shazeus/ghspy"
Repository = "https://github.com/shazeus/ghspy"
Issues = "https://github.com/shazeus/ghspy/issues"

[project.scripts]
# Console entry point: the `ghspy` command runs the click group in ghspy/cli.py.
ghspy = "ghspy.cli:cli"

[tool.setuptools.packages.find]
include = ["ghspy*"]
|
ghspy-0.1.0/setup.cfg
ADDED