gitosintx 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitosintx-0.1.0/LICENSE +21 -0
- gitosintx-0.1.0/MANIFEST.in +3 -0
- gitosintx-0.1.0/PKG-INFO +165 -0
- gitosintx-0.1.0/README.md +136 -0
- gitosintx-0.1.0/examples/domains.txt +3 -0
- gitosintx-0.1.0/pyproject.toml +51 -0
- gitosintx-0.1.0/setup.cfg +4 -0
- gitosintx-0.1.0/src/gitosintx/__init__.py +4 -0
- gitosintx-0.1.0/src/gitosintx/__main__.py +4 -0
- gitosintx-0.1.0/src/gitosintx/banner.py +10 -0
- gitosintx-0.1.0/src/gitosintx/cli.py +262 -0
- gitosintx-0.1.0/src/gitosintx/github.py +203 -0
- gitosintx-0.1.0/src/gitosintx/models.py +62 -0
- gitosintx-0.1.0/src/gitosintx/report.py +157 -0
- gitosintx-0.1.0/src/gitosintx/utils.py +207 -0
- gitosintx-0.1.0/src/gitosintx.egg-info/PKG-INFO +165 -0
- gitosintx-0.1.0/src/gitosintx.egg-info/SOURCES.txt +20 -0
- gitosintx-0.1.0/src/gitosintx.egg-info/dependency_links.txt +1 -0
- gitosintx-0.1.0/src/gitosintx.egg-info/entry_points.txt +2 -0
- gitosintx-0.1.0/src/gitosintx.egg-info/requires.txt +1 -0
- gitosintx-0.1.0/src/gitosintx.egg-info/top_level.txt +1 -0
- gitosintx-0.1.0/tests/test_utils.py +27 -0
gitosintx-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Harith Dilshan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
gitosintx-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gitosintx
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: GitHub OSINT tool for finding public repository mentions of domains and URLs.
|
|
5
|
+
Author: Harith Dilshan
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://h4rithd.com
|
|
8
|
+
Project-URL: Repository, https://github.com/h4rithd/GitOSINTX
|
|
9
|
+
Project-URL: Issues, https://github.com/h4rithd/GitOSINTX/issues
|
|
10
|
+
Keywords: osint,github-osint,bug-bounty,domain-recon,github-search,security-research
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Information Technology
|
|
14
|
+
Classifier: Intended Audience :: System Administrators
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Security
|
|
23
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
|
|
24
|
+
Requires-Python: >=3.9
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: requests>=2.31.0
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# GitOSINTX
|
|
31
|
+
|
|
32
|
+
**GitOSINTX** is a GitHub OSINT command-line tool for finding public repository mentions of domains, URLs, and email-style references.
|
|
33
|
+
|
|
34
|
+
```text
|
|
35
|
+
______ _ __ ____ _____ _____ _______ ______
|
|
36
|
+
/ ____/(_) /_/ __ \/ ___// _/ | / /_ __/ |/ / |
|
|
37
|
+
/ / __/ / __/ / / /\__ \ / // |/ / / / | / /| |
|
|
38
|
+
/ /_/ / / /_/ /_/ /___/ // // /| / / / / | ___ |
|
|
39
|
+
\____/_/\__/\____//____/___/_/ |_/ /_/ /_/|_|/ |_|
|
|
40
|
+
|
|
41
|
+
GitOSINTX - GitHub Domain & URL Mention Enumerator
|
|
42
|
+
Developed by Harith Dilshan | h4rithd.com
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## What it does
|
|
46
|
+
|
|
47
|
+
GitOSINTX accepts a domain or URL, normalizes it into a bare domain, generates multiple GitHub Search API queries, deduplicates results, classifies risky-looking references, and exports a clean JSON or HTML report.
|
|
48
|
+
|
|
49
|
+
It is useful for:
|
|
50
|
+
|
|
51
|
+
- Bug bounty passive recon
|
|
52
|
+
- Public code exposure discovery
|
|
53
|
+
- Domain and URL mention enumeration
|
|
54
|
+
- Finding hardcoded API endpoints in public repositories
|
|
55
|
+
- Identifying config, CI/CD, and sensitive-keyword references
|
|
56
|
+
|
|
57
|
+
## What it does **not** do
|
|
58
|
+
|
|
59
|
+
GitOSINTX does not bypass GitHub limits, scrape private repositories, validate leaked credentials, or exploit anything. It only queries public GitHub data available to your GitHub API access level.
|
|
60
|
+
|
|
61
|
+
## Install
|
|
62
|
+
|
|
63
|
+
From PyPI after publication:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install gitosintx
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
For local development:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
git clone https://github.com/h4rithd/GitOSINTX
|
|
73
|
+
cd GitOSINTX
|
|
74
|
+
python3 -m pip install -e .
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## GitHub token
|
|
78
|
+
|
|
79
|
+
Authenticated GitHub requests are strongly recommended.
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
export GITHUB_TOKEN='ghp_xxxxxxxxxxxxxxxxxxxx'
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Avoid passing tokens directly on the command line because shell history may store them.
|
|
86
|
+
|
|
87
|
+
## Usage
|
|
88
|
+
|
|
89
|
+
Search a single domain or URL and export HTML:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
gitosintx -u https://h4rithd.com -o html --out h4rithd-github-osint.html
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Search a single domain and export JSON:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
gitosintx -u h4rithd.com -o json --out h4rithd-github-osint.json
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Search a list of domains/URLs:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
gitosintx -list examples/domains.txt -o html --out multi-domain-report.html
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Run deeper extension/config-focused queries:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
gitosintx -u h4rithd.com --deep -o html --out deep-report.html
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Be friendlier to GitHub rate limits:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
gitosintx -u h4rithd.com --max-pages 1 --sleep 2 --wait-rate-limit
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Show help:
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
gitosintx -h
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## CLI options
|
|
126
|
+
|
|
127
|
+
```text
|
|
128
|
+
-u, --url Single target domain or URL
|
|
129
|
+
-list, --list File containing domains/URLs, one per line
|
|
130
|
+
-o, --output Output format: html or json
|
|
131
|
+
--out Output report path
|
|
132
|
+
--token GitHub token; prefer GITHUB_TOKEN env var
|
|
133
|
+
--max-pages Maximum GitHub result pages per query
|
|
134
|
+
--per-page Results per GitHub API page, max 100
|
|
135
|
+
--sleep Delay in seconds between paginated requests
|
|
136
|
+
--wait-rate-limit Sleep and continue when rate limited
|
|
137
|
+
--deep Run additional config/extension-focused queries
|
|
138
|
+
--no-repo-search Disable repository metadata search
|
|
139
|
+
--no-email-query Disable @domain query
|
|
140
|
+
--include-forks Include forks in repository search where supported
|
|
141
|
+
--quiet Suppress banner/progress output
|
|
142
|
+
-v, --verbose Print query progress to stderr
|
|
143
|
+
--version Print version
|
|
144
|
+
-h, --help Show help
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Output tags
|
|
148
|
+
|
|
149
|
+
GitOSINTX applies simple triage tags to help prioritize manual review:
|
|
150
|
+
|
|
151
|
+
| Tag | Meaning |
|
|
152
|
+
| --- | --- |
|
|
153
|
+
| `sensitive-keyword` | Match appears near words like token, secret, password, api_key, private_key, etc. |
|
|
154
|
+
| `config-file` | Match appears in config-style files such as `.env`, `.yml`, `.json`, `.properties`, `.tfvars`, etc. |
|
|
155
|
+
| `cicd-devops` | Match appears in CI/CD or deployment files such as GitHub Actions, Dockerfile, Jenkinsfile, etc. |
|
|
156
|
+
| `url-reference` | Match contains URL-style syntax. |
|
|
157
|
+
| `email-reference` | Match contains email-style syntax. |
|
|
158
|
+
|
|
159
|
+
## Responsible use
|
|
160
|
+
|
|
161
|
+
Do not use discovered credentials. Do not validate tokens. Do not access systems without explicit authorization. For bug bounty, preserve evidence: repository, file path, commit/hash when available, matched snippet, exposure type, and remediation recommendation.
|
|
162
|
+
|
|
163
|
+
## License
|
|
164
|
+
|
|
165
|
+
MIT License.
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# GitOSINTX
|
|
2
|
+
|
|
3
|
+
**GitOSINTX** is a GitHub OSINT command-line tool for finding public repository mentions of domains, URLs, and email-style references.
|
|
4
|
+
|
|
5
|
+
```text
|
|
6
|
+
______ _ __ ____ _____ _____ _______ ______
|
|
7
|
+
/ ____/(_) /_/ __ \/ ___// _/ | / /_ __/ |/ / |
|
|
8
|
+
/ / __/ / __/ / / /\__ \ / // |/ / / / | / /| |
|
|
9
|
+
/ /_/ / / /_/ /_/ /___/ // // /| / / / / | ___ |
|
|
10
|
+
\____/_/\__/\____//____/___/_/ |_/ /_/ /_/|_|/ |_|
|
|
11
|
+
|
|
12
|
+
GitOSINTX - GitHub Domain & URL Mention Enumerator
|
|
13
|
+
Developed by Harith Dilshan | h4rithd.com
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## What it does
|
|
17
|
+
|
|
18
|
+
GitOSINTX accepts a domain or URL, normalizes it into a bare domain, generates multiple GitHub Search API queries, deduplicates results, classifies risky-looking references, and exports a clean JSON or HTML report.
|
|
19
|
+
|
|
20
|
+
It is useful for:
|
|
21
|
+
|
|
22
|
+
- Bug bounty passive recon
|
|
23
|
+
- Public code exposure discovery
|
|
24
|
+
- Domain and URL mention enumeration
|
|
25
|
+
- Finding hardcoded API endpoints in public repositories
|
|
26
|
+
- Identifying config, CI/CD, and sensitive-keyword references
|
|
27
|
+
|
|
28
|
+
## What it does **not** do
|
|
29
|
+
|
|
30
|
+
GitOSINTX does not bypass GitHub limits, scrape private repositories, validate leaked credentials, or exploit anything. It only queries public GitHub data available to your GitHub API access level.
|
|
31
|
+
|
|
32
|
+
## Install
|
|
33
|
+
|
|
34
|
+
From PyPI after publication:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install gitosintx
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
For local development:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
git clone https://github.com/h4rithd/GitOSINTX
|
|
44
|
+
cd GitOSINTX
|
|
45
|
+
python3 -m pip install -e .
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## GitHub token
|
|
49
|
+
|
|
50
|
+
Authenticated GitHub requests are strongly recommended.
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
export GITHUB_TOKEN='ghp_xxxxxxxxxxxxxxxxxxxx'
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Avoid passing tokens directly on the command line because shell history may store them.
|
|
57
|
+
|
|
58
|
+
## Usage
|
|
59
|
+
|
|
60
|
+
Search a single domain or URL and export HTML:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
gitosintx -u https://h4rithd.com -o html --out h4rithd-github-osint.html
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Search a single domain and export JSON:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
gitosintx -u h4rithd.com -o json --out h4rithd-github-osint.json
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Search a list of domains/URLs:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
gitosintx -list examples/domains.txt -o html --out multi-domain-report.html
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Run deeper extension/config-focused queries:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
gitosintx -u h4rithd.com --deep -o html --out deep-report.html
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Be friendlier to GitHub rate limits:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
gitosintx -u h4rithd.com --max-pages 1 --sleep 2 --wait-rate-limit
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Show help:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
gitosintx -h
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## CLI options
|
|
97
|
+
|
|
98
|
+
```text
|
|
99
|
+
-u, --url Single target domain or URL
|
|
100
|
+
-list, --list File containing domains/URLs, one per line
|
|
101
|
+
-o, --output Output format: html or json
|
|
102
|
+
--out Output report path
|
|
103
|
+
--token GitHub token; prefer GITHUB_TOKEN env var
|
|
104
|
+
--max-pages Maximum GitHub result pages per query
|
|
105
|
+
--per-page Results per GitHub API page, max 100
|
|
106
|
+
--sleep Delay in seconds between paginated requests
|
|
107
|
+
--wait-rate-limit Sleep and continue when rate limited
|
|
108
|
+
--deep Run additional config/extension-focused queries
|
|
109
|
+
--no-repo-search Disable repository metadata search
|
|
110
|
+
--no-email-query Disable @domain query
|
|
111
|
+
--include-forks Include forks in repository search where supported
|
|
112
|
+
--quiet Suppress banner/progress output
|
|
113
|
+
-v, --verbose Print query progress to stderr
|
|
114
|
+
--version Print version
|
|
115
|
+
-h, --help Show help
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Output tags
|
|
119
|
+
|
|
120
|
+
GitOSINTX applies simple triage tags to help prioritize manual review:
|
|
121
|
+
|
|
122
|
+
| Tag | Meaning |
|
|
123
|
+
| --- | --- |
|
|
124
|
+
| `sensitive-keyword` | Match appears near words like token, secret, password, api_key, private_key, etc. |
|
|
125
|
+
| `config-file` | Match appears in config-style files such as `.env`, `.yml`, `.json`, `.properties`, `.tfvars`, etc. |
|
|
126
|
+
| `cicd-devops` | Match appears in CI/CD or deployment files such as GitHub Actions, Dockerfile, Jenkinsfile, etc. |
|
|
127
|
+
| `url-reference` | Match contains URL-style syntax. |
|
|
128
|
+
| `email-reference` | Match contains email-style syntax. |
|
|
129
|
+
|
|
130
|
+
## Responsible use
|
|
131
|
+
|
|
132
|
+
Do not use discovered credentials. Do not validate tokens. Do not access systems without explicit authorization. For bug bounty, preserve evidence: repository, file path, commit/hash when available, matched snippet, exposure type, and remediation recommendation.
|
|
133
|
+
|
|
134
|
+
## License
|
|
135
|
+
|
|
136
|
+
MIT License.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "gitosintx"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "GitHub OSINT tool for finding public repository mentions of domains and URLs."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Harith Dilshan" }
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"osint",
|
|
17
|
+
"github-osint",
|
|
18
|
+
"bug-bounty",
|
|
19
|
+
"domain-recon",
|
|
20
|
+
"github-search",
|
|
21
|
+
"security-research"
|
|
22
|
+
]
|
|
23
|
+
classifiers = [
|
|
24
|
+
"Development Status :: 3 - Alpha",
|
|
25
|
+
"Environment :: Console",
|
|
26
|
+
"Intended Audience :: Information Technology",
|
|
27
|
+
"Intended Audience :: System Administrators",
|
|
28
|
+
"Operating System :: OS Independent",
|
|
29
|
+
"Programming Language :: Python :: 3",
|
|
30
|
+
"Programming Language :: Python :: 3.9",
|
|
31
|
+
"Programming Language :: Python :: 3.10",
|
|
32
|
+
"Programming Language :: Python :: 3.11",
|
|
33
|
+
"Programming Language :: Python :: 3.12",
|
|
34
|
+
"Programming Language :: Python :: 3.13",
|
|
35
|
+
"Topic :: Security",
|
|
36
|
+
"Topic :: Internet :: WWW/HTTP :: Indexing/Search"
|
|
37
|
+
]
|
|
38
|
+
dependencies = [
|
|
39
|
+
"requests>=2.31.0"
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[project.urls]
|
|
43
|
+
Homepage = "https://h4rithd.com"
|
|
44
|
+
Repository = "https://github.com/h4rithd/GitOSINTX"
|
|
45
|
+
Issues = "https://github.com/h4rithd/GitOSINTX/issues"
|
|
46
|
+
|
|
47
|
+
[project.scripts]
|
|
48
|
+
gitosintx = "gitosintx.cli:main"
|
|
49
|
+
|
|
50
|
+
[tool.setuptools.packages.find]
|
|
51
|
+
where = ["src"]
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
BANNER = r"""
|
|
2
|
+
______ _ __ ____ _____ _____ _______ ______
|
|
3
|
+
/ ____/(_) /_/ __ \/ ___// _/ | / /_ __/ |/ / |
|
|
4
|
+
/ / __/ / __/ / / /\__ \ / // |/ / / / | / /| |
|
|
5
|
+
/ /_/ / / /_/ /_/ /___/ // // /| / / / / | ___ |
|
|
6
|
+
\____/_/\__/\____//____/___/_/ |_/ /_/ /_/|_|/ |_|
|
|
7
|
+
|
|
8
|
+
GitOSINTX - GitHub Domain & URL Mention Enumerator
|
|
9
|
+
Developed by Harith Dilshan | h4rithd.com
|
|
10
|
+
"""
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, List, Tuple
|
|
8
|
+
|
|
9
|
+
from . import __version__
|
|
10
|
+
from .banner import BANNER
|
|
11
|
+
from .github import GitHubAPIError, GitHubClient, code_item_to_finding, repo_item_to_finding
|
|
12
|
+
from .models import Finding, ScanSummary
|
|
13
|
+
from .report import write_html_report, write_json_report
|
|
14
|
+
from .utils import (
|
|
15
|
+
build_code_queries,
|
|
16
|
+
build_repository_queries,
|
|
17
|
+
dedupe_preserve_order,
|
|
18
|
+
normalize_domain,
|
|
19
|
+
read_targets,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Standing caveats attached to every scan summary. run_scan() takes a copy of
# this list before appending run-specific notes, so it is never mutated.
DEFAULT_NOTES: List[str] = [
    "GitOSINTX uses the official GitHub REST Search API and only queries public GitHub data available to the authenticated user.",
    "GitHub search is rate-limited and capped; results are broad OSINT evidence, not a guarantee of full GitHub coverage.",
    "Do not use, validate, or abuse exposed credentials. Preserve location evidence and report responsibly through the correct program channel.",
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``gitosintx`` command.

    Exactly one target source is required: ``-u/--url`` for a single
    domain or URL, or ``-list/--list`` for a file of targets. Every other
    option is optional; defaults are shown in ``--help`` because the parser
    uses ``ArgumentDefaultsHelpFormatter``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli = argparse.ArgumentParser(
        prog="gitosintx",
        description="GitOSINTX - find public GitHub repository mentions of domains and URLs.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Target selection: the two sources are mutually exclusive and one is mandatory.
    targets = cli.add_mutually_exclusive_group(required=True)
    targets.add_argument(
        "-u", "--url", dest="url",
        help="Single target domain or URL, e.g. https://h4rithd.com, http://h4rithd.com, or h4rithd.com",
    )
    targets.add_argument(
        "-list", "--list", dest="list_path",
        help="File containing domains/URLs, one per line. Blank lines and # comments are ignored.",
    )

    add = cli.add_argument

    # Report destination and format.
    add("-o", "--output", choices=["json", "html"], default="html", help="Report output format.")
    add(
        "--out", dest="out_file",
        help="Output report file path. Defaults to gitosintx-report.html or gitosintx-report.json.",
    )

    # GitHub API access and pacing.
    add(
        "--token",
        help="GitHub token. Prefer setting GITHUB_TOKEN instead of passing tokens on the command line.",
    )
    add(
        "--max-pages", type=int, default=2,
        help="Maximum GitHub result pages per query. GitHub allows up to 100 results per page.",
    )
    add("--per-page", type=int, default=50, help="Results per GitHub API page. Maximum is 100.")
    add("--sleep", type=float, default=1.0, help="Delay in seconds between paginated API requests.")

    # Plain on/off switches, registered in the order they appear in --help.
    switches = (
        ("--wait-rate-limit", "Sleep and resume when GitHub primary/secondary rate limits are detected."),
        ("--deep", "Run additional extension/CI/config-focused queries. Slower and more rate-limit heavy."),
        ("--no-repo-search", "Disable repository metadata search and only run code search."),
        ("--no-email-query", "Do not search for @domain email-style mentions."),
        (
            "--include-forks",
            "Append fork:true to repository search queries. Code search may still include fork behavior controlled by GitHub.",
        ),
        ("--quiet", "Suppress banner and progress output."),
    )
    for flag, description in switches:
        add(flag, action="store_true", help=description)

    add("-v", "--verbose", action="store_true", help="Print API query progress to stderr.")
    add("--version", action="version", version=f"GitOSINTX {__version__}")
    return cli
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _default_out_file(fmt: str) -> str:
|
|
129
|
+
return f"gitosintx-report.{fmt}"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _normalize_targets(raw_targets: List[str]) -> Tuple[List[str], Dict[str, str]]:
    """Normalize every raw target and report the results two ways.

    Args:
        raw_targets: Targets exactly as supplied by the user.

    Returns:
        A pair ``(domains, mapping)``: ``domains`` is the list of normalized
        domains passed through ``dedupe_preserve_order``, and ``mapping``
        associates each raw target with its normalized domain.
    """
    # One normalize_domain() call per input entry, in input order.
    raw_to_domain: Dict[str, str] = {
        entry: normalize_domain(entry) for entry in raw_targets
    }
    all_domains: List[str] = [raw_to_domain[entry] for entry in raw_targets]
    return dedupe_preserve_order(all_domains), raw_to_domain
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _merge_code_finding(existing: Finding, incoming: Finding, query: str) -> None:
    """Fold a repeated code-search hit into the finding already stored for its key.

    Args:
        existing: The finding already recorded; updated in place.
        incoming: The new hit that produced the same key.
        query: The search query that produced ``incoming``.
    """
    # Compare against whole recorded queries, not substrings of the joined
    # string: a query such as `x.com` must still be recorded when
    # `x.com extension:env` is already present.
    if query not in existing.query.split(" || "):
        existing.query = f"{existing.query} || {query}"
    for tag in incoming.tags:
        if tag not in existing.tags:
            existing.tags.append(tag)
    for fragment in incoming.matched_fragments:
        if fragment not in existing.matched_fragments:
            existing.matched_fragments.append(fragment)


def run_scan(args: argparse.Namespace) -> Tuple[ScanSummary, List[Finding]]:
    """Run every GitHub search for the requested targets and collect findings.

    Reads ``url``, ``list_path``, ``token``, ``per_page``, ``max_pages``,
    ``sleep``, ``wait_rate_limit``, ``verbose``, ``deep``, ``no_email_query``,
    ``no_repo_search`` and ``include_forks`` from ``args``. The token falls
    back to the ``GITHUB_TOKEN`` environment variable.

    Each distinct normalized domain is scanned once, even when several raw
    targets (for example ``https://x.com`` and ``x.com``) normalize to it;
    findings are attributed to the first raw target seen for that domain.

    Args:
        args: Parsed command-line arguments from ``build_parser``.

    Returns:
        ``(summary, findings)``. Findings are de-duplicated by
        ``Finding.key()`` and sorted so that ``sensitive-keyword`` hits come
        first, then ``config-file`` hits, then by repository name and file
        path.

    Exceptions raised by the target reader, normalizer or GitHub client are
    not handled here; they propagate to the caller.
    """
    started_at = ScanSummary.now_iso()
    raw_targets = read_targets(args.url, args.list_path)
    domains, mapping = _normalize_targets(raw_targets)
    token = args.token or os.getenv("GITHUB_TOKEN")

    client = GitHubClient(
        token=token,
        per_page=args.per_page,
        max_pages=args.max_pages,
        sleep=args.sleep,
        wait_rate_limit=args.wait_rate_limit,
        verbose=args.verbose,
    )

    findings_by_key: Dict[str, Finding] = {}
    queries_executed = 0
    scanned_domains = set()

    for original_target, domain in mapping.items():
        # Several raw spellings can map to one domain; searching it again
        # would only burn rate-limited API calls for identical results.
        if domain in scanned_domains:
            continue
        scanned_domains.add(domain)

        code_queries = build_code_queries(
            domain,
            deep=args.deep,
            include_email=not args.no_email_query,
        )
        for query in code_queries:
            queries_executed += 1
            for item in client.search_code(query):
                finding = code_item_to_finding(
                    target=original_target,
                    normalized_domain=domain,
                    query=query,
                    item=item,
                )
                key = finding.key()
                existing = findings_by_key.get(key)
                if existing is not None:
                    _merge_code_finding(existing, finding, query)
                else:
                    findings_by_key[key] = finding

        if args.no_repo_search:
            continue

        for base_query in build_repository_queries(domain):
            query = f"{base_query} fork:true" if args.include_forks else base_query
            queries_executed += 1
            for item in client.search_repositories(query):
                finding = repo_item_to_finding(
                    target=original_target,
                    normalized_domain=domain,
                    query=query,
                    item=item,
                )
                # Repository hits keep the first record for a key; no merging.
                findings_by_key.setdefault(finding.key(), finding)

    findings = sorted(
        findings_by_key.values(),
        key=lambda f: (
            0 if "sensitive-keyword" in f.tags else 1,
            0 if "config-file" in f.tags else 1,
            # Tolerate a missing repository name instead of crashing the sort.
            (f.repo_full_name or "").lower(),
            f.file_path or "",
        ),
    )
    unique_repos = len({f.repo_full_name for f in findings if f.repo_full_name})
    summary = ScanSummary(
        tool="GitOSINTX",
        version=__version__,
        started_at=started_at,
        finished_at=ScanSummary.now_iso(),
        targets=raw_targets,
        normalized_domains=domains,
        queries_executed=queries_executed,
        findings_count=len(findings),
        unique_repositories=unique_repos,
        notes=DEFAULT_NOTES.copy(),
    )
    if not token:
        summary.notes.append(
            "No GitHub token was provided. Authenticated searches are strongly recommended for reliability and higher rate limits."
        )
    return summary, findings
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def main(argv: List[str] | None = None) -> int:
    """Command-line entry point: parse arguments, scan, and write the report.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``.

    Returns:
        Process exit status: ``0`` on success, ``2`` when the scan or report
        writing fails with a handled error, ``130`` when interrupted with
        Ctrl-C. Argument errors exit through argparse itself.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    if not args.quiet:
        print(BANNER)

    out_file = args.out_file or _default_out_file(args.output)
    try:
        summary, findings = run_scan(args)
        if args.output == "json":
            write_json_report(out_file, summary, findings)
        else:
            write_html_report(out_file, summary, findings)
    except (ValueError, OSError, GitHubAPIError) as exc:
        # OSError is the base of FileNotFoundError and also covers
        # PermissionError, IsADirectoryError and similar failures when reading
        # the target list or writing the report, so those end in a clean
        # message and exit code rather than a traceback.
        print(f"[!] {exc}", file=sys.stderr)
        return 2
    except KeyboardInterrupt:
        print("\n[!] Interrupted by user.", file=sys.stderr)
        return 130

    if not args.quiet:
        print(f"[+] Findings: {summary.findings_count}")
        print(f"[+] Unique repositories: {summary.unique_repositories}")
        print(f"[+] Queries executed: {summary.queries_executed}")
        print(f"[+] Report written: {Path(out_file).resolve()}")
    return 0
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
# Allow running this module directly; main()'s return value becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
|