gitosintx 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitosintx/__init__.py +4 -0
- gitosintx/__main__.py +4 -0
- gitosintx/banner.py +10 -0
- gitosintx/cli.py +262 -0
- gitosintx/github.py +203 -0
- gitosintx/models.py +62 -0
- gitosintx/report.py +157 -0
- gitosintx/utils.py +207 -0
- gitosintx-0.1.0.dist-info/METADATA +165 -0
- gitosintx-0.1.0.dist-info/RECORD +14 -0
- gitosintx-0.1.0.dist-info/WHEEL +5 -0
- gitosintx-0.1.0.dist-info/entry_points.txt +2 -0
- gitosintx-0.1.0.dist-info/licenses/LICENSE +21 -0
- gitosintx-0.1.0.dist-info/top_level.txt +1 -0
gitosintx/__init__.py
ADDED
gitosintx/__main__.py
ADDED
gitosintx/banner.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
BANNER = r"""
|
|
2
|
+
______ _ __ ____ _____ _____ _______ ______
|
|
3
|
+
/ ____/(_) /_/ __ \/ ___// _/ | / /_ __/ |/ / |
|
|
4
|
+
/ / __/ / __/ / / /\__ \ / // |/ / / / | / /| |
|
|
5
|
+
/ /_/ / / /_/ /_/ /___/ // // /| / / / / | ___ |
|
|
6
|
+
\____/_/\__/\____//____/___/_/ |_/ /_/ /_/|_|/ |_|
|
|
7
|
+
|
|
8
|
+
GitOSINTX - GitHub Domain & URL Mention Enumerator
|
|
9
|
+
Developed by Harith Dilshan | h4rithd.com
|
|
10
|
+
"""
|
gitosintx/cli.py
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, List, Tuple
|
|
8
|
+
|
|
9
|
+
from . import __version__
|
|
10
|
+
from .banner import BANNER
|
|
11
|
+
from .github import GitHubAPIError, GitHubClient, code_item_to_finding, repo_item_to_finding
|
|
12
|
+
from .models import Finding, ScanSummary
|
|
13
|
+
from .report import write_html_report, write_json_report
|
|
14
|
+
from .utils import (
|
|
15
|
+
build_code_queries,
|
|
16
|
+
build_repository_queries,
|
|
17
|
+
dedupe_preserve_order,
|
|
18
|
+
normalize_domain,
|
|
19
|
+
read_targets,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
DEFAULT_NOTES = [
|
|
24
|
+
"GitOSINTX uses the official GitHub REST Search API and only queries public GitHub data available to the authenticated user.",
|
|
25
|
+
"GitHub search is rate-limited and capped; results are broad OSINT evidence, not a guarantee of full GitHub coverage.",
|
|
26
|
+
"Do not use, validate, or abuse exposed credentials. Preserve location evidence and report responsibly through the correct program channel.",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``gitosintx`` command line.

    Returns:
        A parser that requires exactly one target source (``-u/--url`` or
        ``-list/--list``) and exposes the output, GitHub API tuning and
        verbosity options.
    """
    cli = argparse.ArgumentParser(
        prog="gitosintx",
        description="GitOSINTX - find public GitHub repository mentions of domains and URLs.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # A single URL and a target list are mutually exclusive; one is mandatory.
    source = cli.add_mutually_exclusive_group(required=True)
    source.add_argument(
        "-u",
        "--url",
        dest="url",
        help="Single target domain or URL, e.g. https://h4rithd.com, http://h4rithd.com, or h4rithd.com",
    )
    source.add_argument(
        "-list",
        "--list",
        dest="list_path",
        help="File containing domains/URLs, one per line. Blank lines and # comments are ignored.",
    )

    cli.add_argument(
        "-o",
        "--output",
        choices=["json", "html"],
        default="html",
        help="Report output format.",
    )
    cli.add_argument(
        "--out",
        dest="out_file",
        help="Output report file path. Defaults to gitosintx-report.html or gitosintx-report.json.",
    )
    cli.add_argument(
        "--token",
        help="GitHub token. Prefer setting GITHUB_TOKEN instead of passing tokens on the command line.",
    )

    # (flag, converter, default, help) for the numeric API tuning options.
    numeric_options = (
        (
            "--max-pages",
            int,
            2,
            "Maximum GitHub result pages per query. GitHub allows up to 100 results per page.",
        ),
        ("--per-page", int, 50, "Results per GitHub API page. Maximum is 100."),
        ("--sleep", float, 1.0, "Delay in seconds between paginated API requests."),
    )
    for flag, converter, fallback, text in numeric_options:
        cli.add_argument(flag, type=converter, default=fallback, help=text)

    # (option strings, help) for the boolean switches, in help-output order.
    switches = (
        (
            ("--wait-rate-limit",),
            "Sleep and resume when GitHub primary/secondary rate limits are detected.",
        ),
        (
            ("--deep",),
            "Run additional extension/CI/config-focused queries. Slower and more rate-limit heavy.",
        ),
        (
            ("--no-repo-search",),
            "Disable repository metadata search and only run code search.",
        ),
        (
            ("--no-email-query",),
            "Do not search for @domain email-style mentions.",
        ),
        (
            ("--include-forks",),
            "Append fork:true to repository search queries. Code search may still include fork behavior controlled by GitHub.",
        ),
        (("--quiet",), "Suppress banner and progress output."),
        (("-v", "--verbose"), "Print API query progress to stderr."),
    )
    for option_strings, text in switches:
        cli.add_argument(*option_strings, action="store_true", help=text)

    cli.add_argument(
        "--version",
        action="version",
        version=f"GitOSINTX {__version__}",
    )
    return cli
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _default_out_file(fmt: str) -> str:
|
|
129
|
+
return f"gitosintx-report.{fmt}"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _normalize_targets(raw_targets: List[str]) -> Tuple[List[str], Dict[str, str]]:
    """Normalize every raw target and remember which domain each one became.

    Args:
        raw_targets: Domains/URLs exactly as supplied by the user.

    Returns:
        A pair ``(domains, mapping)``: ``domains`` holds the unique normalized
        domains in first-seen order, and ``mapping`` maps each raw target to
        its normalized domain.

    Raises:
        ValueError: Propagated from ``normalize_domain`` for invalid input.
    """
    pairs = [(raw, normalize_domain(raw)) for raw in raw_targets]
    unique_domains = dedupe_preserve_order([domain for _, domain in pairs])
    return unique_domains, dict(pairs)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _merge_code_finding(existing: Finding, incoming: Finding, query: str) -> None:
    """Fold a repeated code-search hit into the finding already recorded for it."""
    # Compare against the individual recorded queries rather than using a
    # substring test, so one query can never mask another that contains it.
    if query not in existing.query.split(" || "):
        existing.query = f"{existing.query} || {query}"
    for tag in incoming.tags:
        if tag not in existing.tags:
            existing.tags.append(tag)
    for fragment in incoming.matched_fragments:
        if fragment not in existing.matched_fragments:
            existing.matched_fragments.append(fragment)


def run_scan(args: argparse.Namespace) -> Tuple[ScanSummary, List[Finding]]:
    """Run all GitHub searches for the requested targets.

    Args:
        args: Parsed command-line options as produced by ``build_parser``.

    Returns:
        ``(summary, findings)`` where ``findings`` is de-duplicated by
        ``Finding.key()`` and sorted so entries tagged ``sensitive-keyword``
        and ``config-file`` come first, then by repository name and file path.

    Raises:
        ValueError: If a target is missing or cannot be normalized.
        FileNotFoundError: If the target list file does not exist.
        GitHubAPIError: If a GitHub API request fails.
    """
    started_at = ScanSummary.now_iso()
    raw_targets = read_targets(args.url, args.list_path)
    domains, mapping = _normalize_targets(raw_targets)
    token = args.token or os.getenv("GITHUB_TOKEN")

    client = GitHubClient(
        token=token,
        per_page=args.per_page,
        max_pages=args.max_pages,
        sleep=args.sleep,
        wait_rate_limit=args.wait_rate_limit,
        verbose=args.verbose,
    )

    # Several raw targets can normalize to the same domain (for example
    # "https://example.com" and "example.com"). Search each domain only once
    # and attribute its findings to the first raw target that produced it;
    # repeating the queries would only burn rate limit for identical results.
    first_target: Dict[str, str] = {}
    for raw, domain in mapping.items():
        first_target.setdefault(domain, raw)

    findings_by_key: Dict[str, Finding] = {}
    queries_executed = 0

    for domain in domains:
        original_target = first_target[domain]
        code_queries = build_code_queries(
            domain,
            deep=args.deep,
            include_email=not args.no_email_query,
        )
        for query in code_queries:
            queries_executed += 1
            for item in client.search_code(query):
                finding = code_item_to_finding(
                    target=original_target,
                    normalized_domain=domain,
                    query=query,
                    item=item,
                )
                existing = findings_by_key.get(finding.key())
                if existing:
                    _merge_code_finding(existing, finding, query)
                else:
                    findings_by_key[finding.key()] = finding

        if args.no_repo_search:
            continue

        for query in build_repository_queries(domain):
            if args.include_forks:
                query = f"{query} fork:true"
            queries_executed += 1
            for item in client.search_repositories(query):
                finding = repo_item_to_finding(
                    target=original_target,
                    normalized_domain=domain,
                    query=query,
                    item=item,
                )
                # Repository hits keep the first occurrence only.
                findings_by_key.setdefault(finding.key(), finding)

    findings = sorted(
        findings_by_key.values(),
        key=lambda f: (
            0 if "sensitive-keyword" in f.tags else 1,
            0 if "config-file" in f.tags else 1,
            f.repo_full_name.lower(),
            f.file_path or "",
        ),
    )
    unique_repos = len({f.repo_full_name for f in findings if f.repo_full_name})
    summary = ScanSummary(
        tool="GitOSINTX",
        version=__version__,
        started_at=started_at,
        finished_at=ScanSummary.now_iso(),
        targets=raw_targets,
        normalized_domains=domains,
        queries_executed=queries_executed,
        findings_count=len(findings),
        unique_repositories=unique_repos,
        notes=DEFAULT_NOTES.copy(),
    )
    if not token:
        summary.notes.append(
            "No GitHub token was provided. Authenticated searches are strongly recommended for reliability and higher rate limits."
        )
    return summary, findings
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def main(argv: List[str] | None = None) -> int:
    """Command-line entry point: run the scan and write the report.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` on success, ``2`` when the scan or the report writing fails with
        a handled error, and ``130`` when interrupted by the user.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    if not args.quiet:
        print(BANNER)

    out_file = args.out_file or _default_out_file(args.output)
    write_report = write_json_report if args.output == "json" else write_html_report
    try:
        summary, findings = run_scan(args)
        write_report(out_file, summary, findings)
    except (ValueError, OSError, GitHubAPIError) as exc:
        # OSError (the base class of FileNotFoundError) also covers an
        # unwritable report path or an unreadable target list, so those
        # failures produce a clean message instead of a traceback.
        print(f"[!] {exc}", file=sys.stderr)
        return 2
    except KeyboardInterrupt:
        print("\n[!] Interrupted by user.", file=sys.stderr)
        return 130

    if not args.quiet:
        print(f"[+] Findings: {summary.findings_count}")
        print(f"[+] Unique repositories: {summary.unique_repositories}")
        print(f"[+] Queries executed: {summary.queries_executed}")
        print(f"[+] Report written: {Path(out_file).resolve()}")
    return 0
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
if __name__ == "__main__":
|
|
262
|
+
raise SystemExit(main())
|
gitosintx/github.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
import time
|
|
5
|
+
from typing import Any, Dict, Iterator, List, Optional
|
|
6
|
+
|
|
7
|
+
import requests
|
|
8
|
+
|
|
9
|
+
from .models import Finding
|
|
10
|
+
from .utils import classify_finding
|
|
11
|
+
|
|
12
|
+
GITHUB_API = "https://api.github.com"
# NOTE(review): confirm this is a version GitHub currently supports; the REST
# API documentation states that unsupported version values are rejected.
API_VERSION = "2026-03-10"


class GitHubAPIError(RuntimeError):
    """Raised when a GitHub API request fails or is rejected."""


class GitHubClient:
    """Small client for the GitHub REST search endpoints.

    Wraps a ``requests.Session`` carrying the API headers (and the bearer
    token when one is given) and exposes paginated code and repository search.
    """

    # GitHub's search endpoints expose at most this many results per query;
    # asking for a page beyond it is answered with an error, not an empty page.
    _SEARCH_RESULT_CAP = 1000

    def __init__(
        self,
        token: Optional[str] = None,
        per_page: int = 50,
        max_pages: int = 2,
        sleep: float = 1.0,
        wait_rate_limit: bool = False,
        verbose: bool = False,
    ) -> None:
        """Configure the client.

        Args:
            token: GitHub token sent as a bearer ``Authorization`` header.
            per_page: Results per page, clamped to the range 1..100.
            max_pages: Maximum pages fetched per query, at least 1.
            sleep: Delay in seconds after each full page, never negative.
            wait_rate_limit: Sleep and retry on rate limits instead of raising.
            verbose: Print progress messages to stderr.
        """
        self.token = token
        self.per_page = max(1, min(per_page, 100))
        self.max_pages = max(1, max_pages)
        self.sleep = max(0.0, sleep)
        self.wait_rate_limit = wait_rate_limit
        self.verbose = verbose
        self.session = requests.Session()
        self.session.headers.update(
            {
                "Accept": "application/vnd.github.text-match+json, application/vnd.github+json",
                "X-GitHub-Api-Version": API_VERSION,
                "User-Agent": "GitOSINTX/0.1.0 (+https://h4rithd.com)",
            }
        )
        if token:
            self.session.headers.update({"Authorization": f"Bearer {token}"})

    def _log(self, message: str) -> None:
        """Print ``message`` to stderr when verbose mode is enabled."""
        if self.verbose:
            print(message, file=sys.stderr)

    @staticmethod
    def _to_int(value: Any) -> Optional[int]:
        """Return ``value`` as an int, or ``None`` when it is missing or malformed."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def _rate_limit_delay(self, response: Any, message: str) -> Optional[int]:
        """Return how many seconds to wait if ``response`` is a rate-limit rejection.

        Returns ``None`` when a 403 carries no rate-limit signal, so the caller
        can report it as an ordinary API error.
        """
        headers = response.headers

        # An explicit Retry-After from the server takes precedence.
        retry_after = self._to_int(headers.get("Retry-After"))
        if retry_after is not None:
            return max(1, retry_after)

        # Primary limit exhausted: wait until the advertised reset time.
        reset_at = self._to_int(headers.get("X-RateLimit-Reset"))
        if headers.get("X-RateLimit-Remaining") == "0" and reset_at is not None:
            return max(1, reset_at - int(time.time()) + 3)

        # No usable header: fall back to the status code and message text.
        if response.status_code == 429 or "rate limit" in message or "abuse" in message:
            return max(30, int(self.sleep * 10))
        return None

    def _request(self, endpoint: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """GET ``endpoint`` and return the decoded JSON object.

        Args:
            endpoint: Path appended to the GitHub API base URL.
            params: Query-string parameters.

        Raises:
            GitHubAPIError: On transport failure, rate limiting (unless
                ``wait_rate_limit`` is set, in which case the call sleeps and
                retries), authentication failure, any other HTTP error status,
                or a response body that is not a JSON object.
        """
        url = f"{GITHUB_API}{endpoint}"
        while True:
            try:
                response = self.session.get(url, params=params, timeout=30)
            except requests.RequestException as exc:
                raise GitHubAPIError(f"GitHub API request failed: {exc}") from exc

            status = response.status_code
            if status in {403, 429}:
                body = self._safe_json(response)
                message = str(body.get("message", "")).lower()
                delay = self._rate_limit_delay(response, message)
                if delay is not None:
                    if self.wait_rate_limit:
                        self._log(f"[!] GitHub rate limit hit. Sleeping {delay}s...")
                        time.sleep(delay)
                        continue
                    raise GitHubAPIError(
                        "GitHub rate limit or abuse protection triggered. "
                        "Set GITHUB_TOKEN, reduce --max-pages, increase --sleep, or use --wait-rate-limit. "
                        f"GitHub said: {body.get('message', response.text)}"
                    )
                # A 403 without any rate-limit signal is a plain permission
                # error; let the generic handler below report it accurately.

            if status == 401:
                raise GitHubAPIError(
                    "GitHub authentication failed. Check your GITHUB_TOKEN. "
                    "Fine-grained tokens should have access to public repositories metadata/search."
                )

            if status >= 400:
                body = self._safe_json(response)
                raise GitHubAPIError(
                    f"GitHub API error HTTP {status}: {body.get('message', response.text)}"
                )

            try:
                data = response.json()
            except ValueError as exc:
                raise GitHubAPIError("GitHub API returned a body that is not valid JSON.") from exc
            if not isinstance(data, dict):
                raise GitHubAPIError("GitHub API returned an unexpected JSON payload.")
            return data

    @staticmethod
    def _safe_json(response: requests.Response) -> Dict[str, Any]:
        """Decode an error body as a dict, falling back to ``{"message": ...}``."""
        try:
            data = response.json()
        except ValueError:
            # Best effort: error pages are not always JSON.
            return {"message": response.text}
        if isinstance(data, dict):
            return data
        return {"message": str(data)}

    def _paginate(self, endpoint: str, label: str, query: str) -> Iterator[Dict[str, Any]]:
        """Yield the items of every result page of ``query`` on ``endpoint``."""
        for page in range(1, self.max_pages + 1):
            # Never ask for results beyond the search cap; that request would
            # fail and abort the scan instead of simply ending pagination.
            if page * self.per_page > self._SEARCH_RESULT_CAP:
                break
            self._log(f"[*] {label} search page {page}: {query}")
            data = self._request(
                endpoint,
                {"q": query, "per_page": self.per_page, "page": page},
            )
            items = data.get("items", []) or []
            yield from items
            # A short page means there is nothing further to fetch.
            if len(items) < self.per_page:
                break
            if self.sleep:
                time.sleep(self.sleep)

    def search_code(self, query: str) -> Iterator[Dict[str, Any]]:
        """Yield raw result items from ``/search/code`` for ``query``."""
        return self._paginate("/search/code", "Code", query)

    def search_repositories(self, query: str) -> Iterator[Dict[str, Any]]:
        """Yield raw result items from ``/search/repositories`` for ``query``."""
        return self._paginate("/search/repositories", "Repo", query)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _extract_fragments(item: Dict[str, Any]) -> List[str]:
|
|
140
|
+
fragments: List[str] = []
|
|
141
|
+
for match in item.get("text_matches", []) or []:
|
|
142
|
+
fragment = match.get("fragment")
|
|
143
|
+
if fragment:
|
|
144
|
+
fragments.append(fragment.strip())
|
|
145
|
+
return fragments
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def code_item_to_finding(
    *,
    target: str,
    normalized_domain: str,
    query: str,
    item: Dict[str, Any],
) -> Finding:
    """Convert one code-search result item into a ``Finding``.

    Args:
        target: The raw target the search was run for.
        normalized_domain: The normalized domain derived from ``target``.
        query: The search query that produced ``item``.
        item: Raw result item; its ``repository`` sub-object supplies the
            repository fields.

    Returns:
        A ``Finding`` with ``source_type`` set to ``"code"``.
    """
    repository = item.get("repository", {}) or {}
    snippets = _extract_fragments(item)
    location = item.get("path")
    link = item.get("html_url")

    # Fields taken from the enclosing repository object.
    repo_fields = {
        "repo_full_name": repository.get("full_name", ""),
        "repo_url": repository.get("html_url", ""),
        "language": repository.get("language"),
        "repo_description": repository.get("description"),
        "repo_stars": repository.get("stargazers_count"),
        "repo_forks": repository.get("forks_count"),
        "repo_updated_at": repository.get("updated_at"),
    }
    # Fields describing the matched file itself.
    file_fields = {
        "file_path": location,
        "file_url": link,
        "file_name": item.get("name"),
        "sha": item.get("sha"),
    }
    return Finding(
        target=target,
        normalized_domain=normalized_domain,
        query=query,
        source_type="code",
        matched_fragments=snippets,
        tags=classify_finding(location, snippets, link),
        score=item.get("score"),
        **repo_fields,
        **file_fields,
    )
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def repo_item_to_finding(
    *,
    target: str,
    normalized_domain: str,
    query: str,
    item: Dict[str, Any],
) -> Finding:
    """Convert one repository-search result item into a ``Finding``.

    Args:
        target: The raw target the search was run for.
        normalized_domain: The normalized domain derived from ``target``.
        query: The search query that produced ``item``.
        item: Raw repository object from the search response.

    Returns:
        A ``Finding`` with ``source_type`` set to ``"repository"`` and no
        matched fragments.
    """
    description = item.get("description")
    # Repository hits have no file, so classification runs on the
    # description and the full name instead of file content.
    classification_texts = [description or "", item.get("full_name") or ""]
    labels = classify_finding(None, classification_texts, item.get("html_url"))

    return Finding(
        target=target,
        normalized_domain=normalized_domain,
        query=query,
        source_type="repository",
        repo_full_name=item.get("full_name", ""),
        repo_url=item.get("html_url", ""),
        repo_description=description,
        repo_stars=item.get("stargazers_count"),
        repo_forks=item.get("forks_count"),
        repo_updated_at=item.get("updated_at"),
        language=item.get("language"),
        matched_fragments=[],
        tags=labels,
        score=item.get("score"),
    )
|
gitosintx/models.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import asdict, dataclass, field
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class Finding:
    """One GitHub search hit attributed to a scanned target."""

    # What was searched for and how.
    target: str
    normalized_domain: str
    query: str
    source_type: str
    # Repository the hit belongs to.
    repo_full_name: str
    repo_url: str
    # File details; left as None when not supplied.
    file_path: Optional[str] = None
    file_url: Optional[str] = None
    file_name: Optional[str] = None
    sha: Optional[str] = None
    # Repository metadata.
    language: Optional[str] = None
    repo_description: Optional[str] = None
    repo_stars: Optional[int] = None
    repo_forks: Optional[int] = None
    repo_updated_at: Optional[str] = None
    # Evidence and classification.
    matched_fragments: List[str] = field(default_factory=list)
    tags: List[str] = field(default_factory=list)
    score: Optional[float] = None

    def key(self) -> str:
        """Return the ``|``-joined identity string used to de-duplicate findings."""
        identity = (
            self.source_type,
            self.repo_full_name,
            self.file_path,
            self.file_url,
            self.normalized_domain,
        )
        # Missing or empty parts contribute an empty segment.
        return "|".join(part or "" for part in identity)

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dictionary."""
        return asdict(self)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class ScanSummary:
    """Run-level metadata and counters written alongside the findings."""

    tool: str
    version: str
    # Timestamps as ISO 8601 strings.
    started_at: str
    finished_at: str
    # Targets as supplied, and the domains they normalized to.
    targets: List[str]
    normalized_domains: List[str]
    # Counters.
    queries_executed: int
    findings_count: int
    unique_repositories: int
    notes: List[str] = field(default_factory=list)

    @staticmethod
    def now_iso() -> str:
        """Return the current UTC time as an ISO 8601 string with second precision."""
        return datetime.now(tz=timezone.utc).isoformat(timespec="seconds")

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dictionary."""
        return asdict(self)
|
gitosintx/report.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import html
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Iterable, List
|
|
7
|
+
|
|
8
|
+
from .models import Finding, ScanSummary
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def write_json_report(path: str, summary: ScanSummary, findings: Iterable[Finding]) -> None:
    """Write the scan summary and findings to ``path`` as a JSON document.

    Args:
        path: Destination file; it is overwritten if it already exists.
        summary: Scan summary, serialized via its ``to_dict`` method.
        findings: Findings, each serialized via its ``to_dict`` method.
    """
    summary_data = summary.to_dict()
    finding_data = [entry.to_dict() for entry in findings]
    payload = {"summary": summary_data, "findings": finding_data}
    text = json.dumps(payload, indent=2, ensure_ascii=False)
    Path(path).write_text(text, encoding="utf-8")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _badge(tag: str) -> str:
|
|
20
|
+
return f'<span class="badge">{html.escape(tag)}</span>'
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _safe(value: object) -> str:
|
|
24
|
+
if value is None:
|
|
25
|
+
return ""
|
|
26
|
+
return html.escape(str(value))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def write_html_report(path: str, summary: ScanSummary, findings: List[Finding]) -> None:
|
|
30
|
+
repo_count = summary.unique_repositories
|
|
31
|
+
rows = []
|
|
32
|
+
for finding in findings:
|
|
33
|
+
fragments = "\n---\n".join(finding.matched_fragments[:3])
|
|
34
|
+
file_cell = (
|
|
35
|
+
f'<a href="{_safe(finding.file_url)}" target="_blank" rel="noreferrer">{_safe(finding.file_path)}</a>'
|
|
36
|
+
if finding.file_url and finding.file_path
|
|
37
|
+
else _safe(finding.file_path or "-")
|
|
38
|
+
)
|
|
39
|
+
repo_cell = (
|
|
40
|
+
f'<a href="{_safe(finding.repo_url)}" target="_blank" rel="noreferrer">{_safe(finding.repo_full_name)}</a>'
|
|
41
|
+
if finding.repo_url
|
|
42
|
+
else _safe(finding.repo_full_name)
|
|
43
|
+
)
|
|
44
|
+
rows.append(
|
|
45
|
+
f"""
|
|
46
|
+
<tr>
|
|
47
|
+
<td>{_safe(finding.source_type)}</td>
|
|
48
|
+
<td>{repo_cell}<div class="muted">{_safe(finding.language or '')}</div></td>
|
|
49
|
+
<td>{file_cell}</td>
|
|
50
|
+
<td>{''.join(_badge(tag) for tag in finding.tags) or '<span class="muted">none</span>'}</td>
|
|
51
|
+
<td><code>{_safe(finding.query)}</code></td>
|
|
52
|
+
<td><pre>{_safe(fragments)}</pre></td>
|
|
53
|
+
</tr>
|
|
54
|
+
"""
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
notes = "".join(f"<li>{_safe(note)}</li>" for note in summary.notes)
|
|
58
|
+
targets = "".join(f"<span class='pill'>{_safe(target)}</span>" for target in summary.normalized_domains)
|
|
59
|
+
|
|
60
|
+
document = f"""<!doctype html>
|
|
61
|
+
<html lang="en">
|
|
62
|
+
<head>
|
|
63
|
+
<meta charset="utf-8">
|
|
64
|
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
65
|
+
<title>GitOSINTX Report</title>
|
|
66
|
+
<style>
|
|
67
|
+
:root {{
|
|
68
|
+
--bg: #0b1020;
|
|
69
|
+
--panel: #111935;
|
|
70
|
+
--panel2: #151f40;
|
|
71
|
+
--text: #eef2ff;
|
|
72
|
+
--muted: #aab4d4;
|
|
73
|
+
--accent: #7dd3fc;
|
|
74
|
+
--line: #273456;
|
|
75
|
+
--badge: #23345f;
|
|
76
|
+
--danger: #fda4af;
|
|
77
|
+
}}
|
|
78
|
+
* {{ box-sizing: border-box; }}
|
|
79
|
+
body {{
|
|
80
|
+
margin: 0;
|
|
81
|
+
font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
|
82
|
+
background: radial-gradient(circle at top left, #1e3a8a 0, transparent 35%), var(--bg);
|
|
83
|
+
color: var(--text);
|
|
84
|
+
}}
|
|
85
|
+
header {{ padding: 42px 34px 22px; border-bottom: 1px solid var(--line); }}
|
|
86
|
+
h1 {{ margin: 0; font-size: 42px; letter-spacing: -0.04em; }}
|
|
87
|
+
h2 {{ margin-top: 32px; }}
|
|
88
|
+
a {{ color: var(--accent); text-decoration: none; }}
|
|
89
|
+
a:hover {{ text-decoration: underline; }}
|
|
90
|
+
.subtitle {{ color: var(--muted); margin-top: 8px; }}
|
|
91
|
+
.brand {{ color: var(--accent); font-weight: 700; }}
|
|
92
|
+
main {{ padding: 28px 34px 54px; }}
|
|
93
|
+
.grid {{ display: grid; grid-template-columns: repeat(4, minmax(0, 1fr)); gap: 16px; }}
|
|
94
|
+
.card {{ background: rgba(17, 25, 53, 0.88); border: 1px solid var(--line); border-radius: 18px; padding: 18px; box-shadow: 0 18px 40px rgba(0,0,0,.22); }}
|
|
95
|
+
.metric {{ font-size: 32px; font-weight: 800; }}
|
|
96
|
+
.label {{ color: var(--muted); font-size: 13px; margin-top: 4px; }}
|
|
97
|
+
.pill, .badge {{ display: inline-block; margin: 3px 5px 3px 0; padding: 5px 9px; border-radius: 999px; background: var(--badge); color: var(--text); font-size: 12px; border: 1px solid var(--line); }}
|
|
98
|
+
.badge {{ color: var(--accent); }}
|
|
99
|
+
.muted {{ color: var(--muted); font-size: 12px; margin-top: 4px; }}
|
|
100
|
+
table {{ width: 100%; border-collapse: collapse; margin-top: 18px; overflow: hidden; border-radius: 16px; }}
|
|
101
|
+
th, td {{ text-align: left; vertical-align: top; padding: 13px; border-bottom: 1px solid var(--line); }}
|
|
102
|
+
th {{ color: var(--muted); font-size: 12px; text-transform: uppercase; letter-spacing: .08em; background: var(--panel2); }}
|
|
103
|
+
td {{ background: rgba(17, 25, 53, 0.76); font-size: 14px; }}
|
|
104
|
+
code {{ color: #bae6fd; white-space: pre-wrap; overflow-wrap: anywhere; }}
|
|
105
|
+
pre {{ max-width: 520px; max-height: 220px; overflow: auto; padding: 10px; border-radius: 12px; background: #070b16; color: #dbeafe; border: 1px solid var(--line); white-space: pre-wrap; overflow-wrap: anywhere; }}
|
|
106
|
+
.notes {{ color: var(--muted); }}
|
|
107
|
+
footer {{ padding: 20px 34px; border-top: 1px solid var(--line); color: var(--muted); }}
|
|
108
|
+
@media (max-width: 980px) {{ .grid {{ grid-template-columns: repeat(2, 1fr); }} table {{ display: block; overflow-x: auto; }} }}
|
|
109
|
+
@media (max-width: 640px) {{ .grid {{ grid-template-columns: 1fr; }} header, main, footer {{ padding-left: 18px; padding-right: 18px; }} h1 {{ font-size: 34px; }} }}
|
|
110
|
+
</style>
|
|
111
|
+
</head>
|
|
112
|
+
<body>
|
|
113
|
+
<header>
|
|
114
|
+
<h1>GitOSINTX Report</h1>
|
|
115
|
+
<div class="subtitle">GitHub Domain & URL Mention Enumerator</div>
|
|
116
|
+
<div class="subtitle">Developed by <span class="brand">Harith Dilshan</span> | h4rithd.com</div>
|
|
117
|
+
</header>
|
|
118
|
+
<main>
|
|
119
|
+
<section class="grid">
|
|
120
|
+
<div class="card"><div class="metric">{summary.findings_count}</div><div class="label">Findings</div></div>
|
|
121
|
+
<div class="card"><div class="metric">{repo_count}</div><div class="label">Unique repositories</div></div>
|
|
122
|
+
<div class="card"><div class="metric">{summary.queries_executed}</div><div class="label">Queries executed</div></div>
|
|
123
|
+
<div class="card"><div class="metric">{len(summary.normalized_domains)}</div><div class="label">Domains searched</div></div>
|
|
124
|
+
</section>
|
|
125
|
+
|
|
126
|
+
<section class="card" style="margin-top: 18px;">
|
|
127
|
+
<strong>Targets</strong><br>
|
|
128
|
+
{targets}
|
|
129
|
+
</section>
|
|
130
|
+
|
|
131
|
+
<h2>Findings</h2>
|
|
132
|
+
<table>
|
|
133
|
+
<thead>
|
|
134
|
+
<tr>
|
|
135
|
+
<th>Type</th>
|
|
136
|
+
<th>Repository</th>
|
|
137
|
+
<th>File</th>
|
|
138
|
+
<th>Tags</th>
|
|
139
|
+
<th>Query</th>
|
|
140
|
+
<th>Matched fragment</th>
|
|
141
|
+
</tr>
|
|
142
|
+
</thead>
|
|
143
|
+
<tbody>
|
|
144
|
+
{''.join(rows) if rows else '<tr><td colspan="6">No findings.</td></tr>'}
|
|
145
|
+
</tbody>
|
|
146
|
+
</table>
|
|
147
|
+
|
|
148
|
+
<h2>Notes</h2>
|
|
149
|
+
<ul class="notes">{notes}</ul>
|
|
150
|
+
</main>
|
|
151
|
+
<footer>
|
|
152
|
+
Generated by GitOSINTX. Review findings manually before submitting security reports. Never use discovered credentials.
|
|
153
|
+
</footer>
|
|
154
|
+
</body>
|
|
155
|
+
</html>
|
|
156
|
+
"""
|
|
157
|
+
Path(path).write_text(document, encoding="utf-8")
|
gitosintx/utils.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Iterable, List, Sequence
|
|
6
|
+
from urllib.parse import urlparse
|
|
7
|
+
|
|
8
|
+
# Validates an ASCII (already IDNA-encoded) host name:
#   * at most 253 characters overall;
#   * one or more dot-terminated labels of 1-63 letters, digits or hyphens,
#     none of which may start or end with a hyphen;
#   * a final label that is either alphabetic (2-63 letters) or a punycode
#     "xn--" label, so internationalized TLDs such as "xn--p1ai" are accepted.
DOMAIN_RE = re.compile(
    r"^(?=.{1,253}$)"
    r"(?:(?!-)[a-zA-Z0-9-]{1,63}(?<!-)\.)+"
    r"(?:[a-zA-Z]{2,63}|[xX][nN]--[a-zA-Z0-9-]{1,59}(?<!-))$"
)
|
|
11
|
+
|
|
12
|
+
SENSITIVE_WORDS = {
|
|
13
|
+
"password",
|
|
14
|
+
"passwd",
|
|
15
|
+
"pwd",
|
|
16
|
+
"secret",
|
|
17
|
+
"token",
|
|
18
|
+
"api_key",
|
|
19
|
+
"apikey",
|
|
20
|
+
"client_secret",
|
|
21
|
+
"access_key",
|
|
22
|
+
"private_key",
|
|
23
|
+
"credential",
|
|
24
|
+
"credentials",
|
|
25
|
+
"bearer",
|
|
26
|
+
"jwt",
|
|
27
|
+
"authorization",
|
|
28
|
+
"auth",
|
|
29
|
+
"session",
|
|
30
|
+
"cookie",
|
|
31
|
+
"db_password",
|
|
32
|
+
"database_url",
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
CONFIG_EXTENSIONS = {
|
|
36
|
+
".env",
|
|
37
|
+
".yml",
|
|
38
|
+
".yaml",
|
|
39
|
+
".json",
|
|
40
|
+
".xml",
|
|
41
|
+
".ini",
|
|
42
|
+
".conf",
|
|
43
|
+
".config",
|
|
44
|
+
".properties",
|
|
45
|
+
".toml",
|
|
46
|
+
".tf",
|
|
47
|
+
".tfvars",
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
CICD_MARKERS = {
|
|
51
|
+
".github/workflows",
|
|
52
|
+
".gitlab-ci.yml",
|
|
53
|
+
"jenkinsfile",
|
|
54
|
+
"circleci",
|
|
55
|
+
"azure-pipelines",
|
|
56
|
+
"bitbucket-pipelines",
|
|
57
|
+
"dockerfile",
|
|
58
|
+
"docker-compose",
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def normalize_domain(value: str) -> str:
    """Reduce a URL or domain string to a bare, lowercase, ASCII host name.

    Examples:
        https://www.example.com/path -> www.example.com
        http://example.com:443 -> example.com
        example.com/login -> example.com

    Args:
        value: A domain or URL, with or without a scheme.

    Returns:
        The host in lowercase IDNA (ASCII) form, with any leading ``*.``
        and surrounding dots removed.

    Raises:
        ValueError: If the input is empty, cannot be IDNA-encoded, or does
            not look like a domain name.
    """
    text = (value or "").strip()
    if not text:
        raise ValueError("empty domain/url input")

    # Scheme-less input is given a "//" prefix so urlparse treats the leading
    # part as the network location.
    parsed = urlparse(text) if "://" in text else urlparse("//" + text, scheme="http")

    # Fall back to manual splitting when urlparse cannot infer a host.
    host = parsed.hostname or text.split("/")[0].split(":")[0]

    host = host.strip().strip(".").lower()
    if host.startswith("*."):
        host = host[2:]

    try:
        host = host.encode("idna").decode("ascii")
    except UnicodeError as exc:
        raise ValueError(f"invalid internationalized domain: {value}") from exc

    if DOMAIN_RE.match(host):
        return host
    raise ValueError(f"invalid domain/url input: {value}")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def read_targets(single_url: str | None, list_path: str | None) -> List[str]:
    """Collect raw scan targets from a single value and/or a list file.

    Args:
        single_url: One domain or URL, or ``None``/empty to skip it.
        list_path: Path to a text file with one target per line, or
            ``None``/empty to skip it. ``~`` is expanded. Blank lines and
            lines starting with ``#`` are ignored.

    Returns:
        The targets in first-seen order with exact duplicates removed. The
        values are returned as given (only stripped of surrounding
        whitespace); they are not normalized here.

    Raises:
        FileNotFoundError: if ``list_path`` is given but does not exist.
        ValueError: if no target was supplied by either argument.
    """
    values: List[str] = []
    if single_url:
        values.append(single_url)
    if list_path:
        path = Path(list_path).expanduser()
        if not path.exists():
            raise FileNotFoundError(f"target list does not exist: {path}")
        # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading byte
        # order mark. With plain "utf-8" the BOM stays attached to the first
        # line (str.strip() does not remove it), so a "#" comment on line 1
        # was not skipped and the first target never deduplicated against an
        # identical BOM-free value.
        text = path.read_text(encoding="utf-8-sig", errors="ignore")
        for line in text.splitlines():
            item = line.strip()
            if item and not item.startswith("#"):
                values.append(item)
    if not values:
        raise ValueError("provide -u/--url or -list/--list")
    return dedupe_preserve_order(values)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def dedupe_preserve_order(values: Iterable[str]) -> List[str]:
    """Return the items of *values* with duplicates removed, keeping first-seen order.

    Args:
        values: Any iterable of hashable items; it is consumed exactly once.

    Returns:
        A new list holding the first occurrence of every distinct item.
    """
    # dict keys are unique and keep insertion order, which gives the same
    # result as tracking a "seen" set alongside an output list.
    return list(dict.fromkeys(values))
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def build_domain_variants(domain: str) -> List[str]:
    """Return *domain* followed by its ``www.``-toggled counterpart.

    A domain that starts with ``www.`` is paired with the same name minus
    that prefix; any other domain is paired with its ``www.``-prefixed form.
    """
    counterpart = domain[4:] if domain.startswith("www.") else f"www.{domain}"
    return dedupe_preserve_order([domain, counterpart])
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def build_code_queries(domain: str, deep: bool = False, include_email: bool = True) -> List[str]:
    """Build the code-search query strings for *domain* and its ``www.`` variant.

    Args:
        domain: Domain to search for; its variants come from
            ``build_domain_variants``.
        deep: When true, add queries restricted to specific file extensions
            and file names.
        include_email: When true, add an ``"@<host>"`` query per variant.

    Returns:
        Query strings in generation order with duplicates removed.
    """
    # Qualifiers appended to the quoted host when ``deep`` is requested.
    deep_qualifiers = (
        "extension:env",
        "extension:yml",
        "extension:yaml",
        "extension:json",
        "extension:js",
        "extension:ts",
        "extension:properties",
        "extension:xml",
        "filename:Dockerfile",
        "filename:docker-compose.yml",
        "filename:Jenkinsfile",
    )

    collected: List[str] = []
    for host in build_domain_variants(domain):
        collected += [
            f'"{host}" in:file',
            f'"https://{host}" in:file',
            f'"http://{host}" in:file',
            f'"//{host}" in:file',
            f'"{host}" in:path',
        ]
        if include_email:
            collected.append(f'"@{host}" in:file')
        if deep:
            collected += [f'"{host}" {qualifier}' for qualifier in deep_qualifiers]

    return dedupe_preserve_order(collected)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def build_repository_queries(domain: str) -> List[str]:
    """Build repository-search queries for *domain* and its brand label.

    The brand is the label just left of the suffix: the third-from-last label
    when the second-from-last is one of ``co``/``com``/``net``/``org`` (as in
    ``example.co.uk``), otherwise the second-from-last label.

    Args:
        domain: Bare domain name, e.g. ``www.example.com``.

    Returns:
        One ``"<candidate>" in:name,description,readme`` query per distinct
        candidate longer than two characters. Candidates are the full domain,
        the brand label and, for ``www.`` domains, the domain without that
        prefix.
    """
    parts = domain.split(".")
    if len(parts) >= 3 and parts[-2] in {"co", "com", "net", "org"}:
        brand = parts[-3]
    elif len(parts) >= 2:
        brand = parts[-2]
    else:
        # Single-label input (e.g. "localhost") has no second-from-last
        # label; indexing parts[-2] would raise IndexError, so use the only
        # label available.
        brand = parts[0]

    candidates = [domain, brand]
    if domain.startswith("www."):
        candidates.append(domain[4:])

    return dedupe_preserve_order(
        [
            f'"{candidate}" in:name,description,readme'
            for candidate in candidates
            if len(candidate) > 2
        ]
    )
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def classify_finding(path: str | None, fragments: Sequence[str] | None, file_url: str | None = None) -> List[str]:
    """Assign triage tags to a finding based on its path, snippets and URL.

    Args:
        path: Repository-relative file path of the finding, if any.
        fragments: Matched text snippets, if any.
        file_url: Link to the file, if any. It takes part in the keyword and
            ``@`` checks but not in the URL-syntax check (see below).

    Returns:
        A list drawn, in this order, from ``sensitive-keyword``,
        ``config-file``, ``cicd-devops``, ``url-reference`` and
        ``email-reference``. Each tag appears at most once.
    """
    lower_path = (path or "").lower()
    snippet_text = " ".join(fragments or []).lower()
    haystack = " ".join([lower_path, (file_url or "").lower(), snippet_text])

    tags: List[str] = []

    if any(word in haystack for word in SENSITIVE_WORDS):
        tags.append("sensitive-keyword")

    # Substring match on purpose: it also catches names such as
    # ".env.production" or "config.yml.example".
    if any(ext in lower_path for ext in CONFIG_EXTENSIONS):
        tags.append("config-file")

    if any(marker in lower_path for marker in CICD_MARKERS):
        tags.append("cicd-devops")

    # URL syntax is looked for in the path and snippets only. A supplied
    # file_url is itself a URL, so including it would tag every finding that
    # has a link, whatever its content. "//" also covers "http://" and
    # "https://".
    if "//" in lower_path or "//" in snippet_text:
        tags.append("url-reference")

    if "@" in haystack:
        tags.append("email-reference")

    return tags
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gitosintx
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: GitHub OSINT tool for finding public repository mentions of domains and URLs.
|
|
5
|
+
Author: Harith Dilshan
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://h4rithd.com
|
|
8
|
+
Project-URL: Repository, https://github.com/h4rithd/GitOSINTX
|
|
9
|
+
Project-URL: Issues, https://github.com/h4rithd/GitOSINTX/issues
|
|
10
|
+
Keywords: osint,github-osint,bug-bounty,domain-recon,github-search,security-research
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Information Technology
|
|
14
|
+
Classifier: Intended Audience :: System Administrators
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Security
|
|
23
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
|
|
24
|
+
Requires-Python: >=3.9
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: requests>=2.31.0
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# GitOSINTX
|
|
31
|
+
|
|
32
|
+
**GitOSINTX** is a GitHub OSINT command-line tool for finding public repository mentions of domains, URLs, and email-style references.
|
|
33
|
+
|
|
34
|
+
```text
|
|
35
|
+
______ _ __ ____ _____ _____ _______ ______
|
|
36
|
+
/ ____/(_) /_/ __ \/ ___// _/ | / /_ __/ |/ / |
|
|
37
|
+
/ / __/ / __/ / / /\__ \ / // |/ / / / | / /| |
|
|
38
|
+
/ /_/ / / /_/ /_/ /___/ // // /| / / / / | ___ |
|
|
39
|
+
\____/_/\__/\____//____/___/_/ |_/ /_/ /_/|_|/ |_|
|
|
40
|
+
|
|
41
|
+
GitOSINTX - GitHub Domain & URL Mention Enumerator
|
|
42
|
+
Developed by Harith Dilshan | h4rithd.com
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## What it does
|
|
46
|
+
|
|
47
|
+
GitOSINTX accepts a domain or URL, normalizes it into a bare domain, generates multiple GitHub Search API queries, deduplicates results, classifies risky-looking references, and exports a clean JSON or HTML report.
|
|
48
|
+
|
|
49
|
+
It is useful for:
|
|
50
|
+
|
|
51
|
+
- Bug bounty passive recon
|
|
52
|
+
- Public code exposure discovery
|
|
53
|
+
- Domain and URL mention enumeration
|
|
54
|
+
- Finding hardcoded API endpoints in public repositories
|
|
55
|
+
- Identifying config, CI/CD, and sensitive-keyword references
|
|
56
|
+
|
|
57
|
+
## What it does **not** do
|
|
58
|
+
|
|
59
|
+
GitOSINTX does not bypass GitHub limits, scrape private repositories, validate leaked credentials, or exploit anything. It only queries public GitHub data available to your GitHub API access level.
|
|
60
|
+
|
|
61
|
+
## Install
|
|
62
|
+
|
|
63
|
+
From PyPI after publication:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install gitosintx
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
For local development:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
git clone https://github.com/h4rithd/GitOSINTX
|
|
73
|
+
cd GitOSINTX
|
|
74
|
+
python3 -m pip install -e .
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## GitHub token
|
|
78
|
+
|
|
79
|
+
Authenticated GitHub requests are strongly recommended.
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
export GITHUB_TOKEN='ghp_xxxxxxxxxxxxxxxxxxxx'
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Avoid passing tokens directly on the command line because shell history may store them.
|
|
86
|
+
|
|
87
|
+
## Usage
|
|
88
|
+
|
|
89
|
+
Search a single domain or URL and export HTML:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
gitosintx -u https://h4rithd.com -o html --out h4rithd-github-osint.html
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Search a single domain and export JSON:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
gitosintx -u h4rithd.com -o json --out h4rithd-github-osint.json
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Search a list of domains/URLs:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
gitosintx -list examples/domains.txt -o html --out multi-domain-report.html
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Run deeper extension/config-focused queries:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
gitosintx -u h4rithd.com --deep -o html --out deep-report.html
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Be friendlier to GitHub rate limits:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
gitosintx -u h4rithd.com --max-pages 1 --sleep 2 --wait-rate-limit
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Show help:
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
gitosintx -h
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## CLI options
|
|
126
|
+
|
|
127
|
+
```text
|
|
128
|
+
-u, --url Single target domain or URL
|
|
129
|
+
-list, --list File containing domains/URLs, one per line
|
|
130
|
+
-o, --output Output format: html or json
|
|
131
|
+
--out Output report path
|
|
132
|
+
--token GitHub token; prefer GITHUB_TOKEN env var
|
|
133
|
+
--max-pages Maximum GitHub result pages per query
|
|
134
|
+
--per-page Results per GitHub API page, max 100
|
|
135
|
+
--sleep Delay between paginated requests
|
|
136
|
+
--wait-rate-limit Sleep and continue when rate limited
|
|
137
|
+
--deep Run additional config/extension-focused queries
|
|
138
|
+
--no-repo-search Disable repository metadata search
|
|
139
|
+
--no-email-query Disable @domain query
|
|
140
|
+
--include-forks Include forks in repository search where supported
|
|
141
|
+
--quiet Suppress banner/progress output
|
|
142
|
+
-v, --verbose Print query progress to stderr
|
|
143
|
+
--version Print version
|
|
144
|
+
-h, --help Show help
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Output tags
|
|
148
|
+
|
|
149
|
+
GitOSINTX applies simple triage tags to help prioritize manual review:
|
|
150
|
+
|
|
151
|
+
| Tag | Meaning |
|
|
152
|
+
| --- | --- |
|
|
153
|
+
| `sensitive-keyword` | The file path, file URL, or matched snippet contains a word such as token, secret, password, api_key, private_key, etc. |
|
|
154
|
+
| `config-file` | Match appears in config-style files such as `.env`, `.yml`, `.json`, `.properties`, `.tfvars`, etc. |
|
|
155
|
+
| `cicd-devops` | Match appears in CI/CD or deployment files such as GitHub Actions, Dockerfile, Jenkinsfile, etc. |
|
|
156
|
+
| `url-reference` | Match contains URL-style syntax. |
|
|
157
|
+
| `email-reference` | Match contains email-style syntax. |
|
|
158
|
+
|
|
159
|
+
## Responsible use
|
|
160
|
+
|
|
161
|
+
Do not use discovered credentials. Do not validate tokens. Do not access systems without explicit authorization. For bug bounty, preserve evidence: repository, file path, commit/hash when available, matched snippet, exposure type, and remediation recommendation.
|
|
162
|
+
|
|
163
|
+
## License
|
|
164
|
+
|
|
165
|
+
MIT License.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
gitosintx/__init__.py,sha256=S_wG0eaO2wpE4YM4Nu3HaBE61B4DlgUNJwNA5UT4Z5U,111
|
|
2
|
+
gitosintx/__main__.py,sha256=MHKZ_ae3fSLGTLUUMOx15fWdeOnJSHhq-zslRP5F5Lc,79
|
|
3
|
+
gitosintx/banner.py,sha256=Jz0XQksBL1m0R7THq81FwZOVYkdsKNsvB2I2FqSJELs,378
|
|
4
|
+
gitosintx/cli.py,sha256=WiiV6oBUkYyReWLIAcqNFi86uBYs90MAKMu0J16EL2o,8810
|
|
5
|
+
gitosintx/github.py,sha256=PzOOFXmEE3LdLYb3YTHsSRUy3nTosITSEMN02eLs_aQ,7045
|
|
6
|
+
gitosintx/models.py,sha256=SNb8chEhY-O62FFRrHF-mgxGX2n7w4fOJL7c5GZv4d0,1644
|
|
7
|
+
gitosintx/report.py,sha256=YC4KUJVg7Xt4yOm_oxfJ8g3UfGk4t9bVHwAL3P2WEGo,6659
|
|
8
|
+
gitosintx/utils.py,sha256=Yn8qOfjpCkDgvj-BTWI7K-AWwwFRZVN23BraZMUtvUI,5670
|
|
9
|
+
gitosintx-0.1.0.dist-info/licenses/LICENSE,sha256=qopjeV5v847ZI8oileUZ_xRaSY5YVDv439ZB3XCf6qI,1071
|
|
10
|
+
gitosintx-0.1.0.dist-info/METADATA,sha256=H_5afMiUZadw94CFVeTqQz2xuZcdavPPNj87fN1E7Zg,5374
|
|
11
|
+
gitosintx-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
12
|
+
gitosintx-0.1.0.dist-info/entry_points.txt,sha256=qvWSrkvY4Mbu99XImHu4ash3LeDxe3KoDG0fUeGDQK8,49
|
|
13
|
+
gitosintx-0.1.0.dist-info/top_level.txt,sha256=VCA15sWsYEzXTdOYN_HYTcthdxc0MGxHEtb1ouGZqb0,10
|
|
14
|
+
gitosintx-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Harith Dilshan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
gitosintx
|