gitosintx 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gitosintx/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """GitOSINTX - GitHub OSINT domain mention enumerator."""
2
+
3
+ __version__ = "0.1.0"
4
+ __author__ = "Harith Dilshan"
gitosintx/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .cli import main
2
+
3
+ if __name__ == "__main__":
4
+ raise SystemExit(main())
gitosintx/banner.py ADDED
@@ -0,0 +1,10 @@
1
+ BANNER = r"""
2
+ ______ _ __ ____ _____ _____ _______ ______
3
+ / ____/(_) /_/ __ \/ ___// _/ | / /_ __/ |/ / |
4
+ / / __/ / __/ / / /\__ \ / // |/ / / / | / /| |
5
+ / /_/ / / /_/ /_/ /___/ // // /| / / / / | ___ |
6
+ \____/_/\__/\____//____/___/_/ |_/ /_/ /_/|_|/ |_|
7
+
8
+ GitOSINTX - GitHub Domain & URL Mention Enumerator
9
+ Developed by Harith Dilshan | h4rithd.com
10
+ """
gitosintx/cli.py ADDED
@@ -0,0 +1,262 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import os
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import Dict, List, Tuple
8
+
9
+ from . import __version__
10
+ from .banner import BANNER
11
+ from .github import GitHubAPIError, GitHubClient, code_item_to_finding, repo_item_to_finding
12
+ from .models import Finding, ScanSummary
13
+ from .report import write_html_report, write_json_report
14
+ from .utils import (
15
+ build_code_queries,
16
+ build_repository_queries,
17
+ dedupe_preserve_order,
18
+ normalize_domain,
19
+ read_targets,
20
+ )
21
+
22
+
23
# Standing caveats attached to every scan; run_scan copies this list into
# ScanSummary.notes so a per-run note can be appended without mutating it.
DEFAULT_NOTES = [
    "GitOSINTX uses the official GitHub REST Search API and only queries public GitHub data available to the authenticated user.",
    "GitHub search is rate-limited and capped; results are broad OSINT evidence, not a guarantee of full GitHub coverage.",
    "Do not use, validate, or abuse exposed credentials. Preserve location evidence and report responsibly through the correct program channel.",
]
28
+
29
+
30
def build_parser() -> argparse.ArgumentParser:
    """Create the ``gitosintx`` command-line parser.

    Exactly one target source (``-u/--url`` or ``-list/--list``) is required.
    Options are registered in a fixed order so the generated help text keeps
    its layout.
    """
    cli = argparse.ArgumentParser(
        prog="gitosintx",
        description="GitOSINTX - find public GitHub repository mentions of domains and URLs.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Target selection: a single URL/domain or a file of them, never both.
    targets = cli.add_mutually_exclusive_group(required=True)
    targets.add_argument(
        "-u",
        "--url",
        dest="url",
        help="Single target domain or URL, e.g. https://h4rithd.com, http://h4rithd.com, or h4rithd.com",
    )
    targets.add_argument(
        "-list",
        "--list",
        dest="list_path",
        help="File containing domains/URLs, one per line. Blank lines and # comments are ignored.",
    )

    # Report destination and credentials.
    cli.add_argument(
        "-o",
        "--output",
        choices=["json", "html"],
        default="html",
        help="Report output format.",
    )
    cli.add_argument(
        "--out",
        dest="out_file",
        help="Output report file path. Defaults to gitosintx-report.html or gitosintx-report.json.",
    )
    cli.add_argument(
        "--token",
        help="GitHub token. Prefer setting GITHUB_TOKEN instead of passing tokens on the command line.",
    )

    # Numeric tuning knobs: (flag, converter, default, help).
    numeric_options = (
        (
            "--max-pages",
            int,
            2,
            "Maximum GitHub result pages per query. GitHub allows up to 100 results per page.",
        ),
        ("--per-page", int, 50, "Results per GitHub API page. Maximum is 100."),
        ("--sleep", float, 1.0, "Delay in seconds between paginated API requests."),
    )
    for flag, converter, fallback, text in numeric_options:
        cli.add_argument(flag, type=converter, default=fallback, help=text)

    # Boolean switches: (option strings, help).
    switches = (
        (
            ("--wait-rate-limit",),
            "Sleep and resume when GitHub primary/secondary rate limits are detected.",
        ),
        (
            ("--deep",),
            "Run additional extension/CI/config-focused queries. Slower and more rate-limit heavy.",
        ),
        (
            ("--no-repo-search",),
            "Disable repository metadata search and only run code search.",
        ),
        (("--no-email-query",), "Do not search for @domain email-style mentions."),
        (
            ("--include-forks",),
            "Append fork:true to repository search queries. Code search may still include fork behavior controlled by GitHub.",
        ),
        (("--quiet",), "Suppress banner and progress output."),
        (("-v", "--verbose"), "Print API query progress to stderr."),
    )
    for option_strings, text in switches:
        cli.add_argument(*option_strings, action="store_true", help=text)

    cli.add_argument(
        "--version",
        action="version",
        version=f"GitOSINTX {__version__}",
    )
    return cli
126
+
127
+
128
+ def _default_out_file(fmt: str) -> str:
129
+ return f"gitosintx-report.{fmt}"
130
+
131
+
132
def _normalize_targets(raw_targets: List[str]) -> Tuple[List[str], Dict[str, str]]:
    """Normalize every raw target.

    Returns a pair: the distinct normalized domains in first-seen order, and
    a mapping from each raw target string to its normalized domain.
    """
    domains = [normalize_domain(entry) for entry in raw_targets]
    raw_to_domain: Dict[str, str] = dict(zip(raw_targets, domains))
    return dedupe_preserve_order(domains), raw_to_domain
140
+
141
+
142
def _merge_code_finding(existing: Finding, incoming: Finding, query: str) -> None:
    """Fold a repeated code-search hit into the finding already recorded for it."""
    if query not in existing.query:
        existing.query = f"{existing.query} || {query}"
    for tag in incoming.tags:
        if tag not in existing.tags:
            existing.tags.append(tag)
    for fragment in incoming.matched_fragments:
        if fragment not in existing.matched_fragments:
            existing.matched_fragments.append(fragment)


def run_scan(args: argparse.Namespace) -> Tuple[ScanSummary, List[Finding]]:
    """Run all code and repository searches for the requested targets.

    Args:
        args: Parsed command-line namespace produced by ``build_parser``.

    Returns:
        A ``(summary, findings)`` pair. Findings are de-duplicated by
        ``Finding.key()`` and sorted so that sensitive-keyword hits come
        first, then config-file hits, then by repository name and file path.

    Raises:
        ValueError, FileNotFoundError: Propagated from target reading and
            normalization.
        GitHubAPIError: Propagated from the GitHub client.
    """
    started_at = ScanSummary.now_iso()
    raw_targets = read_targets(args.url, args.list_path)
    domains, mapping = _normalize_targets(raw_targets)
    token = args.token or os.getenv("GITHUB_TOKEN")

    client = GitHubClient(
        token=token,
        per_page=args.per_page,
        max_pages=args.max_pages,
        sleep=args.sleep,
        wait_rate_limit=args.wait_rate_limit,
        verbose=args.verbose,
    )

    findings_by_key: Dict[str, Finding] = {}
    queries_executed = 0
    scanned_domains = set()

    for original_target, domain in mapping.items():
        # Several raw targets can normalize to the same domain (for example
        # "https://example.com" and "example.com"). The queries depend only
        # on the domain, so scan it once; re-running them would burn
        # rate-limited API calls and only yield duplicates.
        if domain in scanned_domains:
            continue
        scanned_domains.add(domain)

        code_queries = build_code_queries(
            domain,
            deep=args.deep,
            include_email=not args.no_email_query,
        )
        for query in code_queries:
            queries_executed += 1
            for item in client.search_code(query):
                finding = code_item_to_finding(
                    target=original_target,
                    normalized_domain=domain,
                    query=query,
                    item=item,
                )
                existing = findings_by_key.get(finding.key())
                if existing:
                    _merge_code_finding(existing, finding, query)
                else:
                    findings_by_key[finding.key()] = finding

        if not args.no_repo_search:
            for query in build_repository_queries(domain):
                if args.include_forks:
                    query = f"{query} fork:true"
                queries_executed += 1
                for item in client.search_repositories(query):
                    finding = repo_item_to_finding(
                        target=original_target,
                        normalized_domain=domain,
                        query=query,
                        item=item,
                    )
                    # Repository hits keep the first query that surfaced them.
                    findings_by_key.setdefault(finding.key(), finding)

    findings = sorted(
        findings_by_key.values(),
        key=lambda f: (
            0 if "sensitive-keyword" in f.tags else 1,
            0 if "config-file" in f.tags else 1,
            f.repo_full_name.lower(),
            f.file_path or "",
        ),
    )
    unique_repos = len({f.repo_full_name for f in findings if f.repo_full_name})
    summary = ScanSummary(
        tool="GitOSINTX",
        version=__version__,
        started_at=started_at,
        finished_at=ScanSummary.now_iso(),
        targets=raw_targets,
        normalized_domains=domains,
        queries_executed=queries_executed,
        findings_count=len(findings),
        unique_repositories=unique_repos,
        notes=DEFAULT_NOTES.copy(),
    )
    if not token:
        summary.notes.append(
            "No GitHub token was provided. Authenticated searches are strongly recommended for reliability and higher rate limits."
        )
    return summary, findings
230
+
231
+
232
def main(argv: List[str] | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` on success, ``2`` when the scan or the report write fails with
        a handled error, ``130`` when interrupted with Ctrl-C.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    if not args.quiet:
        print(BANNER)

    out_file = args.out_file or _default_out_file(args.output)
    try:
        summary, findings = run_scan(args)
        if args.output == "json":
            write_json_report(out_file, summary, findings)
        else:
            write_html_report(out_file, summary, findings)
    # OSError covers FileNotFoundError plus the failures the report write can
    # hit (permission denied, path is a directory), so they are reported as a
    # one-line error instead of a traceback.
    except (ValueError, OSError, GitHubAPIError) as exc:
        print(f"[!] {exc}", file=sys.stderr)
        return 2
    except KeyboardInterrupt:
        print("\n[!] Interrupted by user.", file=sys.stderr)
        return 130

    if not args.quiet:
        print(f"[+] Findings: {summary.findings_count}")
        print(f"[+] Unique repositories: {summary.unique_repositories}")
        print(f"[+] Queries executed: {summary.queries_executed}")
        print(f"[+] Report written: {Path(out_file).resolve()}")
    return 0
259
+
260
+
261
+ if __name__ == "__main__":
262
+ raise SystemExit(main())
gitosintx/github.py ADDED
@@ -0,0 +1,203 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ import time
5
+ from typing import Any, Dict, Iterator, List, Optional
6
+
7
+ import requests
8
+
9
+ from .models import Finding
10
+ from .utils import classify_finding
11
+
12
# Base URL that GitHubClient._request prepends to every endpoint path.
GITHUB_API = "https://api.github.com"
# Value sent in the X-GitHub-Api-Version header of every session request.
API_VERSION = "2026-03-10"
14
+
15
+
16
class GitHubAPIError(RuntimeError):
    """Raised when a GitHub API request fails or is rejected by GitHub."""
18
+
19
+
20
class GitHubClient:
    """Small GitHub REST Search API client with pagination and rate-limit handling.

    Args:
        token: Optional GitHub token, sent as a Bearer credential.
        per_page: Results per page, clamped to 1..100.
        max_pages: Maximum pages fetched per query (at least 1).
        sleep: Delay in seconds between paginated requests (never negative).
        wait_rate_limit: Sleep and retry on rate limits instead of raising.
        verbose: Print progress messages to stderr.
    """

    # GitHub's Search API only serves the first 1,000 results of a query;
    # asking for a page beyond that window is rejected, which would abort
    # the whole scan.
    SEARCH_RESULT_CAP = 1000

    def __init__(
        self,
        token: Optional[str] = None,
        per_page: int = 50,
        max_pages: int = 2,
        sleep: float = 1.0,
        wait_rate_limit: bool = False,
        verbose: bool = False,
    ) -> None:
        self.token = token
        self.per_page = max(1, min(per_page, 100))
        self.max_pages = max(1, max_pages)
        self.sleep = max(0.0, sleep)
        self.wait_rate_limit = wait_rate_limit
        self.verbose = verbose
        self.session = requests.Session()
        self.session.headers.update(
            {
                "Accept": "application/vnd.github.text-match+json, application/vnd.github+json",
                "X-GitHub-Api-Version": API_VERSION,
                "User-Agent": "GitOSINTX/0.1.0 (+https://h4rithd.com)",
            }
        )
        if token:
            self.session.headers.update({"Authorization": f"Bearer {token}"})

    def _log(self, message: str) -> None:
        """Print *message* to stderr when verbose mode is on."""
        if self.verbose:
            print(message, file=sys.stderr)

    def _rate_limit_delay(self, response: Any, body: Dict[str, Any]) -> Optional[int]:
        """Return how many seconds to wait before retrying, or ``None``.

        ``None`` means the 403/429 response does not look like a rate limit.
        """
        fallback = max(30, int(self.sleep * 10))

        # An explicit Retry-After from the server takes precedence.
        retry_after = response.headers.get("Retry-After")
        if retry_after is not None:
            try:
                return max(1, int(retry_after))
            except ValueError:
                pass  # Not a plain number of seconds; use the other signals.

        remaining = response.headers.get("X-RateLimit-Remaining")
        reset_at = response.headers.get("X-RateLimit-Reset")
        if remaining == "0" and reset_at is not None:
            try:
                # Small cushion past the advertised reset time.
                return max(1, int(reset_at) - int(time.time()) + 3)
            except ValueError:
                return fallback

        message = str(body.get("message", "")).lower()
        if "secondary rate limit" in message or "abuse" in message:
            return fallback
        return None

    def _request(self, endpoint: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """GET *endpoint* and return the decoded JSON body.

        Raises:
            GitHubAPIError: On transport failures, authentication failures,
                rate limits (unless ``wait_rate_limit`` allows a retry) and
                any other HTTP status of 400 or above.
        """
        url = f"{GITHUB_API}{endpoint}"
        while True:
            try:
                response = self.session.get(url, params=params, timeout=30)
            except requests.RequestException as exc:
                raise GitHubAPIError(f"GitHub API request failed: {exc}") from exc

            if response.status_code in {403, 429}:
                body = self._safe_json(response)
                if self.wait_rate_limit:
                    delay = self._rate_limit_delay(response, body)
                    if delay is not None:
                        self._log(f"[!] GitHub rate limit hit. Sleeping {delay}s...")
                        time.sleep(delay)
                        continue

                raise GitHubAPIError(
                    "GitHub rate limit or abuse protection triggered. "
                    "Set GITHUB_TOKEN, reduce --max-pages, increase --sleep, or use --wait-rate-limit. "
                    f"GitHub said: {body.get('message', response.text)}"
                )

            if response.status_code == 401:
                raise GitHubAPIError(
                    "GitHub authentication failed. Check your GITHUB_TOKEN. "
                    "Fine-grained tokens should have access to public repositories metadata/search."
                )

            if response.status_code >= 400:
                body = self._safe_json(response)
                raise GitHubAPIError(
                    f"GitHub API error HTTP {response.status_code}: {body.get('message', response.text)}"
                )

            return response.json()

    @staticmethod
    def _safe_json(response: requests.Response) -> Dict[str, Any]:
        """Decode an error body into a dict that always has a usable ``message``."""
        try:
            data = response.json()
            if isinstance(data, dict):
                return data
            return {"message": str(data)}
        except Exception:
            # Best effort: error bodies are not guaranteed to be JSON.
            return {"message": response.text}

    def _paginate(self, endpoint: str, label: str, query: str) -> Iterator[Dict[str, Any]]:
        """Yield result items for *query*, page by page.

        Stops at ``max_pages``, at the last page that still fits inside the
        search result cap, or as soon as a page comes back short.
        """
        last_page = min(self.max_pages, max(1, self.SEARCH_RESULT_CAP // self.per_page))
        for page in range(1, last_page + 1):
            self._log(f"[*] {label} search page {page}: {query}")
            data = self._request(
                endpoint,
                {"q": query, "per_page": self.per_page, "page": page},
            )
            items = data.get("items", []) or []
            yield from items
            # A short page means there is nothing left; do not sleep after
            # the final page either.
            if len(items) < self.per_page or page == last_page:
                break
            if self.sleep:
                time.sleep(self.sleep)

    def search_code(self, query: str) -> Iterator[Dict[str, Any]]:
        """Yield raw code-search result items for *query*."""
        return self._paginate("/search/code", "Code", query)

    def search_repositories(self, query: str) -> Iterator[Dict[str, Any]]:
        """Yield raw repository-search result items for *query*."""
        return self._paginate("/search/repositories", "Repo", query)
137
+
138
+
139
+ def _extract_fragments(item: Dict[str, Any]) -> List[str]:
140
+ fragments: List[str] = []
141
+ for match in item.get("text_matches", []) or []:
142
+ fragment = match.get("fragment")
143
+ if fragment:
144
+ fragments.append(fragment.strip())
145
+ return fragments
146
+
147
+
148
def code_item_to_finding(
    *,
    target: str,
    normalized_domain: str,
    query: str,
    item: Dict[str, Any],
) -> Finding:
    """Convert one code-search result item into a ``Finding``.

    Repository details are read from the item's nested ``repository`` object;
    tags are derived from the file path, matched fragments and file URL.
    """
    repository = item.get("repository") or {}
    snippets = _extract_fragments(item)
    file_path = item.get("path")
    html_url = item.get("html_url")
    labels = classify_finding(file_path, snippets, html_url)

    return Finding(
        target=target,
        normalized_domain=normalized_domain,
        query=query,
        source_type="code",
        # Repository-level fields.
        repo_full_name=repository.get("full_name", ""),
        repo_url=repository.get("html_url", ""),
        language=repository.get("language"),
        repo_description=repository.get("description"),
        repo_stars=repository.get("stargazers_count"),
        repo_forks=repository.get("forks_count"),
        repo_updated_at=repository.get("updated_at"),
        # File-level fields.
        file_path=file_path,
        file_url=html_url,
        file_name=item.get("name"),
        sha=item.get("sha"),
        matched_fragments=snippets,
        tags=labels,
        score=item.get("score"),
    )
179
+
180
+
181
def repo_item_to_finding(
    *,
    target: str,
    normalized_domain: str,
    query: str,
    item: Dict[str, Any],
) -> Finding:
    """Convert one repository-search result item into a ``Finding``.

    Repository hits carry no file information or matched fragments; tags are
    derived from the description, full name and repository URL.
    """
    full_name = item.get("full_name", "")
    html_url = item.get("html_url", "")
    description = item.get("description")

    # Classification treats the description and name as the searchable text.
    tag_sources = [item.get("description") or "", item.get("full_name") or ""]
    labels = classify_finding(None, tag_sources, item.get("html_url"))

    return Finding(
        target=target,
        normalized_domain=normalized_domain,
        query=query,
        source_type="repository",
        repo_full_name=full_name,
        repo_url=html_url,
        repo_description=description,
        repo_stars=item.get("stargazers_count"),
        repo_forks=item.get("forks_count"),
        repo_updated_at=item.get("updated_at"),
        language=item.get("language"),
        matched_fragments=[],
        tags=labels,
        score=item.get("score"),
    )
gitosintx/models.py ADDED
@@ -0,0 +1,62 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import asdict, dataclass, field
4
+ from datetime import datetime, timezone
5
+ from typing import Any, Dict, List, Optional
6
+
7
+
8
@dataclass
class Finding:
    """One search hit: either a code file or a repository."""

    # What was searched for and how.
    target: str
    normalized_domain: str
    query: str
    source_type: str
    # Where the hit lives.
    repo_full_name: str
    repo_url: str
    file_path: Optional[str] = None
    file_url: Optional[str] = None
    file_name: Optional[str] = None
    sha: Optional[str] = None
    # Repository metadata.
    language: Optional[str] = None
    repo_description: Optional[str] = None
    repo_stars: Optional[int] = None
    repo_forks: Optional[int] = None
    repo_updated_at: Optional[str] = None
    # Evidence and classification.
    matched_fragments: List[str] = field(default_factory=list)
    tags: List[str] = field(default_factory=list)
    score: Optional[float] = None

    def key(self) -> str:
        """Return the identity string used to de-duplicate findings."""
        identity = (
            self.source_type,
            self.repo_full_name,
            self.file_path,
            self.file_url,
            self.normalized_domain,
        )
        return "|".join(part or "" for part in identity)

    def to_dict(self) -> Dict[str, Any]:
        """Return the finding as a plain dictionary."""
        return asdict(self)
42
+
43
+
44
@dataclass
class ScanSummary:
    """Run-level metadata and totals written at the top of a report."""

    tool: str
    version: str
    # Timestamps in the format produced by now_iso().
    started_at: str
    finished_at: str
    # Targets as supplied, and their normalized domains.
    targets: List[str]
    normalized_domains: List[str]
    # Totals.
    queries_executed: int
    findings_count: int
    unique_repositories: int
    notes: List[str] = field(default_factory=list)

    @staticmethod
    def now_iso() -> str:
        """Return the current UTC time as an ISO 8601 string with second precision."""
        return datetime.now(tz=timezone.utc).isoformat(timespec="seconds")

    def to_dict(self) -> Dict[str, Any]:
        """Return the summary as a plain dictionary."""
        return asdict(self)
gitosintx/report.py ADDED
@@ -0,0 +1,157 @@
1
+ from __future__ import annotations
2
+
3
+ import html
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Iterable, List
7
+
8
+ from .models import Finding, ScanSummary
9
+
10
+
11
def write_json_report(path: str, summary: ScanSummary, findings: Iterable[Finding]) -> None:
    """Write the summary and findings to *path* as indented UTF-8 JSON."""
    payload = {
        "summary": summary.to_dict(),
        "findings": [entry.to_dict() for entry in findings],
    }
    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
    Path(path).write_text(serialized, encoding="utf-8")
17
+
18
+
19
+ def _badge(tag: str) -> str:
20
+ return f'<span class="badge">{html.escape(tag)}</span>'
21
+
22
+
23
+ def _safe(value: object) -> str:
24
+ if value is None:
25
+ return ""
26
+ return html.escape(str(value))
27
+
28
+
29
def write_html_report(path: str, summary: ScanSummary, findings: List[Finding]) -> None:
    """Render the scan as a single self-contained HTML page and write it to *path*.

    The page holds four metric cards, the normalized targets, one table row
    per finding and the summary notes. Every value taken from a finding or
    from the summary's text fields is passed through ``_safe``/``_badge``
    before it is placed in the markup.

    Args:
        path: Destination file; written as UTF-8.
        summary: Totals, normalized domains and notes for the page.
        findings: Findings to list, in the order given.
    """
    repo_count = summary.unique_repositories
    rows = []
    for finding in findings:
        # Show at most three fragments per finding, separated by a rule line.
        fragments = "\n---\n".join(finding.matched_fragments[:3])
        # Link the file only when both its URL and its path are known.
        file_cell = (
            f'<a href="{_safe(finding.file_url)}" target="_blank" rel="noreferrer">{_safe(finding.file_path)}</a>'
            if finding.file_url and finding.file_path
            else _safe(finding.file_path or "-")
        )
        repo_cell = (
            f'<a href="{_safe(finding.repo_url)}" target="_blank" rel="noreferrer">{_safe(finding.repo_full_name)}</a>'
            if finding.repo_url
            else _safe(finding.repo_full_name)
        )
        rows.append(
            f"""
<tr>
<td>{_safe(finding.source_type)}</td>
<td>{repo_cell}<div class="muted">{_safe(finding.language or '')}</div></td>
<td>{file_cell}</td>
<td>{''.join(_badge(tag) for tag in finding.tags) or '<span class="muted">none</span>'}</td>
<td><code>{_safe(finding.query)}</code></td>
<td><pre>{_safe(fragments)}</pre></td>
</tr>
"""
        )

    notes = "".join(f"<li>{_safe(note)}</li>" for note in summary.notes)
    targets = "".join(f"<span class='pill'>{_safe(target)}</span>" for target in summary.normalized_domains)

    # Literal CSS braces are doubled because the template is an f-string.
    document = f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>GitOSINTX Report</title>
<style>
:root {{
--bg: #0b1020;
--panel: #111935;
--panel2: #151f40;
--text: #eef2ff;
--muted: #aab4d4;
--accent: #7dd3fc;
--line: #273456;
--badge: #23345f;
--danger: #fda4af;
}}
* {{ box-sizing: border-box; }}
body {{
margin: 0;
font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
background: radial-gradient(circle at top left, #1e3a8a 0, transparent 35%), var(--bg);
color: var(--text);
}}
header {{ padding: 42px 34px 22px; border-bottom: 1px solid var(--line); }}
h1 {{ margin: 0; font-size: 42px; letter-spacing: -0.04em; }}
h2 {{ margin-top: 32px; }}
a {{ color: var(--accent); text-decoration: none; }}
a:hover {{ text-decoration: underline; }}
.subtitle {{ color: var(--muted); margin-top: 8px; }}
.brand {{ color: var(--accent); font-weight: 700; }}
main {{ padding: 28px 34px 54px; }}
.grid {{ display: grid; grid-template-columns: repeat(4, minmax(0, 1fr)); gap: 16px; }}
.card {{ background: rgba(17, 25, 53, 0.88); border: 1px solid var(--line); border-radius: 18px; padding: 18px; box-shadow: 0 18px 40px rgba(0,0,0,.22); }}
.metric {{ font-size: 32px; font-weight: 800; }}
.label {{ color: var(--muted); font-size: 13px; margin-top: 4px; }}
.pill, .badge {{ display: inline-block; margin: 3px 5px 3px 0; padding: 5px 9px; border-radius: 999px; background: var(--badge); color: var(--text); font-size: 12px; border: 1px solid var(--line); }}
.badge {{ color: var(--accent); }}
.muted {{ color: var(--muted); font-size: 12px; margin-top: 4px; }}
table {{ width: 100%; border-collapse: collapse; margin-top: 18px; overflow: hidden; border-radius: 16px; }}
th, td {{ text-align: left; vertical-align: top; padding: 13px; border-bottom: 1px solid var(--line); }}
th {{ color: var(--muted); font-size: 12px; text-transform: uppercase; letter-spacing: .08em; background: var(--panel2); }}
td {{ background: rgba(17, 25, 53, 0.76); font-size: 14px; }}
code {{ color: #bae6fd; white-space: pre-wrap; overflow-wrap: anywhere; }}
pre {{ max-width: 520px; max-height: 220px; overflow: auto; padding: 10px; border-radius: 12px; background: #070b16; color: #dbeafe; border: 1px solid var(--line); white-space: pre-wrap; overflow-wrap: anywhere; }}
.notes {{ color: var(--muted); }}
footer {{ padding: 20px 34px; border-top: 1px solid var(--line); color: var(--muted); }}
@media (max-width: 980px) {{ .grid {{ grid-template-columns: repeat(2, 1fr); }} table {{ display: block; overflow-x: auto; }} }}
@media (max-width: 640px) {{ .grid {{ grid-template-columns: 1fr; }} header, main, footer {{ padding-left: 18px; padding-right: 18px; }} h1 {{ font-size: 34px; }} }}
</style>
</head>
<body>
<header>
<h1>GitOSINTX Report</h1>
<div class="subtitle">GitHub Domain & URL Mention Enumerator</div>
<div class="subtitle">Developed by <span class="brand">Harith Dilshan</span> | h4rithd.com</div>
</header>
<main>
<section class="grid">
<div class="card"><div class="metric">{summary.findings_count}</div><div class="label">Findings</div></div>
<div class="card"><div class="metric">{repo_count}</div><div class="label">Unique repositories</div></div>
<div class="card"><div class="metric">{summary.queries_executed}</div><div class="label">Queries executed</div></div>
<div class="card"><div class="metric">{len(summary.normalized_domains)}</div><div class="label">Domains searched</div></div>
</section>

<section class="card" style="margin-top: 18px;">
<strong>Targets</strong><br>
{targets}
</section>

<h2>Findings</h2>
<table>
<thead>
<tr>
<th>Type</th>
<th>Repository</th>
<th>File</th>
<th>Tags</th>
<th>Query</th>
<th>Matched fragment</th>
</tr>
</thead>
<tbody>
{''.join(rows) if rows else '<tr><td colspan="6">No findings.</td></tr>'}
</tbody>
</table>

<h2>Notes</h2>
<ul class="notes">{notes}</ul>
</main>
<footer>
Generated by GitOSINTX. Review findings manually before submitting security reports. Never use discovered credentials.
</footer>
</body>
</html>
"""
    Path(path).write_text(document, encoding="utf-8")
gitosintx/utils.py ADDED
@@ -0,0 +1,207 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from typing import Iterable, List, Sequence
6
+ from urllib.parse import urlparse
7
+
8
# Syntax check for an ASCII (already IDNA-encoded) hostname:
#   * at most 253 characters overall;
#   * one or more dot-separated labels of 1-63 letters, digits or hyphens,
#     none of which starts or ends with a hyphen;
#   * a final label that is either alphabetic (2-63 letters) or a punycode
#     TLD ("xn--..."), so internationalized TLDs survive IDNA encoding.
DOMAIN_RE = re.compile(
    r"^(?=.{1,253}$)"
    r"(?:(?!-)[a-zA-Z0-9-]{1,63}(?<!-)\.)+"
    r"(?:[a-zA-Z]{2,63}|[xX][nN]--[a-zA-Z0-9-]{1,59}(?<!-))$"
)
11
+
12
# Lowercase substrings; classify_finding tags a finding "sensitive-keyword"
# when any of them occurs in its path, file URL or matched fragments.
SENSITIVE_WORDS = {
    "password",
    "passwd",
    "pwd",
    "secret",
    "token",
    "api_key",
    "apikey",
    "client_secret",
    "access_key",
    "private_key",
    "credential",
    "credentials",
    "bearer",
    "jwt",
    "authorization",
    "auth",
    "session",
    "cookie",
    "db_password",
    "database_url",
}

# Extension-like substrings; classify_finding tags a finding "config-file"
# when the lowercased path contains any of them.
CONFIG_EXTENSIONS = {
    ".env",
    ".yml",
    ".yaml",
    ".json",
    ".xml",
    ".ini",
    ".conf",
    ".config",
    ".properties",
    ".toml",
    ".tf",
    ".tfvars",
}

# Lowercase path substrings; classify_finding tags a finding "cicd-devops"
# when the lowercased path contains any of them.
CICD_MARKERS = {
    ".github/workflows",
    ".gitlab-ci.yml",
    "jenkinsfile",
    "circleci",
    "azure-pipelines",
    "bitbucket-pipelines",
    "dockerfile",
    "docker-compose",
}
60
+
61
+
62
def normalize_domain(value: str) -> str:
    """Reduce a URL or domain string to a bare lowercase ASCII hostname.

    Examples:
        https://www.example.com/path -> www.example.com
        http://example.com:443 -> example.com
        example.com/login -> example.com

    Raises:
        ValueError: If the input is empty, cannot be IDNA-encoded, or does
            not pass ``DOMAIN_RE``.
    """
    text = (value or "").strip()
    if not text:
        raise ValueError("empty domain/url input")

    # Without a scheme separator, prefix "//" so urlparse reads a netloc.
    parsed = urlparse(text) if "://" in text else urlparse("//" + text, scheme="http")

    # Handles odd values where urlparse cannot infer netloc.
    hostname = parsed.hostname or text.split("/")[0].split(":")[0]

    # Trim stray whitespace and dots, then drop a leading wildcard label.
    hostname = hostname.strip().strip(".").lower().removeprefix("*.")

    try:
        ascii_host = hostname.encode("idna").decode("ascii")
    except UnicodeError as exc:
        raise ValueError(f"invalid internationalized domain: {value}") from exc

    if DOMAIN_RE.match(ascii_host) is None:
        raise ValueError(f"invalid domain/url input: {value}")
    return ascii_host
96
+
97
+
98
def read_targets(single_url: str | None, list_path: str | None) -> List[str]:
    """Collect raw target strings from the command line and/or a list file.

    Args:
        single_url: Target passed with ``-u/--url``, if any.
        list_path: Path of a file with one target per line, if any. Blank
            lines and lines starting with ``#`` are skipped.

    Returns:
        The targets in first-seen order with exact duplicates removed.

    Raises:
        FileNotFoundError: If *list_path* does not exist.
        ValueError: If no target was supplied at all.
    """
    values: List[str] = []
    if single_url:
        values.append(single_url)
    if list_path:
        path = Path(list_path).expanduser()
        if not path.exists():
            raise FileNotFoundError(f"target list does not exist: {path}")
        # "utf-8-sig" drops a leading byte-order mark (common in files saved
        # by Windows editors). With plain "utf-8" the BOM stays glued to the
        # first target, which then fails domain validation.
        content = path.read_text(encoding="utf-8-sig", errors="ignore")
        for line in content.splitlines():
            item = line.strip()
            if not item or item.startswith("#"):
                continue
            values.append(item)
    if not values:
        raise ValueError("provide -u/--url or -list/--list")
    return dedupe_preserve_order(values)
114
+
115
+
116
def dedupe_preserve_order(values: Iterable[str]) -> List[str]:
    """Return *values* without repeats, keeping each item's first position."""
    # dict keys are unique and keep insertion order.
    return list(dict.fromkeys(values))
124
+
125
+
126
def build_domain_variants(domain: str) -> List[str]:
    """Return *domain* followed by its ``www.`` counterpart.

    A ``www.`` domain is paired with its bare form; any other domain is
    paired with its ``www.``-prefixed form.
    """
    prefix = "www."
    counterpart = domain[len(prefix):] if domain.startswith(prefix) else f"www.{domain}"
    return dedupe_preserve_order([domain, counterpart])
133
+
134
+
135
def build_code_queries(domain: str, deep: bool = False, include_email: bool = True) -> List[str]:
    """Build the code-search query strings for *domain* and its www variant.

    Args:
        domain: Normalized domain to search for.
        deep: Also emit the extension- and filename-scoped queries.
        include_email: Also emit the ``"@host"`` email-style query.

    Returns:
        The queries in generation order with duplicates removed.
    """
    base_templates = (
        '"{host}" in:file',
        '"https://{host}" in:file',
        '"http://{host}" in:file',
        '"//{host}" in:file',
        '"{host}" in:path',
    )
    deep_qualifiers = (
        "extension:env",
        "extension:yml",
        "extension:yaml",
        "extension:json",
        "extension:js",
        "extension:ts",
        "extension:properties",
        "extension:xml",
        "filename:Dockerfile",
        "filename:docker-compose.yml",
        "filename:Jenkinsfile",
    )

    collected: List[str] = []
    for host in build_domain_variants(domain):
        collected += [template.format(host=host) for template in base_templates]
        if include_email:
            collected.append(f'"@{host}" in:file')
        if deep:
            collected += [f'"{host}" {qualifier}' for qualifier in deep_qualifiers]

    return dedupe_preserve_order(collected)
170
+
171
+
172
def build_repository_queries(domain: str) -> List[str]:
    """Build the repository-search query strings for *domain*.

    Candidates are the domain itself, a "brand" label taken from it and,
    for ``www.`` domains, the bare domain. Candidates of two characters or
    fewer are dropped.
    """
    labels = domain.split(".")
    # For names like example.co.uk the brand is the third label from the end.
    second_level_is_generic = len(labels) >= 3 and labels[-2] in {"co", "com", "net", "org"}
    brand = labels[-3] if second_level_is_generic else labels[-2]

    candidates = [domain, brand]
    if domain.startswith("www."):
        candidates.append(domain[4:])

    queries = []
    for candidate in candidates:
        if candidate and len(candidate) > 2:
            queries.append(f'"{candidate}" in:name,description,readme')
    return dedupe_preserve_order(queries)
185
+
186
+
187
def classify_finding(path: str | None, fragments: Sequence[str] | None, file_url: str | None = None) -> List[str]:
    """Derive heuristic tags for a finding.

    Args:
        path: File path of the hit, or ``None`` for repository hits.
        fragments: Text matched by the search (or description/name text).
        file_url: Link to the hit on GitHub, if known.

    Returns:
        Any of ``sensitive-keyword``, ``config-file``, ``cicd-devops``,
        ``url-reference`` and ``email-reference``, in that order.
    """
    lower_path = (path or "").lower()
    matched_text = " ".join(fragments or []).lower()
    haystack = " ".join([lower_path, (file_url or "").lower(), matched_text])
    tags: List[str] = []

    if any(word in haystack for word in SENSITIVE_WORDS):
        tags.append("sensitive-keyword")

    if any(ext in lower_path for ext in CONFIG_EXTENSIONS):
        tags.append("config-file")

    if any(marker in lower_path for marker in CICD_MARKERS):
        tags.append("cicd-devops")

    # URL and email markers are looked for in the matched text only. The
    # callers pass a GitHub link as file_url, so searching it for "https://"
    # would tag every finding "url-reference"; "@" in a path would likewise
    # produce a false "email-reference".
    # "//" also covers "http://" and "https://".
    if "//" in matched_text:
        tags.append("url-reference")

    if "@" in matched_text:
        tags.append("email-reference")

    # Each tag is appended at most once, so no de-duplication is needed.
    return tags
@@ -0,0 +1,165 @@
1
+ Metadata-Version: 2.4
2
+ Name: gitosintx
3
+ Version: 0.1.0
4
+ Summary: GitHub OSINT tool for finding public repository mentions of domains and URLs.
5
+ Author: Harith Dilshan
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://h4rithd.com
8
+ Project-URL: Repository, https://github.com/h4rithd/GitOSINTX
9
+ Project-URL: Issues, https://github.com/h4rithd/GitOSINTX/issues
10
+ Keywords: osint,github-osint,bug-bounty,domain-recon,github-search,security-research
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Information Technology
14
+ Classifier: Intended Audience :: System Administrators
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Security
23
+ Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
24
+ Requires-Python: >=3.9
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: requests>=2.31.0
28
+ Dynamic: license-file
29
+
30
+ # GitOSINTX
31
+
32
+ **GitOSINTX** is a GitHub OSINT command-line tool for finding public repository mentions of domains, URLs, and email-style references.
33
+
34
+ ```text
35
+ ______ _ __ ____ _____ _____ _______ ______
36
+ / ____/(_) /_/ __ \/ ___// _/ | / /_ __/ |/ / |
37
+ / / __/ / __/ / / /\__ \ / // |/ / / / | / /| |
38
+ / /_/ / / /_/ /_/ /___/ // // /| / / / / | ___ |
39
+ \____/_/\__/\____//____/___/_/ |_/ /_/ /_/|_|/ |_|
40
+
41
+ GitOSINTX - GitHub Domain & URL Mention Enumerator
42
+ Developed by Harith Dilshan | h4rithd.com
43
+ ```
44
+
45
+ ## What it does
46
+
47
+ GitOSINTX accepts a domain or URL, normalizes it into a bare domain, generates multiple GitHub Search API queries, deduplicates results, classifies risky-looking references, and exports a clean JSON or HTML report.
48
+
49
+ It is useful for:
50
+
51
+ - Bug bounty passive recon
52
+ - Public code exposure discovery
53
+ - Domain and URL mention enumeration
54
+ - Finding hardcoded API endpoints in public repositories
55
+ - Identifying config, CI/CD, and sensitive-keyword references
56
+
57
+ ## What it does **not** do
58
+
59
+ GitOSINTX does not bypass GitHub limits, scrape private repositories, validate leaked credentials, or exploit anything. It only queries public GitHub data available to your GitHub API access level.
60
+
61
+ ## Install
62
+
63
+ From PyPI after publication:
64
+
65
+ ```bash
66
+ pip install gitosintx
67
+ ```
68
+
69
+ For local development:
70
+
71
+ ```bash
72
+ git clone https://github.com/h4rithd/GitOSINTX
73
+ cd GitOSINTX
74
+ python3 -m pip install -e .
75
+ ```
76
+
77
+ ## GitHub token
78
+
79
+ Authenticated GitHub requests are strongly recommended.
80
+
81
+ ```bash
82
+ export GITHUB_TOKEN='ghp_xxxxxxxxxxxxxxxxxxxx'
83
+ ```
84
+
85
+ Avoid passing tokens directly on the command line because shell history may store them.
86
+
87
+ ## Usage
88
+
89
+ Search a single domain or URL and export HTML:
90
+
91
+ ```bash
92
+ gitosintx -u https://h4rithd.com -o html --out h4rithd-github-osint.html
93
+ ```
94
+
95
+ Search a single domain and export JSON:
96
+
97
+ ```bash
98
+ gitosintx -u h4rithd.com -o json --out h4rithd-github-osint.json
99
+ ```
100
+
101
+ Search a list of domains/URLs:
102
+
103
+ ```bash
104
+ gitosintx -list examples/domains.txt -o html --out multi-domain-report.html
105
+ ```
106
+
107
+ Run deeper extension/config-focused queries:
108
+
109
+ ```bash
110
+ gitosintx -u h4rithd.com --deep -o html --out deep-report.html
111
+ ```
112
+
113
+ Be friendlier to GitHub rate limits:
114
+
115
+ ```bash
116
+ gitosintx -u h4rithd.com --max-pages 1 --sleep 2 --wait-rate-limit
117
+ ```
118
+
119
+ Show help:
120
+
121
+ ```bash
122
+ gitosintx -h
123
+ ```
124
+
125
+ ## CLI options
126
+
127
+ ```text
128
+ -u, --url Single target domain or URL
129
+ -list, --list File containing domains/URLs, one per line
130
+ -o, --output Output format: html or json
131
+ --out Output report path
132
+ --token GitHub token; prefer GITHUB_TOKEN env var
133
+ --max-pages Maximum GitHub result pages per query
134
+ --per-page Results per GitHub API page, max 100
135
+ --sleep Delay between paginated requests
136
+ --wait-rate-limit Sleep and continue when rate limited
137
+ --deep Run additional config/extension-focused queries
138
+ --no-repo-search Disable repository metadata search
139
+ --no-email-query Disable @domain query
140
+ --include-forks Include forks in repository search where supported
141
+ --quiet Suppress banner/progress output
142
+ -v, --verbose Print query progress to stderr
143
+ --version Print version
144
+ -h, --help Show help
145
+ ```
146
+
147
+ ## Output tags
148
+
149
+ GitOSINTX applies simple triage tags to help prioritize manual review:
150
+
151
+ | Tag | Meaning |
152
+ | --- | --- |
153
+ | `sensitive-keyword` | Match appears near words like token, secret, password, api_key, private_key, etc. |
154
+ | `config-file` | Match appears in config-style files such as `.env`, `.yml`, `.json`, `.properties`, `.tfvars`, etc. |
155
+ | `cicd-devops` | Match appears in CI/CD or deployment files such as GitHub Actions, Dockerfile, Jenkinsfile, etc. |
156
+ | `url-reference` | Match contains URL-style syntax. |
157
+ | `email-reference` | Match contains email-style syntax. |
158
+
159
+ ## Responsible use
160
+
161
+ Do not use discovered credentials. Do not validate tokens. Do not access systems without explicit authorization. For bug bounty, preserve evidence: repository, file path, commit/hash when available, matched snippet, exposure type, and remediation recommendation.
162
+
163
+ ## License
164
+
165
+ MIT License.
@@ -0,0 +1,14 @@
1
+ gitosintx/__init__.py,sha256=S_wG0eaO2wpE4YM4Nu3HaBE61B4DlgUNJwNA5UT4Z5U,111
2
+ gitosintx/__main__.py,sha256=MHKZ_ae3fSLGTLUUMOx15fWdeOnJSHhq-zslRP5F5Lc,79
3
+ gitosintx/banner.py,sha256=Jz0XQksBL1m0R7THq81FwZOVYkdsKNsvB2I2FqSJELs,378
4
+ gitosintx/cli.py,sha256=WiiV6oBUkYyReWLIAcqNFi86uBYs90MAKMu0J16EL2o,8810
5
+ gitosintx/github.py,sha256=PzOOFXmEE3LdLYb3YTHsSRUy3nTosITSEMN02eLs_aQ,7045
6
+ gitosintx/models.py,sha256=SNb8chEhY-O62FFRrHF-mgxGX2n7w4fOJL7c5GZv4d0,1644
7
+ gitosintx/report.py,sha256=YC4KUJVg7Xt4yOm_oxfJ8g3UfGk4t9bVHwAL3P2WEGo,6659
8
+ gitosintx/utils.py,sha256=Yn8qOfjpCkDgvj-BTWI7K-AWwwFRZVN23BraZMUtvUI,5670
9
+ gitosintx-0.1.0.dist-info/licenses/LICENSE,sha256=qopjeV5v847ZI8oileUZ_xRaSY5YVDv439ZB3XCf6qI,1071
10
+ gitosintx-0.1.0.dist-info/METADATA,sha256=H_5afMiUZadw94CFVeTqQz2xuZcdavPPNj87fN1E7Zg,5374
11
+ gitosintx-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
12
+ gitosintx-0.1.0.dist-info/entry_points.txt,sha256=qvWSrkvY4Mbu99XImHu4ash3LeDxe3KoDG0fUeGDQK8,49
13
+ gitosintx-0.1.0.dist-info/top_level.txt,sha256=VCA15sWsYEzXTdOYN_HYTcthdxc0MGxHEtb1ouGZqb0,10
14
+ gitosintx-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ gitosintx = gitosintx.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Harith Dilshan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ gitosintx